Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull first set of KVM updates from Paolo Bonzini:
"PPC:
- minor code cleanups
x86:
- PCID emulation and CR3 caching for shadow page tables
- nested VMX live migration
- nested VMCS shadowing
- optimized IPI hypercall
- some optimizations
ARM will come next week"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (85 commits)
kvm: x86: Set highest physical address bits in non-present/reserved SPTEs
KVM/x86: Use CC_SET()/CC_OUT in arch/x86/kvm/vmx.c
KVM: X86: Implement PV IPIs in linux guest
KVM: X86: Add kvm hypervisor init time platform setup callback
KVM: X86: Implement "send IPI" hypercall
KVM/x86: Move X86_CR4_OSXSAVE check into kvm_valid_sregs()
KVM: x86: Skip pae_root shadow allocation if tdp enabled
KVM/MMU: Combine flushing remote tlb in mmu_set_spte()
KVM: vmx: skip VMWRITE of HOST_{FS,GS}_BASE when possible
KVM: vmx: skip VMWRITE of HOST_{FS,GS}_SEL when possible
KVM: vmx: always initialize HOST_{FS,GS}_BASE to zero during setup
KVM: vmx: move struct host_state usage to struct loaded_vmcs
KVM: vmx: compute need to reload FS/GS/LDT on demand
KVM: nVMX: remove a misleading comment regarding vmcs02 fields
KVM: vmx: rename __vmx_load_host_state() and vmx_save_host_state()
KVM: vmx: add dedicated utility to access guest's kernel_gs_base
KVM: vmx: track host_state.loaded using a loaded_vmcs pointer
KVM: vmx: refactor segmentation code in vmx_save_host_state()
kvm: nVMX: Fix fault priority for VMX operations
kvm: nVMX: Fix fault vector for VMX operation at CPL > 0
...

@@ -3561,6 +3561,62 @@ Returns: 0 on success,

  -ENOENT on deassign if the conn_id isn't registered
  -EEXIST on assign if the conn_id is already registered

4.114 KVM_GET_NESTED_STATE

Capability: KVM_CAP_NESTED_STATE
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_nested_state (in/out)
Returns: 0 on success, -1 on error
Errors:
  E2BIG:     the total state size (including the fixed-size part of struct
             kvm_nested_state) exceeds the value of 'size' specified by
             the user; the size required will be written into size.

struct kvm_nested_state {
        __u16 flags;
        __u16 format;
        __u32 size;
        union {
                struct kvm_vmx_nested_state vmx;
                struct kvm_svm_nested_state svm;
                __u8 pad[120];
        };
        __u8 data[0];
};

#define KVM_STATE_NESTED_GUEST_MODE     0x00000001
#define KVM_STATE_NESTED_RUN_PENDING    0x00000002

#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON      0x00000002

struct kvm_vmx_nested_state {
        __u64 vmxon_pa;
        __u64 vmcs_pa;

        struct {
                __u16 flags;
        } smm;
};

This ioctl copies the vcpu's nested virtualization state from the kernel to
userspace.

The maximum size of the state, including the fixed-size part of struct
kvm_nested_state, can be retrieved by passing KVM_CAP_NESTED_STATE to
the KVM_CHECK_EXTENSION ioctl().

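As a minimal userspace sketch (not part of the kernel sources; kvm_fd and
vcpu_fd are assumed to be the /dev/kvm and vcpu file descriptors the VMM has
already opened), the state could be fetched like this:

        #include <stdlib.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        struct kvm_nested_state *get_nested_state(int kvm_fd, int vcpu_fd)
        {
                struct kvm_nested_state *state;
                int size;

                /* maximum state size, including the fixed-size header */
                size = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NESTED_STATE);
                if (size <= 0)
                        return NULL;

                state = calloc(1, size);
                if (!state)
                        return NULL;

                /* tell the kernel how much room was allocated */
                state->size = size;
                if (ioctl(vcpu_fd, KVM_GET_NESTED_STATE, state) < 0) {
                        /* on E2BIG the required size is reported in state->size */
                        free(state);
                        return NULL;
                }
                return state;
        }
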
4.115 KVM_SET_NESTED_STATE

Capability: KVM_CAP_NESTED_STATE
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_nested_state (in)
Returns: 0 on success, -1 on error

This copies the vcpu's kvm_nested_state struct from userspace to the kernel.
For the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.

5. The kvm_run structure
------------------------

@@ -62,6 +62,10 @@ KVM_FEATURE_ASYNC_PF_VMEXIT        || 10 || paravirtualized async PF VM exit
                                   ||    || can be enabled by setting bit 2
                                   ||    || when writing to msr 0x4b564d02
------------------------------------------------------------------------------
KVM_FEATURE_PV_SEND_IPI            || 11 || guest checks this feature bit
                                   ||    || before using paravirtualized
                                   ||    || send IPIs.
------------------------------------------------------------------------------
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
                                   ||    || per-cpu warps are expected in
                                   ||    || kvmclock.

@@ -121,3 +121,23 @@ compute the CLOCK_REALTIME for its clock, at the same instant.

Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.

6. KVM_HC_SEND_IPI
------------------------
Architecture: x86
Status: active
Purpose: Send IPIs to multiple vCPUs.

a0: lower part of the bitmap of destination APIC IDs
a1: higher part of the bitmap of destination APIC IDs
a2: the lowest APIC ID in bitmap
a3: APIC ICR

The hypercall lets a guest send multicast IPIs, with at most 128
destinations per hypercall in 64-bit mode and 64 vCPUs per
hypercall in 32-bit mode. The destinations are represented by a
bitmap contained in the first two arguments (a0 and a1). Bit 0 of
a0 corresponds to the APIC ID in the third argument (a2), bit 1
corresponds to the APIC ID a2+1, and so on.

Returns the number of CPUs to which the IPIs were delivered successfully.

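As an illustration only (APIC IDs are made up; in a Linux guest
kvm_hypercall4() and the APIC_DM_* constants come from <asm/kvm_para.h> and
<asm/apic.h>, and "vector" stands for whatever interrupt vector the guest
wants to deliver), a guest could send a fixed-vector IPI to APIC IDs 4 and 6
like this:

        /* bit 0 of a0 = APIC ID 4 (a2), bit 2 = APIC ID 6 */
        unsigned long ipi_bitmap = (1UL << 0) | (1UL << 2);
        long delivered;

        delivered = kvm_hypercall4(KVM_HC_SEND_IPI,
                                   ipi_bitmap,              /* a0: low 64 bits    */
                                   0,                       /* a1: high 64 bits   */
                                   4,                       /* a2: lowest APIC ID */
                                   APIC_DM_FIXED | vector); /* a3: APIC ICR       */
        /* delivered holds the number of vCPUs the IPI reached */
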
@@ -390,4 +390,51 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
#define SPLIT_HACK_MASK                 0xff000000
#define SPLIT_HACK_OFFS                 0xfb000000

/*
 * This packs a VCPU ID from the [0..KVM_MAX_VCPU_ID) space down to the
 * [0..KVM_MAX_VCPUS) space, using knowledge of the guest's core stride
 * (but not its actual threading mode, which is not available) to avoid
 * collisions.
 *
 * The implementation leaves VCPU IDs from the range [0..KVM_MAX_VCPUS) (block
 * 0) unchanged: if the guest is filling each VCORE completely then it will be
 * using consecutive IDs and it will fill the space without any packing.
 *
 * For higher VCPU IDs, the packed ID is based on the VCPU ID modulo
 * KVM_MAX_VCPUS (effectively masking off the top bits) and then an offset is
 * added to avoid collisions.
 *
 * VCPU IDs in the range [KVM_MAX_VCPUS..(KVM_MAX_VCPUS*2)) (block 1) are only
 * possible if the guest is leaving at least 1/2 of each VCORE empty, so IDs
 * can be safely packed into the second half of each VCORE by adding an offset
 * of (stride / 2).
 *
 * Similarly, if VCPU IDs in the range [(KVM_MAX_VCPUS*2)..(KVM_MAX_VCPUS*4))
 * (blocks 2 and 3) are seen, the guest must be leaving at least 3/4 of each
 * VCORE empty so packed IDs can be offset by (stride / 4) and (stride * 3 / 4).
 *
 * Finally, VCPU IDs from blocks 5..7 will only be seen if the guest is using a
 * stride of 8 and 1 thread per core so the remaining offsets of 1, 5, 3 and 7
 * must be free to use.
 *
 * (The offsets for each block are stored in block_offsets[], indexed by the
 * block number if the stride is 8. For cases where the guest's stride is less
 * than 8, we can re-use the block_offsets array by multiplying the block
 * number by (MAX_SMT_THREADS / stride) to reach the correct entry.)
 */
static inline u32 kvmppc_pack_vcpu_id(struct kvm *kvm, u32 id)
{
        const int block_offsets[MAX_SMT_THREADS] = {0, 4, 2, 6, 1, 5, 3, 7};
        int stride = kvm->arch.emul_smt_mode;
        int block = (id / KVM_MAX_VCPUS) * (MAX_SMT_THREADS / stride);
        u32 packed_id;

        if (WARN_ONCE(block >= MAX_SMT_THREADS, "VCPU ID too large to pack"))
                return 0;
        packed_id = (id % KVM_MAX_VCPUS) + block_offsets[block];
        if (WARN_ONCE(packed_id >= KVM_MAX_VCPUS, "VCPU ID packing failed"))
                return 0;
        return packed_id;
}

#endif /* __ASM_KVM_BOOK3S_H__ */

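As a worked illustration (numbers invented for this note, not taken from the
patch): with MAX_SMT_THREADS = 8 and an emulated SMT mode (stride) of 4, a
guest VCPU ID of KVM_MAX_VCPUS + 8 sits in raw block 1, so block becomes
1 * (8 / 4) = 2 and block_offsets[2] = 2; the packed ID is then
(id % KVM_MAX_VCPUS) + 2 = 8 + 2 = 10, i.e. the ID is folded back into block 0
but shifted into the otherwise unused second half of its virtual core.
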
@@ -42,7 +42,14 @@
 #define KVM_USER_MEM_SLOTS 512

-#include <asm/cputhreads.h>
-#define KVM_MAX_VCPU_ID                (threads_per_subcore * KVM_MAX_VCORES)
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+#include <asm/kvm_book3s_asm.h>         /* for MAX_SMT_THREADS */
+#define KVM_MAX_VCPU_ID                (MAX_SMT_THREADS * KVM_MAX_VCORES)
+
+#else
+#define KVM_MAX_VCPU_ID                KVM_MAX_VCPUS
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

 #define __KVM_HAVE_ARCH_INTC_INITIALIZED

@@ -672,7 +679,7 @@ struct kvm_vcpu_arch {
        gva_t vaddr_accessed;
        pgd_t *pgdir;

-       u8 io_gpr; /* GPR used as IO source/target */
+       u16 io_gpr; /* GPR used as IO source/target */
        u8 mmio_host_swabbed;
        u8 mmio_sign_extend;
        /* conversion between single and double precision */
@@ -688,7 +695,6 @@ struct kvm_vcpu_arch {
         */
        u8 mmio_vsx_copy_nums;
        u8 mmio_vsx_offset;
-       u8 mmio_vsx_tx_sx_enabled;
        u8 mmio_vmx_copy_nums;
        u8 mmio_vmx_offset;
        u8 mmio_copy_type;
@@ -801,14 +807,14 @@ struct kvm_vcpu_arch {
 #define KVMPPC_VCPU_BUSY_IN_HOST       2

 /* Values for vcpu->arch.io_gpr */
-#define KVM_MMIO_REG_MASK      0x001f
-#define KVM_MMIO_REG_EXT_MASK  0xffe0
+#define KVM_MMIO_REG_MASK      0x003f
+#define KVM_MMIO_REG_EXT_MASK  0xffc0
 #define KVM_MMIO_REG_GPR       0x0000
-#define KVM_MMIO_REG_FPR       0x0020
-#define KVM_MMIO_REG_QPR       0x0040
-#define KVM_MMIO_REG_FQPR      0x0060
-#define KVM_MMIO_REG_VSX       0x0080
-#define KVM_MMIO_REG_VMX       0x00c0
+#define KVM_MMIO_REG_FPR       0x0040
+#define KVM_MMIO_REG_QPR       0x0080
+#define KVM_MMIO_REG_FQPR      0x00c0
+#define KVM_MMIO_REG_VSX       0x0100
+#define KVM_MMIO_REG_VMX       0x0180

 #define __KVM_HAVE_ARCH_WQP
 #define __KVM_HAVE_CREATE_DEVICE

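As an illustration of the widened encoding (example values chosen here, not
taken from the patch): VSX register 40 is now encoded as
KVM_MMIO_REG_VSX | 40 = 0x0100 | 0x28 = 0x0128; masking with
KVM_MMIO_REG_MASK (0x003f) recovers register number 40, and masking with
KVM_MMIO_REG_EXT_MASK (0xffc0) recovers the register class KVM_MMIO_REG_VSX.
Because the encoded values can now exceed 0xff (e.g. the KVM_MMIO_REG_VMX
range), io_gpr itself grows from u8 to u16.
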
@@ -163,7 +163,7 @@
 #define PSSCR_ESL              0x00200000 /* Enable State Loss */
 #define PSSCR_SD               0x00400000 /* Status Disable */
 #define PSSCR_PLS              0xf000000000000000 /* Power-saving Level Status */
-#define PSSCR_GUEST_VIS        0xf0000000000003ff /* Guest-visible PSSCR fields */
+#define PSSCR_GUEST_VIS        0xf0000000000003ffUL /* Guest-visible PSSCR fields */
 #define PSSCR_FAKE_SUSPEND     0x00000400 /* Fake-suspend bit (P9 DD2.2) */
 #define PSSCR_FAKE_SUSPEND_LG  10         /* Fake-suspend bit position */

@@ -179,7 +179,7 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
|
||||
if ((tbltmp->it_page_shift <= stt->page_shift) &&
|
||||
(tbltmp->it_offset << tbltmp->it_page_shift ==
|
||||
stt->offset << stt->page_shift) &&
|
||||
(tbltmp->it_size << tbltmp->it_page_shift ==
|
||||
(tbltmp->it_size << tbltmp->it_page_shift >=
|
||||
stt->size << stt->page_shift)) {
|
||||
/*
|
||||
* Reference the table to avoid races with
|
||||
@@ -295,7 +295,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
|
||||
{
|
||||
struct kvmppc_spapr_tce_table *stt = NULL;
|
||||
struct kvmppc_spapr_tce_table *siter;
|
||||
unsigned long npages, size;
|
||||
unsigned long npages, size = args->size;
|
||||
int ret = -ENOMEM;
|
||||
int i;
|
||||
|
||||
@@ -303,7 +303,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
|
||||
(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
|
||||
return -EINVAL;
|
||||
|
||||
size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
|
||||
npages = kvmppc_tce_pages(size);
|
||||
ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
|
||||
if (ret)
|
||||
|
||||
@@ -127,14 +127,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
|
||||
* and SPURR count and should be set according to the number of
|
||||
* online threads in the vcore being run.
|
||||
*/
|
||||
#define RWMR_RPA_P8_1THREAD 0x164520C62609AECA
|
||||
#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9
|
||||
#define RWMR_RPA_P8_3THREAD 0x164520C62609AECA
|
||||
#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9
|
||||
#define RWMR_RPA_P8_5THREAD 0x164520C62609AECA
|
||||
#define RWMR_RPA_P8_6THREAD 0x164520C62609AECA
|
||||
#define RWMR_RPA_P8_7THREAD 0x164520C62609AECA
|
||||
#define RWMR_RPA_P8_8THREAD 0x164520C62609AECA
|
||||
#define RWMR_RPA_P8_1THREAD 0x164520C62609AECAUL
|
||||
#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9UL
|
||||
#define RWMR_RPA_P8_3THREAD 0x164520C62609AECAUL
|
||||
#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9UL
|
||||
#define RWMR_RPA_P8_5THREAD 0x164520C62609AECAUL
|
||||
#define RWMR_RPA_P8_6THREAD 0x164520C62609AECAUL
|
||||
#define RWMR_RPA_P8_7THREAD 0x164520C62609AECAUL
|
||||
#define RWMR_RPA_P8_8THREAD 0x164520C62609AECAUL
|
||||
|
||||
static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = {
|
||||
RWMR_RPA_P8_1THREAD,
|
||||
@@ -1807,7 +1807,7 @@ static int threads_per_vcore(struct kvm *kvm)
|
||||
return threads_per_subcore;
|
||||
}
|
||||
|
||||
static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
|
||||
static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
|
||||
{
|
||||
struct kvmppc_vcore *vcore;
|
||||
|
||||
@@ -1821,7 +1821,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
|
||||
init_swait_queue_head(&vcore->wq);
|
||||
vcore->preempt_tb = TB_NIL;
|
||||
vcore->lpcr = kvm->arch.lpcr;
|
||||
vcore->first_vcpuid = core * kvm->arch.smt_mode;
|
||||
vcore->first_vcpuid = id;
|
||||
vcore->kvm = kvm;
|
||||
INIT_LIST_HEAD(&vcore->preempt_list);
|
||||
|
||||
@@ -2037,12 +2037,26 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
|
||||
mutex_lock(&kvm->lock);
|
||||
vcore = NULL;
|
||||
err = -EINVAL;
|
||||
core = id / kvm->arch.smt_mode;
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
|
||||
if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode)) {
|
||||
pr_devel("KVM: VCPU ID too high\n");
|
||||
core = KVM_MAX_VCORES;
|
||||
} else {
|
||||
BUG_ON(kvm->arch.smt_mode != 1);
|
||||
core = kvmppc_pack_vcpu_id(kvm, id);
|
||||
}
|
||||
} else {
|
||||
core = id / kvm->arch.smt_mode;
|
||||
}
|
||||
if (core < KVM_MAX_VCORES) {
|
||||
vcore = kvm->arch.vcores[core];
|
||||
if (!vcore) {
|
||||
if (vcore && cpu_has_feature(CPU_FTR_ARCH_300)) {
|
||||
pr_devel("KVM: collision on id %u", id);
|
||||
vcore = NULL;
|
||||
} else if (!vcore) {
|
||||
err = -ENOMEM;
|
||||
vcore = kvmppc_vcore_create(kvm, core);
|
||||
vcore = kvmppc_vcore_create(kvm,
|
||||
id & ~(kvm->arch.smt_mode - 1));
|
||||
kvm->arch.vcores[core] = vcore;
|
||||
kvm->arch.online_vcores++;
|
||||
}
|
||||
@@ -4550,6 +4564,8 @@ static int kvmppc_book3s_init_hv(void)
|
||||
pr_err("KVM-HV: Cannot determine method for accessing XICS\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
/* presence of intc confirmed - node can be dropped again */
|
||||
of_node_put(np);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -317,6 +317,11 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
static u32 xive_vp(struct kvmppc_xive *xive, u32 server)
|
||||
{
|
||||
return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
|
||||
}
|
||||
|
||||
static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
|
||||
struct kvmppc_xive_src_block *sb,
|
||||
struct kvmppc_xive_irq_state *state)
|
||||
@@ -362,7 +367,7 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
|
||||
*/
|
||||
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
|
||||
xive_native_configure_irq(hw_num,
|
||||
xive->vp_base + state->act_server,
|
||||
xive_vp(xive, state->act_server),
|
||||
MASKED, state->number);
|
||||
/* set old_p so we can track if an H_EOI was done */
|
||||
state->old_p = true;
|
||||
@@ -418,7 +423,7 @@ static void xive_finish_unmask(struct kvmppc_xive *xive,
|
||||
*/
|
||||
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
|
||||
xive_native_configure_irq(hw_num,
|
||||
xive->vp_base + state->act_server,
|
||||
xive_vp(xive, state->act_server),
|
||||
state->act_priority, state->number);
|
||||
/* If an EOI is needed, do it here */
|
||||
if (!state->old_p)
|
||||
@@ -495,7 +500,7 @@ static int xive_target_interrupt(struct kvm *kvm,
|
||||
kvmppc_xive_select_irq(state, &hw_num, NULL);
|
||||
|
||||
return xive_native_configure_irq(hw_num,
|
||||
xive->vp_base + server,
|
||||
xive_vp(xive, server),
|
||||
prio, state->number);
|
||||
}
|
||||
|
||||
@@ -883,7 +888,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
|
||||
* which is fine for a never started interrupt.
|
||||
*/
|
||||
xive_native_configure_irq(hw_irq,
|
||||
xive->vp_base + state->act_server,
|
||||
xive_vp(xive, state->act_server),
|
||||
state->act_priority, state->number);
|
||||
|
||||
/*
|
||||
@@ -959,7 +964,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
|
||||
|
||||
/* Reconfigure the IPI */
|
||||
xive_native_configure_irq(state->ipi_number,
|
||||
xive->vp_base + state->act_server,
|
||||
xive_vp(xive, state->act_server),
|
||||
state->act_priority, state->number);
|
||||
|
||||
/*
|
||||
@@ -1084,7 +1089,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
|
||||
pr_devel("Duplicate !\n");
|
||||
return -EEXIST;
|
||||
}
|
||||
if (cpu >= KVM_MAX_VCPUS) {
|
||||
if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
|
||||
pr_devel("Out of bounds !\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -1098,7 +1103,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
|
||||
xc->xive = xive;
|
||||
xc->vcpu = vcpu;
|
||||
xc->server_num = cpu;
|
||||
xc->vp_id = xive->vp_base + cpu;
|
||||
xc->vp_id = xive_vp(xive, cpu);
|
||||
xc->mfrr = 0xff;
|
||||
xc->valid = true;
|
||||
|
||||
|
||||
@@ -106,7 +106,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
|
||||
* if mmio_vsx_tx_sx_enabled == 1, copy data between
|
||||
* VSR[32..63] and memory
|
||||
*/
|
||||
vcpu->arch.mmio_vsx_tx_sx_enabled = get_tx_or_sx(inst);
|
||||
vcpu->arch.mmio_vsx_copy_nums = 0;
|
||||
vcpu->arch.mmio_vsx_offset = 0;
|
||||
vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE;
|
||||
@@ -242,8 +241,8 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
emulated = kvmppc_handle_vsx_load(run, vcpu,
|
||||
KVM_MMIO_REG_VSX | (op.reg & 0x1f),
|
||||
io_size_each, 1, op.type & SIGNEXT);
|
||||
KVM_MMIO_REG_VSX|op.reg, io_size_each,
|
||||
1, op.type & SIGNEXT);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
@@ -363,7 +362,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
emulated = kvmppc_handle_vsx_store(run, vcpu,
|
||||
op.reg & 0x1f, io_size_each, 1);
|
||||
op.reg, io_size_each, 1);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -879,10 +879,10 @@ static inline void kvmppc_set_vsr_dword(struct kvm_vcpu *vcpu,
|
||||
if (offset == -1)
|
||||
return;
|
||||
|
||||
if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
|
||||
val.vval = VCPU_VSX_VR(vcpu, index);
|
||||
if (index >= 32) {
|
||||
val.vval = VCPU_VSX_VR(vcpu, index - 32);
|
||||
val.vsxval[offset] = gpr;
|
||||
VCPU_VSX_VR(vcpu, index) = val.vval;
|
||||
VCPU_VSX_VR(vcpu, index - 32) = val.vval;
|
||||
} else {
|
||||
VCPU_VSX_FPR(vcpu, index, offset) = gpr;
|
||||
}
|
||||
@@ -894,11 +894,11 @@ static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu,
|
||||
union kvmppc_one_reg val;
|
||||
int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
|
||||
|
||||
if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
|
||||
val.vval = VCPU_VSX_VR(vcpu, index);
|
||||
if (index >= 32) {
|
||||
val.vval = VCPU_VSX_VR(vcpu, index - 32);
|
||||
val.vsxval[0] = gpr;
|
||||
val.vsxval[1] = gpr;
|
||||
VCPU_VSX_VR(vcpu, index) = val.vval;
|
||||
VCPU_VSX_VR(vcpu, index - 32) = val.vval;
|
||||
} else {
|
||||
VCPU_VSX_FPR(vcpu, index, 0) = gpr;
|
||||
VCPU_VSX_FPR(vcpu, index, 1) = gpr;
|
||||
@@ -911,12 +911,12 @@ static inline void kvmppc_set_vsr_word_dump(struct kvm_vcpu *vcpu,
|
||||
union kvmppc_one_reg val;
|
||||
int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
|
||||
|
||||
if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
|
||||
if (index >= 32) {
|
||||
val.vsx32val[0] = gpr;
|
||||
val.vsx32val[1] = gpr;
|
||||
val.vsx32val[2] = gpr;
|
||||
val.vsx32val[3] = gpr;
|
||||
VCPU_VSX_VR(vcpu, index) = val.vval;
|
||||
VCPU_VSX_VR(vcpu, index - 32) = val.vval;
|
||||
} else {
|
||||
val.vsx32val[0] = gpr;
|
||||
val.vsx32val[1] = gpr;
|
||||
@@ -936,10 +936,10 @@ static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu,
|
||||
if (offset == -1)
|
||||
return;
|
||||
|
||||
if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
|
||||
val.vval = VCPU_VSX_VR(vcpu, index);
|
||||
if (index >= 32) {
|
||||
val.vval = VCPU_VSX_VR(vcpu, index - 32);
|
||||
val.vsx32val[offset] = gpr32;
|
||||
VCPU_VSX_VR(vcpu, index) = val.vval;
|
||||
VCPU_VSX_VR(vcpu, index - 32) = val.vval;
|
||||
} else {
|
||||
dword_offset = offset / 2;
|
||||
word_offset = offset % 2;
|
||||
@@ -1360,10 +1360,10 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!vcpu->arch.mmio_vsx_tx_sx_enabled) {
|
||||
if (rs < 32) {
|
||||
*val = VCPU_VSX_FPR(vcpu, rs, vsx_offset);
|
||||
} else {
|
||||
reg.vval = VCPU_VSX_VR(vcpu, rs);
|
||||
reg.vval = VCPU_VSX_VR(vcpu, rs - 32);
|
||||
*val = reg.vsxval[vsx_offset];
|
||||
}
|
||||
break;
|
||||
@@ -1377,13 +1377,13 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!vcpu->arch.mmio_vsx_tx_sx_enabled) {
|
||||
if (rs < 32) {
|
||||
dword_offset = vsx_offset / 2;
|
||||
word_offset = vsx_offset % 2;
|
||||
reg.vsxval[0] = VCPU_VSX_FPR(vcpu, rs, dword_offset);
|
||||
*val = reg.vsx32val[word_offset];
|
||||
} else {
|
||||
reg.vval = VCPU_VSX_VR(vcpu, rs);
|
||||
reg.vval = VCPU_VSX_VR(vcpu, rs - 32);
|
||||
*val = reg.vsx32val[vsx_offset];
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -269,6 +269,7 @@ struct kvm_s390_sie_block {
|
||||
__u8 reserved1c0[8]; /* 0x01c0 */
|
||||
#define ECD_HOSTREGMGMT 0x20000000
|
||||
#define ECD_MEF 0x08000000
|
||||
#define ECD_ETOKENF 0x02000000
|
||||
__u32 ecd; /* 0x01c8 */
|
||||
__u8 reserved1cc[18]; /* 0x01cc */
|
||||
__u64 pp; /* 0x01de */
|
||||
@@ -655,6 +656,7 @@ struct kvm_vcpu_arch {
|
||||
seqcount_t cputm_seqcount;
|
||||
__u64 cputm_start;
|
||||
bool gs_enabled;
|
||||
bool skey_enabled;
|
||||
};
|
||||
|
||||
struct kvm_vm_stat {
|
||||
@@ -793,12 +795,6 @@ struct kvm_s390_vsie {
|
||||
struct page *pages[KVM_MAX_VCPUS];
|
||||
};
|
||||
|
||||
struct kvm_s390_migration_state {
|
||||
unsigned long bitmap_size; /* in bits (number of guest pages) */
|
||||
atomic64_t dirty_pages; /* number of dirty pages */
|
||||
unsigned long *pgste_bitmap;
|
||||
};
|
||||
|
||||
struct kvm_arch{
|
||||
void *sca;
|
||||
int use_esca;
|
||||
@@ -828,7 +824,8 @@ struct kvm_arch{
|
||||
struct kvm_s390_vsie vsie;
|
||||
u8 epdx;
|
||||
u64 epoch;
|
||||
struct kvm_s390_migration_state *migration_state;
|
||||
int migration_mode;
|
||||
atomic64_t cmma_dirty_pages;
|
||||
/* subset of available cpu features enabled by user space */
|
||||
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
|
||||
struct kvm_s390_gisa *gisa;
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
/*
|
||||
* KVM s390 specific structures and definitions
|
||||
*
|
||||
* Copyright IBM Corp. 2008
|
||||
* Copyright IBM Corp. 2008, 2018
|
||||
*
|
||||
* Author(s): Carsten Otte <cotte@de.ibm.com>
|
||||
* Christian Borntraeger <borntraeger@de.ibm.com>
|
||||
@@ -225,6 +225,7 @@ struct kvm_guest_debug_arch {
|
||||
#define KVM_SYNC_FPRS (1UL << 8)
|
||||
#define KVM_SYNC_GSCB (1UL << 9)
|
||||
#define KVM_SYNC_BPBC (1UL << 10)
|
||||
#define KVM_SYNC_ETOKEN (1UL << 11)
|
||||
/* length and alignment of the sdnx as a power of two */
|
||||
#define SDNXC 8
|
||||
#define SDNXL (1UL << SDNXC)
|
||||
@@ -258,6 +259,8 @@ struct kvm_sync_regs {
|
||||
struct {
|
||||
__u64 reserved1[2];
|
||||
__u64 gscb[4];
|
||||
__u64 etoken;
|
||||
__u64 etoken_extension;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
@@ -906,54 +906,37 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
|
||||
*/
|
||||
static int kvm_s390_vm_start_migration(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_s390_migration_state *mgs;
|
||||
struct kvm_memory_slot *ms;
|
||||
/* should be the only one */
|
||||
struct kvm_memslots *slots;
|
||||
unsigned long ram_pages;
|
||||
unsigned long ram_pages = 0;
|
||||
int slotnr;
|
||||
|
||||
/* migration mode already enabled */
|
||||
if (kvm->arch.migration_state)
|
||||
if (kvm->arch.migration_mode)
|
||||
return 0;
|
||||
|
||||
slots = kvm_memslots(kvm);
|
||||
if (!slots || !slots->used_slots)
|
||||
return -EINVAL;
|
||||
|
||||
mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
|
||||
if (!mgs)
|
||||
return -ENOMEM;
|
||||
kvm->arch.migration_state = mgs;
|
||||
|
||||
if (kvm->arch.use_cmma) {
|
||||
/*
|
||||
* Get the first slot. They are reverse sorted by base_gfn, so
|
||||
* the first slot is also the one at the end of the address
|
||||
* space. We have verified above that at least one slot is
|
||||
* present.
|
||||
*/
|
||||
ms = slots->memslots;
|
||||
/* round up so we only use full longs */
|
||||
ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
|
||||
/* allocate enough bytes to store all the bits */
|
||||
mgs->pgste_bitmap = vmalloc(ram_pages / 8);
|
||||
if (!mgs->pgste_bitmap) {
|
||||
kfree(mgs);
|
||||
kvm->arch.migration_state = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
mgs->bitmap_size = ram_pages;
|
||||
atomic64_set(&mgs->dirty_pages, ram_pages);
|
||||
/* mark all the pages in active slots as dirty */
|
||||
for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
|
||||
ms = slots->memslots + slotnr;
|
||||
bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
|
||||
}
|
||||
|
||||
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
|
||||
if (!kvm->arch.use_cmma) {
|
||||
kvm->arch.migration_mode = 1;
|
||||
return 0;
|
||||
}
|
||||
/* mark all the pages in active slots as dirty */
|
||||
for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
|
||||
ms = slots->memslots + slotnr;
|
||||
/*
|
||||
* The second half of the bitmap is only used on x86,
|
||||
* and would be wasted otherwise, so we put it to good
|
||||
* use here to keep track of the state of the storage
|
||||
* attributes.
|
||||
*/
|
||||
memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
|
||||
ram_pages += ms->npages;
|
||||
}
|
||||
atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
|
||||
kvm->arch.migration_mode = 1;
|
||||
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -963,21 +946,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
|
||||
*/
|
||||
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_s390_migration_state *mgs;
|
||||
|
||||
/* migration mode already disabled */
|
||||
if (!kvm->arch.migration_state)
|
||||
if (!kvm->arch.migration_mode)
|
||||
return 0;
|
||||
mgs = kvm->arch.migration_state;
|
||||
kvm->arch.migration_state = NULL;
|
||||
|
||||
if (kvm->arch.use_cmma) {
|
||||
kvm->arch.migration_mode = 0;
|
||||
if (kvm->arch.use_cmma)
|
||||
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
|
||||
/* We have to wait for the essa emulation to finish */
|
||||
synchronize_srcu(&kvm->srcu);
|
||||
vfree(mgs->pgste_bitmap);
|
||||
}
|
||||
kfree(mgs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1005,7 +979,7 @@ static int kvm_s390_vm_set_migration(struct kvm *kvm,
|
||||
static int kvm_s390_vm_get_migration(struct kvm *kvm,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
u64 mig = (kvm->arch.migration_state != NULL);
|
||||
u64 mig = kvm->arch.migration_mode;
|
||||
|
||||
if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
|
||||
return -ENXIO;
|
||||
@@ -1652,6 +1626,134 @@ out:
|
||||
/* for consistency */
|
||||
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
|
||||
|
||||
/*
|
||||
* Similar to gfn_to_memslot, but returns the index of a memslot also when the
|
||||
* address falls in a hole. In that case the index of one of the memslots
|
||||
* bordering the hole is returned.
|
||||
*/
|
||||
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
|
||||
{
|
||||
int start = 0, end = slots->used_slots;
|
||||
int slot = atomic_read(&slots->lru_slot);
|
||||
struct kvm_memory_slot *memslots = slots->memslots;
|
||||
|
||||
if (gfn >= memslots[slot].base_gfn &&
|
||||
gfn < memslots[slot].base_gfn + memslots[slot].npages)
|
||||
return slot;
|
||||
|
||||
while (start < end) {
|
||||
slot = start + (end - start) / 2;
|
||||
|
||||
if (gfn >= memslots[slot].base_gfn)
|
||||
end = slot;
|
||||
else
|
||||
start = slot + 1;
|
||||
}
|
||||
|
||||
if (gfn >= memslots[start].base_gfn &&
|
||||
gfn < memslots[start].base_gfn + memslots[start].npages) {
|
||||
atomic_set(&slots->lru_slot, start);
|
||||
}
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
|
||||
u8 *res, unsigned long bufsize)
|
||||
{
|
||||
unsigned long pgstev, hva, cur_gfn = args->start_gfn;
|
||||
|
||||
args->count = 0;
|
||||
while (args->count < bufsize) {
|
||||
hva = gfn_to_hva(kvm, cur_gfn);
|
||||
/*
|
||||
* We return an error if the first value was invalid, but we
|
||||
* return successfully if at least one value was copied.
|
||||
*/
|
||||
if (kvm_is_error_hva(hva))
|
||||
return args->count ? 0 : -EFAULT;
|
||||
if (get_pgste(kvm->mm, hva, &pgstev) < 0)
|
||||
pgstev = 0;
|
||||
res[args->count++] = (pgstev >> 24) & 0x43;
|
||||
cur_gfn++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
|
||||
unsigned long cur_gfn)
|
||||
{
|
||||
int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
|
||||
struct kvm_memory_slot *ms = slots->memslots + slotidx;
|
||||
unsigned long ofs = cur_gfn - ms->base_gfn;
|
||||
|
||||
if (ms->base_gfn + ms->npages <= cur_gfn) {
|
||||
slotidx--;
|
||||
/* If we are above the highest slot, wrap around */
|
||||
if (slotidx < 0)
|
||||
slotidx = slots->used_slots - 1;
|
||||
|
||||
ms = slots->memslots + slotidx;
|
||||
ofs = 0;
|
||||
}
|
||||
ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
|
||||
while ((slotidx > 0) && (ofs >= ms->npages)) {
|
||||
slotidx--;
|
||||
ms = slots->memslots + slotidx;
|
||||
ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
|
||||
}
|
||||
return ms->base_gfn + ofs;
|
||||
}
|
||||
|
||||
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
|
||||
u8 *res, unsigned long bufsize)
|
||||
{
|
||||
unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
|
||||
struct kvm_memslots *slots = kvm_memslots(kvm);
|
||||
struct kvm_memory_slot *ms;
|
||||
|
||||
cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
|
||||
ms = gfn_to_memslot(kvm, cur_gfn);
|
||||
args->count = 0;
|
||||
args->start_gfn = cur_gfn;
|
||||
if (!ms)
|
||||
return 0;
|
||||
next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
|
||||
mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
|
||||
|
||||
while (args->count < bufsize) {
|
||||
hva = gfn_to_hva(kvm, cur_gfn);
|
||||
if (kvm_is_error_hva(hva))
|
||||
return 0;
|
||||
/* Decrement only if we actually flipped the bit to 0 */
|
||||
if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
|
||||
atomic64_dec(&kvm->arch.cmma_dirty_pages);
|
||||
if (get_pgste(kvm->mm, hva, &pgstev) < 0)
|
||||
pgstev = 0;
|
||||
/* Save the value */
|
||||
res[args->count++] = (pgstev >> 24) & 0x43;
|
||||
/* If the next bit is too far away, stop. */
|
||||
if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
|
||||
return 0;
|
||||
/* If we reached the previous "next", find the next one */
|
||||
if (cur_gfn == next_gfn)
|
||||
next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
|
||||
/* Reached the end of memory or of the buffer, stop */
|
||||
if ((next_gfn >= mem_end) ||
|
||||
(next_gfn - args->start_gfn >= bufsize))
|
||||
return 0;
|
||||
cur_gfn++;
|
||||
/* Reached the end of the current memslot, take the next one. */
|
||||
if (cur_gfn - ms->base_gfn >= ms->npages) {
|
||||
ms = gfn_to_memslot(kvm, cur_gfn);
|
||||
if (!ms)
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function searches for the next page with dirty CMMA attributes, and
|
||||
* saves the attributes in the buffer up to either the end of the buffer or
|
||||
@@ -1663,22 +1765,18 @@ out:
|
||||
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
|
||||
struct kvm_s390_cmma_log *args)
|
||||
{
|
||||
struct kvm_s390_migration_state *s = kvm->arch.migration_state;
|
||||
unsigned long bufsize, hva, pgstev, i, next, cur;
|
||||
int srcu_idx, peek, r = 0, rr;
|
||||
u8 *res;
|
||||
unsigned long bufsize;
|
||||
int srcu_idx, peek, ret;
|
||||
u8 *values;
|
||||
|
||||
cur = args->start_gfn;
|
||||
i = next = pgstev = 0;
|
||||
|
||||
if (unlikely(!kvm->arch.use_cmma))
|
||||
if (!kvm->arch.use_cmma)
|
||||
return -ENXIO;
|
||||
/* Invalid/unsupported flags were specified */
|
||||
if (args->flags & ~KVM_S390_CMMA_PEEK)
|
||||
return -EINVAL;
|
||||
/* Migration mode query, and we are not doing a migration */
|
||||
peek = !!(args->flags & KVM_S390_CMMA_PEEK);
|
||||
if (!peek && !s)
|
||||
if (!peek && !kvm->arch.migration_mode)
|
||||
return -EINVAL;
|
||||
/* CMMA is disabled or was not used, or the buffer has length zero */
|
||||
bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
|
||||
@@ -1686,74 +1784,35 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
|
||||
memset(args, 0, sizeof(*args));
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!peek) {
|
||||
/* We are not peeking, and there are no dirty pages */
|
||||
if (!atomic64_read(&s->dirty_pages)) {
|
||||
memset(args, 0, sizeof(*args));
|
||||
return 0;
|
||||
}
|
||||
cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
|
||||
args->start_gfn);
|
||||
if (cur >= s->bitmap_size) /* nothing found, loop back */
|
||||
cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
|
||||
if (cur >= s->bitmap_size) { /* again! (very unlikely) */
|
||||
memset(args, 0, sizeof(*args));
|
||||
return 0;
|
||||
}
|
||||
next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
|
||||
/* We are not peeking, and there are no dirty pages */
|
||||
if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
|
||||
memset(args, 0, sizeof(*args));
|
||||
return 0;
|
||||
}
|
||||
|
||||
res = vmalloc(bufsize);
|
||||
if (!res)
|
||||
values = vmalloc(bufsize);
|
||||
if (!values)
|
||||
return -ENOMEM;
|
||||
|
||||
args->start_gfn = cur;
|
||||
|
||||
down_read(&kvm->mm->mmap_sem);
|
||||
srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
while (i < bufsize) {
|
||||
hva = gfn_to_hva(kvm, cur);
|
||||
if (kvm_is_error_hva(hva)) {
|
||||
r = -EFAULT;
|
||||
break;
|
||||
}
|
||||
/* decrement only if we actually flipped the bit to 0 */
|
||||
if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
|
||||
atomic64_dec(&s->dirty_pages);
|
||||
r = get_pgste(kvm->mm, hva, &pgstev);
|
||||
if (r < 0)
|
||||
pgstev = 0;
|
||||
/* save the value */
|
||||
res[i++] = (pgstev >> 24) & 0x43;
|
||||
/*
|
||||
* if the next bit is too far away, stop.
|
||||
* if we reached the previous "next", find the next one
|
||||
*/
|
||||
if (!peek) {
|
||||
if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
|
||||
break;
|
||||
if (cur == next)
|
||||
next = find_next_bit(s->pgste_bitmap,
|
||||
s->bitmap_size, cur + 1);
|
||||
/* reached the end of the bitmap or of the buffer, stop */
|
||||
if ((next >= s->bitmap_size) ||
|
||||
(next >= args->start_gfn + bufsize))
|
||||
break;
|
||||
}
|
||||
cur++;
|
||||
}
|
||||
if (peek)
|
||||
ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
|
||||
else
|
||||
ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
|
||||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||
up_read(&kvm->mm->mmap_sem);
|
||||
args->count = i;
|
||||
args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
|
||||
|
||||
rr = copy_to_user((void __user *)args->values, res, args->count);
|
||||
if (rr)
|
||||
r = -EFAULT;
|
||||
if (kvm->arch.migration_mode)
|
||||
args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
|
||||
else
|
||||
args->remaining = 0;
|
||||
|
||||
vfree(res);
|
||||
return r;
|
||||
if (copy_to_user((void __user *)args->values, values, args->count))
|
||||
ret = -EFAULT;
|
||||
|
||||
vfree(values);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2192,10 +2251,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
kvm_s390_destroy_adapters(kvm);
|
||||
kvm_s390_clear_float_irqs(kvm);
|
||||
kvm_s390_vsie_destroy(kvm);
|
||||
if (kvm->arch.migration_state) {
|
||||
vfree(kvm->arch.migration_state->pgste_bitmap);
|
||||
kfree(kvm->arch.migration_state);
|
||||
}
|
||||
KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
|
||||
}
|
||||
|
||||
@@ -2353,6 +2408,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
|
||||
if (test_kvm_facility(vcpu->kvm, 133))
|
||||
vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
|
||||
if (test_kvm_facility(vcpu->kvm, 156))
|
||||
vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
|
||||
/* fprs can be synchronized via vrs, even if the guest has no vx. With
|
||||
* MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
|
||||
*/
|
||||
@@ -2602,7 +2659,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
if (test_kvm_facility(vcpu->kvm, 139))
|
||||
vcpu->arch.sie_block->ecd |= ECD_MEF;
|
||||
|
||||
if (test_kvm_facility(vcpu->kvm, 156))
|
||||
vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
|
||||
if (vcpu->arch.sie_block->gd) {
|
||||
vcpu->arch.sie_block->eca |= ECA_AIV;
|
||||
VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
|
||||
@@ -3520,6 +3578,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
/* SIE will load etoken directly from SDNX and therefore kvm_run */
|
||||
|
||||
kvm_run->kvm_dirty_regs = 0;
|
||||
}
|
||||
@@ -3559,7 +3618,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
__ctl_clear_bit(2, 4);
|
||||
vcpu->arch.host_gscb = NULL;
|
||||
}
|
||||
|
||||
/* SIE will save etoken directly into SDNX and therefore kvm_run */
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
|
||||
@@ -205,13 +205,10 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
|
||||
int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int rc;
|
||||
struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
|
||||
|
||||
trace_kvm_s390_skey_related_inst(vcpu);
|
||||
/* Already enabled? */
|
||||
if (vcpu->kvm->arch.use_skf &&
|
||||
!(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
|
||||
!kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
|
||||
if (vcpu->arch.skey_enabled)
|
||||
return 0;
|
||||
|
||||
rc = s390_enable_skey();
|
||||
@@ -222,9 +219,10 @@ int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
|
||||
if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
|
||||
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
|
||||
if (!vcpu->kvm->arch.use_skf)
|
||||
sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
|
||||
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
|
||||
else
|
||||
sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
|
||||
vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
|
||||
vcpu->arch.skey_enabled = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -987,7 +985,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
|
||||
if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
|
||||
if (clear_user((void __user *)vmaddr, PAGE_SIZE))
|
||||
if (kvm_clear_guest(vcpu->kvm, start, PAGE_SIZE))
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
}
|
||||
|
||||
@@ -1024,9 +1022,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
|
||||
/*
|
||||
* Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu)
|
||||
*/
|
||||
static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
|
||||
{
|
||||
struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state;
|
||||
int r1, r2, nappended, entries;
|
||||
unsigned long gfn, hva, res, pgstev, ptev;
|
||||
unsigned long *cbrlo;
|
||||
@@ -1076,10 +1076,12 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
|
||||
cbrlo[entries] = gfn << PAGE_SHIFT;
|
||||
}
|
||||
|
||||
if (orc && gfn < ms->bitmap_size) {
|
||||
/* increment only if we are really flipping the bit to 1 */
|
||||
if (!test_and_set_bit(gfn, ms->pgste_bitmap))
|
||||
atomic64_inc(&ms->dirty_pages);
|
||||
if (orc) {
|
||||
struct kvm_memory_slot *ms = gfn_to_memslot(vcpu->kvm, gfn);
|
||||
|
||||
/* Increment only if we are really flipping the bit */
|
||||
if (ms && !test_and_set_bit(gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
|
||||
atomic64_inc(&vcpu->kvm->arch.cmma_dirty_pages);
|
||||
}
|
||||
|
||||
return nappended;
|
||||
@@ -1108,7 +1110,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
|
||||
: ESSA_SET_STABLE_IF_RESIDENT))
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
|
||||
if (likely(!vcpu->kvm->arch.migration_state)) {
|
||||
if (!vcpu->kvm->arch.migration_mode) {
|
||||
/*
|
||||
* CMMA is enabled in the KVM settings, but is disabled in
|
||||
* the SIE block and in the mm_context, and we are not doing
|
||||
@@ -1136,10 +1138,16 @@ static int handle_essa(struct kvm_vcpu *vcpu)
|
||||
/* Retry the ESSA instruction */
|
||||
kvm_s390_retry_instr(vcpu);
|
||||
} else {
|
||||
/* Account for the possible extra cbrl entry */
|
||||
i = do_essa(vcpu, orc);
|
||||
int srcu_idx;
|
||||
|
||||
down_read(&vcpu->kvm->mm->mmap_sem);
|
||||
srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
i = __do_essa(vcpu, orc);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
|
||||
up_read(&vcpu->kvm->mm->mmap_sem);
|
||||
if (i < 0)
|
||||
return i;
|
||||
/* Account for the possible extra cbrl entry */
|
||||
entries += i;
|
||||
}
|
||||
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
/*
|
||||
* kvm nested virtualization support for s390x
|
||||
*
|
||||
* Copyright IBM Corp. 2016
|
||||
* Copyright IBM Corp. 2016, 2018
|
||||
*
|
||||
* Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
|
||||
*/
|
||||
@@ -378,6 +378,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
if (test_kvm_facility(vcpu->kvm, 139))
|
||||
scb_s->ecd |= scb_o->ecd & ECD_MEF;
|
||||
|
||||
/* etoken */
|
||||
if (test_kvm_facility(vcpu->kvm, 156))
|
||||
scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
|
||||
|
||||
prepare_ibc(vcpu, vsie_page);
|
||||
rc = shadow_crycb(vcpu, vsie_page);
|
||||
out:
|
||||
@@ -627,7 +631,8 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
vsie_page->riccbd_gpa = gpa;
|
||||
scb_s->riccbd = hpa;
|
||||
}
|
||||
if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
|
||||
if (((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) ||
|
||||
(scb_s->ecd & ECD_ETOKENF)) {
|
||||
unsigned long sdnxc;
|
||||
|
||||
gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
|
||||
@@ -818,6 +823,8 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
* - < 0 if an error occurred
|
||||
*/
|
||||
static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
__releases(vcpu->kvm->srcu)
|
||||
__acquires(vcpu->kvm->srcu)
|
||||
{
|
||||
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
|
||||
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
* numbering scheme from the Princples of Operations: most significant bit
|
||||
* has bit number 0.
|
||||
*
|
||||
* Copyright IBM Corp. 2015
|
||||
* Copyright IBM Corp. 2015, 2018
|
||||
*
|
||||
*/
|
||||
|
||||
@@ -106,6 +106,7 @@ static struct facility_def facility_defs[] = {
|
||||
|
||||
.name = "FACILITIES_KVM_CPUMODEL",
|
||||
.bits = (int[]){
|
||||
156, /* etoken facility */
|
||||
-1 /* END */
|
||||
}
|
||||
},
|
||||
|
||||
@@ -1,2 +1,2 @@
-obj-y                  := hv_init.o mmu.o
+obj-y                  := hv_init.o mmu.o nested.o
 obj-$(CONFIG_X86_64)   += hv_apic.o

arch/x86/hyperv/nested.c (new file, 56 lines)
@@ -0,0 +1,56 @@
|
||||
// SPDX-License-Identifier: GPL-2.0

/*
 * Hyper-V nested virtualization code.
 *
 * Copyright (C) 2018, Microsoft, Inc.
 *
 * Author : Lan Tianyu <Tianyu.Lan@microsoft.com>
 */


#include <linux/types.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
#include <asm/tlbflush.h>

#include <asm/trace/hyperv.h>

int hyperv_flush_guest_mapping(u64 as)
{
        struct hv_guest_mapping_flush **flush_pcpu;
        struct hv_guest_mapping_flush *flush;
        u64 status;
        unsigned long flags;
        int ret = -ENOTSUPP;

        if (!hv_hypercall_pg)
                goto fault;

        local_irq_save(flags);

        flush_pcpu = (struct hv_guest_mapping_flush **)
                this_cpu_ptr(hyperv_pcpu_input_arg);

        flush = *flush_pcpu;

        if (unlikely(!flush)) {
                local_irq_restore(flags);
                goto fault;
        }

        flush->address_space = as;
        flush->flags = 0;

        status = hv_do_hypercall(HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE,
                                 flush, NULL);
        local_irq_restore(flags);

        if (!(status & HV_HYPERCALL_RESULT_MASK))
                ret = 0;

fault:
        trace_hyperv_nested_flush_guest_mapping(as, ret);
        return ret;
}
EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping);

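For orientation only, a hedged sketch (not code from this series) of how a
VMX backend could route the new kvm_x86_ops->tlb_remote_flush hook through
this export; to_kvm_vmx_eptp() is a hypothetical helper standing in for
whatever EPT pointer the backend tracks as shared by all vCPUs:

        /* illustrative wiring of the remote-flush hook to Hyper-V */
        static int hv_remote_flush_tlb(struct kvm *kvm)
        {
                u64 eptp = to_kvm_vmx_eptp(kvm);        /* hypothetical helper */

                if (!VALID_PAGE(eptp))
                        return -ENOTSUPP;

                /* HvFlushGuestPhysicalAddressSpace takes the address space (EPTP) */
                return hyperv_flush_guest_mapping(eptp);
        }
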
@@ -310,6 +310,7 @@ struct ms_hyperv_tsc_page {
|
||||
#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
|
||||
|
||||
/* Nested features (CPUID 0x4000000A) EAX */
|
||||
#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
|
||||
#define HV_X64_NESTED_MSR_BITMAP BIT(19)
|
||||
|
||||
struct hv_reenlightenment_control {
|
||||
@@ -351,6 +352,7 @@ struct hv_tsc_emulation_status {
|
||||
#define HVCALL_SEND_IPI_EX 0x0015
|
||||
#define HVCALL_POST_MESSAGE 0x005c
|
||||
#define HVCALL_SIGNAL_EVENT 0x005d
|
||||
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
|
||||
|
||||
#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
|
||||
#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
|
||||
@@ -742,6 +744,12 @@ struct ipi_arg_ex {
|
||||
struct hv_vpset vp_set;
|
||||
};
|
||||
|
||||
/* HvFlushGuestPhysicalAddressSpace hypercalls */
|
||||
struct hv_guest_mapping_flush {
|
||||
u64 address_space;
|
||||
u64 flags;
|
||||
};
|
||||
|
||||
/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
|
||||
struct hv_tlb_flush {
|
||||
u64 address_space;
|
||||
|
||||
@@ -55,6 +55,7 @@
|
||||
#define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2)
|
||||
#define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3)
|
||||
#define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4)
|
||||
#define KVM_REQ_LOAD_CR3 KVM_ARCH_REQ(5)
|
||||
#define KVM_REQ_EVENT KVM_ARCH_REQ(6)
|
||||
#define KVM_REQ_APF_HALT KVM_ARCH_REQ(7)
|
||||
#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8)
|
||||
@@ -76,13 +77,13 @@
|
||||
#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
|
||||
#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
|
||||
#define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23)
|
||||
#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24)
|
||||
|
||||
#define CR0_RESERVED_BITS \
|
||||
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
|
||||
| X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
|
||||
| X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
|
||||
|
||||
#define CR3_PCID_INVD BIT_64(63)
|
||||
#define CR4_RESERVED_BITS \
|
||||
(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
|
||||
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
|
||||
@@ -326,6 +327,16 @@ struct rsvd_bits_validate {
|
||||
u64 bad_mt_xwr;
|
||||
};
|
||||
|
||||
struct kvm_mmu_root_info {
|
||||
gpa_t cr3;
|
||||
hpa_t hpa;
|
||||
};
|
||||
|
||||
#define KVM_MMU_ROOT_INFO_INVALID \
|
||||
((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
|
||||
|
||||
#define KVM_MMU_NUM_PREV_ROOTS 3
|
||||
|
||||
/*
|
||||
* x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
|
||||
* and 2-level 32-bit). The kvm_mmu structure abstracts the details of the
|
||||
@@ -345,7 +356,7 @@ struct kvm_mmu {
|
||||
struct x86_exception *exception);
|
||||
int (*sync_page)(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu_page *sp);
|
||||
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
|
||||
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
|
||||
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
u64 *spte, const void *pte);
|
||||
hpa_t root_hpa;
|
||||
@@ -354,6 +365,7 @@ struct kvm_mmu {
|
||||
u8 shadow_root_level;
|
||||
u8 ept_ad;
|
||||
bool direct_map;
|
||||
struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];
|
||||
|
||||
/*
|
||||
* Bitmap; bit set = permission fault
|
||||
@@ -978,6 +990,15 @@ struct kvm_x86_ops {
|
||||
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
|
||||
|
||||
void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
|
||||
int (*tlb_remote_flush)(struct kvm *kvm);
|
||||
|
||||
/*
|
||||
* Flush any TLB entries associated with the given GVA.
|
||||
* Does not need to flush GPA->HPA mappings.
|
||||
* Can potentially get non-canonical addresses through INVLPGs, which
|
||||
* the implementation may choose to ignore if appropriate.
|
||||
*/
|
||||
void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
|
||||
|
||||
void (*run)(struct kvm_vcpu *vcpu);
|
||||
int (*handle_exit)(struct kvm_vcpu *vcpu);
|
||||
@@ -1090,6 +1111,14 @@ struct kvm_x86_ops {
|
||||
|
||||
void (*setup_mce)(struct kvm_vcpu *vcpu);
|
||||
|
||||
int (*get_nested_state)(struct kvm_vcpu *vcpu,
|
||||
struct kvm_nested_state __user *user_kvm_nested_state,
|
||||
unsigned user_data_size);
|
||||
int (*set_nested_state)(struct kvm_vcpu *vcpu,
|
||||
struct kvm_nested_state __user *user_kvm_nested_state,
|
||||
struct kvm_nested_state *kvm_state);
|
||||
void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
|
||||
|
||||
int (*smi_allowed)(struct kvm_vcpu *vcpu);
|
||||
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
|
||||
int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
|
||||
@@ -1122,6 +1151,16 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
|
||||
return kvm_x86_ops->vm_free(kvm);
|
||||
}
|
||||
|
||||
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
|
||||
static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
|
||||
{
|
||||
if (kvm_x86_ops->tlb_remote_flush &&
|
||||
!kvm_x86_ops->tlb_remote_flush(kvm))
|
||||
return 0;
|
||||
else
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
int kvm_mmu_module_init(void);
|
||||
void kvm_mmu_module_exit(void);
|
||||
|
||||
@@ -1273,6 +1312,10 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state,
|
||||
return !!(*irq_state);
|
||||
}
|
||||
|
||||
#define KVM_MMU_ROOT_CURRENT BIT(0)
|
||||
#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i)
|
||||
#define KVM_MMU_ROOTS_ALL (~0UL)
|
||||
|
||||
int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
|
||||
void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
|
||||
|
||||
@@ -1284,7 +1327,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
|
||||
int kvm_mmu_load(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_free_roots(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free);
|
||||
gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
|
||||
struct x86_exception *exception);
|
||||
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
|
||||
@@ -1303,7 +1346,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
|
||||
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
|
||||
void *insn, int insn_len);
|
||||
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
|
||||
void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
|
||||
void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush);
|
||||
|
||||
void kvm_enable_tdp(void);
|
||||
void kvm_disable_tdp(void);
|
||||
@@ -1418,6 +1462,10 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
|
||||
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
|
||||
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
|
||||
|
||||
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
|
||||
unsigned long ipi_bitmap_high, int min,
|
||||
unsigned long icr, int op_64_bit);
|
||||
|
||||
u64 kvm_get_arch_capabilities(void);
|
||||
void kvm_define_shared_msr(unsigned index, u32 msr);
|
||||
int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
|
||||
|
||||
@@ -347,6 +347,7 @@ void hyperv_reenlightenment_intr(struct pt_regs *regs);
|
||||
void set_hv_tscchange_cb(void (*cb)(void));
|
||||
void clear_hv_tscchange_cb(void);
|
||||
void hyperv_stop_tsc_emulation(void);
|
||||
int hyperv_flush_guest_mapping(u64 as);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
void hv_apic_init(void);
|
||||
@@ -366,6 +367,7 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
static inline int hyperv_flush_guest_mapping(u64 as) { return -1; }
|
||||
#endif /* CONFIG_HYPERV */
|
||||
|
||||
#ifdef CONFIG_HYPERV_TSCPAGE
|
||||
|
||||
@@ -28,6 +28,20 @@ TRACE_EVENT(hyperv_mmu_flush_tlb_others,
|
||||
__entry->addr, __entry->end)
|
||||
);
|
||||
|
||||
TRACE_EVENT(hyperv_nested_flush_guest_mapping,
|
||||
TP_PROTO(u64 as, int ret),
|
||||
TP_ARGS(as, ret),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(u64, as)
|
||||
__field(int, ret)
|
||||
),
|
||||
TP_fast_assign(__entry->as = as;
|
||||
__entry->ret = ret;
|
||||
),
|
||||
TP_printk("address space %llx ret %d", __entry->as, __entry->ret)
|
||||
);
|
||||
|
||||
TRACE_EVENT(hyperv_send_ipi_mask,
|
||||
TP_PROTO(const struct cpumask *cpus,
|
||||
int vector),
|
||||
|
||||
@@ -378,4 +378,41 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_LINT0_REENABLED  (1 << 0)
#define KVM_X86_QUIRK_CD_NW_CLEARED    (1 << 1)

#define KVM_STATE_NESTED_GUEST_MODE    0x00000001
#define KVM_STATE_NESTED_RUN_PENDING   0x00000002

#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON     0x00000002

struct kvm_vmx_nested_state {
        __u64 vmxon_pa;
        __u64 vmcs_pa;

        struct {
                __u16 flags;
        } smm;
};

/* for KVM_CAP_NESTED_STATE */
struct kvm_nested_state {
        /* KVM_STATE_* flags */
        __u16 flags;

        /* 0 for VMX, 1 for SVM. */
        __u16 format;

        /* 128 for SVM, 128 + VMCS size for VMX. */
        __u32 size;

        union {
                /* VMXON, VMCS */
                struct kvm_vmx_nested_state vmx;

                /* Pad the header to 128 bytes. */
                __u8 pad[120];
        };

        __u8 data[0];
};

#endif /* _ASM_X86_KVM_H */

@@ -28,6 +28,7 @@
 #define KVM_FEATURE_PV_UNHALT          7
 #define KVM_FEATURE_PV_TLB_FLUSH       9
 #define KVM_FEATURE_ASYNC_PF_VMEXIT    10
+#define KVM_FEATURE_PV_SEND_IPI        11

 #define KVM_HINTS_REALTIME             0


@@ -444,6 +444,98 @@ static void __init sev_map_percpu_data(void)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
|
||||
|
||||
static void __send_ipi_mask(const struct cpumask *mask, int vector)
|
||||
{
|
||||
unsigned long flags;
|
||||
int cpu, apic_id, icr;
|
||||
int min = 0, max = 0;
|
||||
#ifdef CONFIG_X86_64
|
||||
__uint128_t ipi_bitmap = 0;
|
||||
#else
|
||||
u64 ipi_bitmap = 0;
|
||||
#endif
|
||||
|
||||
if (cpumask_empty(mask))
|
||||
return;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
switch (vector) {
|
||||
default:
|
||||
icr = APIC_DM_FIXED | vector;
|
||||
break;
|
||||
case NMI_VECTOR:
|
||||
icr = APIC_DM_NMI;
|
||||
break;
|
||||
}
|
||||
|
||||
for_each_cpu(cpu, mask) {
|
||||
apic_id = per_cpu(x86_cpu_to_apicid, cpu);
|
||||
if (!ipi_bitmap) {
|
||||
min = max = apic_id;
|
||||
} else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) {
|
||||
ipi_bitmap <<= min - apic_id;
|
||||
min = apic_id;
|
||||
} else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) {
|
||||
max = apic_id < max ? max : apic_id;
|
||||
} else {
|
||||
kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
|
||||
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
|
||||
min = max = apic_id;
|
||||
ipi_bitmap = 0;
|
||||
}
|
||||
__set_bit(apic_id - min, (unsigned long *)&ipi_bitmap);
|
||||
}
|
||||
|
||||
if (ipi_bitmap) {
|
||||
kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
|
||||
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
|
||||
}
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
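The loop above packs APIC IDs into a sliding 128-bit window anchored at min before issuing KVM_HC_SEND_IPI. As a rough illustration of the resulting hypercall arguments (a hypothetical stand-alone sketch, not the kernel code itself; the APIC IDs are invented for the example):

/* Illustrative only: mimics how __send_ipi_mask() above would pack APIC
 * IDs 5, 9 and 70 (all within one 128-bit window anchored at min = 5)
 * into the two 64-bit bitmap arguments of the KVM_HC_SEND_IPI hypercall. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned __int128 ipi_bitmap = 0;
	int min = 5;
	int apic_ids[] = { 5, 9, 70 };

	for (unsigned i = 0; i < sizeof(apic_ids) / sizeof(apic_ids[0]); i++)
		ipi_bitmap |= (unsigned __int128)1 << (apic_ids[i] - min);

	/* These two halves, plus 'min' and the ICR value, are what the guest
	 * would pass as the four hypercall arguments. */
	printf("low  = %#llx\n", (unsigned long long)ipi_bitmap);
	printf("high = %#llx\n", (unsigned long long)(ipi_bitmap >> 64));
	printf("min  = %d\n", min);
	return 0;
}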
static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
{
__send_ipi_mask(mask, vector);
}

static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
{
unsigned int this_cpu = smp_processor_id();
struct cpumask new_mask;
const struct cpumask *local_mask;

cpumask_copy(&new_mask, mask);
cpumask_clear_cpu(this_cpu, &new_mask);
local_mask = &new_mask;
__send_ipi_mask(local_mask, vector);
}

static void kvm_send_ipi_allbutself(int vector)
{
kvm_send_ipi_mask_allbutself(cpu_online_mask, vector);
}

static void kvm_send_ipi_all(int vector)
{
__send_ipi_mask(cpu_online_mask, vector);
}

/*
* Set the IPI entry points
*/
static void kvm_setup_pv_ipi(void)
{
apic->send_IPI_mask = kvm_send_ipi_mask;
apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
apic->send_IPI_allbutself = kvm_send_ipi_allbutself;
apic->send_IPI_all = kvm_send_ipi_all;
pr_info("KVM setup pv IPIs\n");
}

static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
{
native_smp_prepare_cpus(max_cpus);
@@ -611,13 +703,27 @@ static uint32_t __init kvm_detect(void)
return kvm_cpuid_base();
}

static void __init kvm_apic_init(void)
{
#if defined(CONFIG_SMP)
if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI))
kvm_setup_pv_ipi();
#endif
}

static void __init kvm_init_platform(void)
{
kvmclock_init();
x86_platform.apic_post_init = kvm_apic_init;
}

const __initconst struct hypervisor_x86 x86_hyper_kvm = {
.name = "KVM",
.detect = kvm_detect,
.type = X86_HYPER_KVM,
.init.init_platform = kvmclock_init,
.init.guest_late_init = kvm_guest_init,
.init.x2apic_available = kvm_para_available,
.init.init_platform = kvm_init_platform,
};

static __init int activate_jump_labels(void)
@@ -736,6 +842,10 @@ void __init kvm_spinlock_init(void)
if (kvm_para_has_hint(KVM_HINTS_REALTIME))
return;

/* Don't use the pvqspinlock code if there is only 1 vCPU. */
if (num_possible_cpus() == 1)
return;

__pv_init_lock_hash();
pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);

@@ -621,7 +621,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
(1 << KVM_FEATURE_PV_UNHALT) |
(1 << KVM_FEATURE_PV_TLB_FLUSH) |
(1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
(1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
(1 << KVM_FEATURE_PV_SEND_IPI);

if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);

@@ -4191,7 +4191,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
maxphyaddr = 36;
rsvd = rsvd_bits(maxphyaddr, 63);
if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PCIDE)
rsvd &= ~CR3_PCID_INVD;
rsvd &= ~X86_CR3_PCID_NOFLUSH;
}

if (new_val & rsvd)

@@ -235,7 +235,7 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
int ret;

if (!synic->active)
if (!synic->active && !host)
return 1;

trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);
@@ -295,11 +295,12 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
return ret;
}

static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata)
static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata,
bool host)
{
int ret;

if (!synic->active)
if (!synic->active && !host)
return 1;

ret = 0;
@@ -1014,6 +1015,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
case HV_X64_MSR_TSC_EMULATION_STATUS:
hv->hv_tsc_emulation_status = data;
break;
case HV_X64_MSR_TIME_REF_COUNT:
/* read-only, but still ignore it if host-initiated */
if (!host)
return 1;
break;
default:
vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
@@ -1101,6 +1107,12 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
data, host);
}
case HV_X64_MSR_TSC_FREQUENCY:
case HV_X64_MSR_APIC_FREQUENCY:
/* read-only, but still ignore it if host-initiated */
if (!host)
return 1;
break;
default:
vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
@@ -1156,7 +1168,8 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
return 0;
}

static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
bool host)
{
u64 data = 0;
struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
@@ -1183,7 +1196,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case HV_X64_MSR_SIMP:
case HV_X64_MSR_EOM:
case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata);
return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata, host);
case HV_X64_MSR_STIMER0_CONFIG:
case HV_X64_MSR_STIMER1_CONFIG:
case HV_X64_MSR_STIMER2_CONFIG:
@@ -1229,7 +1242,7 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return kvm_hv_set_msr(vcpu, msr, data, host);
}

int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
if (kvm_hv_msr_partition_wide(msr)) {
int r;
@@ -1239,7 +1252,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
return r;
} else
return kvm_hv_get_msr(vcpu, msr, pdata);
return kvm_hv_get_msr(vcpu, msr, pdata, host);
}

static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)

@@ -48,7 +48,7 @@ static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic)
}

int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host);

bool kvm_hv_hypercall_enabled(struct kvm *kvm);
int kvm_hv_hypercall(struct kvm_vcpu *vcpu);

@@ -547,6 +547,46 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
irq->level, irq->trig_mode, dest_map);
}

int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, int min,
unsigned long icr, int op_64_bit)
{
int i;
struct kvm_apic_map *map;
struct kvm_vcpu *vcpu;
struct kvm_lapic_irq irq = {0};
int cluster_size = op_64_bit ? 64 : 32;
int count = 0;

irq.vector = icr & APIC_VECTOR_MASK;
irq.delivery_mode = icr & APIC_MODE_MASK;
irq.level = (icr & APIC_INT_ASSERT) != 0;
irq.trig_mode = icr & APIC_INT_LEVELTRIG;

if (icr & APIC_DEST_MASK)
return -KVM_EINVAL;
if (icr & APIC_SHORT_MASK)
return -KVM_EINVAL;

rcu_read_lock();
map = rcu_dereference(kvm->arch.apic_map);

/* Bits above cluster_size are masked in the caller. */
for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) {
vcpu = map->phys_map[min + i]->vcpu;
count += kvm_apic_set_irq(vcpu, &irq, NULL);
}

min += cluster_size;
for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) {
vcpu = map->phys_map[min + i]->vcpu;
count += kvm_apic_set_irq(vcpu, &irq, NULL);
}

rcu_read_unlock();
return count;
}
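On the host side, each set bit in the two bitmaps is offset from the guest-supplied min APIC ID, with the high bitmap starting one cluster above it. A hedged, stand-alone sketch of the same decode (64-bit guest, so the cluster size is 64; the argument values are invented):

/* Illustrative decode of the KVM_HC_SEND_IPI arguments, mirroring the
 * bitmap walk in kvm_pv_send_ipi() above. Prints APIC IDs 5, 9 and 69. */
#include <stdio.h>

int main(void)
{
	unsigned long low = 0x11, high = 0x1;   /* bits 0 and 4; bit 0 */
	int min = 5, cluster_size = 64;

	for (int i = 0; i < 64; i++)
		if (low & (1UL << i))
			printf("IPI to APIC ID %d\n", min + i);
	for (int i = 0; i < 64; i++)
		if (high & (1UL << i))
			printf("IPI to APIC ID %d\n", min + cluster_size + i);
	return 0;
}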
static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{

@@ -178,7 +178,24 @@ struct kvm_shadow_walk_iterator {
unsigned index;
};

#define for_each_shadow_entry(_vcpu, _addr, _walker) \
static const union kvm_mmu_page_role mmu_base_role_mask = {
.cr0_wp = 1,
.cr4_pae = 1,
.nxe = 1,
.smep_andnot_wp = 1,
.smap_andnot_wp = 1,
.smm = 1,
.guest_mode = 1,
.ad_disabled = 1,
};

#define for_each_shadow_entry_using_root(_vcpu, _root, _addr, _walker) \
for (shadow_walk_init_using_root(&(_walker), (_vcpu), \
(_root), (_addr)); \
shadow_walk_okay(&(_walker)); \
shadow_walk_next(&(_walker)))

#define for_each_shadow_entry(_vcpu, _addr, _walker) \
for (shadow_walk_init(&(_walker), _vcpu, _addr); \
shadow_walk_okay(&(_walker)); \
shadow_walk_next(&(_walker)))
@@ -221,7 +238,20 @@ static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK |
PT64_EPT_EXECUTABLE_MASK;
static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT;

/*
* This mask must be set on all non-zero Non-Present or Reserved SPTEs in order
* to guard against L1TF attacks.
*/
static u64 __read_mostly shadow_nonpresent_or_rsvd_mask;

/*
* The number of high-order 1 bits to use in the mask above.
*/
static const u64 shadow_nonpresent_or_rsvd_mask_len = 5;

static void mmu_spte_set(u64 *sptep, u64 spte);
static union kvm_mmu_page_role
kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);

void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
{
@@ -308,9 +338,13 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
{
unsigned int gen = kvm_current_mmio_generation(vcpu);
u64 mask = generation_mmio_spte_mask(gen);
u64 gpa = gfn << PAGE_SHIFT;

access &= ACC_WRITE_MASK | ACC_USER_MASK;
mask |= shadow_mmio_value | access | gfn << PAGE_SHIFT;
mask |= shadow_mmio_value | access;
mask |= gpa | shadow_nonpresent_or_rsvd_mask;
mask |= (gpa & shadow_nonpresent_or_rsvd_mask)
<< shadow_nonpresent_or_rsvd_mask_len;

trace_mark_mmio_spte(sptep, gfn, access, gen);
mmu_spte_set(sptep, mask);
@@ -323,8 +357,14 @@ static bool is_mmio_spte(u64 spte)

static gfn_t get_mmio_spte_gfn(u64 spte)
{
u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
return (spte & ~mask) >> PAGE_SHIFT;
u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask |
shadow_nonpresent_or_rsvd_mask;
u64 gpa = spte & ~mask;

gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len)
& shadow_nonpresent_or_rsvd_mask;

return gpa >> PAGE_SHIFT;
}

static unsigned get_mmio_spte_access(u64 spte)
@@ -381,7 +421,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);

static void kvm_mmu_clear_all_pte_masks(void)
static void kvm_mmu_reset_all_pte_masks(void)
{
shadow_user_mask = 0;
shadow_accessed_mask = 0;
@@ -391,6 +431,18 @@ static void kvm_mmu_clear_all_pte_masks(void)
shadow_mmio_mask = 0;
shadow_present_mask = 0;
shadow_acc_track_mask = 0;

/*
* If the CPU has 46 or less physical address bits, then set an
* appropriate mask to guard against L1TF attacks. Otherwise, it is
* assumed that the CPU is not vulnerable to L1TF.
*/
if (boot_cpu_data.x86_phys_bits <
52 - shadow_nonpresent_or_rsvd_mask_len)
shadow_nonpresent_or_rsvd_mask =
rsvd_bits(boot_cpu_data.x86_phys_bits -
shadow_nonpresent_or_rsvd_mask_len,
boot_cpu_data.x86_phys_bits - 1);
}
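For a concrete sense of the value chosen above: on a CPU reporting 46 physical address bits, the mask covers bits 41..45, i.e. the five highest implemented address bits, which makes every non-present or reserved SPTE point outside guest-usable memory. A hedged, stand-alone sketch of the same computation (rsvd_bits() is re-implemented here only for the example; 46 physical bits are an assumption):

/* Illustration of the mask set up in kvm_mmu_reset_all_pte_masks() above. */
#include <stdio.h>
#include <stdint.h>

static uint64_t rsvd_bits(int s, int e)
{
	/* bits s..e inclusive set to 1 */
	return ((1ULL << (e - s + 1)) - 1) << s;
}

int main(void)
{
	int phys_bits = 46;   /* assumed CPU */
	int mask_len = 5;     /* shadow_nonpresent_or_rsvd_mask_len */
	uint64_t mask = 0;

	if (phys_bits < 52 - mask_len)
		mask = rsvd_bits(phys_bits - mask_len, phys_bits - 1);

	printf("mask = %#llx\n", (unsigned long long)mask); /* bits 41..45 */
	return 0;
}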
static int is_cpuid_PSE36(void)
@@ -1986,7 +2038,7 @@ static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
return 0;
}

static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root)
{
}

@@ -2117,12 +2169,8 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
struct list_head *invalid_list)
{
if (sp->role.cr4_pae != !!is_pae(vcpu)) {
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
return false;
}

if (vcpu->arch.mmu.sync_page(vcpu, sp) == 0) {
if (sp->role.cr4_pae != !!is_pae(vcpu)
|| vcpu->arch.mmu.sync_page(vcpu, sp) == 0) {
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
return false;
}
@@ -2392,11 +2440,12 @@ out:
return sp;
}

static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
struct kvm_vcpu *vcpu, u64 addr)
static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterator,
struct kvm_vcpu *vcpu, hpa_t root,
u64 addr)
{
iterator->addr = addr;
iterator->shadow_addr = vcpu->arch.mmu.root_hpa;
iterator->shadow_addr = root;
iterator->level = vcpu->arch.mmu.shadow_root_level;

if (iterator->level == PT64_ROOT_4LEVEL &&
@@ -2405,6 +2454,12 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
--iterator->level;

if (iterator->level == PT32E_ROOT_LEVEL) {
/*
* prev_root is currently only used for 64-bit hosts. So only
* the active root_hpa is valid here.
*/
BUG_ON(root != vcpu->arch.mmu.root_hpa);

iterator->shadow_addr
= vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
@@ -2414,6 +2469,13 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
}
}

static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
struct kvm_vcpu *vcpu, u64 addr)
{
shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu.root_hpa,
addr);
}

static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
{
if (iterator->level < PT_PAGE_TABLE_LEVEL)
@@ -2702,6 +2764,45 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
kvm_unsync_page(vcpu, sp);
}

/*
* We need to ensure that the marking of unsync pages is visible
* before the SPTE is updated to allow writes because
* kvm_mmu_sync_roots() checks the unsync flags without holding
* the MMU lock and so can race with this. If the SPTE was updated
* before the page had been marked as unsync-ed, something like the
* following could happen:
*
* CPU 1 CPU 2
* ---------------------------------------------------------------------
* 1.2 Host updates SPTE
* to be writable
* 2.1 Guest writes a GPTE for GVA X.
* (GPTE being in the guest page table shadowed
* by the SP from CPU 1.)
* This reads SPTE during the page table walk.
* Since SPTE.W is read as 1, there is no
* fault.
*
* 2.2 Guest issues TLB flush.
* That causes a VM Exit.
*
* 2.3 kvm_mmu_sync_pages() reads sp->unsync.
* Since it is false, so it just returns.
*
* 2.4 Guest accesses GVA X.
* Since the mapping in the SP was not updated,
* so the old mapping for GVA X incorrectly
* gets used.
* 1.1 Host marks SP
* as unsync
* (sp->unsync = true)
*
* The write barrier below ensures that 1.1 happens before 1.2 and thus
* the situation in 2.4 does not arise. The implicit barrier in 2.2
* pairs with this write barrier.
*/
smp_wmb();

return false;
}

@@ -2724,6 +2825,10 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
return true;
}

/* Bits which may be returned by set_spte() */
#define SET_SPTE_WRITE_PROTECTED_PT BIT(0)
#define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1)

static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned pte_access, int level,
gfn_t gfn, kvm_pfn_t pfn, bool speculative,
@@ -2800,7 +2905,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
pgprintk("%s: found shadow page for %llx, marking ro\n",
__func__, gfn);
ret = 1;
ret |= SET_SPTE_WRITE_PROTECTED_PT;
pte_access &= ~ACC_WRITE_MASK;
spte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE);
}
@@ -2816,7 +2921,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,

set_pte:
if (mmu_spte_update(sptep, spte))
kvm_flush_remote_tlbs(vcpu->kvm);
ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
done:
return ret;
}
@@ -2827,7 +2932,9 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
{
int was_rmapped = 0;
int rmap_count;
int set_spte_ret;
int ret = RET_PF_RETRY;
bool flush = false;

pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
*sptep, write_fault, gfn);
@@ -2844,22 +2951,25 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,

child = page_header(pte & PT64_BASE_ADDR_MASK);
drop_parent_pte(child, sptep);
kvm_flush_remote_tlbs(vcpu->kvm);
flush = true;
} else if (pfn != spte_to_pfn(*sptep)) {
pgprintk("hfn old %llx new %llx\n",
spte_to_pfn(*sptep), pfn);
drop_spte(vcpu->kvm, sptep);
kvm_flush_remote_tlbs(vcpu->kvm);
flush = true;
} else
was_rmapped = 1;
}

if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
true, host_writable)) {
set_spte_ret = set_spte(vcpu, sptep, pte_access, level, gfn, pfn,
speculative, true, host_writable);
if (set_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) {
if (write_fault)
ret = RET_PF_EMULATE;
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH || flush)
kvm_flush_remote_tlbs(vcpu->kvm);

if (unlikely(is_mmio_spte(*sptep)))
ret = RET_PF_EMULATE;
@@ -3358,26 +3468,47 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
*root_hpa = INVALID_PAGE;
}

void kvm_mmu_free_roots(struct kvm_vcpu *vcpu)
/* roots_to_free must be some combination of the KVM_MMU_ROOT_* flags */
void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free)
{
int i;
LIST_HEAD(invalid_list);
struct kvm_mmu *mmu = &vcpu->arch.mmu;
bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT;

if (!VALID_PAGE(mmu->root_hpa))
return;
BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG);

/* Before acquiring the MMU lock, see if we need to do any real work. */
if (!(free_active_root && VALID_PAGE(mmu->root_hpa))) {
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) &&
VALID_PAGE(mmu->prev_roots[i].hpa))
break;

if (i == KVM_MMU_NUM_PREV_ROOTS)
return;
}

spin_lock(&vcpu->kvm->mmu_lock);

if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
(mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, &invalid_list);
} else {
for (i = 0; i < 4; ++i)
if (mmu->pae_root[i] != 0)
mmu_free_root_page(vcpu->kvm, &mmu->pae_root[i],
&invalid_list);
mmu->root_hpa = INVALID_PAGE;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i))
mmu_free_root_page(vcpu->kvm, &mmu->prev_roots[i].hpa,
&invalid_list);

if (free_active_root) {
if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
(mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
mmu_free_root_page(vcpu->kvm, &mmu->root_hpa,
&invalid_list);
} else {
for (i = 0; i < 4; ++i)
if (mmu->pae_root[i] != 0)
mmu_free_root_page(vcpu->kvm,
&mmu->pae_root[i],
&invalid_list);
mmu->root_hpa = INVALID_PAGE;
}
}

kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
@@ -3546,7 +3677,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
return mmu_alloc_shadow_roots(vcpu);
}

static void mmu_sync_roots(struct kvm_vcpu *vcpu)
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
{
int i;
struct kvm_mmu_page *sp;
@@ -3558,14 +3689,39 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
return;

vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);

if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
hpa_t root = vcpu->arch.mmu.root_hpa;

sp = page_header(root);

/*
* Even if another CPU was marking the SP as unsync-ed
* simultaneously, any guest page table changes are not
* guaranteed to be visible anyway until this VCPU issues a TLB
* flush strictly after those changes are made. We only need to
* ensure that the other CPU sets these flags before any actual
* changes to the page tables are made. The comments in
* mmu_need_write_protect() describe what could go wrong if this
* requirement isn't satisfied.
*/
if (!smp_load_acquire(&sp->unsync) &&
!smp_load_acquire(&sp->unsync_children))
return;

spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);

mmu_sync_children(vcpu, sp);

kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
spin_unlock(&vcpu->kvm->mmu_lock);
return;
}

spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);

for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i];

@@ -3575,13 +3731,8 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
mmu_sync_children(vcpu, sp);
}
}
kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
}

void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
{
spin_lock(&vcpu->kvm->mmu_lock);
mmu_sync_roots(vcpu);
kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
spin_unlock(&vcpu->kvm->mmu_lock);
}
EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);
@@ -3948,16 +4099,107 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
context->update_pte = nonpaging_update_pte;
context->root_level = 0;
context->shadow_root_level = PT32E_ROOT_LEVEL;
context->root_hpa = INVALID_PAGE;
context->direct_map = true;
context->nx = false;
}

void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
/*
* Find out if a previously cached root matching the new CR3/role is available.
* The current root is also inserted into the cache.
* If a matching root was found, it is assigned to kvm_mmu->root_hpa and true is
* returned.
* Otherwise, the LRU root from the cache is assigned to kvm_mmu->root_hpa and
* false is returned. This root should now be freed by the caller.
*/
static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
union kvm_mmu_page_role new_role)
{
kvm_mmu_free_roots(vcpu);
uint i;
struct kvm_mmu_root_info root;
struct kvm_mmu *mmu = &vcpu->arch.mmu;

root.cr3 = mmu->get_cr3(vcpu);
root.hpa = mmu->root_hpa;

for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
swap(root, mmu->prev_roots[i]);

if (new_cr3 == root.cr3 && VALID_PAGE(root.hpa) &&
page_header(root.hpa) != NULL &&
new_role.word == page_header(root.hpa)->role.word)
break;
}

mmu->root_hpa = root.hpa;

return i < KVM_MMU_NUM_PREV_ROOTS;
}

static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
union kvm_mmu_page_role new_role,
bool skip_tlb_flush)
{
struct kvm_mmu *mmu = &vcpu->arch.mmu;

/*
* For now, limit the fast switch to 64-bit hosts+VMs in order to avoid
* having to deal with PDPTEs. We may add support for 32-bit hosts/VMs
* later if necessary.
*/
if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
mmu->root_level >= PT64_ROOT_4LEVEL) {
if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT))
return false;

if (cached_root_available(vcpu, new_cr3, new_role)) {
/*
* It is possible that the cached previous root page is
* obsolete because of a change in the MMU
* generation number. However, that is accompanied by
* KVM_REQ_MMU_RELOAD, which will free the root that we
* have set here and allocate a new one.
*/

kvm_make_request(KVM_REQ_LOAD_CR3, vcpu);
if (!skip_tlb_flush) {
kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
kvm_x86_ops->tlb_flush(vcpu, true);
}

/*
* The last MMIO access's GVA and GPA are cached in the
* VCPU. When switching to a new CR3, that GVA->GPA
* mapping may no longer be valid. So clear any cached
* MMIO info even when we don't need to sync the shadow
* page tables.
*/
vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);

__clear_sp_write_flooding_count(
page_header(mmu->root_hpa));

return true;
}
}

return false;
}

static void __kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3,
union kvm_mmu_page_role new_role,
bool skip_tlb_flush)
{
if (!fast_cr3_switch(vcpu, new_cr3, new_role, skip_tlb_flush))
kvm_mmu_free_roots(vcpu, KVM_MMU_ROOT_CURRENT);
}

void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush)
{
__kvm_mmu_new_cr3(vcpu, new_cr3, kvm_mmu_calc_root_page_role(vcpu),
skip_tlb_flush);
}
EXPORT_SYMBOL_GPL(kvm_mmu_new_cr3);
static unsigned long get_cr3(struct kvm_vcpu *vcpu)
{
return kvm_read_cr3(vcpu);
@@ -4432,7 +4674,6 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
context->invlpg = paging64_invlpg;
context->update_pte = paging64_update_pte;
context->shadow_root_level = level;
context->root_hpa = INVALID_PAGE;
context->direct_map = false;
}

@@ -4462,7 +4703,6 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
context->invlpg = paging32_invlpg;
context->update_pte = paging32_update_pte;
context->shadow_root_level = PT32E_ROOT_LEVEL;
context->root_hpa = INVALID_PAGE;
context->direct_map = false;
}

@@ -4472,20 +4712,32 @@ static void paging32E_init_context(struct kvm_vcpu *vcpu,
paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
}

static union kvm_mmu_page_role
kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu)
{
union kvm_mmu_page_role role = {0};

role.guest_mode = is_guest_mode(vcpu);
role.smm = is_smm(vcpu);
role.ad_disabled = (shadow_accessed_mask == 0);
role.level = kvm_x86_ops->get_tdp_level(vcpu);
role.direct = true;
role.access = ACC_ALL;

return role;
}

static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *context = &vcpu->arch.mmu;

context->base_role.word = 0;
context->base_role.guest_mode = is_guest_mode(vcpu);
context->base_role.smm = is_smm(vcpu);
context->base_role.ad_disabled = (shadow_accessed_mask == 0);
context->base_role.word = mmu_base_role_mask.word &
kvm_calc_tdp_mmu_root_page_role(vcpu).word;
context->page_fault = tdp_page_fault;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
context->update_pte = nonpaging_update_pte;
context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
context->root_hpa = INVALID_PAGE;
context->direct_map = true;
context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
context->get_cr3 = get_cr3;
@@ -4520,13 +4772,36 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
reset_tdp_shadow_zero_bits_mask(vcpu, context);
}

void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
static union kvm_mmu_page_role
kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu)
{
union kvm_mmu_page_role role = {0};
bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
struct kvm_mmu *context = &vcpu->arch.mmu;

MMU_WARN_ON(VALID_PAGE(context->root_hpa));
role.nxe = is_nx(vcpu);
role.cr4_pae = !!is_pae(vcpu);
role.cr0_wp = is_write_protection(vcpu);
role.smep_andnot_wp = smep && !is_write_protection(vcpu);
role.smap_andnot_wp = smap && !is_write_protection(vcpu);
role.guest_mode = is_guest_mode(vcpu);
role.smm = is_smm(vcpu);
role.direct = !is_paging(vcpu);
role.access = ACC_ALL;

if (!is_long_mode(vcpu))
role.level = PT32E_ROOT_LEVEL;
else if (is_la57_mode(vcpu))
role.level = PT64_ROOT_5LEVEL;
else
role.level = PT64_ROOT_4LEVEL;

return role;
}

void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *context = &vcpu->arch.mmu;

if (!is_paging(vcpu))
nonpaging_init_context(vcpu, context);
@@ -4537,26 +4812,34 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
else
paging32_init_context(vcpu, context);

context->base_role.nxe = is_nx(vcpu);
context->base_role.cr4_pae = !!is_pae(vcpu);
context->base_role.cr0_wp = is_write_protection(vcpu);
context->base_role.smep_andnot_wp
= smep && !is_write_protection(vcpu);
context->base_role.smap_andnot_wp
= smap && !is_write_protection(vcpu);
context->base_role.guest_mode = is_guest_mode(vcpu);
context->base_role.smm = is_smm(vcpu);
context->base_role.word = mmu_base_role_mask.word &
kvm_calc_shadow_mmu_root_page_role(vcpu).word;
reset_shadow_zero_bits_mask(vcpu, context);
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);

static union kvm_mmu_page_role
kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty)
{
union kvm_mmu_page_role role = vcpu->arch.mmu.base_role;

role.level = PT64_ROOT_4LEVEL;
role.direct = false;
role.ad_disabled = !accessed_dirty;
role.guest_mode = true;
role.access = ACC_ALL;

return role;
}

void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty)
bool accessed_dirty, gpa_t new_eptp)
{
struct kvm_mmu *context = &vcpu->arch.mmu;
union kvm_mmu_page_role root_page_role =
kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty);

MMU_WARN_ON(VALID_PAGE(context->root_hpa));

__kvm_mmu_new_cr3(vcpu, new_eptp, root_page_role, false);
context->shadow_root_level = PT64_ROOT_4LEVEL;

context->nx = true;
@@ -4567,10 +4850,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
context->invlpg = ept_invlpg;
context->update_pte = ept_update_pte;
context->root_level = PT64_ROOT_4LEVEL;
context->root_hpa = INVALID_PAGE;
context->direct_map = false;
context->base_role.ad_disabled = !accessed_dirty;
context->base_role.guest_mode = 1;
context->base_role.word = root_page_role.word & mmu_base_role_mask.word;
update_permission_bitmask(vcpu, context, true);
update_pkru_bitmask(vcpu, context, true);
update_last_nonleaf_level(vcpu, context);
@@ -4633,8 +4914,17 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
update_last_nonleaf_level(vcpu, g_context);
}

static void init_kvm_mmu(struct kvm_vcpu *vcpu)
void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots)
{
if (reset_roots) {
uint i;

vcpu->arch.mmu.root_hpa = INVALID_PAGE;

for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
}

if (mmu_is_nested(vcpu))
init_kvm_nested_mmu(vcpu);
else if (tdp_enabled)
@@ -4642,11 +4932,21 @@ static void init_kvm_mmu(struct kvm_vcpu *vcpu)
else
init_kvm_softmmu(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_init_mmu);

static union kvm_mmu_page_role
kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu)
{
if (tdp_enabled)
return kvm_calc_tdp_mmu_root_page_role(vcpu);
else
return kvm_calc_shadow_mmu_root_page_role(vcpu);
}

void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
{
kvm_mmu_unload(vcpu);
init_kvm_mmu(vcpu);
kvm_init_mmu(vcpu, true);
}
EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);

@@ -4661,8 +4961,8 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
kvm_mmu_sync_roots(vcpu);
if (r)
goto out;
/* set_cr3() should ensure TLB has been flushed */
vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
kvm_mmu_load_cr3(vcpu);
kvm_x86_ops->tlb_flush(vcpu, true);
out:
return r;
}
@@ -4670,7 +4970,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);

void kvm_mmu_unload(struct kvm_vcpu *vcpu)
{
kvm_mmu_free_roots(vcpu);
kvm_mmu_free_roots(vcpu, KVM_MMU_ROOTS_ALL);
WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
}
EXPORT_SYMBOL_GPL(kvm_mmu_unload);
@@ -4823,16 +5123,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
u64 entry, gentry, *spte;
int npte;
bool remote_flush, local_flush;
union kvm_mmu_page_role mask = { };

mask.cr0_wp = 1;
mask.cr4_pae = 1;
mask.nxe = 1;
mask.smep_andnot_wp = 1;
mask.smap_andnot_wp = 1;
mask.smm = 1;
mask.guest_mode = 1;
mask.ad_disabled = 1;

/*
* If we don't have indirect shadow pages, it means no page is
@@ -4876,7 +5166,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
mmu_page_zap_pte(vcpu->kvm, sp, spte);
if (gentry &&
!((sp->role.word ^ vcpu->arch.mmu.base_role.word)
& mask.word) && rmap_can_add(vcpu))
& mmu_base_role_mask.word) && rmap_can_add(vcpu))
mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
if (need_remote_flush(entry, *spte))
remote_flush = true;
@@ -5001,12 +5291,67 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);

void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
vcpu->arch.mmu.invlpg(vcpu, gva);
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
struct kvm_mmu *mmu = &vcpu->arch.mmu;
int i;

/* INVLPG on a * non-canonical address is a NOP according to the SDM. */
if (is_noncanonical_address(gva, vcpu))
return;

mmu->invlpg(vcpu, gva, mmu->root_hpa);

/*
* INVLPG is required to invalidate any global mappings for the VA,
* irrespective of PCID. Since it would take us roughly similar amount
* of work to determine whether any of the prev_root mappings of the VA
* is marked global, or to just sync it blindly, so we might as well
* just always sync it.
*
* Mappings not reachable via the current cr3 or the prev_roots will be
* synced when switching to that cr3, so nothing needs to be done here
* for them.
*/
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
if (VALID_PAGE(mmu->prev_roots[i].hpa))
mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);

kvm_x86_ops->tlb_flush_gva(vcpu, gva);
++vcpu->stat.invlpg;
}
EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);

void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
{
struct kvm_mmu *mmu = &vcpu->arch.mmu;
bool tlb_flush = false;
uint i;

if (pcid == kvm_get_active_pcid(vcpu)) {
mmu->invlpg(vcpu, gva, mmu->root_hpa);
tlb_flush = true;
}

for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].cr3)) {
mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
tlb_flush = true;
}
}

if (tlb_flush)
kvm_x86_ops->tlb_flush_gva(vcpu, gva);

++vcpu->stat.invlpg;

/*
* Mappings not reachable via the current cr3 or the prev_roots will be
* synced when switching to that cr3, so nothing needs to be done here
* for them.
*/
}
EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva);

void kvm_enable_tdp(void)
{
tdp_enabled = true;
@@ -5030,6 +5375,9 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
struct page *page;
int i;

if (tdp_enabled)
return 0;

/*
* When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
* Therefore we need to allocate shadow page tables in the first
@@ -5048,11 +5396,16 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)

int kvm_mmu_create(struct kvm_vcpu *vcpu)
{
uint i;

vcpu->arch.walk_mmu = &vcpu->arch.mmu;
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
vcpu->arch.mmu.translate_gpa = translate_gpa;
vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;

for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;

return alloc_mmu_pages(vcpu);
}

@@ -5060,7 +5413,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu)
{
MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));

init_kvm_mmu(vcpu);
kvm_init_mmu(vcpu, true);
}

static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
@@ -5500,7 +5853,7 @@ int kvm_mmu_module_init(void)
{
int ret = -ENOMEM;

kvm_mmu_clear_all_pte_masks();
kvm_mmu_reset_all_pte_masks();

pte_list_desc_cache = kmem_cache_create("pte_list_desc",
sizeof(struct pte_list_desc),

@@ -61,9 +61,10 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value);
void
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);

void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty);
bool accessed_dirty, gpa_t new_eptp);
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len);
@@ -85,6 +86,27 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
return kvm_mmu_load(vcpu);
}

static inline unsigned long kvm_get_pcid(struct kvm_vcpu *vcpu, gpa_t cr3)
{
BUILD_BUG_ON((X86_CR3_PCID_MASK & PAGE_MASK) != 0);

return kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)
? cr3 & X86_CR3_PCID_MASK
: 0;
}

static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
{
return kvm_get_pcid(vcpu, kvm_read_cr3(vcpu));
}

static inline void kvm_mmu_load_cr3(struct kvm_vcpu *vcpu)
{
if (VALID_PAGE(vcpu->arch.mmu.root_hpa))
vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa |
kvm_get_active_pcid(vcpu));
}
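The helpers above pull the PCID out of the guest's CR3 so that the host page-table root can be loaded with the matching PCID. A small, hedged sketch of the same bit manipulation (the mask mirrors the x86 definition; the CR3 value is invented for the example):

/* Mirrors kvm_get_pcid()/kvm_mmu_load_cr3() above: the low 12 bits of
 * CR3 hold the PCID when CR4.PCIDE is set. Illustrative values only. */
#include <stdio.h>
#include <stdint.h>

#define X86_CR3_PCID_MASK 0xFFFULL

int main(void)
{
	uint64_t guest_cr3 = 0x12345007;  /* page-table base | PCID 7 */
	int pcide = 1;                    /* CR4.PCIDE assumed set */

	uint64_t pcid = pcide ? (guest_cr3 & X86_CR3_PCID_MASK) : 0;
	uint64_t root = guest_cr3 & ~X86_CR3_PCID_MASK;

	printf("pcid = %llu, root = %#llx\n",
	       (unsigned long long)pcid, (unsigned long long)root);
	return 0;
}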
/*
* Currently, we have two sorts of write-protection, a) the first one
* write-protects guest page to sync the guest modification, b) another one is

@@ -181,7 +181,7 @@ no_present:
* set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK
* to signify readability since it isn't used in the EPT case
*/
static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
static inline unsigned FNAME(gpte_access)(u64 gpte)
{
unsigned access;
#if PTTYPE == PTTYPE_EPT
@@ -394,8 +394,8 @@ retry_walk:
accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;

/* Convert to ACC_*_MASK flags for struct guest_walker. */
walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask);
walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ walk_nx_mask);
walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask);
errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
if (unlikely(errcode))
goto error;
@@ -508,7 +508,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);

gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
pte_access = sp->role.access & FNAME(gpte_access)(gpte);
FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
no_dirty_log && (pte_access & ACC_WRITE_MASK));
@@ -856,7 +856,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t);
}

static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
{
struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp;
@@ -871,13 +871,13 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
*/
mmu_topup_memory_caches(vcpu);

if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) {
if (!VALID_PAGE(root_hpa)) {
WARN_ON(1);
return;
}

spin_lock(&vcpu->kvm->mmu_lock);
for_each_shadow_entry(vcpu, gva, iterator) {
for_each_shadow_entry_using_root(vcpu, root_hpa, gva, iterator) {
level = iterator.level;
sptep = iterator.sptep;

@@ -968,6 +968,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
int i, nr_present = 0;
bool host_writable;
gpa_t first_pte_gpa;
int set_spte_ret = 0;

/* direct kvm_mmu_page can not be unsync. */
BUG_ON(sp->role.direct);
@@ -1002,7 +1003,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)

gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access;
pte_access &= FNAME(gpte_access)(vcpu, gpte);
pte_access &= FNAME(gpte_access)(gpte);
FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);

if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
@@ -1024,12 +1025,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)

host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;

set_spte(vcpu, &sp->spt[i], pte_access,
PT_PAGE_TABLE_LEVEL, gfn,
spte_to_pfn(sp->spt[i]), true, false,
host_writable);
set_spte_ret |= set_spte(vcpu, &sp->spt[i],
pte_access, PT_PAGE_TABLE_LEVEL,
gfn, spte_to_pfn(sp->spt[i]),
true, false, host_writable);
}

if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
kvm_flush_remote_tlbs(vcpu->kvm);

return nr_present;
}

@@ -2884,7 +2884,6 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,

svm->vmcb->control.nested_cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_NPT);
svm_flush_tlb(vcpu, true);
}

static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
@@ -5435,6 +5434,13 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
svm->asid_generation--;
}

static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
{
struct vcpu_svm *svm = to_svm(vcpu);

invlpga(gva, svm->vmcb->control.asid);
}

static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
{
}
@@ -5766,7 +5772,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)

svm->vmcb->save.cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_CR);
svm_flush_tlb(vcpu, true);
}

static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
@@ -5779,8 +5784,6 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
/* Also sync guest cr3 here in case we live migrate */
svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
mark_dirty(svm->vmcb, VMCB_CR);

svm_flush_tlb(vcpu, true);
}

static int is_disabled(void)
@@ -7090,6 +7093,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.set_rflags = svm_set_rflags,

.tlb_flush = svm_flush_tlb,
.tlb_flush_gva = svm_flush_tlb_gva,

.run = svm_vcpu_run,
.handle_exit = handle_exit,

arch/x86/kvm/vmx.c: 1124 changed lines (diff suppressed because it is too large)

@@ -848,16 +848,21 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);

int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
bool skip_tlb_flush = false;
#ifdef CONFIG_X86_64
bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);

if (pcid_enabled)
cr3 &= ~CR3_PCID_INVD;
if (pcid_enabled) {
skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
cr3 &= ~X86_CR3_PCID_NOFLUSH;
}
#endif

if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
kvm_mmu_sync_roots(vcpu);
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
if (!skip_tlb_flush) {
kvm_mmu_sync_roots(vcpu);
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
return 0;
}

@@ -868,9 +873,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
return 1;

kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
vcpu->arch.cr3 = cr3;
__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
kvm_mmu_new_cr3(vcpu);

return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr3);
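The hunk above makes kvm_set_cr3() honour bit 63 of the new CR3 value (X86_CR3_PCID_NOFLUSH): when PCID is enabled, a MOV to CR3 with that bit set asks for the TLB entries tagged with the new PCID to be preserved, so the sync and flush can be skipped. A hedged sketch of how such a value decomposes (the constant mirrors the x86 definition; the CR3 value is invented):

/* Splits a guest CR3 write the way the kvm_set_cr3() hunk above does.
 * Illustrative values only. */
#include <stdio.h>
#include <stdint.h>

#define X86_CR3_PCID_NOFLUSH (1ULL << 63)

int main(void)
{
	uint64_t cr3 = X86_CR3_PCID_NOFLUSH | 0x12345001; /* keep TLB, PCID 1 */
	int skip_tlb_flush = (cr3 & X86_CR3_PCID_NOFLUSH) != 0;

	cr3 &= ~X86_CR3_PCID_NOFLUSH;
	printf("skip_tlb_flush = %d, cr3 = %#llx\n",
	       skip_tlb_flush, (unsigned long long)cr3);
	return 0;
}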
@@ -2185,10 +2191,11 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.mcg_status = data;
break;
case MSR_IA32_MCG_CTL:
if (!(mcg_cap & MCG_CTL_P))
if (!(mcg_cap & MCG_CTL_P) &&
(data || !msr_info->host_initiated))
return 1;
if (data != 0 && data != ~(u64)0)
return -1;
return 1;
vcpu->arch.mcg_ctl = data;
break;
default:
@@ -2576,7 +2583,7 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
}
EXPORT_SYMBOL_GPL(kvm_get_msr);

static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
u64 data;
u64 mcg_cap = vcpu->arch.mcg_cap;
@@ -2591,7 +2598,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
data = vcpu->arch.mcg_cap;
break;
case MSR_IA32_MCG_CTL:
if (!(mcg_cap & MCG_CTL_P))
if (!(mcg_cap & MCG_CTL_P) && !host)
return 1;
data = vcpu->arch.mcg_ctl;
break;
@@ -2724,7 +2731,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
return get_msr_mce(vcpu, msr_info->index, &msr_info->data);
return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
msr_info->host_initiated);
case MSR_K7_CLK_CTL:
/*
* Provide expected ramp-up count for K7. All other
@@ -2745,7 +2753,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case HV_X64_MSR_TSC_EMULATION_CONTROL:
case HV_X64_MSR_TSC_EMULATION_STATUS:
return kvm_hv_get_msr_common(vcpu,
msr_info->index, &msr_info->data);
msr_info->index, &msr_info->data,
msr_info->host_initiated);
break;
case MSR_IA32_BBL_CR_CTL3:
/* This legacy MSR exists but isn't fully documented in current
@@ -2969,6 +2978,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_X2APIC_API:
r = KVM_X2APIC_API_VALID_FLAGS;
break;
case KVM_CAP_NESTED_STATE:
r = kvm_x86_ops->get_nested_state ?
kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0;
break;
default:
break;
}
@@ -3985,6 +3998,56 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
break;
}
case KVM_GET_NESTED_STATE: {
struct kvm_nested_state __user *user_kvm_nested_state = argp;
u32 user_data_size;

r = -EINVAL;
if (!kvm_x86_ops->get_nested_state)
break;

BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
if (get_user(user_data_size, &user_kvm_nested_state->size))
return -EFAULT;

r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state,
user_data_size);
if (r < 0)
return r;

if (r > user_data_size) {
if (put_user(r, &user_kvm_nested_state->size))
return -EFAULT;
return -E2BIG;
}
r = 0;
break;
}
case KVM_SET_NESTED_STATE: {
struct kvm_nested_state __user *user_kvm_nested_state = argp;
struct kvm_nested_state kvm_state;

r = -EINVAL;
if (!kvm_x86_ops->set_nested_state)
break;

if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
return -EFAULT;

if (kvm_state.size < sizeof(kvm_state))
return -EINVAL;

if (kvm_state.flags &
~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE))
return -EINVAL;

/* nested_run_pending implies guest_mode. */
if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING)
return -EINVAL;

r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
break;
}
default:
r = -EINVAL;
}
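The ioctl plumbing above pairs with a userspace side. A minimal, hedged sketch of how a VMM might query the required buffer size and fetch one vCPU's nested state (error handling trimmed; vm_fd and vcpu_fd are assumed to be already-open KVM file descriptors, and the headers are assumed to define KVM_CAP_NESTED_STATE):

/* Sketch only: userspace use of the KVM_GET_NESTED_STATE path handled above. */
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

struct kvm_nested_state *get_nested_state(int vm_fd, int vcpu_fd)
{
	/* KVM_CHECK_EXTENSION returns the maximum state size, or 0. */
	int size = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NESTED_STATE);
	struct kvm_nested_state *state;

	if (size <= 0)
		return NULL;            /* not supported */

	state = calloc(1, size);
	if (!state)
		return NULL;

	state->size = size;
	if (ioctl(vcpu_fd, KVM_GET_NESTED_STATE, state) < 0) {
		free(state);
		return NULL;
	}
	return state;   /* later restored on the target via KVM_SET_NESTED_STATE */
}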
|
||||
@@ -6503,8 +6566,12 @@ static void kvm_set_mmio_spte_mask(void)
|
||||
* Set the reserved bits and the present bit of an paging-structure
|
||||
* entry to generate page fault with PFER.RSV = 1.
|
||||
*/
|
||||
/* Mask the reserved physical address bits. */
|
||||
mask = rsvd_bits(maxphyaddr, 51);
|
||||
|
||||
/*
|
||||
* Mask the uppermost physical address bit, which would be reserved as
|
||||
* long as the supported physical address width is less than 52.
|
||||
*/
|
||||
mask = 1ull << 51;
|
||||
|
||||
/* Set the present bit. */
|
||||
mask |= 1ull;
|
||||
@@ -6769,6 +6836,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
|
||||
case KVM_HC_CLOCK_PAIRING:
|
||||
ret = kvm_pv_clock_pairing(vcpu, a0, a1);
|
||||
break;
|
||||
case KVM_HC_SEND_IPI:
|
||||
ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
ret = -KVM_ENOSYS;
|
||||
@@ -7287,6 +7357,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
bool req_immediate_exit = false;
|
||||
|
||||
if (kvm_request_pending(vcpu)) {
|
||||
if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu))
|
||||
kvm_x86_ops->get_vmcs12_pages(vcpu);
|
||||
if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
|
||||
kvm_mmu_unload(vcpu);
|
||||
if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
|
||||
@@ -7302,6 +7374,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
|
||||
kvm_mmu_sync_roots(vcpu);
|
||||
if (kvm_check_request(KVM_REQ_LOAD_CR3, vcpu))
|
||||
kvm_mmu_load_cr3(vcpu);
|
||||
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
|
||||
kvm_vcpu_flush_tlb(vcpu, true);
|
||||
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
|
||||
@@ -8013,6 +8087,10 @@ EXPORT_SYMBOL_GPL(kvm_task_switch);
|
||||
|
||||
static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
|
||||
{
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
|
||||
(sregs->cr4 & X86_CR4_OSXSAVE))
|
||||
return -EINVAL;
|
||||
|
||||
if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
|
||||
/*
|
||||
* When EFER.LME and CR0.PG are set, the processor is in
|
||||
@@ -8043,10 +8121,6 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
|
||||
struct desc_ptr dt;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
|
||||
(sregs->cr4 & X86_CR4_OSXSAVE))
|
||||
goto out;
|
||||
|
||||
if (kvm_valid_sregs(vcpu, sregs))
|
||||
goto out;
|
||||
|
||||
|
||||
@@ -130,7 +130,7 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQUEST_ARCH_BASE 8

#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
	BUILD_BUG_ON((unsigned)(nr) >= 32 - KVM_REQUEST_ARCH_BASE); \
	BUILD_BUG_ON((unsigned)(nr) >= (FIELD_SIZEOF(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \
	(unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \
})
#define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0)
@@ -224,7 +224,7 @@ struct kvm_vcpu {
	int vcpu_id;
	int srcu_idx;
	int mode;
	unsigned long requests;
	u64 requests;
	unsigned long guest_debug;

	int pre_pcpu;
@@ -309,6 +309,13 @@ static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memsl
	return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
}

static inline unsigned long *kvm_second_dirty_bitmap(struct kvm_memory_slot *memslot)
{
	unsigned long len = kvm_dirty_bitmap_bytes(memslot);

	return memslot->dirty_bitmap + len / sizeof(*memslot->dirty_bitmap);
}

struct kvm_s390_adapter_int {
	u64 ind_addr;
	u64 summary_addr;
@@ -827,6 +834,13 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
}
#endif

#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
{
	return -ENOTSUPP;
}
#endif

#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
@@ -1124,7 +1138,7 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
	 * caller. Paired with the smp_mb__after_atomic in kvm_check_request.
	 */
	smp_wmb();
	set_bit(req & KVM_REQUEST_MASK, &vcpu->requests);
	set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
}

static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
@@ -1134,12 +1148,12 @@ static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)

static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu)
{
	return test_bit(req & KVM_REQUEST_MASK, &vcpu->requests);
	return test_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
}

static inline void kvm_clear_request(int req, struct kvm_vcpu *vcpu)
{
	clear_bit(req & KVM_REQUEST_MASK, &vcpu->requests);
	clear_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
}

static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)

@@ -950,6 +950,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_HYPERV_EVENTFD 154
#define KVM_CAP_HYPERV_TLBFLUSH 155
#define KVM_CAP_S390_HPAGE_1M 156
#define KVM_CAP_NESTED_STATE 157

#ifdef KVM_CAP_IRQ_ROUTING

@@ -1392,6 +1393,9 @@ struct kvm_enc_region {
/* Available with KVM_CAP_HYPERV_EVENTFD */
#define KVM_HYPERV_EVENTFD _IOW(KVMIO, 0xbd, struct kvm_hyperv_eventfd)

/* Available with KVM_CAP_NESTED_STATE */
#define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state)
#define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state)

/* Secure Encrypted Virtualization command */
enum sev_cmd_id {

@@ -13,6 +13,7 @@
/* Return values for hypercalls */
#define KVM_ENOSYS 1000
#define KVM_EFAULT EFAULT
#define KVM_EINVAL EINVAL
#define KVM_E2BIG E2BIG
#define KVM_EPERM EPERM
#define KVM_EOPNOTSUPP 95
@@ -26,6 +27,7 @@
#define KVM_HC_MIPS_EXIT_VM 7
#define KVM_HC_MIPS_CONSOLE_OUTPUT 8
#define KVM_HC_CLOCK_PAIRING 9
#define KVM_HC_SEND_IPI 10

/*
 * hypercalls use architecture specific

tools/testing/selftests/kvm/.gitignore
@@ -1,3 +1,5 @@
cr4_cpuid_sync_test
set_sregs_test
sync_regs_test
vmx_tsc_adjust_test
state_test

@@ -9,6 +9,8 @@ LIBKVM_x86_64 = lib/x86.c lib/vmx.c
TEST_GEN_PROGS_x86_64 = set_sregs_test
TEST_GEN_PROGS_x86_64 += sync_regs_test
TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += state_test

TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))

tools/testing/selftests/kvm/cr4_cpuid_sync_test.c (new file, 129 lines)
@@ -0,0 +1,129 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * CR4 and CPUID sync test
 *
 * Copyright 2018, Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *   Wei Huang <wei@redhat.com>
 */

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include "test_util.h"

#include "kvm_util.h"
#include "x86.h"

#define X86_FEATURE_XSAVE (1<<26)
#define X86_FEATURE_OSXSAVE (1<<27)
#define VCPU_ID 1

enum {
	GUEST_UPDATE_CR4 = 0x1000,
	GUEST_FAILED,
	GUEST_DONE,
};

static void exit_to_hv(uint16_t port)
{
	__asm__ __volatile__("in %[port], %%al"
			     :
			     : [port]"d"(port)
			     : "rax");
}

static inline bool cr4_cpuid_is_sync(void)
{
	int func, subfunc;
	uint32_t eax, ebx, ecx, edx;
	uint64_t cr4;

	func = 0x1;
	subfunc = 0x0;
	__asm__ __volatile__("cpuid"
			     : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
			     : "a"(func), "c"(subfunc));

	cr4 = get_cr4();

	return (!!(ecx & X86_FEATURE_OSXSAVE)) == (!!(cr4 & X86_CR4_OSXSAVE));
}

static void guest_code(void)
{
	uint64_t cr4;

	/* turn on CR4.OSXSAVE */
	cr4 = get_cr4();
	cr4 |= X86_CR4_OSXSAVE;
	set_cr4(cr4);

	/* verify CR4.OSXSAVE == CPUID.OSXSAVE */
	if (!cr4_cpuid_is_sync())
		exit_to_hv(GUEST_FAILED);

	/* notify hypervisor to change CR4 */
	exit_to_hv(GUEST_UPDATE_CR4);

	/* check again */
	if (!cr4_cpuid_is_sync())
		exit_to_hv(GUEST_FAILED);

	exit_to_hv(GUEST_DONE);
}

int main(int argc, char *argv[])
{
	struct kvm_run *run;
	struct kvm_vm *vm;
	struct kvm_sregs sregs;
	struct kvm_cpuid_entry2 *entry;
	int rc;

	entry = kvm_get_supported_cpuid_entry(1);
	if (!(entry->ecx & X86_FEATURE_XSAVE)) {
		printf("XSAVE feature not supported, skipping test\n");
		return 0;
	}

	/* Tell stdout not to buffer its content */
	setbuf(stdout, NULL);

	/* Create VM */
	vm = vm_create_default(VCPU_ID, guest_code);
	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
	run = vcpu_state(vm, VCPU_ID);

	while (1) {
		rc = _vcpu_run(vm, VCPU_ID);

		if (run->exit_reason == KVM_EXIT_IO) {
			switch (run->io.port) {
			case GUEST_UPDATE_CR4:
				/* emulate hypervisor clearing CR4.OSXSAVE */
				vcpu_sregs_get(vm, VCPU_ID, &sregs);
				sregs.cr4 &= ~X86_CR4_OSXSAVE;
				vcpu_sregs_set(vm, VCPU_ID, &sregs);
				break;
			case GUEST_FAILED:
				TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
				break;
			case GUEST_DONE:
				goto done;
			default:
				TEST_ASSERT(false, "Unknown port 0x%x.",
					    run->io.port);
			}
		}
	}

	kvm_vm_free(vm);

done:
	return 0;
}
@@ -53,6 +53,8 @@ int kvm_check_cap(long cap);

struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
void kvm_vm_free(struct kvm_vm *vmp);
void kvm_vm_restart(struct kvm_vm *vmp, int perm);
void kvm_vm_release(struct kvm_vm *vmp);

int kvm_memcmp_hva_gva(void *hva,
	struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);
@@ -75,7 +77,7 @@ void vcpu_ioctl(struct kvm_vm *vm,
	uint32_t vcpuid, unsigned long ioctl, void *arg);
void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot);
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
	uint32_t data_memslot, uint32_t pgd_memslot);
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);

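The new kvm_vm_release()/kvm_vm_restart() pair is what lets a selftest tear a VM down and rebuild it around saved vCPU state. A minimal sketch of the intended flow is below; it mirrors what state_test.c later in this series does, assumes the selftest headers (kvm_util.h, x86.h) plus <fcntl.h>, and omits error handling.

static void save_restore_cycle(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct kvm_x86_state *state;

	state = vcpu_save_state(vm, vcpuid);	/* snapshot the vCPU */
	kvm_vm_release(vm);			/* close the old VM/vCPU fds */

	kvm_vm_restart(vm, O_RDWR);		/* reopen fds, re-register memslots */
	vm_vcpu_add(vm, vcpuid, 0, 0);
	vcpu_load_state(vm, vcpuid, state);	/* replay the snapshot */
	free(state);
}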
@@ -380,6 +380,30 @@ static inline int vmptrld(uint64_t vmcs_pa)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int vmptrst(uint64_t *value)
|
||||
{
|
||||
uint64_t tmp;
|
||||
uint8_t ret;
|
||||
|
||||
__asm__ __volatile__("vmptrst %[value]; setna %[ret]"
|
||||
: [value]"=m"(tmp), [ret]"=rm"(ret)
|
||||
: : "cc", "memory");
|
||||
|
||||
*value = tmp;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* A wrapper around vmptrst that ignores errors and returns zero if the
|
||||
* vmptrst instruction fails.
|
||||
*/
|
||||
static inline uint64_t vmptrstz(void)
|
||||
{
|
||||
uint64_t value = 0;
|
||||
vmptrst(&value);
|
||||
return value;
|
||||
}
|
||||
|
||||
/*
|
||||
* No guest state (e.g. GPRs) is established by this vmlaunch.
|
||||
*/
|
||||
@@ -444,6 +468,15 @@ static inline int vmresume(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void vmcall(void)
|
||||
{
|
||||
/* Currently, L1 destroys our GPRs during vmexits. */
|
||||
__asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" : : :
|
||||
"rax", "rbx", "rcx", "rdx",
|
||||
"rsi", "rdi", "r8", "r9", "r10", "r11", "r12",
|
||||
"r13", "r14", "r15");
|
||||
}
|
||||
|
||||
static inline int vmread(uint64_t encoding, uint64_t *value)
|
||||
{
|
||||
uint64_t tmp;
|
||||
@@ -486,9 +519,34 @@ static inline uint32_t vmcs_revision(void)
|
||||
return rdmsr(MSR_IA32_VMX_BASIC);
|
||||
}
|
||||
|
||||
void prepare_for_vmx_operation(void);
|
||||
void prepare_vmcs(void *guest_rip, void *guest_rsp);
|
||||
struct kvm_vm *vm_create_default_vmx(uint32_t vcpuid,
|
||||
vmx_guest_code_t guest_code);
|
||||
struct vmx_pages {
|
||||
void *vmxon_hva;
|
||||
uint64_t vmxon_gpa;
|
||||
void *vmxon;
|
||||
|
||||
void *vmcs_hva;
|
||||
uint64_t vmcs_gpa;
|
||||
void *vmcs;
|
||||
|
||||
void *msr_hva;
|
||||
uint64_t msr_gpa;
|
||||
void *msr;
|
||||
|
||||
void *shadow_vmcs_hva;
|
||||
uint64_t shadow_vmcs_gpa;
|
||||
void *shadow_vmcs;
|
||||
|
||||
void *vmread_hva;
|
||||
uint64_t vmread_gpa;
|
||||
void *vmread;
|
||||
|
||||
void *vmwrite_hva;
|
||||
uint64_t vmwrite_gpa;
|
||||
void *vmwrite;
|
||||
};
|
||||
|
||||
struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
|
||||
bool prepare_for_vmx_operation(struct vmx_pages *vmx);
|
||||
void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
|
||||
|
||||
#endif /* !SELFTEST_KVM_VMX_H */
|
||||
|
||||
@@ -59,8 +59,8 @@ enum x86_register {
|
||||
struct desc64 {
|
||||
uint16_t limit0;
|
||||
uint16_t base0;
|
||||
unsigned base1:8, type:5, dpl:2, p:1;
|
||||
unsigned limit1:4, zero0:3, g:1, base2:8;
|
||||
unsigned base1:8, s:1, type:4, dpl:2, p:1;
|
||||
unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
|
||||
uint32_t base3;
|
||||
uint32_t zero1;
|
||||
} __attribute__((packed));
|
||||
@@ -303,6 +303,10 @@ static inline unsigned long get_xmm(int n)
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct kvm_x86_state;
|
||||
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
|
||||
void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state);
|
||||
|
||||
/*
|
||||
* Basic CPU control in CR0
|
||||
*/
|
||||
|
||||
@@ -62,6 +62,18 @@ int kvm_check_cap(long cap)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void vm_open(struct kvm_vm *vm, int perm)
|
||||
{
|
||||
vm->kvm_fd = open(KVM_DEV_PATH, perm);
|
||||
if (vm->kvm_fd < 0)
|
||||
exit(KSFT_SKIP);
|
||||
|
||||
/* Create VM. */
|
||||
vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, NULL);
|
||||
TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
|
||||
"rc: %i errno: %i", vm->fd, errno);
|
||||
}
|
||||
|
||||
/* VM Create
|
||||
*
|
||||
* Input Args:
|
||||
@@ -90,16 +102,7 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
|
||||
TEST_ASSERT(vm != NULL, "Insufficent Memory");
|
||||
|
||||
vm->mode = mode;
|
||||
kvm_fd = open(KVM_DEV_PATH, perm);
|
||||
if (kvm_fd < 0)
|
||||
exit(KSFT_SKIP);
|
||||
|
||||
/* Create VM. */
|
||||
vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL);
|
||||
TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
|
||||
"rc: %i errno: %i", vm->fd, errno);
|
||||
|
||||
close(kvm_fd);
|
||||
vm_open(vm, perm);
|
||||
|
||||
/* Setup mode specific traits. */
|
||||
switch (vm->mode) {
|
||||
@@ -132,6 +135,39 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
|
||||
return vm;
|
||||
}
|
||||
|
||||
/* VM Restart
|
||||
*
|
||||
* Input Args:
|
||||
* vm - VM that has been released before
|
||||
* perm - permission
|
||||
*
|
||||
* Output Args: None
|
||||
*
|
||||
* Reopens the file descriptors associated to the VM and reinstates the
|
||||
* global state, such as the irqchip and the memory regions that are mapped
|
||||
* into the guest.
|
||||
*/
|
||||
void kvm_vm_restart(struct kvm_vm *vmp, int perm)
|
||||
{
|
||||
struct userspace_mem_region *region;
|
||||
|
||||
vm_open(vmp, perm);
|
||||
if (vmp->has_irqchip)
|
||||
vm_create_irqchip(vmp);
|
||||
|
||||
for (region = vmp->userspace_mem_region_head; region;
|
||||
region = region->next) {
|
||||
int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region);
|
||||
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
|
||||
" rc: %i errno: %i\n"
|
||||
" slot: %u flags: 0x%x\n"
|
||||
" guest_phys_addr: 0x%lx size: 0x%lx",
|
||||
ret, errno, region->region.slot, region->region.flags,
|
||||
region->region.guest_phys_addr,
|
||||
region->region.memory_size);
|
||||
}
|
||||
}
|
||||
|
||||
/* Userspace Memory Region Find
|
||||
*
|
||||
* Input Args:
|
||||
@@ -238,8 +274,12 @@ struct vcpu *vcpu_find(struct kvm_vm *vm,
|
||||
static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
|
||||
{
|
||||
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
|
||||
int ret;
|
||||
|
||||
int ret = close(vcpu->fd);
|
||||
ret = munmap(vcpu->state, sizeof(*vcpu->state));
|
||||
TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
|
||||
"errno: %i", ret, errno);
|
||||
close(vcpu->fd);
|
||||
TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
|
||||
"errno: %i", ret, errno);
|
||||
|
||||
@@ -252,6 +292,23 @@ static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
|
||||
free(vcpu);
|
||||
}
|
||||
|
||||
void kvm_vm_release(struct kvm_vm *vmp)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* Free VCPUs. */
|
||||
while (vmp->vcpu_head)
|
||||
vm_vcpu_rm(vmp, vmp->vcpu_head->id);
|
||||
|
||||
/* Close file descriptor for the VM. */
|
||||
ret = close(vmp->fd);
|
||||
TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
|
||||
" vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
|
||||
|
||||
close(vmp->kvm_fd);
|
||||
TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
|
||||
" vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
|
||||
}
|
||||
|
||||
/* Destroys and frees the VM pointed to by vmp.
|
||||
*/
|
||||
@@ -282,18 +339,11 @@ void kvm_vm_free(struct kvm_vm *vmp)
|
||||
free(region);
|
||||
}
|
||||
|
||||
/* Free VCPUs. */
|
||||
while (vmp->vcpu_head)
|
||||
vm_vcpu_rm(vmp, vmp->vcpu_head->id);
|
||||
|
||||
/* Free sparsebit arrays. */
|
||||
sparsebit_free(&vmp->vpages_valid);
|
||||
sparsebit_free(&vmp->vpages_mapped);
|
||||
|
||||
/* Close file descriptor for the VM. */
|
||||
ret = close(vmp->fd);
|
||||
TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
|
||||
" vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
|
||||
kvm_vm_release(vmp);
|
||||
|
||||
/* Free the structure describing the VM. */
|
||||
free(vmp);
|
||||
@@ -701,7 +751,7 @@ static int vcpu_mmap_sz(void)
|
||||
* Creates and adds to the VM specified by vm and virtual CPU with
|
||||
* the ID given by vcpuid.
|
||||
*/
|
||||
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
|
||||
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot)
|
||||
{
|
||||
struct vcpu *vcpu;
|
||||
|
||||
@@ -736,7 +786,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
|
||||
vcpu->next = vm->vcpu_head;
|
||||
vm->vcpu_head = vcpu;
|
||||
|
||||
vcpu_setup(vm, vcpuid);
|
||||
vcpu_setup(vm, vcpuid, pgd_memslot, gdt_memslot);
|
||||
}
|
||||
|
||||
/* VM Virtual Address Unused Gap
|
||||
@@ -957,6 +1007,8 @@ void vm_create_irqchip(struct kvm_vm *vm)
|
||||
ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
|
||||
TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
|
||||
"rc: %i errno: %i", ret, errno);
|
||||
|
||||
vm->has_irqchip = true;
|
||||
}
|
||||
|
||||
/* VM VCPU State
|
||||
|
||||
@@ -43,6 +43,7 @@ struct vcpu {
|
||||
|
||||
struct kvm_vm {
|
||||
int mode;
|
||||
int kvm_fd;
|
||||
int fd;
|
||||
unsigned int page_size;
|
||||
unsigned int page_shift;
|
||||
@@ -51,13 +52,17 @@ struct kvm_vm {
|
||||
struct userspace_mem_region *userspace_mem_region_head;
|
||||
struct sparsebit *vpages_valid;
|
||||
struct sparsebit *vpages_mapped;
|
||||
|
||||
bool has_irqchip;
|
||||
bool pgd_created;
|
||||
vm_paddr_t pgd;
|
||||
vm_vaddr_t gdt;
|
||||
vm_vaddr_t tss;
|
||||
};
|
||||
|
||||
struct vcpu *vcpu_find(struct kvm_vm *vm,
|
||||
uint32_t vcpuid);
|
||||
void vcpu_setup(struct kvm_vm *vm, int vcpuid);
|
||||
void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot);
|
||||
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
|
||||
void regs_dump(FILE *stream, struct kvm_regs *regs,
|
||||
uint8_t indent);
|
||||
|
||||
@@ -13,47 +13,60 @@
|
||||
#include "x86.h"
|
||||
#include "vmx.h"
|
||||
|
||||
/* Create a default VM for VMX tests.
|
||||
/* Allocate memory regions for nested VMX tests.
|
||||
*
|
||||
* Input Args:
|
||||
* vcpuid - The id of the single VCPU to add to the VM.
|
||||
* guest_code - The vCPU's entry point
|
||||
* vm - The VM to allocate guest-virtual addresses in.
|
||||
*
|
||||
* Output Args: None
|
||||
* Output Args:
|
||||
* p_vmx_gva - The guest virtual address for the struct vmx_pages.
|
||||
*
|
||||
* Return:
|
||||
* Pointer to opaque structure that describes the created VM.
|
||||
* Pointer to structure with the addresses of the VMX areas.
|
||||
*/
|
||||
struct kvm_vm *
|
||||
vm_create_default_vmx(uint32_t vcpuid, vmx_guest_code_t guest_code)
|
||||
struct vmx_pages *
|
||||
vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
|
||||
{
|
||||
struct kvm_cpuid2 *cpuid;
|
||||
struct kvm_vm *vm;
|
||||
vm_vaddr_t vmxon_vaddr;
|
||||
vm_paddr_t vmxon_paddr;
|
||||
vm_vaddr_t vmcs_vaddr;
|
||||
vm_paddr_t vmcs_paddr;
|
||||
|
||||
vm = vm_create_default(vcpuid, (void *) guest_code);
|
||||
|
||||
/* Enable nesting in CPUID */
|
||||
vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
|
||||
vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
|
||||
struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
|
||||
|
||||
/* Setup of a region of guest memory for the vmxon region. */
|
||||
vmxon_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0);
|
||||
vmxon_paddr = addr_gva2gpa(vm, vmxon_vaddr);
|
||||
vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
|
||||
vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
|
||||
vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
|
||||
|
||||
/* Setup of a region of guest memory for a vmcs. */
|
||||
vmcs_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0);
|
||||
vmcs_paddr = addr_gva2gpa(vm, vmcs_vaddr);
|
||||
vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
|
||||
vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
|
||||
vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
|
||||
|
||||
vcpu_args_set(vm, vcpuid, 4, vmxon_vaddr, vmxon_paddr, vmcs_vaddr,
|
||||
vmcs_paddr);
|
||||
/* Setup of a region of guest memory for the MSR bitmap. */
|
||||
vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
|
||||
vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
|
||||
vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
|
||||
memset(vmx->msr_hva, 0, getpagesize());
|
||||
|
||||
return vm;
|
||||
/* Setup of a region of guest memory for the shadow VMCS. */
|
||||
vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
|
||||
vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
|
||||
vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
|
||||
|
||||
/* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
|
||||
vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
|
||||
vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
|
||||
vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
|
||||
memset(vmx->vmread_hva, 0, getpagesize());
|
||||
|
||||
vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
|
||||
vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
|
||||
vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
|
||||
memset(vmx->vmwrite_hva, 0, getpagesize());
|
||||
|
||||
*p_vmx_gva = vmx_gva;
|
||||
return vmx;
|
||||
}
|
||||
|
||||
void prepare_for_vmx_operation(void)
|
||||
bool prepare_for_vmx_operation(struct vmx_pages *vmx)
|
||||
{
|
||||
uint64_t feature_control;
|
||||
uint64_t required;
|
||||
@@ -88,18 +101,42 @@ void prepare_for_vmx_operation(void)
|
||||
feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
|
||||
if ((feature_control & required) != required)
|
||||
wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required);
|
||||
|
||||
/* Enter VMX root operation. */
|
||||
*(uint32_t *)(vmx->vmxon) = vmcs_revision();
|
||||
if (vmxon(vmx->vmxon_gpa))
|
||||
return false;
|
||||
|
||||
/* Load a VMCS. */
|
||||
*(uint32_t *)(vmx->vmcs) = vmcs_revision();
|
||||
if (vmclear(vmx->vmcs_gpa))
|
||||
return false;
|
||||
|
||||
if (vmptrld(vmx->vmcs_gpa))
|
||||
return false;
|
||||
|
||||
/* Setup shadow VMCS, do not load it yet. */
|
||||
*(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
|
||||
if (vmclear(vmx->shadow_vmcs_gpa))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the control fields to the most basic settings possible.
|
||||
*/
|
||||
static inline void init_vmcs_control_fields(void)
|
||||
static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
|
||||
{
|
||||
vmwrite(VIRTUAL_PROCESSOR_ID, 0);
|
||||
vmwrite(POSTED_INTR_NV, 0);
|
||||
|
||||
vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PINBASED_CTLS));
|
||||
vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PROCBASED_CTLS));
|
||||
vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
|
||||
if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, 0))
|
||||
vmwrite(CPU_BASED_VM_EXEC_CONTROL,
|
||||
rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
|
||||
else
|
||||
vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS));
|
||||
vmwrite(EXCEPTION_BITMAP, 0);
|
||||
vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
|
||||
vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
|
||||
@@ -113,12 +150,15 @@ static inline void init_vmcs_control_fields(void)
|
||||
vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
|
||||
vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
|
||||
vmwrite(TPR_THRESHOLD, 0);
|
||||
vmwrite(SECONDARY_VM_EXEC_CONTROL, 0);
|
||||
|
||||
vmwrite(CR0_GUEST_HOST_MASK, 0);
|
||||
vmwrite(CR4_GUEST_HOST_MASK, 0);
|
||||
vmwrite(CR0_READ_SHADOW, get_cr0());
|
||||
vmwrite(CR4_READ_SHADOW, get_cr4());
|
||||
|
||||
vmwrite(MSR_BITMAP, vmx->msr_gpa);
|
||||
vmwrite(VMREAD_BITMAP, vmx->vmread_gpa);
|
||||
vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -235,9 +275,9 @@ static inline void init_vmcs_guest_state(void *rip, void *rsp)
|
||||
vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
|
||||
}
|
||||
|
||||
void prepare_vmcs(void *guest_rip, void *guest_rsp)
|
||||
void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
|
||||
{
|
||||
init_vmcs_control_fields();
|
||||
init_vmcs_control_fields(vmx);
|
||||
init_vmcs_host_state();
|
||||
init_vmcs_guest_state(guest_rip, guest_rsp);
|
||||
}
|
||||
|
||||
@@ -239,25 +239,6 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
|
||||
vm_paddr_t paddr = vm_phy_page_alloc(vm,
|
||||
KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
|
||||
vm->pgd = paddr;
|
||||
|
||||
/* Set pointer to pgd tables in all the VCPUs that
|
||||
* have already been created. Future VCPUs will have
|
||||
* the value set as each one is created.
|
||||
*/
|
||||
for (struct vcpu *vcpu = vm->vcpu_head; vcpu;
|
||||
vcpu = vcpu->next) {
|
||||
struct kvm_sregs sregs;
|
||||
|
||||
/* Obtain the current system register settings */
|
||||
vcpu_sregs_get(vm, vcpu->id, &sregs);
|
||||
|
||||
/* Set and store the pointer to the start of the
|
||||
* pgd tables.
|
||||
*/
|
||||
sregs.cr3 = vm->pgd;
|
||||
vcpu_sregs_set(vm, vcpu->id, &sregs);
|
||||
}
|
||||
|
||||
vm->pgd_created = true;
|
||||
}
|
||||
}
|
||||
@@ -460,9 +441,32 @@ static void kvm_seg_set_unusable(struct kvm_segment *segp)
|
||||
segp->unusable = true;
|
||||
}
|
||||
|
||||
static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
|
||||
{
|
||||
void *gdt = addr_gva2hva(vm, vm->gdt);
|
||||
struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
|
||||
|
||||
desc->limit0 = segp->limit & 0xFFFF;
|
||||
desc->base0 = segp->base & 0xFFFF;
|
||||
desc->base1 = segp->base >> 16;
|
||||
desc->s = segp->s;
|
||||
desc->type = segp->type;
|
||||
desc->dpl = segp->dpl;
|
||||
desc->p = segp->present;
|
||||
desc->limit1 = segp->limit >> 16;
|
||||
desc->l = segp->l;
|
||||
desc->db = segp->db;
|
||||
desc->g = segp->g;
|
||||
desc->base2 = segp->base >> 24;
|
||||
if (!segp->s)
|
||||
desc->base3 = segp->base >> 32;
|
||||
}
|
||||
|
||||
|
||||
/* Set Long Mode Flat Kernel Code Segment
|
||||
*
|
||||
* Input Args:
|
||||
* vm - VM whose GDT is being filled, or NULL to only write segp
|
||||
* selector - selector value
|
||||
*
|
||||
* Output Args:
|
||||
@@ -473,7 +477,7 @@ static void kvm_seg_set_unusable(struct kvm_segment *segp)
|
||||
* Sets up the KVM segment pointed to by segp, to be a code segment
|
||||
* with the selector value given by selector.
|
||||
*/
|
||||
static void kvm_seg_set_kernel_code_64bit(uint16_t selector,
|
||||
static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
|
||||
struct kvm_segment *segp)
|
||||
{
|
||||
memset(segp, 0, sizeof(*segp));
|
||||
@@ -486,11 +490,14 @@ static void kvm_seg_set_kernel_code_64bit(uint16_t selector,
|
||||
segp->g = true;
|
||||
segp->l = true;
|
||||
segp->present = 1;
|
||||
if (vm)
|
||||
kvm_seg_fill_gdt_64bit(vm, segp);
|
||||
}
|
||||
|
||||
/* Set Long Mode Flat Kernel Data Segment
|
||||
*
|
||||
* Input Args:
|
||||
* vm - VM whose GDT is being filled, or NULL to only write segp
|
||||
* selector - selector value
|
||||
*
|
||||
* Output Args:
|
||||
@@ -501,7 +508,7 @@ static void kvm_seg_set_kernel_code_64bit(uint16_t selector,
|
||||
* Sets up the KVM segment pointed to by segp, to be a data segment
|
||||
* with the selector value given by selector.
|
||||
*/
|
||||
static void kvm_seg_set_kernel_data_64bit(uint16_t selector,
|
||||
static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
|
||||
struct kvm_segment *segp)
|
||||
{
|
||||
memset(segp, 0, sizeof(*segp));
|
||||
@@ -513,6 +520,8 @@ static void kvm_seg_set_kernel_data_64bit(uint16_t selector,
|
||||
*/
|
||||
segp->g = true;
|
||||
segp->present = true;
|
||||
if (vm)
|
||||
kvm_seg_fill_gdt_64bit(vm, segp);
|
||||
}
|
||||
|
||||
/* Address Guest Virtual to Guest Physical
|
||||
@@ -575,13 +584,45 @@ unmapped_gva:
|
||||
"gva: 0x%lx", gva);
|
||||
}
|
||||
|
||||
void vcpu_setup(struct kvm_vm *vm, int vcpuid)
|
||||
static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
|
||||
int pgd_memslot)
|
||||
{
|
||||
if (!vm->gdt)
|
||||
vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
|
||||
KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
|
||||
|
||||
dt->base = vm->gdt;
|
||||
dt->limit = getpagesize();
|
||||
}
|
||||
|
||||
static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
|
||||
int selector, int gdt_memslot,
|
||||
int pgd_memslot)
|
||||
{
|
||||
if (!vm->tss)
|
||||
vm->tss = vm_vaddr_alloc(vm, getpagesize(),
|
||||
KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
|
||||
|
||||
memset(segp, 0, sizeof(*segp));
|
||||
segp->base = vm->tss;
|
||||
segp->limit = 0x67;
|
||||
segp->selector = selector;
|
||||
segp->type = 0xb;
|
||||
segp->present = 1;
|
||||
kvm_seg_fill_gdt_64bit(vm, segp);
|
||||
}
|
||||
|
||||
void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
|
||||
{
|
||||
struct kvm_sregs sregs;
|
||||
|
||||
/* Set mode specific system register values. */
|
||||
vcpu_sregs_get(vm, vcpuid, &sregs);
|
||||
|
||||
sregs.idt.limit = 0;
|
||||
|
||||
kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
|
||||
|
||||
switch (vm->mode) {
|
||||
case VM_MODE_FLAT48PG:
|
||||
sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
|
||||
@@ -589,30 +630,18 @@ void vcpu_setup(struct kvm_vm *vm, int vcpuid)
|
||||
sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
|
||||
|
||||
kvm_seg_set_unusable(&sregs.ldt);
|
||||
kvm_seg_set_kernel_code_64bit(0x8, &sregs.cs);
|
||||
kvm_seg_set_kernel_data_64bit(0x10, &sregs.ds);
|
||||
kvm_seg_set_kernel_data_64bit(0x10, &sregs.es);
|
||||
kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
|
||||
kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
|
||||
kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
|
||||
kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
|
||||
break;
|
||||
|
||||
default:
|
||||
TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
|
||||
}
|
||||
|
||||
sregs.cr3 = vm->pgd;
|
||||
vcpu_sregs_set(vm, vcpuid, &sregs);
|
||||
|
||||
/* If virtual translation table have been setup, set system register
|
||||
* to point to the tables. It's okay if they haven't been setup yet,
|
||||
* in that the code that sets up the virtual translation tables, will
|
||||
* go back through any VCPUs that have already been created and set
|
||||
* their values.
|
||||
*/
|
||||
if (vm->pgd_created) {
|
||||
struct kvm_sregs sregs;
|
||||
|
||||
vcpu_sregs_get(vm, vcpuid, &sregs);
|
||||
|
||||
sregs.cr3 = vm->pgd;
|
||||
vcpu_sregs_set(vm, vcpuid, &sregs);
|
||||
}
|
||||
}
|
||||
/* Adds a vCPU with reasonable defaults (i.e., a stack)
|
||||
*
|
||||
@@ -629,7 +658,7 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
|
||||
DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
|
||||
|
||||
/* Create VCPU */
|
||||
vm_vcpu_add(vm, vcpuid);
|
||||
vm_vcpu_add(vm, vcpuid, 0, 0);
|
||||
|
||||
/* Setup guest general purpose registers */
|
||||
vcpu_regs_get(vm, vcpuid, ®s);
|
||||
@@ -698,3 +727,148 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code)
|
||||
|
||||
return vm;
|
||||
}
|
||||
|
||||
struct kvm_x86_state {
|
||||
struct kvm_vcpu_events events;
|
||||
struct kvm_mp_state mp_state;
|
||||
struct kvm_regs regs;
|
||||
struct kvm_xsave xsave;
|
||||
struct kvm_xcrs xcrs;
|
||||
struct kvm_sregs sregs;
|
||||
struct kvm_debugregs debugregs;
|
||||
union {
|
||||
struct kvm_nested_state nested;
|
||||
char nested_[16384];
|
||||
};
|
||||
struct kvm_msrs msrs;
|
||||
};
|
||||
|
||||
static int kvm_get_num_msrs(struct kvm_vm *vm)
|
||||
{
|
||||
struct kvm_msr_list nmsrs;
|
||||
int r;
|
||||
|
||||
nmsrs.nmsrs = 0;
|
||||
r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
|
||||
TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
|
||||
r);
|
||||
|
||||
return nmsrs.nmsrs;
|
||||
}
|
||||
|
||||
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
|
||||
{
|
||||
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
|
||||
struct kvm_msr_list *list;
|
||||
struct kvm_x86_state *state;
|
||||
int nmsrs, r, i;
|
||||
static int nested_size = -1;
|
||||
|
||||
if (nested_size == -1) {
|
||||
nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
|
||||
TEST_ASSERT(nested_size <= sizeof(state->nested_),
|
||||
"Nested state size too big, %i > %zi",
|
||||
nested_size, sizeof(state->nested_));
|
||||
}
|
||||
|
||||
nmsrs = kvm_get_num_msrs(vm);
|
||||
list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
|
||||
list->nmsrs = nmsrs;
|
||||
r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
|
||||
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
|
||||
r);
|
||||
|
||||
state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
|
||||
r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
|
||||
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
|
||||
r);
|
||||
|
||||
r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
|
||||
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
|
||||
r);
|
||||
|
||||
r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
|
||||
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
|
||||
r);
|
||||
|
||||
r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
|
||||
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
|
||||
r);
|
||||
|
||||
r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
|
||||
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
|
||||
r);
|
||||
|
||||
r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
|
||||
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
|
||||
r);
|
||||
|
||||
if (nested_size) {
|
||||
state->nested.size = sizeof(state->nested_);
|
||||
r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
|
||||
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
|
||||
r);
|
||||
TEST_ASSERT(state->nested.size <= nested_size,
|
||||
"Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
|
||||
state->nested.size, nested_size);
|
||||
} else
|
||||
state->nested.size = 0;
|
||||
|
||||
state->msrs.nmsrs = nmsrs;
|
||||
for (i = 0; i < nmsrs; i++)
|
||||
state->msrs.entries[i].index = list->indices[i];
|
||||
r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
|
||||
TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)",
|
||||
r, r == nmsrs ? -1 : list->indices[r]);
|
||||
|
||||
r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
|
||||
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
|
||||
r);
|
||||
|
||||
free(list);
|
||||
return state;
|
||||
}
|
||||
|
||||
void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
|
||||
{
|
||||
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
|
||||
int r;
|
||||
|
||||
if (state->nested.size) {
|
||||
r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
|
||||
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
|
||||
r);
|
||||
}
|
||||
|
||||
r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
|
||||
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
|
||||
r);
|
||||
|
||||
r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
|
||||
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
|
||||
r);
|
||||
|
||||
r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
|
||||
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
|
||||
r);
|
||||
|
||||
r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
|
||||
TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
|
||||
r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);
|
||||
|
||||
r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
|
||||
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
|
||||
r);
|
||||
|
||||
r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
|
||||
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
|
||||
r);
|
||||
|
||||
r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
|
||||
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
|
||||
r);
|
||||
|
||||
r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
|
||||
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
|
||||
r);
|
||||
}
|
||||
|
||||
218
tools/testing/selftests/kvm/state_test.c
Normal file
218
tools/testing/selftests/kvm/state_test.c
Normal file
@@ -0,0 +1,218 @@
|
||||
/*
|
||||
* KVM_GET/SET_* tests
|
||||
*
|
||||
* Copyright (C) 2018, Red Hat, Inc.
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2.
|
||||
*
|
||||
* Tests for vCPU state save/restore, including nested guest state.
|
||||
*/
|
||||
#define _GNU_SOURCE /* for program_invocation_short_name */
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include "test_util.h"
|
||||
|
||||
#include "kvm_util.h"
|
||||
#include "x86.h"
|
||||
#include "vmx.h"
|
||||
|
||||
#define VCPU_ID 5
|
||||
#define PORT_SYNC 0x1000
|
||||
#define PORT_ABORT 0x1001
|
||||
#define PORT_DONE 0x1002
|
||||
|
||||
static inline void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1)
|
||||
{
|
||||
__asm__ __volatile__("in %[port], %%al"
|
||||
:
|
||||
: [port]"d"(port), "D"(arg0), "S"(arg1)
|
||||
: "rax");
|
||||
}
|
||||
|
||||
#define exit_to_l0(_port, _arg0, _arg1) \
|
||||
__exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1))
|
||||
|
||||
#define GUEST_ASSERT(_condition) do { \
|
||||
if (!(_condition)) \
|
||||
exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition, __LINE__);\
|
||||
} while (0)
|
||||
|
||||
#define GUEST_SYNC(stage) \
|
||||
exit_to_l0(PORT_SYNC, "hello", stage);
|
||||
|
||||
static bool have_nested_state;
|
||||
|
||||
void l2_guest_code(void)
|
||||
{
|
||||
GUEST_SYNC(5);
|
||||
|
||||
/* Exit to L1 */
|
||||
vmcall();
|
||||
|
||||
/* L1 has now set up a shadow VMCS for us. */
|
||||
GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
|
||||
GUEST_SYNC(9);
|
||||
GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
|
||||
GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
|
||||
GUEST_SYNC(10);
|
||||
GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
|
||||
GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
|
||||
GUEST_SYNC(11);
|
||||
|
||||
/* Done, exit to L1 and never come back. */
|
||||
vmcall();
|
||||
}
|
||||
|
||||
void l1_guest_code(struct vmx_pages *vmx_pages)
|
||||
{
|
||||
#define L2_GUEST_STACK_SIZE 64
|
||||
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
|
||||
|
||||
GUEST_ASSERT(vmx_pages->vmcs_gpa);
|
||||
GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
|
||||
GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
|
||||
|
||||
GUEST_SYNC(3);
|
||||
GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
|
||||
|
||||
prepare_vmcs(vmx_pages, l2_guest_code,
|
||||
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
|
||||
|
||||
GUEST_SYNC(4);
|
||||
GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
|
||||
GUEST_ASSERT(!vmlaunch());
|
||||
GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
|
||||
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
|
||||
|
||||
/* Check that the launched state is preserved. */
|
||||
GUEST_ASSERT(vmlaunch());
|
||||
|
||||
GUEST_ASSERT(!vmresume());
|
||||
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
|
||||
|
||||
GUEST_SYNC(6);
|
||||
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
|
||||
|
||||
GUEST_ASSERT(!vmresume());
|
||||
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
|
||||
|
||||
vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);
|
||||
|
||||
vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
|
||||
vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);
|
||||
|
||||
GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
|
||||
GUEST_ASSERT(vmlaunch());
|
||||
GUEST_SYNC(7);
|
||||
GUEST_ASSERT(vmlaunch());
|
||||
GUEST_ASSERT(vmresume());
|
||||
|
||||
vmwrite(GUEST_RIP, 0xc0ffee);
|
||||
GUEST_SYNC(8);
|
||||
GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
|
||||
|
||||
GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
|
||||
GUEST_ASSERT(!vmresume());
|
||||
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
|
||||
|
||||
GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
|
||||
GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
|
||||
GUEST_ASSERT(vmlaunch());
|
||||
GUEST_ASSERT(vmresume());
|
||||
GUEST_SYNC(12);
|
||||
GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
|
||||
GUEST_ASSERT(vmlaunch());
|
||||
GUEST_ASSERT(vmresume());
|
||||
}
|
||||
|
||||
void guest_code(struct vmx_pages *vmx_pages)
|
||||
{
|
||||
GUEST_SYNC(1);
|
||||
GUEST_SYNC(2);
|
||||
|
||||
if (vmx_pages)
|
||||
l1_guest_code(vmx_pages);
|
||||
|
||||
exit_to_l0(PORT_DONE, 0, 0);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
struct vmx_pages *vmx_pages = NULL;
|
||||
vm_vaddr_t vmx_pages_gva = 0;
|
||||
|
||||
struct kvm_regs regs1, regs2;
|
||||
struct kvm_vm *vm;
|
||||
struct kvm_run *run;
|
||||
struct kvm_x86_state *state;
|
||||
int stage;
|
||||
|
||||
struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
|
||||
|
||||
/* Create VM */
|
||||
vm = vm_create_default(VCPU_ID, guest_code);
|
||||
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
|
||||
vcpu_regs_get(vm, VCPU_ID, ®s1);
|
||||
|
||||
if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
|
||||
vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
|
||||
vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
|
||||
} else {
|
||||
printf("will skip nested state checks\n");
|
||||
vcpu_args_set(vm, VCPU_ID, 1, 0);
|
||||
}
|
||||
|
||||
for (stage = 1;; stage++) {
|
||||
_vcpu_run(vm, VCPU_ID);
|
||||
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
|
||||
"Unexpected exit reason: %u (%s),\n",
|
||||
run->exit_reason,
|
||||
exit_reason_str(run->exit_reason));
|
||||
|
||||
memset(®s1, 0, sizeof(regs1));
|
||||
vcpu_regs_get(vm, VCPU_ID, ®s1);
|
||||
switch (run->io.port) {
|
||||
case PORT_ABORT:
|
||||
TEST_ASSERT(false, "%s at %s:%d", (const char *) regs1.rdi,
|
||||
__FILE__, regs1.rsi);
|
||||
/* NOT REACHED */
|
||||
case PORT_SYNC:
|
||||
break;
|
||||
case PORT_DONE:
|
||||
goto done;
|
||||
default:
|
||||
TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port);
|
||||
}
|
||||
|
||||
/* PORT_SYNC is handled here. */
|
||||
TEST_ASSERT(!strcmp((const char *)regs1.rdi, "hello") &&
|
||||
regs1.rsi == stage, "Unexpected register values vmexit #%lx, got %lx",
|
||||
stage, (ulong) regs1.rsi);
|
||||
|
||||
state = vcpu_save_state(vm, VCPU_ID);
|
||||
kvm_vm_release(vm);
|
||||
|
||||
/* Restore state in a new VM. */
|
||||
kvm_vm_restart(vm, O_RDWR);
|
||||
vm_vcpu_add(vm, VCPU_ID, 0, 0);
|
||||
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
|
||||
vcpu_load_state(vm, VCPU_ID, state);
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
free(state);
|
||||
|
||||
memset(®s2, 0, sizeof(regs2));
|
||||
vcpu_regs_get(vm, VCPU_ID, ®s2);
|
||||
TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)),
|
||||
"Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
|
||||
(ulong) regs2.rdi, (ulong) regs2.rsi);
|
||||
}
|
||||
|
||||
done:
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
@@ -46,11 +46,6 @@ enum {
|
||||
PORT_DONE,
|
||||
};
|
||||
|
||||
struct vmx_page {
|
||||
vm_vaddr_t virt;
|
||||
vm_paddr_t phys;
|
||||
};
|
||||
|
||||
enum {
|
||||
VMXON_PAGE = 0,
|
||||
VMCS_PAGE,
|
||||
@@ -67,9 +62,6 @@ struct kvm_single_msr {
|
||||
/* The virtual machine object. */
|
||||
static struct kvm_vm *vm;
|
||||
|
||||
/* Array of vmx_page descriptors that is shared with the guest. */
|
||||
struct vmx_page *vmx_pages;
|
||||
|
||||
#define exit_to_l0(_port, _arg) do_exit_to_l0(_port, (unsigned long) (_arg))
|
||||
static void do_exit_to_l0(uint16_t port, unsigned long arg)
|
||||
{
|
||||
@@ -105,7 +97,7 @@ static void l2_guest_code(void)
|
||||
__asm__ __volatile__("vmcall");
|
||||
}
|
||||
|
||||
static void l1_guest_code(struct vmx_page *vmx_pages)
|
||||
static void l1_guest_code(struct vmx_pages *vmx_pages)
|
||||
{
|
||||
#define L2_GUEST_STACK_SIZE 64
|
||||
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
|
||||
@@ -116,23 +108,14 @@ static void l1_guest_code(struct vmx_page *vmx_pages)
|
||||
wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
|
||||
check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
|
||||
|
||||
prepare_for_vmx_operation();
|
||||
|
||||
/* Enter VMX root operation. */
|
||||
*(uint32_t *)vmx_pages[VMXON_PAGE].virt = vmcs_revision();
|
||||
GUEST_ASSERT(!vmxon(vmx_pages[VMXON_PAGE].phys));
|
||||
|
||||
/* Load a VMCS. */
|
||||
*(uint32_t *)vmx_pages[VMCS_PAGE].virt = vmcs_revision();
|
||||
GUEST_ASSERT(!vmclear(vmx_pages[VMCS_PAGE].phys));
|
||||
GUEST_ASSERT(!vmptrld(vmx_pages[VMCS_PAGE].phys));
|
||||
GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
|
||||
|
||||
/* Prepare the VMCS for L2 execution. */
|
||||
prepare_vmcs(l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
|
||||
prepare_vmcs(vmx_pages, l2_guest_code,
|
||||
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
|
||||
control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
|
||||
control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING;
|
||||
vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
|
||||
vmwrite(MSR_BITMAP, vmx_pages[MSR_BITMAP_PAGE].phys);
|
||||
vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
|
||||
|
||||
/* Jump into L2. First, test failure to load guest CR3. */
|
||||
@@ -152,33 +135,6 @@ static void l1_guest_code(struct vmx_page *vmx_pages)
|
||||
exit_to_l0(PORT_DONE, 0);
|
||||
}
|
||||
|
||||
static void allocate_vmx_page(struct vmx_page *page)
|
||||
{
|
||||
vm_vaddr_t virt;
|
||||
|
||||
virt = vm_vaddr_alloc(vm, PAGE_SIZE, 0, 0, 0);
|
||||
memset(addr_gva2hva(vm, virt), 0, PAGE_SIZE);
|
||||
|
||||
page->virt = virt;
|
||||
page->phys = addr_gva2gpa(vm, virt);
|
||||
}
|
||||
|
||||
static vm_vaddr_t allocate_vmx_pages(void)
|
||||
{
|
||||
vm_vaddr_t vmx_pages_vaddr;
|
||||
int i;
|
||||
|
||||
vmx_pages_vaddr = vm_vaddr_alloc(
|
||||
vm, sizeof(struct vmx_page) * NUM_VMX_PAGES, 0, 0, 0);
|
||||
|
||||
vmx_pages = (void *) addr_gva2hva(vm, vmx_pages_vaddr);
|
||||
|
||||
for (i = 0; i < NUM_VMX_PAGES; i++)
|
||||
allocate_vmx_page(&vmx_pages[i]);
|
||||
|
||||
return vmx_pages_vaddr;
|
||||
}
|
||||
|
||||
void report(int64_t val)
|
||||
{
|
||||
printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
|
||||
@@ -187,7 +143,8 @@ void report(int64_t val)
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
vm_vaddr_t vmx_pages_vaddr;
|
||||
struct vmx_pages *vmx_pages;
|
||||
vm_vaddr_t vmx_pages_gva;
|
||||
struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
|
||||
|
||||
if (!(entry->ecx & CPUID_VMX)) {
|
||||
@@ -195,23 +152,23 @@ int main(int argc, char *argv[])
|
||||
exit(KSFT_SKIP);
|
||||
}
|
||||
|
||||
vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code);
|
||||
vm = vm_create_default(VCPU_ID, (void *) l1_guest_code);
|
||||
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
|
||||
|
||||
/* Allocate VMX pages and shared descriptors (vmx_pages). */
|
||||
vmx_pages_vaddr = allocate_vmx_pages();
|
||||
vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_vaddr);
|
||||
vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
|
||||
vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
|
||||
|
||||
for (;;) {
|
||||
volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
|
||||
struct kvm_regs regs;
|
||||
|
||||
vcpu_run(vm, VCPU_ID);
|
||||
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
|
||||
"Got exit_reason other than KVM_EXIT_IO: %u (%s),\n",
|
||||
run->exit_reason,
|
||||
exit_reason_str(run->exit_reason));
|
||||
|
||||
vcpu_regs_get(vm, VCPU_ID, ®s);
|
||||
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
|
||||
"Got exit_reason other than KVM_EXIT_IO: %u (%s), rip=%lx\n",
|
||||
run->exit_reason,
|
||||
exit_reason_str(run->exit_reason), regs.rip);
|
||||
|
||||
switch (run->io.port) {
|
||||
case PORT_ABORT:
|
||||
|
||||
@@ -273,7 +273,8 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
|
||||
* kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
|
||||
* barrier here.
|
||||
*/
|
||||
if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
|
||||
if (!kvm_arch_flush_remote_tlb(kvm)
|
||||
|| kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
|
||||
++kvm->stat.remote_tlb_flush;
|
||||
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
|
||||
}
|
||||
@@ -1169,7 +1170,7 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
|
||||
|
||||
n = kvm_dirty_bitmap_bytes(memslot);
|
||||
|
||||
dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
|
||||
dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
|
||||
memset(dirty_bitmap_buffer, 0, n);
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
@@ -1342,18 +1343,16 @@ static inline int check_user_page_hwpoison(unsigned long addr)
|
||||
}
|
||||
|
||||
/*
|
||||
* The atomic path to get the writable pfn which will be stored in @pfn,
|
||||
* true indicates success, otherwise false is returned.
|
||||
* The fast path to get the writable pfn which will be stored in @pfn,
|
||||
* true indicates success, otherwise false is returned. It's also the
|
||||
* only part that runs if we can are in atomic context.
|
||||
*/
|
||||
static bool hva_to_pfn_fast(unsigned long addr, bool atomic, bool *async,
|
||||
bool write_fault, bool *writable, kvm_pfn_t *pfn)
|
||||
static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
|
||||
bool *writable, kvm_pfn_t *pfn)
|
||||
{
|
||||
struct page *page[1];
|
||||
int npages;
|
||||
|
||||
if (!(async || atomic))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Fast pin a writable pfn only if it is a write fault request
|
||||
* or the caller allows to map a writable pfn for a read fault
|
||||
@@ -1497,7 +1496,7 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
|
||||
/* we can do it either atomically or asynchronously, not both */
|
||||
BUG_ON(atomic && async);
|
||||
|
||||
if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn))
|
||||
if (hva_to_pfn_fast(addr, write_fault, writable, &pfn))
|
||||
return pfn;
|
||||
|
||||
if (atomic)
|
||||
@@ -2127,16 +2126,22 @@ static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
|
||||
|
||||
static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int ret = -EINTR;
|
||||
int idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
|
||||
if (kvm_arch_vcpu_runnable(vcpu)) {
|
||||
kvm_make_request(KVM_REQ_UNHALT, vcpu);
|
||||
return -EINTR;
|
||||
goto out;
|
||||
}
|
||||
if (kvm_cpu_has_pending_timer(vcpu))
|
||||
return -EINTR;
|
||||
goto out;
|
||||
if (signal_pending(current))
|
||||
return -EINTR;
|
||||
goto out;
|
||||
|
||||
return 0;
|
||||
ret = 0;
|
||||
out:
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, idx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||