5a8781607e
Currently, host_rsp is cached on a per-vCPU basis, i.e. it's stored in struct vcpu_vmx. In non-nested usage the caching is for all intents and purposes 100% effective, e.g. only the first VMLAUNCH needs to synchronize VMCS.HOST_RSP since the call stack to vmx_vcpu_run() is identical each and every time. But when running a nested guest, KVM must invalidate the cache when switching the current VMCS as it can't guarantee the new VMCS has the same HOST_RSP as the previous VMCS. In other words, the cache loses almost all of its efficacy when running a nested VM. Move host_rsp to struct vmcs_host_state, which is per-VMCS, so that it is cached on a per-VMCS basis and restores its 100% hit rate when nested VMs are in play. Note that the host_rsp cache for vmcs02 essentially "breaks" when nested early checks are enabled as nested_vmx_check_vmentry_hw() will see a different RSP at the time of its VM-Enter. While it's possible to avoid even that VMCS.HOST_RSP synchronization, e.g. by employing a dedicated VM-Exit stack, there is little motivation for doing so as the overhead of two VMWRITEs (~55 cycles) is dwarfed by the overhead of the extra VMX transition (600+ cycles) and is a proverbial drop in the ocean relative to the total cost of a nested transition (10s of thousands of cycles). Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> Reviewed-by: Jim Mattson <jmattson@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
138 lines
3.2 KiB
C
138 lines
3.2 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __KVM_X86_VMX_VMCS_H
|
|
#define __KVM_X86_VMX_VMCS_H
|
|
|
|
#include <linux/ktime.h>
|
|
#include <linux/list.h>
|
|
#include <linux/nospec.h>
|
|
|
|
#include <asm/kvm.h>
|
|
#include <asm/vmx.h>
|
|
|
|
#include "capabilities.h"
|
|
|
|
struct vmcs_hdr {
|
|
u32 revision_id:31;
|
|
u32 shadow_vmcs:1;
|
|
};
|
|
|
|
struct vmcs {
|
|
struct vmcs_hdr hdr;
|
|
u32 abort;
|
|
char data[0];
|
|
};
|
|
|
|
/*
 * Declaration of the per-CPU variable current_vmcs, a pointer to struct vmcs.
 * NOTE(review): presumably tracks the VMCS currently loaded on each CPU and
 * is defined in a .c file - confirm against the definition.
 */
DECLARE_PER_CPU(struct vmcs *, current_vmcs);
|
|
|
|
/*
|
|
* vmcs_host_state tracks registers that are loaded from the VMCS on VMEXIT
|
|
* and whose values change infrequently, but are not constant. I.e. this is
|
|
* used as a write-through cache of the corresponding VMCS fields.
|
|
*/
|
|
struct vmcs_host_state {
|
|
unsigned long cr3; /* May not match real cr3 */
|
|
unsigned long cr4; /* May not match real cr4 */
|
|
unsigned long gs_base;
|
|
unsigned long fs_base;
|
|
unsigned long rsp;
|
|
|
|
u16 fs_sel, gs_sel, ldt_sel;
|
|
#ifdef CONFIG_X86_64
|
|
u16 ds_sel, es_sel;
|
|
#endif
|
|
};
|
|
|
|
/*
|
|
* Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
|
|
* remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
|
|
* loaded on this CPU (so we can clear them if the CPU goes down).
|
|
*/
|
|
struct loaded_vmcs {
|
|
struct vmcs *vmcs;
|
|
struct vmcs *shadow_vmcs;
|
|
int cpu;
|
|
bool launched;
|
|
bool nmi_known_unmasked;
|
|
bool hv_timer_armed;
|
|
/* Support for vnmi-less CPUs */
|
|
int soft_vnmi_blocked;
|
|
ktime_t entry_time;
|
|
s64 vnmi_blocked_time;
|
|
unsigned long *msr_bitmap;
|
|
struct list_head loaded_vmcss_on_cpu_link;
|
|
struct vmcs_host_state host_state;
|
|
};
|
|
|
|
/*
 * Return true if @intr_info describes a valid hardware exception with vector
 * @vector.
 *
 * Only the interruption-type, vector and valid bits of @intr_info are
 * compared; all other bits are ignored.
 */
static inline bool is_exception_n(u32 intr_info, u8 vector)
{
	const u32 mask = INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
			 INTR_INFO_VALID_MASK;
	const u32 expected = INTR_TYPE_HARD_EXCEPTION | vector |
			     INTR_INFO_VALID_MASK;

	return (intr_info & mask) == expected;
}
|
|
|
|
/* True if @intr_info is a valid hardware exception with vector DB_VECTOR. */
static inline bool is_debug(u32 intr_info)
{
	return is_exception_n(intr_info, DB_VECTOR);
}
|
|
|
|
/* True if @intr_info is a valid hardware exception with vector BP_VECTOR. */
static inline bool is_breakpoint(u32 intr_info)
{
	return is_exception_n(intr_info, BP_VECTOR);
}
|
|
|
|
/* True if @intr_info is a valid hardware exception with vector PF_VECTOR. */
static inline bool is_page_fault(u32 intr_info)
{
	return is_exception_n(intr_info, PF_VECTOR);
}
|
|
|
|
/* True if @intr_info is a valid hardware exception with vector UD_VECTOR. */
static inline bool is_invalid_opcode(u32 intr_info)
{
	return is_exception_n(intr_info, UD_VECTOR);
}
|
|
|
|
/* True if @intr_info is a valid hardware exception with vector GP_VECTOR. */
static inline bool is_gp_fault(u32 intr_info)
{
	return is_exception_n(intr_info, GP_VECTOR);
}
|
|
|
|
/*
 * True if @intr_info is a valid hardware exception with vector MC_VECTOR.
 *
 * Uses is_exception_n() like the other per-vector predicates instead of
 * open-coding the same mask-and-compare; the result is identical.
 */
static inline bool is_machine_check(u32 intr_info)
{
	return is_exception_n(intr_info, MC_VECTOR);
}
|
|
|
|
/* Undocumented: icebp/int1 */
|
|
static inline bool is_icebp(u32 intr_info)
|
|
{
|
|
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
|
|
== (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK);
|
|
}
|
|
|
|
/*
 * True if @intr_info is valid and its interruption type is
 * INTR_TYPE_NMI_INTR.  The vector bits are not examined.
 */
static inline bool is_nmi(u32 intr_info)
{
	const u32 mask = INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK;
	const u32 expected = INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK;

	return (intr_info & mask) == expected;
}
|
|
|
|
/*
 * Width classes of a VMCS field.  The numeric values are significant: they
 * are the 2-bit codes that vmcs_field_width() extracts from a field encoding.
 */
enum vmcs_field_width {
	VMCS_FIELD_WIDTH_U16		= 0,
	VMCS_FIELD_WIDTH_U64		= 1,
	VMCS_FIELD_WIDTH_U32		= 2,
	VMCS_FIELD_WIDTH_NATURAL_WIDTH	= 3
};
|
|
|
|
static inline int vmcs_field_width(unsigned long field)
|
|
{
|
|
if (0x1 & field) /* the *_HIGH fields are all 32 bit */
|
|
return VMCS_FIELD_WIDTH_U32;
|
|
return (field >> 13) & 0x3;
|
|
}
|
|
|
|
/*
 * Return 1 if bits 11:10 of the field encoding @field hold the value 1,
 * which this helper treats as marking a read-only field; otherwise return 0.
 */
static inline int vmcs_field_readonly(unsigned long field)
{
	const unsigned long type = (field >> 10) & 0x3;

	return type == 1;
}
|
|
|
|
#endif /* __KVM_X86_VMX_VMCS_H */
|