Mirror of https://github.com/torvalds/linux.git (synced 2024-11-22 04:02:20 +00:00)
Merge remote-tracking branch 'kvm/queue' into HEAD

x86 Xen-for-KVM:

* Allow the Xen runstate information to cross a page boundary
* Allow XEN_RUNSTATE_UPDATE flag behaviour to be configured
* Add support for 32-bit guests in SCHEDOP_poll

x86 fixes:

* One-off fixes for various emulation flows (SGX, VMXON, NRIPS=0).
* Reinstate IBPB on emulated VM-Exit that was incorrectly dropped a few
  years back when eliminating unnecessary barriers when switching between
  vmcs01 and vmcs02.
* Clean up the MSR filter docs.
* Clean up vmread_error_trampoline() to make it more obvious that params
  must be passed on the stack, even for x86-64.
* Let userspace set all supported bits in MSR_IA32_FEAT_CTL irrespective of
  the current guest CPUID.
* Fudge around a race with TSC refinement that results in KVM incorrectly
  thinking a guest needs TSC scaling when running on a CPU with a constant
  TSC, but no hardware-enumerated TSC frequency.
* Advertise (on AMD) that the SMM_CTL MSR is not supported.
* Remove unnecessary exports.

Selftests:

* Fix an inverted check in the access tracking perf test, and restore
  support for asserting that there aren't too many idle pages when running
  on bare metal.
* Fix an ordering issue in the AMX test introduced by recent conversions to
  use kvm_cpu_has(), and harden the code to guard against similar bugs in
  the future. Anything that triggers caching of KVM's supported CPUID,
  kvm_cpu_has() in this case, effectively hides opt-in XSAVE features if
  the caching occurs before the test opts in via prctl().
* Fix build errors that occur in certain setups (unsure exactly what is
  unique about the problematic setup) due to glibc overriding
  static_assert() to a variant that requires a custom message.
* Introduce actual atomics for clear/set_bit() in selftests.

Documentation:

* Remove deleted ioctls from documentation.
* Various fixes.
commit 9352e7470a
@ -272,18 +272,6 @@ the VCPU file descriptor can be mmap-ed, including:

KVM_CAP_DIRTY_LOG_RING, see section 8.3.


4.6 KVM_SET_MEMORY_REGION
-------------------------

:Capability: basic
:Architectures: all
:Type: vm ioctl
:Parameters: struct kvm_memory_region (in)
:Returns: 0 on success, -1 on error

This ioctl is obsolete and has been removed.


4.7 KVM_CREATE_VCPU
-------------------
@ -368,17 +356,6 @@ see the description of the capability.

Note that the Xen shared info page, if configured, shall always be assumed
to be dirty. KVM will not explicitly mark it such.


4.9 KVM_SET_MEMORY_ALIAS
------------------------

:Capability: basic
:Architectures: x86
:Type: vm ioctl
:Parameters: struct kvm_memory_alias (in)
:Returns: 0 (success), -1 (error)

This ioctl is obsolete and has been removed.


4.10 KVM_RUN
------------

@ -1332,7 +1309,7 @@ yet and must be cleared on entry.

__u64 userspace_addr; /* start of the userspace allocated memory */
};

/* for kvm_memory_region::flags */
/* for kvm_userspace_memory_region::flags */
#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
#define KVM_MEM_READONLY (1UL << 1)

@ -1377,10 +1354,6 @@ the memory region are automatically reflected into the guest. For example, an

mmap() that affects the region will be made visible immediately. Another
example is madvise(MADV_DROP).

It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl.
The KVM_SET_MEMORY_REGION does not allow fine grained control over memory
allocation and is deprecated.
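
For reference, registering a memory slot with the current KVM_SET_USER_MEMORY_REGION
interface looks roughly like the following sketch (not part of this commit; the
slot number, flags, guest physical address, and the ``vm_fd``/``host_mem``/``mem_size``
variables are illustrative placeholders, and ``<linux/kvm.h>`` plus ``<sys/ioctl.h>``
are assumed to be included)::

  /* host_mem is a page-aligned buffer obtained with e.g. mmap(). */
  struct kvm_userspace_memory_region region = {
          .slot            = 0,                        /* example slot number */
          .flags           = KVM_MEM_LOG_DIRTY_PAGES,  /* optional */
          .guest_phys_addr = 0x100000,                 /* example GPA */
          .memory_size     = mem_size,
          .userspace_addr  = (unsigned long)host_mem,
  };

  /* vm_fd is the VM file descriptor returned by KVM_CREATE_VM. */
  if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region))
          perror("KVM_SET_USER_MEMORY_REGION");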


4.36 KVM_SET_TSS_ADDR
---------------------

@ -3293,6 +3266,7 @@ valid entries found.

----------------------

:Capability: KVM_CAP_DEVICE_CTRL
:Architectures: all
:Type: vm ioctl
:Parameters: struct kvm_create_device (in/out)
:Returns: 0 on success, -1 on error

@ -3333,6 +3307,7 @@ number.

:Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
KVM_CAP_VCPU_ATTRIBUTES for vcpu device
KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device (no set)
:Architectures: x86, arm64, s390
:Type: device ioctl, vm ioctl, vcpu ioctl
:Parameters: struct kvm_device_attr
:Returns: 0 on success, -1 on error
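
As an aside (not part of this commit), creating an in-kernel device and setting
one of its attributes from userspace typically looks like the sketch below. The
device type is a real one; ``SOME_DEVICE_GROUP`` and ``SOME_DEVICE_ATTR`` are
hypothetical stand-ins for whatever group/attribute the chosen device class
defines::

  struct kvm_create_device cd = {
          .type = KVM_DEV_TYPE_VFIO,      /* any supported device type */
  };

  if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd))
          perror("KVM_CREATE_DEVICE");

  __u64 value = 0;                        /* payload consumed by the device */
  struct kvm_device_attr attr = {
          .group = SOME_DEVICE_GROUP,     /* hypothetical device-specific group */
          .attr  = SOME_DEVICE_ATTR,      /* hypothetical attribute within the group */
          .addr  = (__u64)(unsigned long)&value,
  };

  /* cd.fd is the new device file descriptor filled in by KVM_CREATE_DEVICE. */
  if (ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr))
          perror("KVM_SET_DEVICE_ATTR");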

@ -4104,80 +4079,71 @@ flags values for ``struct kvm_msr_filter_range``:

``KVM_MSR_FILTER_READ``

Filter read accesses to MSRs using the given bitmap. A 0 in the bitmap
indicates that a read should immediately fail, while a 1 indicates that
a read for a particular MSR should be handled regardless of the default
indicates that read accesses should be denied, while a 1 indicates that
a read for a particular MSR should be allowed regardless of the default
filter action.

``KVM_MSR_FILTER_WRITE``

Filter write accesses to MSRs using the given bitmap. A 0 in the bitmap
indicates that a write should immediately fail, while a 1 indicates that
a write for a particular MSR should be handled regardless of the default
indicates that write accesses should be denied, while a 1 indicates that
a write for a particular MSR should be allowed regardless of the default
filter action.

``KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE``

Filter both read and write accesses to MSRs using the given bitmap. A 0
in the bitmap indicates that both reads and writes should immediately fail,
while a 1 indicates that reads and writes for a particular MSR are not
filtered by this range.

flags values for ``struct kvm_msr_filter``:

``KVM_MSR_FILTER_DEFAULT_ALLOW``

If no filter range matches an MSR index that is getting accessed, KVM will
fall back to allowing access to the MSR.
allow accesses to all MSRs by default.

``KVM_MSR_FILTER_DEFAULT_DENY``

If no filter range matches an MSR index that is getting accessed, KVM will
fall back to rejecting access to the MSR. In this mode, all MSRs that should
be processed by KVM need to explicitly be marked as allowed in the bitmaps.
deny accesses to all MSRs by default.

This ioctl allows user space to define up to 16 bitmaps of MSR ranges to
specify whether a certain MSR access should be explicitly filtered for or not.
This ioctl allows userspace to define up to 16 bitmaps of MSR ranges to deny
guest MSR accesses that would normally be allowed by KVM. If an MSR is not
covered by a specific range, the "default" filtering behavior applies. Each
bitmap range covers MSRs from [base .. base+nmsrs).

If this ioctl has never been invoked, MSR accesses are not guarded and the
default KVM in-kernel emulation behavior is fully preserved.
If an MSR access is denied by userspace, the resulting KVM behavior depends on
whether or not KVM_CAP_X86_USER_SPACE_MSR's KVM_MSR_EXIT_REASON_FILTER is
enabled. If KVM_MSR_EXIT_REASON_FILTER is enabled, KVM will exit to userspace
on denied accesses, i.e. userspace effectively intercepts the MSR access. If
KVM_MSR_EXIT_REASON_FILTER is not enabled, KVM will inject a #GP into the guest
on denied accesses.

If an MSR access is allowed by userspace, KVM will emulate and/or virtualize
the access in accordance with the vCPU model. Note, KVM may still ultimately
inject a #GP if an access is allowed by userspace, e.g. if KVM doesn't support
the MSR, or to follow architectural behavior for the MSR.

By default, KVM operates in KVM_MSR_FILTER_DEFAULT_ALLOW mode with no MSR range
filters.

Calling this ioctl with an empty set of ranges (all nmsrs == 0) disables MSR
filtering. In that mode, ``KVM_MSR_FILTER_DEFAULT_DENY`` is invalid and causes
an error.

As soon as the filtering is in place, every MSR access is processed through
the filtering except for accesses to the x2APIC MSRs (from 0x800 to 0x8ff);
x2APIC MSRs are always allowed, independent of the ``default_allow`` setting,
and their behavior depends on the ``X2APIC_ENABLE`` bit of the APIC base
register.

.. warning::
MSR accesses coming from nested vmentry/vmexit are not filtered.
MSR accesses as part of nested VM-Enter/VM-Exit are not filtered.
This includes both writes to individual VMCS fields and reads/writes
through the MSR lists pointed to by the VMCS.

If a bit is within one of the defined ranges, read and write accesses are
guarded by the bitmap's value for the MSR index if the kind of access
is included in the ``struct kvm_msr_filter_range`` flags. If no range
cover this particular access, the behavior is determined by the flags
field in the kvm_msr_filter struct: ``KVM_MSR_FILTER_DEFAULT_ALLOW``
and ``KVM_MSR_FILTER_DEFAULT_DENY``.

Each bitmap range specifies a range of MSRs to potentially allow access on.
The range goes from MSR index [base .. base+nmsrs]. The flags field
indicates whether reads, writes or both reads and writes are filtered
by setting a 1 bit in the bitmap for the corresponding MSR index.

If an MSR access is not permitted through the filtering, it generates a
#GP inside the guest. When combined with KVM_CAP_X86_USER_SPACE_MSR, that
allows user space to deflect and potentially handle various MSR accesses
into user space.
x2APIC MSR accesses cannot be filtered (KVM silently ignores filters that
cover any x2APIC MSRs).

Note, invoking this ioctl while a vCPU is running is inherently racy. However,
KVM does guarantee that vCPUs will see either the previous filter or the new
filter, e.g. MSRs with identical settings in both the old and new filter will
have deterministic behavior.

Similarly, if userspace wishes to intercept on denied accesses,
KVM_MSR_EXIT_REASON_FILTER must be enabled before activating any filters, and
left enabled until after all filters are deactivated. Failure to do so may
result in KVM injecting a #GP instead of exiting to userspace.
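
To make the bitmap semantics concrete, the following is a minimal userspace
sketch (not part of this commit) that keeps the default-allow behaviour but
denies guest writes to an arbitrary example range of 8 MSRs; a 0 bit in the
bitmap denies the access, a 1 bit allows it::

  __u8 bitmap = 0x00;                     /* all 8 MSRs in the range: denied */

  struct kvm_msr_filter filter = {
          .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
          .ranges[0] = {
                  .flags  = KVM_MSR_FILTER_WRITE, /* only writes are filtered */
                  .base   = 0xc0010200,           /* example base MSR index */
                  .nmsrs  = 8,
                  .bitmap = &bitmap,
          },
  };

  if (ioctl(vm_fd, KVM_X86_SET_MSR_FILTER, &filter))
          perror("KVM_X86_SET_MSR_FILTER");

With KVM_MSR_EXIT_REASON_FILTER enabled (see KVM_CAP_X86_USER_SPACE_MSR below),
writes in that range reach userspace as KVM_EXIT_X86_WRMSR; otherwise the guest
receives #GP.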


4.98 KVM_CREATE_SPAPR_TCE_64
----------------------------

@ -5339,6 +5305,7 @@ KVM_PV_ASYNC_CLEANUP_PERFORM

union {
__u8 long_mode;
__u8 vector;
__u8 runstate_update_flag;
struct {
__u64 gfn;
} shared_info;

@ -5416,6 +5383,14 @@ KVM_XEN_ATTR_TYPE_XEN_VERSION

event channel delivery, so responding within the kernel without
exiting to userspace is beneficial.

KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG
This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
support for KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG. It enables the
XEN_RUNSTATE_UPDATE flag which allows guest vCPUs to safely read
other vCPUs' vcpu_runstate_info. Xen guests enable this feature via
the VM_ASST_TYPE_runstate_update_flag of the HYPERVISOR_vm_assist
hypercall.
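
As an illustration (not part of this commit), a VMM that intercepts the guest's
HYPERVISOR_vm_assist hypercall would typically turn the flag on with the
set-attribute vm ioctl, assumed here to be KVM_XEN_HVM_SET_ATTR with the union
member shown above::

  struct kvm_xen_hvm_attr attr = {
          .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG,
          .u.runstate_update_flag = 1,
  };

  /* vm_fd is the VM file descriptor returned by KVM_CREATE_VM. */
  if (ioctl(vm_fd, KVM_XEN_HVM_SET_ATTR, &attr))
          perror("KVM_XEN_HVM_SET_ATTR");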

4.127 KVM_XEN_HVM_GET_ATTR
--------------------------

@ -6473,31 +6448,33 @@ if it decides to decode and emulate the instruction.

Used on x86 systems. When the VM capability KVM_CAP_X86_USER_SPACE_MSR is
enabled, MSR accesses to registers that would invoke a #GP by KVM kernel code
will instead trigger a KVM_EXIT_X86_RDMSR exit for reads and KVM_EXIT_X86_WRMSR
may instead trigger a KVM_EXIT_X86_RDMSR exit for reads and KVM_EXIT_X86_WRMSR
exit for writes.

The "reason" field specifies why the MSR trap occurred. User space will only
receive MSR exit traps when a particular reason was requested during through
The "reason" field specifies why the MSR interception occurred. Userspace will
only receive MSR exits when a particular reason was requested during through
ENABLE_CAP. Currently valid exit reasons are:

KVM_MSR_EXIT_REASON_UNKNOWN - access to MSR that is unknown to KVM
KVM_MSR_EXIT_REASON_INVAL - access to invalid MSRs or reserved bits
KVM_MSR_EXIT_REASON_FILTER - access blocked by KVM_X86_SET_MSR_FILTER

For KVM_EXIT_X86_RDMSR, the "index" field tells user space which MSR the guest
wants to read. To respond to this request with a successful read, user space
For KVM_EXIT_X86_RDMSR, the "index" field tells userspace which MSR the guest
wants to read. To respond to this request with a successful read, userspace
writes the respective data into the "data" field and must continue guest
execution to ensure the read data is transferred into guest register state.

If the RDMSR request was unsuccessful, user space indicates that with a "1" in
If the RDMSR request was unsuccessful, userspace indicates that with a "1" in
the "error" field. This will inject a #GP into the guest when the VCPU is
executed again.

For KVM_EXIT_X86_WRMSR, the "index" field tells user space which MSR the guest
wants to write. Once finished processing the event, user space must continue
vCPU execution. If the MSR write was unsuccessful, user space also sets the
For KVM_EXIT_X86_WRMSR, the "index" field tells userspace which MSR the guest
wants to write. Once finished processing the event, userspace must continue
vCPU execution. If the MSR write was unsuccessful, userspace also sets the
"error" field to "1".

See KVM_X86_SET_MSR_FILTER for details on the interaction with MSR filtering.
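
To make the round trip concrete, a minimal userspace handler for these exits
might look like the sketch below (not part of this commit); ``MY_EMULATED_MSR``,
``my_msr_value`` and ``handle_wrmsr()`` are hypothetical stand-ins for whatever
the VMM actually implements::

  /* run points at the vCPU's mmap'ed struct kvm_run. */
  switch (run->exit_reason) {
  case KVM_EXIT_X86_RDMSR:
          if (run->msr.index == MY_EMULATED_MSR) {
                  run->msr.data = my_msr_value;   /* value returned to the guest */
                  run->msr.error = 0;
          } else {
                  run->msr.error = 1;             /* KVM injects #GP on resume */
          }
          break;
  case KVM_EXIT_X86_WRMSR:
          /* returns 0 on success, 1 to have KVM inject #GP */
          run->msr.error = handle_wrmsr(run->msr.index, run->msr.data);
          break;
  }

  ioctl(vcpu_fd, KVM_RUN, 0);     /* resume so the result reaches guest state */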

::

@ -7263,19 +7240,27 @@ the module parameter for the target VM.

:Parameters: args[0] contains the mask of KVM_MSR_EXIT_REASON_* events to report
:Returns: 0 on success; -1 on error

This capability enables trapping of #GP invoking RDMSR and WRMSR instructions
into user space.
This capability allows userspace to intercept RDMSR and WRMSR instructions if
access to an MSR is denied. By default, KVM injects #GP on denied accesses.

When a guest requests to read or write an MSR, KVM may not implement all MSRs
that are relevant to a respective system. It also does not differentiate by
CPU type.

To allow more fine grained control over MSR handling, user space may enable
To allow more fine grained control over MSR handling, userspace may enable
this capability. With it enabled, MSR accesses that match the mask specified in
args[0] and trigger a #GP event inside the guest by KVM will instead trigger
KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space
can then handle to implement model specific MSR handling and/or user notifications
to inform a user that an MSR was not handled.
args[0] and would trigger a #GP inside the guest will instead trigger
KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications. Userspace
can then implement model specific MSR handling and/or user notifications
to inform a user that an MSR was not emulated/virtualized by KVM.

The valid mask flags are:

KVM_MSR_EXIT_REASON_UNKNOWN - intercept accesses to unknown (to KVM) MSRs
KVM_MSR_EXIT_REASON_INVAL - intercept accesses that are architecturally
invalid according to the vCPU model and/or mode
KVM_MSR_EXIT_REASON_FILTER - intercept accesses that are denied by userspace
via KVM_X86_SET_MSR_FILTER
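
Enabling the capability is a single vm ioctl. The sketch below (not part of
this commit) opts in to unknown-MSR and filter-denied exits, and per the
KVM_X86_SET_MSR_FILTER description it should be issued before any filter is
installed::

  struct kvm_enable_cap cap = {
          .cap     = KVM_CAP_X86_USER_SPACE_MSR,
          .args[0] = KVM_MSR_EXIT_REASON_UNKNOWN |
                     KVM_MSR_EXIT_REASON_FILTER,
  };

  if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
          perror("KVM_ENABLE_CAP(KVM_CAP_X86_USER_SPACE_MSR)");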

7.22 KVM_CAP_X86_BUS_LOCK_EXIT
-------------------------------

@ -7936,7 +7921,7 @@ KVM_EXIT_X86_WRMSR exit notifications.

This capability indicates that KVM supports that accesses to user defined MSRs
may be rejected. With this capability exposed, KVM exports new VM ioctl
KVM_X86_SET_MSR_FILTER which user space can call to specify bitmaps of MSR
ranges that KVM should reject access to.
ranges that KVM should deny access to.

In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to
trap and emulate MSRs that are outside of the scope of KVM as well as

@ -8080,12 +8065,13 @@ KVM device "kvm-arm-vgic-its" when dirty ring is enabled.

This capability indicates the features that Xen supports for hosting Xen
PVHVM guests. Valid flags are::

#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)

The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
ioctl is available, for the guest to set its hypercall page.

@ -8117,6 +8103,18 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID/TIMER/UPCALL_VECTOR vCPU attributes.

related to event channel delivery, timers, and the XENVER_version
interception.

The KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG flag indicates that KVM supports
the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute in the KVM_XEN_SET_ATTR
and KVM_XEN_GET_ATTR ioctls. This controls whether KVM will set the
XEN_RUNSTATE_UPDATE flag in guest memory mapped vcpu_runstate_info during
updates of the runstate information. Note that versions of KVM which support
the RUNSTATE feature above, but not the RUNSTATE_UPDATE_FLAG feature, will
always set the XEN_RUNSTATE_UPDATE flag when updating the guest structure,
which is perhaps counterintuitive. When this flag is advertised, KVM will
behave more correctly, not using the XEN_RUNSTATE_UPDATE flag until/unless
specifically enabled (by the guest making the hypercall, causing the VMM
to enable the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute).
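
For completeness (not part of this commit), the VMM discovers these bits with
KVM_CHECK_EXTENSION and can key its behaviour off the new flag::

  int xen_caps = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_XEN_HVM);

  if (xen_caps > 0 && (xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG)) {
          /*
           * KVM will leave XEN_RUNSTATE_UPDATE alone until the
           * KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute is enabled,
           * typically in response to the guest's HYPERVISOR_vm_assist call.
           */
  }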


8.31 KVM_CAP_PPC_MULTITCE
-------------------------

|
@ -686,6 +686,7 @@ struct kvm_vcpu_xen {
|
||||
struct gfn_to_pfn_cache vcpu_info_cache;
|
||||
struct gfn_to_pfn_cache vcpu_time_info_cache;
|
||||
struct gfn_to_pfn_cache runstate_cache;
|
||||
struct gfn_to_pfn_cache runstate2_cache;
|
||||
u64 last_steal;
|
||||
u64 runstate_entry_time;
|
||||
u64 runstate_times[4];
|
||||
@ -1112,6 +1113,7 @@ struct msr_bitmap_range {
|
||||
struct kvm_xen {
|
||||
u32 xen_version;
|
||||
bool long_mode;
|
||||
bool runstate_update_flag;
|
||||
u8 upcall_vector;
|
||||
struct gfn_to_pfn_cache shinfo_cache;
|
||||
struct idr evtchn_ports;
|
||||
|
@ -53,14 +53,6 @@
|
||||
/* Architectural interrupt line count. */
|
||||
#define KVM_NR_INTERRUPTS 256
|
||||
|
||||
struct kvm_memory_alias {
|
||||
__u32 slot; /* this has a different namespace than memory slots */
|
||||
__u32 flags;
|
||||
__u64 guest_phys_addr;
|
||||
__u64 memory_size;
|
||||
__u64 target_phys_addr;
|
||||
};
|
||||
|
||||
/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
|
||||
struct kvm_pic_state {
|
||||
__u8 last_irr; /* edge detection */
|
||||
|
@ -1233,8 +1233,12 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
|
||||
* Other defined bits are for MSRs that KVM does not expose:
|
||||
* EAX 3 SPCL, SMM page configuration lock
|
||||
* EAX 13 PCMSR, Prefetch control MSR
|
||||
*
|
||||
* KVM doesn't support SMM_CTL.
|
||||
* EAX 9 SMM_CTL MSR is not supported
|
||||
*/
|
||||
entry->eax &= BIT(0) | BIT(2) | BIT(6);
|
||||
entry->eax |= BIT(9);
|
||||
if (static_cpu_has(X86_FEATURE_LFENCE_RDTSC))
|
||||
entry->eax |= BIT(2);
|
||||
if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
|
||||
|
@ -31,7 +31,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
|
||||
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
|
||||
|
||||
/*
|
||||
* check if there is a pending userspace external interrupt
|
||||
@ -150,7 +149,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
|
||||
if (kvm_xen_timer_enabled(vcpu))
|
||||
kvm_xen_inject_timer_irqs(vcpu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
|
||||
|
||||
void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
|
@ -160,7 +160,6 @@ bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
|
||||
&& !(kvm_mwait_in_guest(vcpu->kvm) ||
|
||||
kvm_can_post_timer_interrupt(vcpu));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer);
|
||||
|
||||
static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@ -1914,7 +1913,6 @@ bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
|
||||
|
||||
return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
|
||||
|
||||
static void cancel_hv_timer(struct kvm_lapic *apic)
|
||||
{
|
||||
@ -2432,7 +2430,6 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
|
||||
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
|
||||
|
||||
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
{
|
||||
@ -2724,8 +2721,6 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
|
||||
icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
|
||||
__kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
|
||||
}
|
||||
} else {
|
||||
kvm_lapic_xapic_id_updated(vcpu->arch.apic);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -2761,6 +2756,9 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
|
||||
}
|
||||
memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
|
||||
|
||||
if (!apic_x2apic_mode(apic))
|
||||
kvm_lapic_xapic_id_updated(apic);
|
||||
|
||||
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
|
||||
kvm_recalculate_apic_map(vcpu->kvm);
|
||||
kvm_apic_set_version(vcpu);
|
||||
|
@ -465,9 +465,9 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
|
||||
return;
|
||||
|
||||
for (i = 0; i < npages; i++) {
|
||||
page_virtual = kmap_atomic(pages[i]);
|
||||
page_virtual = kmap_local_page(pages[i]);
|
||||
clflush_cache_range(page_virtual, PAGE_SIZE);
|
||||
kunmap_atomic(page_virtual);
|
||||
kunmap_local(page_virtual);
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
|
@ -3895,8 +3895,14 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
|
||||
|
||||
static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
|
||||
to_svm(vcpu)->vmcb->control.exit_info_1)
|
||||
struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
|
||||
|
||||
/*
|
||||
* Note, the next RIP must be provided as SRCU isn't held, i.e. KVM
|
||||
* can't read guest memory (dereference memslots) to decode the WRMSR.
|
||||
*/
|
||||
if (control->exit_code == SVM_EXIT_MSR && control->exit_info_1 &&
|
||||
nrips && control->next_rip)
|
||||
return handle_fastpath_set_msr_irqoff(vcpu);
|
||||
|
||||
return EXIT_FASTPATH_NONE;
|
||||
|
@ -2588,12 +2588,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
||||
nested_ept_init_mmu_context(vcpu);
|
||||
|
||||
/*
|
||||
* This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those
|
||||
* bits which we consider mandatory enabled.
|
||||
* The CR0_READ_SHADOW is what L2 should have expected to read given
|
||||
* the specifications by L1; It's not enough to take
|
||||
* vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we
|
||||
* have more bits than L1 expected.
|
||||
* Override the CR0/CR4 read shadows after setting the effective guest
|
||||
* CR0/CR4. The common helpers also set the shadows, but they don't
|
||||
* account for vmcs12's cr0/4_guest_host_mask.
|
||||
*/
|
||||
vmx_set_cr0(vcpu, vmcs12->guest_cr0);
|
||||
vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
|
||||
@ -4798,6 +4795,17 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
|
||||
|
||||
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
|
||||
|
||||
/*
|
||||
* If IBRS is advertised to the vCPU, KVM must flush the indirect
|
||||
* branch predictors when transitioning from L2 to L1, as L1 expects
|
||||
* hardware (KVM in this case) to provide separate predictor modes.
|
||||
* Bare metal isolates VMX root (host) from VMX non-root (guest), but
|
||||
* doesn't isolate different VMCSs, i.e. in this case, doesn't provide
|
||||
* separate modes for L2 vs L1.
|
||||
*/
|
||||
if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
|
||||
indirect_branch_prediction_barrier();
|
||||
|
||||
/* Update any VMCS fields that might have changed while L2 ran */
|
||||
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
|
||||
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
|
||||
@ -5131,24 +5139,35 @@ static int handle_vmxon(struct kvm_vcpu *vcpu)
|
||||
| FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
|
||||
|
||||
/*
|
||||
* Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks
|
||||
* that have higher priority than VM-Exit (see Intel SDM's pseudocode
|
||||
* for VMXON), as KVM must load valid CR0/CR4 values into hardware while
|
||||
* running the guest, i.e. KVM needs to check the _guest_ values.
|
||||
* Manually check CR4.VMXE checks, KVM must force CR4.VMXE=1 to enter
|
||||
* the guest and so cannot rely on hardware to perform the check,
|
||||
* which has higher priority than VM-Exit (see Intel SDM's pseudocode
|
||||
* for VMXON).
|
||||
*
|
||||
* Rely on hardware for the other two pre-VM-Exit checks, !VM86 and
|
||||
* !COMPATIBILITY modes. KVM may run the guest in VM86 to emulate Real
|
||||
* Mode, but KVM will never take the guest out of those modes.
|
||||
* Rely on hardware for the other pre-VM-Exit checks, CR0.PE=1, !VM86
|
||||
* and !COMPATIBILITY modes. For an unrestricted guest, KVM doesn't
|
||||
* force any of the relevant guest state. For a restricted guest, KVM
|
||||
* does force CR0.PE=1, but only to also force VM86 in order to emulate
|
||||
* Real Mode, and so there's no need to check CR0.PE manually.
|
||||
*/
|
||||
if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) ||
|
||||
!nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) {
|
||||
if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
|
||||
kvm_queue_exception(vcpu, UD_VECTOR);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* CPL=0 and all other checks that are lower priority than VM-Exit must
|
||||
* be checked manually.
|
||||
* The CPL is checked for "not in VMX operation" and for "in VMX root",
|
||||
* and has higher priority than the VM-Fail due to being post-VMXON,
|
||||
* i.e. VMXON #GPs outside of VMX non-root if CPL!=0. In VMX non-root,
|
||||
* VMXON causes VM-Exit and KVM unconditionally forwards VMXON VM-Exits
|
||||
* from L2 to L1, i.e. there's no need to check for the vCPU being in
|
||||
* VMX non-root.
|
||||
*
|
||||
* Forwarding the VM-Exit unconditionally, i.e. without performing the
|
||||
* #UD checks (see above), is functionally ok because KVM doesn't allow
|
||||
* L1 to run L2 without CR4.VMXE=0, and because KVM never modifies L2's
|
||||
* CR0 or CR4, i.e. it's L2's responsibility to emulate #UDs that are
|
||||
* missed by hardware due to shadowing CR0 and/or CR4.
|
||||
*/
|
||||
if (vmx_get_cpl(vcpu)) {
|
||||
kvm_inject_gp(vcpu, 0);
|
||||
@ -5158,6 +5177,17 @@ static int handle_vmxon(struct kvm_vcpu *vcpu)
|
||||
if (vmx->nested.vmxon)
|
||||
return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
|
||||
|
||||
/*
|
||||
* Invalid CR0/CR4 generates #GP. These checks are performed if and
|
||||
* only if the vCPU isn't already in VMX operation, i.e. effectively
|
||||
* have lower priority than the VM-Fail above.
|
||||
*/
|
||||
if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) ||
|
||||
!nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) {
|
||||
kvm_inject_gp(vcpu, 0);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
|
||||
!= VMXON_NEEDED_FEATURES) {
|
||||
kvm_inject_gp(vcpu, 0);
|
||||
|
@ -79,9 +79,10 @@ static inline bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the cr0 value that a nested guest would read. This is a combination
|
||||
* of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by
|
||||
* its hypervisor (cr0_read_shadow).
|
||||
* Return the cr0/4 value that a nested guest would read. This is a combination
|
||||
* of L1's "real" cr0 used to run the guest (guest_cr0), and the bits shadowed
|
||||
* by the L1 hypervisor (cr0_read_shadow). KVM must emulate CPU behavior as
|
||||
* the value+mask loaded into vmcs02 may not match the vmcs12 fields.
|
||||
*/
|
||||
static inline unsigned long nested_read_cr0(struct vmcs12 *fields)
|
||||
{
|
||||
|
@ -182,8 +182,10 @@ static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
|
||||
/* Enforce CPUID restriction on max enclave size. */
|
||||
max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
|
||||
sgx_12_0->edx;
|
||||
if (size >= BIT_ULL(max_size_log2))
|
||||
if (size >= BIT_ULL(max_size_log2)) {
|
||||
kvm_inject_gp(vcpu, 0);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* sgx_virt_ecreate() returns:
|
||||
|
@ -269,6 +269,7 @@ SYM_FUNC_END(__vmx_vcpu_run)
|
||||
|
||||
.section .text, "ax"
|
||||
|
||||
#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
|
||||
/**
|
||||
* vmread_error_trampoline - Trampoline from inline asm to vmread_error()
|
||||
* @field: VMCS field encoding that failed
|
||||
@ -317,6 +318,7 @@ SYM_FUNC_START(vmread_error_trampoline)
|
||||
|
||||
RET
|
||||
SYM_FUNC_END(vmread_error_trampoline)
|
||||
#endif
|
||||
|
||||
SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff)
|
||||
/*
|
||||
|
@ -858,7 +858,7 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
|
||||
* to change it directly without causing a vmexit. In that case read
|
||||
* it after vmexit and store it in vmx->spec_ctrl.
|
||||
*/
|
||||
if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
|
||||
if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
|
||||
flags |= VMX_RUN_SAVE_SPEC_CTRL;
|
||||
|
||||
return flags;
|
||||
@ -1348,8 +1348,10 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
|
||||
|
||||
/*
|
||||
* No indirect branch prediction barrier needed when switching
|
||||
* the active VMCS within a guest, e.g. on nested VM-Enter.
|
||||
* The L1 VMM can protect itself with retpolines, IBPB or IBRS.
|
||||
* the active VMCS within a vCPU, unless IBRS is advertised to
|
||||
* the vCPU. To minimize the number of IBPBs executed, KVM
|
||||
* performs IBPB on nested VM-Exit (a single nested transition
|
||||
* may switch the active VMCS multiple times).
|
||||
*/
|
||||
if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev))
|
||||
indirect_branch_prediction_barrier();
|
||||
@ -1834,12 +1836,42 @@ bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
|
||||
return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
|
||||
}
|
||||
|
||||
static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
|
||||
uint64_t val)
|
||||
{
|
||||
uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits;
|
||||
/*
|
||||
* Userspace is allowed to set any supported IA32_FEATURE_CONTROL regardless of
|
||||
* guest CPUID. Note, KVM allows userspace to set "VMX in SMX" to maintain
|
||||
* backwards compatibility even though KVM doesn't support emulating SMX. And
|
||||
* because userspace set "VMX in SMX", the guest must also be allowed to set it,
|
||||
* e.g. if the MSR is left unlocked and the guest does a RMW operation.
|
||||
*/
|
||||
#define KVM_SUPPORTED_FEATURE_CONTROL (FEAT_CTL_LOCKED | \
|
||||
FEAT_CTL_VMX_ENABLED_INSIDE_SMX | \
|
||||
FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX | \
|
||||
FEAT_CTL_SGX_LC_ENABLED | \
|
||||
FEAT_CTL_SGX_ENABLED | \
|
||||
FEAT_CTL_LMCE_ENABLED)
|
||||
|
||||
return !(val & ~valid_bits);
|
||||
static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
|
||||
struct msr_data *msr)
|
||||
{
|
||||
uint64_t valid_bits;
|
||||
|
||||
/*
|
||||
* Ensure KVM_SUPPORTED_FEATURE_CONTROL is updated when new bits are
|
||||
* exposed to the guest.
|
||||
*/
|
||||
WARN_ON_ONCE(vmx->msr_ia32_feature_control_valid_bits &
|
||||
~KVM_SUPPORTED_FEATURE_CONTROL);
|
||||
|
||||
if (!msr->host_initiated &&
|
||||
(vmx->msr_ia32_feature_control & FEAT_CTL_LOCKED))
|
||||
return false;
|
||||
|
||||
if (msr->host_initiated)
|
||||
valid_bits = KVM_SUPPORTED_FEATURE_CONTROL;
|
||||
else
|
||||
valid_bits = vmx->msr_ia32_feature_control_valid_bits;
|
||||
|
||||
return !(msr->data & ~valid_bits);
|
||||
}
|
||||
|
||||
static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
|
||||
@ -2238,10 +2270,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
vcpu->arch.mcg_ext_ctl = data;
|
||||
break;
|
||||
case MSR_IA32_FEAT_CTL:
|
||||
if (!vmx_feature_control_msr_valid(vcpu, data) ||
|
||||
(to_vmx(vcpu)->msr_ia32_feature_control &
|
||||
FEAT_CTL_LOCKED && !msr_info->host_initiated))
|
||||
if (!is_vmx_feature_control_msr_valid(vmx, msr_info))
|
||||
return 1;
|
||||
|
||||
vmx->msr_ia32_feature_control = data;
|
||||
if (msr_info->host_initiated && data == 0)
|
||||
vmx_leave_nested(vcpu);
|
||||
|
@ -11,14 +11,28 @@
|
||||
#include "../x86.h"
|
||||
|
||||
void vmread_error(unsigned long field, bool fault);
|
||||
__attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field,
|
||||
bool fault);
|
||||
void vmwrite_error(unsigned long field, unsigned long value);
|
||||
void vmclear_error(struct vmcs *vmcs, u64 phys_addr);
|
||||
void vmptrld_error(struct vmcs *vmcs, u64 phys_addr);
|
||||
void invvpid_error(unsigned long ext, u16 vpid, gva_t gva);
|
||||
void invept_error(unsigned long ext, u64 eptp, gpa_t gpa);
|
||||
|
||||
#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
|
||||
/*
|
||||
* The VMREAD error trampoline _always_ uses the stack to pass parameters, even
|
||||
* for 64-bit targets. Preserving all registers allows the VMREAD inline asm
|
||||
* blob to avoid clobbering GPRs, which in turn allows the compiler to better
|
||||
* optimize sequences of VMREADs.
|
||||
*
|
||||
* Declare the trampoline as an opaque label as it's not safe to call from C
|
||||
* code; there is no way to tell the compiler to pass params on the stack for
|
||||
* 64-bit targets.
|
||||
*
|
||||
* void vmread_error_trampoline(unsigned long field, bool fault);
|
||||
*/
|
||||
extern unsigned long vmread_error_trampoline;
|
||||
#endif
|
||||
|
||||
static __always_inline void vmcs_check16(unsigned long field)
|
||||
{
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
|
||||
|
@ -463,7 +463,6 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.apic_base;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_get_apic_base);
|
||||
|
||||
enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@ -491,7 +490,6 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
kvm_recalculate_apic_map(vcpu->kvm);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
|
||||
|
||||
/*
|
||||
* Handle a fault on a hardware virtualization (VMX or SVM) instruction.
|
||||
@ -782,7 +780,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
|
||||
kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
|
||||
fault->address);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
|
||||
|
||||
void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
|
||||
struct x86_exception *fault)
|
||||
@ -811,7 +808,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu)
|
||||
atomic_inc(&vcpu->arch.nmi_queued);
|
||||
kvm_make_request(KVM_REQ_NMI, vcpu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_inject_nmi);
|
||||
|
||||
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
|
||||
{
|
||||
@ -836,7 +832,6 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
|
||||
kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
|
||||
return false;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_require_cpl);
|
||||
|
||||
bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
|
||||
{
|
||||
@ -2069,7 +2064,6 @@ int kvm_emulate_as_nop(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_skip_emulated_instruction(vcpu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_emulate_as_nop);
|
||||
|
||||
int kvm_emulate_invd(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@ -2317,13 +2311,11 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
|
||||
kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
|
||||
|
||||
/* we verify if the enable bit is set... */
|
||||
if (system_time & 1) {
|
||||
kvm_gpc_activate(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
|
||||
KVM_HOST_USES_PFN, system_time & ~1ULL,
|
||||
if (system_time & 1)
|
||||
kvm_gpc_activate(&vcpu->arch.pv_time, system_time & ~1ULL,
|
||||
sizeof(struct pvclock_vcpu_time_info));
|
||||
} else {
|
||||
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
|
||||
}
|
||||
else
|
||||
kvm_gpc_deactivate(&vcpu->arch.pv_time);
|
||||
|
||||
return;
|
||||
}
|
||||
@ -2515,7 +2507,6 @@ u64 kvm_scale_tsc(u64 tsc, u64 ratio)
|
||||
|
||||
return _tsc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_scale_tsc);
|
||||
|
||||
static u64 kvm_compute_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
|
||||
{
|
||||
@ -2974,6 +2965,22 @@ static void kvm_update_masterclock(struct kvm *kvm)
|
||||
kvm_end_pvclock_update(kvm);
|
||||
}
|
||||
|
||||
/*
|
||||
* Use the kernel's tsc_khz directly if the TSC is constant, otherwise use KVM's
|
||||
* per-CPU value (which may be zero if a CPU is going offline). Note, tsc_khz
|
||||
* can change during boot even if the TSC is constant, as it's possible for KVM
|
||||
* to be loaded before TSC calibration completes. Ideally, KVM would get a
|
||||
* notification when calibration completes, but practically speaking calibration
|
||||
* will complete before userspace is alive enough to create VMs.
|
||||
*/
|
||||
static unsigned long get_cpu_tsc_khz(void)
|
||||
{
|
||||
if (static_cpu_has(X86_FEATURE_CONSTANT_TSC))
|
||||
return tsc_khz;
|
||||
else
|
||||
return __this_cpu_read(cpu_tsc_khz);
|
||||
}
|
||||
|
||||
/* Called within read_seqcount_begin/retry for kvm->pvclock_sc. */
|
||||
static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
|
||||
{
|
||||
@ -2984,7 +2991,8 @@ static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
|
||||
get_cpu();
|
||||
|
||||
data->flags = 0;
|
||||
if (ka->use_master_clock && __this_cpu_read(cpu_tsc_khz)) {
|
||||
if (ka->use_master_clock &&
|
||||
(static_cpu_has(X86_FEATURE_CONSTANT_TSC) || __this_cpu_read(cpu_tsc_khz))) {
|
||||
#ifdef CONFIG_X86_64
|
||||
struct timespec64 ts;
|
||||
|
||||
@ -2998,7 +3006,7 @@ static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
|
||||
data->flags |= KVM_CLOCK_TSC_STABLE;
|
||||
hv_clock.tsc_timestamp = ka->master_cycle_now;
|
||||
hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
|
||||
kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
|
||||
kvm_get_time_scale(NSEC_PER_SEC, get_cpu_tsc_khz() * 1000LL,
|
||||
&hv_clock.tsc_shift,
|
||||
&hv_clock.tsc_to_system_mul);
|
||||
data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc);
|
||||
@ -3037,12 +3045,10 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
|
||||
unsigned long flags;
|
||||
|
||||
read_lock_irqsave(&gpc->lock, flags);
|
||||
while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
|
||||
offset + sizeof(*guest_hv_clock))) {
|
||||
while (!kvm_gpc_check(gpc, offset + sizeof(*guest_hv_clock))) {
|
||||
read_unlock_irqrestore(&gpc->lock, flags);
|
||||
|
||||
if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
|
||||
offset + sizeof(*guest_hv_clock)))
|
||||
if (kvm_gpc_refresh(gpc, offset + sizeof(*guest_hv_clock)))
|
||||
return;
|
||||
|
||||
read_lock_irqsave(&gpc->lock, flags);
|
||||
@ -3108,7 +3114,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
|
||||
/* Keep irq disabled to prevent changes to the clock */
|
||||
local_irq_save(flags);
|
||||
tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
|
||||
tgt_tsc_khz = get_cpu_tsc_khz();
|
||||
if (unlikely(tgt_tsc_khz == 0)) {
|
||||
local_irq_restore(flags);
|
||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
|
||||
@ -3391,7 +3397,7 @@ static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)
|
||||
|
||||
static void kvmclock_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
|
||||
kvm_gpc_deactivate(&vcpu->arch.pv_time);
|
||||
vcpu->arch.time = 0;
|
||||
}
|
||||
|
||||
@ -4431,7 +4437,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL |
|
||||
KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
|
||||
if (sched_info_on())
|
||||
r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
|
||||
r |= KVM_XEN_HVM_CONFIG_RUNSTATE |
|
||||
KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG;
|
||||
break;
|
||||
#endif
|
||||
case KVM_CAP_SYNC_REGS:
|
||||
@ -8771,7 +8778,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
|
||||
write_fault_to_spt,
|
||||
emulation_type))
|
||||
return 1;
|
||||
if (ctxt->have_exception) {
|
||||
|
||||
if (ctxt->have_exception &&
|
||||
!(emulation_type & EMULTYPE_SKIP)) {
|
||||
/*
|
||||
* #UD should result in just EMULATION_FAILED, and trap-like
|
||||
* exception should not be encountered during decode.
|
||||
@ -9035,9 +9044,11 @@ static void tsc_khz_changed(void *data)
|
||||
struct cpufreq_freqs *freq = data;
|
||||
unsigned long khz = 0;
|
||||
|
||||
WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_CONSTANT_TSC));
|
||||
|
||||
if (data)
|
||||
khz = freq->new;
|
||||
else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
|
||||
else
|
||||
khz = cpufreq_quick_get(raw_smp_processor_id());
|
||||
if (!khz)
|
||||
khz = tsc_khz;
|
||||
@ -9058,8 +9069,10 @@ static void kvm_hyperv_tsc_notifier(void)
|
||||
hyperv_stop_tsc_emulation();
|
||||
|
||||
/* TSC frequency always matches when on Hyper-V */
|
||||
for_each_present_cpu(cpu)
|
||||
per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
|
||||
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
|
||||
for_each_present_cpu(cpu)
|
||||
per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
|
||||
}
|
||||
kvm_caps.max_guest_tsc_khz = tsc_khz;
|
||||
|
||||
list_for_each_entry(kvm, &vm_list, vm_list) {
|
||||
@ -9196,10 +9209,10 @@ static void kvm_timer_init(void)
|
||||
}
|
||||
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
|
||||
CPUFREQ_TRANSITION_NOTIFIER);
|
||||
}
|
||||
|
||||
cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
|
||||
kvmclock_cpu_online, kvmclock_cpu_down_prep);
|
||||
cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
|
||||
kvmclock_cpu_online, kvmclock_cpu_down_prep);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
@ -9359,10 +9372,11 @@ void kvm_arch_exit(void)
|
||||
#endif
|
||||
kvm_lapic_exit();
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
|
||||
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
|
||||
cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
|
||||
CPUFREQ_TRANSITION_NOTIFIER);
|
||||
cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
|
||||
cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
|
||||
}
|
||||
#ifdef CONFIG_X86_64
|
||||
pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
|
||||
irq_work_sync(&pvclock_irq_work);
|
||||
@ -10276,8 +10290,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
|
||||
vcpu->mmio_needed = 0;
|
||||
r = 0;
|
||||
goto out;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
|
||||
/* Page is swapped out. Do synthetic halt */
|
||||
@ -11538,7 +11552,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.regs_avail = ~0;
|
||||
vcpu->arch.regs_dirty = ~0;
|
||||
|
||||
kvm_gpc_init(&vcpu->arch.pv_time);
|
||||
kvm_gpc_init(&vcpu->arch.pv_time, vcpu->kvm, vcpu, KVM_HOST_USES_PFN);
|
||||
|
||||
if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
|
||||
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
|
||||
@ -12040,7 +12054,6 @@ bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
|
||||
|
||||
bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
|
@ -42,13 +42,12 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
|
||||
int idx = srcu_read_lock(&kvm->srcu);
|
||||
|
||||
if (gfn == GPA_INVALID) {
|
||||
kvm_gpc_deactivate(kvm, gpc);
|
||||
kvm_gpc_deactivate(gpc);
|
||||
goto out;
|
||||
}
|
||||
|
||||
do {
|
||||
ret = kvm_gpc_activate(kvm, gpc, NULL, KVM_HOST_USES_PFN, gpa,
|
||||
PAGE_SIZE);
|
||||
ret = kvm_gpc_activate(gpc, gpa, PAGE_SIZE);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -170,7 +169,257 @@ static void kvm_xen_init_timer(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.xen.timer.function = xen_timer_callback;
|
||||
}
|
||||
|
||||
static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
|
||||
static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
|
||||
{
|
||||
struct kvm_vcpu_xen *vx = &v->arch.xen;
|
||||
struct gfn_to_pfn_cache *gpc1 = &vx->runstate_cache;
|
||||
struct gfn_to_pfn_cache *gpc2 = &vx->runstate2_cache;
|
||||
size_t user_len, user_len1, user_len2;
|
||||
struct vcpu_runstate_info rs;
|
||||
unsigned long flags;
|
||||
size_t times_ofs;
|
||||
uint8_t *update_bit = NULL;
|
||||
uint64_t entry_time;
|
||||
uint64_t *rs_times;
|
||||
int *rs_state;
|
||||
|
||||
/*
|
||||
* The only difference between 32-bit and 64-bit versions of the
|
||||
* runstate struct is the alignment of uint64_t in 32-bit, which
|
||||
* means that the 64-bit version has an additional 4 bytes of
|
||||
* padding after the first field 'state'. Let's be really really
|
||||
* paranoid about that, and matching it with our internal data
|
||||
* structures that we memcpy into it...
|
||||
*/
|
||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0);
|
||||
BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0);
|
||||
BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* The 64-bit structure has 4 bytes of padding before 'state_entry_time'
|
||||
* so each subsequent field is shifted by 4, and it's 4 bytes longer.
|
||||
*/
|
||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
|
||||
offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
|
||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
|
||||
offsetof(struct compat_vcpu_runstate_info, time) + 4);
|
||||
BUILD_BUG_ON(sizeof(struct vcpu_runstate_info) != 0x2c + 4);
|
||||
#endif
|
||||
/*
|
||||
* The state field is in the same place at the start of both structs,
|
||||
* and is the same size (int) as vx->current_runstate.
|
||||
*/
|
||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
|
||||
offsetof(struct compat_vcpu_runstate_info, state));
|
||||
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) !=
|
||||
sizeof(vx->current_runstate));
|
||||
BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
|
||||
sizeof(vx->current_runstate));
|
||||
|
||||
/*
|
||||
* The state_entry_time field is 64 bits in both versions, and the
|
||||
* XEN_RUNSTATE_UPDATE flag is in the top bit, which given that x86
|
||||
* is little-endian means that it's in the last *byte* of the word.
|
||||
* That detail is important later.
|
||||
*/
|
||||
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
|
||||
sizeof(uint64_t));
|
||||
BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
|
||||
sizeof(uint64_t));
|
||||
BUILD_BUG_ON((XEN_RUNSTATE_UPDATE >> 56) != 0x80);
|
||||
|
||||
/*
|
||||
* The time array is four 64-bit quantities in both versions, matching
|
||||
* the vx->runstate_times and immediately following state_entry_time.
|
||||
*/
|
||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
|
||||
offsetof(struct vcpu_runstate_info, time) - sizeof(uint64_t));
|
||||
BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
|
||||
offsetof(struct compat_vcpu_runstate_info, time) - sizeof(uint64_t));
|
||||
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
|
||||
sizeof_field(struct compat_vcpu_runstate_info, time));
|
||||
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
|
||||
sizeof(vx->runstate_times));
|
||||
|
||||
if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) {
|
||||
user_len = sizeof(struct vcpu_runstate_info);
|
||||
times_ofs = offsetof(struct vcpu_runstate_info,
|
||||
state_entry_time);
|
||||
} else {
|
||||
user_len = sizeof(struct compat_vcpu_runstate_info);
|
||||
times_ofs = offsetof(struct compat_vcpu_runstate_info,
|
||||
state_entry_time);
|
||||
}
|
||||
|
||||
/*
|
||||
* There are basically no alignment constraints. The guest can set it
|
||||
* up so it crosses from one page to the next, and at arbitrary byte
|
||||
* alignment (and the 32-bit ABI doesn't align the 64-bit integers
|
||||
* anyway, even if the overall struct had been 64-bit aligned).
|
||||
*/
|
||||
if ((gpc1->gpa & ~PAGE_MASK) + user_len >= PAGE_SIZE) {
|
||||
user_len1 = PAGE_SIZE - (gpc1->gpa & ~PAGE_MASK);
|
||||
user_len2 = user_len - user_len1;
|
||||
} else {
|
||||
user_len1 = user_len;
|
||||
user_len2 = 0;
|
||||
}
|
||||
BUG_ON(user_len1 + user_len2 != user_len);
|
||||
|
||||
retry:
|
||||
/*
|
||||
* Attempt to obtain the GPC lock on *both* (if there are two)
|
||||
* gfn_to_pfn caches that cover the region.
|
||||
*/
|
||||
read_lock_irqsave(&gpc1->lock, flags);
|
||||
while (!kvm_gpc_check(gpc1, user_len1)) {
|
||||
read_unlock_irqrestore(&gpc1->lock, flags);
|
||||
|
||||
/* When invoked from kvm_sched_out() we cannot sleep */
|
||||
if (atomic)
|
||||
return;
|
||||
|
||||
if (kvm_gpc_refresh(gpc1, user_len1))
|
||||
return;
|
||||
|
||||
read_lock_irqsave(&gpc1->lock, flags);
|
||||
}
|
||||
|
||||
if (likely(!user_len2)) {
|
||||
/*
|
||||
* Set up three pointers directly to the runstate_info
|
||||
* struct in the guest (via the GPC).
|
||||
*
|
||||
* • @rs_state → state field
|
||||
* • @rs_times → state_entry_time field.
|
||||
* • @update_bit → last byte of state_entry_time, which
|
||||
* contains the XEN_RUNSTATE_UPDATE bit.
|
||||
*/
|
||||
rs_state = gpc1->khva;
|
||||
rs_times = gpc1->khva + times_ofs;
|
||||
if (v->kvm->arch.xen.runstate_update_flag)
|
||||
update_bit = ((void *)(&rs_times[1])) - 1;
|
||||
} else {
|
||||
/*
|
||||
* The guest's runstate_info is split across two pages and we
|
||||
* need to hold and validate both GPCs simultaneously. We can
|
||||
* declare a lock ordering GPC1 > GPC2 because nothing else
|
||||
* takes them more than one at a time.
|
||||
*/
|
||||
read_lock(&gpc2->lock);
|
||||
|
||||
if (!kvm_gpc_check(gpc2, user_len2)) {
|
||||
read_unlock(&gpc2->lock);
|
||||
read_unlock_irqrestore(&gpc1->lock, flags);
|
||||
|
||||
/* When invoked from kvm_sched_out() we cannot sleep */
|
||||
if (atomic)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Use kvm_gpc_activate() here because if the runstate
|
||||
* area was configured in 32-bit mode and only extends
|
||||
* to the second page now because the guest changed to
|
||||
* 64-bit mode, the second GPC won't have been set up.
|
||||
*/
|
||||
if (kvm_gpc_activate(gpc2, gpc1->gpa + user_len1,
|
||||
user_len2))
|
||||
return;
|
||||
|
||||
/*
|
||||
* We dropped the lock on GPC1 so we have to go all the
|
||||
* way back and revalidate that too.
|
||||
*/
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/*
|
||||
* In this case, the runstate_info struct will be assembled on
|
||||
* the kernel stack (compat or not as appropriate) and will
|
||||
* be copied to GPC1/GPC2 with a dual memcpy. Set up the three
|
||||
* rs pointers accordingly.
|
||||
*/
|
||||
rs_times = &rs.state_entry_time;
|
||||
|
||||
/*
|
||||
* The rs_state pointer points to the start of what we'll
|
||||
* copy to the guest, which in the case of a compat guest
|
||||
* is the 32-bit field that the compiler thinks is padding.
|
||||
*/
|
||||
rs_state = ((void *)rs_times) - times_ofs;
|
||||
|
||||
/*
|
||||
* The update_bit is still directly in the guest memory,
|
||||
* via one GPC or the other.
|
||||
*/
|
||||
if (v->kvm->arch.xen.runstate_update_flag) {
|
||||
if (user_len1 >= times_ofs + sizeof(uint64_t))
|
||||
update_bit = gpc1->khva + times_ofs +
|
||||
sizeof(uint64_t) - 1;
|
||||
else
|
||||
update_bit = gpc2->khva + times_ofs +
|
||||
sizeof(uint64_t) - 1 - user_len1;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Don't leak kernel memory through the padding in the 64-bit
|
||||
* version of the struct.
|
||||
*/
|
||||
memset(&rs, 0, offsetof(struct vcpu_runstate_info, state_entry_time));
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* First, set the XEN_RUNSTATE_UPDATE bit in the top bit of the
|
||||
* state_entry_time field, directly in the guest. We need to set
|
||||
* that (and write-barrier) before writing to the rest of the
|
||||
* structure, and clear it last. Just as Xen does, we address the
|
||||
* single *byte* in which it resides because it might be in a
|
||||
* different cache line to the rest of the 64-bit word, due to
|
||||
* the (lack of) alignment constraints.
|
||||
*/
|
||||
entry_time = vx->runstate_entry_time;
|
||||
if (update_bit) {
|
||||
entry_time |= XEN_RUNSTATE_UPDATE;
|
||||
*update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56;
|
||||
smp_wmb();
|
||||
}
|
||||
|
||||
/*
|
||||
* Now assemble the actual structure, either on our kernel stack
|
||||
* or directly in the guest according to how the rs_state and
|
||||
* rs_times pointers were set up above.
|
||||
*/
|
||||
*rs_state = vx->current_runstate;
|
||||
rs_times[0] = entry_time;
|
||||
memcpy(rs_times + 1, vx->runstate_times, sizeof(vx->runstate_times));
|
||||
|
||||
/* For the split case, we have to then copy it to the guest. */
|
||||
if (user_len2) {
|
||||
memcpy(gpc1->khva, rs_state, user_len1);
|
||||
memcpy(gpc2->khva, ((void *)rs_state) + user_len1, user_len2);
|
||||
}
|
||||
smp_wmb();
|
||||
|
||||
/* Finally, clear the XEN_RUNSTATE_UPDATE bit. */
|
||||
if (update_bit) {
|
||||
entry_time &= ~XEN_RUNSTATE_UPDATE;
|
||||
*update_bit = entry_time >> 56;
|
||||
smp_wmb();
|
||||
}
|
||||
|
||||
if (user_len2)
|
||||
read_unlock(&gpc2->lock);
|
||||
|
||||
read_unlock_irqrestore(&gpc1->lock, flags);
|
||||
|
||||
mark_page_dirty_in_slot(v->kvm, gpc1->memslot, gpc1->gpa >> PAGE_SHIFT);
|
||||
if (user_len2)
|
||||
mark_page_dirty_in_slot(v->kvm, gpc2->memslot, gpc2->gpa >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
|
||||
{
|
||||
struct kvm_vcpu_xen *vx = &v->arch.xen;
|
||||
u64 now = get_kvmclock_ns(v->kvm);
|
||||
@ -196,122 +445,9 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
|
||||
vx->runstate_times[vx->current_runstate] += delta_ns;
|
||||
vx->current_runstate = state;
|
||||
vx->runstate_entry_time = now;
|
||||
}
|
||||
|
||||
void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
|
||||
{
|
||||
struct kvm_vcpu_xen *vx = &v->arch.xen;
|
||||
struct gfn_to_pfn_cache *gpc = &vx->runstate_cache;
|
||||
uint64_t *user_times;
|
||||
unsigned long flags;
|
||||
size_t user_len;
|
||||
int *user_state;
|
||||
|
||||
kvm_xen_update_runstate(v, state);
|
||||
|
||||
if (!vx->runstate_cache.active)
|
||||
return;
|
||||
|
||||
if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode)
|
||||
user_len = sizeof(struct vcpu_runstate_info);
|
||||
else
|
||||
user_len = sizeof(struct compat_vcpu_runstate_info);
|
||||
|
||||
read_lock_irqsave(&gpc->lock, flags);
|
||||
while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
|
||||
user_len)) {
|
||||
read_unlock_irqrestore(&gpc->lock, flags);
|
||||
|
||||
/* When invoked from kvm_sched_out() we cannot sleep */
|
||||
if (state == RUNSTATE_runnable)
|
||||
return;
|
||||
|
||||
if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, user_len))
|
||||
return;
|
||||
|
||||
read_lock_irqsave(&gpc->lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* The only difference between 32-bit and 64-bit versions of the
|
||||
* runstate struct is the alignment of uint64_t in 32-bit, which
|
||||
* means that the 64-bit version has an additional 4 bytes of
|
||||
* padding after the first field 'state'.
|
||||
*
|
||||
* So we use 'int __user *user_state' to point to the state field,
|
||||
* and 'uint64_t __user *user_times' for runstate_entry_time. So
|
||||
* the actual array of time[] in each state starts at user_times[1].
|
||||
*/
|
||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0);
|
||||
BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0);
|
||||
BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
|
||||
#ifdef CONFIG_X86_64
|
||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
|
||||
offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
|
||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
|
||||
offsetof(struct compat_vcpu_runstate_info, time) + 4);
|
||||
#endif
|
||||
|
||||
user_state = gpc->khva;
|
||||
|
||||
if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode)
|
||||
user_times = gpc->khva + offsetof(struct vcpu_runstate_info,
|
||||
state_entry_time);
|
||||
else
|
||||
user_times = gpc->khva + offsetof(struct compat_vcpu_runstate_info,
|
||||
state_entry_time);
|
||||
|
||||
/*
|
||||
* First write the updated state_entry_time at the appropriate
|
||||
* location determined by 'offset'.
|
||||
*/
|
||||
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
|
||||
sizeof(user_times[0]));
|
||||
BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
|
||||
sizeof(user_times[0]));
|
||||
|
||||
user_times[0] = vx->runstate_entry_time | XEN_RUNSTATE_UPDATE;
|
||||
smp_wmb();
|
||||
|
||||
/*
|
||||
* Next, write the new runstate. This is in the *same* place
|
||||
* for 32-bit and 64-bit guests, asserted here for paranoia.
|
||||
*/
|
||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
|
||||
offsetof(struct compat_vcpu_runstate_info, state));
|
||||
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) !=
|
||||
sizeof(vx->current_runstate));
|
||||
BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
|
||||
sizeof(vx->current_runstate));
|
||||
|
||||
*user_state = vx->current_runstate;
|
||||
|
||||
/*
|
||||
* Write the actual runstate times immediately after the
|
||||
* runstate_entry_time.
|
||||
*/
|
||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
|
||||
offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
|
||||
BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
|
||||
offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
|
||||
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
|
||||
sizeof_field(struct compat_vcpu_runstate_info, time));
|
||||
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
|
||||
sizeof(vx->runstate_times));
|
||||
|
||||
memcpy(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times));
|
||||
smp_wmb();
|
||||
|
||||
/*
|
||||
* Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
|
||||
* runstate_entry_time field.
|
||||
*/
|
||||
user_times[0] &= ~XEN_RUNSTATE_UPDATE;
|
||||
smp_wmb();
|
||||
|
||||
read_unlock_irqrestore(&gpc->lock, flags);
|
||||
|
||||
mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
|
||||
if (vx->runstate_cache.active)
|
||||
kvm_xen_update_runstate_guest(v, state == RUNSTATE_runnable);
|
||||
}
|
||||
|
||||
static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
|
||||
@ -352,12 +488,10 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
|
||||
* little more honest about it.
|
||||
*/
|
||||
read_lock_irqsave(&gpc->lock, flags);
|
||||
while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
|
||||
sizeof(struct vcpu_info))) {
|
||||
while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
|
||||
read_unlock_irqrestore(&gpc->lock, flags);
|
||||
|
||||
if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
|
||||
sizeof(struct vcpu_info)))
|
||||
if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info)))
|
||||
return;
|
||||
|
||||
read_lock_irqsave(&gpc->lock, flags);
|
||||
@ -417,8 +551,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
|
||||
sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));
|
||||
|
||||
read_lock_irqsave(&gpc->lock, flags);
|
||||
while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
|
||||
sizeof(struct vcpu_info))) {
|
||||
while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
|
||||
read_unlock_irqrestore(&gpc->lock, flags);
|
||||
|
||||
/*
|
||||
@ -432,8 +565,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
|
||||
if (in_atomic() || !task_is_running(current))
|
||||
return 1;
|
||||
|
||||
if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
|
||||
sizeof(struct vcpu_info))) {
|
||||
if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info))) {
|
||||
/*
|
||||
* If this failed, userspace has screwed up the
|
||||
* vcpu_info mapping. No interrupts for you.
|
||||
@ -493,6 +625,17 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
|
||||
r = 0;
|
||||
break;
|
||||
|
||||
case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG:
|
||||
if (!sched_info_on()) {
|
||||
r = -EOPNOTSUPP;
|
||||
break;
|
||||
}
|
||||
mutex_lock(&kvm->lock);
|
||||
kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag;
|
||||
mutex_unlock(&kvm->lock);
|
||||
r = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -530,6 +673,15 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
|
||||
r = 0;
|
||||
break;
|
||||
|
||||
case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG:
|
||||
if (!sched_info_on()) {
|
||||
r = -EOPNOTSUPP;
|
||||
break;
|
||||
}
|
||||
data->u.runstate_update_flag = kvm->arch.xen.runstate_update_flag;
|
||||
r = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -554,15 +706,13 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
|
||||
offsetof(struct compat_vcpu_info, time));
|
||||
|
||||
if (data->u.gpa == GPA_INVALID) {
|
||||
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
|
||||
kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
r = kvm_gpc_activate(vcpu->kvm,
|
||||
&vcpu->arch.xen.vcpu_info_cache, NULL,
|
||||
KVM_HOST_USES_PFN, data->u.gpa,
|
||||
sizeof(struct vcpu_info));
|
||||
r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_info_cache,
|
||||
data->u.gpa, sizeof(struct vcpu_info));
|
||||
if (!r)
|
||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
||||
|
||||
@ -570,37 +720,65 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
|
||||
|
||||
case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
|
||||
if (data->u.gpa == GPA_INVALID) {
|
||||
kvm_gpc_deactivate(vcpu->kvm,
|
||||
&vcpu->arch.xen.vcpu_time_info_cache);
|
||||
kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache);
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
r = kvm_gpc_activate(vcpu->kvm,
|
||||
&vcpu->arch.xen.vcpu_time_info_cache,
|
||||
NULL, KVM_HOST_USES_PFN, data->u.gpa,
|
||||
r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_time_info_cache,
|
||||
data->u.gpa,
|
||||
sizeof(struct pvclock_vcpu_time_info));
|
||||
if (!r)
|
||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
||||
break;
|
||||
|
||||
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
|
||||
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: {
|
||||
size_t sz, sz1, sz2;
|
||||
|
||||
if (!sched_info_on()) {
|
||||
r = -EOPNOTSUPP;
|
||||
break;
|
||||
}
|
||||
if (data->u.gpa == GPA_INVALID) {
|
||||
kvm_gpc_deactivate(vcpu->kvm,
|
||||
&vcpu->arch.xen.runstate_cache);
|
||||
r = 0;
|
||||
deactivate_out:
|
||||
kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache);
|
||||
kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache);
|
||||
break;
|
||||
}
|
||||
|
||||
r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
|
||||
NULL, KVM_HOST_USES_PFN, data->u.gpa,
|
||||
sizeof(struct vcpu_runstate_info));
|
||||
break;
|
||||
/*
|
||||
* If the guest switches to 64-bit mode after setting the runstate
|
||||
* address, that's actually OK. kvm_xen_update_runstate_guest()
|
||||
* will cope.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_64BIT) && vcpu->kvm->arch.xen.long_mode)
|
||||
sz = sizeof(struct vcpu_runstate_info);
|
||||
else
|
||||
sz = sizeof(struct compat_vcpu_runstate_info);
|
||||
|
||||
/* How much fits in the (first) page? */
sz1 = PAGE_SIZE - (data->u.gpa & ~PAGE_MASK);
r = kvm_gpc_activate(&vcpu->arch.xen.runstate_cache,
data->u.gpa, sz1);
if (r)
goto deactivate_out;

/* Either map the second page, or deactivate the second GPC */
if (sz1 >= sz) {
kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache);
} else {
sz2 = sz - sz1;
BUG_ON((data->u.gpa + sz1) & ~PAGE_MASK);
r = kvm_gpc_activate(&vcpu->arch.xen.runstate2_cache,
data->u.gpa + sz1, sz2);
if (r)
goto deactivate_out;
}

kvm_xen_update_runstate_guest(vcpu, false);
break;
}
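/*
 * Worked example for the split above (illustrative numbers, not taken
 * from this patch): a compat guest whose 0x2c-byte runstate area starts
 * at gpa 0x7fff4 gets sz1 = 0x1000 - 0xff4 = 0xc bytes from the first
 * cache; since sz1 < sz, the second cache is activated for the
 * remaining sz2 = 0x2c - 0xc = 0x20 bytes at gpa 0x80000, which is
 * necessarily page-aligned, as the BUG_ON() above checks.
 */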
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
|
||||
if (!sched_info_on()) {
|
||||
r = -EOPNOTSUPP;
|
||||
@ -693,6 +871,8 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
|
||||
|
||||
if (data->u.runstate.state <= RUNSTATE_offline)
|
||||
kvm_xen_update_runstate(vcpu, data->u.runstate.state);
|
||||
else if (vcpu->arch.xen.runstate_cache.active)
|
||||
kvm_xen_update_runstate_guest(vcpu, false);
|
||||
r = 0;
|
||||
break;
|
||||
|
||||
@ -972,9 +1152,9 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
|
||||
bool ret = true;
|
||||
int idx, i;
|
||||
|
||||
read_lock_irqsave(&gpc->lock, flags);
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
|
||||
read_lock_irqsave(&gpc->lock, flags);
|
||||
if (!kvm_gpc_check(gpc, PAGE_SIZE))
|
||||
goto out_rcu;
|
||||
|
||||
ret = false;
|
||||
@ -994,8 +1174,8 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
|
||||
}
|
||||
|
||||
out_rcu:
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
read_unlock_irqrestore(&gpc->lock, flags);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -1008,20 +1188,45 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
|
||||
evtchn_port_t port, *ports;
|
||||
gpa_t gpa;
|
||||
|
||||
if (!longmode || !lapic_in_kernel(vcpu) ||
|
||||
if (!lapic_in_kernel(vcpu) ||
|
||||
!(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND))
|
||||
return false;
|
||||
|
||||
idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, idx);
|
||||
|
||||
if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, &sched_poll,
|
||||
sizeof(sched_poll))) {
|
||||
if (!gpa) {
|
||||
*r = -EFAULT;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_64BIT) && !longmode) {
|
||||
struct compat_sched_poll sp32;
|
||||
|
||||
/* Sanity check that the compat struct definition is correct */
|
||||
BUILD_BUG_ON(sizeof(sp32) != 16);
|
||||
|
||||
if (kvm_vcpu_read_guest(vcpu, gpa, &sp32, sizeof(sp32))) {
|
||||
*r = -EFAULT;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a 32-bit pointer to an array of evtchn_port_t which
|
||||
* are uint32_t, so once it's converted no further compat
|
||||
* handling is needed.
|
||||
*/
|
||||
sched_poll.ports = (void *)(unsigned long)(sp32.ports);
|
||||
sched_poll.nr_ports = sp32.nr_ports;
|
||||
sched_poll.timeout = sp32.timeout;
|
||||
} else {
|
||||
if (kvm_vcpu_read_guest(vcpu, gpa, &sched_poll,
|
||||
sizeof(sched_poll))) {
|
||||
*r = -EFAULT;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(sched_poll.nr_ports > 1)) {
|
||||
/* Xen (unofficially) limits number of pollers to 128 */
|
||||
if (sched_poll.nr_ports > 128) {
|
||||
@ -1371,7 +1576,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
|
||||
read_lock_irqsave(&gpc->lock, flags);
|
||||
if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
|
||||
if (!kvm_gpc_check(gpc, PAGE_SIZE))
|
||||
goto out_rcu;
|
||||
|
||||
if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
|
||||
@ -1405,7 +1610,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
|
||||
gpc = &vcpu->arch.xen.vcpu_info_cache;
|
||||
|
||||
read_lock_irqsave(&gpc->lock, flags);
|
||||
if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, sizeof(struct vcpu_info))) {
|
||||
if (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
|
||||
/*
|
||||
* Could not access the vcpu_info. Set the bit in-kernel
|
||||
* and prod the vCPU to deliver it for itself.
|
||||
@ -1503,7 +1708,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm)
|
||||
break;
|
||||
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
rc = kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa, PAGE_SIZE);
|
||||
rc = kvm_gpc_refresh(gpc, PAGE_SIZE);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
} while(!rc);
|
||||
|
||||
@ -1833,9 +2038,14 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
|
||||
|
||||
timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
|
||||
|
||||
kvm_gpc_init(&vcpu->arch.xen.runstate_cache);
|
||||
kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache);
|
||||
kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache);
|
||||
kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm, NULL,
|
||||
KVM_HOST_USES_PFN);
|
||||
kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm, NULL,
|
||||
KVM_HOST_USES_PFN);
|
||||
kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache, vcpu->kvm, NULL,
|
||||
KVM_HOST_USES_PFN);
|
||||
kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache, vcpu->kvm, NULL,
|
||||
KVM_HOST_USES_PFN);
|
||||
}
|
||||
|
||||
void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
|
||||
@ -1843,9 +2053,10 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
|
||||
if (kvm_xen_timer_enabled(vcpu))
|
||||
kvm_xen_stop_timer(vcpu);
|
||||
|
||||
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache);
|
||||
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
|
||||
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache);
|
||||
kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache);
|
||||
kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache);
|
||||
kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
|
||||
kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache);
|
||||
|
||||
del_timer_sync(&vcpu->arch.xen.poll_timer);
|
||||
}
|
||||
@ -1853,7 +2064,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
|
||||
void kvm_xen_init_vm(struct kvm *kvm)
|
||||
{
|
||||
idr_init(&kvm->arch.xen.evtchn_ports);
|
||||
kvm_gpc_init(&kvm->arch.xen.shinfo_cache);
|
||||
kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm, NULL, KVM_HOST_USES_PFN);
|
||||
}
|
||||
|
||||
void kvm_xen_destroy_vm(struct kvm *kvm)
|
||||
@ -1861,7 +2072,7 @@ void kvm_xen_destroy_vm(struct kvm *kvm)
|
||||
struct evtchnfd *evtchnfd;
|
||||
int i;
|
||||
|
||||
kvm_gpc_deactivate(kvm, &kvm->arch.xen.shinfo_cache);
|
||||
kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache);
|
||||
|
||||
idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) {
|
||||
if (!evtchnfd->deliver.port.port)
|
||||
|
@ -143,11 +143,11 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
|
||||
#include <asm/xen/interface.h>
|
||||
#include <xen/interface/vcpu.h>
|
||||
|
||||
void kvm_xen_update_runstate_guest(struct kvm_vcpu *vcpu, int state);
|
||||
void kvm_xen_update_runstate(struct kvm_vcpu *vcpu, int state);
|
||||
|
||||
static inline void kvm_xen_runstate_set_running(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_xen_update_runstate_guest(vcpu, RUNSTATE_running);
|
||||
kvm_xen_update_runstate(vcpu, RUNSTATE_running);
|
||||
}
|
||||
|
||||
static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
|
||||
@ -162,7 +162,7 @@ static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
|
||||
if (WARN_ON_ONCE(!vcpu->preempted))
|
||||
return;
|
||||
|
||||
kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
|
||||
kvm_xen_update_runstate(vcpu, RUNSTATE_runnable);
|
||||
}
|
||||
|
||||
/* 32-bit compatibility definitions, also used natively in 32-bit build */
|
||||
@ -207,4 +207,11 @@ struct compat_vcpu_runstate_info {
|
||||
uint64_t time[4];
|
||||
} __attribute__((packed));
|
||||
|
||||
struct compat_sched_poll {
/* This is actually a guest virtual address which points to ports. */
uint32_t ports;
unsigned int nr_ports;
uint64_t timeout;
};
|
||||
|
||||
#endif /* __ARCH_X86_KVM_XEN_H__ */
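/*
 * Layout note for compat_sched_poll above (a sketch, not part of the
 * patch): with the 4-byte 'ports' and 'nr_ports' followed by the 8-byte
 * 'timeout', the struct is 4 + 4 + 8 = 16 bytes with 'timeout' at
 * offset 8 on both i386 and x86-64, which is what the
 * BUILD_BUG_ON(sizeof(sp32) != 16) in kvm_xen_schedop_poll() relies on.
 * The 32-bit 'ports' value is a guest virtual address and is simply
 * widened into the native sched_poll; the port array it points to holds
 * uint32_t entries in both ABIs, so no further conversion is needed.
 */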
|
||||
|
@ -50,8 +50,8 @@
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
|
||||
* in kvm, other bits are visible for userspace which are defined in
|
||||
* The bit 16 ~ bit 31 of kvm_userspace_memory_region::flags are internally
|
||||
* used in kvm, other bits are visible for userspace which are defined in
|
||||
* include/linux/kvm_host.h.
|
||||
*/
|
||||
#define KVM_MEMSLOT_INVALID (1UL << 16)
|
||||
@ -1262,18 +1262,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
|
||||
* kvm_gpc_init - initialize gfn_to_pfn_cache.
|
||||
*
|
||||
* @gpc: struct gfn_to_pfn_cache object.
|
||||
*
|
||||
* This sets up a gfn_to_pfn_cache by initializing locks. Note, the cache must
|
||||
* be zero-allocated (or zeroed by the caller before init).
|
||||
*/
|
||||
void kvm_gpc_init(struct gfn_to_pfn_cache *gpc);
|
||||
|
||||
/**
|
||||
* kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest
|
||||
* physical address.
|
||||
*
|
||||
* @kvm: pointer to kvm instance.
|
||||
* @gpc: struct gfn_to_pfn_cache object.
|
||||
* @vcpu: vCPU to be used for marking pages dirty and to be woken on
|
||||
* invalidation.
|
||||
* @usage: indicates if the resulting host physical PFN is used while
|
||||
@ -1282,28 +1271,36 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc);
|
||||
* changes!---will also force @vcpu to exit the guest and
|
||||
* refresh the cache); and/or if the PFN used directly
|
||||
* by KVM (and thus needs a kernel virtual mapping).
|
||||
*
|
||||
* This sets up a gfn_to_pfn_cache by initializing locks and assigning the
|
||||
* immutable attributes. Note, the cache must be zero-allocated (or zeroed by
|
||||
* the caller before init).
|
||||
*/
|
||||
void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
|
||||
struct kvm_vcpu *vcpu, enum pfn_cache_usage usage);
|
||||
|
||||
/**
|
||||
* kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest
|
||||
* physical address.
|
||||
*
|
||||
* @gpc: struct gfn_to_pfn_cache object.
|
||||
* @gpa: guest physical address to map.
|
||||
* @len: sanity check; the range being accessed must fit a single page.
|
||||
*
|
||||
* @return: 0 for success.
|
||||
* -EINVAL for a mapping which would cross a page boundary.
|
||||
* -EFAULT for an untranslatable guest physical address.
|
||||
* -EFAULT for an untranslatable guest physical address.
|
||||
*
|
||||
* This primes a gfn_to_pfn_cache and links it into the @kvm's list for
|
||||
* invalidations to be processed. Callers are required to use
|
||||
* kvm_gfn_to_pfn_cache_check() to ensure that the cache is valid before
|
||||
* accessing the target page.
|
||||
* This primes a gfn_to_pfn_cache and links it into the @gpc->kvm's list for
|
||||
* invalidations to be processed. Callers are required to use kvm_gpc_check()
|
||||
* to ensure that the cache is valid before accessing the target page.
|
||||
*/
|
||||
int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
|
||||
struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
|
||||
gpa_t gpa, unsigned long len);
|
||||
int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len);
|
||||
|
||||
/**
|
||||
* kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache.
|
||||
* kvm_gpc_check - check validity of a gfn_to_pfn_cache.
|
||||
*
|
||||
* @kvm: pointer to kvm instance.
|
||||
* @gpc: struct gfn_to_pfn_cache object.
|
||||
* @gpa: current guest physical address to map.
* @len: sanity check; the range being accessed must fit a single page.
*
* @return: %true if the cache is still valid and the address matches.
@ -1316,52 +1313,35 @@ int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
* Callers in IN_GUEST_MODE may do so without locking, although they should
* still hold a read lock on kvm->srcu for the memslot checks.
*/
bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
gpa_t gpa, unsigned long len);
bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len);
|
||||
|
||||
/**
|
||||
* kvm_gfn_to_pfn_cache_refresh - update a previously initialized cache.
|
||||
* kvm_gpc_refresh - update a previously initialized cache.
|
||||
*
|
||||
* @kvm: pointer to kvm instance.
|
||||
* @gpc: struct gfn_to_pfn_cache object.
|
||||
* @gpa: updated guest physical address to map.
|
||||
* @len: sanity check; the range being accessed must fit a single page.
|
||||
*
|
||||
* @return: 0 for success.
|
||||
* -EINVAL for a mapping which would cross a page boundary.
|
||||
* -EFAULT for an untranslatable guest physical address.
|
||||
* -EFAULT for an untranslatable guest physical address.
|
||||
*
|
||||
* This will attempt to refresh a gfn_to_pfn_cache. Note that a successful
|
||||
* returm from this function does not mean the page can be immediately
|
||||
* return from this function does not mean the page can be immediately
|
||||
* accessed because it may have raced with an invalidation. Callers must
|
||||
* still lock and check the cache status, as this function does not return
|
||||
* with the lock still held to permit access.
|
||||
*/
|
||||
int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
|
||||
gpa_t gpa, unsigned long len);
|
||||
|
||||
/**
|
||||
* kvm_gfn_to_pfn_cache_unmap - temporarily unmap a gfn_to_pfn_cache.
|
||||
*
|
||||
* @kvm: pointer to kvm instance.
|
||||
* @gpc: struct gfn_to_pfn_cache object.
|
||||
*
|
||||
* This unmaps the referenced page. The cache is left in the invalid state
|
||||
* but at least the mapping from GPA to userspace HVA will remain cached
|
||||
* and can be reused on a subsequent refresh.
|
||||
*/
|
||||
void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
|
||||
int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len);
|
||||
|
||||
/**
* kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache.
*
* @kvm: pointer to kvm instance.
* @gpc: struct gfn_to_pfn_cache object.
*
* This removes a cache from the @kvm's list to be processed on MMU notifier
* This removes a cache from the VM's list to be processed on MMU notifier
* invocation.
*/
void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc);
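/*
 * Illustrative sketch (not part of this patch, hypothetical helper) of
 * how a host-side user is expected to drive the reworked API above,
 * mirroring the pattern arch/x86/kvm/xen.c uses in this series: init
 * once with the immutable @kvm/@vcpu/@usage attributes, activate for a
 * guest physical address, then check (and refresh on failure) under
 * gpc->lock around every access to gpc->khva.
 */
static int __maybe_unused example_gpc_write_u64(struct kvm *kvm,
						struct gfn_to_pfn_cache *gpc,
						gpa_t gpa, u64 val)
{
	unsigned long flags;
	int ret;

	/* Normally done once, at vCPU/VM creation time. */
	kvm_gpc_init(gpc, kvm, NULL, KVM_HOST_USES_PFN);

	ret = kvm_gpc_activate(gpc, gpa, sizeof(val));
	if (ret)
		return ret;

	read_lock_irqsave(&gpc->lock, flags);
	while (!kvm_gpc_check(gpc, sizeof(val))) {
		read_unlock_irqrestore(&gpc->lock, flags);

		ret = kvm_gpc_refresh(gpc, sizeof(val));
		if (ret)
			goto out;

		read_lock_irqsave(&gpc->lock, flags);
	}

	*(u64 *)gpc->khva = val;
	read_unlock_irqrestore(&gpc->lock, flags);

	mark_page_dirty_in_slot(kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
	ret = 0;
out:
	kvm_gpc_deactivate(gpc);
	return ret;
}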
|
||||
|
||||
void kvm_sigset_activate(struct kvm_vcpu *vcpu);
|
||||
void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);
|
||||
|
@ -67,6 +67,7 @@ struct gfn_to_pfn_cache {
|
||||
gpa_t gpa;
|
||||
unsigned long uhva;
|
||||
struct kvm_memory_slot *memslot;
|
||||
struct kvm *kvm;
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct list_head list;
|
||||
rwlock_t lock;
|
||||
|
@ -86,14 +86,6 @@ struct kvm_debug_guest {
|
||||
/* *** End of deprecated interfaces *** */
|
||||
|
||||
|
||||
/* for KVM_CREATE_MEMORY_REGION */
|
||||
struct kvm_memory_region {
|
||||
__u32 slot;
|
||||
__u32 flags;
|
||||
__u64 guest_phys_addr;
|
||||
__u64 memory_size; /* bytes */
|
||||
};
|
||||
|
||||
/* for KVM_SET_USER_MEMORY_REGION */
|
||||
struct kvm_userspace_memory_region {
|
||||
__u32 slot;
|
||||
@ -104,9 +96,9 @@ struct kvm_userspace_memory_region {
|
||||
};
|
||||
|
||||
/*
|
||||
* The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace,
|
||||
* other bits are reserved for kvm internal use which are defined in
|
||||
* include/linux/kvm_host.h.
|
||||
* The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for
|
||||
* userspace, other bits are reserved for kvm internal use which are defined
|
||||
* in include/linux/kvm_host.h.
|
||||
*/
|
||||
#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
|
||||
#define KVM_MEM_READONLY (1UL << 1)
|
||||
@ -1272,6 +1264,7 @@ struct kvm_x86_mce {
|
||||
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
|
||||
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
|
||||
#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
|
||||
#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
|
||||
|
||||
struct kvm_xen_hvm_config {
|
||||
__u32 flags;
|
||||
@ -1442,18 +1435,12 @@ struct kvm_vfio_spapr_tce {
|
||||
__s32 tablefd;
|
||||
};
|
||||
|
||||
/*
|
||||
* ioctls for VM fds
|
||||
*/
|
||||
#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region)
|
||||
/*
|
||||
* KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
|
||||
* a vcpu fd.
|
||||
*/
|
||||
#define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
|
||||
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
|
||||
/* KVM_SET_MEMORY_ALIAS is obsolete: */
|
||||
#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias)
|
||||
#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44)
|
||||
#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45)
|
||||
#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
|
||||
@ -1777,6 +1764,7 @@ struct kvm_xen_hvm_attr {
|
||||
union {
|
||||
__u8 long_mode;
|
||||
__u8 vector;
|
||||
__u8 runstate_update_flag;
|
||||
struct {
|
||||
__u64 gfn;
|
||||
} shared_info;
|
||||
@ -1817,6 +1805,8 @@ struct kvm_xen_hvm_attr {
|
||||
/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
|
||||
#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3
|
||||
#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4
|
||||
/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
|
||||
#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
|
||||
|
||||
/* Per-vCPU Xen attributes */
|
||||
#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
|
||||
|
@ -71,10 +71,14 @@ static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
|
||||
return cmpxchg(&v->counter, old, new);
|
||||
}
|
||||
|
||||
static inline int atomic_test_and_set_bit(long nr, unsigned long *addr)
|
||||
static inline int test_and_set_bit(long nr, unsigned long *addr)
|
||||
{
|
||||
GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, "Ir", nr, "%0", "c");
|
||||
}
|
||||
|
||||
static inline int test_and_clear_bit(long nr, unsigned long *addr)
|
||||
{
|
||||
GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, "Ir", nr, "%0", "c");
|
||||
}
|
||||
|
||||
#endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */
|
||||
|
@ -53,14 +53,6 @@
|
||||
/* Architectural interrupt line count. */
|
||||
#define KVM_NR_INTERRUPTS 256
|
||||
|
||||
struct kvm_memory_alias {
|
||||
__u32 slot; /* this has a different namespace than memory slots */
|
||||
__u32 flags;
|
||||
__u64 guest_phys_addr;
|
||||
__u64 memory_size;
|
||||
__u64 target_phys_addr;
|
||||
};
|
||||
|
||||
/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
|
||||
struct kvm_pic_state {
|
||||
__u8 last_irr; /* edge detection */
|
||||
|
@ -70,7 +70,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int oldval, int newval)
|
||||
return cmpxchg(&(v)->counter, oldval, newval);
|
||||
}
|
||||
|
||||
static inline int atomic_test_and_set_bit(long nr, unsigned long *addr)
|
||||
static inline int test_and_set_bit(long nr, unsigned long *addr)
|
||||
{
|
||||
unsigned long mask = BIT_MASK(nr);
|
||||
long old;
|
||||
@ -81,4 +81,15 @@ static inline int atomic_test_and_set_bit(long nr, unsigned long *addr)
|
||||
return !!(old & mask);
|
||||
}
|
||||
|
||||
static inline int test_and_clear_bit(long nr, unsigned long *addr)
|
||||
{
|
||||
unsigned long mask = BIT_MASK(nr);
|
||||
long old;
|
||||
|
||||
addr += BIT_WORD(nr);
|
||||
|
||||
old = __sync_fetch_and_and(addr, ~mask);
|
||||
return !!(old & mask);
|
||||
}
|
||||
|
||||
#endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */
|
||||
|
@ -5,14 +5,11 @@
|
||||
#include <asm/types.h>
|
||||
#include <asm/bitsperlong.h>
|
||||
|
||||
static inline void set_bit(int nr, unsigned long *addr)
|
||||
{
|
||||
addr[nr / __BITS_PER_LONG] |= 1UL << (nr % __BITS_PER_LONG);
|
||||
}
|
||||
|
||||
static inline void clear_bit(int nr, unsigned long *addr)
|
||||
{
|
||||
addr[nr / __BITS_PER_LONG] &= ~(1UL << (nr % __BITS_PER_LONG));
|
||||
}
|
||||
/*
|
||||
* Just alias the test versions, all of the compiler built-in atomics "fetch",
|
||||
* and optimizing compile-time constants on x86 isn't worth the complexity.
|
||||
*/
|
||||
#define set_bit test_and_set_bit
|
||||
#define clear_bit test_and_clear_bit
|
||||
|
||||
#endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */
|
||||
|
@ -77,40 +77,6 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
|
||||
__bitmap_or(dst, src1, src2, nbits);
|
||||
}
|
||||
|
||||
/**
|
||||
* test_and_set_bit - Set a bit and return its old value
|
||||
* @nr: Bit to set
|
||||
* @addr: Address to count from
|
||||
*/
|
||||
static inline int test_and_set_bit(int nr, unsigned long *addr)
|
||||
{
|
||||
unsigned long mask = BIT_MASK(nr);
|
||||
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
|
||||
unsigned long old;
|
||||
|
||||
old = *p;
|
||||
*p = old | mask;
|
||||
|
||||
return (old & mask) != 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* test_and_clear_bit - Clear a bit and return its old value
|
||||
* @nr: Bit to clear
|
||||
* @addr: Address to count from
|
||||
*/
|
||||
static inline int test_and_clear_bit(int nr, unsigned long *addr)
|
||||
{
|
||||
unsigned long mask = BIT_MASK(nr);
|
||||
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
|
||||
unsigned long old;
|
||||
|
||||
old = *p;
|
||||
*p = old & ~mask;
|
||||
|
||||
return (old & mask) != 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* bitmap_zalloc - Allocate bitmap
|
||||
* @nbits: Number of bits
|
||||
|
@ -86,14 +86,6 @@ struct kvm_debug_guest {
|
||||
/* *** End of deprecated interfaces *** */
|
||||
|
||||
|
||||
/* for KVM_CREATE_MEMORY_REGION */
|
||||
struct kvm_memory_region {
|
||||
__u32 slot;
|
||||
__u32 flags;
|
||||
__u64 guest_phys_addr;
|
||||
__u64 memory_size; /* bytes */
|
||||
};
|
||||
|
||||
/* for KVM_SET_USER_MEMORY_REGION */
|
||||
struct kvm_userspace_memory_region {
|
||||
__u32 slot;
|
||||
@ -104,9 +96,9 @@ struct kvm_userspace_memory_region {
|
||||
};
|
||||
|
||||
/*
|
||||
* The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace,
|
||||
* other bits are reserved for kvm internal use which are defined in
|
||||
* include/linux/kvm_host.h.
|
||||
* The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for
|
||||
* userspace, other bits are reserved for kvm internal use which are defined
|
||||
* in include/linux/kvm_host.h.
|
||||
*/
|
||||
#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
|
||||
#define KVM_MEM_READONLY (1UL << 1)
|
||||
@ -1437,18 +1429,12 @@ struct kvm_vfio_spapr_tce {
|
||||
__s32 tablefd;
|
||||
};
|
||||
|
||||
/*
|
||||
* ioctls for VM fds
|
||||
*/
|
||||
#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region)
|
||||
/*
|
||||
* KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
|
||||
* a vcpu fd.
|
||||
*/
|
||||
#define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
|
||||
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
|
||||
/* KVM_SET_MEMORY_ALIAS is obsolete: */
|
||||
#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias)
|
||||
#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44)
|
||||
#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45)
|
||||
#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
|
||||
|
@ -70,7 +70,7 @@ static int do_for_each_set_bit(unsigned int num_bits)
|
||||
bitmap_zero(to_test, num_bits);
|
||||
skip = num_bits / set_bits;
|
||||
for (i = 0; i < num_bits; i += skip)
|
||||
set_bit(i, to_test);
|
||||
__set_bit(i, to_test);
|
||||
|
||||
for (i = 0; i < outer_iterations; i++) {
|
||||
old = accumulator;
|
||||
|
@ -230,7 +230,7 @@ static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he,
|
||||
"WARNING: no sample cpu value"))
|
||||
return;
|
||||
|
||||
set_bit(sample->cpu, c2c_he->cpuset);
|
||||
__set_bit(sample->cpu, c2c_he->cpuset);
|
||||
}
|
||||
|
||||
static void c2c_he__set_node(struct c2c_hist_entry *c2c_he,
|
||||
@ -247,7 +247,7 @@ static void c2c_he__set_node(struct c2c_hist_entry *c2c_he,
|
||||
if (WARN_ONCE(node < 0, "WARNING: failed to find node\n"))
|
||||
return;
|
||||
|
||||
set_bit(node, c2c_he->nodeset);
|
||||
__set_bit(node, c2c_he->nodeset);
|
||||
|
||||
if (c2c_he->paddr != sample->phys_addr) {
|
||||
c2c_he->paddr_cnt++;
|
||||
@ -2318,7 +2318,7 @@ static int setup_nodes(struct perf_session *session)
|
||||
continue;
|
||||
|
||||
perf_cpu_map__for_each_cpu(cpu, idx, map) {
|
||||
set_bit(cpu.cpu, set);
|
||||
__set_bit(cpu.cpu, set);
|
||||
|
||||
if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug"))
|
||||
return -EINVAL;
|
||||
|
@ -216,7 +216,7 @@ static struct kwork_atom *atom_new(struct perf_kwork *kwork,
|
||||
list_add_tail(&page->list, &kwork->atom_page_list);
|
||||
|
||||
found_atom:
|
||||
set_bit(i, page->bitmap);
|
||||
__set_bit(i, page->bitmap);
|
||||
atom->time = sample->time;
|
||||
atom->prev = NULL;
|
||||
atom->page_addr = page;
|
||||
@ -229,8 +229,8 @@ static void atom_free(struct kwork_atom *atom)
|
||||
if (atom->prev != NULL)
|
||||
atom_free(atom->prev);
|
||||
|
||||
clear_bit(atom->bit_inpage,
|
||||
((struct kwork_atom_page *)atom->page_addr)->bitmap);
|
||||
__clear_bit(atom->bit_inpage,
|
||||
((struct kwork_atom_page *)atom->page_addr)->bitmap);
|
||||
}
|
||||
|
||||
static void atom_del(struct kwork_atom *atom)
|
||||
|
@ -3555,7 +3555,7 @@ static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cp
|
||||
/* Return ENODEV is input cpu is greater than max cpu */
|
||||
if ((unsigned long)cpu.cpu > mask->nbits)
|
||||
return -ENODEV;
|
||||
set_bit(cpu.cpu, mask->bits);
|
||||
__set_bit(cpu.cpu, mask->bits);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -3627,8 +3627,8 @@ static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map
|
||||
pr_debug("nr_threads: %d\n", rec->nr_threads);
|
||||
|
||||
for (t = 0; t < rec->nr_threads; t++) {
|
||||
set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
|
||||
set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
|
||||
__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
|
||||
__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
|
||||
if (verbose) {
|
||||
pr_debug("thread_masks[%d]: ", t);
|
||||
mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
|
||||
|
@ -1573,7 +1573,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
|
||||
|
||||
if (sched->map.comp) {
|
||||
cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
|
||||
if (!test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) {
|
||||
if (!__test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) {
|
||||
sched->map.comp_cpus[cpus_nr++] = this_cpu;
|
||||
new_cpu = true;
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ static unsigned long *get_bitmap(const char *str, int nbits)
|
||||
|
||||
if (map && bm) {
|
||||
for (i = 0; i < perf_cpu_map__nr(map); i++)
|
||||
set_bit(perf_cpu_map__cpu(map, i).cpu, bm);
|
||||
__set_bit(perf_cpu_map__cpu(map, i).cpu, bm);
|
||||
}
|
||||
|
||||
if (map)
|
||||
|
@ -33,7 +33,7 @@ static unsigned long *get_bitmap(const char *str, int nbits)
|
||||
int i;
|
||||
|
||||
perf_cpu_map__for_each_cpu(cpu, i, map)
|
||||
set_bit(cpu.cpu, bm);
|
||||
__set_bit(cpu.cpu, bm);
|
||||
}
|
||||
|
||||
if (map)
|
||||
|
@ -58,14 +58,14 @@ void affinity__set(struct affinity *a, int cpu)
|
||||
return;
|
||||
|
||||
a->changed = true;
|
||||
set_bit(cpu, a->sched_cpus);
|
||||
__set_bit(cpu, a->sched_cpus);
|
||||
/*
|
||||
* We ignore errors because affinity is just an optimization.
|
||||
* This could happen for example with isolated CPUs or cpusets.
|
||||
* In this case the IPIs inside the kernel's perf API still work.
|
||||
*/
|
||||
sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->sched_cpus);
|
||||
clear_bit(cpu, a->sched_cpus);
|
||||
__clear_bit(cpu, a->sched_cpus);
|
||||
}
|
||||
|
||||
static void __affinity__cleanup(struct affinity *a)
|
||||
|
@ -79,12 +79,12 @@ struct perf_file_attr {
|
||||
|
||||
void perf_header__set_feat(struct perf_header *header, int feat)
|
||||
{
|
||||
set_bit(feat, header->adds_features);
|
||||
__set_bit(feat, header->adds_features);
|
||||
}
|
||||
|
||||
void perf_header__clear_feat(struct perf_header *header, int feat)
|
||||
{
|
||||
clear_bit(feat, header->adds_features);
|
||||
__clear_bit(feat, header->adds_features);
|
||||
}
|
||||
|
||||
bool perf_header__has_feat(const struct perf_header *header, int feat)
|
||||
@ -1358,7 +1358,7 @@ static int memory_node__read(struct memory_node *n, unsigned long idx)
|
||||
rewinddir(dir);
|
||||
|
||||
for_each_memory(phys, dir) {
|
||||
set_bit(phys, n->set);
|
||||
__set_bit(phys, n->set);
|
||||
}
|
||||
|
||||
closedir(dir);
|
||||
@ -3952,7 +3952,7 @@ int perf_file_header__read(struct perf_file_header *header,
|
||||
|
||||
if (!test_bit(HEADER_HOSTNAME, header->adds_features)) {
|
||||
bitmap_zero(header->adds_features, HEADER_FEAT_BITS);
|
||||
set_bit(HEADER_BUILD_ID, header->adds_features);
|
||||
__set_bit(HEADER_BUILD_ID, header->adds_features);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -111,7 +111,7 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, struct perf_cpu cpu, i
|
||||
pr_err("Failed to allocate node mask for mbind: error %m\n");
|
||||
return -1;
|
||||
}
|
||||
set_bit(node_index, node_mask);
|
||||
__set_bit(node_index, node_mask);
|
||||
if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1, 0)) {
|
||||
pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n",
|
||||
data, data + mmap_len, node_index);
|
||||
@ -256,7 +256,7 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask)
|
||||
for (idx = 0; idx < nr_cpus; idx++) {
|
||||
cpu = perf_cpu_map__cpu(cpu_map, idx); /* map c index to online cpu index */
|
||||
if (cpu__get_node(cpu) == node)
|
||||
set_bit(cpu.cpu, mask->bits);
|
||||
__set_bit(cpu.cpu, mask->bits);
|
||||
}
|
||||
}
|
||||
|
||||
@ -270,7 +270,7 @@ static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *
|
||||
if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1)
|
||||
build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask);
|
||||
else if (mp->affinity == PERF_AFFINITY_CPU)
|
||||
set_bit(map->core.cpu.cpu, map->affinity_mask.bits);
|
||||
__set_bit(map->core.cpu.cpu, map->affinity_mask.bits);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1513,7 +1513,7 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to)
|
||||
|
||||
memset(bits, 0, BITS_TO_BYTES(PERF_PMU_FORMAT_BITS));
|
||||
for (b = from; b <= to; b++)
|
||||
set_bit(b, bits);
|
||||
__set_bit(b, bits);
|
||||
}
|
||||
|
||||
void perf_pmu__del_formats(struct list_head *formats)
|
||||
|
@ -365,7 +365,7 @@ static void perl_process_tracepoint(struct perf_sample *sample,
|
||||
|
||||
sprintf(handler, "%s::%s", event->system, event->name);
|
||||
|
||||
if (!test_and_set_bit(event->id, events_defined))
|
||||
if (!__test_and_set_bit(event->id, events_defined))
|
||||
define_event_symbols(event, handler, event->print_fmt.args);
|
||||
|
||||
s = nsecs / NSEC_PER_SEC;
|
||||
|
@ -933,7 +933,7 @@ static void python_process_tracepoint(struct perf_sample *sample,
|
||||
|
||||
sprintf(handler_name, "%s__%s", event->system, event->name);
|
||||
|
||||
if (!test_and_set_bit(event->id, events_defined))
|
||||
if (!__test_and_set_bit(event->id, events_defined))
|
||||
define_event_symbols(event, handler_name, event->print_fmt.args);
|
||||
|
||||
handler = get_handler(handler_name);
|
||||
|
@ -2748,7 +2748,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
|
||||
goto out_delete_map;
|
||||
}
|
||||
|
||||
set_bit(cpu.cpu, cpu_bitmap);
|
||||
__set_bit(cpu.cpu, cpu_bitmap);
|
||||
}
|
||||
|
||||
err = 0;
|
||||
|
@ -741,7 +741,7 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus)
|
||||
break;
|
||||
}
|
||||
|
||||
set_bit(c.cpu, cpumask_bits(b));
|
||||
__set_bit(c.cpu, cpumask_bits(b));
|
||||
}
|
||||
|
||||
perf_cpu_map__put(m);
|
||||
|
@ -222,7 +222,7 @@ static void *test_vcpu_run(void *arg)
|
||||
|
||||
/* Currently, any exit from guest is an indication of completion */
|
||||
pthread_mutex_lock(&vcpu_done_map_lock);
|
||||
set_bit(vcpu_idx, vcpu_done_map);
|
||||
__set_bit(vcpu_idx, vcpu_done_map);
|
||||
pthread_mutex_unlock(&vcpu_done_map_lock);
|
||||
|
||||
switch (get_ucall(vcpu, &uc)) {
|
||||
|
@ -375,10 +375,6 @@ static void guest_svc_handler(struct ex_regs *regs)
|
||||
svc_addr = regs->pc;
|
||||
}
|
||||
|
||||
enum single_step_op {
|
||||
SINGLE_STEP_ENABLE = 0,
|
||||
};
|
||||
|
||||
static void guest_code_ss(int test_cnt)
|
||||
{
|
||||
uint64_t i;
|
||||
@ -389,8 +385,16 @@ static void guest_code_ss(int test_cnt)
|
||||
w_bvr = i << 2;
|
||||
w_wvr = i << 2;
|
||||
|
||||
/* Enable Single Step execution */
|
||||
GUEST_SYNC(SINGLE_STEP_ENABLE);
|
||||
/*
|
||||
* Enable Single Step execution. Note! This _must_ be a bare
|
||||
* ucall as the ucall() path uses atomic operations to manage
|
||||
* the ucall structures, and the built-in "atomics" are usually
|
||||
* implemented via exclusive access instructions. The exclusive
|
||||
* monitor is cleared on ERET, and so taking debug exceptions
|
||||
* during a LDREX=>STREX sequence will prevent forward progress
|
||||
* and hang the guest/test.
|
||||
*/
|
||||
GUEST_UCALL_NONE();
|
||||
|
||||
/*
|
||||
* The userspace will verify that the pc is as expected during
|
||||
@ -484,12 +488,9 @@ void test_single_step_from_userspace(int test_cnt)
|
||||
break;
|
||||
}
|
||||
|
||||
TEST_ASSERT(cmd == UCALL_SYNC,
|
||||
TEST_ASSERT(cmd == UCALL_NONE,
|
||||
"Unexpected ucall cmd 0x%lx", cmd);
|
||||
|
||||
TEST_ASSERT(uc.args[1] == SINGLE_STEP_ENABLE,
|
||||
"Unexpected ucall action 0x%lx", uc.args[1]);
|
||||
|
||||
debug.control = KVM_GUESTDBG_ENABLE |
|
||||
KVM_GUESTDBG_SINGLESTEP;
|
||||
ss_enable = true;
|
||||
|
@ -46,6 +46,7 @@
|
||||
#include "test_util.h"
|
||||
#include "memstress.h"
|
||||
#include "guest_modes.h"
|
||||
#include "processor.h"
|
||||
|
||||
/* Global variable used to synchronize all of the vCPU threads. */
|
||||
static int iteration;
|
||||
@ -177,16 +178,21 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm,
|
||||
* access tracking but low enough as to not make the test too brittle
|
||||
* over time and across architectures.
|
||||
*
|
||||
* Note that when run in nested virtualization, this check will trigger
|
||||
* much more frequently because TLB size is unlimited and since no flush
|
||||
* happens, much more pages are cached there and guest won't see the
|
||||
* "idle" bit cleared.
|
||||
* When running the guest as a nested VM, "warn" instead of asserting
|
||||
* as the TLB size is effectively unlimited and the KVM doesn't
|
||||
* explicitly flush the TLB when aging SPTEs. As a result, more pages
|
||||
* are cached and the guest won't see the "idle" bit cleared.
|
||||
*/
|
||||
if (still_idle < pages / 10)
|
||||
printf("WARNING: vCPU%d: Too many pages still idle (%" PRIu64
|
||||
"out of %" PRIu64 "), this will affect performance results"
|
||||
".\n",
|
||||
if (still_idle >= pages / 10) {
|
||||
#ifdef __x86_64__
|
||||
TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR),
|
||||
"vCPU%d: Too many pages still idle (%lu out of %lu)",
|
||||
vcpu_idx, still_idle, pages);
|
||||
#endif
|
||||
printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), "
|
||||
"this will affect performance results.\n",
|
||||
vcpu_idx, still_idle, pages);
|
||||
}
|
||||
|
||||
close(page_idle_fd);
|
||||
close(pagemap_fd);
|
||||
|
@ -398,7 +398,7 @@ static void help(char *name)
|
||||
printf(" -x: Split the memory region into this number of memslots.\n"
|
||||
" (default: 1)\n");
|
||||
printf(" -w: specify the percentage of pages which should be written to\n"
|
||||
" as an integer from 0-100 inclusive. This is probabalistic,\n"
|
||||
" as an integer from 0-100 inclusive. This is probabilistic,\n"
|
||||
" so -w X means each page has an X%% chance of writing\n"
|
||||
" and a (100-X)%% chance of reading.\n"
|
||||
" (default: 100 i.e. all pages are written to.)\n");
|
||||
|
@ -47,20 +47,20 @@
|
||||
# define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7)
|
||||
# define test_bit_le(nr, addr) \
|
||||
test_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
|
||||
# define set_bit_le(nr, addr) \
|
||||
set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
|
||||
# define clear_bit_le(nr, addr) \
|
||||
clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
|
||||
# define test_and_set_bit_le(nr, addr) \
|
||||
test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
|
||||
# define test_and_clear_bit_le(nr, addr) \
|
||||
test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
|
||||
# define __set_bit_le(nr, addr) \
|
||||
__set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
|
||||
# define __clear_bit_le(nr, addr) \
|
||||
__clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
|
||||
# define __test_and_set_bit_le(nr, addr) \
|
||||
__test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
|
||||
# define __test_and_clear_bit_le(nr, addr) \
|
||||
__test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
|
||||
#else
|
||||
# define test_bit_le test_bit
|
||||
# define set_bit_le set_bit
|
||||
# define clear_bit_le clear_bit
|
||||
# define test_and_set_bit_le test_and_set_bit
|
||||
# define test_and_clear_bit_le test_and_clear_bit
|
||||
# define test_bit_le test_bit
|
||||
# define __set_bit_le __set_bit
|
||||
# define __clear_bit_le __clear_bit
|
||||
# define __test_and_set_bit_le __test_and_set_bit
|
||||
# define __test_and_clear_bit_le __test_and_clear_bit
|
||||
#endif
|
||||
|
||||
#define TEST_DIRTY_RING_COUNT 65536
|
||||
@ -328,7 +328,7 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
|
||||
TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
|
||||
"0x%llx >= 0x%x", cur->offset, num_pages);
|
||||
//pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset);
|
||||
set_bit_le(cur->offset, bitmap);
|
||||
__set_bit_le(cur->offset, bitmap);
|
||||
dirty_ring_last_page = cur->offset;
|
||||
dirty_gfn_set_collected(cur);
|
||||
(*fetch_index)++;
|
||||
@ -585,7 +585,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
|
||||
value_ptr = host_test_mem + page * host_page_size;
|
||||
|
||||
/* If this is a special page that we were tracking... */
|
||||
if (test_and_clear_bit_le(page, host_bmap_track)) {
|
||||
if (__test_and_clear_bit_le(page, host_bmap_track)) {
|
||||
host_track_next_count++;
|
||||
TEST_ASSERT(test_bit_le(page, bmap),
|
||||
"Page %"PRIu64" should have its dirty bit "
|
||||
@ -593,7 +593,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
|
||||
page);
|
||||
}
|
||||
|
||||
if (test_and_clear_bit_le(page, bmap)) {
|
||||
if (__test_and_clear_bit_le(page, bmap)) {
|
||||
bool matched;
|
||||
|
||||
host_dirty_count++;
|
||||
@ -686,7 +686,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
|
||||
* should report its dirtiness in the
|
||||
* next run
|
||||
*/
|
||||
set_bit_le(page, host_bmap_track);
|
||||
__set_bit_le(page, host_bmap_track);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -22,6 +22,18 @@
|
||||
|
||||
#include "sparsebit.h"
|
||||
|
||||
/*
|
||||
* Provide a version of static_assert() that is guaranteed to have an optional
|
||||
* message param. If _ISOC11_SOURCE is defined, glibc (/usr/include/assert.h)
|
||||
* #undefs and #defines static_assert() as a direct alias to _Static_assert(),
|
||||
* i.e. effectively makes the message mandatory. Many KVM selftests #define
|
||||
* _GNU_SOURCE for various reasons, and _GNU_SOURCE implies _ISOC11_SOURCE. As
|
||||
* a result, static_assert() behavior is non-deterministic and may or may not
|
||||
* require a message depending on #include order.
|
||||
*/
|
||||
#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg)
|
||||
#define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr)
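/*
 * Both forms now compile regardless of whether glibc's assert.h has
 * rewritten static_assert(), e.g. (illustrative):
 *
 *	kvm_static_assert(sizeof(uint64_t) == 8);
 *	kvm_static_assert(sizeof(uint64_t) == 8, "u64 must be 8 bytes");
 *
 * The single-argument form stringifies the expression as the message.
 */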
|
||||
|
||||
#define KVM_DEV_PATH "/dev/kvm"
|
||||
#define KVM_MAX_VCPUS 512
|
||||
|
||||
@ -219,7 +231,7 @@ static inline bool kvm_has_cap(long cap)
|
||||
|
||||
#define kvm_do_ioctl(fd, cmd, arg) \
|
||||
({ \
|
||||
static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd), ""); \
|
||||
kvm_static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd)); \
|
||||
ioctl(fd, cmd, arg); \
|
||||
})
|
||||
|
||||
|
@ -35,6 +35,14 @@ void ucall(uint64_t cmd, int nargs, ...);
|
||||
uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
|
||||
void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
|
||||
|
||||
/*
|
||||
* Perform userspace call without any associated data. This bare call avoids
|
||||
* allocating a ucall struct, which can be useful if the atomic operations in
|
||||
* the full ucall() are problematic and/or unwanted. Note, this will come out
|
||||
* as UCALL_NONE on the backend.
|
||||
*/
|
||||
#define GUEST_UCALL_NONE() ucall_arch_do_ucall((vm_vaddr_t)NULL)
|
||||
|
||||
#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
|
||||
ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
|
||||
#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
|
||||
|
@ -72,11 +72,11 @@ struct kvm_x86_cpu_feature {
|
||||
.bit = __bit, \
|
||||
}; \
|
||||
\
|
||||
static_assert((fn & 0xc0000000) == 0 || \
|
||||
(fn & 0xc0000000) == 0x40000000 || \
|
||||
(fn & 0xc0000000) == 0x80000000 || \
|
||||
(fn & 0xc0000000) == 0xc0000000); \
|
||||
static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE)); \
|
||||
kvm_static_assert((fn & 0xc0000000) == 0 || \
|
||||
(fn & 0xc0000000) == 0x40000000 || \
|
||||
(fn & 0xc0000000) == 0x80000000 || \
|
||||
(fn & 0xc0000000) == 0xc0000000); \
|
||||
kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE)); \
|
||||
feature; \
|
||||
})
|
||||
|
||||
@ -94,6 +94,7 @@ struct kvm_x86_cpu_feature {
|
||||
#define X86_FEATURE_XSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26)
|
||||
#define X86_FEATURE_OSXSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27)
|
||||
#define X86_FEATURE_RDRAND KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30)
|
||||
#define X86_FEATURE_HYPERVISOR KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31)
|
||||
#define X86_FEATURE_PAE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6)
|
||||
#define X86_FEATURE_MCE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7)
|
||||
#define X86_FEATURE_APIC KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9)
|
||||
@ -102,6 +103,7 @@ struct kvm_x86_cpu_feature {
|
||||
#define X86_FEATURE_XMM2 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26)
|
||||
#define X86_FEATURE_FSGSBASE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0)
|
||||
#define X86_FEATURE_TSC_ADJUST KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1)
|
||||
#define X86_FEATURE_SGX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2)
|
||||
#define X86_FEATURE_HLE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4)
|
||||
#define X86_FEATURE_SMEP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7)
|
||||
#define X86_FEATURE_INVPCID KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10)
|
||||
@ -115,6 +117,7 @@ struct kvm_x86_cpu_feature {
|
||||
#define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
|
||||
#define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
|
||||
#define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
|
||||
#define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30)
|
||||
#define X86_FEATURE_SHSTK KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7)
|
||||
#define X86_FEATURE_IBT KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20)
|
||||
#define X86_FEATURE_AMX_TILE KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24)
|
||||
@ -190,12 +193,12 @@ struct kvm_x86_cpu_property {
|
||||
.hi_bit = high_bit, \
|
||||
}; \
|
||||
\
|
||||
static_assert(low_bit < high_bit); \
|
||||
static_assert((fn & 0xc0000000) == 0 || \
|
||||
(fn & 0xc0000000) == 0x40000000 || \
|
||||
(fn & 0xc0000000) == 0x80000000 || \
|
||||
(fn & 0xc0000000) == 0xc0000000); \
|
||||
static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE)); \
|
||||
kvm_static_assert(low_bit < high_bit); \
|
||||
kvm_static_assert((fn & 0xc0000000) == 0 || \
|
||||
(fn & 0xc0000000) == 0x40000000 || \
|
||||
(fn & 0xc0000000) == 0x80000000 || \
|
||||
(fn & 0xc0000000) == 0xc0000000); \
|
||||
kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE)); \
|
||||
property; \
|
||||
})
|
||||
|
||||
|
@ -44,7 +44,7 @@ static struct ucall *ucall_alloc(void)
GUEST_ASSERT(ucall_pool);

for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (!atomic_test_and_set_bit(i, ucall_pool->in_use)) {
if (!test_and_set_bit(i, ucall_pool->in_use)) {
uc = &ucall_pool->ucalls[i];
memset(uc->args, 0, sizeof(uc->args));
return uc;

@ -552,40 +552,6 @@ static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
vcpu_sregs_set(vcpu, &sregs);
}

void __vm_xsave_require_permission(int bit, const char *name)
{
int kvm_fd;
u64 bitmask;
long rc;
struct kvm_device_attr attr = {
.group = 0,
.attr = KVM_X86_XCOMP_GUEST_SUPP,
.addr = (unsigned long) &bitmask
};

TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));

kvm_fd = open_kvm_dev_path_or_exit();
rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
close(kvm_fd);

if (rc == -1 && (errno == ENXIO || errno == EINVAL))
__TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported");

TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);

__TEST_REQUIRE(bitmask & (1ULL << bit),
"Required XSAVE feature '%s' not supported", name);

TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit));

rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
TEST_ASSERT(bitmask & (1ULL << bit),
"prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx",
bitmask);
}

void kvm_arch_vm_post_create(struct kvm_vm *vm)
{
vm_create_irqchip(vm);

@ -636,21 +602,24 @@ void vcpu_arch_free(struct kvm_vcpu *vcpu)
free(vcpu->cpuid);
}

/* Do not use kvm_supported_cpuid directly except for validity checks. */
static void *kvm_supported_cpuid;

const struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
{
static struct kvm_cpuid2 *cpuid;
int kvm_fd;

if (cpuid)
return cpuid;
if (kvm_supported_cpuid)
return kvm_supported_cpuid;

cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
kvm_supported_cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
kvm_fd = open_kvm_dev_path_or_exit();

kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID,
(struct kvm_cpuid2 *)kvm_supported_cpuid);

close(kvm_fd);
return cpuid;
return kvm_supported_cpuid;
}

static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid,

@ -708,6 +677,41 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index)
return buffer.entry.data;
}

void __vm_xsave_require_permission(int bit, const char *name)
{
int kvm_fd;
u64 bitmask;
long rc;
struct kvm_device_attr attr = {
.group = 0,
.attr = KVM_X86_XCOMP_GUEST_SUPP,
.addr = (unsigned long) &bitmask
};

TEST_ASSERT(!kvm_supported_cpuid,
"kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM");

kvm_fd = open_kvm_dev_path_or_exit();
rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
close(kvm_fd);

if (rc == -1 && (errno == ENXIO || errno == EINVAL))
__TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported");

TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);

__TEST_REQUIRE(bitmask & (1ULL << bit),
"Required XSAVE feature '%s' not supported", name);

TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit));

rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
TEST_ASSERT(bitmask & (1ULL << bit),
"prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx",
bitmask);
}

void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
{
TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID");

@ -249,16 +249,21 @@ int main(int argc, char *argv[])
u32 amx_offset;
int stage, ret;

/*
* Note, all off-by-default features must be enabled before anything
* caches KVM_GET_SUPPORTED_CPUID, e.g. before using kvm_cpu_has().
*/
vm_xsave_require_permission(XSTATE_XTILE_DATA_BIT);

/* Create VM */
vm = vm_create_with_one_vcpu(&vcpu, guest_code);

TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE));
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG));
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA));

/* Create VM */
vm = vm_create_with_one_vcpu(&vcpu, guest_code);

TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE),
"KVM should enumerate max XSAVE size when XSAVE is supported");
xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE);
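The reordering in this last hunk is the behavioral point: kvm_cpu_has(), and anything else that snapshots KVM_GET_SUPPORTED_CPUID, must not run until after the ARCH_REQ_XCOMP_GUEST_PERM opt-in, otherwise the cached CPUID hides the opted-in XSAVE feature. A sketch of the pitfall, with a hypothetical ordering purely for illustration:

    /*
     * Anti-pattern: kvm_cpu_has() caches KVM_GET_SUPPORTED_CPUID here, before
     * the prctl() opt-in, so XTILEDATA looks unsupported even on AMX hosts.
     */
    TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA));
    vm_xsave_require_permission(XSTATE_XTILE_DATA_BIT);    /* too late */

    /* Correct order, as the test now does: opt in first, then query CPUID. */
    vm_xsave_require_permission(XSTATE_XTILE_DATA_BIT);
    TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA));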
@ -142,7 +142,7 @@ void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
/* Intercept RDMSR 0xc0000100 */
vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
CPU_BASED_USE_MSR_BITMAPS);
set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
__set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
GUEST_ASSERT(!vmresume());
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
current_evmcs->guest_rip += 2; /* rdmsr */

@ -154,7 +154,7 @@ void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
current_evmcs->guest_rip += 2; /* rdmsr */

/* Intercept RDMSR 0xc0000101 without telling KVM about it */
set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
__set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
/* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
GUEST_ASSERT(!vmresume());

@ -103,7 +103,7 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,

/* Intercept RDMSR 0xc0000100 */
vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT;
set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800);
__set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800);
run_guest(vmcb, svm->vmcb_gpa);
GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
vmcb->save.rip += 2; /* rdmsr */

@ -115,7 +115,7 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,
vmcb->save.rip += 2; /* rdmsr */

/* Intercept RDMSR 0xc0000101 without telling KVM about it */
set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800);
__set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800);
/* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS;
run_guest(vmcb, svm->vmcb_gpa);
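These guest-code hunks, together with the ucall_alloc() change earlier, are the selftests side of making set_bit()/clear_bit() truly atomic: code that owns its bitmap exclusively (a guest poking its own MSR bitmap) switches to the cheaper non-atomic __set_bit(), while shared state such as the ucall slot bitmap keeps the atomic test_and_set_bit(). A rough sketch of the distinction, not the exact tools/ implementation, assuming BITS_PER_LONG is defined:

    /* Non-atomic: plain read-modify-write; fine when a single CPU owns the bitmap. */
    static inline void __set_bit(unsigned long nr, unsigned long *addr)
    {
            addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
    }

    /* Atomic: safe against concurrent setters, e.g. vCPUs racing to claim ucall slots. */
    static inline void set_bit(unsigned long nr, unsigned long *addr)
    {
            __atomic_fetch_or(&addr[nr / BITS_PER_LONG],
                              1UL << (nr % BITS_PER_LONG), __ATOMIC_SEQ_CST);
    }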
@ -67,6 +67,52 @@ static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu)
vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull);
}

static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu,
uint64_t msr_bit,
struct kvm_x86_cpu_feature feature)
{
uint64_t val;

vcpu_clear_cpuid_feature(vcpu, feature);

val = vcpu_get_msr(vcpu, MSR_IA32_FEAT_CTL);
vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val);

if (!kvm_cpu_has(feature))
return;

vcpu_set_cpuid_feature(vcpu, feature);
}

static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu)
{
uint64_t supported_bits = FEAT_CTL_LOCKED |
FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX |
FEAT_CTL_SGX_LC_ENABLED |
FEAT_CTL_SGX_ENABLED |
FEAT_CTL_LMCE_ENABLED;
int bit, r;

__ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_SMX);
__ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_VMX);
__ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX, X86_FEATURE_VMX);
__ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX_LC);
__ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX);
__ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_ENABLED, X86_FEATURE_SGX);
__ia32_feature_control_msr_test(vcpu, FEAT_CTL_LMCE_ENABLED, X86_FEATURE_MCE);

for_each_clear_bit(bit, &supported_bits, 64) {
r = _vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, BIT(bit));
TEST_ASSERT(r == 0,
"Setting reserved bit %d in IA32_FEATURE_CONTROL should fail", bit);
}
}

int main(void)
{
struct kvm_vcpu *vcpu;

@ -79,6 +125,7 @@ int main(void)
vm = vm_create_with_one_vcpu(&vcpu, NULL);

vmx_save_restore_msrs_test(vcpu);
ia32_feature_control_msr_test(vcpu);

kvm_vm_free(vm);
}
@ -26,17 +26,17 @@
#define SHINFO_REGION_GPA 0xc0000000ULL
#define SHINFO_REGION_SLOT 10

#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (2 * PAGE_SIZE))
#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (3 * PAGE_SIZE))
#define DUMMY_REGION_SLOT 11

#define SHINFO_ADDR (SHINFO_REGION_GPA)
#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15)

#define SHINFO_VADDR (SHINFO_REGION_GVA)
#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40)
#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15)

#define EVTCHN_VECTOR 0x10

@ -88,14 +88,20 @@ struct pvclock_wall_clock {
} __attribute__((__packed__));

struct vcpu_runstate_info {
uint32_t state;
uint64_t state_entry_time;
uint64_t time[4];
uint32_t state;
uint64_t state_entry_time;
uint64_t time[5]; /* Extra field for overrun check */
};

struct compat_vcpu_runstate_info {
uint32_t state;
uint64_t state_entry_time;
uint64_t time[5];
} __attribute__((__packed__));;

struct arch_vcpu_info {
unsigned long cr2;
unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
unsigned long cr2;
unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
};

struct vcpu_info {

@ -440,6 +446,7 @@ int main(int argc, char *argv[])
TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);

bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);

@ -449,8 +456,8 @@ int main(int argc, char *argv[])

/* Map a region for the shared_info page */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);
SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0);
virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3);

struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR);

@ -475,6 +482,19 @@ int main(int argc, char *argv[])
};
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);

if (do_runstate_flag) {
struct kvm_xen_hvm_attr ruf = {
.type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG,
.u.runstate_update_flag = 1,
};
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf);

ruf.u.runstate_update_flag = 0;
vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf);
TEST_ASSERT(ruf.u.runstate_update_flag == 1,
"Failed to read back RUNSTATE_UPDATE_FLAG attr");
}

struct kvm_xen_hvm_attr ha = {
.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
.u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,

@ -999,22 +1019,91 @@ int main(int argc, char *argv[])
runstate_names[i], rs->time[i]);
}
}
TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
"State entry time mismatch");
TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
"Running time mismatch");
TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
"Runnable time mismatch");
TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
"Blocked time mismatch");
TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
"Offline time mismatch");

TEST_ASSERT(rs->state_entry_time == rs->time[0] +
rs->time[1] + rs->time[2] + rs->time[3],
"runstate times don't add up");
/*
* Exercise runstate info at all points across the page boundary, in
* 32-bit and 64-bit mode. In particular, test the case where it is
* configured in 32-bit mode and then switched to 64-bit mode while
* active, which takes it onto the second page.
*/
unsigned long runstate_addr;
struct compat_vcpu_runstate_info *crs;
for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4;
runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) {

rs = addr_gpa2hva(vm, runstate_addr);
crs = (void *)rs;

memset(rs, 0xa5, sizeof(*rs));

/* Set to compatibility mode */
lm.u.long_mode = 0;
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);

/* Set runstate to new address (kernel will write it) */
struct kvm_xen_vcpu_attr st = {
.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
.u.gpa = runstate_addr,
};
vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);

if (verbose)
printf("Compatibility runstate at %08lx\n", runstate_addr);

TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch");
TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time,
"State entry time mismatch");
TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running,
"Running time mismatch");
TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
"Runnable time mismatch");
TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
"Blocked time mismatch");
TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
"Offline time mismatch");
TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
"Structure overrun");
TEST_ASSERT(crs->state_entry_time == crs->time[0] +
crs->time[1] + crs->time[2] + crs->time[3],
"runstate times don't add up");

/* Now switch to 64-bit mode */
lm.u.long_mode = 1;
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);

memset(rs, 0xa5, sizeof(*rs));

/* Don't change the address, just trigger a write */
struct kvm_xen_vcpu_attr adj = {
.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST,
.u.runstate.state = (uint64_t)-1
};
vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj);

if (verbose)
printf("64-bit runstate at %08lx\n", runstate_addr);

TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
"State entry time mismatch");
TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
"Running time mismatch");
TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
"Runnable time mismatch");
TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
"Blocked time mismatch");
TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
"Offline time mismatch");
TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
"Structure overrun");

TEST_ASSERT(rs->state_entry_time == rs->time[0] +
rs->time[1] + rs->time[2] + rs->time[3],
"runstate times don't add up");
}
}

kvm_vm_free(vm);
return 0;
}
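For orientation, the loop above slides the runstate area one byte at a time across the boundary between the shared-info region's second and third pages, which is why the region grew from 2 to 3 pages and RUNSTATE_ADDR now sits 15 bytes shy of that boundary. A rough worked example of the range covered (assuming 4 KiB pages and the usual x86-64 layout, under which the test's struct vcpu_runstate_info with its extra time[5] slot is 56 bytes):

    /*
     * Boundary being crossed: SHINFO_REGION_GPA + 2 * PAGE_SIZE = 0xc0002000.
     * First GPA tried:  0xc0002000 - 56 - 4 = 0xc0001fc4  (struct entirely below)
     * Last GPA tried:   0xc0002000 + 3                    (struct entirely above)
     * so every possible split of the structure across the boundary is hit, plus
     * a few positions fully on either side, in both compat and 64-bit mode.
     */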
@ -3519,10 +3519,6 @@ void kvm_vcpu_halt(struct kvm_vcpu *vcpu)
ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns);

do {
/*
* This sets KVM_REQ_UNHALT if an interrupt
* arrives.
*/
if (kvm_vcpu_check_block(vcpu) < 0)
goto out;
cpu_relax();
@ -76,19 +76,17 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
}
}

bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
gpa_t gpa, unsigned long len)
bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
{
struct kvm_memslots *slots = kvm_memslots(kvm);
struct kvm_memslots *slots = kvm_memslots(gpc->kvm);

if (!gpc->active)
return false;

if ((gpa & ~PAGE_MASK) + len > PAGE_SIZE)
if ((gpc->gpa & ~PAGE_MASK) + len > PAGE_SIZE)
return false;

if (gpc->gpa != gpa || gpc->generation != slots->generation ||
kvm_is_error_hva(gpc->uhva))
if (gpc->generation != slots->generation || kvm_is_error_hva(gpc->uhva))
return false;

if (!gpc->valid)

@ -96,9 +94,9 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,

return true;
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_check);
EXPORT_SYMBOL_GPL(kvm_gpc_check);

static void gpc_unmap_khva(struct kvm *kvm, kvm_pfn_t pfn, void *khva)
static void gpc_unmap_khva(kvm_pfn_t pfn, void *khva)
{
/* Unmap the old pfn/page if it was mapped before. */
if (!is_error_noslot_pfn(pfn) && khva) {

@ -139,7 +137,7 @@ static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_s
return kvm->mmu_invalidate_seq != mmu_seq;
}

static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
{
/* Note, the new page offset may be different than the old! */
void *old_khva = gpc->khva - offset_in_page(gpc->khva);

@ -159,7 +157,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
gpc->valid = false;

do {
mmu_seq = kvm->mmu_invalidate_seq;
mmu_seq = gpc->kvm->mmu_invalidate_seq;
smp_rmb();

write_unlock_irq(&gpc->lock);

@ -177,7 +175,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
* the existing mapping and didn't create a new one.
*/
if (new_khva != old_khva)
gpc_unmap_khva(kvm, new_pfn, new_khva);
gpc_unmap_khva(new_pfn, new_khva);

kvm_release_pfn_clean(new_pfn);

@ -217,7 +215,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
* attempting to refresh.
*/
WARN_ON_ONCE(gpc->valid);
} while (mmu_notifier_retry_cache(kvm, mmu_seq));
} while (mmu_notifier_retry_cache(gpc->kvm, mmu_seq));

gpc->valid = true;
gpc->pfn = new_pfn;

@ -238,10 +236,10 @@ out_error:
return -EFAULT;
}

int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
gpa_t gpa, unsigned long len)
static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa,
unsigned long len)
{
struct kvm_memslots *slots = kvm_memslots(kvm);
struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
unsigned long page_offset = gpa & ~PAGE_MASK;
bool unmap_old = false;
unsigned long old_uhva;

@ -295,7 +293,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
* drop the lock and do the HVA to PFN lookup again.
*/
if (!gpc->valid || old_uhva != gpc->uhva) {
ret = hva_to_pfn_retry(kvm, gpc);
ret = hva_to_pfn_retry(gpc);
} else {
/*
* If the HVA→PFN mapping was already valid, don't unmap it.

@ -303,9 +301,8 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
* may have changed.
*/
gpc->khva = old_khva + page_offset;
old_pfn = KVM_PFN_ERR_FAULT;
old_khva = NULL;
ret = 0;
goto out_unlock;
}

out:

@ -329,59 +326,41 @@ out_unlock:
mutex_unlock(&gpc->refresh_lock);

if (unmap_old)
gpc_unmap_khva(kvm, old_pfn, old_khva);
gpc_unmap_khva(old_pfn, old_khva);

return ret;
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_refresh);

void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len)
{
void *old_khva;
kvm_pfn_t old_pfn;

mutex_lock(&gpc->refresh_lock);
write_lock_irq(&gpc->lock);

gpc->valid = false;

old_khva = gpc->khva - offset_in_page(gpc->khva);
old_pfn = gpc->pfn;

/*
* We can leave the GPA → uHVA map cache intact but the PFN
* lookup will need to be redone even for the same page.
*/
gpc->khva = NULL;
gpc->pfn = KVM_PFN_ERR_FAULT;

write_unlock_irq(&gpc->lock);
mutex_unlock(&gpc->refresh_lock);

gpc_unmap_khva(kvm, old_pfn, old_khva);
return __kvm_gpc_refresh(gpc, gpc->gpa, len);
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap);
EXPORT_SYMBOL_GPL(kvm_gpc_refresh);

void kvm_gpc_init(struct gfn_to_pfn_cache *gpc)
void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
struct kvm_vcpu *vcpu, enum pfn_cache_usage usage)
{
WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);
WARN_ON_ONCE((usage & KVM_GUEST_USES_PFN) && !vcpu);

rwlock_init(&gpc->lock);
mutex_init(&gpc->refresh_lock);

gpc->kvm = kvm;
gpc->vcpu = vcpu;
gpc->usage = usage;
gpc->pfn = KVM_PFN_ERR_FAULT;
gpc->uhva = KVM_HVA_ERR_BAD;
}
EXPORT_SYMBOL_GPL(kvm_gpc_init);

int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
gpa_t gpa, unsigned long len)
int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
{
WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);
struct kvm *kvm = gpc->kvm;

if (!gpc->active) {
gpc->khva = NULL;
gpc->pfn = KVM_PFN_ERR_FAULT;
gpc->uhva = KVM_HVA_ERR_BAD;
gpc->vcpu = vcpu;
gpc->usage = usage;
gpc->valid = false;
if (KVM_BUG_ON(gpc->valid, kvm))
return -EIO;

spin_lock(&kvm->gpc_lock);
list_add(&gpc->list, &kvm->gpc_list);

@ -396,12 +375,16 @@ int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
gpc->active = true;
write_unlock_irq(&gpc->lock);
}
return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len);
return __kvm_gpc_refresh(gpc, gpa, len);
}
EXPORT_SYMBOL_GPL(kvm_gpc_activate);

void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
{
struct kvm *kvm = gpc->kvm;
kvm_pfn_t old_pfn;
void *old_khva;

if (gpc->active) {
/*
* Deactivate the cache before removing it from the list, KVM

@ -410,13 +393,26 @@ void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
*/
write_lock_irq(&gpc->lock);
gpc->active = false;
gpc->valid = false;

/*
* Leave the GPA => uHVA cache intact, it's protected by the
* memslot generation. The PFN lookup needs to be redone every
* time as mmu_notifier protection is lost when the cache is
* removed from the VM's gpc_list.
*/
old_khva = gpc->khva - offset_in_page(gpc->khva);
gpc->khva = NULL;

old_pfn = gpc->pfn;
gpc->pfn = KVM_PFN_ERR_FAULT;
write_unlock_irq(&gpc->lock);

spin_lock(&kvm->gpc_lock);
list_del(&gpc->list);
spin_unlock(&kvm->gpc_lock);

kvm_gfn_to_pfn_cache_unmap(kvm, gpc);
gpc_unmap_khva(old_pfn, old_khva);
}
}
EXPORT_SYMBOL_GPL(kvm_gpc_deactivate);
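Taken together, the pfncache hunks move the kvm/vcpu/usage context into the cache itself at init time, so the check/refresh/deactivate calls no longer take a struct kvm or a GPA. A hedged sketch of the resulting caller pattern (locking and error handling simplified; kvm, vcpu, gpa and flags are assumed to exist in the surrounding code, and the in-tree Xen emulation is the authoritative user):

    struct gfn_to_pfn_cache gpc;
    unsigned long flags;

    /* One-time setup: the cache now remembers its kvm, vcpu and usage. */
    kvm_gpc_init(&gpc, kvm, vcpu, KVM_HOST_USES_PFN);

    /* Bind the cache to a guest physical address and map it. */
    if (kvm_gpc_activate(&gpc, gpa, sizeof(struct vcpu_runstate_info)))
            return;

    /* Fast path: validate under the read lock, refresh and retry if stale. */
    read_lock_irqsave(&gpc.lock, flags);
    while (!kvm_gpc_check(&gpc, sizeof(struct vcpu_runstate_info))) {
            read_unlock_irqrestore(&gpc.lock, flags);
            if (kvm_gpc_refresh(&gpc, sizeof(struct vcpu_runstate_info)))
                    return;
            read_lock_irqsave(&gpc.lock, flags);
    }
    /* ... access the mapping via gpc.khva ... */
    read_unlock_irqrestore(&gpc.lock, flags);

    /* Teardown unmaps the page and drops the cache from the VM's list. */
    kvm_gpc_deactivate(&gpc);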