KVM generic changes for 6.11
Merge tag 'kvm-x86-generic-6.11' of https://github.com/kvm-x86/linux into HEAD

 - Enable halt poll shrinking by default, as Intel found it to be a clear win.

 - Set up empty IRQ routing when creating a VM to avoid having to synchronize
   SRCU when creating a split IRQCHIP on x86.

 - Rework the sched_in/out() paths to replace kvm_arch_sched_in() with a flag
   that arch code can use for hooking both sched_in() and sched_out().

 - Take the vCPU @id as an "unsigned long" instead of "u32" to avoid
   truncating a bogus value from userspace, e.g. to help userspace detect bugs.

 - Mark a vCPU as preempted if and only if it's scheduled out while in the
   KVM_RUN loop, e.g. to avoid marking it preempted and thus writing guest
   memory when retrieving guest state during live migration blackout.

 - A few minor cleanups
commit 86014c1e20
@@ -7969,10 +7969,10 @@ perform a bulk copy of tags to/from the guest.

7.29 KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM
-------------------------------------

Architectures: x86 SEV enabled
Type: vm
Parameters: args[0] is the fd of the source vm
Returns: 0 on success
:Architectures: x86 SEV enabled
:Type: vm
:Parameters: args[0] is the fd of the source vm
:Returns: 0 on success

This capability enables userspace to migrate the encryption context from the VM
indicated by the fd to the VM this is called on.
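For orientation, a minimal userspace sketch of enabling this capability on the destination VM (an editor's illustration, not part of this series; src_vm_fd and dst_vm_fd are placeholder descriptors, and <linux/kvm.h> plus <sys/ioctl.h> are assumed):

static int move_enc_context(int dst_vm_fd, int src_vm_fd)
{
        struct kvm_enable_cap cap = {
                .cap = KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM,
                .args[0] = src_vm_fd,
        };

        /* Returns 0 on success, as documented above. */
        return ioctl(dst_vm_fd, KVM_ENABLE_CAP, &cap);
}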
@@ -79,11 +79,11 @@ adjustment of the polling interval.

Module Parameters
=================

The kvm module has 3 tuneable module parameters to adjust the global max
polling interval as well as the rate at which the polling interval is grown and
shrunk. These variables are defined in include/linux/kvm_host.h and as module
parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the
powerpc kvm-hv case.
The kvm module has 4 tunable module parameters to adjust the global max polling
interval, the initial value (to grow from 0), and the rate at which the polling
interval is grown and shrunk. These variables are defined in
include/linux/kvm_host.h and as module parameters in virt/kvm/kvm_main.c, or
arch/powerpc/kvm/book3s_hv.c in the powerpc kvm-hv case.

+-----------------------+---------------------------+-------------------------+
|Module Parameter       | Description               | Default Value           |
@@ -105,7 +105,7 @@ powerpc kvm-hv case.
|                       | grow_halt_poll_ns()       |                         |
|                       | function.                 |                         |
+-----------------------+---------------------------+-------------------------+
|halt_poll_ns_shrink    | The value by which the    | 0                       |
|halt_poll_ns_shrink    | The value by which the    | 2                       |
|                       | halt polling interval is  |                         |
|                       | divided in the            |                         |
|                       | shrink_halt_poll_ns()     |                         |
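As a usage note (an editor's illustration, not part of this patch): the parameters are registered with module_param(..., 0644) in virt/kvm/kvm_main.c, so they also appear under /sys/module/kvm/parameters/ and can be adjusted at runtime. A small C sketch, assuming the kvm module is loaded and the caller has permission to write the file:

#include <stdio.h>

/* Hypothetical helper, e.g. set_kvm_param("halt_poll_ns_shrink", "2"). */
static int set_kvm_param(const char *name, const char *val)
{
        char path[128];
        FILE *f;

        snprintf(path, sizeof(path), "/sys/module/kvm/parameters/%s", name);
        f = fopen(path, "w");
        if (!f)
                return -1;
        fprintf(f, "%s\n", val);
        return fclose(f);
}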
@@ -1289,7 +1289,6 @@ static inline bool kvm_system_needs_idmapped_vectors(void)
}

static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}

void kvm_arm_init_debug(void);
void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu);
@@ -1138,7 +1138,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)

        vcpu_load(vcpu);

        if (run->immediate_exit) {
        if (!vcpu->wants_to_run) {
                ret = -EINTR;
                goto out;
        }

@@ -274,7 +274,6 @@ static inline bool kvm_is_ifetch_fault(struct kvm_vcpu_arch *arch)
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}

@@ -1416,7 +1416,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                kvm_complete_iocsr_read(vcpu, run);
        }

        if (run->immediate_exit)
        if (!vcpu->wants_to_run)
                return r;

        /* Clear exit_reason */

@@ -890,7 +890,6 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
                                         struct kvm_memory_slot *slot) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}

@@ -436,7 +436,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                vcpu->mmio_needed = 0;
        }

        if (vcpu->run->immediate_exit)
        if (!vcpu->wants_to_run)
                goto out;

        lose_fpu(1);

@@ -897,7 +897,6 @@ struct kvm_vcpu_arch {
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}

@@ -1852,7 +1852,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)

        kvm_sigset_activate(vcpu);

        if (run->immediate_exit)
        if (!vcpu->wants_to_run)
                r = -EINTR;
        else
                r = kvmppc_vcpu_run(vcpu);

@@ -286,7 +286,6 @@ struct kvm_vcpu_arch {
};

static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}

#define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12

@@ -763,7 +763,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                return ret;
        }

        if (run->immediate_exit) {
        if (!vcpu->wants_to_run) {
                kvm_vcpu_srcu_read_unlock(vcpu);
                return -EINTR;
        }

@@ -1045,7 +1045,6 @@ extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);

static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
                                         struct kvm_memory_slot *slot) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
@@ -2997,14 +2997,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
                break;
        }
        case KVM_CREATE_IRQCHIP: {
                struct kvm_irq_routing_entry routing;

                r = -EINVAL;
                if (kvm->arch.use_irqchip) {
                        /* Set up dummy routing. */
                        memset(&routing, 0, sizeof(routing));
                        r = kvm_set_irq_routing(kvm, &routing, 0, 0);
                }
                if (kvm->arch.use_irqchip)
                        r = 0;
                break;
        }
        case KVM_SET_DEVICE_ATTR: {

@@ -5032,7 +5027,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
        if (vcpu->kvm->arch.pv.dumping)
                return -EINVAL;

        if (kvm_run->immediate_exit)
        if (!vcpu->wants_to_run)
                return -EINTR;

        if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
@@ -103,7 +103,6 @@ KVM_X86_OP(write_tsc_multiplier)
KVM_X86_OP(get_exit_info)
KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
KVM_X86_OP(sched_in)
KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
KVM_X86_OP_OPTIONAL(vcpu_blocking)
KVM_X86_OP_OPTIONAL(vcpu_unblocking)

@@ -1750,8 +1750,6 @@ struct kvm_x86_ops {
                               struct x86_exception *exception);
        void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);

        void (*sched_in)(struct kvm_vcpu *vcpu, int cpu);

        /*
         * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A zero
         * value indicates CPU dirty logging is unsupported or disabled.

@@ -106,7 +106,6 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
int apic_has_pending_timer(struct kvm_vcpu *vcpu);

int kvm_setup_default_irq_routing(struct kvm *kvm);
int kvm_setup_empty_irq_routing(struct kvm *kvm);
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
                             struct kvm_lapic_irq *irq,
                             struct dest_map *dest_map);

@@ -395,13 +395,6 @@ int kvm_setup_default_irq_routing(struct kvm *kvm)
                                   ARRAY_SIZE(default_routing), 0);
}

static const struct kvm_irq_routing_entry empty_routing[] = {};

int kvm_setup_empty_irq_routing(struct kvm *kvm)
{
        return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
}

void kvm_arch_post_irq_routing_update(struct kvm *kvm)
{
        if (!irqchip_split(kvm))

@@ -521,9 +521,9 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
        }

        /*
         * Unused perf_events are only released if the corresponding MSRs
         * weren't accessed during the last vCPU time slice. kvm_arch_sched_in
         * triggers KVM_REQ_PMU if cleanup is needed.
         * Release unused perf_events if the corresponding guest MSRs weren't
         * accessed during the last vCPU time slice (need_cleanup is set when
         * the vCPU is scheduled back in).
         */
        if (unlikely(pmu->need_cleanup))
                kvm_pmu_cleanup(vcpu);
@@ -1554,6 +1554,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        struct vcpu_svm *svm = to_svm(vcpu);
        struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);

        if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm))
                shrink_ple_window(vcpu);

        if (sd->current_vmcb != svm->vmcb) {
                sd->current_vmcb = svm->vmcb;

@@ -4607,12 +4610,6 @@ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
        vcpu->arch.at_instruction_boundary = true;
}

static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
        if (!kvm_pause_in_guest(vcpu->kvm))
                shrink_ple_window(vcpu);
}

static void svm_setup_mce(struct kvm_vcpu *vcpu)
{
        /* [63:9] are reserved. */

@@ -5075,8 +5072,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .check_intercept = svm_check_intercept,
        .handle_exit_irqoff = svm_handle_exit_irqoff,

        .sched_in = svm_sched_in,

        .nested_ops = &svm_nested_ops,

        .deliver_interrupt = svm_deliver_interrupt,

@@ -122,8 +122,6 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
        .check_intercept = vmx_check_intercept,
        .handle_exit_irqoff = vmx_handle_exit_irqoff,

        .sched_in = vmx_sched_in,

        .cpu_dirty_log_size = PML_ENTITY_NUM,
        .update_cpu_dirty_logging = vmx_update_cpu_dirty_logging,
@@ -1411,6 +1411,38 @@ static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
}
#endif

static void grow_ple_window(struct kvm_vcpu *vcpu)
{
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned int old = vmx->ple_window;

        vmx->ple_window = __grow_ple_window(old, ple_window,
                                            ple_window_grow,
                                            ple_window_max);

        if (vmx->ple_window != old) {
                vmx->ple_window_dirty = true;
                trace_kvm_ple_window_update(vcpu->vcpu_id,
                                            vmx->ple_window, old);
        }
}

static void shrink_ple_window(struct kvm_vcpu *vcpu)
{
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned int old = vmx->ple_window;

        vmx->ple_window = __shrink_ple_window(old, ple_window,
                                              ple_window_shrink,
                                              ple_window);

        if (vmx->ple_window != old) {
                vmx->ple_window_dirty = true;
                trace_kvm_ple_window_update(vcpu->vcpu_id,
                                            vmx->ple_window, old);
        }
}

void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
                        struct loaded_vmcs *buddy)
{
@@ -1486,6 +1518,9 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
        struct vcpu_vmx *vmx = to_vmx(vcpu);

        if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm))
                shrink_ple_window(vcpu);

        vmx_vcpu_load_vmcs(vcpu, cpu, NULL);

        vmx_vcpu_pi_load(vcpu, cpu);
@@ -5897,38 +5932,6 @@ int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu)
        return 1;
}

static void grow_ple_window(struct kvm_vcpu *vcpu)
{
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned int old = vmx->ple_window;

        vmx->ple_window = __grow_ple_window(old, ple_window,
                                            ple_window_grow,
                                            ple_window_max);

        if (vmx->ple_window != old) {
                vmx->ple_window_dirty = true;
                trace_kvm_ple_window_update(vcpu->vcpu_id,
                                            vmx->ple_window, old);
        }
}

static void shrink_ple_window(struct kvm_vcpu *vcpu)
{
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned int old = vmx->ple_window;

        vmx->ple_window = __shrink_ple_window(old, ple_window,
                                              ple_window_shrink,
                                              ple_window);

        if (vmx->ple_window != old) {
                vmx->ple_window_dirty = true;
                trace_kvm_ple_window_update(vcpu->vcpu_id,
                                            vmx->ple_window, old);
        }
}

/*
 * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
 * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
@@ -6677,9 +6680,10 @@ static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
        bool flush_l1d;

        /*
         * Clear the per-vcpu flush bit, it gets set again
         * either from vcpu_run() or from one of the unsafe
         * VMEXIT handlers.
         * Clear the per-vcpu flush bit, it gets set again if the vCPU
         * is reloaded, i.e. if the vCPU is scheduled out or if KVM
         * exits to userspace, or if KVM reaches one of the unsafe
         * VMEXIT handlers, e.g. if KVM calls into the emulator.
         */
        flush_l1d = vcpu->arch.l1tf_flush_l1d;
        vcpu->arch.l1tf_flush_l1d = false;
@@ -8179,12 +8183,6 @@ void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
}
#endif

void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
        if (!kvm_pause_in_guest(vcpu->kvm))
                shrink_ple_window(vcpu);
}

void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
{
        struct vcpu_vmx *vmx = to_vmx(vcpu);

@@ -112,7 +112,6 @@ u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
void vmx_write_tsc_offset(struct kvm_vcpu *vcpu);
void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu);
void vmx_request_immediate_exit(struct kvm_vcpu *vcpu);
void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu);
void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
#ifdef CONFIG_X86_64
int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
@@ -4998,6 +4998,15 @@ static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

        vcpu->arch.l1tf_flush_l1d = true;

        if (vcpu->scheduled_out && pmu->version && pmu->event_count) {
                pmu->need_cleanup = true;
                kvm_make_request(KVM_REQ_PMU, vcpu);
        }

        /* Address WBINVD may be executed by guest */
        if (need_emulate_wbinvd(vcpu)) {
                if (static_call(kvm_x86_has_wbinvd_exit)())

@@ -6546,9 +6555,6 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
                        goto split_irqchip_unlock;
                if (kvm->created_vcpus)
                        goto split_irqchip_unlock;
                r = kvm_setup_empty_irq_routing(kvm);
                if (r)
                        goto split_irqchip_unlock;
                /* Pairs with irqchip_in_kernel. */
                smp_wmb();
                kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
@@ -6695,7 +6701,9 @@ split_irqchip_unlock:
                        break;

                mutex_lock(&kvm->lock);
                if (kvm->arch.max_vcpu_ids == cap->args[0]) {
                if (kvm->arch.bsp_vcpu_id > cap->args[0]) {
                        ;
                } else if (kvm->arch.max_vcpu_ids == cap->args[0]) {
                        r = 0;
                } else if (!kvm->arch.max_vcpu_ids) {
                        kvm->arch.max_vcpu_ids = cap->args[0];
@@ -7216,6 +7224,9 @@ set_pit2_out:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        r = -EBUSY;
                else if (arg > KVM_MAX_VCPU_IDS ||
                         (kvm->arch.max_vcpu_ids && arg > kvm->arch.max_vcpu_ids))
                        r = -EINVAL;
                else
                        kvm->arch.bsp_vcpu_id = arg;
                mutex_unlock(&kvm->lock);
@@ -11248,7 +11259,6 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
        int r;

        vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
        vcpu->arch.l1tf_flush_l1d = true;

        for (;;) {
                /*
@@ -11398,7 +11408,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)

        kvm_vcpu_srcu_read_lock(vcpu);
        if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
                if (kvm_run->immediate_exit) {
                if (!vcpu->wants_to_run) {
                        r = -EINTR;
                        goto out;
                }
@@ -11476,7 +11486,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                WARN_ON_ONCE(vcpu->mmio_needed);
        }

        if (kvm_run->immediate_exit) {
        if (!vcpu->wants_to_run) {
                r = -EINTR;
                goto out;
        }
@@ -12569,18 +12579,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
        return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
}

void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

        vcpu->arch.l1tf_flush_l1d = true;
        if (pmu->version && unlikely(pmu->event_count)) {
                pmu->need_cleanup = true;
                kvm_make_request(KVM_REQ_PMU, vcpu);
        }
        static_call(kvm_x86_sched_in)(vcpu, cpu);
}

void kvm_arch_free_vm(struct kvm *kvm)
{
#if IS_ENABLED(CONFIG_HYPERV)
|
||||
bool dy_eligible;
|
||||
} spin_loop;
|
||||
#endif
|
||||
bool wants_to_run;
|
||||
bool preempted;
|
||||
bool ready;
|
||||
bool scheduled_out;
|
||||
struct kvm_vcpu_arch arch;
|
||||
struct kvm_vcpu_stat stat;
|
||||
char stats_id[KVM_STATS_NAME_SIZE];
|
||||
@ -1494,8 +1496,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
|
||||
struct kvm_guest_debug *dbg);
|
||||
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu);
|
||||
|
||||
void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
|
||||
|
||||
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
|
||||
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
|
||||
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id);
|
||||
@ -1955,8 +1955,6 @@ struct _kvm_stats_desc {
|
||||
HALT_POLL_HIST_COUNT), \
|
||||
STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking)
|
||||
|
||||
extern struct dentry *kvm_debugfs_dir;
|
||||
|
||||
ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
|
||||
const struct _kvm_stats_desc *desc,
|
||||
void *stats, size_t size_stats,
|
||||
@ -2096,6 +2094,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
|
||||
const struct kvm_irq_routing_entry *entries,
|
||||
unsigned nr,
|
||||
unsigned flags);
|
||||
int kvm_init_irq_routing(struct kvm *kvm);
|
||||
int kvm_set_routing_entry(struct kvm *kvm,
|
||||
struct kvm_kernel_irq_routing_entry *e,
|
||||
const struct kvm_irq_routing_entry *ue);
|
||||
@ -2105,6 +2104,11 @@ void kvm_free_irq_routing(struct kvm *kvm);
|
||||
|
||||
static inline void kvm_free_irq_routing(struct kvm *kvm) {}
|
||||
|
||||
static inline int kvm_init_irq_routing(struct kvm *kvm)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
|
||||
|
@@ -192,11 +192,24 @@ struct kvm_xen_exit {
/* Flags that describe what fields in emulation_failure hold valid data. */
#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0)

/*
 * struct kvm_run can be modified by userspace at any time, so KVM must be
 * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM()
 * renames fields in struct kvm_run from <symbol> to <symbol>__unsafe when
 * compiled into the kernel, ensuring that any use within KVM is obvious and
 * gets extra scrutiny.
 */
#ifdef __KERNEL__
#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe
#else
#define HINT_UNSAFE_IN_KVM(_symbol) _symbol
#endif

/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
        /* in */
        __u8 request_interrupt_window;
        __u8 immediate_exit;
        __u8 HINT_UNSAFE_IN_KVM(immediate_exit);
        __u8 padding1[6];

        /* out */
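A brief illustration (editor's sketch, not part of the patch): the UAPI name is unchanged, so existing userspace that sets immediate_exit before KVM_RUN keeps working; only KVM-internal code sees the __unsafe-suffixed name and must snapshot it once, as the kvm_main.c hunk further down does with READ_ONCE(). vcpu_fd and run are placeholders for an existing vCPU fd and its mmap()ed kvm_run; <errno.h>, <sys/ioctl.h> and <linux/kvm.h> are assumed.

static int request_immediate_exit(int vcpu_fd, struct kvm_run *run)
{
        run->immediate_exit = 1;
        if (ioctl(vcpu_fd, KVM_RUN, NULL) < 0 && errno == EINTR)
                return 0;       /* KVM bailed out before entering the guest */
        return -1;
}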
@@ -26,10 +26,19 @@ int main(int argc, char *argv[])
        TEST_ASSERT(ret < 0,
                    "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail");

        /* Test BOOT_CPU_ID interaction (MAX_VCPU_ID cannot be lower) */
        if (kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)) {
                vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)MAX_VCPU_ID);

                /* Try setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID */
                ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID - 1);
                TEST_ASSERT(ret < 0,
                            "Setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID should fail");
        }

        /* Set KVM_CAP_MAX_VCPU_ID */
        vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID);

        /* Try to set KVM_CAP_MAX_VCPU_ID again */
        ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1);
        TEST_ASSERT(ret < 0,
@@ -39,6 +48,15 @@ int main(int argc, char *argv[])
        ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID);
        TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail");

        /* Create vCPU with bits 63:32 != 0, but an otherwise valid id */
        ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(1L << 32));
        TEST_ASSERT(ret < 0, "Creating vCPU with ID[63:32] != 0 should fail");

        /* Create vCPU with id within bounds */
        ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)0);
        TEST_ASSERT(ret >= 0, "Creating vCPU with ID 0 should succeed");

        close(ret);
        kvm_vm_free(vm);
        return 0;
}
@@ -33,6 +33,20 @@ static void guest_not_bsp_vcpu(void *arg)
        GUEST_DONE();
}

static void test_set_invalid_bsp(struct kvm_vm *vm)
{
        unsigned long max_vcpu_id = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
        int r;

        if (max_vcpu_id) {
                r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(max_vcpu_id + 1));
                TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID > MAX should fail");
        }

        r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(1L << 32));
        TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID[63:32]!=0 should fail");
}

static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg)
{
        int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID,
@@ -80,6 +94,8 @@ static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id,

        vm = vm_create(nr_vcpus);

        test_set_invalid_bsp(vm);

        vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id);

        for (i = 0; i < nr_vcpus; i++)
|
||||
spin_lock(&vcpu->async_pf.lock);
|
||||
first = list_empty(&vcpu->async_pf.done);
|
||||
list_add_tail(&apf->link, &vcpu->async_pf.done);
|
||||
apf->vcpu = NULL;
|
||||
spin_unlock(&vcpu->async_pf.lock);
|
||||
|
||||
/*
|
||||
@ -120,8 +119,6 @@ static void kvm_flush_and_free_async_pf_work(struct kvm_async_pf *work)
|
||||
|
||||
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
spin_lock(&vcpu->async_pf.lock);
|
||||
|
||||
/* cancel outstanding work queue item */
|
||||
while (!list_empty(&vcpu->async_pf.queue)) {
|
||||
struct kvm_async_pf *work =
|
||||
@ -129,23 +126,15 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
|
||||
typeof(*work), queue);
|
||||
list_del(&work->queue);
|
||||
|
||||
/*
|
||||
* We know it's present in vcpu->async_pf.done, do
|
||||
* nothing here.
|
||||
*/
|
||||
if (!work->vcpu)
|
||||
continue;
|
||||
|
||||
spin_unlock(&vcpu->async_pf.lock);
|
||||
#ifdef CONFIG_KVM_ASYNC_PF_SYNC
|
||||
flush_work(&work->work);
|
||||
#else
|
||||
if (cancel_work_sync(&work->work))
|
||||
kmem_cache_free(async_pf_cache, work);
|
||||
#endif
|
||||
spin_lock(&vcpu->async_pf.lock);
|
||||
}
|
||||
|
||||
spin_lock(&vcpu->async_pf.lock);
|
||||
while (!list_empty(&vcpu->async_pf.done)) {
|
||||
struct kvm_async_pf *work =
|
||||
list_first_entry(&vcpu->async_pf.done,
|
||||
|
@@ -237,3 +237,27 @@ out:

        return r;
}

/*
 * Allocate empty IRQ routing by default so that additional setup isn't needed
 * when userspace-driven IRQ routing is activated, and so that kvm->irq_routing
 * is guaranteed to be non-NULL.
 */
int kvm_init_irq_routing(struct kvm *kvm)
{
        struct kvm_irq_routing_table *new;
        int chip_size;

        new = kzalloc(struct_size(new, map, 1), GFP_KERNEL_ACCOUNT);
        if (!new)
                return -ENOMEM;

        new->nr_rt_entries = 1;

        chip_size = sizeof(int) * KVM_NR_IRQCHIPS * KVM_IRQCHIP_NUM_PINS;
        memset(new->chip, -1, chip_size);

        RCU_INIT_POINTER(kvm->irq_routing, new);

        return 0;
}
@@ -1,9 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 * Kernel-based Virtual Machine (KVM) Hypervisor
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
@@ -74,6 +71,7 @@
#define ITOA_MAX_LEN 12

MODULE_AUTHOR("Qumranet");
MODULE_DESCRIPTION("Kernel-based Virtual Machine (KVM) Hypervisor");
MODULE_LICENSE("GPL");

/* Architectures should define their poll value according to the halt latency */
@@ -91,8 +89,8 @@ unsigned int halt_poll_ns_grow_start = 10000; /* 10us */
module_param(halt_poll_ns_grow_start, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns_grow_start);

/* Default resets per-vcpu halt_poll_ns . */
unsigned int halt_poll_ns_shrink;
/* Default halves per-vcpu halt_poll_ns. */
unsigned int halt_poll_ns_shrink = 2;
module_param(halt_poll_ns_shrink, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);

@@ -110,8 +108,7 @@ static struct kmem_cache *kvm_vcpu_cache;
static __read_mostly struct preempt_ops kvm_preempt_ops;
static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_running_vcpu);

struct dentry *kvm_debugfs_dir;
EXPORT_SYMBOL_GPL(kvm_debugfs_dir);
static struct dentry *kvm_debugfs_dir;

static const struct file_operations stat_fops_per_vm;

@@ -1145,8 +1142,7 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
{
        struct kvm *kvm = kvm_arch_alloc_vm();
        struct kvm_memslots *slots;
        int r = -ENOMEM;
        int i, j;
        int r, i, j;

        if (!kvm)
                return ERR_PTR(-ENOMEM);
@@ -1183,12 +1179,18 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
        snprintf(kvm->stats_id, sizeof(kvm->stats_id), "kvm-%d",
                 task_pid_nr(current));

        r = -ENOMEM;
        if (init_srcu_struct(&kvm->srcu))
                goto out_err_no_srcu;
        if (init_srcu_struct(&kvm->irq_srcu))
                goto out_err_no_irq_srcu;

        r = kvm_init_irq_routing(kvm);
        if (r)
                goto out_err_no_irq_routing;

        refcount_set(&kvm->users_count, 1);

        for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
                for (j = 0; j < 2; j++) {
                        slots = &kvm->__memslots[i][j];
@@ -1206,6 +1208,7 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
                rcu_assign_pointer(kvm->memslots[i], &kvm->__memslots[i][0]);
        }

        r = -ENOMEM;
        for (i = 0; i < KVM_NR_BUSES; i++) {
                rcu_assign_pointer(kvm->buses[i],
                        kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT));
@@ -1267,6 +1270,8 @@ out_err_no_arch_destroy_vm:
        WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
        for (i = 0; i < KVM_NR_BUSES; i++)
                kfree(kvm_get_bus(kvm, i));
        kvm_free_irq_routing(kvm);
out_err_no_irq_routing:
        cleanup_srcu_struct(&kvm->irq_srcu);
out_err_no_irq_srcu:
        cleanup_srcu_struct(&kvm->srcu);
@@ -4202,12 +4207,21 @@ static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
/*
 * Creates some virtual cpus. Good luck creating more than one.
 */
static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
{
        int r;
        struct kvm_vcpu *vcpu;
        struct page *page;

        /*
         * KVM tracks vCPU IDs as 'int', be kind to userspace and reject
         * too-large values instead of silently truncating.
         *
         * Ensure KVM_MAX_VCPU_IDS isn't pushed above INT_MAX without first
         * changing the storage type (at the very least, IDs should be tracked
         * as unsigned ints).
         */
        BUILD_BUG_ON(KVM_MAX_VCPU_IDS > INT_MAX);
        if (id >= KVM_MAX_VCPU_IDS)
                return -EINVAL;

@@ -4467,7 +4481,10 @@ static long kvm_vcpu_ioctl(struct file *filp,
                        synchronize_rcu();
                        put_pid(oldpid);
                }
                vcpu->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit__unsafe);
                r = kvm_arch_vcpu_ioctl_run(vcpu);
                vcpu->wants_to_run = false;

                trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
                break;
        }
@@ -6347,8 +6364,9 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
        WRITE_ONCE(vcpu->ready, false);

        __this_cpu_write(kvm_running_vcpu, vcpu);
        kvm_arch_sched_in(vcpu, cpu);
        kvm_arch_vcpu_load(vcpu, cpu);

        WRITE_ONCE(vcpu->scheduled_out, false);
}

static void kvm_sched_out(struct preempt_notifier *pn,
@@ -6356,7 +6374,9 @@ static void kvm_sched_out(struct preempt_notifier *pn,
{
        struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);

        if (current->on_rq) {
        WRITE_ONCE(vcpu->scheduled_out, true);

        if (current->on_rq && vcpu->wants_to_run) {
                WRITE_ONCE(vcpu->preempted, true);
                WRITE_ONCE(vcpu->ready, true);
        }
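Tying this back to the changelog: with kvm_arch_sched_in() gone, an architecture that needs sched-in work keys off the new vcpu->scheduled_out flag from kvm_arch_vcpu_load(), which kvm_sched_in() now calls. A minimal sketch of the pattern (editor's illustration, mirroring what the x86/SVM/VMX hunks above do):

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
        /*
         * scheduled_out is set in kvm_sched_out() and cleared after this
         * hook returns in kvm_sched_in(), so it is true here if and only
         * if the vCPU is being scheduled back in (not a plain vcpu_load()).
         */
        if (vcpu->scheduled_out) {
                /* Work that used to live in kvm_arch_sched_in(), e.g.
                 * shrinking the PLE window or flagging PMU cleanup. */
        }

        /* ... regular load-time work ... */
}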