KVM: Update Posted-Interrupts Descriptor when vCPU is blocked
This patch updates the Posted-Interrupts Descriptor when vCPU is blocked. pre-block: - Add the vCPU to the blocked per-CPU list - Set 'NV' to POSTED_INTR_WAKEUP_VECTOR post-block: - Remove the vCPU from the per-CPU list Signed-off-by: Feng Wu <feng.wu@intel.com> [Concentrate invocation of pre/post-block hooks to vcpu_block. - Paolo] Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
28b835d60f
commit
bf9f6ac8d7
@ -166,3 +166,15 @@ Comment: The srcu read lock must be held while accessing memslots (e.g.
|
|||||||
MMIO/PIO address->device structure mapping (kvm->buses).
|
MMIO/PIO address->device structure mapping (kvm->buses).
|
||||||
The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
|
The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
|
||||||
if it is needed by multiple functions.
|
if it is needed by multiple functions.
|
||||||
|
|
||||||
|
Name: blocked_vcpu_on_cpu_lock
|
||||||
|
Type: spinlock_t
|
||||||
|
Arch: x86
|
||||||
|
Protects: blocked_vcpu_on_cpu
|
||||||
|
Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts.
|
||||||
|
When VT-d posted-interrupts is supported and the VM has assigned
|
||||||
|
devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
|
||||||
|
protected by blocked_vcpu_on_cpu_lock. When VT-d hardware issues
|
||||||
|
a wakeup notification event because external interrupts from the
|
||||||
|
assigned devices happen, we will find the vCPU on the list to
|
||||||
|
wakeup.
|
||||||
|
@ -899,6 +899,17 @@ struct kvm_x86_ops {
|
|||||||
/* pmu operations of sub-arch */
|
/* pmu operations of sub-arch */
|
||||||
const struct kvm_pmu_ops *pmu_ops;
|
const struct kvm_pmu_ops *pmu_ops;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Architecture specific hooks for vCPU blocking due to
|
||||||
|
* HLT instruction.
|
||||||
|
* Returns for .pre_block():
|
||||||
|
* - 0 means continue to block the vCPU.
|
||||||
|
* - 1 means we cannot block the vCPU since some event
|
||||||
|
* happens during this period, e.g. the 'ON' bit in the
|
||||||
|
* posted-interrupts descriptor is set.
|
||||||
|
*/
|
||||||
|
int (*pre_block)(struct kvm_vcpu *vcpu);
|
||||||
|
void (*post_block)(struct kvm_vcpu *vcpu);
|
||||||
int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
|
int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
|
||||||
uint32_t guest_irq, bool set);
|
uint32_t guest_irq, bool set);
|
||||||
};
|
};
|
||||||
|
@ -878,6 +878,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
|
|||||||
static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
|
static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
|
||||||
static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
|
static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We maintain a per-CPU linked list of vCPUs, so in wakeup_handler() we
|
||||||
|
* can find which vCPU should be woken up.
|
||||||
|
*/
|
||||||
|
static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
|
||||||
|
static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
|
||||||
|
|
||||||
static unsigned long *vmx_io_bitmap_a;
|
static unsigned long *vmx_io_bitmap_a;
|
||||||
static unsigned long *vmx_io_bitmap_b;
|
static unsigned long *vmx_io_bitmap_b;
|
||||||
static unsigned long *vmx_msr_bitmap_legacy;
|
static unsigned long *vmx_msr_bitmap_legacy;
|
||||||
@ -2986,6 +2993,8 @@ static int hardware_enable(void)
|
|||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
|
|
||||||
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
|
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
|
||||||
|
INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
|
||||||
|
spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now we can enable the vmclear operation in kdump
|
* Now we can enable the vmclear operation in kdump
|
||||||
@ -6045,6 +6054,25 @@ static void update_ple_window_actual_max(void)
|
|||||||
ple_window_grow, INT_MIN);
|
ple_window_grow, INT_MIN);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
|
||||||
|
*/
|
||||||
|
static void wakeup_handler(void)
|
||||||
|
{
|
||||||
|
struct kvm_vcpu *vcpu;
|
||||||
|
int cpu = smp_processor_id();
|
||||||
|
|
||||||
|
spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
|
||||||
|
list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
|
||||||
|
blocked_vcpu_list) {
|
||||||
|
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
|
||||||
|
|
||||||
|
if (pi_test_on(pi_desc) == 1)
|
||||||
|
kvm_vcpu_kick(vcpu);
|
||||||
|
}
|
||||||
|
spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
|
||||||
|
}
|
||||||
|
|
||||||
static __init int hardware_setup(void)
|
static __init int hardware_setup(void)
|
||||||
{
|
{
|
||||||
int r = -ENOMEM, i, msr;
|
int r = -ENOMEM, i, msr;
|
||||||
@ -6231,6 +6259,8 @@ static __init int hardware_setup(void)
|
|||||||
kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
|
kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
kvm_set_posted_intr_wakeup_handler(wakeup_handler);
|
||||||
|
|
||||||
return alloc_kvm_area();
|
return alloc_kvm_area();
|
||||||
|
|
||||||
out8:
|
out8:
|
||||||
@ -10431,6 +10461,126 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
|
|||||||
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
|
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This routine does the following things for vCPU which is going
|
||||||
|
* to be blocked if VT-d PI is enabled.
|
||||||
|
* - Store the vCPU to the wakeup list, so when interrupts happen
|
||||||
|
* we can find the right vCPU to wake up.
|
||||||
|
* - Change the Posted-interrupt descriptor as below:
|
||||||
|
* 'NDST' <-- vcpu->pre_pcpu
|
||||||
|
* 'NV' <-- POSTED_INTR_WAKEUP_VECTOR
|
||||||
|
* - If 'ON' is set during this process, which means at least one
|
||||||
|
* interrupt is posted for this vCPU, we cannot block it; in
|
||||||
|
* this case, return 1; otherwise, return 0.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static int vmx_pre_block(struct kvm_vcpu *vcpu)
|
||||||
|
{
|
||||||
|
unsigned long flags;
|
||||||
|
unsigned int dest;
|
||||||
|
struct pi_desc old, new;
|
||||||
|
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
|
||||||
|
|
||||||
|
if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
|
||||||
|
!irq_remapping_cap(IRQ_POSTING_CAP))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
vcpu->pre_pcpu = vcpu->cpu;
|
||||||
|
spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
|
||||||
|
vcpu->pre_pcpu), flags);
|
||||||
|
list_add_tail(&vcpu->blocked_vcpu_list,
|
||||||
|
&per_cpu(blocked_vcpu_on_cpu,
|
||||||
|
vcpu->pre_pcpu));
|
||||||
|
spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
|
||||||
|
vcpu->pre_pcpu), flags);
|
||||||
|
|
||||||
|
do {
|
||||||
|
old.control = new.control = pi_desc->control;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We should not block the vCPU if
|
||||||
|
* an interrupt is posted for it.
|
||||||
|
*/
|
||||||
|
if (pi_test_on(pi_desc) == 1) {
|
||||||
|
spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
|
||||||
|
vcpu->pre_pcpu), flags);
|
||||||
|
list_del(&vcpu->blocked_vcpu_list);
|
||||||
|
spin_unlock_irqrestore(
|
||||||
|
&per_cpu(blocked_vcpu_on_cpu_lock,
|
||||||
|
vcpu->pre_pcpu), flags);
|
||||||
|
vcpu->pre_pcpu = -1;
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
WARN((pi_desc->sn == 1),
|
||||||
|
"Warning: SN field of posted-interrupts "
|
||||||
|
"is set before blocking\n");
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Since vCPU can be preempted during this process,
|
||||||
|
* vcpu->cpu could be different from pre_pcpu, we
|
||||||
|
* need to set pre_pcpu as the destination of wakeup
|
||||||
|
* notification event, then we can find the right vCPU
|
||||||
|
* to wakeup in wakeup handler if interrupts happen
|
||||||
|
* when the vCPU is in blocked state.
|
||||||
|
*/
|
||||||
|
dest = cpu_physical_id(vcpu->pre_pcpu);
|
||||||
|
|
||||||
|
if (x2apic_enabled())
|
||||||
|
new.ndst = dest;
|
||||||
|
else
|
||||||
|
new.ndst = (dest << 8) & 0xFF00;
|
||||||
|
|
||||||
|
/* set 'NV' to 'wakeup vector' */
|
||||||
|
new.nv = POSTED_INTR_WAKEUP_VECTOR;
|
||||||
|
} while (cmpxchg(&pi_desc->control, old.control,
|
||||||
|
new.control) != old.control);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vmx_post_block(struct kvm_vcpu *vcpu)
|
||||||
|
{
|
||||||
|
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
|
||||||
|
struct pi_desc old, new;
|
||||||
|
unsigned int dest;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
|
||||||
|
!irq_remapping_cap(IRQ_POSTING_CAP))
|
||||||
|
return;
|
||||||
|
|
||||||
|
do {
|
||||||
|
old.control = new.control = pi_desc->control;
|
||||||
|
|
||||||
|
dest = cpu_physical_id(vcpu->cpu);
|
||||||
|
|
||||||
|
if (x2apic_enabled())
|
||||||
|
new.ndst = dest;
|
||||||
|
else
|
||||||
|
new.ndst = (dest << 8) & 0xFF00;
|
||||||
|
|
||||||
|
/* Allow posting non-urgent interrupts */
|
||||||
|
new.sn = 0;
|
||||||
|
|
||||||
|
/* set 'NV' to 'notification vector' */
|
||||||
|
new.nv = POSTED_INTR_VECTOR;
|
||||||
|
} while (cmpxchg(&pi_desc->control, old.control,
|
||||||
|
new.control) != old.control);
|
||||||
|
|
||||||
|
if(vcpu->pre_pcpu != -1) {
|
||||||
|
spin_lock_irqsave(
|
||||||
|
&per_cpu(blocked_vcpu_on_cpu_lock,
|
||||||
|
vcpu->pre_pcpu), flags);
|
||||||
|
list_del(&vcpu->blocked_vcpu_list);
|
||||||
|
spin_unlock_irqrestore(
|
||||||
|
&per_cpu(blocked_vcpu_on_cpu_lock,
|
||||||
|
vcpu->pre_pcpu), flags);
|
||||||
|
vcpu->pre_pcpu = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* vmx_update_pi_irte - set IRTE for Posted-Interrupts
|
* vmx_update_pi_irte - set IRTE for Posted-Interrupts
|
||||||
*
|
*
|
||||||
@ -10622,6 +10772,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
|
|||||||
.flush_log_dirty = vmx_flush_log_dirty,
|
.flush_log_dirty = vmx_flush_log_dirty,
|
||||||
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
|
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
|
||||||
|
|
||||||
|
.pre_block = vmx_pre_block,
|
||||||
|
.post_block = vmx_post_block,
|
||||||
|
|
||||||
.pmu_ops = &intel_pmu_ops,
|
.pmu_ops = &intel_pmu_ops,
|
||||||
|
|
||||||
.update_pi_irte = vmx_update_pi_irte,
|
.update_pi_irte = vmx_update_pi_irte,
|
||||||
|
@ -6335,6 +6335,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* KVM_REQ_EVENT is not set when posted interrupts are set by
|
||||||
|
* VT-d hardware, so we have to update RVI unconditionally.
|
||||||
|
*/
|
||||||
|
if (kvm_lapic_enabled(vcpu)) {
|
||||||
|
/*
|
||||||
|
* Update architecture specific hints for APIC
|
||||||
|
* virtual interrupt delivery.
|
||||||
|
*/
|
||||||
|
if (kvm_x86_ops->hwapic_irr_update)
|
||||||
|
kvm_x86_ops->hwapic_irr_update(vcpu,
|
||||||
|
kvm_lapic_find_highest_irr(vcpu));
|
||||||
|
}
|
||||||
|
|
||||||
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
|
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
|
||||||
kvm_apic_accept_events(vcpu);
|
kvm_apic_accept_events(vcpu);
|
||||||
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
|
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
|
||||||
@ -6351,13 +6365,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||||||
kvm_x86_ops->enable_irq_window(vcpu);
|
kvm_x86_ops->enable_irq_window(vcpu);
|
||||||
|
|
||||||
if (kvm_lapic_enabled(vcpu)) {
|
if (kvm_lapic_enabled(vcpu)) {
|
||||||
/*
|
|
||||||
* Update architecture specific hints for APIC
|
|
||||||
* virtual interrupt delivery.
|
|
||||||
*/
|
|
||||||
if (kvm_x86_ops->hwapic_irr_update)
|
|
||||||
kvm_x86_ops->hwapic_irr_update(vcpu,
|
|
||||||
kvm_lapic_find_highest_irr(vcpu));
|
|
||||||
update_cr8_intercept(vcpu);
|
update_cr8_intercept(vcpu);
|
||||||
kvm_lapic_sync_to_vapic(vcpu);
|
kvm_lapic_sync_to_vapic(vcpu);
|
||||||
}
|
}
|
||||||
@ -6493,10 +6500,15 @@ out:
|
|||||||
|
|
||||||
static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
|
static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
if (!kvm_arch_vcpu_runnable(vcpu)) {
|
if (!kvm_arch_vcpu_runnable(vcpu) &&
|
||||||
|
(!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) {
|
||||||
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
|
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
|
||||||
kvm_vcpu_block(vcpu);
|
kvm_vcpu_block(vcpu);
|
||||||
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
|
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||||
|
|
||||||
|
if (kvm_x86_ops->post_block)
|
||||||
|
kvm_x86_ops->post_block(vcpu);
|
||||||
|
|
||||||
if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
|
if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -6528,10 +6540,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
|
|||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
|
if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
|
||||||
!vcpu->arch.apf.halted)
|
!vcpu->arch.apf.halted) {
|
||||||
r = vcpu_enter_guest(vcpu);
|
r = vcpu_enter_guest(vcpu);
|
||||||
else
|
} else {
|
||||||
r = vcpu_block(kvm, vcpu);
|
r = vcpu_block(kvm, vcpu);
|
||||||
|
}
|
||||||
|
|
||||||
if (r <= 0)
|
if (r <= 0)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -234,6 +234,9 @@ struct kvm_vcpu {
|
|||||||
unsigned long requests;
|
unsigned long requests;
|
||||||
unsigned long guest_debug;
|
unsigned long guest_debug;
|
||||||
|
|
||||||
|
int pre_pcpu;
|
||||||
|
struct list_head blocked_vcpu_list;
|
||||||
|
|
||||||
struct mutex mutex;
|
struct mutex mutex;
|
||||||
struct kvm_run *run;
|
struct kvm_run *run;
|
||||||
|
|
||||||
|
@ -230,6 +230,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
|
|||||||
init_waitqueue_head(&vcpu->wq);
|
init_waitqueue_head(&vcpu->wq);
|
||||||
kvm_async_pf_vcpu_init(vcpu);
|
kvm_async_pf_vcpu_init(vcpu);
|
||||||
|
|
||||||
|
vcpu->pre_pcpu = -1;
|
||||||
|
INIT_LIST_HEAD(&vcpu->blocked_vcpu_list);
|
||||||
|
|
||||||
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
|
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
|
||||||
if (!page) {
|
if (!page) {
|
||||||
r = -ENOMEM;
|
r = -ENOMEM;
|
||||||
|
Loading…
Reference in New Issue
Block a user