svm: Implements update_pi_irte hook to setup posted interrupt
This patch implements the update_pi_irte function hook to allow SVM to communicate with the IOMMU driver about how to set up the IRTE for handling posted interrupts. When AVIC is enabled, during vcpu_load/unload, SVM needs to update the IOMMU IRTE with the appropriate host physical APIC ID. Also, on vcpu_blocking/unblocking, SVM needs to update the is-running bit in the IOMMU IRTE. Both are achieved by calling amd_iommu_update_ga(). However, if GA mode is not enabled for the pass-through device, the IOMMU driver will simply return when amd_iommu_update_ga() is called. Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> Reviewed-by: Radim Krčmář <rkrcmar@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
5881f73757
commit
411b44ba80
@ -43,6 +43,7 @@
|
|||||||
#include <asm/desc.h>
|
#include <asm/desc.h>
|
||||||
#include <asm/debugreg.h>
|
#include <asm/debugreg.h>
|
||||||
#include <asm/kvm_para.h>
|
#include <asm/kvm_para.h>
|
||||||
|
#include <asm/irq_remapping.h>
|
||||||
|
|
||||||
#include <asm/virtext.h>
|
#include <asm/virtext.h>
|
||||||
#include "trace.h"
|
#include "trace.h"
|
||||||
@ -200,6 +201,23 @@ struct vcpu_svm {
|
|||||||
struct page *avic_backing_page;
|
struct page *avic_backing_page;
|
||||||
u64 *avic_physical_id_cache;
|
u64 *avic_physical_id_cache;
|
||||||
bool avic_is_running;
|
bool avic_is_running;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Per-vcpu list of struct amd_svm_iommu_ir:
|
||||||
|
* This is used mainly to store interrupt remapping information used
|
||||||
|
* when update the vcpu affinity. This avoids the need to scan for
|
||||||
|
* IRTE and try to match ga_tag in the IOMMU driver.
|
||||||
|
*/
|
||||||
|
struct list_head ir_list;
|
||||||
|
spinlock_t ir_list_lock;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is a wrapper around struct amd_ir_data.
|
||||||
|
*/
|
||||||
|
struct amd_svm_iommu_ir {
|
||||||
|
struct list_head node; /* Used by SVM for per-vcpu ir_list */
|
||||||
|
void *data; /* Storing pointer to struct amd_ir_data */
|
||||||
};
|
};
|
||||||
|
|
||||||
#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
|
#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
|
||||||
@ -1440,31 +1458,34 @@ free_avic:
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
static inline int
|
||||||
* This function is called during VCPU halt/unhalt.
|
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
|
||||||
*/
|
|
||||||
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
|
|
||||||
{
|
{
|
||||||
u64 entry;
|
int ret = 0;
|
||||||
int h_physical_id = kvm_cpu_get_apicid(vcpu->cpu);
|
unsigned long flags;
|
||||||
|
struct amd_svm_iommu_ir *ir;
|
||||||
struct vcpu_svm *svm = to_svm(vcpu);
|
struct vcpu_svm *svm = to_svm(vcpu);
|
||||||
|
|
||||||
if (!kvm_vcpu_apicv_active(vcpu))
|
if (!kvm_arch_has_assigned_device(vcpu->kvm))
|
||||||
return;
|
return 0;
|
||||||
|
|
||||||
svm->avic_is_running = is_run;
|
/*
|
||||||
|
* Here, we go through the per-vcpu ir_list to update all existing
|
||||||
|
* interrupt remapping table entry targeting this vcpu.
|
||||||
|
*/
|
||||||
|
spin_lock_irqsave(&svm->ir_list_lock, flags);
|
||||||
|
|
||||||
/* ID = 0xff (broadcast), ID > 0xff (reserved) */
|
if (list_empty(&svm->ir_list))
|
||||||
if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
|
goto out;
|
||||||
return;
|
|
||||||
|
|
||||||
entry = READ_ONCE(*(svm->avic_physical_id_cache));
|
list_for_each_entry(ir, &svm->ir_list, node) {
|
||||||
WARN_ON(is_run == !!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK));
|
ret = amd_iommu_update_ga(cpu, r, ir->data);
|
||||||
|
if (ret)
|
||||||
entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
|
break;
|
||||||
if (is_run)
|
}
|
||||||
entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
|
out:
|
||||||
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
|
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||||
@ -1491,6 +1512,8 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|||||||
entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
|
entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
|
||||||
|
|
||||||
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
|
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
|
||||||
|
avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
|
||||||
|
svm->avic_is_running);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void avic_vcpu_put(struct kvm_vcpu *vcpu)
|
static void avic_vcpu_put(struct kvm_vcpu *vcpu)
|
||||||
@ -1502,10 +1525,27 @@ static void avic_vcpu_put(struct kvm_vcpu *vcpu)
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
entry = READ_ONCE(*(svm->avic_physical_id_cache));
|
entry = READ_ONCE(*(svm->avic_physical_id_cache));
|
||||||
|
if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
|
||||||
|
avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
|
||||||
|
|
||||||
entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
|
entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
|
||||||
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
|
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This function is called during VCPU halt/unhalt.
|
||||||
|
*/
|
||||||
|
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
|
||||||
|
{
|
||||||
|
struct vcpu_svm *svm = to_svm(vcpu);
|
||||||
|
|
||||||
|
svm->avic_is_running = is_run;
|
||||||
|
if (is_run)
|
||||||
|
avic_vcpu_load(vcpu, vcpu->cpu);
|
||||||
|
else
|
||||||
|
avic_vcpu_put(vcpu);
|
||||||
|
}
|
||||||
|
|
||||||
static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||||
{
|
{
|
||||||
struct vcpu_svm *svm = to_svm(vcpu);
|
struct vcpu_svm *svm = to_svm(vcpu);
|
||||||
@ -1567,6 +1607,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
|
|||||||
err = avic_init_backing_page(&svm->vcpu);
|
err = avic_init_backing_page(&svm->vcpu);
|
||||||
if (err)
|
if (err)
|
||||||
goto free_page4;
|
goto free_page4;
|
||||||
|
|
||||||
|
INIT_LIST_HEAD(&svm->ir_list);
|
||||||
|
spin_lock_init(&svm->ir_list_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We initialize this flag to true to make sure that the is_running
|
/* We initialize this flag to true to make sure that the is_running
|
||||||
@ -4363,6 +4406,209 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
|
|||||||
kvm_vcpu_wake_up(vcpu);
|
kvm_vcpu_wake_up(vcpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
|
||||||
|
{
|
||||||
|
unsigned long flags;
|
||||||
|
struct amd_svm_iommu_ir *cur;
|
||||||
|
|
||||||
|
spin_lock_irqsave(&svm->ir_list_lock, flags);
|
||||||
|
list_for_each_entry(cur, &svm->ir_list, node) {
|
||||||
|
if (cur->data != pi->ir_data)
|
||||||
|
continue;
|
||||||
|
list_del(&cur->node);
|
||||||
|
kfree(cur);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
unsigned long flags;
|
||||||
|
struct amd_svm_iommu_ir *ir;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* In some cases, the existing irte is updaed and re-set,
|
||||||
|
* so we need to check here if it's already been * added
|
||||||
|
* to the ir_list.
|
||||||
|
*/
|
||||||
|
if (pi->ir_data && (pi->prev_ga_tag != 0)) {
|
||||||
|
struct kvm *kvm = svm->vcpu.kvm;
|
||||||
|
u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
|
||||||
|
struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
|
||||||
|
struct vcpu_svm *prev_svm;
|
||||||
|
|
||||||
|
if (!prev_vcpu) {
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
prev_svm = to_svm(prev_vcpu);
|
||||||
|
svm_ir_list_del(prev_svm, pi);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocating new amd_iommu_pi_data, which will get
|
||||||
|
* add to the per-vcpu ir_list.
|
||||||
|
*/
|
||||||
|
ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL);
|
||||||
|
if (!ir) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
ir->data = pi->ir_data;
|
||||||
|
|
||||||
|
spin_lock_irqsave(&svm->ir_list_lock, flags);
|
||||||
|
list_add(&ir->node, &svm->ir_list);
|
||||||
|
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
|
||||||
|
out:
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Note:
|
||||||
|
* The HW cannot support posting multicast/broadcast
|
||||||
|
* interrupts to a vCPU. So, we still use legacy interrupt
|
||||||
|
* remapping for these kind of interrupts.
|
||||||
|
*
|
||||||
|
* For lowest-priority interrupts, we only support
|
||||||
|
* those with single CPU as the destination, e.g. user
|
||||||
|
* configures the interrupts via /proc/irq or uses
|
||||||
|
* irqbalance to make the interrupts single-CPU.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
|
||||||
|
struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
|
||||||
|
{
|
||||||
|
struct kvm_lapic_irq irq;
|
||||||
|
struct kvm_vcpu *vcpu = NULL;
|
||||||
|
|
||||||
|
kvm_set_msi_irq(kvm, e, &irq);
|
||||||
|
|
||||||
|
if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
|
||||||
|
pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
|
||||||
|
__func__, irq.vector);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
|
||||||
|
irq.vector);
|
||||||
|
*svm = to_svm(vcpu);
|
||||||
|
vcpu_info->pi_desc_addr = page_to_phys((*svm)->avic_backing_page);
|
||||||
|
vcpu_info->vector = irq.vector;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* svm_update_pi_irte - set IRTE for Posted-Interrupts
|
||||||
|
*
|
||||||
|
* @kvm: kvm
|
||||||
|
* @host_irq: host irq of the interrupt
|
||||||
|
* @guest_irq: gsi of the interrupt
|
||||||
|
* @set: set or unset PI
|
||||||
|
* returns 0 on success, < 0 on failure
|
||||||
|
*/
|
||||||
|
static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
|
||||||
|
uint32_t guest_irq, bool set)
|
||||||
|
{
|
||||||
|
struct kvm_kernel_irq_routing_entry *e;
|
||||||
|
struct kvm_irq_routing_table *irq_rt;
|
||||||
|
int idx, ret = -EINVAL;
|
||||||
|
|
||||||
|
if (!kvm_arch_has_assigned_device(kvm) ||
|
||||||
|
!irq_remapping_cap(IRQ_POSTING_CAP))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
|
||||||
|
__func__, host_irq, guest_irq, set);
|
||||||
|
|
||||||
|
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||||
|
irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
|
||||||
|
WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
|
||||||
|
|
||||||
|
hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
|
||||||
|
struct vcpu_data vcpu_info;
|
||||||
|
struct vcpu_svm *svm = NULL;
|
||||||
|
|
||||||
|
if (e->type != KVM_IRQ_ROUTING_MSI)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Here, we setup with legacy mode in the following cases:
|
||||||
|
* 1. When cannot target interrupt to a specific vcpu.
|
||||||
|
* 2. Unsetting posted interrupt.
|
||||||
|
* 3. APIC virtialization is disabled for the vcpu.
|
||||||
|
*/
|
||||||
|
if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
|
||||||
|
kvm_vcpu_apicv_active(&svm->vcpu)) {
|
||||||
|
struct amd_iommu_pi_data pi;
|
||||||
|
|
||||||
|
/* Try to enable guest_mode in IRTE */
|
||||||
|
pi.base = page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK;
|
||||||
|
pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
|
||||||
|
svm->vcpu.vcpu_id);
|
||||||
|
pi.is_guest_mode = true;
|
||||||
|
pi.vcpu_data = &vcpu_info;
|
||||||
|
ret = irq_set_vcpu_affinity(host_irq, &pi);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Here, we successfully setting up vcpu affinity in
|
||||||
|
* IOMMU guest mode. Now, we need to store the posted
|
||||||
|
* interrupt information in a per-vcpu ir_list so that
|
||||||
|
* we can reference to them directly when we update vcpu
|
||||||
|
* scheduling information in IOMMU irte.
|
||||||
|
*/
|
||||||
|
if (!ret && pi.is_guest_mode)
|
||||||
|
svm_ir_list_add(svm, &pi);
|
||||||
|
} else {
|
||||||
|
/* Use legacy mode in IRTE */
|
||||||
|
struct amd_iommu_pi_data pi;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Here, pi is used to:
|
||||||
|
* - Tell IOMMU to use legacy mode for this interrupt.
|
||||||
|
* - Retrieve ga_tag of prior interrupt remapping data.
|
||||||
|
*/
|
||||||
|
pi.is_guest_mode = false;
|
||||||
|
ret = irq_set_vcpu_affinity(host_irq, &pi);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the posted interrupt was previously
|
||||||
|
* setup with the guest_mode by checking if the ga_tag
|
||||||
|
* was cached. If so, we need to clean up the per-vcpu
|
||||||
|
* ir_list.
|
||||||
|
*/
|
||||||
|
if (!ret && pi.prev_ga_tag) {
|
||||||
|
int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
|
||||||
|
struct kvm_vcpu *vcpu;
|
||||||
|
|
||||||
|
vcpu = kvm_get_vcpu_by_id(kvm, id);
|
||||||
|
if (vcpu)
|
||||||
|
svm_ir_list_del(to_svm(vcpu), &pi);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!ret && svm) {
|
||||||
|
trace_kvm_pi_irte_update(svm->vcpu.vcpu_id,
|
||||||
|
host_irq, e->gsi,
|
||||||
|
vcpu_info.vector,
|
||||||
|
vcpu_info.pi_desc_addr, set);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret < 0) {
|
||||||
|
pr_err("%s: failed to update PI IRTE\n", __func__);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
|
out:
|
||||||
|
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
|
static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
struct vcpu_svm *svm = to_svm(vcpu);
|
struct vcpu_svm *svm = to_svm(vcpu);
|
||||||
@ -5195,6 +5441,7 @@ static struct kvm_x86_ops svm_x86_ops = {
|
|||||||
|
|
||||||
.pmu_ops = &amd_pmu_ops,
|
.pmu_ops = &amd_pmu_ops,
|
||||||
.deliver_posted_interrupt = svm_deliver_avic_intr,
|
.deliver_posted_interrupt = svm_deliver_avic_intr,
|
||||||
|
.update_pi_irte = svm_update_pi_irte,
|
||||||
};
|
};
|
||||||
|
|
||||||
static int __init svm_init(void)
|
static int __init svm_init(void)
|
||||||
|
Loading…
Reference in New Issue
Block a user