svm: Implements update_pi_irte hook to setup posted interrupt

This patch implements update_pi_irte function hook to allow SVM
communicate to IOMMU driver regarding how to set up IRTE for handling
posted interrupt.

In case AVIC is enabled, during vcpu_load/unload, SVM needs to update
IOMMU IRTE with appropriate host physical APIC ID. Also, when
vcpu_blocking/unblocking, SVM needs to update the is-running bit in
the IOMMU IRTE. Both are achieved via calling amd_iommu_update_ga().

However, if GA mode is not enabled for the pass-through device,
IOMMU driver will simply just return when calling amd_iommu_update_ga.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Suravee Suthikulpanit 2016-08-23 13:52:43 -05:00 committed by Paolo Bonzini
parent 5881f73757
commit 411b44ba80

View File

@ -43,6 +43,7 @@
#include <asm/desc.h> #include <asm/desc.h>
#include <asm/debugreg.h> #include <asm/debugreg.h>
#include <asm/kvm_para.h> #include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
#include <asm/virtext.h> #include <asm/virtext.h>
#include "trace.h" #include "trace.h"
@ -200,6 +201,23 @@ struct vcpu_svm {
struct page *avic_backing_page; struct page *avic_backing_page;
u64 *avic_physical_id_cache; u64 *avic_physical_id_cache;
bool avic_is_running; bool avic_is_running;
/*
* Per-vcpu list of struct amd_svm_iommu_ir:
* This is used mainly to store interrupt remapping information used
* when update the vcpu affinity. This avoids the need to scan for
* IRTE and try to match ga_tag in the IOMMU driver.
*/
struct list_head ir_list;
spinlock_t ir_list_lock;
};
/*
* This is a wrapper of struct amd_iommu_ir_data.
*/
struct amd_svm_iommu_ir {
struct list_head node; /* Used by SVM for per-vcpu ir_list */
void *data; /* Storing pointer to struct amd_ir_data */
}; };
#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF) #define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
@ -1440,31 +1458,34 @@ free_avic:
return err; return err;
} }
/** static inline int
* This function is called during VCPU halt/unhalt. avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
*/
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
{ {
u64 entry; int ret = 0;
int h_physical_id = kvm_cpu_get_apicid(vcpu->cpu); unsigned long flags;
struct amd_svm_iommu_ir *ir;
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
if (!kvm_vcpu_apicv_active(vcpu)) if (!kvm_arch_has_assigned_device(vcpu->kvm))
return; return 0;
svm->avic_is_running = is_run; /*
* Here, we go through the per-vcpu ir_list to update all existing
* interrupt remapping table entry targeting this vcpu.
*/
spin_lock_irqsave(&svm->ir_list_lock, flags);
/* ID = 0xff (broadcast), ID > 0xff (reserved) */ if (list_empty(&svm->ir_list))
if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT)) goto out;
return;
entry = READ_ONCE(*(svm->avic_physical_id_cache)); list_for_each_entry(ir, &svm->ir_list, node) {
WARN_ON(is_run == !!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)); ret = amd_iommu_update_ga(cpu, r, ir->data);
if (ret)
entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; break;
if (is_run) }
entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; out:
WRITE_ONCE(*(svm->avic_physical_id_cache), entry); spin_unlock_irqrestore(&svm->ir_list_lock, flags);
return ret;
} }
static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@ -1491,6 +1512,8 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
WRITE_ONCE(*(svm->avic_physical_id_cache), entry); WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
svm->avic_is_running);
} }
static void avic_vcpu_put(struct kvm_vcpu *vcpu) static void avic_vcpu_put(struct kvm_vcpu *vcpu)
@ -1502,10 +1525,27 @@ static void avic_vcpu_put(struct kvm_vcpu *vcpu)
return; return;
entry = READ_ONCE(*(svm->avic_physical_id_cache)); entry = READ_ONCE(*(svm->avic_physical_id_cache));
if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
WRITE_ONCE(*(svm->avic_physical_id_cache), entry); WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
} }
/**
* This function is called during VCPU halt/unhalt.
*/
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
{
struct vcpu_svm *svm = to_svm(vcpu);
svm->avic_is_running = is_run;
if (is_run)
avic_vcpu_load(vcpu, vcpu->cpu);
else
avic_vcpu_put(vcpu);
}
static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
@ -1567,6 +1607,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
err = avic_init_backing_page(&svm->vcpu); err = avic_init_backing_page(&svm->vcpu);
if (err) if (err)
goto free_page4; goto free_page4;
INIT_LIST_HEAD(&svm->ir_list);
spin_lock_init(&svm->ir_list_lock);
} }
/* We initialize this flag to true to make sure that the is_running /* We initialize this flag to true to make sure that the is_running
@ -4363,6 +4406,209 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
kvm_vcpu_wake_up(vcpu); kvm_vcpu_wake_up(vcpu);
} }
static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
unsigned long flags;
struct amd_svm_iommu_ir *cur;
spin_lock_irqsave(&svm->ir_list_lock, flags);
list_for_each_entry(cur, &svm->ir_list, node) {
if (cur->data != pi->ir_data)
continue;
list_del(&cur->node);
kfree(cur);
break;
}
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}
static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
int ret = 0;
unsigned long flags;
struct amd_svm_iommu_ir *ir;
/**
* In some cases, the existing irte is updaed and re-set,
* so we need to check here if it's already been * added
* to the ir_list.
*/
if (pi->ir_data && (pi->prev_ga_tag != 0)) {
struct kvm *kvm = svm->vcpu.kvm;
u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
struct vcpu_svm *prev_svm;
if (!prev_vcpu) {
ret = -EINVAL;
goto out;
}
prev_svm = to_svm(prev_vcpu);
svm_ir_list_del(prev_svm, pi);
}
/**
* Allocating new amd_iommu_pi_data, which will get
* add to the per-vcpu ir_list.
*/
ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL);
if (!ir) {
ret = -ENOMEM;
goto out;
}
ir->data = pi->ir_data;
spin_lock_irqsave(&svm->ir_list_lock, flags);
list_add(&ir->node, &svm->ir_list);
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
out:
return ret;
}
/**
* Note:
* The HW cannot support posting multicast/broadcast
* interrupts to a vCPU. So, we still use legacy interrupt
* remapping for these kind of interrupts.
*
* For lowest-priority interrupts, we only support
* those with single CPU as the destination, e.g. user
* configures the interrupts via /proc/irq or uses
* irqbalance to make the interrupts single-CPU.
*/
static int
get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
{
struct kvm_lapic_irq irq;
struct kvm_vcpu *vcpu = NULL;
kvm_set_msi_irq(kvm, e, &irq);
if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
__func__, irq.vector);
return -1;
}
pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
irq.vector);
*svm = to_svm(vcpu);
vcpu_info->pi_desc_addr = page_to_phys((*svm)->avic_backing_page);
vcpu_info->vector = irq.vector;
return 0;
}
/*
* svm_update_pi_irte - set IRTE for Posted-Interrupts
*
* @kvm: kvm
* @host_irq: host irq of the interrupt
* @guest_irq: gsi of the interrupt
* @set: set or unset PI
* returns 0 on success, < 0 on failure
*/
static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set)
{
struct kvm_kernel_irq_routing_entry *e;
struct kvm_irq_routing_table *irq_rt;
int idx, ret = -EINVAL;
if (!kvm_arch_has_assigned_device(kvm) ||
!irq_remapping_cap(IRQ_POSTING_CAP))
return 0;
pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
__func__, host_irq, guest_irq, set);
idx = srcu_read_lock(&kvm->irq_srcu);
irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
struct vcpu_data vcpu_info;
struct vcpu_svm *svm = NULL;
if (e->type != KVM_IRQ_ROUTING_MSI)
continue;
/**
* Here, we setup with legacy mode in the following cases:
* 1. When cannot target interrupt to a specific vcpu.
* 2. Unsetting posted interrupt.
* 3. APIC virtialization is disabled for the vcpu.
*/
if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
kvm_vcpu_apicv_active(&svm->vcpu)) {
struct amd_iommu_pi_data pi;
/* Try to enable guest_mode in IRTE */
pi.base = page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK;
pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
svm->vcpu.vcpu_id);
pi.is_guest_mode = true;
pi.vcpu_data = &vcpu_info;
ret = irq_set_vcpu_affinity(host_irq, &pi);
/**
* Here, we successfully setting up vcpu affinity in
* IOMMU guest mode. Now, we need to store the posted
* interrupt information in a per-vcpu ir_list so that
* we can reference to them directly when we update vcpu
* scheduling information in IOMMU irte.
*/
if (!ret && pi.is_guest_mode)
svm_ir_list_add(svm, &pi);
} else {
/* Use legacy mode in IRTE */
struct amd_iommu_pi_data pi;
/**
* Here, pi is used to:
* - Tell IOMMU to use legacy mode for this interrupt.
* - Retrieve ga_tag of prior interrupt remapping data.
*/
pi.is_guest_mode = false;
ret = irq_set_vcpu_affinity(host_irq, &pi);
/**
* Check if the posted interrupt was previously
* setup with the guest_mode by checking if the ga_tag
* was cached. If so, we need to clean up the per-vcpu
* ir_list.
*/
if (!ret && pi.prev_ga_tag) {
int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
struct kvm_vcpu *vcpu;
vcpu = kvm_get_vcpu_by_id(kvm, id);
if (vcpu)
svm_ir_list_del(to_svm(vcpu), &pi);
}
}
if (!ret && svm) {
trace_kvm_pi_irte_update(svm->vcpu.vcpu_id,
host_irq, e->gsi,
vcpu_info.vector,
vcpu_info.pi_desc_addr, set);
}
if (ret < 0) {
pr_err("%s: failed to update PI IRTE\n", __func__);
goto out;
}
}
ret = 0;
out:
srcu_read_unlock(&kvm->irq_srcu, idx);
return ret;
}
static int svm_nmi_allowed(struct kvm_vcpu *vcpu) static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
@ -5195,6 +5441,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.pmu_ops = &amd_pmu_ops, .pmu_ops = &amd_pmu_ops,
.deliver_posted_interrupt = svm_deliver_avic_intr, .deliver_posted_interrupt = svm_deliver_avic_intr,
.update_pi_irte = svm_update_pi_irte,
}; };
static int __init svm_init(void) static int __init svm_init(void)