Merge commit 'origin/master' into next

Conflicts:
	include/linux/kvm.h
This commit is contained in:
Benjamin Herrenschmidt
2009-12-09 17:14:38 +11:00
3651 changed files with 249180 additions and 115589 deletions

818
virt/kvm/assigned-dev.c Normal file
View File

@@ -0,0 +1,818 @@
/*
* Kernel-based Virtual Machine - device assignment support
*
* Copyright (C) 2006-9 Red Hat, Inc
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
*/
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include "irq.h"
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
int assigned_dev_id)
{
struct list_head *ptr;
struct kvm_assigned_dev_kernel *match;
list_for_each(ptr, head) {
match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
if (match->assigned_dev_id == assigned_dev_id)
return match;
}
return NULL;
}
static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
*assigned_dev, int irq)
{
int i, index;
struct msix_entry *host_msix_entries;
host_msix_entries = assigned_dev->host_msix_entries;
index = -1;
for (i = 0; i < assigned_dev->entries_nr; i++)
if (irq == host_msix_entries[i].vector) {
index = i;
break;
}
if (index < 0) {
printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
return 0;
}
return index;
}
static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
{
struct kvm_assigned_dev_kernel *assigned_dev;
struct kvm *kvm;
int i;
assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
interrupt_work);
kvm = assigned_dev->kvm;
spin_lock_irq(&assigned_dev->assigned_dev_lock);
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
struct kvm_guest_msix_entry *guest_entries =
assigned_dev->guest_msix_entries;
for (i = 0; i < assigned_dev->entries_nr; i++) {
if (!(guest_entries[i].flags &
KVM_ASSIGNED_MSIX_PENDING))
continue;
guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING;
kvm_set_irq(assigned_dev->kvm,
assigned_dev->irq_source_id,
guest_entries[i].vector, 1);
}
} else
kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
assigned_dev->guest_irq, 1);
spin_unlock_irq(&assigned_dev->assigned_dev_lock);
}
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
{
unsigned long flags;
struct kvm_assigned_dev_kernel *assigned_dev =
(struct kvm_assigned_dev_kernel *) dev_id;
spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
int index = find_index_from_host_irq(assigned_dev, irq);
if (index < 0)
goto out;
assigned_dev->guest_msix_entries[index].flags |=
KVM_ASSIGNED_MSIX_PENDING;
}
schedule_work(&assigned_dev->interrupt_work);
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
disable_irq_nosync(irq);
assigned_dev->host_irq_disabled = true;
}
out:
spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
return IRQ_HANDLED;
}
/* Ack the irq line for an assigned device */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
struct kvm_assigned_dev_kernel *dev;
unsigned long flags;
if (kian->gsi == -1)
return;
dev = container_of(kian, struct kvm_assigned_dev_kernel,
ack_notifier);
kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
/* The guest irq may be shared so this ack may be
* from another device.
*/
spin_lock_irqsave(&dev->assigned_dev_lock, flags);
if (dev->host_irq_disabled) {
enable_irq(dev->host_irq);
dev->host_irq_disabled = false;
}
spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
}
static void deassign_guest_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
assigned_dev->ack_notifier.gsi = -1;
if (assigned_dev->irq_source_id != -1)
kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
assigned_dev->irq_source_id = -1;
assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
}
/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
static void deassign_host_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
/*
* In kvm_free_device_irq, cancel_work_sync return true if:
* 1. work is scheduled, and then cancelled.
* 2. work callback is executed.
*
* The first one ensured that the irq is disabled and no more events
* would happen. But for the second one, the irq may be enabled (e.g.
* for MSI). So we disable irq here to prevent further events.
*
* Notice this maybe result in nested disable if the interrupt type is
* INTx, but it's OK for we are going to free it.
*
* If this function is a part of VM destroy, please ensure that till
* now, the kvm state is still legal for probably we also have to wait
* interrupt_work done.
*/
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
int i;
for (i = 0; i < assigned_dev->entries_nr; i++)
disable_irq_nosync(assigned_dev->
host_msix_entries[i].vector);
cancel_work_sync(&assigned_dev->interrupt_work);
for (i = 0; i < assigned_dev->entries_nr; i++)
free_irq(assigned_dev->host_msix_entries[i].vector,
(void *)assigned_dev);
assigned_dev->entries_nr = 0;
kfree(assigned_dev->host_msix_entries);
kfree(assigned_dev->guest_msix_entries);
pci_disable_msix(assigned_dev->dev);
} else {
/* Deal with MSI and INTx */
disable_irq_nosync(assigned_dev->host_irq);
cancel_work_sync(&assigned_dev->interrupt_work);
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
pci_disable_msi(assigned_dev->dev);
}
assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
}
static int kvm_deassign_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev,
unsigned long irq_requested_type)
{
unsigned long guest_irq_type, host_irq_type;
if (!irqchip_in_kernel(kvm))
return -EINVAL;
/* no irq assignment to deassign */
if (!assigned_dev->irq_requested_type)
return -ENXIO;
host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
if (host_irq_type)
deassign_host_irq(kvm, assigned_dev);
if (guest_irq_type)
deassign_guest_irq(kvm, assigned_dev);
return 0;
}
static void kvm_free_assigned_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
}
static void kvm_free_assigned_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel
*assigned_dev)
{
kvm_free_assigned_irq(kvm, assigned_dev);
pci_reset_function(assigned_dev->dev);
pci_release_regions(assigned_dev->dev);
pci_disable_device(assigned_dev->dev);
pci_dev_put(assigned_dev->dev);
list_del(&assigned_dev->list);
kfree(assigned_dev);
}
void kvm_free_all_assigned_devices(struct kvm *kvm)
{
struct list_head *ptr, *ptr2;
struct kvm_assigned_dev_kernel *assigned_dev;
list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
assigned_dev = list_entry(ptr,
struct kvm_assigned_dev_kernel,
list);
kvm_free_assigned_device(kvm, assigned_dev);
}
}
static int assigned_device_enable_host_intx(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev)
{
dev->host_irq = dev->dev->irq;
/* Even though this is PCI, we don't want to use shared
* interrupts. Sharing host devices with guest-assigned devices
* on the same interrupt line is not a happy situation: there
* are going to be long delays in accepting, acking, etc.
*/
if (request_irq(dev->host_irq, kvm_assigned_dev_intr,
0, "kvm_assigned_intx_device", (void *)dev))
return -EIO;
return 0;
}
#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_host_msi(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev)
{
int r;
if (!dev->dev->msi_enabled) {
r = pci_enable_msi(dev->dev);
if (r)
return r;
}
dev->host_irq = dev->dev->irq;
if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0,
"kvm_assigned_msi_device", (void *)dev)) {
pci_disable_msi(dev->dev);
return -EIO;
}
return 0;
}
#endif
#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_host_msix(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev)
{
int i, r = -EINVAL;
/* host_msix_entries and guest_msix_entries should have been
* initialized */
if (dev->entries_nr == 0)
return r;
r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
if (r)
return r;
for (i = 0; i < dev->entries_nr; i++) {
r = request_irq(dev->host_msix_entries[i].vector,
kvm_assigned_dev_intr, 0,
"kvm_assigned_msix_device",
(void *)dev);
/* FIXME: free requested_irq's on failure */
if (r)
return r;
}
return 0;
}
#endif
static int assigned_device_enable_guest_intx(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev,
struct kvm_assigned_irq *irq)
{
dev->guest_irq = irq->guest_irq;
dev->ack_notifier.gsi = irq->guest_irq;
return 0;
}
#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_guest_msi(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev,
struct kvm_assigned_irq *irq)
{
dev->guest_irq = irq->guest_irq;
dev->ack_notifier.gsi = -1;
dev->host_irq_disabled = false;
return 0;
}
#endif
#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_guest_msix(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev,
struct kvm_assigned_irq *irq)
{
dev->guest_irq = irq->guest_irq;
dev->ack_notifier.gsi = -1;
dev->host_irq_disabled = false;
return 0;
}
#endif
static int assign_host_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev,
__u32 host_irq_type)
{
int r = -EEXIST;
if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
return r;
switch (host_irq_type) {
case KVM_DEV_IRQ_HOST_INTX:
r = assigned_device_enable_host_intx(kvm, dev);
break;
#ifdef __KVM_HAVE_MSI
case KVM_DEV_IRQ_HOST_MSI:
r = assigned_device_enable_host_msi(kvm, dev);
break;
#endif
#ifdef __KVM_HAVE_MSIX
case KVM_DEV_IRQ_HOST_MSIX:
r = assigned_device_enable_host_msix(kvm, dev);
break;
#endif
default:
r = -EINVAL;
}
if (!r)
dev->irq_requested_type |= host_irq_type;
return r;
}
static int assign_guest_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev,
struct kvm_assigned_irq *irq,
unsigned long guest_irq_type)
{
int id;
int r = -EEXIST;
if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
return r;
id = kvm_request_irq_source_id(kvm);
if (id < 0)
return id;
dev->irq_source_id = id;
switch (guest_irq_type) {
case KVM_DEV_IRQ_GUEST_INTX:
r = assigned_device_enable_guest_intx(kvm, dev, irq);
break;
#ifdef __KVM_HAVE_MSI
case KVM_DEV_IRQ_GUEST_MSI:
r = assigned_device_enable_guest_msi(kvm, dev, irq);
break;
#endif
#ifdef __KVM_HAVE_MSIX
case KVM_DEV_IRQ_GUEST_MSIX:
r = assigned_device_enable_guest_msix(kvm, dev, irq);
break;
#endif
default:
r = -EINVAL;
}
if (!r) {
dev->irq_requested_type |= guest_irq_type;
kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
} else
kvm_free_irq_source_id(kvm, dev->irq_source_id);
return r;
}
/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
struct kvm_assigned_irq *assigned_irq)
{
int r = -EINVAL;
struct kvm_assigned_dev_kernel *match;
unsigned long host_irq_type, guest_irq_type;
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
if (!irqchip_in_kernel(kvm))
return r;
mutex_lock(&kvm->lock);
r = -ENODEV;
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
assigned_irq->assigned_dev_id);
if (!match)
goto out;
host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
r = -EINVAL;
/* can only assign one type at a time */
if (hweight_long(host_irq_type) > 1)
goto out;
if (hweight_long(guest_irq_type) > 1)
goto out;
if (host_irq_type == 0 && guest_irq_type == 0)
goto out;
r = 0;
if (host_irq_type)
r = assign_host_irq(kvm, match, host_irq_type);
if (r)
goto out;
if (guest_irq_type)
r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
out:
mutex_unlock(&kvm->lock);
return r;
}
static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
struct kvm_assigned_irq
*assigned_irq)
{
int r = -ENODEV;
struct kvm_assigned_dev_kernel *match;
mutex_lock(&kvm->lock);
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
assigned_irq->assigned_dev_id);
if (!match)
goto out;
r = kvm_deassign_irq(kvm, match, assigned_irq->flags);
out:
mutex_unlock(&kvm->lock);
return r;
}
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
struct kvm_assigned_pci_dev *assigned_dev)
{
int r = 0;
struct kvm_assigned_dev_kernel *match;
struct pci_dev *dev;
down_read(&kvm->slots_lock);
mutex_lock(&kvm->lock);
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
assigned_dev->assigned_dev_id);
if (match) {
/* device already assigned */
r = -EEXIST;
goto out;
}
match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
if (match == NULL) {
printk(KERN_INFO "%s: Couldn't allocate memory\n",
__func__);
r = -ENOMEM;
goto out;
}
dev = pci_get_bus_and_slot(assigned_dev->busnr,
assigned_dev->devfn);
if (!dev) {
printk(KERN_INFO "%s: host device not found\n", __func__);
r = -EINVAL;
goto out_free;
}
if (pci_enable_device(dev)) {
printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
r = -EBUSY;
goto out_put;
}
r = pci_request_regions(dev, "kvm_assigned_device");
if (r) {
printk(KERN_INFO "%s: Could not get access to device regions\n",
__func__);
goto out_disable;
}
pci_reset_function(dev);
match->assigned_dev_id = assigned_dev->assigned_dev_id;
match->host_busnr = assigned_dev->busnr;
match->host_devfn = assigned_dev->devfn;
match->flags = assigned_dev->flags;
match->dev = dev;
spin_lock_init(&match->assigned_dev_lock);
match->irq_source_id = -1;
match->kvm = kvm;
match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
INIT_WORK(&match->interrupt_work,
kvm_assigned_dev_interrupt_work_handler);
list_add(&match->list, &kvm->arch.assigned_dev_head);
if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
if (!kvm->arch.iommu_domain) {
r = kvm_iommu_map_guest(kvm);
if (r)
goto out_list_del;
}
r = kvm_assign_device(kvm, match);
if (r)
goto out_list_del;
}
out:
mutex_unlock(&kvm->lock);
up_read(&kvm->slots_lock);
return r;
out_list_del:
list_del(&match->list);
pci_release_regions(dev);
out_disable:
pci_disable_device(dev);
out_put:
pci_dev_put(dev);
out_free:
kfree(match);
mutex_unlock(&kvm->lock);
up_read(&kvm->slots_lock);
return r;
}
static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
struct kvm_assigned_pci_dev *assigned_dev)
{
int r = 0;
struct kvm_assigned_dev_kernel *match;
mutex_lock(&kvm->lock);
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
assigned_dev->assigned_dev_id);
if (!match) {
printk(KERN_INFO "%s: device hasn't been assigned before, "
"so cannot be deassigned\n", __func__);
r = -EINVAL;
goto out;
}
if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
kvm_deassign_device(kvm, match);
kvm_free_assigned_device(kvm, match);
out:
mutex_unlock(&kvm->lock);
return r;
}
#ifdef __KVM_HAVE_MSIX
static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
struct kvm_assigned_msix_nr *entry_nr)
{
int r = 0;
struct kvm_assigned_dev_kernel *adev;
mutex_lock(&kvm->lock);
adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
entry_nr->assigned_dev_id);
if (!adev) {
r = -EINVAL;
goto msix_nr_out;
}
if (adev->entries_nr == 0) {
adev->entries_nr = entry_nr->entry_nr;
if (adev->entries_nr == 0 ||
adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) {
r = -EINVAL;
goto msix_nr_out;
}
adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
entry_nr->entry_nr,
GFP_KERNEL);
if (!adev->host_msix_entries) {
r = -ENOMEM;
goto msix_nr_out;
}
adev->guest_msix_entries = kzalloc(
sizeof(struct kvm_guest_msix_entry) *
entry_nr->entry_nr, GFP_KERNEL);
if (!adev->guest_msix_entries) {
kfree(adev->host_msix_entries);
r = -ENOMEM;
goto msix_nr_out;
}
} else /* Not allowed set MSI-X number twice */
r = -EINVAL;
msix_nr_out:
mutex_unlock(&kvm->lock);
return r;
}
static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
struct kvm_assigned_msix_entry *entry)
{
int r = 0, i;
struct kvm_assigned_dev_kernel *adev;
mutex_lock(&kvm->lock);
adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
entry->assigned_dev_id);
if (!adev) {
r = -EINVAL;
goto msix_entry_out;
}
for (i = 0; i < adev->entries_nr; i++)
if (adev->guest_msix_entries[i].vector == 0 ||
adev->guest_msix_entries[i].entry == entry->entry) {
adev->guest_msix_entries[i].entry = entry->entry;
adev->guest_msix_entries[i].vector = entry->gsi;
adev->host_msix_entries[i].entry = entry->entry;
break;
}
if (i == adev->entries_nr) {
r = -ENOSPC;
goto msix_entry_out;
}
msix_entry_out:
mutex_unlock(&kvm->lock);
return r;
}
#endif
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
unsigned long arg)
{
void __user *argp = (void __user *)arg;
int r = -ENOTTY;
switch (ioctl) {
case KVM_ASSIGN_PCI_DEVICE: {
struct kvm_assigned_pci_dev assigned_dev;
r = -EFAULT;
if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
goto out;
r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
if (r)
goto out;
break;
}
case KVM_ASSIGN_IRQ: {
r = -EOPNOTSUPP;
break;
}
#ifdef KVM_CAP_ASSIGN_DEV_IRQ
case KVM_ASSIGN_DEV_IRQ: {
struct kvm_assigned_irq assigned_irq;
r = -EFAULT;
if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
goto out;
r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
if (r)
goto out;
break;
}
case KVM_DEASSIGN_DEV_IRQ: {
struct kvm_assigned_irq assigned_irq;
r = -EFAULT;
if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
goto out;
r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
if (r)
goto out;
break;
}
#endif
#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
case KVM_DEASSIGN_PCI_DEVICE: {
struct kvm_assigned_pci_dev assigned_dev;
r = -EFAULT;
if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
goto out;
r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
if (r)
goto out;
break;
}
#endif
#ifdef KVM_CAP_IRQ_ROUTING
case KVM_SET_GSI_ROUTING: {
struct kvm_irq_routing routing;
struct kvm_irq_routing __user *urouting;
struct kvm_irq_routing_entry *entries;
r = -EFAULT;
if (copy_from_user(&routing, argp, sizeof(routing)))
goto out;
r = -EINVAL;
if (routing.nr >= KVM_MAX_IRQ_ROUTES)
goto out;
if (routing.flags)
goto out;
r = -ENOMEM;
entries = vmalloc(routing.nr * sizeof(*entries));
if (!entries)
goto out;
r = -EFAULT;
urouting = argp;
if (copy_from_user(entries, urouting->entries,
routing.nr * sizeof(*entries)))
goto out_free_irq_routing;
r = kvm_set_irq_routing(kvm, entries, routing.nr,
routing.flags);
out_free_irq_routing:
vfree(entries);
break;
}
#endif /* KVM_CAP_IRQ_ROUTING */
#ifdef __KVM_HAVE_MSIX
case KVM_ASSIGN_SET_MSIX_NR: {
struct kvm_assigned_msix_nr entry_nr;
r = -EFAULT;
if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
goto out;
r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
if (r)
goto out;
break;
}
case KVM_ASSIGN_SET_MSIX_ENTRY: {
struct kvm_assigned_msix_entry entry;
r = -EFAULT;
if (copy_from_user(&entry, argp, sizeof entry))
goto out;
r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
if (r)
goto out;
break;
}
#endif
}
out:
return r;
}

View File

@@ -61,10 +61,8 @@ irqfd_inject(struct work_struct *work)
struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
struct kvm *kvm = irqfd->kvm;
mutex_lock(&kvm->irq_lock);
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
mutex_unlock(&kvm->irq_lock);
}
/*

View File

@@ -182,6 +182,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
union kvm_ioapic_redirect_entry entry;
int ret = 1;
mutex_lock(&ioapic->lock);
if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
entry = ioapic->redirtbl[irq];
level ^= entry.fields.polarity;
@@ -198,34 +199,51 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
}
trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
}
mutex_unlock(&ioapic->lock);
return ret;
}
static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
int trigger_mode)
static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
int trigger_mode)
{
union kvm_ioapic_redirect_entry *ent;
int i;
ent = &ioapic->redirtbl[pin];
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
if (ent->fields.vector != vector)
continue;
/*
* We are dropping lock while calling ack notifiers because ack
* notifier callbacks for assigned devices call into IOAPIC
* recursively. Since remote_irr is cleared only after call
* to notifiers if the same vector will be delivered while lock
* is dropped it will be put into irr and will be delivered
* after ack notifier returns.
*/
mutex_unlock(&ioapic->lock);
kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
mutex_lock(&ioapic->lock);
if (trigger_mode != IOAPIC_LEVEL_TRIG)
continue;
if (trigger_mode == IOAPIC_LEVEL_TRIG) {
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
ent->fields.remote_irr = 0;
if (!ent->fields.mask && (ioapic->irr & (1 << pin)))
ioapic_service(ioapic, pin);
if (!ent->fields.mask && (ioapic->irr & (1 << i)))
ioapic_service(ioapic, i);
}
}
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
int i;
for (i = 0; i < IOAPIC_NUM_PINS; i++)
if (ioapic->redirtbl[i].fields.vector == vector)
__kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
mutex_lock(&ioapic->lock);
__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
mutex_unlock(&ioapic->lock);
}
static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
@@ -250,8 +268,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
ioapic_debug("addr %lx\n", (unsigned long)addr);
ASSERT(!(addr & 0xf)); /* check alignment */
mutex_lock(&ioapic->kvm->irq_lock);
addr &= 0xff;
mutex_lock(&ioapic->lock);
switch (addr) {
case IOAPIC_REG_SELECT:
result = ioapic->ioregsel;
@@ -265,6 +283,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
result = 0;
break;
}
mutex_unlock(&ioapic->lock);
switch (len) {
case 8:
*(u64 *) val = result;
@@ -277,7 +297,6 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
default:
printk(KERN_WARNING "ioapic: wrong length %d\n", len);
}
mutex_unlock(&ioapic->kvm->irq_lock);
return 0;
}
@@ -293,15 +312,15 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
(void*)addr, len, val);
ASSERT(!(addr & 0xf)); /* check alignment */
mutex_lock(&ioapic->kvm->irq_lock);
if (len == 4 || len == 8)
data = *(u32 *) val;
else {
printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
goto unlock;
return 0;
}
addr &= 0xff;
mutex_lock(&ioapic->lock);
switch (addr) {
case IOAPIC_REG_SELECT:
ioapic->ioregsel = data;
@@ -312,15 +331,14 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
break;
#ifdef CONFIG_IA64
case IOAPIC_REG_EOI:
kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG);
__kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG);
break;
#endif
default:
break;
}
unlock:
mutex_unlock(&ioapic->kvm->irq_lock);
mutex_unlock(&ioapic->lock);
return 0;
}
@@ -349,6 +367,7 @@ int kvm_ioapic_init(struct kvm *kvm)
ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
if (!ioapic)
return -ENOMEM;
mutex_init(&ioapic->lock);
kvm->arch.vioapic = ioapic;
kvm_ioapic_reset(ioapic);
kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
@@ -360,3 +379,26 @@ int kvm_ioapic_init(struct kvm *kvm)
return ret;
}
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
if (!ioapic)
return -EINVAL;
mutex_lock(&ioapic->lock);
memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
mutex_unlock(&ioapic->lock);
return 0;
}
int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
if (!ioapic)
return -EINVAL;
mutex_lock(&ioapic->lock);
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
mutex_unlock(&ioapic->lock);
return 0;
}

View File

@@ -41,9 +41,11 @@ struct kvm_ioapic {
u32 irr;
u32 pad;
union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
unsigned long irq_states[IOAPIC_NUM_PINS];
struct kvm_io_device dev;
struct kvm *kvm;
void (*ack_notifier)(void *opaque, int irq);
struct mutex lock;
};
#ifdef DEBUG
@@ -73,4 +75,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq);
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
#endif

View File

@@ -31,20 +31,39 @@
#include "ioapic.h"
static inline int kvm_irq_line_state(unsigned long *irq_state,
int irq_source_id, int level)
{
/* Logical OR for level trig interrupt */
if (level)
set_bit(irq_source_id, irq_state);
else
clear_bit(irq_source_id, irq_state);
return !!(*irq_state);
}
static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int level)
struct kvm *kvm, int irq_source_id, int level)
{
#ifdef CONFIG_X86
return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level);
struct kvm_pic *pic = pic_irqchip(kvm);
level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin],
irq_source_id, level);
return kvm_pic_set_irq(pic, e->irqchip.pin, level);
#else
return -1;
#endif
}
static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int level)
struct kvm *kvm, int irq_source_id, int level)
{
return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin],
irq_source_id, level);
return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level);
}
inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
@@ -63,8 +82,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
int i, r = -1;
struct kvm_vcpu *vcpu, *lowest = NULL;
WARN_ON(!mutex_is_locked(&kvm->irq_lock));
if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
kvm_is_dm_lowest_prio(irq))
printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
@@ -96,10 +113,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
}
static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int level)
struct kvm *kvm, int irq_source_id, int level)
{
struct kvm_lapic_irq irq;
if (!level)
return -1;
trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
irq.dest_id = (e->msi.address_lo &
@@ -116,78 +136,67 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
}
/* This should be called with the kvm->irq_lock mutex held
/*
* Return value:
* < 0 Interrupt was ignored (masked or not delivered for other reasons)
* = 0 Interrupt was coalesced (previous irq is still pending)
* > 0 Number of CPUs interrupt was delivered to
*/
int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
{
struct kvm_kernel_irq_routing_entry *e;
unsigned long *irq_state, sig_level;
int ret = -1;
struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
int ret = -1, i = 0;
struct kvm_irq_routing_table *irq_rt;
struct hlist_node *n;
trace_kvm_set_irq(irq, level, irq_source_id);
WARN_ON(!mutex_is_locked(&kvm->irq_lock));
if (irq < KVM_IOAPIC_NUM_PINS) {
irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
/* Logical OR for level trig interrupt */
if (level)
set_bit(irq_source_id, irq_state);
else
clear_bit(irq_source_id, irq_state);
sig_level = !!(*irq_state);
} else if (!level)
return ret;
else /* Deal with MSI/MSI-X */
sig_level = 1;
/* Not possible to detect if the guest uses the PIC or the
* IOAPIC. So set the bit in both. The guest will ignore
* writes to the unused one.
*/
list_for_each_entry(e, &kvm->irq_routing, link)
if (e->gsi == irq) {
int r = e->set(e, kvm, sig_level);
if (r < 0)
continue;
rcu_read_lock();
irq_rt = rcu_dereference(kvm->irq_routing);
if (irq < irq_rt->nr_rt_entries)
hlist_for_each_entry(e, n, &irq_rt->map[irq], link)
irq_set[i++] = *e;
rcu_read_unlock();
while(i--) {
int r;
r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level);
if (r < 0)
continue;
ret = r + ((ret < 0) ? 0 : ret);
}
ret = r + ((ret < 0) ? 0 : ret);
}
return ret;
}
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
struct kvm_kernel_irq_routing_entry *e;
struct kvm_irq_ack_notifier *kian;
struct hlist_node *n;
unsigned gsi = pin;
int gsi;
trace_kvm_ack_irq(irqchip, pin);
list_for_each_entry(e, &kvm->irq_routing, link)
if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
e->irqchip.irqchip == irqchip &&
e->irqchip.pin == pin) {
gsi = e->gsi;
break;
}
hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
if (kian->gsi == gsi)
kian->irq_acked(kian);
rcu_read_lock();
gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
if (gsi != -1)
hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list,
link)
if (kian->gsi == gsi)
kian->irq_acked(kian);
rcu_read_unlock();
}
void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
{
mutex_lock(&kvm->irq_lock);
hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
mutex_unlock(&kvm->irq_lock);
}
@@ -195,8 +204,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
{
mutex_lock(&kvm->irq_lock);
hlist_del_init(&kian->link);
hlist_del_init_rcu(&kian->link);
mutex_unlock(&kvm->irq_lock);
synchronize_rcu();
}
int kvm_request_irq_source_id(struct kvm *kvm)
@@ -205,16 +215,17 @@ int kvm_request_irq_source_id(struct kvm *kvm)
int irq_source_id;
mutex_lock(&kvm->irq_lock);
irq_source_id = find_first_zero_bit(bitmap,
sizeof(kvm->arch.irq_sources_bitmap));
irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG);
if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
if (irq_source_id >= BITS_PER_LONG) {
printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n");
return -EFAULT;
irq_source_id = -EFAULT;
goto unlock;
}
ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
set_bit(irq_source_id, bitmap);
unlock:
mutex_unlock(&kvm->irq_lock);
return irq_source_id;
@@ -228,13 +239,23 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
mutex_lock(&kvm->irq_lock);
if (irq_source_id < 0 ||
irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
irq_source_id >= BITS_PER_LONG) {
printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
return;
goto unlock;
}
for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
if (!irqchip_in_kernel(kvm))
goto unlock;
for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) {
clear_bit(irq_source_id, &kvm->arch.vioapic->irq_states[i]);
if (i >= 16)
continue;
#ifdef CONFIG_X86
clear_bit(irq_source_id, &pic_irqchip(kvm)->irq_states[i]);
#endif
}
unlock:
mutex_unlock(&kvm->irq_lock);
}
@@ -243,7 +264,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
{
mutex_lock(&kvm->irq_lock);
kimn->irq = irq;
hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
mutex_unlock(&kvm->irq_lock);
}
@@ -251,8 +272,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
struct kvm_irq_mask_notifier *kimn)
{
mutex_lock(&kvm->irq_lock);
hlist_del(&kimn->link);
hlist_del_rcu(&kimn->link);
mutex_unlock(&kvm->irq_lock);
synchronize_rcu();
}
void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
@@ -260,33 +282,37 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
struct kvm_irq_mask_notifier *kimn;
struct hlist_node *n;
WARN_ON(!mutex_is_locked(&kvm->irq_lock));
hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
rcu_read_lock();
hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
if (kimn->irq == irq)
kimn->func(kimn, mask);
}
static void __kvm_free_irq_routing(struct list_head *irq_routing)
{
struct kvm_kernel_irq_routing_entry *e, *n;
list_for_each_entry_safe(e, n, irq_routing, link)
kfree(e);
rcu_read_unlock();
}
void kvm_free_irq_routing(struct kvm *kvm)
{
mutex_lock(&kvm->irq_lock);
__kvm_free_irq_routing(&kvm->irq_routing);
mutex_unlock(&kvm->irq_lock);
/* Called only during vm destruction. Nobody can use the pointer
at this stage */
kfree(kvm->irq_routing);
}
static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
static int setup_routing_entry(struct kvm_irq_routing_table *rt,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
int r = -EINVAL;
int delta;
struct kvm_kernel_irq_routing_entry *ei;
struct hlist_node *n;
/*
* Do not allow GSI to be mapped to the same irqchip more than once.
* Allow only one to one mapping between GSI and MSI.
*/
hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link)
if (ei->type == KVM_IRQ_ROUTING_MSI ||
ue->u.irqchip.irqchip == ei->irqchip.irqchip)
return r;
e->gsi = ue->gsi;
e->type = ue->type;
@@ -309,6 +335,9 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
}
e->irqchip.irqchip = ue->u.irqchip.irqchip;
e->irqchip.pin = ue->u.irqchip.pin + delta;
if (e->irqchip.pin >= KVM_IOAPIC_NUM_PINS)
goto out;
rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
break;
case KVM_IRQ_ROUTING_MSI:
e->set = kvm_set_msi;
@@ -319,6 +348,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
default:
goto out;
}
hlist_add_head(&e->link, &rt->map[e->gsi]);
r = 0;
out:
return r;
@@ -330,43 +361,53 @@ int kvm_set_irq_routing(struct kvm *kvm,
unsigned nr,
unsigned flags)
{
struct list_head irq_list = LIST_HEAD_INIT(irq_list);
struct list_head tmp = LIST_HEAD_INIT(tmp);
struct kvm_kernel_irq_routing_entry *e = NULL;
unsigned i;
struct kvm_irq_routing_table *new, *old;
u32 i, j, nr_rt_entries = 0;
int r;
for (i = 0; i < nr; ++i) {
if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
return -EINVAL;
nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
}
nr_rt_entries += 1;
new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head))
+ (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
GFP_KERNEL);
if (!new)
return -ENOMEM;
new->rt_entries = (void *)&new->map[nr_rt_entries];
new->nr_rt_entries = nr_rt_entries;
for (i = 0; i < 3; i++)
for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
new->chip[i][j] = -1;
for (i = 0; i < nr; ++i) {
r = -EINVAL;
if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
goto out;
if (ue->flags)
goto out;
r = -ENOMEM;
e = kzalloc(sizeof(*e), GFP_KERNEL);
if (!e)
goto out;
r = setup_routing_entry(e, ue);
r = setup_routing_entry(new, &new->rt_entries[i], ue);
if (r)
goto out;
++ue;
list_add(&e->link, &irq_list);
e = NULL;
}
mutex_lock(&kvm->irq_lock);
list_splice(&kvm->irq_routing, &tmp);
INIT_LIST_HEAD(&kvm->irq_routing);
list_splice(&irq_list, &kvm->irq_routing);
INIT_LIST_HEAD(&irq_list);
list_splice(&tmp, &irq_list);
old = kvm->irq_routing;
rcu_assign_pointer(kvm->irq_routing, new);
mutex_unlock(&kvm->irq_lock);
synchronize_rcu();
new = old;
r = 0;
out:
kfree(e);
__kvm_free_irq_routing(&irq_list);
kfree(new);
return r;
}

File diff suppressed because it is too large Load Diff