linux/drivers/vfio/pci/vfio_pci.c

277 lines
7.3 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved
*
* Copyright (C) 2012 Red Hat, Inc. All rights reserved.
* Author: Alex Williamson <alex.williamson@redhat.com>
*
* Derived from original vfio:
* Copyright 2010 Cisco Systems, Inc. All rights reserved.
* Author: Tom Lyon, pugs@cisco.com
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/file.h>
#include <linux/interrupt.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/vfio_pci_core.h>
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC "VFIO PCI - User Level meta-driver"
static char ids[1024] __initdata;
module_param_string(ids, ids, sizeof(ids), 0);
MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the vfio driver, format is \"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\" and multiple comma separated entries can be specified");
static bool nointxmask;
module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(nointxmask,
"Disable support for PCI 2.3 style INTx masking. If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag.");
#ifdef CONFIG_VFIO_PCI_VGA
static bool disable_vga;
module_param(disable_vga, bool, S_IRUGO);
MODULE_PARM_DESC(disable_vga, "Disable VGA resource access through vfio-pci");
#endif
static bool disable_idle_d3;
module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(disable_idle_d3,
"Disable using the PCI D3 low power state for idle, unused devices");
static bool enable_sriov;
#ifdef CONFIG_PCI_IOV
module_param(enable_sriov, bool, 0644);
MODULE_PARM_DESC(enable_sriov, "Enable support for SR-IOV configuration. Enabling SR-IOV on a PF typically requires support of the userspace PF driver, enabling VFs without such support may result in non-functional VFs or PF.");
#endif
static bool disable_denylist;
module_param(disable_denylist, bool, 0444);
MODULE_PARM_DESC(disable_denylist, "Disable use of device denylist. Disabling the denylist allows binding to devices with known errata that may lead to exploitable stability or security issues when accessed by untrusted users.");
static bool vfio_pci_dev_in_denylist(struct pci_dev *pdev)
{
switch (pdev->vendor) {
case PCI_VENDOR_ID_INTEL:
switch (pdev->device) {
case PCI_DEVICE_ID_INTEL_QAT_C3XXX:
case PCI_DEVICE_ID_INTEL_QAT_C3XXX_VF:
case PCI_DEVICE_ID_INTEL_QAT_C62X:
case PCI_DEVICE_ID_INTEL_QAT_C62X_VF:
case PCI_DEVICE_ID_INTEL_QAT_DH895XCC:
case PCI_DEVICE_ID_INTEL_QAT_DH895XCC_VF:
return true;
default:
return false;
}
}
return false;
}
static bool vfio_pci_is_denylisted(struct pci_dev *pdev)
{
if (!vfio_pci_dev_in_denylist(pdev))
return false;
if (disable_denylist) {
pci_warn(pdev,
"device denylist disabled - allowing device %04x:%04x.\n",
pdev->vendor, pdev->device);
return false;
}
pci_warn(pdev, "%04x:%04x exists in vfio-pci device denylist, driver probing disallowed.\n",
pdev->vendor, pdev->device);
return true;
}
static int vfio_pci_open_device(struct vfio_device *core_vdev)
{
struct vfio_pci_core_device *vdev =
container_of(core_vdev, struct vfio_pci_core_device, vdev);
struct pci_dev *pdev = vdev->pdev;
int ret;
ret = vfio_pci_core_enable(vdev);
if (ret)
return ret;
if (vfio_pci_is_vga(pdev) &&
pdev->vendor == PCI_VENDOR_ID_INTEL &&
IS_ENABLED(CONFIG_VFIO_PCI_IGD)) {
ret = vfio_pci_igd_init(vdev);
if (ret && ret != -ENODEV) {
pci_warn(pdev, "Failed to setup Intel IGD regions\n");
vfio_pci_core_disable(vdev);
return ret;
}
}
vfio_pci_core_finish_enable(vdev);
return 0;
}
static const struct vfio_device_ops vfio_pci_ops = {
.name = "vfio-pci",
.open_device = vfio_pci_open_device,
.close_device = vfio_pci_core_close_device,
.ioctl = vfio_pci_core_ioctl,
.device_feature = vfio_pci_core_ioctl_feature,
.read = vfio_pci_core_read,
.write = vfio_pci_core_write,
.mmap = vfio_pci_core_mmap,
.request = vfio_pci_core_request,
.match = vfio_pci_core_match,
};
static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct vfio_pci_core_device *vdev;
int ret;
if (vfio_pci_is_denylisted(pdev))
return -EINVAL;
vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
if (!vdev)
return -ENOMEM;
vfio_pci_core_init_device(vdev, pdev, &vfio_pci_ops);
dev_set_drvdata(&pdev->dev, vdev);
ret = vfio_pci_core_register_device(vdev);
if (ret)
goto out_free;
return 0;
out_free:
vfio_pci_core_uninit_device(vdev);
kfree(vdev);
return ret;
}
static void vfio_pci_remove(struct pci_dev *pdev)
{
struct vfio_pci_core_device *vdev = dev_get_drvdata(&pdev->dev);
vfio_pci_core_unregister_device(vdev);
vfio_pci_core_uninit_device(vdev);
kfree(vdev);
}
static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
{
struct vfio_pci_core_device *vdev = dev_get_drvdata(&pdev->dev);
if (!enable_sriov)
return -ENOENT;
return vfio_pci_core_sriov_configure(vdev, nr_virtfn);
}
PCI / VFIO: Add 'override_only' support for VFIO PCI sub system Expose an 'override_only' helper macro (i.e. PCI_DRIVER_OVERRIDE_DEVICE_VFIO) for VFIO PCI sub system and add the required code to prefix its matching entries with "vfio_" in modules.alias file. It allows VFIO device drivers to include match entries in the modules.alias file produced by kbuild that are not used for normal driver autoprobing and module autoloading. Drivers using these match entries can be connected to the PCI device manually, by userspace, using the existing driver_override sysfs. For example the resulting modules.alias may have: alias pci:v000015B3d00001021sv*sd*bc*sc*i* mlx5_core alias vfio_pci:v000015B3d00001021sv*sd*bc*sc*i* mlx5_vfio_pci alias vfio_pci:v*d*sv*sd*bc*sc*i* vfio_pci In this example mlx5_core and mlx5_vfio_pci match to the same PCI device. The kernel will autoload and autobind to mlx5_core but the kernel and udev mechanisms will ignore mlx5_vfio_pci. When userspace wants to change a device to the VFIO subsystem it can implement a generic algorithm: 1) Identify the sysfs path to the device: /sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0 2) Get the modalias string from the kernel: $ cat /sys/bus/pci/devices/0000:01:00.0/modalias pci:v000015B3d00001021sv000015B3sd00000001bc02sc00i00 3) Prefix it with vfio_: vfio_pci:v000015B3d00001021sv000015B3sd00000001bc02sc00i00 4) Search modules.alias for the above string and select the entry that has the fewest *'s: alias vfio_pci:v000015B3d00001021sv*sd*bc*sc*i* mlx5_vfio_pci 5) modprobe the matched module name: $ modprobe mlx5_vfio_pci 6) cat the matched module name to driver_override: echo mlx5_vfio_pci > /sys/bus/pci/devices/0000:01:00.0/driver_override 7) unbind device from original module echo 0000:01:00.0 > /sys/bus/pci/devices/0000:01:00.0/driver/unbind 8) probe PCI drivers (or explicitly bind to mlx5_vfio_pci) echo 0000:01:00.0 > /sys/bus/pci/drivers_probe The algorithm is independent of bus type. In future the other buses with VFIO device drivers, like platform and ACPI, can use this algorithm as well. This patch is the infrastructure to provide the information in the modules.alias to userspace. Convert the only VFIO pci_driver which results in one new line in the modules.alias: alias vfio_pci:v*d*sv*sd*bc*sc*i* vfio_pci Later series introduce additional HW specific VFIO PCI drivers, such as mlx5_vfio_pci. Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> Acked-by: Bjorn Helgaas <bhelgaas@google.com> # for pci.h Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://lore.kernel.org/r/20210826103912.128972-11-yishaih@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2021-08-26 10:39:09 +00:00
static const struct pci_device_id vfio_pci_table[] = {
{ PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_ANY_ID, PCI_ANY_ID) }, /* match all by default */
{}
};
MODULE_DEVICE_TABLE(pci, vfio_pci_table);
static struct pci_driver vfio_pci_driver = {
.name = "vfio-pci",
PCI / VFIO: Add 'override_only' support for VFIO PCI sub system Expose an 'override_only' helper macro (i.e. PCI_DRIVER_OVERRIDE_DEVICE_VFIO) for VFIO PCI sub system and add the required code to prefix its matching entries with "vfio_" in modules.alias file. It allows VFIO device drivers to include match entries in the modules.alias file produced by kbuild that are not used for normal driver autoprobing and module autoloading. Drivers using these match entries can be connected to the PCI device manually, by userspace, using the existing driver_override sysfs. For example the resulting modules.alias may have: alias pci:v000015B3d00001021sv*sd*bc*sc*i* mlx5_core alias vfio_pci:v000015B3d00001021sv*sd*bc*sc*i* mlx5_vfio_pci alias vfio_pci:v*d*sv*sd*bc*sc*i* vfio_pci In this example mlx5_core and mlx5_vfio_pci match to the same PCI device. The kernel will autoload and autobind to mlx5_core but the kernel and udev mechanisms will ignore mlx5_vfio_pci. When userspace wants to change a device to the VFIO subsystem it can implement a generic algorithm: 1) Identify the sysfs path to the device: /sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0 2) Get the modalias string from the kernel: $ cat /sys/bus/pci/devices/0000:01:00.0/modalias pci:v000015B3d00001021sv000015B3sd00000001bc02sc00i00 3) Prefix it with vfio_: vfio_pci:v000015B3d00001021sv000015B3sd00000001bc02sc00i00 4) Search modules.alias for the above string and select the entry that has the fewest *'s: alias vfio_pci:v000015B3d00001021sv*sd*bc*sc*i* mlx5_vfio_pci 5) modprobe the matched module name: $ modprobe mlx5_vfio_pci 6) cat the matched module name to driver_override: echo mlx5_vfio_pci > /sys/bus/pci/devices/0000:01:00.0/driver_override 7) unbind device from original module echo 0000:01:00.0 > /sys/bus/pci/devices/0000:01:00.0/driver/unbind 8) probe PCI drivers (or explicitly bind to mlx5_vfio_pci) echo 0000:01:00.0 > /sys/bus/pci/drivers_probe The algorithm is independent of bus type. In future the other buses with VFIO device drivers, like platform and ACPI, can use this algorithm as well. This patch is the infrastructure to provide the information in the modules.alias to userspace. Convert the only VFIO pci_driver which results in one new line in the modules.alias: alias vfio_pci:v*d*sv*sd*bc*sc*i* vfio_pci Later series introduce additional HW specific VFIO PCI drivers, such as mlx5_vfio_pci. Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> Acked-by: Bjorn Helgaas <bhelgaas@google.com> # for pci.h Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://lore.kernel.org/r/20210826103912.128972-11-yishaih@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2021-08-26 10:39:09 +00:00
.id_table = vfio_pci_table,
.probe = vfio_pci_probe,
.remove = vfio_pci_remove,
.sriov_configure = vfio_pci_sriov_configure,
.err_handler = &vfio_pci_core_err_handlers,
vfio: Set DMA ownership for VFIO devices Claim group dma ownership when an IOMMU group is set to a container, and release the dma ownership once the iommu group is unset from the container. This change disallows some unsafe bridge drivers to bind to non-ACS bridges while devices under them are assigned to user space. This is an intentional enhancement and possibly breaks some existing configurations. The recommendation to such an affected user would be that the previously allowed host bridge driver was unsafe for this use case and to continue to enable assignment of devices within that group, the driver should be unbound from the bridge device or replaced with the pci-stub driver. For any bridge driver, we consider it unsafe if it satisfies any of the following conditions: 1) The bridge driver uses DMA. Calling pci_set_master() or calling any kernel DMA API (dma_map_*() and etc.) is an indicate that the driver is doing DMA. 2) If the bridge driver uses MMIO, it should be tolerant to hostile userspace also touching the same MMIO registers via P2P DMA attacks. If the bridge driver turns out to be a safe one, it could be used as before by setting the driver's .driver_managed_dma field, just like what we have done in the pcieport driver. Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> Acked-by: Alex Williamson <alex.williamson@redhat.com> Link: https://lore.kernel.org/r/20220418005000.897664-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel <jroedel@suse.de>
2022-04-18 00:49:56 +00:00
.driver_managed_dma = true,
};
static void __init vfio_pci_fill_ids(void)
{
char *p, *id;
int rc;
/* no ids passed actually */
if (ids[0] == '\0')
return;
/* add ids specified in the module parameter */
p = ids;
while ((id = strsep(&p, ","))) {
unsigned int vendor, device, subvendor = PCI_ANY_ID,
subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
int fields;
if (!strlen(id))
continue;
fields = sscanf(id, "%x:%x:%x:%x:%x:%x",
&vendor, &device, &subvendor, &subdevice,
&class, &class_mask);
if (fields < 2) {
pr_warn("invalid id string \"%s\"\n", id);
continue;
}
rc = pci_add_dynid(&vfio_pci_driver, vendor, device,
subvendor, subdevice, class, class_mask, 0);
if (rc)
pr_warn("failed to add dynamic id [%04x:%04x[%04x:%04x]] class %#08x/%08x (%d)\n",
vendor, device, subvendor, subdevice,
class, class_mask, rc);
else
pr_info("add [%04x:%04x[%04x:%04x]] class %#08x/%08x\n",
vendor, device, subvendor, subdevice,
class, class_mask);
}
}
static int __init vfio_pci_init(void)
{
int ret;
bool is_disable_vga = true;
#ifdef CONFIG_VFIO_PCI_VGA
is_disable_vga = disable_vga;
#endif
vfio_pci_core_set_params(nointxmask, is_disable_vga, disable_idle_d3);
/* Register and scan for devices */
ret = pci_register_driver(&vfio_pci_driver);
if (ret)
return ret;
vfio_pci_fill_ids();
if (disable_denylist)
pr_warn("device denylist disabled.\n");
return 0;
}
module_init(vfio_pci_init);
static void __exit vfio_pci_cleanup(void)
{
pci_unregister_driver(&vfio_pci_driver);
}
module_exit(vfio_pci_cleanup);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);