mirror of
https://github.com/torvalds/linux.git
synced 2024-12-11 21:52:04 +00:00
7fa005caa3
Now that vfio_pci has been split into two source modules, one focusing on the "struct pci_driver" (vfio_pci.c) and a toolbox library of code (vfio_pci_core.c), complete the split and move them into two different kernel modules. As before vfio_pci.ko continues to present the same interface under sysfs and this change will have no functional impact. Splitting into another module and adding exports allows creating new HW specific VFIO PCI drivers that can implement device specific functionality, such as VFIO migration interfaces or specialized device requirements. Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://lore.kernel.org/r/20210826103912.128972-14-yishaih@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
332 lines
7.6 KiB
C
332 lines
7.6 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* VFIO PCI Intel Graphics support
|
|
*
|
|
* Copyright (C) 2016 Red Hat, Inc. All rights reserved.
|
|
* Author: Alex Williamson <alex.williamson@redhat.com>
|
|
*
|
|
* Register a device specific region through which to provide read-only
|
|
* access to the Intel IGD opregion. The register defining the opregion
|
|
* address is also virtualized to prevent user modification.
|
|
*/
|
|
|
|
#include <linux/io.h>
|
|
#include <linux/pci.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/vfio.h>
|
|
|
|
#include <linux/vfio_pci_core.h>
|
|
|
|
#define OPREGION_SIGNATURE "IntelGraphicsMem"
|
|
#define OPREGION_SIZE (8 * 1024)
|
|
#define OPREGION_PCI_ADDR 0xfc
|
|
|
|
#define OPREGION_RVDA 0x3ba
|
|
#define OPREGION_RVDS 0x3c2
|
|
#define OPREGION_VERSION 0x16
|
|
|
|
static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev,
|
|
char __user *buf, size_t count, loff_t *ppos,
|
|
bool iswrite)
|
|
{
|
|
unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
|
|
void *base = vdev->region[i].data;
|
|
loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
|
|
|
|
if (pos >= vdev->region[i].size || iswrite)
|
|
return -EINVAL;
|
|
|
|
count = min(count, (size_t)(vdev->region[i].size - pos));
|
|
|
|
if (copy_to_user(buf, base + pos, count))
|
|
return -EFAULT;
|
|
|
|
*ppos += count;
|
|
|
|
return count;
|
|
}
|
|
|
|
/*
 * Region release callback: tear down the OpRegion mapping created by
 * vfio_pci_igd_opregion_init() (region->data holds the memremap() base).
 */
static void vfio_pci_igd_release(struct vfio_pci_core_device *vdev,
				 struct vfio_pci_region *region)
{
	memunmap(region->data);
}
|
|
|
|
/* Ops for the read-only OpRegion device-specific region. */
static const struct vfio_pci_regops vfio_pci_igd_regops = {
	.rw		= vfio_pci_igd_rw,
	.release	= vfio_pci_igd_release,
};
|
|
|
|
/*
 * Locate the IGD OpRegion via the dword pointer at config offset 0xfc,
 * map it, and expose it to userspace as a read-only device-specific
 * region.  On success the OPREGION_PCI_ADDR register is virtualized in
 * vconfig so the guest sees the host value but cannot modify it.
 *
 * Returns 0 on success or a negative errno.
 */
static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev)
{
	__le32 *dwordp = (__le32 *)(vdev->vconfig + OPREGION_PCI_ADDR);
	u32 addr, size;
	void *base;
	int ret;
	u16 version;

	ret = pci_read_config_dword(vdev->pdev, OPREGION_PCI_ADDR, &addr);
	if (ret)
		return ret;

	/* All-zeros or all-ones means no OpRegion is implemented. */
	if (!addr || !(~addr))
		return -ENODEV;

	/* Map the fixed-size header first; the real size is read from it. */
	base = memremap(addr, OPREGION_SIZE, MEMREMAP_WB);
	if (!base)
		return -ENOMEM;

	/* First 16 bytes must carry the "IntelGraphicsMem" signature. */
	if (memcmp(base, OPREGION_SIGNATURE, 16)) {
		memunmap(base);
		return -EINVAL;
	}

	/* OpRegion size field lives at offset 16 and is expressed in KB. */
	size = le32_to_cpu(*(__le32 *)(base + 16));
	if (!size) {
		memunmap(base);
		return -EINVAL;
	}

	size *= 1024; /* In KB */

	/*
	 * Support opregion v2.1+
	 * When VBT data exceeds 6KB and cannot fit in mailbox #4, the
	 * Extended VBT region next to the opregion holds the VBT data.
	 * RVDA (Relative Address of VBT Data from Opregion Base) and RVDS
	 * (Raw VBT Data Size) opregion structure members give its offset
	 * from the region base and its size.  RVDA/RVDS are not defined
	 * before opregion 2.0.
	 *
	 * opregion 2.1+: RVDA is an unsigned, relative offset from the
	 * opregion base and should point to the end of the opregion;
	 * otherwise, exposing it to userspace would allow read access to
	 * everything between the OpRegion and VBT, which is not safe.
	 * RVDS is a size in bytes.
	 *
	 * opregion 2.0: rvda is the physical VBT address.  Since rvda is
	 * an HPA it cannot be used directly in the guest, and it should
	 * not be practically available to the end user, so it is not
	 * supported.
	 */
	version = le16_to_cpu(*(__le16 *)(base + OPREGION_VERSION));
	if (version >= 0x0200) {
		u64 rvda;
		u32 rvds;

		rvda = le64_to_cpu(*(__le64 *)(base + OPREGION_RVDA));
		rvds = le32_to_cpu(*(__le32 *)(base + OPREGION_RVDS));
		if (rvda && rvds) {
			/* no support for opregion v2.0 with physical VBT address */
			if (version == 0x0200) {
				memunmap(base);
				pci_err(vdev->pdev,
					"IGD assignment does not support opregion v2.0 with an extended VBT region\n");
				return -EINVAL;
			}

			/* v2.1+: extended VBT must immediately follow the opregion. */
			if (rvda != size) {
				memunmap(base);
				pci_err(vdev->pdev,
					"Extended VBT does not follow opregion on version 0x%04x\n",
					version);
				return -EINVAL;
			}

			/* region size for opregion v2.0+: opregion and VBT size. */
			size += rvds;
		}
	}

	/* Remap with the actual size if it differs from the header-sized map. */
	if (size != OPREGION_SIZE) {
		memunmap(base);
		base = memremap(addr, size, MEMREMAP_WB);
		if (!base)
			return -ENOMEM;
	}

	/* On success, ownership of the mapping passes to the region (see
	 * vfio_pci_igd_release()). */
	ret = vfio_pci_register_dev_region(vdev,
		PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
		VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
		&vfio_pci_igd_regops, size, VFIO_REGION_INFO_FLAG_READ, base);
	if (ret) {
		memunmap(base);
		return ret;
	}

	/* Fill vconfig with the hw value and virtualize register */
	*dwordp = cpu_to_le32(addr);
	memset(vdev->pci_config_map + OPREGION_PCI_ADDR,
	       PCI_CAP_ID_INVALID_VIRT, 4);

	return ret;
}
|
|
|
|
static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev,
|
|
char __user *buf, size_t count, loff_t *ppos,
|
|
bool iswrite)
|
|
{
|
|
unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
|
|
struct pci_dev *pdev = vdev->region[i].data;
|
|
loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
|
|
size_t size;
|
|
int ret;
|
|
|
|
if (pos >= vdev->region[i].size || iswrite)
|
|
return -EINVAL;
|
|
|
|
size = count = min(count, (size_t)(vdev->region[i].size - pos));
|
|
|
|
if ((pos & 1) && size) {
|
|
u8 val;
|
|
|
|
ret = pci_user_read_config_byte(pdev, pos, &val);
|
|
if (ret)
|
|
return ret;
|
|
|
|
if (copy_to_user(buf + count - size, &val, 1))
|
|
return -EFAULT;
|
|
|
|
pos++;
|
|
size--;
|
|
}
|
|
|
|
if ((pos & 3) && size > 2) {
|
|
u16 val;
|
|
|
|
ret = pci_user_read_config_word(pdev, pos, &val);
|
|
if (ret)
|
|
return ret;
|
|
|
|
val = cpu_to_le16(val);
|
|
if (copy_to_user(buf + count - size, &val, 2))
|
|
return -EFAULT;
|
|
|
|
pos += 2;
|
|
size -= 2;
|
|
}
|
|
|
|
while (size > 3) {
|
|
u32 val;
|
|
|
|
ret = pci_user_read_config_dword(pdev, pos, &val);
|
|
if (ret)
|
|
return ret;
|
|
|
|
val = cpu_to_le32(val);
|
|
if (copy_to_user(buf + count - size, &val, 4))
|
|
return -EFAULT;
|
|
|
|
pos += 4;
|
|
size -= 4;
|
|
}
|
|
|
|
while (size >= 2) {
|
|
u16 val;
|
|
|
|
ret = pci_user_read_config_word(pdev, pos, &val);
|
|
if (ret)
|
|
return ret;
|
|
|
|
val = cpu_to_le16(val);
|
|
if (copy_to_user(buf + count - size, &val, 2))
|
|
return -EFAULT;
|
|
|
|
pos += 2;
|
|
size -= 2;
|
|
}
|
|
|
|
while (size) {
|
|
u8 val;
|
|
|
|
ret = pci_user_read_config_byte(pdev, pos, &val);
|
|
if (ret)
|
|
return ret;
|
|
|
|
if (copy_to_user(buf + count - size, &val, 1))
|
|
return -EFAULT;
|
|
|
|
pos++;
|
|
size--;
|
|
}
|
|
|
|
*ppos += count;
|
|
|
|
return count;
|
|
}
|
|
|
|
static void vfio_pci_igd_cfg_release(struct vfio_pci_core_device *vdev,
|
|
struct vfio_pci_region *region)
|
|
{
|
|
struct pci_dev *pdev = region->data;
|
|
|
|
pci_dev_put(pdev);
|
|
}
|
|
|
|
/* Ops for the read-only host/LPC bridge config mirror regions. */
static const struct vfio_pci_regops vfio_pci_igd_cfg_regops = {
	.rw		= vfio_pci_igd_cfg_rw,
	.release	= vfio_pci_igd_cfg_release,
};
|
|
|
|
/*
 * Expose read-only mirrors of the host bridge (00:00.0) and LPC/ISA
 * bridge (00:1f.0) config spaces as device-specific regions; guest IGD
 * drivers expect to find these companion devices.
 *
 * Reference counting: each pci_get_domain_bus_and_slot() reference is
 * either dropped here on the failure paths before registration, or
 * handed to the registered region and released later by
 * vfio_pci_igd_cfg_release().  Once a region is registered its cleanup
 * is the region teardown's responsibility, so later failures in this
 * function do not (and must not) put the earlier reference.
 *
 * Returns 0 on success or a negative errno.
 */
static int vfio_pci_igd_cfg_init(struct vfio_pci_core_device *vdev)
{
	struct pci_dev *host_bridge, *lpc_bridge;
	int ret;

	host_bridge = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0));
	if (!host_bridge)
		return -ENODEV;

	/* Sanity check: must be an Intel host bridge. */
	if (host_bridge->vendor != PCI_VENDOR_ID_INTEL ||
	    host_bridge->class != (PCI_CLASS_BRIDGE_HOST << 8)) {
		pci_dev_put(host_bridge);
		return -EINVAL;
	}

	ret = vfio_pci_register_dev_region(vdev,
		PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
		VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG,
		&vfio_pci_igd_cfg_regops, host_bridge->cfg_size,
		VFIO_REGION_INFO_FLAG_READ, host_bridge);
	if (ret) {
		pci_dev_put(host_bridge);
		return ret;
	}

	lpc_bridge = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x1f, 0));
	if (!lpc_bridge)
		return -ENODEV;

	/* Sanity check: must be an Intel ISA (LPC) bridge. */
	if (lpc_bridge->vendor != PCI_VENDOR_ID_INTEL ||
	    lpc_bridge->class != (PCI_CLASS_BRIDGE_ISA << 8)) {
		pci_dev_put(lpc_bridge);
		return -EINVAL;
	}

	ret = vfio_pci_register_dev_region(vdev,
		PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
		VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG,
		&vfio_pci_igd_cfg_regops, lpc_bridge->cfg_size,
		VFIO_REGION_INFO_FLAG_READ, lpc_bridge);
	if (ret) {
		pci_dev_put(lpc_bridge);
		return ret;
	}

	return 0;
}
|
|
|
|
/*
 * Entry point for IGD device-specific setup: register the OpRegion
 * region, then the host/LPC bridge config mirrors.
 *
 * Returns 0 on success or the first failing step's negative errno.
 */
int vfio_pci_igd_init(struct vfio_pci_core_device *vdev)
{
	int ret = vfio_pci_igd_opregion_init(vdev);

	if (ret)
		return ret;

	return vfio_pci_igd_cfg_init(vdev);
}
|