From 5641ade41f7c7d16e614e25ce3315e04f1bacd33 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 14 Feb 2013 10:45:31 -0700 Subject: vfio-pci: Enable PCIe extended capabilities on v1 Even PCIe 1.x had extended config space. Signed-off-by: Alex Williamson diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 8b8f7d1..c975d91 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -985,12 +985,12 @@ static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos) if (ret) return pcibios_err_to_errno(ret); + vdev->extended_caps = true; + if ((word & PCI_EXP_FLAGS_VERS) == 1) return PCI_CAP_EXP_ENDPOINT_SIZEOF_V1; - else { - vdev->extended_caps = true; + else return PCI_CAP_EXP_ENDPOINT_SIZEOF_V2; - } case PCI_CAP_ID_HT: ret = pci_read_config_byte(pdev, pos + 3, &byte); if (ret) -- cgit v0.10.2 From 5b279a11d32998aad1e45fe9de225302b6a8e8ba Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 14 Feb 2013 14:02:12 -0700 Subject: vfio-pci: Cleanup read/write functions The read and write functions are nearly identical, combine them and convert to a switch statement. This also makes it easy to narrow the scope of when we use the io/mem accessors in case new regions are added. 
Signed-off-by: Alex Williamson diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index b28e66c..469e110 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -366,8 +366,8 @@ static long vfio_pci_ioctl(void *device_data, return -ENOTTY; } -static ssize_t vfio_pci_read(void *device_data, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t vfio_pci_rw(void *device_data, char __user *buf, + size_t count, loff_t *ppos, bool iswrite) { unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); struct vfio_pci_device *vdev = device_data; @@ -376,42 +376,41 @@ static ssize_t vfio_pci_read(void *device_data, char __user *buf, if (index >= VFIO_PCI_NUM_REGIONS) return -EINVAL; - if (index == VFIO_PCI_CONFIG_REGION_INDEX) - return vfio_pci_config_readwrite(vdev, buf, count, ppos, false); - else if (index == VFIO_PCI_ROM_REGION_INDEX) - return vfio_pci_mem_readwrite(vdev, buf, count, ppos, false); - else if (pci_resource_flags(pdev, index) & IORESOURCE_IO) - return vfio_pci_io_readwrite(vdev, buf, count, ppos, false); - else if (pci_resource_flags(pdev, index) & IORESOURCE_MEM) + switch (index) { + case VFIO_PCI_CONFIG_REGION_INDEX: + return vfio_pci_config_readwrite(vdev, buf, count, + ppos, iswrite); + case VFIO_PCI_ROM_REGION_INDEX: + if (iswrite) + return -EINVAL; return vfio_pci_mem_readwrite(vdev, buf, count, ppos, false); + case VFIO_PCI_BAR0_REGION_INDEX ... 
VFIO_PCI_BAR5_REGION_INDEX: + { + unsigned long flags = pci_resource_flags(pdev, index); + + if (flags & IORESOURCE_IO) + return vfio_pci_io_readwrite(vdev, buf, count, + ppos, iswrite); + if (flags & IORESOURCE_MEM) + return vfio_pci_mem_readwrite(vdev, buf, count, + ppos, iswrite); + } + } + return -EINVAL; } +static ssize_t vfio_pci_read(void *device_data, char __user *buf, + size_t count, loff_t *ppos) +{ + return vfio_pci_rw(device_data, buf, count, ppos, false); +} + static ssize_t vfio_pci_write(void *device_data, const char __user *buf, size_t count, loff_t *ppos) { - unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); - struct vfio_pci_device *vdev = device_data; - struct pci_dev *pdev = vdev->pdev; - - if (index >= VFIO_PCI_NUM_REGIONS) - return -EINVAL; - - if (index == VFIO_PCI_CONFIG_REGION_INDEX) - return vfio_pci_config_readwrite(vdev, (char __user *)buf, - count, ppos, true); - else if (index == VFIO_PCI_ROM_REGION_INDEX) - return -EINVAL; - else if (pci_resource_flags(pdev, index) & IORESOURCE_IO) - return vfio_pci_io_readwrite(vdev, (char __user *)buf, - count, ppos, true); - else if (pci_resource_flags(pdev, index) & IORESOURCE_MEM) { - return vfio_pci_mem_readwrite(vdev, (char __user *)buf, - count, ppos, true); - } - - return -EINVAL; + return vfio_pci_rw(device_data, buf, count, ppos, true); } static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) -- cgit v0.10.2 From 906ee99dd2a5c819c1171ce5eaf6c080c027e58c Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 14 Feb 2013 14:02:12 -0700 Subject: vfio-pci: Cleanup BAR access We can actually handle MMIO and I/O port from the same access function since PCI already does abstraction of this. The ROM BAR only requires a minor difference, so it gets included too. vfio_pci_config_readwrite gets renamed for consistency. 
Signed-off-by: Alex Williamson diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 469e110..bb8c8c2 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -371,31 +371,21 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf, { unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); struct vfio_pci_device *vdev = device_data; - struct pci_dev *pdev = vdev->pdev; if (index >= VFIO_PCI_NUM_REGIONS) return -EINVAL; switch (index) { case VFIO_PCI_CONFIG_REGION_INDEX: - return vfio_pci_config_readwrite(vdev, buf, count, - ppos, iswrite); + return vfio_pci_config_rw(vdev, buf, count, ppos, iswrite); + case VFIO_PCI_ROM_REGION_INDEX: if (iswrite) return -EINVAL; - return vfio_pci_mem_readwrite(vdev, buf, count, ppos, false); + return vfio_pci_bar_rw(vdev, buf, count, ppos, false); case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX: - { - unsigned long flags = pci_resource_flags(pdev, index); - - if (flags & IORESOURCE_IO) - return vfio_pci_io_readwrite(vdev, buf, count, - ppos, iswrite); - if (flags & IORESOURCE_MEM) - return vfio_pci_mem_readwrite(vdev, buf, count, - ppos, iswrite); - } + return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite); } return -EINVAL; @@ -404,13 +394,19 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf, static ssize_t vfio_pci_read(void *device_data, char __user *buf, size_t count, loff_t *ppos) { + if (!count) + return 0; + return vfio_pci_rw(device_data, buf, count, ppos, false); } static ssize_t vfio_pci_write(void *device_data, const char __user *buf, size_t count, loff_t *ppos) { - return vfio_pci_rw(device_data, buf, count, ppos, true); + if (!count) + return 0; + + return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true); } static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index c975d91..f1dde2c 100644 --- 
a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1501,9 +1501,8 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf, return ret; } -ssize_t vfio_pci_config_readwrite(struct vfio_pci_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite) +ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite) { size_t done = 0; int ret = 0; diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 611827c..00d19b9 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -70,15 +70,12 @@ extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags, unsigned index, unsigned start, unsigned count, void *data); -extern ssize_t vfio_pci_config_readwrite(struct vfio_pci_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite); -extern ssize_t vfio_pci_mem_readwrite(struct vfio_pci_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite); -extern ssize_t vfio_pci_io_readwrite(struct vfio_pci_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite); +extern ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev, + char __user *buf, size_t count, + loff_t *ppos, bool iswrite); + +extern ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite); extern int vfio_pci_init_perm_bits(void); extern void vfio_pci_uninit_perm_bits(void); diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index f72323e..e9d78eb 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -20,250 +20,158 @@ #include "vfio_pci_private.h" -/* I/O Port BAR access */ -ssize_t vfio_pci_io_readwrite(struct vfio_pci_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite) +/* + * Read or write 
from an __iomem region (MMIO or I/O port) with an excluded + * range which is inaccessible. The excluded range drops writes and fills + * reads with -1. This is intended for handling MSI-X vector tables and + * leftover space for ROM BARs. + */ +static ssize_t do_io_rw(void __iomem *io, char __user *buf, + loff_t off, size_t count, size_t x_start, + size_t x_end, bool iswrite) { - struct pci_dev *pdev = vdev->pdev; - loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; - int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos); - void __iomem *io; - size_t done = 0; - - if (!pci_resource_start(pdev, bar)) - return -EINVAL; - - if (pos + count > pci_resource_len(pdev, bar)) - return -EINVAL; - - if (!vdev->barmap[bar]) { - int ret; - - ret = pci_request_selected_regions(pdev, 1 << bar, "vfio"); - if (ret) - return ret; - - vdev->barmap[bar] = pci_iomap(pdev, bar, 0); - - if (!vdev->barmap[bar]) { - pci_release_selected_regions(pdev, 1 << bar); - return -EINVAL; - } - } - - io = vdev->barmap[bar]; + ssize_t done = 0; while (count) { - int filled; + size_t fillable, filled; + + if (off < x_start) + fillable = min(count, (size_t)(x_start - off)); + else if (off >= x_end) + fillable = count; + else + fillable = 0; - if (count >= 3 && !(pos % 4)) { + if (fillable >= 4 && !(off % 4)) { __le32 val; if (iswrite) { if (copy_from_user(&val, buf, 4)) return -EFAULT; - iowrite32(le32_to_cpu(val), io + pos); + iowrite32(le32_to_cpu(val), io + off); } else { - val = cpu_to_le32(ioread32(io + pos)); + val = cpu_to_le32(ioread32(io + off)); if (copy_to_user(buf, &val, 4)) return -EFAULT; } filled = 4; - - } else if ((pos % 2) == 0 && count >= 2) { + } else if (fillable >= 2 && !(off % 2)) { __le16 val; if (iswrite) { if (copy_from_user(&val, buf, 2)) return -EFAULT; - iowrite16(le16_to_cpu(val), io + pos); + iowrite16(le16_to_cpu(val), io + off); } else { - val = cpu_to_le16(ioread16(io + pos)); + val = cpu_to_le16(ioread16(io + off)); if (copy_to_user(buf, &val, 2)) return -EFAULT; } filled = 2; - } else 
{ + } else if (fillable) { u8 val; if (iswrite) { if (copy_from_user(&val, buf, 1)) return -EFAULT; - iowrite8(val, io + pos); + iowrite8(val, io + off); } else { - val = ioread8(io + pos); + val = ioread8(io + off); if (copy_to_user(buf, &val, 1)) return -EFAULT; } filled = 1; + } else { + /* Fill reads with -1, drop writes */ + filled = min(count, (size_t)(x_end - off)); + if (!iswrite) { + u8 val = 0xFF; + size_t i; + + for (i = 0; i < filled; i++) + if (copy_to_user(buf + i, &val, 1)) + return -EFAULT; + } } count -= filled; done += filled; + off += filled; buf += filled; - pos += filled; } - *ppos += done; - return done; } -/* - * MMIO BAR access - * We handle two excluded ranges here as well, if the user tries to read - * the ROM beyond what PCI tells us is available or the MSI-X table region, - * we return 0xFF and writes are dropped. - */ -ssize_t vfio_pci_mem_readwrite(struct vfio_pci_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite) +ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite) { struct pci_dev *pdev = vdev->pdev; loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos); - void __iomem *io; + size_t x_start = 0, x_end = 0; resource_size_t end; - size_t done = 0; - size_t x_start = 0, x_end = 0; /* excluded range */ + void __iomem *io; + ssize_t done; if (!pci_resource_start(pdev, bar)) return -EINVAL; end = pci_resource_len(pdev, bar); - if (pos > end) + if (pos >= end) return -EINVAL; - if (pos == end) - return 0; - - if (pos + count > end) - count = end - pos; + count = min(count, (size_t)(end - pos)); if (bar == PCI_ROM_RESOURCE) { + /* + * The ROM can fill less space than the BAR, so we start the + * excluded range at the end of the actual ROM. This makes + * filling large ROM BARs much faster. 
+ */ io = pci_map_rom(pdev, &x_start); + if (!io) + return -ENOMEM; x_end = end; - } else { - if (!vdev->barmap[bar]) { - int ret; - - ret = pci_request_selected_regions(pdev, 1 << bar, - "vfio"); - if (ret) - return ret; + } else if (!vdev->barmap[bar]) { + int ret; - vdev->barmap[bar] = pci_iomap(pdev, bar, 0); + ret = pci_request_selected_regions(pdev, 1 << bar, "vfio"); + if (ret) + return ret; - if (!vdev->barmap[bar]) { - pci_release_selected_regions(pdev, 1 << bar); - return -EINVAL; - } + io = pci_iomap(pdev, bar, 0); + if (!io) { + pci_release_selected_regions(pdev, 1 << bar); + return -ENOMEM; } + vdev->barmap[bar] = io; + } else io = vdev->barmap[bar]; - if (bar == vdev->msix_bar) { - x_start = vdev->msix_offset; - x_end = vdev->msix_offset + vdev->msix_size; - } + if (bar == vdev->msix_bar) { + x_start = vdev->msix_offset; + x_end = vdev->msix_offset + vdev->msix_size; } - if (!io) - return -EINVAL; - - while (count) { - size_t fillable, filled; - - if (pos < x_start) - fillable = x_start - pos; - else if (pos >= x_end) - fillable = end - pos; - else - fillable = 0; - - if (fillable >= 4 && !(pos % 4) && (count >= 4)) { - __le32 val; - - if (iswrite) { - if (copy_from_user(&val, buf, 4)) - goto out; - - iowrite32(le32_to_cpu(val), io + pos); - } else { - val = cpu_to_le32(ioread32(io + pos)); - - if (copy_to_user(buf, &val, 4)) - goto out; - } - - filled = 4; - } else if (fillable >= 2 && !(pos % 2) && (count >= 2)) { - __le16 val; - - if (iswrite) { - if (copy_from_user(&val, buf, 2)) - goto out; - - iowrite16(le16_to_cpu(val), io + pos); - } else { - val = cpu_to_le16(ioread16(io + pos)); - - if (copy_to_user(buf, &val, 2)) - goto out; - } - - filled = 2; - } else if (fillable) { - u8 val; - - if (iswrite) { - if (copy_from_user(&val, buf, 1)) - goto out; - - iowrite8(val, io + pos); - } else { - val = ioread8(io + pos); - - if (copy_to_user(buf, &val, 1)) - goto out; - } - - filled = 1; - } else { - /* Drop writes, fill reads with FF */ - filled = 
min((size_t)(x_end - pos), count); - if (!iswrite) { - char val = 0xFF; - size_t i; + done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite); - for (i = 0; i < filled; i++) { - if (put_user(val, buf + i)) - goto out; - } - } + if (done >= 0) + *ppos += done; - } - - count -= filled; - done += filled; - buf += filled; - pos += filled; - } - - *ppos += done; - -out: if (bar == PCI_ROM_RESOURCE) pci_unmap_rom(pdev, io); - return count ? -EFAULT : done; + return done; } -- cgit v0.10.2 From e014e9444aedc365742d533e1443b22470cc67b9 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 14 Feb 2013 14:02:13 -0700 Subject: vfio: Protect vfio_dev_present against device_del vfio_dev_present is meant to give us a wait_event callback so that we can block removing a device from vfio until it becomes unused. The root of this check depends on being able to get the iommu group from the device. Unfortunately if the BUS_NOTIFY_DEL_DEVICE notifier has fired then the device-group reference is no longer searchable and we fail the lookup. We don't need to go to such extents for this though. We have a reference to the device, from which we can acquire a reference to the group. We can then use the group reference to search for the device and properly block removal. 
Signed-off-by: Alex Williamson diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 12c264d..8e6dcec 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -642,33 +642,16 @@ int vfio_add_group_dev(struct device *dev, } EXPORT_SYMBOL_GPL(vfio_add_group_dev); -/* Test whether a struct device is present in our tracking */ -static bool vfio_dev_present(struct device *dev) +/* Given a referenced group, check if it contains the device */ +static bool vfio_dev_present(struct vfio_group *group, struct device *dev) { - struct iommu_group *iommu_group; - struct vfio_group *group; struct vfio_device *device; - iommu_group = iommu_group_get(dev); - if (!iommu_group) - return false; - - group = vfio_group_get_from_iommu(iommu_group); - if (!group) { - iommu_group_put(iommu_group); - return false; - } - device = vfio_group_get_device(group, dev); - if (!device) { - vfio_group_put(group); - iommu_group_put(iommu_group); + if (!device) return false; - } vfio_device_put(device); - vfio_group_put(group); - iommu_group_put(iommu_group); return true; } @@ -682,10 +665,18 @@ void *vfio_del_group_dev(struct device *dev) struct iommu_group *iommu_group = group->iommu_group; void *device_data = device->device_data; + /* + * The group exists so long as we have a device reference. Get + * a group reference and use it to scan for the device going away. + */ + vfio_group_get(group); + vfio_device_put(device); /* TODO send a signal to encourage this to be released */ - wait_event(vfio.release_q, !vfio_dev_present(dev)); + wait_event(vfio.release_q, !vfio_dev_present(group, dev)); + + vfio_group_put(group); iommu_group_put(iommu_group); -- cgit v0.10.2 From 2b489a45f63102205cece37057c21f6fa66f6ce4 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 14 Feb 2013 14:02:13 -0700 Subject: vfio: whitelist pcieport pcieport does nice things like manage AER and we know it doesn't do DMA or expose any user accessible devices on the host. 
It also keeps the Memory, I/O, and Busmaster bits enabled, which is pretty handy when trying to use anything below it. Devices owned by pcieport cannot be given to users via vfio, but we can tolerate them not being owned by vfio-pci. Signed-off-by: Alex Williamson diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 8e6dcec..28e2d5b 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -442,7 +442,7 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group, * a device. It's not always practical to leave a device within a group * driverless as it could get re-bound to something unsafe. */ -static const char * const vfio_driver_whitelist[] = { "pci-stub" }; +static const char * const vfio_driver_whitelist[] = { "pci-stub", "pcieport" }; static bool vfio_whitelisted_driver(struct device_driver *drv) { -- cgit v0.10.2 From 2dd1194833de133960f286903ce704cb10fa7eb0 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 18 Feb 2013 10:10:33 -0700 Subject: vfio-pci: Manage user power state transitions We give the user access to change the power state of the device but certain transitions result in an uninitialized state which the user cannot resolve. To fix this we need to mark the PowerState field of the PMCSR register read-only and effect the requested change on behalf of the user. This has the added benefit that pdev->current_state remains accurate while controlled by the user. The primary example of this bug is a QEMU guest doing a reboot where the device is put into D3 on shutdown and becomes unusable on the next boot because the device did a soft reset on D3->D0 (NoSoftRst-). 
Signed-off-by: Alex Williamson diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index f1dde2c..964ff22 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -587,12 +587,46 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm) return 0; } +static int vfio_pm_config_write(struct vfio_pci_device *vdev, int pos, + int count, struct perm_bits *perm, + int offset, __le32 val) +{ + count = vfio_default_config_write(vdev, pos, count, perm, offset, val); + if (count < 0) + return count; + + if (offset == PCI_PM_CTRL) { + pci_power_t state; + + switch (le32_to_cpu(val) & PCI_PM_CTRL_STATE_MASK) { + case 0: + state = PCI_D0; + break; + case 1: + state = PCI_D1; + break; + case 2: + state = PCI_D2; + break; + case 3: + state = PCI_D3hot; + break; + } + + pci_set_power_state(vdev->pdev, state); + } + + return count; +} + /* Permissions for the Power Management capability */ static int __init init_pci_cap_pm_perm(struct perm_bits *perm) { if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_PM])) return -ENOMEM; + perm->writefn = vfio_pm_config_write; + /* * We always virtualize the next field so we can remove * capabilities from the chain if we want to. @@ -600,10 +634,11 @@ static int __init init_pci_cap_pm_perm(struct perm_bits *perm) p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); /* - * Power management is defined *per function*, - * so we let the user write this + * Power management is defined *per function*, so we can let + * the user change power state, but we trap and initiate the + * change ourselves, so the state bits are read-only. 
*/ - p_setd(perm, PCI_PM_CTRL, NO_VIRT, ALL_WRITE); + p_setd(perm, PCI_PM_CTRL, NO_VIRT, ~PCI_PM_CTRL_STATE_MASK); return 0; } -- cgit v0.10.2 From 84237a826b261de7ddd3d09ee53ee68cb4138937 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 18 Feb 2013 10:11:13 -0700 Subject: vfio-pci: Add support for VGA region access PCI defines display class VGA regions at I/O port address 0x3b0, 0x3c0 and MMIO address 0xa0000. As these are non-overlapping, we can ignore the I/O port vs MMIO difference and expose them both in a single region. We make use of the VGA arbiter around each access to configure chipset access as necessary. Signed-off-by: Alex Williamson diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 5980758..e84300b 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -6,3 +6,13 @@ config VFIO_PCI use of PCI drivers using the VFIO framework. If you don't know what to do here, say N. + +config VFIO_PCI_VGA + bool "VFIO PCI support for VGA devices" + depends on VFIO_PCI && X86 && VGA_ARB && EXPERIMENTAL + help + Support for VGA extension to VFIO PCI. This exposes an additional + region on VGA devices for accessing legacy VGA addresses used by + BIOS and generic video drivers. + + If you don't know what to do here, say N. 
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index bb8c8c2..8189cb6 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -84,6 +84,11 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) } else vdev->msix_bar = 0xFF; +#ifdef CONFIG_VFIO_PCI_VGA + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + vdev->has_vga = true; +#endif + return 0; } @@ -285,6 +290,16 @@ static long vfio_pci_ioctl(void *device_data, info.flags = VFIO_REGION_INFO_FLAG_READ; break; } + case VFIO_PCI_VGA_REGION_INDEX: + if (!vdev->has_vga) + return -EINVAL; + + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + info.size = 0xc0000; + info.flags = VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE; + + break; default: return -EINVAL; } @@ -386,6 +401,9 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf, case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX: return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite); + + case VFIO_PCI_VGA_REGION_INDEX: + return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite); } return -EINVAL; diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 00d19b9..d7e55d0 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -53,6 +53,7 @@ struct vfio_pci_device { bool reset_works; bool extended_caps; bool bardirty; + bool has_vga; struct pci_saved_state *pci_saved_state; atomic_t refcnt; }; @@ -77,6 +78,9 @@ extern ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev, extern ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite); +extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite); + extern int vfio_pci_init_perm_bits(void); extern void vfio_pci_uninit_perm_bits(void); diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index e9d78eb..210db24 
100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "vfio_pci_private.h" @@ -175,3 +176,63 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, return done; } + +ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite) +{ + int ret; + loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK; + void __iomem *iomem = NULL; + unsigned int rsrc; + bool is_ioport; + ssize_t done; + + if (!vdev->has_vga) + return -EINVAL; + + switch (pos) { + case 0xa0000 ... 0xbffff: + count = min(count, (size_t)(0xc0000 - pos)); + iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1); + off = pos - 0xa0000; + rsrc = VGA_RSRC_LEGACY_MEM; + is_ioport = false; + break; + case 0x3b0 ... 0x3bb: + count = min(count, (size_t)(0x3bc - pos)); + iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1); + off = pos - 0x3b0; + rsrc = VGA_RSRC_LEGACY_IO; + is_ioport = true; + break; + case 0x3c0 ... 0x3df: + count = min(count, (size_t)(0x3e0 - pos)); + iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1); + off = pos - 0x3c0; + rsrc = VGA_RSRC_LEGACY_IO; + is_ioport = true; + break; + default: + return -EINVAL; + } + + if (!iomem) + return -ENOMEM; + + ret = vga_get_interruptible(vdev->pdev, rsrc); + if (ret) { + is_ioport ? ioport_unmap(iomem) : iounmap(iomem); + return ret; + } + + done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite); + + vga_put(vdev->pdev, rsrc); + + is_ioport ? ioport_unmap(iomem) : iounmap(iomem); + + if (done >= 0) + *ppos += done; + + return done; +} diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 4758d1b..4f41f30 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -303,6 +303,15 @@ enum { VFIO_PCI_BAR5_REGION_INDEX, VFIO_PCI_ROM_REGION_INDEX, VFIO_PCI_CONFIG_REGION_INDEX, + /* + * Expose VGA regions defined for PCI base class 03, subclass 00. 
+ * This includes I/O port ranges 0x3b0 to 0x3bb and 0x3c0 to 0x3df + * as well as the MMIO range 0xa0000 to 0xbffff. Each implemented + * range is found at it's identity mapped offset from the region + * offset, for example 0x3b0 is region_info.offset + 0x3b0. Areas + * between described ranges are unimplemented. + */ + VFIO_PCI_VGA_REGION_INDEX, VFIO_PCI_NUM_REGIONS }; -- cgit v0.10.2 From d65530fbc799e4036d4d3da4ab6e9fa6d8c4a447 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 24 Feb 2013 09:59:44 -0700 Subject: drivers/vfio: remove depends on CONFIG_EXPERIMENTAL The CONFIG_EXPERIMENTAL config item has not carried much meaning for a while now and is almost always enabled by default. As agreed during the Linux kernel summit, remove it from any "depends on" lines in Kconfigs. Signed-off-by: Kees Cook Signed-off-by: Alex Williamson diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index e84300b..c41b01e 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -9,7 +9,7 @@ config VFIO_PCI config VFIO_PCI_VGA bool "VFIO PCI support for VGA devices" - depends on VFIO_PCI && X86 && VGA_ARB && EXPERIMENTAL + depends on VFIO_PCI && X86 && VGA_ARB help Support for VGA extension to VFIO PCI. This exposes an additional region on VGA devices for accessing legacy VGA addresses used by -- cgit v0.10.2