From de49fc0d99e6a8af8be4f56869bdea12976d3551 Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:42 -0600 Subject: vfio/platform: initial skeleton of VFIO support for platform devices This patch forms the common skeleton code for platform devices support with VFIO. This will include the core functionality of VFIO_PLATFORM, however binding to the device and discovering the device resources will be done with the help of a separate file where any Linux platform bus specific code will reside. This will allow us to implement support for also discovering AMBA devices and their resources, but still reuse a large part of the VFIO_PLATFORM implementation. Signed-off-by: Antonios Motakis [Baptiste Reynal: added includes in vfio_platform_private.h] Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c new file mode 100644 index 0000000..34d023b --- /dev/null +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2013 - Virtual Open Systems + * Author: Antonios Motakis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "vfio_platform_private.h" + +static void vfio_platform_release(void *device_data) +{ + module_put(THIS_MODULE); +} + +static int vfio_platform_open(void *device_data) +{ + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + return 0; +} + +static long vfio_platform_ioctl(void *device_data, + unsigned int cmd, unsigned long arg) +{ + if (cmd == VFIO_DEVICE_GET_INFO) + return -EINVAL; + + else if (cmd == VFIO_DEVICE_GET_REGION_INFO) + return -EINVAL; + + else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) + return -EINVAL; + + else if (cmd == VFIO_DEVICE_SET_IRQS) + return -EINVAL; + + else if (cmd == VFIO_DEVICE_RESET) + return -EINVAL; + + return -ENOTTY; +} + +static ssize_t vfio_platform_read(void *device_data, char __user *buf, + size_t count, loff_t *ppos) +{ + return -EINVAL; +} + +static ssize_t vfio_platform_write(void *device_data, const char __user *buf, + size_t count, loff_t *ppos) +{ + return -EINVAL; +} + +static int vfio_platform_mmap(void *device_data, struct vm_area_struct *vma) +{ + return -EINVAL; +} + +static const struct vfio_device_ops vfio_platform_ops = { + .name = "vfio-platform", + .open = vfio_platform_open, + .release = vfio_platform_release, + .ioctl = vfio_platform_ioctl, + .read = vfio_platform_read, + .write = vfio_platform_write, + .mmap = vfio_platform_mmap, +}; + +int vfio_platform_probe_common(struct vfio_platform_device *vdev, + struct device *dev) +{ + struct iommu_group *group; + int ret; + + if (!vdev) + return -EINVAL; + + group = iommu_group_get(dev); + if (!group) { + pr_err("VFIO: No IOMMU group for device %s\n", vdev->name); + return -EINVAL; + } + + ret = vfio_add_group_dev(dev, &vfio_platform_ops, vdev); + if (ret) { + iommu_group_put(group); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(vfio_platform_probe_common); + +struct vfio_platform_device *vfio_platform_remove_common(struct device *dev) +{ + struct vfio_platform_device *vdev; + + vdev = vfio_del_group_dev(dev); + if (vdev) + iommu_group_put(dev->iommu_group); + + return vdev; +} +EXPORT_SYMBOL_GPL(vfio_platform_remove_common); diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h new file mode 100644 index 0000000..c046988 --- /dev/null +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2013 - Virtual Open Systems + * Author: Antonios Motakis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef VFIO_PLATFORM_PRIVATE_H +#define VFIO_PLATFORM_PRIVATE_H + +#include +#include + +struct vfio_platform_device { + /* + * These fields should be filled by the bus specific binder + */ + void *opaque; + const char *name; + uint32_t flags; + /* callbacks to discover device resources */ + struct resource* + (*get_resource)(struct vfio_platform_device *vdev, int i); + int (*get_irq)(struct vfio_platform_device *vdev, int i); +}; + +extern int vfio_platform_probe_common(struct vfio_platform_device *vdev, + struct device *dev); +extern struct vfio_platform_device *vfio_platform_remove_common + (struct device *dev); + +#endif /* VFIO_PLATFORM_PRIVATE_H */ -- cgit v0.10.2 From 9df85aaa43297cb12dc85155695dd1bfdf94f91d Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:43 -0600 Subject: vfio: platform: probe to devices on the platform bus Driver to bind to Linux platform devices, and callbacks to discover their resources to be used by the main VFIO PLATFORM code. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c new file mode 100644 index 0000000..cef645c --- /dev/null +++ b/drivers/vfio/platform/vfio_platform.c @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2013 - Virtual Open Systems + * Author: Antonios Motakis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include + +#include "vfio_platform_private.h" + +#define DRIVER_VERSION "0.10" +#define DRIVER_AUTHOR "Antonios Motakis " +#define DRIVER_DESC "VFIO for platform devices - User Level meta-driver" + +/* probing devices from the linux platform bus */ + +static struct resource *get_platform_resource(struct vfio_platform_device *vdev, + int num) +{ + struct platform_device *dev = (struct platform_device *) vdev->opaque; + int i; + + for (i = 0; i < dev->num_resources; i++) { + struct resource *r = &dev->resource[i]; + + if (resource_type(r) & (IORESOURCE_MEM|IORESOURCE_IO)) { + if (!num) + return r; + + num--; + } + } + return NULL; +} + +static int get_platform_irq(struct vfio_platform_device *vdev, int i) +{ + struct platform_device *pdev = (struct platform_device *) vdev->opaque; + + return platform_get_irq(pdev, i); +} + +static int vfio_platform_probe(struct platform_device *pdev) +{ + struct vfio_platform_device *vdev; + int ret; + + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) + return -ENOMEM; + + vdev->opaque = (void *) pdev; + vdev->name = pdev->name; + vdev->flags = VFIO_DEVICE_FLAGS_PLATFORM; + vdev->get_resource = get_platform_resource; + vdev->get_irq = get_platform_irq; + + ret = vfio_platform_probe_common(vdev, &pdev->dev); + if (ret) + kfree(vdev); + + return ret; +} + +static int vfio_platform_remove(struct platform_device *pdev) +{ + struct vfio_platform_device *vdev; + + vdev = vfio_platform_remove_common(&pdev->dev); + if (vdev) { + kfree(vdev); + return 0; + } + + return -EINVAL; +} + +static struct platform_driver vfio_platform_driver = { + .probe = vfio_platform_probe, + .remove = vfio_platform_remove, + .driver = { + .name = "vfio-platform", + .owner = THIS_MODULE, + }, +}; + +module_platform_driver(vfio_platform_driver); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 82889c3..ea9514b 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -160,6 +160,7 @@ struct vfio_device_info { __u32 flags; #define VFIO_DEVICE_FLAGS_RESET (1 << 0) /* Device supports reset */ #define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */ +#define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */ __u32 num_regions; /* Max region index + 1 */ __u32 num_irqs; /* Max IRQ index + 1 */ }; -- cgit v0.10.2 From 53161532394b3b3c7e1ec9c80658edd75446ac77 Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:44 -0600 Subject: vfio: platform: add the VFIO PLATFORM module to Kconfig Enable building the VFIO PLATFORM driver that allows to use Linux platform devices with VFIO. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 14e27ab..d5322a4 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -27,3 +27,4 @@ menuconfig VFIO If you don't know what to do here, say N. source "drivers/vfio/pci/Kconfig" +source "drivers/vfio/platform/Kconfig" diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 0b035b1..dadf0ca 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o obj-$(CONFIG_VFIO_PCI) += pci/ +obj-$(CONFIG_VFIO_PLATFORM) += platform/ diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig new file mode 100644 index 0000000..c51af17 --- /dev/null +++ b/drivers/vfio/platform/Kconfig @@ -0,0 +1,9 @@ +config VFIO_PLATFORM + tristate "VFIO support for platform devices" + depends on VFIO && EVENTFD && ARM + help + Support for platform devices with VFIO. This is required to make + use of platform devices present on the system using the VFIO + framework. + + If you don't know what to do here, say N. diff --git a/drivers/vfio/platform/Makefile b/drivers/vfio/platform/Makefile new file mode 100644 index 0000000..279862b --- /dev/null +++ b/drivers/vfio/platform/Makefile @@ -0,0 +1,4 @@ + +vfio-platform-y := vfio_platform.o vfio_platform_common.o + +obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform.o -- cgit v0.10.2 From 36fe431f2811fa3b5fed15d272c585d5a47977aa Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:44 -0600 Subject: vfio: amba: VFIO support for AMBA devices Add support for discovering AMBA devices with VFIO and handle them similarly to Linux platform devices. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c new file mode 100644 index 0000000..ff0331f --- /dev/null +++ b/drivers/vfio/platform/vfio_amba.c @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2013 - Virtual Open Systems + * Author: Antonios Motakis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include + +#include "vfio_platform_private.h" + +#define DRIVER_VERSION "0.10" +#define DRIVER_AUTHOR "Antonios Motakis " +#define DRIVER_DESC "VFIO for AMBA devices - User Level meta-driver" + +/* probing devices from the AMBA bus */ + +static struct resource *get_amba_resource(struct vfio_platform_device *vdev, + int i) +{ + struct amba_device *adev = (struct amba_device *) vdev->opaque; + + if (i == 0) + return &adev->res; + + return NULL; +} + +static int get_amba_irq(struct vfio_platform_device *vdev, int i) +{ + struct amba_device *adev = (struct amba_device *) vdev->opaque; + int ret = 0; + + if (i < AMBA_NR_IRQS) + ret = adev->irq[i]; + + /* zero is an unset IRQ for AMBA devices */ + return ret ? ret : -ENXIO; +} + +static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id) +{ + struct vfio_platform_device *vdev; + int ret; + + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) + return -ENOMEM; + + vdev->name = kasprintf(GFP_KERNEL, "vfio-amba-%08x", adev->periphid); + if (!vdev->name) { + kfree(vdev); + return -ENOMEM; + } + + vdev->opaque = (void *) adev; + vdev->flags = VFIO_DEVICE_FLAGS_AMBA; + vdev->get_resource = get_amba_resource; + vdev->get_irq = get_amba_irq; + + ret = vfio_platform_probe_common(vdev, &adev->dev); + if (ret) { + kfree(vdev->name); + kfree(vdev); + } + + return ret; +} + +static int vfio_amba_remove(struct amba_device *adev) +{ + struct vfio_platform_device *vdev; + + vdev = vfio_platform_remove_common(&adev->dev); + if (vdev) { + kfree(vdev->name); + kfree(vdev); + return 0; + } + + return -EINVAL; +} + +static struct amba_id pl330_ids[] = { + { 0, 0 }, +}; + +MODULE_DEVICE_TABLE(amba, pl330_ids); + +static struct amba_driver vfio_amba_driver = { + .probe = vfio_amba_probe, + .remove = vfio_amba_remove, + .id_table = pl330_ids, + .drv = { + .name = "vfio-amba", + .owner = THIS_MODULE, + }, +}; + +module_amba_driver(vfio_amba_driver); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index ea9514b..b57b750 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -161,6 +161,7 @@ struct vfio_device_info { #define VFIO_DEVICE_FLAGS_RESET (1 << 0) /* Device supports reset */ #define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */ #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */ +#define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ __u32 num_regions; /* Max region index + 1 */ __u32 num_irqs; /* Max IRQ index + 1 */ }; -- cgit v0.10.2 From b13329adc2cd6c613ffd95b681a0d4cbd399939f Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:45 -0600 Subject: vfio: amba: add the VFIO for AMBA devices module to Kconfig Enable building the VFIO AMBA driver. VFIO_AMBA depends on VFIO_PLATFORM, since it is sharing a portion of the code, and it is essentially implemented as a platform device whose resources are discovered via AMBA specific APIs in the kernel. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig index c51af17..c0a3bff 100644 --- a/drivers/vfio/platform/Kconfig +++ b/drivers/vfio/platform/Kconfig @@ -7,3 +7,13 @@ config VFIO_PLATFORM framework. If you don't know what to do here, say N. + +config VFIO_AMBA + tristate "VFIO support for AMBA devices" + depends on VFIO_PLATFORM && ARM_AMBA + help + Support for ARM AMBA devices with VFIO. This is required to make + use of ARM AMBA devices present on the system using the VFIO + framework. + + If you don't know what to do here, say N. diff --git a/drivers/vfio/platform/Makefile b/drivers/vfio/platform/Makefile index 279862b..1957170 100644 --- a/drivers/vfio/platform/Makefile +++ b/drivers/vfio/platform/Makefile @@ -2,3 +2,7 @@ vfio-platform-y := vfio_platform.o vfio_platform_common.o obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform.o + +vfio-amba-y := vfio_amba.o + +obj-$(CONFIG_VFIO_AMBA) += vfio-amba.o -- cgit v0.10.2 From 2e8567bbb536cfc7336f8f105ef43adc98b9c156 Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:46 -0600 Subject: vfio/platform: return info for bound device A VFIO userspace driver will start by opening the VFIO device that corresponds to an IOMMU group, and will use the ioctl interface to get the basic device info, such as number of memory regions and interrupts, and their properties. This patch enables the VFIO_DEVICE_GET_INFO ioctl call. Signed-off-by: Antonios Motakis [Baptiste Reynal: added include in vfio_platform_common.c] Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 34d023b..c2f853a 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "vfio_platform_private.h" @@ -38,10 +39,27 @@ static int vfio_platform_open(void *device_data) static long vfio_platform_ioctl(void *device_data, unsigned int cmd, unsigned long arg) { - if (cmd == VFIO_DEVICE_GET_INFO) - return -EINVAL; + struct vfio_platform_device *vdev = device_data; + unsigned long minsz; + + if (cmd == VFIO_DEVICE_GET_INFO) { + struct vfio_device_info info; + + minsz = offsetofend(struct vfio_device_info, num_irqs); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + info.flags = vdev->flags; + info.num_regions = 0; + info.num_irqs = 0; + + return copy_to_user((void __user *)arg, &info, minsz); - else if (cmd == VFIO_DEVICE_GET_REGION_INFO) + } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) return -EINVAL; else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) -- cgit v0.10.2 From e8909e67cac3ad3868dc86cc6b1445f39c71bf63 Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:46 -0600 Subject: vfio/platform: return info for device memory mapped IO regions This patch enables the IOCTLs VFIO_DEVICE_GET_REGION_INFO ioctl call, which allows the user to learn about the available MMIO resources of a device. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index c2f853a..47f6309 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -23,17 +23,97 @@ #include "vfio_platform_private.h" +static DEFINE_MUTEX(driver_lock); + +static int vfio_platform_regions_init(struct vfio_platform_device *vdev) +{ + int cnt = 0, i; + + while (vdev->get_resource(vdev, cnt)) + cnt++; + + vdev->regions = kcalloc(cnt, sizeof(struct vfio_platform_region), + GFP_KERNEL); + if (!vdev->regions) + return -ENOMEM; + + for (i = 0; i < cnt; i++) { + struct resource *res = + vdev->get_resource(vdev, i); + + if (!res) + goto err; + + vdev->regions[i].addr = res->start; + vdev->regions[i].size = resource_size(res); + vdev->regions[i].flags = 0; + + switch (resource_type(res)) { + case IORESOURCE_MEM: + vdev->regions[i].type = VFIO_PLATFORM_REGION_TYPE_MMIO; + break; + case IORESOURCE_IO: + vdev->regions[i].type = VFIO_PLATFORM_REGION_TYPE_PIO; + break; + default: + goto err; + } + } + + vdev->num_regions = cnt; + + return 0; +err: + kfree(vdev->regions); + return -EINVAL; +} + +static void vfio_platform_regions_cleanup(struct vfio_platform_device *vdev) +{ + vdev->num_regions = 0; + kfree(vdev->regions); +} + static void vfio_platform_release(void *device_data) { + struct vfio_platform_device *vdev = device_data; + + mutex_lock(&driver_lock); + + if (!(--vdev->refcnt)) { + vfio_platform_regions_cleanup(vdev); + } + + mutex_unlock(&driver_lock); + module_put(THIS_MODULE); } static int vfio_platform_open(void *device_data) { + struct vfio_platform_device *vdev = device_data; + int ret; + if (!try_module_get(THIS_MODULE)) return -ENODEV; + mutex_lock(&driver_lock); + + if (!vdev->refcnt) { + ret = vfio_platform_regions_init(vdev); + if (ret) + goto err_reg; + } + + vdev->refcnt++; + + mutex_unlock(&driver_lock); return 0; + +err_reg: + mutex_unlock(&driver_lock); + module_put(THIS_MODULE); + return ret; } static long vfio_platform_ioctl(void *device_data, @@ -54,15 +134,33 @@ static long vfio_platform_ioctl(void *device_data, return -EINVAL; info.flags = vdev->flags; - info.num_regions = 0; + info.num_regions = vdev->num_regions; info.num_irqs = 0; return copy_to_user((void __user *)arg, &info, minsz); - } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) - return -EINVAL; + } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { + struct vfio_region_info info; + + minsz = offsetofend(struct vfio_region_info, offset); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + if (info.index >= vdev->num_regions) + return -EINVAL; + + /* map offset to the physical address */ + info.offset = VFIO_PLATFORM_INDEX_TO_OFFSET(info.index); + info.size = vdev->regions[info.index].size; + info.flags = vdev->regions[info.index].flags; + + return copy_to_user((void __user *)arg, &info, minsz); - else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) + } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) return -EINVAL; else if (cmd == VFIO_DEVICE_SET_IRQS) diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index c046988..3551f6d 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -18,7 +18,29 @@ #include #include +#define VFIO_PLATFORM_OFFSET_SHIFT 40 +#define VFIO_PLATFORM_OFFSET_MASK (((u64)(1) << VFIO_PLATFORM_OFFSET_SHIFT) - 1) + +#define VFIO_PLATFORM_OFFSET_TO_INDEX(off) \ + (off >> VFIO_PLATFORM_OFFSET_SHIFT) + +#define VFIO_PLATFORM_INDEX_TO_OFFSET(index) \ + ((u64)(index) << VFIO_PLATFORM_OFFSET_SHIFT) + +struct vfio_platform_region { + u64 addr; + resource_size_t size; + u32 flags; + u32 type; +#define VFIO_PLATFORM_REGION_TYPE_MMIO 1 +#define VFIO_PLATFORM_REGION_TYPE_PIO 2 +}; + struct vfio_platform_device { + struct vfio_platform_region *regions; + u32 num_regions; + int refcnt; + /* * These fields should be filled by the bus specific binder */ -- cgit v0.10.2 From 6e3f264560099869f68830cb14b3b3e71e5ac76a Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:47 -0600 Subject: vfio/platform: read and write support for the device fd VFIO returns a file descriptor which we can use to manipulate the memory regions of the device. Usually, the user will mmap memory regions that are addressable on page boundaries, however for memory regions where this is not the case we cannot provide mmap functionality due to security concerns. For this reason we also allow to use read and write functions to the file descriptor pointing to the memory regions. We implement this functionality only for MMIO regions of platform devices; PIO regions are not being handled at this point. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 47f6309..4df66f5 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -51,6 +51,10 @@ static int vfio_platform_regions_init(struct vfio_platform_device *vdev) switch (resource_type(res)) { case IORESOURCE_MEM: vdev->regions[i].type = VFIO_PLATFORM_REGION_TYPE_MMIO; + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ; + if (!(res->flags & IORESOURCE_READONLY)) + vdev->regions[i].flags |= + VFIO_REGION_INFO_FLAG_WRITE; break; case IORESOURCE_IO: vdev->regions[i].type = VFIO_PLATFORM_REGION_TYPE_PIO; @@ -70,6 +74,11 @@ err: static void vfio_platform_regions_cleanup(struct vfio_platform_device *vdev) { + int i; + + for (i = 0; i < vdev->num_regions; i++) + iounmap(vdev->regions[i].ioaddr); + vdev->num_regions = 0; kfree(vdev->regions); } @@ -172,15 +181,156 @@ static long vfio_platform_ioctl(void *device_data, return -ENOTTY; } +static ssize_t vfio_platform_read_mmio(struct vfio_platform_region reg, + char __user *buf, size_t count, + loff_t off) +{ + unsigned int done = 0; + + if (!reg.ioaddr) { + reg.ioaddr = + ioremap_nocache(reg.addr, reg.size); + + if (!reg.ioaddr) + return -ENOMEM; + } + + while (count) { + size_t filled; + + if (count >= 4 && !(off % 4)) { + u32 val; + + val = ioread32(reg.ioaddr + off); + if (copy_to_user(buf, &val, 4)) + goto err; + + filled = 4; + } else if (count >= 2 && !(off % 2)) { + u16 val; + + val = ioread16(reg.ioaddr + off); + if (copy_to_user(buf, &val, 2)) + goto err; + + filled = 2; + } else { + u8 val; + + val = ioread8(reg.ioaddr + off); + if (copy_to_user(buf, &val, 1)) + goto err; + + filled = 1; + } + + + count -= filled; + done += filled; + off += filled; + buf += filled; + } + + return done; +err: + return -EFAULT; +} + static ssize_t vfio_platform_read(void *device_data, char __user *buf, size_t count, loff_t *ppos) { + struct vfio_platform_device *vdev = device_data; + unsigned int index = VFIO_PLATFORM_OFFSET_TO_INDEX(*ppos); + loff_t off = *ppos & VFIO_PLATFORM_OFFSET_MASK; + + if (index >= vdev->num_regions) + return -EINVAL; + + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ)) + return -EINVAL; + + if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_MMIO) + return vfio_platform_read_mmio(vdev->regions[index], + buf, count, off); + else if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_PIO) + return -EINVAL; /* not implemented */ + return -EINVAL; } +static ssize_t vfio_platform_write_mmio(struct vfio_platform_region reg, + const char __user *buf, size_t count, + loff_t off) +{ + unsigned int done = 0; + + if (!reg.ioaddr) { + reg.ioaddr = + ioremap_nocache(reg.addr, reg.size); + + if (!reg.ioaddr) + return -ENOMEM; + } + + while (count) { + size_t filled; + + if (count >= 4 && !(off % 4)) { + u32 val; + + if (copy_from_user(&val, buf, 4)) + goto err; + iowrite32(val, reg.ioaddr + off); + + filled = 4; + } else if (count >= 2 && !(off % 2)) { + u16 val; + + if (copy_from_user(&val, buf, 2)) + goto err; + iowrite16(val, reg.ioaddr + off); + + filled = 2; + } else { + u8 val; + + if (copy_from_user(&val, buf, 1)) + goto err; + iowrite8(val, reg.ioaddr + off); + + filled = 1; + } + + count -= filled; + done += filled; + off += filled; + buf += filled; + } + + return done; +err: + return -EFAULT; +} + static ssize_t vfio_platform_write(void *device_data, const char __user *buf, size_t count, loff_t *ppos) { + struct vfio_platform_device *vdev = device_data; + unsigned int index = VFIO_PLATFORM_OFFSET_TO_INDEX(*ppos); + loff_t off = *ppos & VFIO_PLATFORM_OFFSET_MASK; + + if (index >= vdev->num_regions) + return -EINVAL; + + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE)) + return -EINVAL; + + if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_MMIO) + return vfio_platform_write_mmio(vdev->regions[index], + buf, count, off); + else if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_PIO) + return -EINVAL; /* not implemented */ + return -EINVAL; } diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index 3551f6d..97c78f9 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -34,6 +34,7 @@ struct vfio_platform_region { u32 type; #define VFIO_PLATFORM_REGION_TYPE_MMIO 1 #define VFIO_PLATFORM_REGION_TYPE_PIO 2 + void __iomem *ioaddr; }; struct vfio_platform_device { -- cgit v0.10.2 From fad4d5b1f042a659e07ceba3a6d1744b30f8ff7c Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:48 -0600 Subject: vfio/platform: support MMAP of MMIO regions Allow to memory map the MMIO regions of the device so userspace can directly access them. PIO regions are not being handled at this point. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 4df66f5..d7fe2c7 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -55,6 +55,16 @@ static int vfio_platform_regions_init(struct vfio_platform_device *vdev) if (!(res->flags & IORESOURCE_READONLY)) vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE; + + /* + * Only regions addressed with PAGE granularity may be + * MMAPed securely. + */ + if (!(vdev->regions[i].addr & ~PAGE_MASK) && + !(vdev->regions[i].size & ~PAGE_MASK)) + vdev->regions[i].flags |= + VFIO_REGION_INFO_FLAG_MMAP; + break; case IORESOURCE_IO: vdev->regions[i].type = VFIO_PLATFORM_REGION_TYPE_PIO; @@ -334,8 +344,63 @@ static ssize_t vfio_platform_write(void *device_data, const char __user *buf, return -EINVAL; } +static int vfio_platform_mmap_mmio(struct vfio_platform_region region, + struct vm_area_struct *vma) +{ + u64 req_len, pgoff, req_start; + + req_len = vma->vm_end - vma->vm_start; + pgoff = vma->vm_pgoff & + ((1U << (VFIO_PLATFORM_OFFSET_SHIFT - PAGE_SHIFT)) - 1); + req_start = pgoff << PAGE_SHIFT; + + if (region.size < PAGE_SIZE || req_start + req_len > region.size) + return -EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_pgoff = (region.addr >> PAGE_SHIFT) + pgoff; + + return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + req_len, vma->vm_page_prot); +} + static int vfio_platform_mmap(void *device_data, struct vm_area_struct *vma) { + struct vfio_platform_device *vdev = device_data; + unsigned int index; + + index = vma->vm_pgoff >> (VFIO_PLATFORM_OFFSET_SHIFT - PAGE_SHIFT); + + if (vma->vm_end < vma->vm_start) + return -EINVAL; + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + if (index >= vdev->num_regions) + return -EINVAL; + if (vma->vm_start & ~PAGE_MASK) + return -EINVAL; + if (vma->vm_end & ~PAGE_MASK) + return -EINVAL; + + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_MMAP)) + return -EINVAL; + + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ) + && (vma->vm_flags & VM_READ)) + return -EINVAL; + + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE) + && (vma->vm_flags & VM_WRITE)) + return -EINVAL; + + vma->vm_private_data = vdev; + + if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_MMIO) + return vfio_platform_mmap_mmio(vdev->regions[index], vma); + + else if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_PIO) + return -EINVAL; /* not implemented */ + return -EINVAL; } -- cgit v0.10.2 From 682704c41e6d2238c1fb5c6ab83eedadd876fa0e Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:48 -0600 Subject: vfio/platform: return IRQ info Return information for the interrupts exposed by the device. This patch extends VFIO_DEVICE_GET_INFO with the number of IRQs and enables VFIO_DEVICE_GET_IRQ_INFO. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/platform/Makefile b/drivers/vfio/platform/Makefile index 1957170..81de144 100644 --- a/drivers/vfio/platform/Makefile +++ b/drivers/vfio/platform/Makefile @@ -1,5 +1,5 @@ -vfio-platform-y := vfio_platform.o vfio_platform_common.o +vfio-platform-y := vfio_platform.o vfio_platform_common.o vfio_platform_irq.o obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform.o diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index d7fe2c7..908d510 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -101,6 +101,7 @@ static void vfio_platform_release(void *device_data) if (!(--vdev->refcnt)) { vfio_platform_regions_cleanup(vdev); + vfio_platform_irq_cleanup(vdev); } mutex_unlock(&driver_lock); @@ -122,6 +123,10 @@ static int vfio_platform_open(void *device_data) ret = vfio_platform_regions_init(vdev); if (ret) goto err_reg; + + ret = vfio_platform_irq_init(vdev); + if (ret) + goto err_irq; } vdev->refcnt++; @@ -129,6 +134,8 @@ static int vfio_platform_open(void *device_data) mutex_unlock(&driver_lock); return 0; +err_irq: + vfio_platform_regions_cleanup(vdev); err_reg: mutex_unlock(&driver_lock); module_put(THIS_MODULE); @@ -154,7 +161,7 @@ static long vfio_platform_ioctl(void *device_data, info.flags = vdev->flags; info.num_regions = vdev->num_regions; - info.num_irqs = 0; + info.num_irqs = vdev->num_irqs; return copy_to_user((void __user *)arg, &info, minsz); @@ -179,10 +186,26 @@ static long vfio_platform_ioctl(void *device_data, return copy_to_user((void __user *)arg, &info, minsz); - } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) - return -EINVAL; + } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) { + struct vfio_irq_info info; + + minsz = offsetofend(struct vfio_irq_info, count); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + if (info.index >= vdev->num_irqs) + return -EINVAL; + + info.flags = vdev->irqs[info.index].flags; + info.count = vdev->irqs[info.index].count; + + return copy_to_user((void __user *)arg, &info, minsz); - else if (cmd == VFIO_DEVICE_SET_IRQS) + } else if (cmd == VFIO_DEVICE_SET_IRQS) return -EINVAL; else if (cmd == VFIO_DEVICE_RESET) diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c new file mode 100644 index 0000000..c6c3ec1 --- /dev/null +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -0,0 +1,51 @@ +/* + * VFIO platform devices interrupt handling + * + * Copyright (C) 2013 - Virtual Open Systems + * Author: Antonios Motakis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include + +#include "vfio_platform_private.h" + +int vfio_platform_irq_init(struct vfio_platform_device *vdev) +{ + int cnt = 0, i; + + while (vdev->get_irq(vdev, cnt) >= 0) + cnt++; + + vdev->irqs = kcalloc(cnt, sizeof(struct vfio_platform_irq), GFP_KERNEL); + if (!vdev->irqs) + return -ENOMEM; + + for (i = 0; i < cnt; i++) { + vdev->irqs[i].flags = 0; + vdev->irqs[i].count = 1; + } + + vdev->num_irqs = cnt; + + return 0; +} + +void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev) +{ + vdev->num_irqs = 0; + kfree(vdev->irqs); +} diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index 97c78f9..a2e286e 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -27,6 +27,11 @@ #define VFIO_PLATFORM_INDEX_TO_OFFSET(index) \ ((u64)(index) << VFIO_PLATFORM_OFFSET_SHIFT) +struct vfio_platform_irq { + u32 flags; + u32 count; +}; + struct vfio_platform_region { u64 addr; resource_size_t size; @@ -40,6 +45,8 @@ struct vfio_platform_region { struct vfio_platform_device { struct vfio_platform_region *regions; u32 num_regions; + struct vfio_platform_irq *irqs; + u32 num_irqs; int refcnt; /* @@ -59,4 +66,7 @@ extern int vfio_platform_probe_common(struct vfio_platform_device *vdev, extern struct vfio_platform_device *vfio_platform_remove_common (struct device *dev); +extern int vfio_platform_irq_init(struct vfio_platform_device *vdev); +extern void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev); + #endif /* VFIO_PLATFORM_PRIVATE_H */ -- cgit v0.10.2 From 9a36321c8d3350c4f7befa02adf3ce4583287ad9 Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:49 -0600 Subject: vfio/platform: initial interrupts support code This patch is a skeleton for the VFIO_DEVICE_SET_IRQS IOCTL, around which most IRQ functionality is implemented in VFIO. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 908d510..abcff7a 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -205,10 +205,54 @@ static long vfio_platform_ioctl(void *device_data, return copy_to_user((void __user *)arg, &info, minsz); - } else if (cmd == VFIO_DEVICE_SET_IRQS) - return -EINVAL; + } else if (cmd == VFIO_DEVICE_SET_IRQS) { + struct vfio_irq_set hdr; + u8 *data = NULL; + int ret = 0; + + minsz = offsetofend(struct vfio_irq_set, count); + + if (copy_from_user(&hdr, (void __user *)arg, minsz)) + return -EFAULT; + + if (hdr.argsz < minsz) + return -EINVAL; + + if (hdr.index >= vdev->num_irqs) + return -EINVAL; + + if (hdr.flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK | + VFIO_IRQ_SET_ACTION_TYPE_MASK)) + return -EINVAL; - else if (cmd == VFIO_DEVICE_RESET) + if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) { + size_t size; + + if (hdr.flags & VFIO_IRQ_SET_DATA_BOOL) + size = sizeof(uint8_t); + else if (hdr.flags & VFIO_IRQ_SET_DATA_EVENTFD) + size = sizeof(int32_t); + else + return -EINVAL; + + if (hdr.argsz - minsz < size) + return -EINVAL; + + data = memdup_user((void __user *)(arg + minsz), size); + if (IS_ERR(data)) + return PTR_ERR(data); + } + + mutex_lock(&vdev->igate); + + ret = vfio_platform_set_irqs_ioctl(vdev, hdr.flags, hdr.index, + hdr.start, hdr.count, data); + mutex_unlock(&vdev->igate); + kfree(data); + + return ret; + + } else if (cmd == VFIO_DEVICE_RESET) return -EINVAL; return -ENOTTY; @@ -458,6 +502,8 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, return ret; } + mutex_init(&vdev->igate); + return 0; } EXPORT_SYMBOL_GPL(vfio_platform_probe_common); diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c index c6c3ec1..df5c919 100644 --- a/drivers/vfio/platform/vfio_platform_irq.c +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -23,6 +23,56 @@ #include "vfio_platform_private.h" +static int vfio_platform_set_irq_mask(struct vfio_platform_device *vdev, + unsigned index, unsigned start, + unsigned count, uint32_t flags, + void *data) +{ + return -EINVAL; +} + +static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev, + unsigned index, unsigned start, + unsigned count, uint32_t flags, + void *data) +{ + return -EINVAL; +} + +static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev, + unsigned index, unsigned start, + unsigned count, uint32_t flags, + void *data) +{ + return -EINVAL; +} + +int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, + uint32_t flags, unsigned index, unsigned start, + unsigned count, void *data) +{ + int (*func)(struct vfio_platform_device *vdev, unsigned index, + unsigned start, unsigned count, uint32_t flags, + void *data) = NULL; + + switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { + case VFIO_IRQ_SET_ACTION_MASK: + func = vfio_platform_set_irq_mask; + break; + case VFIO_IRQ_SET_ACTION_UNMASK: + func = vfio_platform_set_irq_unmask; + break; + case VFIO_IRQ_SET_ACTION_TRIGGER: + func = vfio_platform_set_irq_trigger; + break; + } + + if (!func) + return -ENOTTY; + + return func(vdev, index, start, count, flags, data); +} + int vfio_platform_irq_init(struct vfio_platform_device *vdev) { int cnt = 0, i; @@ -35,13 +85,22 @@ int vfio_platform_irq_init(struct vfio_platform_device *vdev) return -ENOMEM; for (i = 0; i < cnt; i++) { + int hwirq = vdev->get_irq(vdev, i); + + if (hwirq < 0) + goto err; + vdev->irqs[i].flags = 0; vdev->irqs[i].count = 1; + vdev->irqs[i].hwirq = hwirq; } vdev->num_irqs = cnt; return 0; +err: + kfree(vdev->irqs); + return -EINVAL; } void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev) diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index a2e286e..b119a6c 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -30,6 +30,7 @@ struct vfio_platform_irq { u32 flags; u32 count; + int hwirq; }; struct vfio_platform_region { @@ -48,6 +49,7 @@ struct vfio_platform_device { struct vfio_platform_irq *irqs; u32 num_irqs; int refcnt; + struct mutex igate; /* * These fields should be filled by the bus specific binder @@ -69,4 +71,9 @@ extern struct vfio_platform_device *vfio_platform_remove_common extern int vfio_platform_irq_init(struct vfio_platform_device *vdev); extern void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev); +extern int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, + uint32_t flags, unsigned index, + unsigned start, unsigned count, + void *data); + #endif /* VFIO_PLATFORM_PRIVATE_H */ -- cgit v0.10.2 From 57f972e2b341dd6a73533f9293ec55d584a5d833 Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:50 -0600 Subject: vfio/platform: trigger an interrupt via eventfd This patch allows to set an eventfd for a platform device's interrupt, and also to trigger the interrupt eventfd from userspace for testing. Level sensitive interrupts are marked as maskable and are handled in a later patch. Edge triggered interrupts are not advertised as maskable and are implemented here using a simple and efficient IRQ handler. Signed-off-by: Antonios Motakis [Baptiste Reynal: fix masked interrupt initialization] Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c index df5c919..611ec80 100644 --- a/drivers/vfio/platform/vfio_platform_irq.c +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -39,12 +39,92 @@ static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev, return -EINVAL; } +static irqreturn_t vfio_irq_handler(int irq, void *dev_id) +{ + struct vfio_platform_irq *irq_ctx = dev_id; + + eventfd_signal(irq_ctx->trigger, 1); + + return IRQ_HANDLED; +} + +static int vfio_set_trigger(struct vfio_platform_device *vdev, int index, + int fd, irq_handler_t handler) +{ + struct vfio_platform_irq *irq = &vdev->irqs[index]; + struct eventfd_ctx *trigger; + int ret; + + if (irq->trigger) { + free_irq(irq->hwirq, irq); + kfree(irq->name); + eventfd_ctx_put(irq->trigger); + irq->trigger = NULL; + } + + if (fd < 0) /* Disable only */ + return 0; + + irq->name = kasprintf(GFP_KERNEL, "vfio-irq[%d](%s)", + irq->hwirq, vdev->name); + if (!irq->name) + return -ENOMEM; + + trigger = eventfd_ctx_fdget(fd); + if (IS_ERR(trigger)) { + kfree(irq->name); + return PTR_ERR(trigger); + } + + irq->trigger = trigger; + + ret = request_irq(irq->hwirq, handler, 0, irq->name, irq); + if (ret) { + kfree(irq->name); + eventfd_ctx_put(trigger); + irq->trigger = NULL; + return ret; + } + + return 0; +} + static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) { - return -EINVAL; + struct vfio_platform_irq *irq = &vdev->irqs[index]; + irq_handler_t handler; + + if (vdev->irqs[index].flags & VFIO_IRQ_INFO_AUTOMASKED) + return -EINVAL; /* not implemented */ + else + handler = vfio_irq_handler; + + if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) + return vfio_set_trigger(vdev, index, -1, handler); + + if (start != 0 || count != 1) + return -EINVAL; + + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + int32_t fd = *(int32_t *)data; + + return vfio_set_trigger(vdev, index, fd, handler); + } + + if (flags & VFIO_IRQ_SET_DATA_NONE) { + handler(irq->hwirq, irq); + + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { + uint8_t trigger = *(uint8_t *)data; + + if (trigger) + handler(irq->hwirq, irq); + } + + return 0; } int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, @@ -90,7 +170,12 @@ int vfio_platform_irq_init(struct vfio_platform_device *vdev) if (hwirq < 0) goto err; - vdev->irqs[i].flags = 0; + vdev->irqs[i].flags = VFIO_IRQ_INFO_EVENTFD; + + if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) + vdev->irqs[i].flags |= VFIO_IRQ_INFO_MASKABLE + | VFIO_IRQ_INFO_AUTOMASKED; + vdev->irqs[i].count = 1; vdev->irqs[i].hwirq = hwirq; } @@ -105,6 +190,11 @@ err: void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev) { + int i; + + for (i = 0; i < vdev->num_irqs; i++) + vfio_set_trigger(vdev, i, -1, NULL); + vdev->num_irqs = 0; kfree(vdev->irqs); } diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index b119a6c..aa01cc3 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -31,6 +31,8 @@ struct vfio_platform_irq { u32 flags; u32 count; int hwirq; + char *name; + struct eventfd_ctx *trigger; }; struct vfio_platform_region { -- cgit v0.10.2 From 06211b40ce6b63903fe03831fd075a25630dc856 Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:50 -0600 Subject: vfio/platform: support for level sensitive interrupts Level sensitive interrupts are exposed as maskable and automasked interrupts and are masked and disabled automatically when they fire. Signed-off-by: Antonios Motakis [Baptiste Reynal: Move masked interrupt initialization from "vfio/platform: trigger an interrupt via eventfd"] Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c index 611ec80..e0e6388 100644 --- a/drivers/vfio/platform/vfio_platform_irq.c +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -23,12 +23,59 @@ #include "vfio_platform_private.h" +static void vfio_platform_mask(struct vfio_platform_irq *irq_ctx) +{ + unsigned long flags; + + spin_lock_irqsave(&irq_ctx->lock, flags); + + if (!irq_ctx->masked) { + disable_irq_nosync(irq_ctx->hwirq); + irq_ctx->masked = true; + } + + spin_unlock_irqrestore(&irq_ctx->lock, flags); +} + static int vfio_platform_set_irq_mask(struct vfio_platform_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) { - return -EINVAL; + if (start != 0 || count != 1) + return -EINVAL; + + if (!(vdev->irqs[index].flags & VFIO_IRQ_INFO_MASKABLE)) + return -EINVAL; + + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) + return -EINVAL; /* not implemented yet */ + + if (flags & VFIO_IRQ_SET_DATA_NONE) { + vfio_platform_mask(&vdev->irqs[index]); + + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { + uint8_t mask = *(uint8_t *)data; + + if (mask) + vfio_platform_mask(&vdev->irqs[index]); + } + + return 0; +} + +static void vfio_platform_unmask(struct vfio_platform_irq *irq_ctx) +{ + unsigned long flags; + + spin_lock_irqsave(&irq_ctx->lock, flags); + + if (irq_ctx->masked) { + enable_irq(irq_ctx->hwirq); + irq_ctx->masked = false; + } + + spin_unlock_irqrestore(&irq_ctx->lock, flags); } static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev, @@ -36,7 +83,50 @@ static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev, unsigned count, uint32_t flags, void *data) { - return -EINVAL; + if (start != 0 || count != 1) + return -EINVAL; + + if (!(vdev->irqs[index].flags & VFIO_IRQ_INFO_MASKABLE)) + return -EINVAL; + + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) + return -EINVAL; /* not implemented yet */ + + if (flags & VFIO_IRQ_SET_DATA_NONE) { + vfio_platform_unmask(&vdev->irqs[index]); + + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { + uint8_t unmask = *(uint8_t *)data; + + if (unmask) + vfio_platform_unmask(&vdev->irqs[index]); + } + + return 0; +} + +static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id) +{ + struct vfio_platform_irq *irq_ctx = dev_id; + unsigned long flags; + int ret = IRQ_NONE; + + spin_lock_irqsave(&irq_ctx->lock, flags); + + if (!irq_ctx->masked) { + ret = IRQ_HANDLED; + + /* automask maskable interrupts */ + disable_irq_nosync(irq_ctx->hwirq); + irq_ctx->masked = true; + } + + spin_unlock_irqrestore(&irq_ctx->lock, flags); + + if (ret == IRQ_HANDLED) + eventfd_signal(irq_ctx->trigger, 1); + + return ret; } static irqreturn_t vfio_irq_handler(int irq, void *dev_id) @@ -78,6 +168,7 @@ static int vfio_set_trigger(struct vfio_platform_device *vdev, int index, irq->trigger = trigger; + irq_set_status_flags(irq->hwirq, IRQ_NOAUTOEN); ret = request_irq(irq->hwirq, handler, 0, irq->name, irq); if (ret) { kfree(irq->name); @@ -86,6 +177,9 @@ static int vfio_set_trigger(struct vfio_platform_device *vdev, int index, return ret; } + if (!irq->masked) + enable_irq(irq->hwirq); + return 0; } @@ -98,7 +192,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev, irq_handler_t handler; if (vdev->irqs[index].flags & VFIO_IRQ_INFO_AUTOMASKED) - return -EINVAL; /* not implemented */ + handler = vfio_automasked_irq_handler; else handler = vfio_irq_handler; @@ -170,6 +264,8 @@ int vfio_platform_irq_init(struct vfio_platform_device *vdev) if (hwirq < 0) goto err; + spin_lock_init(&vdev->irqs[i].lock); + vdev->irqs[i].flags = VFIO_IRQ_INFO_EVENTFD; if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) @@ -178,6 +274,7 @@ int vfio_platform_irq_init(struct vfio_platform_device *vdev) vdev->irqs[i].count = 1; vdev->irqs[i].hwirq = hwirq; + vdev->irqs[i].masked = false; } vdev->num_irqs = cnt; diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index aa01cc3..ff2db1d 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -33,6 +33,8 @@ struct vfio_platform_irq { int hwirq; char *name; struct eventfd_ctx *trigger; + bool masked; + spinlock_t lock; }; struct vfio_platform_region { -- cgit v0.10.2 From bdc5e1021b7fa061d21e64fc6d308ee0ef3c7582 Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:51 -0600 Subject: vfio: add a vfio_ prefix to virqfd_enable and virqfd_disable and export We want to reuse virqfd functionality in multiple VFIO drivers; before moving these functions to core VFIO, add the vfio_ prefix to the virqfd_enable and virqfd_disable functions, and export them so they can be used from other modules. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 2027a27..09e38b9 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -126,10 +126,10 @@ static void virqfd_inject(struct work_struct *work) virqfd->thread(virqfd->vdev, virqfd->data); } -static int virqfd_enable(struct vfio_pci_device *vdev, - int (*handler)(struct vfio_pci_device *, void *), - void (*thread)(struct vfio_pci_device *, void *), - void *data, struct virqfd **pvirqfd, int fd) +int vfio_virqfd_enable(struct vfio_pci_device *vdev, + int (*handler)(struct vfio_pci_device *, void *), + void (*thread)(struct vfio_pci_device *, void *), + void *data, struct virqfd **pvirqfd, int fd) { struct fd irqfd; struct eventfd_ctx *ctx; @@ -215,9 +215,9 @@ err_fd: return ret; } +EXPORT_SYMBOL_GPL(vfio_virqfd_enable); -static void virqfd_disable(struct vfio_pci_device *vdev, - struct virqfd **pvirqfd) +void vfio_virqfd_disable(struct vfio_pci_device *vdev, struct virqfd **pvirqfd) { unsigned long flags; @@ -237,6 +237,7 @@ static void virqfd_disable(struct vfio_pci_device *vdev, */ flush_workqueue(vfio_irqfd_cleanup_wq); } +EXPORT_SYMBOL_GPL(vfio_virqfd_disable); /* * INTx @@ -440,8 +441,8 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd) static void vfio_intx_disable(struct vfio_pci_device *vdev) { vfio_intx_set_signal(vdev, -1); - virqfd_disable(vdev, &vdev->ctx[0].unmask); - virqfd_disable(vdev, &vdev->ctx[0].mask); + vfio_virqfd_disable(vdev, &vdev->ctx[0].unmask); + vfio_virqfd_disable(vdev, &vdev->ctx[0].mask); vdev->irq_type = VFIO_PCI_NUM_IRQS; vdev->num_ctx = 0; kfree(vdev->ctx); @@ -605,8 +606,8 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); for (i = 0; i < vdev->num_ctx; i++) { - virqfd_disable(vdev, &vdev->ctx[i].unmask); - virqfd_disable(vdev, &vdev->ctx[i].mask); + vfio_virqfd_disable(vdev, &vdev->ctx[i].unmask); + vfio_virqfd_disable(vdev, &vdev->ctx[i].mask); } if (msix) { @@ -639,11 +640,12 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev, } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { int32_t fd = *(int32_t *)data; if (fd >= 0) - return virqfd_enable(vdev, vfio_pci_intx_unmask_handler, - vfio_send_intx_eventfd, NULL, - &vdev->ctx[0].unmask, fd); + return vfio_virqfd_enable(vdev, + vfio_pci_intx_unmask_handler, + vfio_send_intx_eventfd, NULL, + &vdev->ctx[0].unmask, fd); - virqfd_disable(vdev, &vdev->ctx[0].unmask); + vfio_virqfd_disable(vdev, &vdev->ctx[0].unmask); } return 0; -- cgit v0.10.2 From bb78e9eaab919b1ede37d62056b554bba611a012 Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:52 -0600 Subject: vfio: virqfd: rename vfio_pci_virqfd_init and vfio_pci_virqfd_exit The functions vfio_pci_virqfd_init and vfio_pci_virqfd_exit are not really PCI specific, since we plan to reuse the virqfd code with more VFIO drivers in addition to VFIO_PCI. Signed-off-by: Antonios Motakis [Baptiste Reynal: Move rename vfio_pci_virqfd_init and vfio_pci_virqfd_exit from "vfio: add a vfio_ prefix to virqfd_enable and virqfd_disable and export"] Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index f8a1863..668d37c 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1030,7 +1030,7 @@ put_devs: static void __exit vfio_pci_cleanup(void) { pci_unregister_driver(&vfio_pci_driver); - vfio_pci_virqfd_exit(); + vfio_virqfd_exit(); vfio_pci_uninit_perm_bits(); } @@ -1044,7 +1044,7 @@ static int __init vfio_pci_init(void) return ret; /* Start the virqfd cleanup handler */ - ret = vfio_pci_virqfd_init(); + ret = vfio_virqfd_init(); if (ret) goto out_virqfd; @@ -1056,7 +1056,7 @@ static int __init vfio_pci_init(void) return 0; out_driver: - vfio_pci_virqfd_exit(); + vfio_virqfd_exit(); out_virqfd: vfio_pci_uninit_perm_bits(); return ret; diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 09e38b9..fa033c3 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -45,7 +45,7 @@ struct virqfd { static struct workqueue_struct *vfio_irqfd_cleanup_wq; -int __init vfio_pci_virqfd_init(void) +int __init vfio_virqfd_init(void) { vfio_irqfd_cleanup_wq = create_singlethread_workqueue("vfio-irqfd-cleanup"); @@ -55,7 +55,7 @@ int __init vfio_pci_virqfd_init(void) return 0; } -void vfio_pci_virqfd_exit(void) +void vfio_virqfd_exit(void) { destroy_workqueue(vfio_irqfd_cleanup_wq); } diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index c9f9b32..0253965 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -87,8 +87,8 @@ extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, extern int vfio_pci_init_perm_bits(void); extern void vfio_pci_uninit_perm_bits(void); -extern int vfio_pci_virqfd_init(void); -extern void vfio_pci_virqfd_exit(void); +extern int vfio_virqfd_init(void); +extern void vfio_virqfd_exit(void); extern int vfio_config_init(struct vfio_pci_device *vdev); extern void vfio_config_free(struct vfio_pci_device *vdev); -- cgit v0.10.2 From 9269c393e7a971f98c0f54d7fc350ac7636d1fa5 Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:52 -0600 Subject: vfio: add local lock for virqfd instead of depending on VFIO PCI The Virqfd code needs to keep accesses to any struct *virqfd safe, but this comes into play only when creating or destroying eventfds, so sharing the same spinlock with the VFIO bus driver is not necessary. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index fa033c3..de06979 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -44,6 +44,7 @@ struct virqfd { }; static struct workqueue_struct *vfio_irqfd_cleanup_wq; +DEFINE_SPINLOCK(virqfd_lock); int __init vfio_virqfd_init(void) { @@ -80,21 +81,21 @@ static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) if (flags & POLLHUP) { unsigned long flags; - spin_lock_irqsave(&virqfd->vdev->irqlock, flags); + spin_lock_irqsave(&virqfd_lock, flags); /* * The eventfd is closing, if the virqfd has not yet been * queued for release, as determined by testing whether the - * vdev pointer to it is still valid, queue it now. As + * virqfd pointer to it is still valid, queue it now. As * with kvm irqfds, we know we won't race against the virqfd - * going away because we hold wqh->lock to get here. + * going away because we hold the lock to get here. */ if (*(virqfd->pvirqfd) == virqfd) { *(virqfd->pvirqfd) = NULL; virqfd_deactivate(virqfd); } - spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags); + spin_unlock_irqrestore(&virqfd_lock, flags); } return 0; @@ -170,16 +171,16 @@ int vfio_virqfd_enable(struct vfio_pci_device *vdev, * we update the pointer to the virqfd under lock to avoid * pushing multiple jobs to release the same virqfd. */ - spin_lock_irq(&vdev->irqlock); + spin_lock_irq(&virqfd_lock); if (*pvirqfd) { - spin_unlock_irq(&vdev->irqlock); + spin_unlock_irq(&virqfd_lock); ret = -EBUSY; goto err_busy; } *pvirqfd = virqfd; - spin_unlock_irq(&vdev->irqlock); + spin_unlock_irq(&virqfd_lock); /* * Install our own custom wake-up handling so we are notified via @@ -217,18 +218,18 @@ err_fd: } EXPORT_SYMBOL_GPL(vfio_virqfd_enable); -void vfio_virqfd_disable(struct vfio_pci_device *vdev, struct virqfd **pvirqfd) +void vfio_virqfd_disable(struct virqfd **pvirqfd) { unsigned long flags; - spin_lock_irqsave(&vdev->irqlock, flags); + spin_lock_irqsave(&virqfd_lock, flags); if (*pvirqfd) { virqfd_deactivate(*pvirqfd); *pvirqfd = NULL; } - spin_unlock_irqrestore(&vdev->irqlock, flags); + spin_unlock_irqrestore(&virqfd_lock, flags); /* * Block until we know all outstanding shutdown jobs have completed. @@ -441,8 +442,8 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd) static void vfio_intx_disable(struct vfio_pci_device *vdev) { vfio_intx_set_signal(vdev, -1); - vfio_virqfd_disable(vdev, &vdev->ctx[0].unmask); - vfio_virqfd_disable(vdev, &vdev->ctx[0].mask); + vfio_virqfd_disable(&vdev->ctx[0].unmask); + vfio_virqfd_disable(&vdev->ctx[0].mask); vdev->irq_type = VFIO_PCI_NUM_IRQS; vdev->num_ctx = 0; kfree(vdev->ctx); @@ -606,8 +607,8 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); for (i = 0; i < vdev->num_ctx; i++) { - vfio_virqfd_disable(vdev, &vdev->ctx[i].unmask); - vfio_virqfd_disable(vdev, &vdev->ctx[i].mask); + vfio_virqfd_disable(&vdev->ctx[i].unmask); + vfio_virqfd_disable(&vdev->ctx[i].mask); } if (msix) { @@ -645,7 +646,7 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev, vfio_send_intx_eventfd, NULL, &vdev->ctx[0].unmask, fd); - vfio_virqfd_disable(vdev, &vdev->ctx[0].unmask); + vfio_virqfd_disable(&vdev->ctx[0].unmask); } return 0; -- cgit v0.10.2 From 09bbcb8810c4673cb96477e0e83c9bcdfadc7741 Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:53 -0600 Subject: vfio: pass an opaque pointer on virqfd initialization VFIO_PCI passes the VFIO device structure *vdev via eventfd to the handler that implements masking/unmasking of IRQs via an eventfd. We can replace it in the virqfd infrastructure with an opaque type so we can make use of the mechanism from other VFIO bus drivers. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index de06979..c84a129 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -31,10 +31,10 @@ * IRQfd - generic */ struct virqfd { - struct vfio_pci_device *vdev; + void *opaque; struct eventfd_ctx *eventfd; - int (*handler)(struct vfio_pci_device *, void *); - void (*thread)(struct vfio_pci_device *, void *); + int (*handler)(void *, void *); + void (*thread)(void *, void *); void *data; struct work_struct inject; wait_queue_t wait; @@ -74,7 +74,7 @@ static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) if (flags & POLLIN) { /* An event has been signaled, call function */ if ((!virqfd->handler || - virqfd->handler(virqfd->vdev, virqfd->data)) && + virqfd->handler(virqfd->opaque, virqfd->data)) && virqfd->thread) schedule_work(&virqfd->inject); } @@ -124,12 +124,12 @@ static void virqfd_inject(struct work_struct *work) { struct virqfd *virqfd = container_of(work, struct virqfd, inject); if (virqfd->thread) - virqfd->thread(virqfd->vdev, virqfd->data); + virqfd->thread(virqfd->opaque, virqfd->data); } -int vfio_virqfd_enable(struct vfio_pci_device *vdev, - int (*handler)(struct vfio_pci_device *, void *), - void (*thread)(struct vfio_pci_device *, void *), +int vfio_virqfd_enable(void *opaque, + int (*handler)(void *, void *), + void (*thread)(void *, void *), void *data, struct virqfd **pvirqfd, int fd) { struct fd irqfd; @@ -143,7 +143,7 @@ int vfio_virqfd_enable(struct vfio_pci_device *vdev, return -ENOMEM; virqfd->pvirqfd = pvirqfd; - virqfd->vdev = vdev; + virqfd->opaque = opaque; virqfd->handler = handler; virqfd->thread = thread; virqfd->data = data; @@ -196,7 +196,7 @@ int vfio_virqfd_enable(struct vfio_pci_device *vdev, * before we registered and trigger it as if we didn't miss it. */ if (events & POLLIN) { - if ((!handler || handler(vdev, data)) && thread) + if ((!handler || handler(opaque, data)) && thread) schedule_work(&virqfd->inject); } @@ -243,8 +243,10 @@ EXPORT_SYMBOL_GPL(vfio_virqfd_disable); /* * INTx */ -static void vfio_send_intx_eventfd(struct vfio_pci_device *vdev, void *unused) +static void vfio_send_intx_eventfd(void *opaque, void *unused) { + struct vfio_pci_device *vdev = opaque; + if (likely(is_intx(vdev) && !vdev->virq_disabled)) eventfd_signal(vdev->ctx[0].trigger, 1); } @@ -287,9 +289,9 @@ void vfio_pci_intx_mask(struct vfio_pci_device *vdev) * a signal is necessary, which can then be handled via a work queue * or directly depending on the caller. */ -static int vfio_pci_intx_unmask_handler(struct vfio_pci_device *vdev, - void *unused) +static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) { + struct vfio_pci_device *vdev = opaque; struct pci_dev *pdev = vdev->pdev; unsigned long flags; int ret = 0; @@ -641,7 +643,7 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev, } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { int32_t fd = *(int32_t *)data; if (fd >= 0) - return vfio_virqfd_enable(vdev, + return vfio_virqfd_enable((void *) vdev, vfio_pci_intx_unmask_handler, vfio_send_intx_eventfd, NULL, &vdev->ctx[0].unmask, fd); -- cgit v0.10.2 From 7e992d692750b2938224eb43fee907181d92a602 Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:54 -0600 Subject: vfio: move eventfd support code for VFIO_PCI to a separate file The virqfd functionality that is used by VFIO_PCI to implement interrupt masking and unmasking via an eventfd, is generic enough and can be reused by another driver. Move it to a separate file in order to allow the code to be shared. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index 1310792..c7c8644 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -1,4 +1,5 @@ -vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o +vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o \ + ../virqfd.o obj-$(CONFIG_VFIO_PCI) += vfio-pci.o diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index c84a129..1f577b4 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -19,228 +19,13 @@ #include #include #include -#include #include #include -#include #include #include "vfio_pci_private.h" /* - * IRQfd - generic - */ -struct virqfd { - void *opaque; - struct eventfd_ctx *eventfd; - int (*handler)(void *, void *); - void (*thread)(void *, void *); - void *data; - struct work_struct inject; - wait_queue_t wait; - poll_table pt; - struct work_struct shutdown; - struct virqfd **pvirqfd; -}; - -static struct workqueue_struct *vfio_irqfd_cleanup_wq; -DEFINE_SPINLOCK(virqfd_lock); - -int __init vfio_virqfd_init(void) -{ - vfio_irqfd_cleanup_wq = - create_singlethread_workqueue("vfio-irqfd-cleanup"); - if (!vfio_irqfd_cleanup_wq) - return -ENOMEM; - - return 0; -} - -void vfio_virqfd_exit(void) -{ - destroy_workqueue(vfio_irqfd_cleanup_wq); -} - -static void virqfd_deactivate(struct virqfd *virqfd) -{ - queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown); -} - -static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) -{ - struct virqfd *virqfd = container_of(wait, struct virqfd, wait); - unsigned long flags = (unsigned long)key; - - if (flags & POLLIN) { - /* An event has been signaled, call function */ - if ((!virqfd->handler || - virqfd->handler(virqfd->opaque, virqfd->data)) && - virqfd->thread) - schedule_work(&virqfd->inject); - } - - if (flags & POLLHUP) { - unsigned long flags; - spin_lock_irqsave(&virqfd_lock, flags); - - /* - * The eventfd is closing, if the virqfd has not yet been - * queued for release, as determined by testing whether the - * virqfd pointer to it is still valid, queue it now. As - * with kvm irqfds, we know we won't race against the virqfd - * going away because we hold the lock to get here. - */ - if (*(virqfd->pvirqfd) == virqfd) { - *(virqfd->pvirqfd) = NULL; - virqfd_deactivate(virqfd); - } - - spin_unlock_irqrestore(&virqfd_lock, flags); - } - - return 0; -} - -static void virqfd_ptable_queue_proc(struct file *file, - wait_queue_head_t *wqh, poll_table *pt) -{ - struct virqfd *virqfd = container_of(pt, struct virqfd, pt); - add_wait_queue(wqh, &virqfd->wait); -} - -static void virqfd_shutdown(struct work_struct *work) -{ - struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); - u64 cnt; - - eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); - flush_work(&virqfd->inject); - eventfd_ctx_put(virqfd->eventfd); - - kfree(virqfd); -} - -static void virqfd_inject(struct work_struct *work) -{ - struct virqfd *virqfd = container_of(work, struct virqfd, inject); - if (virqfd->thread) - virqfd->thread(virqfd->opaque, virqfd->data); -} - -int vfio_virqfd_enable(void *opaque, - int (*handler)(void *, void *), - void (*thread)(void *, void *), - void *data, struct virqfd **pvirqfd, int fd) -{ - struct fd irqfd; - struct eventfd_ctx *ctx; - struct virqfd *virqfd; - int ret = 0; - unsigned int events; - - virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); - if (!virqfd) - return -ENOMEM; - - virqfd->pvirqfd = pvirqfd; - virqfd->opaque = opaque; - virqfd->handler = handler; - virqfd->thread = thread; - virqfd->data = data; - - INIT_WORK(&virqfd->shutdown, virqfd_shutdown); - INIT_WORK(&virqfd->inject, virqfd_inject); - - irqfd = fdget(fd); - if (!irqfd.file) { - ret = -EBADF; - goto err_fd; - } - - ctx = eventfd_ctx_fileget(irqfd.file); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - goto err_ctx; - } - - virqfd->eventfd = ctx; - - /* - * virqfds can be released by closing the eventfd or directly - * through ioctl. These are both done through a workqueue, so - * we update the pointer to the virqfd under lock to avoid - * pushing multiple jobs to release the same virqfd. - */ - spin_lock_irq(&virqfd_lock); - - if (*pvirqfd) { - spin_unlock_irq(&virqfd_lock); - ret = -EBUSY; - goto err_busy; - } - *pvirqfd = virqfd; - - spin_unlock_irq(&virqfd_lock); - - /* - * Install our own custom wake-up handling so we are notified via - * a callback whenever someone signals the underlying eventfd. - */ - init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup); - init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc); - - events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt); - - /* - * Check if there was an event already pending on the eventfd - * before we registered and trigger it as if we didn't miss it. - */ - if (events & POLLIN) { - if ((!handler || handler(opaque, data)) && thread) - schedule_work(&virqfd->inject); - } - - /* - * Do not drop the file until the irqfd is fully initialized, - * otherwise we might race against the POLLHUP. - */ - fdput(irqfd); - - return 0; -err_busy: - eventfd_ctx_put(ctx); -err_ctx: - fdput(irqfd); -err_fd: - kfree(virqfd); - - return ret; -} -EXPORT_SYMBOL_GPL(vfio_virqfd_enable); - -void vfio_virqfd_disable(struct virqfd **pvirqfd) -{ - unsigned long flags; - - spin_lock_irqsave(&virqfd_lock, flags); - - if (*pvirqfd) { - virqfd_deactivate(*pvirqfd); - *pvirqfd = NULL; - } - - spin_unlock_irqrestore(&virqfd_lock, flags); - - /* - * Block until we know all outstanding shutdown jobs have completed. - * Even if we don't queue the job, flush the wq to be sure it's - * been released. - */ - flush_workqueue(vfio_irqfd_cleanup_wq); -} -EXPORT_SYMBOL_GPL(vfio_virqfd_disable); - -/* * INTx */ static void vfio_send_intx_eventfd(void *opaque, void *unused) diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 0253965..ae0e1b4 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -87,9 +87,6 @@ extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, extern int vfio_pci_init_perm_bits(void); extern void vfio_pci_uninit_perm_bits(void); -extern int vfio_virqfd_init(void); -extern void vfio_virqfd_exit(void); - extern int vfio_config_init(struct vfio_pci_device *vdev); extern void vfio_config_free(struct vfio_pci_device *vdev); #endif /* VFIO_PCI_PRIVATE_H */ diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c new file mode 100644 index 0000000..5967899 --- /dev/null +++ b/drivers/vfio/virqfd.c @@ -0,0 +1,213 @@ +/* + * VFIO generic eventfd code for IRQFD support. + * Derived from drivers/vfio/pci/vfio_pci_intrs.c + * + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +static struct workqueue_struct *vfio_irqfd_cleanup_wq; +DEFINE_SPINLOCK(virqfd_lock); + +int __init vfio_virqfd_init(void) +{ + vfio_irqfd_cleanup_wq = + create_singlethread_workqueue("vfio-irqfd-cleanup"); + if (!vfio_irqfd_cleanup_wq) + return -ENOMEM; + + return 0; +} + +void vfio_virqfd_exit(void) +{ + destroy_workqueue(vfio_irqfd_cleanup_wq); +} + +static void virqfd_deactivate(struct virqfd *virqfd) +{ + queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown); +} + +static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct virqfd *virqfd = container_of(wait, struct virqfd, wait); + unsigned long flags = (unsigned long)key; + + if (flags & POLLIN) { + /* An event has been signaled, call function */ + if ((!virqfd->handler || + virqfd->handler(virqfd->opaque, virqfd->data)) && + virqfd->thread) + schedule_work(&virqfd->inject); + } + + if (flags & POLLHUP) { + unsigned long flags; + spin_lock_irqsave(&virqfd_lock, flags); + + /* + * The eventfd is closing, if the virqfd has not yet been + * queued for release, as determined by testing whether the + * virqfd pointer to it is still valid, queue it now. As + * with kvm irqfds, we know we won't race against the virqfd + * going away because we hold the lock to get here. + */ + if (*(virqfd->pvirqfd) == virqfd) { + *(virqfd->pvirqfd) = NULL; + virqfd_deactivate(virqfd); + } + + spin_unlock_irqrestore(&virqfd_lock, flags); + } + + return 0; +} + +static void virqfd_ptable_queue_proc(struct file *file, + wait_queue_head_t *wqh, poll_table *pt) +{ + struct virqfd *virqfd = container_of(pt, struct virqfd, pt); + add_wait_queue(wqh, &virqfd->wait); +} + +static void virqfd_shutdown(struct work_struct *work) +{ + struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); + u64 cnt; + + eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); + flush_work(&virqfd->inject); + eventfd_ctx_put(virqfd->eventfd); + + kfree(virqfd); +} + +static void virqfd_inject(struct work_struct *work) +{ + struct virqfd *virqfd = container_of(work, struct virqfd, inject); + if (virqfd->thread) + virqfd->thread(virqfd->opaque, virqfd->data); +} + +int vfio_virqfd_enable(void *opaque, + int (*handler)(void *, void *), + void (*thread)(void *, void *), + void *data, struct virqfd **pvirqfd, int fd) +{ + struct fd irqfd; + struct eventfd_ctx *ctx; + struct virqfd *virqfd; + int ret = 0; + unsigned int events; + + virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); + if (!virqfd) + return -ENOMEM; + + virqfd->pvirqfd = pvirqfd; + virqfd->opaque = opaque; + virqfd->handler = handler; + virqfd->thread = thread; + virqfd->data = data; + + INIT_WORK(&virqfd->shutdown, virqfd_shutdown); + INIT_WORK(&virqfd->inject, virqfd_inject); + + irqfd = fdget(fd); + if (!irqfd.file) { + ret = -EBADF; + goto err_fd; + } + + ctx = eventfd_ctx_fileget(irqfd.file); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + goto err_ctx; + } + + virqfd->eventfd = ctx; + + /* + * virqfds can be released by closing the eventfd or directly + * through ioctl. These are both done through a workqueue, so + * we update the pointer to the virqfd under lock to avoid + * pushing multiple jobs to release the same virqfd. + */ + spin_lock_irq(&virqfd_lock); + + if (*pvirqfd) { + spin_unlock_irq(&virqfd_lock); + ret = -EBUSY; + goto err_busy; + } + *pvirqfd = virqfd; + + spin_unlock_irq(&virqfd_lock); + + /* + * Install our own custom wake-up handling so we are notified via + * a callback whenever someone signals the underlying eventfd. + */ + init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup); + init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc); + + events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt); + + /* + * Check if there was an event already pending on the eventfd + * before we registered and trigger it as if we didn't miss it. + */ + if (events & POLLIN) { + if ((!handler || handler(opaque, data)) && thread) + schedule_work(&virqfd->inject); + } + + /* + * Do not drop the file until the irqfd is fully initialized, + * otherwise we might race against the POLLHUP. + */ + fdput(irqfd); + + return 0; +err_busy: + eventfd_ctx_put(ctx); +err_ctx: + fdput(irqfd); +err_fd: + kfree(virqfd); + + return ret; +} +EXPORT_SYMBOL_GPL(vfio_virqfd_enable); + +void vfio_virqfd_disable(struct virqfd **pvirqfd) +{ + unsigned long flags; + + spin_lock_irqsave(&virqfd_lock, flags); + + if (*pvirqfd) { + virqfd_deactivate(*pvirqfd); + *pvirqfd = NULL; + } + + spin_unlock_irqrestore(&virqfd_lock, flags); + + /* + * Block until we know all outstanding shutdown jobs have completed. + * Even if we don't queue the job, flush the wq to be sure it's + * been released. + */ + flush_workqueue(vfio_irqfd_cleanup_wq); +} +EXPORT_SYMBOL_GPL(vfio_virqfd_disable); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 2d67b89..683b514 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -14,6 +14,8 @@ #include #include +#include +#include #include /** @@ -123,4 +125,29 @@ static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, return -ENOTTY; } #endif /* CONFIG_EEH */ + +/* + * IRQfd - generic + */ +struct virqfd { + void *opaque; + struct eventfd_ctx *eventfd; + int (*handler)(void *, void *); + void (*thread)(void *, void *); + void *data; + struct work_struct inject; + wait_queue_t wait; + poll_table pt; + struct work_struct shutdown; + struct virqfd **pvirqfd; +}; + +extern int vfio_virqfd_init(void); +extern void vfio_virqfd_exit(void); +extern int vfio_virqfd_enable(void *opaque, + int (*handler)(void *, void *), + void (*thread)(void *, void *), + void *data, struct virqfd **pvirqfd, int fd); +extern void vfio_virqfd_disable(struct virqfd **pvirqfd); + #endif /* VFIO_H */ -- cgit v0.10.2 From 42ac9bd18d4fc28c36c7927847f0f6e90ecd7710 Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:54 -0600 Subject: vfio: initialize the virqfd workqueue in VFIO generic code Now we have finally completely decoupled virqfd from VFIO_PCI. We can initialize it from the VFIO generic code, in order to safely use it from multiple independent VFIO bus drivers. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index dadf0ca..d798b09 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,4 +1,6 @@ -obj-$(CONFIG_VFIO) += vfio.o +vfio_core-y := vfio.o virqfd.o + +obj-$(CONFIG_VFIO) += vfio_core.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index c7c8644..1310792 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -1,5 +1,4 @@ -vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o \ - ../virqfd.o +vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o obj-$(CONFIG_VFIO_PCI) += vfio-pci.o diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 668d37c..2f865d07 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1030,7 +1030,6 @@ put_devs: static void __exit vfio_pci_cleanup(void) { pci_unregister_driver(&vfio_pci_driver); - vfio_virqfd_exit(); vfio_pci_uninit_perm_bits(); } @@ -1043,11 +1042,6 @@ static int __init vfio_pci_init(void) if (ret) return ret; - /* Start the virqfd cleanup handler */ - ret = vfio_virqfd_init(); - if (ret) - goto out_virqfd; - /* Register and scan for devices */ ret = pci_register_driver(&vfio_pci_driver); if (ret) @@ -1056,8 +1050,6 @@ static int __init vfio_pci_init(void) return 0; out_driver: - vfio_virqfd_exit(); -out_virqfd: vfio_pci_uninit_perm_bits(); return ret; } diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 4cde855..23ba12a 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1553,6 +1553,11 @@ static int __init vfio_init(void) if (ret) goto err_cdev_add; + /* Start the virqfd cleanup handler used by some VFIO bus drivers */ + ret = vfio_virqfd_init(); + if (ret) + goto err_virqfd; + pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); /* @@ -1565,6 +1570,8 @@ static int __init vfio_init(void) return 0; +err_virqfd: + cdev_del(&vfio.group_cdev); err_cdev_add: unregister_chrdev_region(vfio.group_devt, MINORMASK); err_alloc_chrdev: @@ -1579,6 +1586,7 @@ static void __exit vfio_cleanup(void) { WARN_ON(!list_empty(&vfio.group_list)); + vfio_virqfd_exit(); idr_destroy(&vfio.group_idr); cdev_del(&vfio.group_cdev); unregister_chrdev_region(vfio.group_devt, MINORMASK); -- cgit v0.10.2 From a7fa7c77cf15fb22d0f33fcc88770de0246c5588 Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:55 -0600 Subject: vfio/platform: implement IRQ masking/unmasking via an eventfd With this patch the VFIO user will be able to set an eventfd that can be used in order to mask and unmask IRQs of platform devices. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c index e0e6388..88bba57 100644 --- a/drivers/vfio/platform/vfio_platform_irq.c +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -37,6 +37,15 @@ static void vfio_platform_mask(struct vfio_platform_irq *irq_ctx) spin_unlock_irqrestore(&irq_ctx->lock, flags); } +static int vfio_platform_mask_handler(void *opaque, void *unused) +{ + struct vfio_platform_irq *irq_ctx = opaque; + + vfio_platform_mask(irq_ctx); + + return 0; +} + static int vfio_platform_set_irq_mask(struct vfio_platform_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, @@ -48,8 +57,18 @@ static int vfio_platform_set_irq_mask(struct vfio_platform_device *vdev, if (!(vdev->irqs[index].flags & VFIO_IRQ_INFO_MASKABLE)) return -EINVAL; - if (flags & VFIO_IRQ_SET_DATA_EVENTFD) - return -EINVAL; /* not implemented yet */ + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + int32_t fd = *(int32_t *)data; + + if (fd >= 0) + return vfio_virqfd_enable((void *) &vdev->irqs[index], + vfio_platform_mask_handler, + NULL, NULL, + &vdev->irqs[index].mask, fd); + + vfio_virqfd_disable(&vdev->irqs[index].mask); + return 0; + } if (flags & VFIO_IRQ_SET_DATA_NONE) { vfio_platform_mask(&vdev->irqs[index]); @@ -78,6 +97,15 @@ static void vfio_platform_unmask(struct vfio_platform_irq *irq_ctx) spin_unlock_irqrestore(&irq_ctx->lock, flags); } +static int vfio_platform_unmask_handler(void *opaque, void *unused) +{ + struct vfio_platform_irq *irq_ctx = opaque; + + vfio_platform_unmask(irq_ctx); + + return 0; +} + static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, @@ -89,8 +117,19 @@ static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev, if (!(vdev->irqs[index].flags & VFIO_IRQ_INFO_MASKABLE)) return -EINVAL; - if (flags & VFIO_IRQ_SET_DATA_EVENTFD) - return -EINVAL; /* not implemented yet */ + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + int32_t fd = *(int32_t *)data; + + if (fd >= 0) + return vfio_virqfd_enable((void *) &vdev->irqs[index], + vfio_platform_unmask_handler, + NULL, NULL, + &vdev->irqs[index].unmask, + fd); + + vfio_virqfd_disable(&vdev->irqs[index].unmask); + return 0; + } if (flags & VFIO_IRQ_SET_DATA_NONE) { vfio_platform_unmask(&vdev->irqs[index]); diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index ff2db1d..5d31e04 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -35,6 +35,8 @@ struct vfio_platform_irq { struct eventfd_ctx *trigger; bool masked; spinlock_t lock; + struct virqfd *unmask; + struct virqfd *mask; }; struct vfio_platform_region { -- cgit v0.10.2 From 2f51bf4be99386f49841b6365a85a5cabc148565 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 16 Mar 2015 14:08:56 -0600 Subject: vfio: put off the allocation of "minor" in vfio_create_group The next code fragment "list_for_each_entry" is not depend on "minor". With this patch, the free of "minor" in "list_for_each_entry" can be reduced, and there is no functional change. Signed-off-by: Zhen Lei Signed-off-by: Alex Williamson diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 23ba12a..86aac7e 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -234,22 +234,21 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) mutex_lock(&vfio.group_lock); - minor = vfio_alloc_group_minor(group); - if (minor < 0) { - vfio_group_unlock_and_free(group); - return ERR_PTR(minor); - } - /* Did we race creating this group? */ list_for_each_entry(tmp, &vfio.group_list, vfio_next) { if (tmp->iommu_group == iommu_group) { vfio_group_get(tmp); - vfio_free_group_minor(minor); vfio_group_unlock_and_free(group); return tmp; } } + minor = vfio_alloc_group_minor(group); + if (minor < 0) { + vfio_group_unlock_and_free(group); + return ERR_PTR(minor); + } + dev = device_create(vfio.class, NULL, MKDEV(MAJOR(vfio.group_devt), minor), group, "%d", iommu_group_id(iommu_group)); -- cgit v0.10.2 From 66fdc052d7dba5bb8386f7a1a38107ba8307a59e Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Tue, 17 Mar 2015 07:43:21 +0800 Subject: vfio: virqfd_lock can be static Signed-off-by: Fengguang Wu Reviewed-by: Eric Auger Signed-off-by: Alex Williamson diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c index 5967899..3d19aaf 100644 --- a/drivers/vfio/virqfd.c +++ b/drivers/vfio/virqfd.c @@ -16,7 +16,7 @@ #include static struct workqueue_struct *vfio_irqfd_cleanup_wq; -DEFINE_SPINLOCK(virqfd_lock); +static DEFINE_SPINLOCK(virqfd_lock); int __init vfio_virqfd_init(void) { -- cgit v0.10.2 From 71be3423a62be548c56bab5b818e1a1383e659d2 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 17 Mar 2015 08:33:38 -0600 Subject: vfio: Split virqfd into a separate module for vfio bus drivers An unintended consequence of commit 42ac9bd18d4f ("vfio: initialize the virqfd workqueue in VFIO generic code") is that the vfio module is renamed to vfio_core so that it can include both vfio and virqfd. That's a user visible change that may break module loading scritps and it imposes eventfd support as a dependency on the core vfio code, which it's really not. virqfd is intended to be provided as a service to vfio bus drivers, so instead of wrapping it into vfio.ko, we can make it a stand-alone module toggled by vfio bus drivers. This has the additional benefit of removing initialization and exit from the core vfio code. Signed-off-by: Alex Williamson diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index d5322a4..7d092dd 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -13,6 +13,11 @@ config VFIO_SPAPR_EEH depends on EEH && VFIO_IOMMU_SPAPR_TCE default n +config VFIO_VIRQFD + tristate + depends on VFIO && EVENTFD + default n + menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" depends on IOMMU_API diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index d798b09..7b8a31f 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,6 +1,7 @@ -vfio_core-y := vfio.o virqfd.o +vfio_virqfd-y := virqfd.o -obj-$(CONFIG_VFIO) += vfio_core.o +obj-$(CONFIG_VFIO) += vfio.o +obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index c6bb5da..579d83b 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -1,6 +1,7 @@ config VFIO_PCI tristate "VFIO support for PCI devices" depends on VFIO && PCI && EVENTFD + select VFIO_VIRQFD help Support for the PCI VFIO bus driver. This is required to make use of PCI drivers using the VFIO framework. diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig index c0a3bff..9a4403e 100644 --- a/drivers/vfio/platform/Kconfig +++ b/drivers/vfio/platform/Kconfig @@ -1,6 +1,7 @@ config VFIO_PLATFORM tristate "VFIO support for platform devices" depends on VFIO && EVENTFD && ARM + select VFIO_VIRQFD help Support for platform devices with VFIO. This is required to make use of platform devices present on the system using the VFIO diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 86aac7e..0d33662 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1552,11 +1552,6 @@ static int __init vfio_init(void) if (ret) goto err_cdev_add; - /* Start the virqfd cleanup handler used by some VFIO bus drivers */ - ret = vfio_virqfd_init(); - if (ret) - goto err_virqfd; - pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); /* @@ -1569,8 +1564,6 @@ static int __init vfio_init(void) return 0; -err_virqfd: - cdev_del(&vfio.group_cdev); err_cdev_add: unregister_chrdev_region(vfio.group_devt, MINORMASK); err_alloc_chrdev: @@ -1585,7 +1578,6 @@ static void __exit vfio_cleanup(void) { WARN_ON(!list_empty(&vfio.group_list)); - vfio_virqfd_exit(); idr_destroy(&vfio.group_idr); cdev_del(&vfio.group_cdev); unregister_chrdev_region(vfio.group_devt, MINORMASK); diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c index 3d19aaf..27c89cd 100644 --- a/drivers/vfio/virqfd.c +++ b/drivers/vfio/virqfd.c @@ -13,12 +13,17 @@ #include #include #include +#include #include +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Alex Williamson " +#define DRIVER_DESC "IRQFD support for VFIO bus drivers" + static struct workqueue_struct *vfio_irqfd_cleanup_wq; static DEFINE_SPINLOCK(virqfd_lock); -int __init vfio_virqfd_init(void) +static int __init vfio_virqfd_init(void) { vfio_irqfd_cleanup_wq = create_singlethread_workqueue("vfio-irqfd-cleanup"); @@ -28,7 +33,7 @@ int __init vfio_virqfd_init(void) return 0; } -void vfio_virqfd_exit(void) +static void __exit vfio_virqfd_exit(void) { destroy_workqueue(vfio_irqfd_cleanup_wq); } @@ -211,3 +216,11 @@ void vfio_virqfd_disable(struct virqfd **pvirqfd) flush_workqueue(vfio_irqfd_cleanup_wq); } EXPORT_SYMBOL_GPL(vfio_virqfd_disable); + +module_init(vfio_virqfd_init); +module_exit(vfio_virqfd_exit); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 683b514..cbed15f 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -142,8 +142,6 @@ struct virqfd { struct virqfd **pvirqfd; }; -extern int vfio_virqfd_init(void); -extern void vfio_virqfd_exit(void); extern int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *), void (*thread)(void *, void *), -- cgit v0.10.2 From 0ea18b40cdd382a8a338d02e06c4646c35bd753b Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 7 Apr 2015 11:14:38 -0600 Subject: vgaarb: Stub vga_set_legacy_decoding() vga_set_legacy_decoding() is defined in drivers/gpu/vga/vgaarb.c, which is only compiled with CONFIG_VGA_ARB. A caller would therefore get an undefined symbol if the VGA arbiter is not enabled. Signed-off-by: Alex Williamson Acked-by: Dave Airlie diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index c37bd4d..8c3b412 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -65,8 +65,13 @@ struct pci_dev; * out of the arbitration process (and can be safe to take * interrupts at any time. */ +#if defined(CONFIG_VGA_ARB) extern void vga_set_legacy_decoding(struct pci_dev *pdev, unsigned int decodes); +#else +static inline void vga_set_legacy_decoding(struct pci_dev *pdev, + unsigned int decodes) { }; +#endif /** * vga_get - acquire & locks VGA resources -- cgit v0.10.2 From 88c0dead9fb2cc66962b64064770558eecf3eafd Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 7 Apr 2015 11:14:40 -0600 Subject: vfio-pci: Add module option to disable VGA region access Add a module option so that we don't require a CONFIG change and kernel rebuild to disable VGA support. Not only can VGA support be troublesome in itself, but by disabling it we can reduce the impact to host devices by doing a VGA arbitration opt-out. Signed-off-by: Alex Williamson diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 2f865d07..7053110 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -37,8 +37,23 @@ module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(nointxmask, "Disable support for PCI 2.3 style INTx masking. If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag."); +#ifdef CONFIG_VFIO_PCI_VGA +static bool disable_vga; +module_param(disable_vga, bool, S_IRUGO); +MODULE_PARM_DESC(disable_vga, "Disable VGA resource access through vfio-pci"); +#endif + static DEFINE_MUTEX(driver_lock); +static inline bool vfio_vga_disabled(void) +{ +#ifdef CONFIG_VFIO_PCI_VGA + return disable_vga; +#else + return true; +#endif +} + static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); static int vfio_pci_enable(struct vfio_pci_device *vdev) @@ -93,10 +108,8 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) } else vdev->msix_bar = 0xFF; -#ifdef CONFIG_VFIO_PCI_VGA - if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + if (!vfio_vga_disabled() && (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) vdev->has_vga = true; -#endif return 0; } -- cgit v0.10.2 From ecaa1f6a01544604de5f9531379a303eee886162 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 7 Apr 2015 11:14:41 -0600 Subject: vfio-pci: Add VGA arbiter client If VFIO VGA access is disabled for the user, either by CONFIG option or module parameter, we can often opt-out of VGA arbitration. We can do this when PCI bridge control of VGA routing is possible. This means that we must have a parent bridge and there must only be a single VGA device below that bridge. Fortunately this is the typical case for discrete GPUs. Doing this allows us to minimize the impact of additional GPUs, in terms of VGA arbitration, when they are only used via vfio-pci for non-VGA applications. Signed-off-by: Alex Williamson diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 7053110..25aef05 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "vfio_pci_private.h" @@ -54,6 +55,50 @@ static inline bool vfio_vga_disabled(void) #endif } +/* + * Our VGA arbiter participation is limited since we don't know anything + * about the device itself. However, if the device is the only VGA device + * downstream of a bridge and VFIO VGA support is disabled, then we can + * safely return legacy VGA IO and memory as not decoded since the user + * has no way to get to it and routing can be disabled externally at the + * bridge. + */ +static unsigned int vfio_pci_set_vga_decode(void *opaque, bool single_vga) +{ + struct vfio_pci_device *vdev = opaque; + struct pci_dev *tmp = NULL, *pdev = vdev->pdev; + unsigned char max_busnr; + unsigned int decodes; + + if (single_vga || !vfio_vga_disabled() || pci_is_root_bus(pdev->bus)) + return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM | + VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM; + + max_busnr = pci_bus_max_busnr(pdev->bus); + decodes = VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; + + while ((tmp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, tmp)) != NULL) { + if (tmp == pdev || + pci_domain_nr(tmp->bus) != pci_domain_nr(pdev->bus) || + pci_is_root_bus(tmp->bus)) + continue; + + if (tmp->bus->number >= pdev->bus->number && + tmp->bus->number <= max_busnr) { + pci_dev_put(tmp); + decodes |= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM; + break; + } + } + + return decodes; +} + +static inline bool vfio_pci_is_vga(struct pci_dev *pdev) +{ + return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; +} + static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); static int vfio_pci_enable(struct vfio_pci_device *vdev) @@ -108,7 +153,7 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) } else vdev->msix_bar = 0xFF; - if (!vfio_vga_disabled() && (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev)) vdev->has_vga = true; return 0; @@ -900,6 +945,12 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) kfree(vdev); } + if (vfio_pci_is_vga(pdev)) { + vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode); + vga_set_legacy_decoding(pdev, + vfio_pci_set_vga_decode(vdev, false)); + } + return ret; } @@ -908,9 +959,17 @@ static void vfio_pci_remove(struct pci_dev *pdev) struct vfio_pci_device *vdev; vdev = vfio_del_group_dev(&pdev->dev); - if (vdev) { - iommu_group_put(pdev->dev.iommu_group); - kfree(vdev); + if (!vdev) + return; + + iommu_group_put(pdev->dev.iommu_group); + kfree(vdev); + + if (vfio_pci_is_vga(pdev)) { + vga_client_register(pdev, NULL, NULL, NULL); + vga_set_legacy_decoding(pdev, + VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM | + VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM); } } -- cgit v0.10.2 From 80c7e8cc2aaa36944acdfbce46f373101b9f21ff Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 7 Apr 2015 11:14:43 -0600 Subject: vfio-pci: Allow PCI IDs to be specified as module options This copies the same support from pci-stub for exactly the same purpose, enabling a set of PCI IDs to be automatically added to the driver's dynamic ID table at module load time. The code here is pretty simple and both vfio-pci and pci-stub are fairly unique in being meta drivers, capable of attaching to any device, so there's no attempt made to generalize the code into pci-core. Signed-off-by: Alex Williamson diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 25aef05..43517ce 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -11,6 +11,8 @@ * Author: Tom Lyon, pugs@cisco.com */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -33,6 +35,10 @@ #define DRIVER_AUTHOR "Alex Williamson " #define DRIVER_DESC "VFIO PCI - User Level meta-driver" +static char ids[1024] __initdata; +module_param_string(ids, ids, sizeof(ids), 0); +MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the vfio driver, format is \"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\" and multiple comma separated entries can be specified"); + static bool nointxmask; module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(nointxmask, @@ -1105,6 +1111,47 @@ static void __exit vfio_pci_cleanup(void) vfio_pci_uninit_perm_bits(); } +static void __init vfio_pci_fill_ids(void) +{ + char *p, *id; + int rc; + + /* no ids passed actually */ + if (ids[0] == '\0') + return; + + /* add ids specified in the module parameter */ + p = ids; + while ((id = strsep(&p, ","))) { + unsigned int vendor, device, subvendor = PCI_ANY_ID, + subdevice = PCI_ANY_ID, class = 0, class_mask = 0; + int fields; + + if (!strlen(id)) + continue; + + fields = sscanf(id, "%x:%x:%x:%x:%x:%x", + &vendor, &device, &subvendor, &subdevice, + &class, &class_mask); + + if (fields < 2) { + pr_warn("invalid id string \"%s\"\n", id); + continue; + } + + rc = pci_add_dynid(&vfio_pci_driver, vendor, device, + subvendor, subdevice, class, class_mask, 0); + if (rc) + pr_warn("failed to add dynamic id [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x (%d)\n", + vendor, device, subvendor, subdevice, + class, class_mask, rc); + else + pr_info("add [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x\n", + vendor, device, subvendor, subdevice, + class, class_mask); + } +} + static int __init vfio_pci_init(void) { int ret; @@ -1119,6 +1166,8 @@ static int __init vfio_pci_init(void) if (ret) goto out_driver; + vfio_pci_fill_ids(); + return 0; out_driver: -- cgit v0.10.2 From 561d72ddbbe480e2cae6c1a63c688986586ef2b1 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 7 Apr 2015 11:14:44 -0600 Subject: vfio-pci: Remove warning if try-reset fails As indicated in the comment, this is not entirely uncommon and causes user concern for no reason. Signed-off-by: Alex Williamson diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 43517ce..d0f1e70 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -217,14 +217,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) * Try to reset the device. The success of this is dependent on * being able to lock the device, which is not always possible. */ - if (vdev->reset_works) { - int ret = pci_try_reset_function(pdev); - if (ret) - pr_warn("%s: Failed to reset device %s (%d)\n", - __func__, dev_name(&pdev->dev), ret); - else - vdev->needs_reset = false; - } + if (vdev->reset_works && !pci_try_reset_function(pdev)) + vdev->needs_reset = false; pci_restore_state(pdev); out: -- cgit v0.10.2 From 6eb7018705de09b4138ca05d6a157203e0ea791a Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 7 Apr 2015 11:14:46 -0600 Subject: vfio-pci: Move idle devices to D3hot power state We can save some power by putting devices that are bound to vfio-pci but not in use by the user in the D3hot power state. Devices get woken into D0 when opened by the user. Resets return the device to D0, so we need to re-apply the low power state after a bus reset. It's tempting to try to use D3cold, but we have no reason to inhibit hotplug of idle devices and we might get into a loop of having the device disappear before we have a chance to try to use it. A new module parameter allows this feature to be disabled if there are devices that misbehave as a result of this change. Signed-off-by: Alex Williamson diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index d0f1e70..049b9e9 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -50,6 +50,11 @@ module_param(disable_vga, bool, S_IRUGO); MODULE_PARM_DESC(disable_vga, "Disable VGA resource access through vfio-pci"); #endif +static bool disable_idle_d3; +module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(disable_idle_d3, + "Disable using the PCI D3 low power state for idle, unused devices"); + static DEFINE_MUTEX(driver_lock); static inline bool vfio_vga_disabled(void) @@ -114,6 +119,8 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) u16 cmd; u8 msix_pos; + pci_set_power_state(pdev, PCI_D0); + /* Don't allow our initial saved state to include busmaster */ pci_clear_master(pdev); @@ -225,6 +232,9 @@ out: pci_disable_device(pdev); vfio_pci_try_bus_reset(vdev); + + if (!disable_idle_d3) + pci_set_power_state(pdev, PCI_D3hot); } static void vfio_pci_release(void *device_data) @@ -951,6 +961,20 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) vfio_pci_set_vga_decode(vdev, false)); } + if (!disable_idle_d3) { + /* + * pci-core sets the device power state to an unknown value at + * bootup and after being removed from a driver. The only + * transition it allows from this unknown state is to D0, which + * typically happens when a driver calls pci_enable_device(). + * We're not ready to enable the device yet, but we do want to + * be able to get to D3. Therefore first do a D0 transition + * before going to D3. + */ + pci_set_power_state(pdev, PCI_D0); + pci_set_power_state(pdev, PCI_D3hot); + } + return ret; } @@ -971,6 +995,9 @@ static void vfio_pci_remove(struct pci_dev *pdev) VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM | VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM); } + + if (!disable_idle_d3) + pci_set_power_state(pdev, PCI_D0); } static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, @@ -1089,10 +1116,13 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev) put_devs: for (i = 0; i < devs.cur_index; i++) { - if (!ret) { - tmp = vfio_device_data(devs.devices[i]); + tmp = vfio_device_data(devs.devices[i]); + if (!ret) tmp->needs_reset = false; - } + + if (!tmp->refcnt && !disable_idle_d3) + pci_set_power_state(tmp->pdev, PCI_D3hot); + vfio_device_put(devs.devices[i]); } -- cgit v0.10.2 From 5a0ff17741c1785b27229a16b5ab77470d71b170 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 8 Apr 2015 08:11:51 -0600 Subject: vfio-pci: Fix use after free Reported by 0-day test infrastructure. Fixes: ecaa1f6a0154 ("vfio-pci: Add VGA arbiter client") Signed-off-by: Alex Williamson diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 049b9e9..69fab0f 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -953,6 +953,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) { iommu_group_put(group); kfree(vdev); + return ret; } if (vfio_pci_is_vga(pdev)) { -- cgit v0.10.2