From 1460432cb513f0c16136ed132c20ecfbf8ccf942 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 21 Oct 2011 15:56:05 -0400 Subject: iommu: Add iommu_device_group callback and iommu_group sysfs entry An IOMMU group is a set of devices for which the IOMMU cannot distinguish transactions. For PCI devices, a group often occurs when a PCI bridge is involved. Transactions from any device behind the bridge appear to be sourced from the bridge itself. We leave it to the IOMMU driver to define the grouping restraints for their platform. Using this new interface, the group for a device can be retrieved using the iommu_device_group() callback. Users will compare the value returned against the value returned for other devices to determine whether they are part of the same group. Devices with no group are not translated by the IOMMU. There should be no expectations about the group numbers as they may be arbitrarily assigned by the IOMMU driver and may not be persistent across boots. We also provide a sysfs interface to the group numbers here so that userspace can understand IOMMU dependencies between devices for managing safe, userspace drivers. [Some code changes by Joerg Roedel ] Signed-off-by: Alex Williamson Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 2fb2963..9c35be4 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -25,8 +25,59 @@ #include #include +static ssize_t show_iommu_group(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned int groupid; + + if (iommu_device_group(dev, &groupid)) + return 0; + + return sprintf(buf, "%u", groupid); +} +static DEVICE_ATTR(iommu_group, S_IRUGO, show_iommu_group, NULL); + +static int add_iommu_group(struct device *dev, void *data) +{ + unsigned int groupid; + + if (iommu_device_group(dev, &groupid) == 0) + return device_create_file(dev, &dev_attr_iommu_group); + + return 0; +} + +static int remove_iommu_group(struct device *dev) +{ + unsigned int groupid; + + if (iommu_device_group(dev, &groupid) == 0) + device_remove_file(dev, &dev_attr_iommu_group); + + return 0; +} + +static int iommu_device_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + if (action == BUS_NOTIFY_ADD_DEVICE) + return add_iommu_group(dev, NULL); + else if (action == BUS_NOTIFY_DEL_DEVICE) + return remove_iommu_group(dev); + + return 0; +} + +static struct notifier_block iommu_device_nb = { + .notifier_call = iommu_device_notifier, +}; + static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops) { + bus_register_notifier(bus, &iommu_device_nb); + bus_for_each_dev(bus, NULL, NULL, add_iommu_group); } /** @@ -186,3 +237,12 @@ int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order) return domain->ops->unmap(domain, iova, gfp_order); } EXPORT_SYMBOL_GPL(iommu_unmap); + +int iommu_device_group(struct device *dev, unsigned int *groupid) +{ + if (iommu_present(dev->bus) && dev->bus->iommu_ops->device_group) + return dev->bus->iommu_ops->device_group(dev, groupid); + + return -ENODEV; +} +EXPORT_SYMBOL_GPL(iommu_device_group); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 432acc4..93617e7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -61,6 +61,7 @@ struct iommu_ops { unsigned long iova); int (*domain_has_cap)(struct iommu_domain *domain, unsigned long cap); + int (*device_group)(struct device *dev, unsigned int *groupid); }; extern int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops); @@ -81,6 +82,7 @@ extern int iommu_domain_has_cap(struct iommu_domain *domain, unsigned long cap); extern void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler); +extern int iommu_device_group(struct device *dev, unsigned int *groupid); /** * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework @@ -179,6 +181,11 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain, { } +static inline int iommu_device_group(struct device *dev, unsigned int *groupid); +{ + return -ENODEV; +} + #endif /* CONFIG_IOMMU_API */ #endif /* __LINUX_IOMMU_H */ -- cgit v0.10.2 From 70ae6f0d55bd216b2f773fa5fa5018c0490a9e50 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 21 Oct 2011 15:56:11 -0400 Subject: iommu/intel: Implement iommu_device_group We generally have BDF granularity for devices, so we just need to make sure devices aren't hidden behind PCIe-to-PCI bridges. We can then make up a group number that's simply the concatenated seg|bus|dev|fn so we don't have to track them (not that users should depend on that). Signed-off-by: Alex Williamson Acked-By: David Woodhouse Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index c0c7820..39ca6bb 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4060,6 +4060,51 @@ static int intel_iommu_domain_has_cap(struct iommu_domain *domain, return 0; } +/* + * Group numbers are arbitrary. Device with the same group number + * indicate the iommu cannot differentiate between them. To avoid + * tracking used groups we just use the seg|bus|devfn of the lowest + * level we're able to differentiate devices + */ +static int intel_iommu_device_group(struct device *dev, unsigned int *groupid) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_dev *bridge; + union { + struct { + u8 devfn; + u8 bus; + u16 segment; + } pci; + u32 group; + } id; + + if (iommu_no_mapping(dev)) + return -ENODEV; + + id.pci.segment = pci_domain_nr(pdev->bus); + id.pci.bus = pdev->bus->number; + id.pci.devfn = pdev->devfn; + + if (!device_to_iommu(id.pci.segment, id.pci.bus, id.pci.devfn)) + return -ENODEV; + + bridge = pci_find_upstream_pcie_bridge(pdev); + if (bridge) { + if (pci_is_pcie(bridge)) { + id.pci.bus = bridge->subordinate->number; + id.pci.devfn = 0; + } else { + id.pci.bus = bridge->bus->number; + id.pci.devfn = bridge->devfn; + } + } + + *groupid = id.group; + + return 0; +} + static struct iommu_ops intel_iommu_ops = { .domain_init = intel_iommu_domain_init, .domain_destroy = intel_iommu_domain_destroy, @@ -4069,6 +4114,7 @@ static struct iommu_ops intel_iommu_ops = { .unmap = intel_iommu_unmap, .iova_to_phys = intel_iommu_iova_to_phys, .domain_has_cap = intel_iommu_domain_has_cap, + .device_group = intel_iommu_device_group, }; static void __devinit quirk_iommu_rwbf(struct pci_dev *dev) -- cgit v0.10.2 From 8fbdce659549d93dfb257ec4eabacf63a188e506 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 21 Oct 2011 15:56:18 -0400 Subject: iommu/amd: Implement iommu_device_group Just use the amd_iommu_alias_table directly. Signed-off-by: Alex Williamson Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 4ee277a..1d82b63 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2773,6 +2773,18 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain, return 0; } +static int amd_iommu_device_group(struct device *dev, unsigned int *groupid) +{ + struct iommu_dev_data *dev_data = dev->archdata.iommu; + + if (!dev_data) + return -ENODEV; + + *groupid = amd_iommu_alias_table[dev_data->devid]; + + return 0; +} + static struct iommu_ops amd_iommu_ops = { .domain_init = amd_iommu_domain_init, .domain_destroy = amd_iommu_domain_destroy, @@ -2782,6 +2794,7 @@ static struct iommu_ops amd_iommu_ops = { .unmap = amd_iommu_unmap, .iova_to_phys = amd_iommu_iova_to_phys, .domain_has_cap = amd_iommu_domain_has_cap, + .device_group = amd_iommu_device_group, }; /***************************************************************************** -- cgit v0.10.2 From bcb71abe7d4c5a0d0368c67da0a7def4fc73497a Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 21 Oct 2011 15:56:24 -0400 Subject: iommu: Add option to group multi-function devices The option iommu=group_mf indicates the that the iommu driver should expose all functions of a multi-function PCI device as the same iommu_device_group. This is useful for disallowing individual functions being exposed as independent devices to userspace as there are often hidden dependencies. Virtual functions are not affected by this option. Signed-off-by: Alex Williamson Signed-off-by: Joerg Roedel diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a0c5c5f..e1b6e44 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1059,7 +1059,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nomerge forcesac soft - pt [x86, IA-64] + pt [x86, IA-64] + group_mf [x86, IA-64] + io7= [HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h index 105c93b..b6a809f 100644 --- a/arch/ia64/include/asm/iommu.h +++ b/arch/ia64/include/asm/iommu.h @@ -11,10 +11,12 @@ extern void no_iommu_init(void); extern int force_iommu, no_iommu; extern int iommu_pass_through; extern int iommu_detected; +extern int iommu_group_mf; #else #define iommu_pass_through (0) #define no_iommu (1) #define iommu_detected (0) +#define iommu_group_mf (0) #endif extern void iommu_dma_init(void); extern void machvec_init(const char *name); diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c index c16162c..eb11757 100644 --- a/arch/ia64/kernel/pci-dma.c +++ b/arch/ia64/kernel/pci-dma.c @@ -33,6 +33,7 @@ int force_iommu __read_mostly; #endif int iommu_pass_through; +int iommu_group_mf; /* Dummy device used for NULL arguments (normally ISA). Better would be probably a smaller DMA mask, but this is bug-to-bug compatible diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 345c99c..dffc38e 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -5,6 +5,7 @@ extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; extern int iommu_pass_through; +extern int iommu_group_mf; /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 80dc793..1c4d769 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -45,6 +45,15 @@ int iommu_detected __read_mostly = 0; */ int iommu_pass_through __read_mostly; +/* + * Group multi-function PCI devices into a single device-group for the + * iommu_device_group interface. This tells the iommu driver to pretend + * it cannot distinguish between functions of a device, exposing only one + * group for the device. Useful for disallowing use of individual PCI + * functions from userspace drivers. + */ +int iommu_group_mf __read_mostly; + extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; /* Dummy device used for NULL arguments (normally ISA). */ @@ -169,6 +178,8 @@ static __init int iommu_setup(char *p) #endif if (!strncmp(p, "pt", 2)) iommu_pass_through = 1; + if (!strncmp(p, "group_mf", 8)) + iommu_group_mf = 1; gart_parse_options(p); diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 1d82b63..6f75536 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2776,11 +2776,19 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain, static int amd_iommu_device_group(struct device *dev, unsigned int *groupid) { struct iommu_dev_data *dev_data = dev->archdata.iommu; + struct pci_dev *pdev = to_pci_dev(dev); + u16 devid; if (!dev_data) return -ENODEV; - *groupid = amd_iommu_alias_table[dev_data->devid]; + if (pdev->is_virtfn || !iommu_group_mf) + devid = dev_data->devid; + else + devid = calc_devid(pdev->bus->number, + PCI_DEVFN(PCI_SLOT(pdev->devfn), 0)); + + *groupid = amd_iommu_alias_table[devid]; return 0; } diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 39ca6bb..9ef16d6 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4100,6 +4100,9 @@ static int intel_iommu_device_group(struct device *dev, unsigned int *groupid) } } + if (!pdev->is_virtfn && iommu_group_mf) + id.pci.devfn = PCI_DEVFN(PCI_SLOT(id.pci.devfn), 0); + *groupid = id.group; return 0; -- cgit v0.10.2 From 95bdaf71ccf2cb4bba0c9a3d2baea0e7916f466b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 15 Nov 2011 12:48:29 +0100 Subject: iommu: Fix compile error with !IOMMU_API Signed-off-by: Joerg Roedel diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 93617e7..0f318fd54 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -181,7 +181,7 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain, { } -static inline int iommu_device_group(struct device *dev, unsigned int *groupid); +static inline int iommu_device_group(struct device *dev, unsigned int *groupid) { return -ENODEV; } -- cgit v0.10.2