From 44caf2f37f009b2affa743073fa935826b6ab2fd Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 13 Feb 2015 14:25:24 +0000 Subject: iommu/vt-d: kill bogus ecap_niotlb_iunits() As far back as I can see (which right now is a draft of the v1.2 spec dating from September 2008), bits 24-31 of the Extended Capability Register have already been reserved. I have no idea why anyone ever thought there would be multiple sets of IOTLB registers, but we've never supported them and all we do is make sure we map enough MMIO space for them. Kill it dead. Those bits do actually have a different meaning now. Signed-off-by: David Woodhouse diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index a65208a..ee24ada 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -115,10 +115,8 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) * Extended Capability Register */ -#define ecap_niotlb_iunits(e) ((((e) >> 24) & 0xff) + 1) #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) -#define ecap_max_iotlb_offset(e) \ - (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) +#define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16) #define ecap_coherent(e) ((e) & 0x1) #define ecap_qis(e) ((e) & 0x2) #define ecap_pass_through(e) ((e >> 6) & 0x1) -- cgit v0.10.2 From 68c1b89cf5653acd1107253e146b332420a1f4a7 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Sat, 21 Feb 2015 13:07:27 -0800 Subject: iommu/vt-d: Print x2apic opt out info instead of printing a warning BIOS can set up x2apic_opt_out bit on some platforms, for various misguided reasons like insane SMM code with weird assumptions about what descriptors look like, or wanting Windows not to enable the IOMMU so that the graphics driver will take it over for SVM in "driver mode". A user can either disable the x2apic_opt_out bit in BIOS or by kernel parameter "no_x2apic_optout". Instead of printing a warning, we just print information of x2apic opt out. Signed-off-by: Fenghua Yu Signed-off-by: David Woodhouse diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index a55b207..07d1a68 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -615,10 +615,7 @@ static int __init intel_enable_irq_remapping(void) eim = !dmar_x2apic_optout(); if (!eim) - printk(KERN_WARNING - "Your BIOS is broken and requested that x2apic be disabled.\n" - "This will slightly decrease performance.\n" - "Use 'intremap=no_x2apic_optout' to override BIOS request.\n"); + pr_info("x2apic is disabled because BIOS sets x2apic opt out bit. You can use 'intremap=no_x2apic_optout' to override the BIOS setting.\n"); } for_each_iommu(iommu, drhd) { -- cgit v0.10.2 From 18436afdc11a00ac881990b454cfb2eae81d6003 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 25 Mar 2015 15:05:47 +0000 Subject: iommu/vt-d: Allow RMRR on graphics devices too Commit c875d2c1 ("iommu/vt-d: Exclude devices using RMRRs from IOMMU API domains") prevents certain options for devices with RMRRs. This even prevents those devices from getting a 1:1 mapping with 'iommu=pt', because we don't have the code to handle *preserving* the RMRR regions when moving the device between domains. There's already an exclusion for USB devices, because we know the only reason for RMRRs there is a misguided desire to keep legacy keyboard/mouse emulation running in some theoretical OS which doesn't have support for USB in its own right... but which *does* enable the IOMMU. Add an exclusion for graphics devices too, so that 'iommu=pt' works there. We should be able to successfully assign graphics devices to guests too, as long as the initial handling of stolen memory is reconfigured appropriately. This has certainly worked in the past. Signed-off-by: David Woodhouse Cc: stable@vger.kernel.org diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 40dfbc0..e3276ee 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -50,6 +50,7 @@ #define CONTEXT_SIZE VTD_PAGE_SIZE #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) +#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB) #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) @@ -2561,6 +2562,10 @@ static bool device_has_rmrr(struct device *dev) * In both cases we assume that PCI USB devices with RMRRs have them largely * for historical reasons and that the RMRR space is not actively used post * boot. This exclusion may change if vendors begin to abuse it. + * + * The same exception is made for graphics devices, with the requirement that + * any use of the RMRR regions will be torn down before assigning the device + * to a guest. */ static bool device_is_rmrr_locked(struct device *dev) { @@ -2570,7 +2575,7 @@ static bool device_is_rmrr_locked(struct device *dev) if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); - if ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB) + if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev)) return false; } -- cgit v0.10.2 From 4423f5e7d28c26af31df711c5c21eeacfac737b4 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 25 Mar 2015 15:43:39 +0000 Subject: iommu/vt-d: Add new extended capabilities from v2.3 VT-d specification Signed-off-by: David Woodhouse diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index ee24ada..796ef96 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -115,6 +115,17 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) * Extended Capability Register */ +#define ecap_pss(e) ((e >> 35) & 0x1f) +#define ecap_eafs(e) ((e >> 34) & 0x1) +#define ecap_nwfs(e) ((e >> 33) & 0x1) +#define ecap_srs(e) ((e >> 31) & 0x1) +#define ecap_ers(e) ((e >> 30) & 0x1) +#define ecap_prs(e) ((e >> 29) & 0x1) +#define ecap_pasid(e) ((e >> 28) & 0x1) +#define ecap_dis(e) ((e >> 27) & 0x1) +#define ecap_nest(e) ((e >> 26) & 0x1) +#define ecap_mts(e) ((e >> 25) & 0x1) +#define ecap_ecs(e) ((e >> 24) & 0x1) #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) #define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16) #define ecap_coherent(e) ((e) & 0x1) @@ -178,6 +189,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_GSTS_IRES (((u32)1) << 25) #define DMA_GSTS_CFIS (((u32)1) << 23) +/* DMA_RTADDR_REG */ +#define DMA_RTADDR_RTT (((u64)1) << 11) + /* CCMD_REG */ #define DMA_CCMD_ICC (((u64)1) << 63) #define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61) -- cgit v0.10.2 From 03ecc32c5274962b9b1904d7a730e71c95bac05f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 13 Feb 2015 14:35:21 +0000 Subject: iommu/vt-d: support extended root and context entries Add a new function iommu_context_addr() which takes care of the differences and returns a pointer to a context entry which may be in either format. The formats are binary compatible for all the old fields anyway; the new one is just larger and some of the reserved bits in the original 128 are now meaningful. So far, nothing actually uses the new fields in the extended context entry. Modulo hardware bugs with interpreting the new-style tables, this should basically be a no-op. Signed-off-by: David Woodhouse diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index e3276ee..3df27c5 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -182,32 +182,11 @@ static int force_on = 0; * 64-127: Reserved */ struct root_entry { - u64 val; - u64 rsvd1; + u64 lo; + u64 hi; }; #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) -static inline bool root_present(struct root_entry *root) -{ - return (root->val & 1); -} -static inline void set_root_present(struct root_entry *root) -{ - root->val |= 1; -} -static inline void set_root_value(struct root_entry *root, unsigned long value) -{ - root->val &= ~VTD_PAGE_MASK; - root->val |= value & VTD_PAGE_MASK; -} -static inline struct context_entry * -get_context_addr_from_root(struct root_entry *root) -{ - return (struct context_entry *) - (root_present(root)?phys_to_virt( - root->val & VTD_PAGE_MASK) : - NULL); -} /* * low 64 bits: @@ -681,6 +660,40 @@ static void domain_update_iommu_cap(struct dmar_domain *domain) domain->iommu_superpage = domain_update_iommu_superpage(NULL); } +static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu, + u8 bus, u8 devfn, int alloc) +{ + struct root_entry *root = &iommu->root_entry[bus]; + struct context_entry *context; + u64 *entry; + + if (ecap_ecs(iommu->ecap)) { + if (devfn >= 0x80) { + devfn -= 0x80; + entry = &root->hi; + } + devfn *= 2; + } + entry = &root->lo; + if (*entry & 1) + context = phys_to_virt(*entry & VTD_PAGE_MASK); + else { + unsigned long phy_addr; + if (!alloc) + return NULL; + + context = alloc_pgtable_page(iommu->node); + if (!context) + return NULL; + + __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE); + phy_addr = virt_to_phys((void *)context); + *entry = phy_addr | 1; + __iommu_flush_cache(iommu, entry, sizeof(*entry)); + } + return &context[devfn]; +} + static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) { struct dmar_drhd_unit *drhd = NULL; @@ -740,75 +753,36 @@ static void domain_flush_cache(struct dmar_domain *domain, clflush_cache_range(addr, size); } -/* Gets context entry for a given bus and devfn */ -static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, - u8 bus, u8 devfn) -{ - struct root_entry *root; - struct context_entry *context; - unsigned long phy_addr; - unsigned long flags; - - spin_lock_irqsave(&iommu->lock, flags); - root = &iommu->root_entry[bus]; - context = get_context_addr_from_root(root); - if (!context) { - context = (struct context_entry *) - alloc_pgtable_page(iommu->node); - if (!context) { - spin_unlock_irqrestore(&iommu->lock, flags); - return NULL; - } - __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE); - phy_addr = virt_to_phys((void *)context); - set_root_value(root, phy_addr); - set_root_present(root); - __iommu_flush_cache(iommu, root, sizeof(*root)); - } - spin_unlock_irqrestore(&iommu->lock, flags); - return &context[devfn]; -} - static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn) { - struct root_entry *root; struct context_entry *context; - int ret; + int ret = 0; unsigned long flags; spin_lock_irqsave(&iommu->lock, flags); - root = &iommu->root_entry[bus]; - context = get_context_addr_from_root(root); - if (!context) { - ret = 0; - goto out; - } - ret = context_present(&context[devfn]); -out: + context = iommu_context_addr(iommu, bus, devfn, 0); + if (context) + ret = context_present(context); spin_unlock_irqrestore(&iommu->lock, flags); return ret; } static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn) { - struct root_entry *root; struct context_entry *context; unsigned long flags; spin_lock_irqsave(&iommu->lock, flags); - root = &iommu->root_entry[bus]; - context = get_context_addr_from_root(root); + context = iommu_context_addr(iommu, bus, devfn, 0); if (context) { - context_clear_entry(&context[devfn]); - __iommu_flush_cache(iommu, &context[devfn], \ - sizeof(*context)); + context_clear_entry(context); + __iommu_flush_cache(iommu, context, sizeof(*context)); } spin_unlock_irqrestore(&iommu->lock, flags); } static void free_context_table(struct intel_iommu *iommu) { - struct root_entry *root; int i; unsigned long flags; struct context_entry *context; @@ -818,10 +792,17 @@ static void free_context_table(struct intel_iommu *iommu) goto out; } for (i = 0; i < ROOT_ENTRY_NR; i++) { - root = &iommu->root_entry[i]; - context = get_context_addr_from_root(root); + context = iommu_context_addr(iommu, i, 0, 0); + if (context) + free_pgtable_page(context); + + if (!ecap_ecs(iommu->ecap)) + continue; + + context = iommu_context_addr(iommu, i, 0x80, 0); if (context) free_pgtable_page(context); + } free_pgtable_page(iommu->root_entry); iommu->root_entry = NULL; @@ -1145,14 +1126,16 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) static void iommu_set_root_entry(struct intel_iommu *iommu) { - void *addr; + u64 addr; u32 sts; unsigned long flag; - addr = iommu->root_entry; + addr = virt_to_phys(iommu->root_entry); + if (ecap_ecs(iommu->ecap)) + addr |= DMA_RTADDR_RTT; raw_spin_lock_irqsave(&iommu->register_lock, flag); - dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr)); + dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr); writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG); @@ -1798,7 +1781,9 @@ static int domain_context_mapping_one(struct dmar_domain *domain, BUG_ON(translation != CONTEXT_TT_PASS_THROUGH && translation != CONTEXT_TT_MULTI_LEVEL); - context = device_to_context_entry(iommu, bus, devfn); + spin_lock_irqsave(&iommu->lock, flags); + context = iommu_context_addr(iommu, bus, devfn, 1); + spin_unlock_irqrestore(&iommu->lock, flags); if (!context) return -ENOMEM; spin_lock_irqsave(&iommu->lock, flags); -- cgit v0.10.2