From 0076cd3d063a43f69eba2d0a1d13927897c8ed35 Mon Sep 17 00:00:00 2001 From: Wan Zongshun Date: Tue, 10 May 2016 09:21:01 -0400 Subject: iommu/amd: Set AMD iommu callbacks for platform bus driver AMD has more drivers will use ACPI to platform bus driver later, all those devices need iommu support, for example: eMMC driver. For latest AMD eMMC controller, it will utilize sdhci-acpi.c driver, which will rely on platform bus to match device and driver, where we will set 'dev' of struct platform_device as map_sg parameter passing to iommu driver for DMA request, so the iommu-ops are needed on the platform bus. Signed-off-by: Wan Zongshun Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 634f636..921111e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -2958,6 +2959,9 @@ int __init amd_iommu_init_api(void) if (err) return err; #endif + err = bus_set_iommu(&platform_bus_type, &amd_iommu_ops); + if (err) + return err; return 0; } -- cgit v0.10.2 From b54b874fbaf5e024723e50dfb035a9916d6752b4 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 20 May 2016 15:48:21 +0200 Subject: iommu/exynos: Suppress unbinding to prevent system failure Removal of IOMMU driver cannot be done reliably, so Exynos IOMMU driver doesn't support this operation. It is essential for system operation, so it makes sense to prevent unbinding by disabling bind/unbind sysfs feature for SYSMMU controller driver to avoid kernel ops or trashing memory caused by such operation. Signed-off-by: Marek Szyprowski CC: stable@vger.kernel.org # v4.2+ Reviewed-by: Krzysztof Kozlowski Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 5ecc86c..e27e3b7df 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -709,6 +709,7 @@ static struct platform_driver exynos_sysmmu_driver __refdata = { .name = "exynos-sysmmu", .of_match_table = sysmmu_of_match, .pm = &sysmmu_pm_ops, + .suppress_bind_attrs = true, } }; -- cgit v0.10.2 From 86f004c77c5aba6761d2f6b308a79b1913bea50a Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 21 May 2016 02:41:51 +0000 Subject: iommu/vt-d: Reduce extra first level entry in iommu->domains In commit <8bf478163e69> ("iommu/vt-d: Split up iommu->domains array"), it it splits iommu->domains in two levels. Each first level contains 256 entries of second level. In case of the ndomains is exact a multiple of 256, it would have one more extra first level entry for current implementation. This patch refines this calculation to reduce the extra first level entry. 
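For a concrete illustration of the off-by-one being removed (not part of the patch itself): when ndomains is exactly 256, the old expression (ndomains >> 8) + 1 sizes the first level at two entries although one is enough, while ALIGN(ndomains, 256) >> 8 yields exactly one; for any ndomains that is not a multiple of 256 the two expressions agree. A minimal stand-alone sketch, assuming the usual power-of-two ALIGN() semantics:

/*
 * Illustrative comparison of the two first-level sizing expressions;
 * stand-alone userspace sketch, not part of the kernel patch above.
 */
#include <stdio.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned int ndomains;

	for (ndomains = 255; ndomains <= 257; ndomains++) {
		unsigned int old_elems = (ndomains >> 8) + 1;		/* before the patch */
		unsigned int new_elems = ALIGN(ndomains, 256) >> 8;	/* after the patch  */

		printf("ndomains=%u: old=%u new=%u\n",
		       ndomains, old_elems, new_elems);
	}
	return 0;	/* prints 255: 1/1, 256: 2/1, 257: 2/2 */
}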
Signed-off-by: Wei Yang Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a644d0c..748e5e4 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1672,7 +1672,7 @@ static int iommu_init_domains(struct intel_iommu *iommu) return -ENOMEM; } - size = ((ndomains >> 8) + 1) * sizeof(struct dmar_domain **); + size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **); iommu->domains = kzalloc(size, GFP_KERNEL); if (iommu->domains) { @@ -1737,7 +1737,7 @@ static void disable_dmar_iommu(struct intel_iommu *iommu) static void free_dmar_iommu(struct intel_iommu *iommu) { if ((iommu->domains) && (iommu->domain_ids)) { - int elems = (cap_ndoms(iommu->cap) >> 8) + 1; + int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8; int i; for (i = 0; i < elems; i++) -- cgit v0.10.2 From 0c2b063f1813ac99238b9c61edb58752eb7762cf Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 23 May 2016 11:30:07 +0200 Subject: iommu/exynos: Return proper errors from getting clocks This patch reworks driver probe code to propagate error codes from clk_get() operation. This will allow to properly handle deferred probe in the future. Signed-off-by: Marek Szyprowski Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index e27e3b7df..9893656 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -602,37 +602,31 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev) } data->clk = devm_clk_get(dev, "sysmmu"); - if (!IS_ERR(data->clk)) { - ret = clk_prepare(data->clk); - if (ret) { - dev_err(dev, "Failed to prepare clk\n"); - return ret; - } - } else { + if (PTR_ERR(data->clk) == -ENOENT) data->clk = NULL; - } + else if (IS_ERR(data->clk)) + return PTR_ERR(data->clk); + ret = clk_prepare(data->clk); + if (ret) + return ret; data->aclk = devm_clk_get(dev, "aclk"); - if (!IS_ERR(data->aclk)) { - ret = clk_prepare(data->aclk); - if (ret) { - dev_err(dev, "Failed to prepare aclk\n"); - return ret; - } - } else { + if (PTR_ERR(data->aclk) == -ENOENT) data->aclk = NULL; - } + else if (IS_ERR(data->aclk)) + return PTR_ERR(data->aclk); + ret = clk_prepare(data->aclk); + if (ret) + return ret; data->pclk = devm_clk_get(dev, "pclk"); - if (!IS_ERR(data->pclk)) { - ret = clk_prepare(data->pclk); - if (ret) { - dev_err(dev, "Failed to prepare pclk\n"); - return ret; - } - } else { + if (PTR_ERR(data->pclk) == -ENOENT) data->pclk = NULL; - } + else if (IS_ERR(data->pclk)) + return PTR_ERR(data->pclk); + ret = clk_prepare(data->pclk); + if (ret) + return ret; if (!data->clk && (!data->aclk || !data->pclk)) { dev_err(dev, "Failed to get device clock(s)!\n"); @@ -640,15 +634,13 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev) } data->clk_master = devm_clk_get(dev, "master"); - if (!IS_ERR(data->clk_master)) { - ret = clk_prepare(data->clk_master); - if (ret) { - dev_err(dev, "Failed to prepare master's clk\n"); - return ret; - } - } else { + if (PTR_ERR(data->clk_master) == -ENOENT) data->clk_master = NULL; - } + else if (IS_ERR(data->clk_master)) + return PTR_ERR(data->clk_master); + ret = clk_prepare(data->clk_master); + if (ret) + return ret; data->sysmmu = dev; spin_lock_init(&data->lock); -- cgit v0.10.2 From 01324ab2c990bbd39ec0ef388b7c311a54452a0b Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 23 May 2016 11:30:08 +0200 Subject: iommu/exynos: Fix master clock management for inactive SYSMMU If SYSMMU controller is not active, 
there is no point in enabling master's clock just for doing the the of internal state. This patch moves enabling that clock to the block which actually does the register access. Signed-off-by: Marek Szyprowski Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 9893656..018bcd5 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -524,16 +524,15 @@ static void sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data, { unsigned long flags; - clk_enable(data->clk_master); spin_lock_irqsave(&data->lock, flags); - if (is_sysmmu_active(data)) { - if (data->version >= MAKE_MMU_VER(3, 3)) - __sysmmu_tlb_invalidate_entry(data, iova, 1); + if (is_sysmmu_active(data) && data->version >= MAKE_MMU_VER(3, 3)) { + clk_enable(data->clk_master); + __sysmmu_tlb_invalidate_entry(data, iova, 1); + clk_disable(data->clk_master); } spin_unlock_irqrestore(&data->lock, flags); - clk_disable(data->clk_master); } static void sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data, -- cgit v0.10.2 From fecc49db884b902f7abbea4c9287d17a56966be4 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 23 May 2016 11:30:09 +0200 Subject: iommu/exynos: Prepare clocks when needed, not in driver probe Make clock preparation together with clk_enable(). This way inactive SYSMMU controllers will not keep clocks prepared all the time. This change allows more fine graded power management in the future. All the code assumes that clock management doesn't fail, so guard clock_prepare_enable() it with BUG_ON(). Signed-off-by: Marek Szyprowski Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 018bcd5..e0b8343 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -322,14 +322,27 @@ static void __sysmmu_set_ptbase(struct sysmmu_drvdata *data, phys_addr_t pgd) __sysmmu_tlb_invalidate(data); } +static void __sysmmu_enable_clocks(struct sysmmu_drvdata *data) +{ + BUG_ON(clk_prepare_enable(data->clk_master)); + BUG_ON(clk_prepare_enable(data->clk)); + BUG_ON(clk_prepare_enable(data->pclk)); + BUG_ON(clk_prepare_enable(data->aclk)); +} + +static void __sysmmu_disable_clocks(struct sysmmu_drvdata *data) +{ + clk_disable_unprepare(data->aclk); + clk_disable_unprepare(data->pclk); + clk_disable_unprepare(data->clk); + clk_disable_unprepare(data->clk_master); +} + static void __sysmmu_get_version(struct sysmmu_drvdata *data) { u32 ver; - clk_enable(data->clk_master); - clk_enable(data->clk); - clk_enable(data->pclk); - clk_enable(data->aclk); + __sysmmu_enable_clocks(data); ver = readl(data->sfrbase + REG_MMU_VERSION); @@ -342,10 +355,7 @@ static void __sysmmu_get_version(struct sysmmu_drvdata *data) dev_dbg(data->sysmmu, "hardware version: %d.%d\n", MMU_MAJ_VER(data->version), MMU_MIN_VER(data->version)); - clk_disable(data->aclk); - clk_disable(data->pclk); - clk_disable(data->clk); - clk_disable(data->clk_master); + __sysmmu_disable_clocks(data); } static void show_fault_information(struct sysmmu_drvdata *data, @@ -427,10 +437,7 @@ static void __sysmmu_disable_nocount(struct sysmmu_drvdata *data) writel(CTRL_DISABLE, data->sfrbase + REG_MMU_CTRL); writel(0, data->sfrbase + REG_MMU_CFG); - clk_disable(data->aclk); - clk_disable(data->pclk); - clk_disable(data->clk); - clk_disable(data->clk_master); + __sysmmu_disable_clocks(data); } static bool __sysmmu_disable(struct sysmmu_drvdata *data) @@ -475,10 +482,7 @@ static void __sysmmu_init_config(struct sysmmu_drvdata *data) 
static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data) { - clk_enable(data->clk_master); - clk_enable(data->clk); - clk_enable(data->pclk); - clk_enable(data->aclk); + __sysmmu_enable_clocks(data); writel(CTRL_BLOCK, data->sfrbase + REG_MMU_CTRL); @@ -488,6 +492,12 @@ static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data) writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL); + /* + * SYSMMU driver keeps master's clock enabled only for the short + * time, while accessing the registers. For performing address + * translation during DMA transaction it relies on the client + * driver to enable it. + */ clk_disable(data->clk_master); } @@ -605,27 +615,18 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev) data->clk = NULL; else if (IS_ERR(data->clk)) return PTR_ERR(data->clk); - ret = clk_prepare(data->clk); - if (ret) - return ret; data->aclk = devm_clk_get(dev, "aclk"); if (PTR_ERR(data->aclk) == -ENOENT) data->aclk = NULL; else if (IS_ERR(data->aclk)) return PTR_ERR(data->aclk); - ret = clk_prepare(data->aclk); - if (ret) - return ret; data->pclk = devm_clk_get(dev, "pclk"); if (PTR_ERR(data->pclk) == -ENOENT) data->pclk = NULL; else if (IS_ERR(data->pclk)) return PTR_ERR(data->pclk); - ret = clk_prepare(data->pclk); - if (ret) - return ret; if (!data->clk && (!data->aclk || !data->pclk)) { dev_err(dev, "Failed to get device clock(s)!\n"); @@ -637,9 +638,6 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev) data->clk_master = NULL; else if (IS_ERR(data->clk_master)) return PTR_ERR(data->clk_master); - ret = clk_prepare(data->clk_master); - if (ret) - return ret; data->sysmmu = dev; spin_lock_init(&data->lock); -- cgit v0.10.2 From 96f6655700d16d1bae8ef49db6ce28607ebee6f3 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 23 May 2016 13:01:27 +0200 Subject: iommu/exynos: Prepare for deferred probe support Register iommu_ops at the end of successful probe instead of doing that unconditionally. This makes Exynos IOMMU driver ready for deferred probe caused by not-yet-available clocks. 
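Deferred probe only works because the clock lookups reworked earlier in this series forward their error codes: a missing clock (-ENOENT) is treated as optional, while anything else, including -EPROBE_DEFER, aborts the probe so the driver core can retry it later. A condensed sketch of that pattern follows; the helper name and clock id are illustrative, not taken from the driver:

/*
 * Optional-clock lookup in the style of the exynos-iommu probe path above;
 * illustrative sketch only.
 */
#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>

static struct clk *sysmmu_get_optional_clk(struct device *dev, const char *id)
{
	struct clk *clk = devm_clk_get(dev, id);

	/* Clock not described in DT: treat it as optional. */
	if (PTR_ERR(clk) == -ENOENT)
		return NULL;

	/* Either a valid clock or an ERR_PTR (e.g. -EPROBE_DEFER). */
	return clk;
}

The probe code then simply propagates the error and registers the iommu_ops only once every resource has been acquired, for example:

	data->clk = sysmmu_get_optional_clk(dev, "sysmmu");
	if (IS_ERR(data->clk))
		return PTR_ERR(data->clk);	/* may defer the probe */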
Signed-off-by: Marek Szyprowski Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index e0b8343..633e6d0 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -581,6 +581,8 @@ static void sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data, spin_unlock_irqrestore(&data->lock, flags); } +static struct iommu_ops exynos_iommu_ops; + static int __init exynos_sysmmu_probe(struct platform_device *pdev) { int irq, ret; @@ -654,6 +656,8 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev) pm_runtime_enable(dev); + of_iommu_set_ops(dev->of_node, &exynos_iommu_ops); + return 0; } @@ -1347,7 +1351,6 @@ static int __init exynos_iommu_of_setup(struct device_node *np) if (!dma_dev) dma_dev = &pdev->dev; - of_iommu_set_ops(np, &exynos_iommu_ops); return 0; } -- cgit v0.10.2 From ffb2d1eb88c3262f7a7001c5afeec6babe144b73 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 2 Jun 2016 17:46:10 -0700 Subject: iommu/vt-d: Don't reject NTB devices due to scope mismatch On a system with an Intel PCIe port configured as an NTB device, iommu initialization fails with DMAR: Device scope type does not match for 0000:80:03.0 This is because the DMAR table reports this device as having scope 2 (ACPI_DMAR_SCOPE_TYPE_BRIDGE): [0A0h 0160 1] Device Scope Entry Type : 02 [0A1h 0161 1] Entry Length : 08 [0A2h 0162 2] Reserved : 0000 [0A4h 0164 1] Enumeration ID : 00 [0A5h 0165 1] PCI Bus Number : 80 [0A6h 0166 2] PCI Path : 03,00 but the device has a type 0 PCI header: 80:03.0 Bridge [0680]: Intel Corporation Device [8086:2f0d] (rev 02) 00: 86 80 0d 2f 00 00 10 00 02 00 80 06 10 00 80 00 10: 0c 00 c0 00 c0 38 00 00 0c 00 00 00 80 38 00 00 20: 00 00 00 c8 00 00 10 c8 00 00 00 00 86 80 00 00 30: 00 00 00 00 60 00 00 00 00 00 00 00 ff 01 00 00 VT-d works perfectly on this system, so there's no reason to bail out on initialization due to this apparent scope mismatch. Use the class 0x0680 ("Other bridge device") as a heuristic for allowing DMAR initialization for non-bridge PCI devices listed with scope bridge. Signed-off-by: Roland Dreier Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 6a86b5d..2eff7b6 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -241,8 +241,20 @@ int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, if (!dmar_match_pci_path(info, scope->bus, path, level)) continue; - if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT) ^ - (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL)) { + /* + * We expect devices with endpoint scope to have normal PCI + * headers, and devices with bridge scope to have bridge PCI + * headers. However PCI NTB devices may be listed in the + * DMAR table with bridge scope, even though they have a + * normal PCI header. NTB devices are identified by class + * "BRIDGE_OTHER" (0680h) - we don't declare a socpe mismatch + * for this special case. 
+ */ + if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && + info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) || + (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE && + (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL && + info->dev->class >> 8 != PCI_CLASS_BRIDGE_OTHER))) { pr_warn("Device scope type does not match for %s\n", pci_name(info->dev)); return -EINVAL; -- cgit v0.10.2 From cf7513e759d19908c29cf2c129587ecda34a5c19 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Sat, 18 Jun 2016 13:58:30 +0530 Subject: iommu/amd: Remove create_workqueue alloc_workqueue replaces deprecated create_workqueue(). A dedicated workqueue has been used since the workitem (viz &fault->work), is involved in IO page-fault handling. WQ_MEM_RECLAIM has been set to guarantee forward progress under memory pressure, which is a requirement here. Since there are only a fixed number of work items, explicit concurrency limit is unnecessary. Signed-off-by: Bhaktipriya Shridhar Acked-by: Tejun Heo Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 56999d2f..0d52ceb 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -961,7 +961,7 @@ static int __init amd_iommu_v2_init(void) spin_lock_init(&state_lock); ret = -ENOMEM; - iommu_wq = create_workqueue("amd_iommu_v2"); + iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0); if (iommu_wq == NULL) goto out; -- cgit v0.10.2 From 9fec79df898602f5a20c0ab489bf9780aa98185d Mon Sep 17 00:00:00 2001 From: Honghui Zhang Date: Wed, 8 Jun 2016 17:50:44 +0800 Subject: iommu/mediatek: Do not call of_node_put in mtk_iommu_of_xlate The device_node will be released in of_iommu_configure, it may be double released if call of_node_put in mtk_iommu_of_xlate. Signed-off-by: Honghui Zhang Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index c3043d8..493bd3e 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -455,7 +455,6 @@ static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) if (!dev->archdata.iommu) { /* Get the m4u device */ m4updev = of_find_device_by_node(args->np); - of_node_put(args->np); if (WARN_ON(!m4updev)) return -EINVAL; -- cgit v0.10.2 From 7e42626ad3f4540143e68ba41e5e553f2715b451 Mon Sep 17 00:00:00 2001 From: Honghui Zhang Date: Wed, 8 Jun 2016 17:50:57 +0800 Subject: dt-bindings: mediatek: add descriptions for mediatek mt2701 iommu and smi This patch defines the local arbitor port IDs for mediatek SoC MT2701 and add descriptions of binding for mediatek generation one iommu and smi. Signed-off-by: Honghui Zhang Acked-by: Rob Herring Signed-off-by: Joerg Roedel diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt index cd1b1cd..53c20ca 100644 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt @@ -1,7 +1,9 @@ * Mediatek IOMMU Architecture Implementation - Some Mediatek SOCs contain a Multimedia Memory Management Unit (M4U) which -uses the ARM Short-Descriptor translation table format for address translation. + Some Mediatek SOCs contain a Multimedia Memory Management Unit (M4U), and +this M4U have two generations of HW architecture. Generation one uses flat +pagetable, and only supports 4K size page mapping. Generation two uses the +ARM Short-Descriptor translation table format for address translation. 
About the M4U Hardware Block Diagram, please check below: @@ -36,7 +38,9 @@ in each larb. Take a example, There are many ports like MC, PP, VLD in the video decode local arbiter, all these ports are according to the video HW. Required properties: -- compatible : must be "mediatek,mt8173-m4u". +- compatible : must be one of the following string: + "mediatek,mt2701-m4u" for mt2701 which uses generation one m4u HW. + "mediatek,mt8173-m4u" for mt8173 which uses generation two m4u HW. - reg : m4u register base and size. - interrupts : the interrupt of m4u. - clocks : must contain one entry for each clock-names. @@ -46,7 +50,8 @@ Required properties: according to the local arbiter index, like larb0, larb1, larb2... - iommu-cells : must be 1. This is the mtk_m4u_id according to the HW. Specifies the mtk_m4u_id as defined in - dt-binding/memory/mt8173-larb-port.h. + dt-binding/memory/mt2701-larb-port.h for mt2701 and + dt-binding/memory/mt8173-larb-port.h for mt8173 Example: iommu: iommu@10205000 { diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt index 06a83ce..aa614b2 100644 --- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt +++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt @@ -2,16 +2,31 @@ SMI (Smart Multimedia Interface) Common The hardware block diagram please check bindings/iommu/mediatek,iommu.txt +Mediatek SMI have two generations of HW architecture, mt8173 uses the second +generation of SMI HW while mt2701 uses the first generation HW of SMI. + +There's slight differences between the two SMI, for generation 2, the +register which control the iommu port is at each larb's register base. But +for generation 1, the register is at smi ao base(smi always on register +base). Besides that, the smi async clock should be prepared and enabled for +SMI generation 1 to transform the smi clock into emi clock domain, but that is +not needed for SMI generation 2. + Required properties: -- compatible : must be "mediatek,mt8173-smi-common" +- compatible : must be one of : + "mediatek,mt2701-smi-common" + "mediatek,mt8173-smi-common" - reg : the register and size of the SMI block. - power-domains : a phandle to the power domain of this local arbiter. - clocks : Must contain an entry for each entry in clock-names. -- clock-names : must contain 2 entries, as follows: +- clock-names : must contain 3 entries for generation 1 smi HW and 2 entries + for generation 2 smi HW as follows: - "apb" : Advanced Peripheral Bus clock, It's the clock for setting the register. - "smi" : It's the clock for transfer data and command. - They may be the same if both source clocks are the same. + They may be the same if both source clocks are the same. + - "async" : asynchronous clock, it help transform the smi clock into the emi + clock domain, this clock is only needed by generation 1 smi HW. 
Example: smi_common: smi@14022000 { diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt index 55ff3b7..21277a5 100644 --- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt +++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt @@ -3,7 +3,9 @@ SMI (Smart Multimedia Interface) Local Arbiter The hardware block diagram please check bindings/iommu/mediatek,iommu.txt Required properties: -- compatible : must be "mediatek,mt8173-smi-larb" +- compatible : must be one of : + "mediatek,mt8173-smi-larb" + "mediatek,mt2701-smi-larb" - reg : the register and size of this local arbiter. - mediatek,smi : a phandle to the smi_common node. - power-domains : a phandle to the power domain of this local arbiter. diff --git a/include/dt-bindings/memory/mt2701-larb-port.h b/include/dt-bindings/memory/mt2701-larb-port.h new file mode 100644 index 0000000..78f6678 --- /dev/null +++ b/include/dt-bindings/memory/mt2701-larb-port.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2015 MediaTek Inc. + * Author: Honghui Zhang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _MT2701_LARB_PORT_H_ +#define _MT2701_LARB_PORT_H_ + +/* + * Mediatek m4u generation 1 such as mt2701 has flat m4u port numbers, + * the first port's id for larb[N] would be the last port's id of larb[N - 1] + * plus one while larb[0]'s first port number is 0. The definition of + * MT2701_M4U_ID_LARBx is following HW register spec. + * But m4u generation 2 like mt8173 have different port number, it use fixed + * offset for each larb, the first port's id for larb[N] would be (N * 32). 
+ */ +#define LARB0_PORT_OFFSET 0 +#define LARB1_PORT_OFFSET 11 +#define LARB2_PORT_OFFSET 21 +#define LARB3_PORT_OFFSET 43 + +#define MT2701_M4U_ID_LARB0(port) ((port) + LARB0_PORT_OFFSET) +#define MT2701_M4U_ID_LARB1(port) ((port) + LARB1_PORT_OFFSET) +#define MT2701_M4U_ID_LARB2(port) ((port) + LARB2_PORT_OFFSET) + +/* Port define for larb0 */ +#define MT2701_M4U_PORT_DISP_OVL_0 MT2701_M4U_ID_LARB0(0) +#define MT2701_M4U_PORT_DISP_RDMA1 MT2701_M4U_ID_LARB0(1) +#define MT2701_M4U_PORT_DISP_RDMA MT2701_M4U_ID_LARB0(2) +#define MT2701_M4U_PORT_DISP_WDMA MT2701_M4U_ID_LARB0(3) +#define MT2701_M4U_PORT_MM_CMDQ MT2701_M4U_ID_LARB0(4) +#define MT2701_M4U_PORT_MDP_RDMA MT2701_M4U_ID_LARB0(5) +#define MT2701_M4U_PORT_MDP_WDMA MT2701_M4U_ID_LARB0(6) +#define MT2701_M4U_PORT_MDP_ROTO MT2701_M4U_ID_LARB0(7) +#define MT2701_M4U_PORT_MDP_ROTCO MT2701_M4U_ID_LARB0(8) +#define MT2701_M4U_PORT_MDP_ROTVO MT2701_M4U_ID_LARB0(9) +#define MT2701_M4U_PORT_MDP_RDMA1 MT2701_M4U_ID_LARB0(10) + +/* Port define for larb1 */ +#define MT2701_M4U_PORT_VDEC_MC_EXT MT2701_M4U_ID_LARB1(0) +#define MT2701_M4U_PORT_VDEC_PP_EXT MT2701_M4U_ID_LARB1(1) +#define MT2701_M4U_PORT_VDEC_PPWRAP_EXT MT2701_M4U_ID_LARB1(2) +#define MT2701_M4U_PORT_VDEC_AVC_MV_EXT MT2701_M4U_ID_LARB1(3) +#define MT2701_M4U_PORT_VDEC_PRED_RD_EXT MT2701_M4U_ID_LARB1(4) +#define MT2701_M4U_PORT_VDEC_PRED_WR_EXT MT2701_M4U_ID_LARB1(5) +#define MT2701_M4U_PORT_VDEC_VLD_EXT MT2701_M4U_ID_LARB1(6) +#define MT2701_M4U_PORT_VDEC_VLD2_EXT MT2701_M4U_ID_LARB1(7) +#define MT2701_M4U_PORT_VDEC_TILE_EXT MT2701_M4U_ID_LARB1(8) +#define MT2701_M4U_PORT_VDEC_IMG_RESZ_EXT MT2701_M4U_ID_LARB1(9) + +/* Port define for larb2 */ +#define MT2701_M4U_PORT_VENC_RCPU MT2701_M4U_ID_LARB2(0) +#define MT2701_M4U_PORT_VENC_REC_FRM MT2701_M4U_ID_LARB2(1) +#define MT2701_M4U_PORT_VENC_BSDMA MT2701_M4U_ID_LARB2(2) +#define MT2701_M4U_PORT_JPGENC_RDMA MT2701_M4U_ID_LARB2(3) +#define MT2701_M4U_PORT_VENC_LT_RCPU MT2701_M4U_ID_LARB2(4) +#define MT2701_M4U_PORT_VENC_LT_REC_FRM MT2701_M4U_ID_LARB2(5) +#define MT2701_M4U_PORT_VENC_LT_BSDMA MT2701_M4U_ID_LARB2(6) +#define MT2701_M4U_PORT_JPGDEC_BSDMA MT2701_M4U_ID_LARB2(7) +#define MT2701_M4U_PORT_VENC_SV_COMV MT2701_M4U_ID_LARB2(8) +#define MT2701_M4U_PORT_VENC_RD_COMV MT2701_M4U_ID_LARB2(9) +#define MT2701_M4U_PORT_JPGENC_BSDMA MT2701_M4U_ID_LARB2(10) +#define MT2701_M4U_PORT_VENC_CUR_LUMA MT2701_M4U_ID_LARB2(11) +#define MT2701_M4U_PORT_VENC_CUR_CHROMA MT2701_M4U_ID_LARB2(12) +#define MT2701_M4U_PORT_VENC_REF_LUMA MT2701_M4U_ID_LARB2(13) +#define MT2701_M4U_PORT_VENC_REF_CHROMA MT2701_M4U_ID_LARB2(14) +#define MT2701_M4U_PORT_IMG_RESZ MT2701_M4U_ID_LARB2(15) +#define MT2701_M4U_PORT_VENC_LT_SV_COMV MT2701_M4U_ID_LARB2(16) +#define MT2701_M4U_PORT_VENC_LT_RD_COMV MT2701_M4U_ID_LARB2(17) +#define MT2701_M4U_PORT_VENC_LT_CUR_LUMA MT2701_M4U_ID_LARB2(18) +#define MT2701_M4U_PORT_VENC_LT_CUR_CHROMA MT2701_M4U_ID_LARB2(19) +#define MT2701_M4U_PORT_VENC_LT_REF_LUMA MT2701_M4U_ID_LARB2(20) +#define MT2701_M4U_PORT_VENC_LT_REF_CHROMA MT2701_M4U_ID_LARB2(21) +#define MT2701_M4U_PORT_JPGDEC_WDMA MT2701_M4U_ID_LARB2(22) + +#endif -- cgit v0.10.2 From 9ca340c98c0dc6cb60b5ebd7847302f57648f0ba Mon Sep 17 00:00:00 2001 From: Honghui Zhang Date: Wed, 8 Jun 2016 17:50:58 +0800 Subject: iommu/mediatek: move the common struct into header file Move the struct defines of mtk iommu into a new header files for common use. 
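As an aside on the mt2701-larb-port.h binding header introduced just above: because generation-one M4U uses a single flat port-id space, translating a flat id back into a (larb, port) pair is just a walk over the cumulative LARBx_PORT_OFFSET values. The generation-one driver added later in this series performs the same arithmetic; the stand-alone sketch below only illustrates it, and the helper names are hypothetical:

/*
 * Decode a flat mt2701 M4U port id into (larb, port); offsets taken from
 * mt2701-larb-port.h above. Illustration only, not driver code.
 */
static const int mt2701_larb_offset[] = { 0, 11, 21, 43 };

static void mt2701_decode_m4u_id(int id, int *larb, int *port)
{
	int i;

	/* Find the last larb whose first flat id is <= id (offset[0] == 0). */
	for (i = 3; i > 0; i--)		/* 3 == last index of the offset table */
		if (id >= mt2701_larb_offset[i])
			break;

	*larb = i;
	*port = id - mt2701_larb_offset[i];
}

For example, MT2701_M4U_PORT_VDEC_PP_EXT is MT2701_M4U_ID_LARB1(1) == 12, which decodes back to larb 1, port 1.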
Signed-off-by: Honghui Zhang Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 493bd3e..b12c12d 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -34,7 +34,7 @@ #include #include -#include "io-pgtable.h" +#include "mtk_iommu.h" #define REG_MMU_PT_BASE_ADDR 0x000 @@ -93,20 +93,6 @@ #define MTK_PROTECT_PA_ALIGN 128 -struct mtk_iommu_suspend_reg { - u32 standard_axi_mode; - u32 dcm_dis; - u32 ctrl_reg; - u32 int_control0; - u32 int_main_control; -}; - -struct mtk_iommu_client_priv { - struct list_head client; - unsigned int mtk_m4u_id; - struct device *m4udev; -}; - struct mtk_iommu_domain { spinlock_t pgtlock; /* lock for page table */ @@ -116,19 +102,6 @@ struct mtk_iommu_domain { struct iommu_domain domain; }; -struct mtk_iommu_data { - void __iomem *base; - int irq; - struct device *dev; - struct clk *bclk; - phys_addr_t protect_base; /* protect memory base */ - struct mtk_iommu_suspend_reg reg; - struct mtk_iommu_domain *m4u_dom; - struct iommu_group *m4u_group; - struct mtk_smi_iommu smi_imu; /* SMI larb iommu info */ - bool enable_4GB; -}; - static struct iommu_ops mtk_iommu_ops; static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) @@ -551,25 +524,6 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) return 0; } -static int compare_of(struct device *dev, void *data) -{ - return dev->of_node == data; -} - -static int mtk_iommu_bind(struct device *dev) -{ - struct mtk_iommu_data *data = dev_get_drvdata(dev); - - return component_bind_all(dev, &data->smi_imu); -} - -static void mtk_iommu_unbind(struct device *dev) -{ - struct mtk_iommu_data *data = dev_get_drvdata(dev); - - component_unbind_all(dev, &data->smi_imu); -} - static const struct component_master_ops mtk_iommu_com_ops = { .bind = mtk_iommu_bind, .unbind = mtk_iommu_unbind, diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h new file mode 100644 index 0000000..9ed0a84 --- /dev/null +++ b/drivers/iommu/mtk_iommu.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2015-2016 MediaTek Inc. + * Author: Honghui Zhang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef _MTK_IOMMU_H_ +#define _MTK_IOMMU_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "io-pgtable.h" + +struct mtk_iommu_suspend_reg { + u32 standard_axi_mode; + u32 dcm_dis; + u32 ctrl_reg; + u32 int_control0; + u32 int_main_control; +}; + +struct mtk_iommu_client_priv { + struct list_head client; + unsigned int mtk_m4u_id; + struct device *m4udev; +}; + +struct mtk_iommu_domain; + +struct mtk_iommu_data { + void __iomem *base; + int irq; + struct device *dev; + struct clk *bclk; + phys_addr_t protect_base; /* protect memory base */ + struct mtk_iommu_suspend_reg reg; + struct mtk_iommu_domain *m4u_dom; + struct iommu_group *m4u_group; + struct mtk_smi_iommu smi_imu; /* SMI larb iommu info */ + bool enable_4GB; +}; + +static int compare_of(struct device *dev, void *data) +{ + return dev->of_node == data; +} + +static int mtk_iommu_bind(struct device *dev) +{ + struct mtk_iommu_data *data = dev_get_drvdata(dev); + + return component_bind_all(dev, &data->smi_imu); +} + +static void mtk_iommu_unbind(struct device *dev) +{ + struct mtk_iommu_data *data = dev_get_drvdata(dev); + + component_unbind_all(dev, &data->smi_imu); +} + +#endif -- cgit v0.10.2 From 3c8f4ad85c4b61fcf2c56e1d281d691ac595243a Mon Sep 17 00:00:00 2001 From: Honghui Zhang Date: Wed, 8 Jun 2016 17:50:59 +0800 Subject: memory/mediatek: add support for mt2701 Mediatek SMI has two generations of HW architecture, mt8173 uses the second generation of SMI HW while mt2701 uses the first generation HW of SMI. There's slight differences between the two generations, for generation 2, the register which control the iommu port access PA or IOVA is at each larb's register base. But for generation 1, the register is at smi ao base(smi always on register base). Besides that, the smi async clock should be prepared and enabled for SMI generation 1 HW to transform the smi clock into emi clock domain, but is not needed for SMI generation 2. This patch add SMI driver for mt2701 which use generation 1 SMI HW. Signed-off-by: Honghui Zhang Signed-off-by: Joerg Roedel diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c index f6b5757..4afbc41 100644 --- a/drivers/memory/mtk-smi.c +++ b/drivers/memory/mtk-smi.c @@ -21,19 +21,50 @@ #include #include #include +#include #define SMI_LARB_MMU_EN 0xf00 +#define REG_SMI_SECUR_CON_BASE 0x5c0 + +/* every register control 8 port, register offset 0x4 */ +#define REG_SMI_SECUR_CON_OFFSET(id) (((id) >> 3) << 2) +#define REG_SMI_SECUR_CON_ADDR(id) \ + (REG_SMI_SECUR_CON_BASE + REG_SMI_SECUR_CON_OFFSET(id)) + +/* + * every port have 4 bit to control, bit[port + 3] control virtual or physical, + * bit[port + 2 : port + 1] control the domain, bit[port] control the security + * or non-security. 
+ */ +#define SMI_SECUR_CON_VAL_MSK(id) (~(0xf << (((id) & 0x7) << 2))) +#define SMI_SECUR_CON_VAL_VIRT(id) BIT((((id) & 0x7) << 2) + 3) +/* mt2701 domain should be set to 3 */ +#define SMI_SECUR_CON_VAL_DOMAIN(id) (0x3 << ((((id) & 0x7) << 2) + 1)) + +struct mtk_smi_larb_gen { + int port_in_larb[MTK_LARB_NR_MAX + 1]; + void (*config_port)(struct device *); +}; struct mtk_smi { - struct device *dev; - struct clk *clk_apb, *clk_smi; + struct device *dev; + struct clk *clk_apb, *clk_smi; + struct clk *clk_async; /*only needed by mt2701*/ + void __iomem *smi_ao_base; }; struct mtk_smi_larb { /* larb: local arbiter */ - struct mtk_smi smi; - void __iomem *base; - struct device *smi_common_dev; - u32 *mmu; + struct mtk_smi smi; + void __iomem *base; + struct device *smi_common_dev; + const struct mtk_smi_larb_gen *larb_gen; + int larbid; + u32 *mmu; +}; + +enum mtk_smi_gen { + MTK_SMI_GEN1, + MTK_SMI_GEN2 }; static int mtk_smi_enable(const struct mtk_smi *smi) @@ -71,6 +102,7 @@ static void mtk_smi_disable(const struct mtk_smi *smi) int mtk_smi_larb_get(struct device *larbdev) { struct mtk_smi_larb *larb = dev_get_drvdata(larbdev); + const struct mtk_smi_larb_gen *larb_gen = larb->larb_gen; struct mtk_smi *common = dev_get_drvdata(larb->smi_common_dev); int ret; @@ -87,7 +119,7 @@ int mtk_smi_larb_get(struct device *larbdev) } /* Configure the iommu info for this larb */ - writel(*larb->mmu, larb->base + SMI_LARB_MMU_EN); + larb_gen->config_port(larbdev); return 0; } @@ -126,6 +158,45 @@ mtk_smi_larb_bind(struct device *dev, struct device *master, void *data) return -ENODEV; } +static void mtk_smi_larb_config_port(struct device *dev) +{ + struct mtk_smi_larb *larb = dev_get_drvdata(dev); + + writel(*larb->mmu, larb->base + SMI_LARB_MMU_EN); +} + + +static void mtk_smi_larb_config_port_gen1(struct device *dev) +{ + struct mtk_smi_larb *larb = dev_get_drvdata(dev); + const struct mtk_smi_larb_gen *larb_gen = larb->larb_gen; + struct mtk_smi *common = dev_get_drvdata(larb->smi_common_dev); + int i, m4u_port_id, larb_port_num; + u32 sec_con_val, reg_val; + + m4u_port_id = larb_gen->port_in_larb[larb->larbid]; + larb_port_num = larb_gen->port_in_larb[larb->larbid + 1] + - larb_gen->port_in_larb[larb->larbid]; + + for (i = 0; i < larb_port_num; i++, m4u_port_id++) { + if (*larb->mmu & BIT(i)) { + /* bit[port + 3] controls the virtual or physical */ + sec_con_val = SMI_SECUR_CON_VAL_VIRT(m4u_port_id); + } else { + /* do not need to enable m4u for this port */ + continue; + } + reg_val = readl(common->smi_ao_base + + REG_SMI_SECUR_CON_ADDR(m4u_port_id)); + reg_val &= SMI_SECUR_CON_VAL_MSK(m4u_port_id); + reg_val |= sec_con_val; + reg_val |= SMI_SECUR_CON_VAL_DOMAIN(m4u_port_id); + writel(reg_val, + common->smi_ao_base + + REG_SMI_SECUR_CON_ADDR(m4u_port_id)); + } +} + static void mtk_smi_larb_unbind(struct device *dev, struct device *master, void *data) { @@ -137,6 +208,31 @@ static const struct component_ops mtk_smi_larb_component_ops = { .unbind = mtk_smi_larb_unbind, }; +static const struct mtk_smi_larb_gen mtk_smi_larb_mt8173 = { + /* mt8173 do not need the port in larb */ + .config_port = mtk_smi_larb_config_port, +}; + +static const struct mtk_smi_larb_gen mtk_smi_larb_mt2701 = { + .port_in_larb = { + LARB0_PORT_OFFSET, LARB1_PORT_OFFSET, + LARB2_PORT_OFFSET, LARB3_PORT_OFFSET + }, + .config_port = mtk_smi_larb_config_port_gen1, +}; + +static const struct of_device_id mtk_smi_larb_of_ids[] = { + { + .compatible = "mediatek,mt8173-smi-larb", + .data = &mtk_smi_larb_mt8173 + }, + { + .compatible = 
"mediatek,mt2701-smi-larb", + .data = &mtk_smi_larb_mt2701 + }, + {} +}; + static int mtk_smi_larb_probe(struct platform_device *pdev) { struct mtk_smi_larb *larb; @@ -144,14 +240,20 @@ static int mtk_smi_larb_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *smi_node; struct platform_device *smi_pdev; + const struct of_device_id *of_id; if (!dev->pm_domain) return -EPROBE_DEFER; + of_id = of_match_node(mtk_smi_larb_of_ids, pdev->dev.of_node); + if (!of_id) + return -EINVAL; + larb = devm_kzalloc(dev, sizeof(*larb), GFP_KERNEL); if (!larb) return -ENOMEM; + larb->larb_gen = of_id->data; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); larb->base = devm_ioremap_resource(dev, res); if (IS_ERR(larb->base)) @@ -191,24 +293,34 @@ static int mtk_smi_larb_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id mtk_smi_larb_of_ids[] = { - { .compatible = "mediatek,mt8173-smi-larb",}, - {} -}; - static struct platform_driver mtk_smi_larb_driver = { .probe = mtk_smi_larb_probe, - .remove = mtk_smi_larb_remove, + .remove = mtk_smi_larb_remove, .driver = { .name = "mtk-smi-larb", .of_match_table = mtk_smi_larb_of_ids, } }; +static const struct of_device_id mtk_smi_common_of_ids[] = { + { + .compatible = "mediatek,mt8173-smi-common", + .data = (void *)MTK_SMI_GEN2 + }, + { + .compatible = "mediatek,mt2701-smi-common", + .data = (void *)MTK_SMI_GEN1 + }, + {} +}; + static int mtk_smi_common_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct mtk_smi *common; + struct resource *res; + const struct of_device_id *of_id; + enum mtk_smi_gen smi_gen; if (!dev->pm_domain) return -EPROBE_DEFER; @@ -226,6 +338,29 @@ static int mtk_smi_common_probe(struct platform_device *pdev) if (IS_ERR(common->clk_smi)) return PTR_ERR(common->clk_smi); + of_id = of_match_node(mtk_smi_common_of_ids, pdev->dev.of_node); + if (!of_id) + return -EINVAL; + + /* + * for mtk smi gen 1, we need to get the ao(always on) base to config + * m4u port, and we need to enable the aync clock for transform the smi + * clock into emi clock domain, but for mtk smi gen2, there's no smi ao + * base. + */ + smi_gen = (enum mtk_smi_gen)of_id->data; + if (smi_gen == MTK_SMI_GEN1) { + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + common->smi_ao_base = devm_ioremap_resource(dev, res); + if (IS_ERR(common->smi_ao_base)) + return PTR_ERR(common->smi_ao_base); + + common->clk_async = devm_clk_get(dev, "async"); + if (IS_ERR(common->clk_async)) + return PTR_ERR(common->clk_async); + + clk_prepare_enable(common->clk_async); + } pm_runtime_enable(dev); platform_set_drvdata(pdev, common); return 0; @@ -237,11 +372,6 @@ static int mtk_smi_common_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id mtk_smi_common_of_ids[] = { - { .compatible = "mediatek,mt8173-smi-common", }, - {} -}; - static struct platform_driver mtk_smi_common_driver = { .probe = mtk_smi_common_probe, .remove = mtk_smi_common_remove, @@ -272,4 +402,5 @@ err_unreg_smi: platform_driver_unregister(&mtk_smi_common_driver); return ret; } + subsys_initcall(mtk_smi_init); -- cgit v0.10.2 From b17336c55d8928c4c693d3feb6245508e562aab5 Mon Sep 17 00:00:00 2001 From: Honghui Zhang Date: Wed, 8 Jun 2016 17:51:00 +0800 Subject: iommu/mediatek: add support for mtk iommu generation one HW Mediatek SoC's M4U has two generations of HW architcture. Generation one uses flat, one layer pagetable, and was shipped with ARM architecture, it only supports 4K size page mapping. 
MT2701 SoC uses this generation one m4u HW. Generation two uses the ARM short-descriptor translation table format for address translation, and was shipped with ARM64 architecture, MT8173 uses this generation two m4u HW. All the two generation iommu HW only have one iommu domain, and all its iommu clients share the same iova address. These two generation m4u HW have slit different register groups and register offset, but most register names are the same. This patch add iommu support for mediatek SoC mt2701. Signed-off-by: Honghui Zhang Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index ad08603..32bb1e5 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -343,4 +343,22 @@ config MTK_IOMMU If unsure, say N here. +config MTK_IOMMU_V1 + bool "MTK IOMMU Version 1 (M4U gen1) Support" + depends on ARM + depends on ARCH_MEDIATEK || COMPILE_TEST + select ARM_DMA_USE_IOMMU + select IOMMU_API + select MEMORY + select MTK_SMI + select COMMON_CLK_MT2701_MMSYS + select COMMON_CLK_MT2701_IMGSYS + select COMMON_CLK_MT2701_VDECSYS + help + Support for the M4U on certain Mediatek SoCs. M4U generation 1 HW is + Multimedia Memory Managememt Unit. This option enables remapping of + DMA memory accesses for the multimedia subsystem. + + if unsure, say N here. + endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index c6edb31..778baf5 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o obj-$(CONFIG_MTK_IOMMU) += mtk_iommu.o +obj-$(CONFIG_MTK_IOMMU_V1) += mtk_iommu_v1.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c new file mode 100644 index 0000000..294485d --- /dev/null +++ b/drivers/iommu/mtk_iommu_v1.c @@ -0,0 +1,727 @@ +/* + * Copyright (c) 2015-2016 MediaTek Inc. + * Author: Honghui Zhang + * + * Based on driver/iommu/mtk_iommu.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mtk_iommu.h" + +#define REG_MMU_PT_BASE_ADDR 0x000 + +#define F_ALL_INVLD 0x2 +#define F_MMU_INV_RANGE 0x1 +#define F_INVLD_EN0 BIT(0) +#define F_INVLD_EN1 BIT(1) + +#define F_MMU_FAULT_VA_MSK 0xfffff000 +#define MTK_PROTECT_PA_ALIGN 128 + +#define REG_MMU_CTRL_REG 0x210 +#define F_MMU_CTRL_COHERENT_EN BIT(8) +#define REG_MMU_IVRP_PADDR 0x214 +#define REG_MMU_INT_CONTROL 0x220 +#define F_INT_TRANSLATION_FAULT BIT(0) +#define F_INT_MAIN_MULTI_HIT_FAULT BIT(1) +#define F_INT_INVALID_PA_FAULT BIT(2) +#define F_INT_ENTRY_REPLACEMENT_FAULT BIT(3) +#define F_INT_TABLE_WALK_FAULT BIT(4) +#define F_INT_TLB_MISS_FAULT BIT(5) +#define F_INT_PFH_DMA_FIFO_OVERFLOW BIT(6) +#define F_INT_MISS_DMA_FIFO_OVERFLOW BIT(7) + +#define F_MMU_TF_PROTECT_SEL(prot) (((prot) & 0x3) << 5) +#define F_INT_CLR_BIT BIT(12) + +#define REG_MMU_FAULT_ST 0x224 +#define REG_MMU_FAULT_VA 0x228 +#define REG_MMU_INVLD_PA 0x22C +#define REG_MMU_INT_ID 0x388 +#define REG_MMU_INVALIDATE 0x5c0 +#define REG_MMU_INVLD_START_A 0x5c4 +#define REG_MMU_INVLD_END_A 0x5c8 + +#define REG_MMU_INV_SEL 0x5d8 +#define REG_MMU_STANDARD_AXI_MODE 0x5e8 + +#define REG_MMU_DCM 0x5f0 +#define F_MMU_DCM_ON BIT(1) +#define REG_MMU_CPE_DONE 0x60c +#define F_DESC_VALID 0x2 +#define F_DESC_NONSEC BIT(3) +#define MT2701_M4U_TF_LARB(TF) (6 - (((TF) >> 13) & 0x7)) +#define MT2701_M4U_TF_PORT(TF) (((TF) >> 8) & 0xF) +/* MTK generation one iommu HW only support 4K size mapping */ +#define MT2701_IOMMU_PAGE_SHIFT 12 +#define MT2701_IOMMU_PAGE_SIZE (1UL << MT2701_IOMMU_PAGE_SHIFT) + +/* + * MTK m4u support 4GB iova address space, and only support 4K page + * mapping. So the pagetable size should be exactly as 4M. 
+ */ +#define M2701_IOMMU_PGT_SIZE SZ_4M + +struct mtk_iommu_domain { + spinlock_t pgtlock; /* lock for page table */ + struct iommu_domain domain; + u32 *pgt_va; + dma_addr_t pgt_pa; + struct mtk_iommu_data *data; +}; + +static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct mtk_iommu_domain, domain); +} + +static const int mt2701_m4u_in_larb[] = { + LARB0_PORT_OFFSET, LARB1_PORT_OFFSET, + LARB2_PORT_OFFSET, LARB3_PORT_OFFSET +}; + +static inline int mt2701_m4u_to_larb(int id) +{ + int i; + + for (i = ARRAY_SIZE(mt2701_m4u_in_larb) - 1; i >= 0; i--) + if ((id) >= mt2701_m4u_in_larb[i]) + return i; + + return 0; +} + +static inline int mt2701_m4u_to_port(int id) +{ + int larb = mt2701_m4u_to_larb(id); + + return id - mt2701_m4u_in_larb[larb]; +} + +static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data) +{ + writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, + data->base + REG_MMU_INV_SEL); + writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); + wmb(); /* Make sure the tlb flush all done */ +} + +static void mtk_iommu_tlb_flush_range(struct mtk_iommu_data *data, + unsigned long iova, size_t size) +{ + int ret; + u32 tmp; + + writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, + data->base + REG_MMU_INV_SEL); + writel_relaxed(iova & F_MMU_FAULT_VA_MSK, + data->base + REG_MMU_INVLD_START_A); + writel_relaxed((iova + size - 1) & F_MMU_FAULT_VA_MSK, + data->base + REG_MMU_INVLD_END_A); + writel_relaxed(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE); + + ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE, + tmp, tmp != 0, 10, 100000); + if (ret) { + dev_warn(data->dev, + "Partial TLB flush timed out, falling back to full flush\n"); + mtk_iommu_tlb_flush_all(data); + } + /* Clear the CPE status */ + writel_relaxed(0, data->base + REG_MMU_CPE_DONE); +} + +static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) +{ + struct mtk_iommu_data *data = dev_id; + struct mtk_iommu_domain *dom = data->m4u_dom; + u32 int_state, regval, fault_iova, fault_pa; + unsigned int fault_larb, fault_port; + + /* Read error information from registers */ + int_state = readl_relaxed(data->base + REG_MMU_FAULT_ST); + fault_iova = readl_relaxed(data->base + REG_MMU_FAULT_VA); + + fault_iova &= F_MMU_FAULT_VA_MSK; + fault_pa = readl_relaxed(data->base + REG_MMU_INVLD_PA); + regval = readl_relaxed(data->base + REG_MMU_INT_ID); + fault_larb = MT2701_M4U_TF_LARB(regval); + fault_port = MT2701_M4U_TF_PORT(regval); + + /* + * MTK v1 iommu HW could not determine whether the fault is read or + * write fault, report as read fault. 
+ */ + if (report_iommu_fault(&dom->domain, data->dev, fault_iova, + IOMMU_FAULT_READ)) + dev_err_ratelimited(data->dev, + "fault type=0x%x iova=0x%x pa=0x%x larb=%d port=%d\n", + int_state, fault_iova, fault_pa, + fault_larb, fault_port); + + /* Interrupt clear */ + regval = readl_relaxed(data->base + REG_MMU_INT_CONTROL); + regval |= F_INT_CLR_BIT; + writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL); + + mtk_iommu_tlb_flush_all(data); + + return IRQ_HANDLED; +} + +static void mtk_iommu_config(struct mtk_iommu_data *data, + struct device *dev, bool enable) +{ + struct mtk_iommu_client_priv *head, *cur, *next; + struct mtk_smi_larb_iommu *larb_mmu; + unsigned int larbid, portid; + + head = dev->archdata.iommu; + list_for_each_entry_safe(cur, next, &head->client, client) { + larbid = mt2701_m4u_to_larb(cur->mtk_m4u_id); + portid = mt2701_m4u_to_port(cur->mtk_m4u_id); + larb_mmu = &data->smi_imu.larb_imu[larbid]; + + dev_dbg(dev, "%s iommu port: %d\n", + enable ? "enable" : "disable", portid); + + if (enable) + larb_mmu->mmu |= MTK_SMI_MMU_EN(portid); + else + larb_mmu->mmu &= ~MTK_SMI_MMU_EN(portid); + } +} + +static int mtk_iommu_domain_finalise(struct mtk_iommu_data *data) +{ + struct mtk_iommu_domain *dom = data->m4u_dom; + + spin_lock_init(&dom->pgtlock); + + dom->pgt_va = dma_zalloc_coherent(data->dev, + M2701_IOMMU_PGT_SIZE, + &dom->pgt_pa, GFP_KERNEL); + if (!dom->pgt_va) + return -ENOMEM; + + writel(dom->pgt_pa, data->base + REG_MMU_PT_BASE_ADDR); + + dom->data = data; + + return 0; +} + +static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type) +{ + struct mtk_iommu_domain *dom; + + if (type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + dom = kzalloc(sizeof(*dom), GFP_KERNEL); + if (!dom) + return NULL; + + return &dom->domain; +} + +static void mtk_iommu_domain_free(struct iommu_domain *domain) +{ + struct mtk_iommu_domain *dom = to_mtk_domain(domain); + struct mtk_iommu_data *data = dom->data; + + dma_free_coherent(data->dev, M2701_IOMMU_PGT_SIZE, + dom->pgt_va, dom->pgt_pa); + kfree(to_mtk_domain(domain)); +} + +static int mtk_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct mtk_iommu_domain *dom = to_mtk_domain(domain); + struct mtk_iommu_client_priv *priv = dev->archdata.iommu; + struct mtk_iommu_data *data; + int ret; + + if (!priv) + return -ENODEV; + + data = dev_get_drvdata(priv->m4udev); + if (!data->m4u_dom) { + data->m4u_dom = dom; + ret = mtk_iommu_domain_finalise(data); + if (ret) { + data->m4u_dom = NULL; + return ret; + } + } + + mtk_iommu_config(data, dev, true); + return 0; +} + +static void mtk_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct mtk_iommu_client_priv *priv = dev->archdata.iommu; + struct mtk_iommu_data *data; + + if (!priv) + return; + + data = dev_get_drvdata(priv->m4udev); + mtk_iommu_config(data, dev, false); +} + +static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + struct mtk_iommu_domain *dom = to_mtk_domain(domain); + unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT; + unsigned long flags; + unsigned int i; + u32 *pgt_base_iova = dom->pgt_va + (iova >> MT2701_IOMMU_PAGE_SHIFT); + u32 pabase = (u32)paddr; + int map_size = 0; + + spin_lock_irqsave(&dom->pgtlock, flags); + for (i = 0; i < page_num; i++) { + if (pgt_base_iova[i]) { + memset(pgt_base_iova, 0, i * sizeof(u32)); + break; + } + pgt_base_iova[i] = pabase | F_DESC_VALID | F_DESC_NONSEC; + pabase += MT2701_IOMMU_PAGE_SIZE; + 
map_size += MT2701_IOMMU_PAGE_SIZE; + } + + spin_unlock_irqrestore(&dom->pgtlock, flags); + + mtk_iommu_tlb_flush_range(dom->data, iova, size); + + return map_size == size ? 0 : -EEXIST; +} + +static size_t mtk_iommu_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct mtk_iommu_domain *dom = to_mtk_domain(domain); + unsigned long flags; + u32 *pgt_base_iova = dom->pgt_va + (iova >> MT2701_IOMMU_PAGE_SHIFT); + unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT; + + spin_lock_irqsave(&dom->pgtlock, flags); + memset(pgt_base_iova, 0, page_num * sizeof(u32)); + spin_unlock_irqrestore(&dom->pgtlock, flags); + + mtk_iommu_tlb_flush_range(dom->data, iova, size); + + return size; +} + +static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct mtk_iommu_domain *dom = to_mtk_domain(domain); + unsigned long flags; + phys_addr_t pa; + + spin_lock_irqsave(&dom->pgtlock, flags); + pa = *(dom->pgt_va + (iova >> MT2701_IOMMU_PAGE_SHIFT)); + pa = pa & (~(MT2701_IOMMU_PAGE_SIZE - 1)); + spin_unlock_irqrestore(&dom->pgtlock, flags); + + return pa; +} + +/* + * MTK generation one iommu HW only support one iommu domain, and all the client + * sharing the same iova address space. + */ +static int mtk_iommu_create_mapping(struct device *dev, + struct of_phandle_args *args) +{ + struct mtk_iommu_client_priv *head, *priv, *next; + struct platform_device *m4updev; + struct dma_iommu_mapping *mtk_mapping; + struct device *m4udev; + int ret; + + if (args->args_count != 1) { + dev_err(dev, "invalid #iommu-cells(%d) property for IOMMU\n", + args->args_count); + return -EINVAL; + } + + if (!dev->archdata.iommu) { + /* Get the m4u device */ + m4updev = of_find_device_by_node(args->np); + if (WARN_ON(!m4updev)) + return -EINVAL; + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (!head) + return -ENOMEM; + + dev->archdata.iommu = head; + INIT_LIST_HEAD(&head->client); + head->m4udev = &m4updev->dev; + } else { + head = dev->archdata.iommu; + } + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + ret = -ENOMEM; + goto err_free_mem; + } + priv->mtk_m4u_id = args->args[0]; + list_add_tail(&priv->client, &head->client); + + m4udev = head->m4udev; + mtk_mapping = m4udev->archdata.iommu; + if (!mtk_mapping) { + /* MTK iommu support 4GB iova address space. 
*/ + mtk_mapping = arm_iommu_create_mapping(&platform_bus_type, + 0, 1ULL << 32); + if (IS_ERR(mtk_mapping)) { + ret = PTR_ERR(mtk_mapping); + goto err_free_mem; + } + m4udev->archdata.iommu = mtk_mapping; + } + + ret = arm_iommu_attach_device(dev, mtk_mapping); + if (ret) + goto err_release_mapping; + + return 0; + +err_release_mapping: + arm_iommu_release_mapping(mtk_mapping); + m4udev->archdata.iommu = NULL; +err_free_mem: + list_for_each_entry_safe(priv, next, &head->client, client) + kfree(priv); + kfree(head); + dev->archdata.iommu = NULL; + return ret; +} + +static int mtk_iommu_add_device(struct device *dev) +{ + struct iommu_group *group; + struct of_phandle_args iommu_spec; + struct of_phandle_iterator it; + int err; + + of_for_each_phandle(&it, err, dev->of_node, "iommus", + "#iommu-cells", 0) { + int count = of_phandle_iterator_args(&it, iommu_spec.args, + MAX_PHANDLE_ARGS); + iommu_spec.np = of_node_get(it.node); + iommu_spec.args_count = count; + + mtk_iommu_create_mapping(dev, &iommu_spec); + of_node_put(iommu_spec.np); + } + + if (!dev->archdata.iommu) /* Not a iommu client device */ + return -ENODEV; + + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + iommu_group_put(group); + return 0; +} + +static void mtk_iommu_remove_device(struct device *dev) +{ + struct mtk_iommu_client_priv *head, *cur, *next; + + head = dev->archdata.iommu; + if (!head) + return; + + list_for_each_entry_safe(cur, next, &head->client, client) { + list_del(&cur->client); + kfree(cur); + } + kfree(head); + dev->archdata.iommu = NULL; + + iommu_group_remove_device(dev); +} + +static struct iommu_group *mtk_iommu_device_group(struct device *dev) +{ + struct mtk_iommu_data *data; + struct mtk_iommu_client_priv *priv; + + priv = dev->archdata.iommu; + if (!priv) + return ERR_PTR(-ENODEV); + + /* All the client devices are in the same m4u iommu-group */ + data = dev_get_drvdata(priv->m4udev); + if (!data->m4u_group) { + data->m4u_group = iommu_group_alloc(); + if (IS_ERR(data->m4u_group)) + dev_err(dev, "Failed to allocate M4U IOMMU group\n"); + } + return data->m4u_group; +} + +static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) +{ + u32 regval; + int ret; + + ret = clk_prepare_enable(data->bclk); + if (ret) { + dev_err(data->dev, "Failed to enable iommu bclk(%d)\n", ret); + return ret; + } + + regval = F_MMU_CTRL_COHERENT_EN | F_MMU_TF_PROTECT_SEL(2); + writel_relaxed(regval, data->base + REG_MMU_CTRL_REG); + + regval = F_INT_TRANSLATION_FAULT | + F_INT_MAIN_MULTI_HIT_FAULT | + F_INT_INVALID_PA_FAULT | + F_INT_ENTRY_REPLACEMENT_FAULT | + F_INT_TABLE_WALK_FAULT | + F_INT_TLB_MISS_FAULT | + F_INT_PFH_DMA_FIFO_OVERFLOW | + F_INT_MISS_DMA_FIFO_OVERFLOW; + writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL); + + /* protect memory,hw will write here while translation fault */ + writel_relaxed(data->protect_base, + data->base + REG_MMU_IVRP_PADDR); + + writel_relaxed(F_MMU_DCM_ON, data->base + REG_MMU_DCM); + + if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0, + dev_name(data->dev), (void *)data)) { + writel_relaxed(0, data->base + REG_MMU_PT_BASE_ADDR); + clk_disable_unprepare(data->bclk); + dev_err(data->dev, "Failed @ IRQ-%d Request\n", data->irq); + return -ENODEV; + } + + return 0; +} + +static struct iommu_ops mtk_iommu_ops = { + .domain_alloc = mtk_iommu_domain_alloc, + .domain_free = mtk_iommu_domain_free, + .attach_dev = mtk_iommu_attach_device, + .detach_dev = mtk_iommu_detach_device, + .map = mtk_iommu_map, + .unmap = 
mtk_iommu_unmap, + .map_sg = default_iommu_map_sg, + .iova_to_phys = mtk_iommu_iova_to_phys, + .add_device = mtk_iommu_add_device, + .remove_device = mtk_iommu_remove_device, + .device_group = mtk_iommu_device_group, + .pgsize_bitmap = ~0UL << MT2701_IOMMU_PAGE_SHIFT, +}; + +static const struct of_device_id mtk_iommu_of_ids[] = { + { .compatible = "mediatek,mt2701-m4u", }, + {} +}; + +static const struct component_master_ops mtk_iommu_com_ops = { + .bind = mtk_iommu_bind, + .unbind = mtk_iommu_unbind, +}; + +static int mtk_iommu_probe(struct platform_device *pdev) +{ + struct mtk_iommu_data *data; + struct device *dev = &pdev->dev; + struct resource *res; + struct component_match *match = NULL; + struct of_phandle_args larb_spec; + struct of_phandle_iterator it; + void *protect; + int larb_nr, ret, err; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->dev = dev; + + /* Protect memory. HW will access here while translation fault.*/ + protect = devm_kzalloc(dev, MTK_PROTECT_PA_ALIGN * 2, + GFP_KERNEL | GFP_DMA); + if (!protect) + return -ENOMEM; + data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + data->base = devm_ioremap_resource(dev, res); + if (IS_ERR(data->base)) + return PTR_ERR(data->base); + + data->irq = platform_get_irq(pdev, 0); + if (data->irq < 0) + return data->irq; + + data->bclk = devm_clk_get(dev, "bclk"); + if (IS_ERR(data->bclk)) + return PTR_ERR(data->bclk); + + larb_nr = 0; + of_for_each_phandle(&it, err, dev->of_node, + "mediatek,larbs", NULL, 0) { + struct platform_device *plarbdev; + int count = of_phandle_iterator_args(&it, larb_spec.args, + MAX_PHANDLE_ARGS); + + if (count) + continue; + + larb_spec.np = of_node_get(it.node); + if (!of_device_is_available(larb_spec.np)) + continue; + + plarbdev = of_find_device_by_node(larb_spec.np); + of_node_put(larb_spec.np); + if (!plarbdev) { + plarbdev = of_platform_device_create( + larb_spec.np, NULL, + platform_bus_type.dev_root); + if (!plarbdev) + return -EPROBE_DEFER; + } + + data->smi_imu.larb_imu[larb_nr].dev = &plarbdev->dev; + component_match_add(dev, &match, compare_of, larb_spec.np); + larb_nr++; + } + + data->smi_imu.larb_nr = larb_nr; + + platform_set_drvdata(pdev, data); + + ret = mtk_iommu_hw_init(data); + if (ret) + return ret; + + if (!iommu_present(&platform_bus_type)) + bus_set_iommu(&platform_bus_type, &mtk_iommu_ops); + + return component_master_add_with_match(dev, &mtk_iommu_com_ops, match); +} + +static int mtk_iommu_remove(struct platform_device *pdev) +{ + struct mtk_iommu_data *data = platform_get_drvdata(pdev); + + if (iommu_present(&platform_bus_type)) + bus_set_iommu(&platform_bus_type, NULL); + + clk_disable_unprepare(data->bclk); + devm_free_irq(&pdev->dev, data->irq, data); + component_master_del(&pdev->dev, &mtk_iommu_com_ops); + return 0; +} + +static int __maybe_unused mtk_iommu_suspend(struct device *dev) +{ + struct mtk_iommu_data *data = dev_get_drvdata(dev); + struct mtk_iommu_suspend_reg *reg = &data->reg; + void __iomem *base = data->base; + + reg->standard_axi_mode = readl_relaxed(base + + REG_MMU_STANDARD_AXI_MODE); + reg->dcm_dis = readl_relaxed(base + REG_MMU_DCM); + reg->ctrl_reg = readl_relaxed(base + REG_MMU_CTRL_REG); + reg->int_control0 = readl_relaxed(base + REG_MMU_INT_CONTROL); + return 0; +} + +static int __maybe_unused mtk_iommu_resume(struct device *dev) +{ + struct mtk_iommu_data *data = dev_get_drvdata(dev); + struct mtk_iommu_suspend_reg 
*reg = &data->reg; + void __iomem *base = data->base; + + writel_relaxed(data->m4u_dom->pgt_pa, base + REG_MMU_PT_BASE_ADDR); + writel_relaxed(reg->standard_axi_mode, + base + REG_MMU_STANDARD_AXI_MODE); + writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM); + writel_relaxed(reg->ctrl_reg, base + REG_MMU_CTRL_REG); + writel_relaxed(reg->int_control0, base + REG_MMU_INT_CONTROL); + writel_relaxed(data->protect_base, base + REG_MMU_IVRP_PADDR); + return 0; +} + +const struct dev_pm_ops mtk_iommu_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume) +}; + +static struct platform_driver mtk_iommu_driver = { + .probe = mtk_iommu_probe, + .remove = mtk_iommu_remove, + .driver = { + .name = "mtk-iommu", + .of_match_table = mtk_iommu_of_ids, + .pm = &mtk_iommu_pm_ops, + } +}; + +static int __init m4u_init(void) +{ + return platform_driver_register(&mtk_iommu_driver); +} + +static void __exit m4u_exit(void) +{ + return platform_driver_unregister(&mtk_iommu_driver); +} + +subsys_initcall(m4u_init); +module_exit(m4u_exit); + +MODULE_DESCRIPTION("IOMMU API for MTK architected m4u v1 implementations"); +MODULE_AUTHOR("Honghui Zhang "); +MODULE_LICENSE("GPL v2"); -- cgit v0.10.2 From 131bc8ebb46a84f93d90a6d95347cfb9402f58a3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 21 Jun 2016 11:52:13 +0200 Subject: iommu/mediatek: Make mtk_iommu_pm_ops static The symbol exists elsewhere already, so that is fails to link if the symbol is non-static. Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 294485d..b8aeb07 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -695,7 +695,7 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) return 0; } -const struct dev_pm_ops mtk_iommu_pm_ops = { +static const struct dev_pm_ops mtk_iommu_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume) }; -- cgit v0.10.2 From 6ae5343c26f9cba5e9ef8ba6f23b5bb255ebc798 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 8 Jun 2016 19:31:10 +0100 Subject: iommu/exynos: update to use iommu big-endian Add initial support for big endian by always writing the pte in le32. Note, revisit if hardware capable of doing big endian fetches. Signed-off-by: Ben Dooks Acked-by: Marek Szyprowski Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 633e6d0..33dcc29 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -54,6 +54,10 @@ typedef u32 sysmmu_pte_t; #define lv2ent_small(pent) ((*(pent) & 2) == 2) #define lv2ent_large(pent) ((*(pent) & 3) == 1) +#ifdef CONFIG_BIG_ENDIAN +#warning "revisit driver if we can enable big-endian ptes" +#endif + /* * v1.x - v3.x SYSMMU supports 32bit physical and 32bit virtual address spaces * v5.0 introduced support for 36bit physical address space by shifting @@ -710,7 +714,7 @@ static inline void update_pte(sysmmu_pte_t *ent, sysmmu_pte_t val) { dma_sync_single_for_cpu(dma_dev, virt_to_phys(ent), sizeof(*ent), DMA_TO_DEVICE); - *ent = val; + *ent = cpu_to_le32(val); dma_sync_single_for_device(dma_dev, virt_to_phys(ent), sizeof(*ent), DMA_TO_DEVICE); } -- cgit v0.10.2 From 109bd48ea2e1fb4e924712018397a51c7b2aaadd Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Mon, 13 Jun 2016 17:06:02 +0530 Subject: iommu/msm: Add DT adaptation The driver currently works based on platform data. Remove this and add support for DT. A single master can have multiple ports connected to more than one iommu. 
master | | | ------------------------ | | IOMMU0 IOMMU1 | | ctx0 ctx1 ctx0 ctx1 This association of master and iommus/contexts were previously represented by platform data parent/child device details. The client drivers were responsible for programming all of the iommus/contexts for the device. Now while adapting to generic DT bindings we maintain the list of iommus, contexts that each master domain is connected to and program all of them on attach/detach. Signed-off-by: Sricharan R Tested-by: Archit Taneja Tested-by: Srinivas Kandagatla Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index e321fa5..bc1a4e3 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -48,6 +48,7 @@ __asm__ __volatile__ ( \ static int msm_iommu_tex_class[4]; DEFINE_SPINLOCK(msm_iommu_lock); +static LIST_HEAD(qcom_iommu_devices); struct msm_priv { unsigned long *pgtable; @@ -60,35 +61,37 @@ static struct msm_priv *to_msm_priv(struct iommu_domain *dom) return container_of(dom, struct msm_priv, domain); } -static int __enable_clocks(struct msm_iommu_drvdata *drvdata) +static int __enable_clocks(struct msm_iommu_dev *iommu) { int ret; - ret = clk_enable(drvdata->pclk); + ret = clk_enable(iommu->pclk); if (ret) goto fail; - if (drvdata->clk) { - ret = clk_enable(drvdata->clk); + if (iommu->clk) { + ret = clk_enable(iommu->clk); if (ret) - clk_disable(drvdata->pclk); + clk_disable(iommu->pclk); } fail: return ret; } -static void __disable_clocks(struct msm_iommu_drvdata *drvdata) +static void __disable_clocks(struct msm_iommu_dev *iommu) { - clk_disable(drvdata->clk); - clk_disable(drvdata->pclk); + if (iommu->clk) + clk_disable(iommu->clk); + clk_disable(iommu->pclk); } static int __flush_iotlb(struct iommu_domain *domain) { struct msm_priv *priv = to_msm_priv(domain); - struct msm_iommu_drvdata *iommu_drvdata; - struct msm_iommu_ctx_drvdata *ctx_drvdata; + struct msm_iommu_dev *iommu = NULL; + struct msm_iommu_ctx_dev *master; int ret = 0; + #ifndef CONFIG_IOMMU_PGTABLES_L2 unsigned long *fl_table = priv->pgtable; int i; @@ -105,24 +108,67 @@ static int __flush_iotlb(struct iommu_domain *domain) } #endif - list_for_each_entry(ctx_drvdata, &priv->list_attached, attached_elm) { - - BUG_ON(!ctx_drvdata->pdev || !ctx_drvdata->pdev->dev.parent); - - iommu_drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent); - BUG_ON(!iommu_drvdata); - - ret = __enable_clocks(iommu_drvdata); + list_for_each_entry(iommu, &priv->list_attached, dom_node) { + ret = __enable_clocks(iommu); if (ret) goto fail; - SET_CTX_TLBIALL(iommu_drvdata->base, ctx_drvdata->num, 0); - __disable_clocks(iommu_drvdata); + list_for_each_entry(master, &iommu->ctx_list, list) + SET_CTX_TLBIALL(iommu->base, master->num, 0); + + __disable_clocks(iommu); } fail: return ret; } +static int msm_iommu_alloc_ctx(unsigned long *map, int start, int end) +{ + int idx; + + do { + idx = find_next_zero_bit(map, end, start); + if (idx == end) + return -ENOSPC; + } while (test_and_set_bit(idx, map)); + + return idx; +} + +static void msm_iommu_free_ctx(unsigned long *map, int idx) +{ + clear_bit(idx, map); +} + +static void config_mids(struct msm_iommu_dev *iommu, + struct msm_iommu_ctx_dev *master) +{ + int mid, ctx, i; + + for (i = 0; i < master->num_mids; i++) { + mid = master->mids[i]; + ctx = master->num; + + SET_M2VCBR_N(iommu->base, mid, 0); + SET_CBACR_N(iommu->base, ctx, 0); + + /* Set VMID = 0 */ + SET_VMID(iommu->base, mid, 0); + + /* Set the context number for that MID to this context */ + 
SET_CBNDX(iommu->base, mid, ctx); + + /* Set MID associated with this context bank to 0*/ + SET_CBVMID(iommu->base, ctx, 0); + + /* Set the ASID for TLB tagging for this context */ + SET_CONTEXTIDR_ASID(iommu->base, ctx, ctx); + + /* Set security bit override to be Non-secure */ + SET_NSCFG(iommu->base, mid, 3); + } +} + static void __reset_context(void __iomem *base, int ctx) { SET_BPRCOSH(base, ctx, 0); @@ -272,94 +318,76 @@ static void msm_iommu_domain_free(struct iommu_domain *domain) static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) { - struct msm_priv *priv; - struct msm_iommu_ctx_dev *ctx_dev; - struct msm_iommu_drvdata *iommu_drvdata; - struct msm_iommu_ctx_drvdata *ctx_drvdata; - struct msm_iommu_ctx_drvdata *tmp_drvdata; int ret = 0; unsigned long flags; + struct msm_iommu_dev *iommu; + struct msm_priv *priv = to_msm_priv(domain); + struct msm_iommu_ctx_dev *master; spin_lock_irqsave(&msm_iommu_lock, flags); - - priv = to_msm_priv(domain); - - if (!dev) { - ret = -EINVAL; - goto fail; - } - - iommu_drvdata = dev_get_drvdata(dev->parent); - ctx_drvdata = dev_get_drvdata(dev); - ctx_dev = dev->platform_data; - - if (!iommu_drvdata || !ctx_drvdata || !ctx_dev) { - ret = -EINVAL; - goto fail; - } - - if (!list_empty(&ctx_drvdata->attached_elm)) { - ret = -EBUSY; - goto fail; - } - - list_for_each_entry(tmp_drvdata, &priv->list_attached, attached_elm) - if (tmp_drvdata == ctx_drvdata) { - ret = -EBUSY; - goto fail; + list_for_each_entry(iommu, &qcom_iommu_devices, dev_node) { + master = list_first_entry(&iommu->ctx_list, + struct msm_iommu_ctx_dev, + list); + if (master->of_node == dev->of_node) { + ret = __enable_clocks(iommu); + if (ret) + goto fail; + + list_for_each_entry(master, &iommu->ctx_list, list) { + if (master->num) { + dev_err(dev, "domain already attached"); + ret = -EEXIST; + goto fail; + } + master->num = + msm_iommu_alloc_ctx(iommu->context_map, + 0, iommu->ncb); + if (IS_ERR_VALUE(master->num)) { + ret = -ENODEV; + goto fail; + } + config_mids(iommu, master); + __program_context(iommu->base, master->num, + __pa(priv->pgtable)); + } + __disable_clocks(iommu); + list_add(&iommu->dom_node, &priv->list_attached); } + } - ret = __enable_clocks(iommu_drvdata); - if (ret) - goto fail; - - __program_context(iommu_drvdata->base, ctx_dev->num, - __pa(priv->pgtable)); - - __disable_clocks(iommu_drvdata); - list_add(&(ctx_drvdata->attached_elm), &priv->list_attached); ret = __flush_iotlb(domain); - fail: spin_unlock_irqrestore(&msm_iommu_lock, flags); + return ret; } static void msm_iommu_detach_dev(struct iommu_domain *domain, struct device *dev) { - struct msm_priv *priv; - struct msm_iommu_ctx_dev *ctx_dev; - struct msm_iommu_drvdata *iommu_drvdata; - struct msm_iommu_ctx_drvdata *ctx_drvdata; + struct msm_priv *priv = to_msm_priv(domain); unsigned long flags; + struct msm_iommu_dev *iommu; + struct msm_iommu_ctx_dev *master; int ret; spin_lock_irqsave(&msm_iommu_lock, flags); - priv = to_msm_priv(domain); - - if (!dev) - goto fail; - - iommu_drvdata = dev_get_drvdata(dev->parent); - ctx_drvdata = dev_get_drvdata(dev); - ctx_dev = dev->platform_data; - - if (!iommu_drvdata || !ctx_drvdata || !ctx_dev) - goto fail; - ret = __flush_iotlb(domain); if (ret) goto fail; - ret = __enable_clocks(iommu_drvdata); - if (ret) - goto fail; - - __reset_context(iommu_drvdata->base, ctx_dev->num); - __disable_clocks(iommu_drvdata); - list_del_init(&ctx_drvdata->attached_elm); + list_for_each_entry(iommu, &priv->list_attached, dom_node) { + ret = 
__enable_clocks(iommu); + if (ret) + goto fail; + list_for_each_entry(master, &iommu->ctx_list, list) { + msm_iommu_free_ctx(iommu->context_map, master->num); + __reset_context(iommu->base, master->num); + } + __disable_clocks(iommu); + } fail: spin_unlock_irqrestore(&msm_iommu_lock, flags); } @@ -555,47 +583,46 @@ static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t va) { struct msm_priv *priv; - struct msm_iommu_drvdata *iommu_drvdata; - struct msm_iommu_ctx_drvdata *ctx_drvdata; + struct msm_iommu_dev *iommu; + struct msm_iommu_ctx_dev *master; unsigned int par; unsigned long flags; - void __iomem *base; phys_addr_t ret = 0; - int ctx; spin_lock_irqsave(&msm_iommu_lock, flags); priv = to_msm_priv(domain); - if (list_empty(&priv->list_attached)) - goto fail; + iommu = list_first_entry(&priv->list_attached, + struct msm_iommu_dev, dom_node); - ctx_drvdata = list_entry(priv->list_attached.next, - struct msm_iommu_ctx_drvdata, attached_elm); - iommu_drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent); + if (list_empty(&iommu->ctx_list)) + goto fail; - base = iommu_drvdata->base; - ctx = ctx_drvdata->num; + master = list_first_entry(&iommu->ctx_list, + struct msm_iommu_ctx_dev, list); + if (!master) + goto fail; - ret = __enable_clocks(iommu_drvdata); + ret = __enable_clocks(iommu); if (ret) goto fail; /* Invalidate context TLB */ - SET_CTX_TLBIALL(base, ctx, 0); - SET_V2PPR(base, ctx, va & V2Pxx_VA); + SET_CTX_TLBIALL(iommu->base, master->num, 0); + SET_V2PPR(iommu->base, master->num, va & V2Pxx_VA); - par = GET_PAR(base, ctx); + par = GET_PAR(iommu->base, master->num); /* We are dealing with a supersection */ - if (GET_NOFAULT_SS(base, ctx)) + if (GET_NOFAULT_SS(iommu->base, master->num)) ret = (par & 0xFF000000) | (va & 0x00FFFFFF); else /* Upper 20 bits from PAR, lower 12 from VA */ ret = (par & 0xFFFFF000) | (va & 0x00000FFF); - if (GET_FAULT(base, ctx)) + if (GET_FAULT(iommu->base, master->num)) ret = 0; - __disable_clocks(iommu_drvdata); + __disable_clocks(iommu); fail: spin_unlock_irqrestore(&msm_iommu_lock, flags); return ret; @@ -635,37 +662,34 @@ static void print_ctx_regs(void __iomem *base, int ctx) irqreturn_t msm_iommu_fault_handler(int irq, void *dev_id) { - struct msm_iommu_drvdata *drvdata = dev_id; - void __iomem *base; + struct msm_iommu_dev *iommu = dev_id; unsigned int fsr; int i, ret; spin_lock(&msm_iommu_lock); - if (!drvdata) { + if (!iommu) { pr_err("Invalid device ID in context interrupt handler\n"); goto fail; } - base = drvdata->base; - pr_err("Unexpected IOMMU page fault!\n"); - pr_err("base = %08x\n", (unsigned int) base); + pr_err("base = %08x\n", (unsigned int)iommu->base); - ret = __enable_clocks(drvdata); + ret = __enable_clocks(iommu); if (ret) goto fail; - for (i = 0; i < drvdata->ncb; i++) { - fsr = GET_FSR(base, i); + for (i = 0; i < iommu->ncb; i++) { + fsr = GET_FSR(iommu->base, i); if (fsr) { pr_err("Fault occurred in context %d.\n", i); pr_err("Interesting registers:\n"); - print_ctx_regs(base, i); - SET_FSR(base, i, 0x4000000F); + print_ctx_regs(iommu->base, i); + SET_FSR(iommu->base, i, 0x4000000F); } } - __disable_clocks(drvdata); + __disable_clocks(iommu); fail: spin_unlock(&msm_iommu_lock); return 0; diff --git a/drivers/iommu/msm_iommu.h b/drivers/iommu/msm_iommu.h index 5c7c955..4ca25d5 100644 --- a/drivers/iommu/msm_iommu.h +++ b/drivers/iommu/msm_iommu.h @@ -42,74 +42,53 @@ */ #define MAX_NUM_MIDS 32 +/* Maximum number of context banks that can be present in IOMMU */ +#define IOMMU_MAX_CBS 128 + /** * 
struct msm_iommu_dev - a single IOMMU hardware instance - * name Human-readable name given to this IOMMU HW instance * ncb Number of context banks present on this IOMMU HW instance + * dev: IOMMU device + * irq: Interrupt number + * clk: The bus clock for this IOMMU hardware instance + * pclk: The clock for the IOMMU bus interconnect + * dev_node: list head in qcom_iommu_device_list + * dom_node: list head for domain + * ctx_list: list of 'struct msm_iommu_ctx_dev' + * context_map: Bitmap to track allocated context banks */ struct msm_iommu_dev { - const char *name; + void __iomem *base; int ncb; + struct device *dev; + int irq; + struct clk *clk; + struct clk *pclk; + struct list_head dev_node; + struct list_head dom_node; + struct list_head ctx_list; + DECLARE_BITMAP(context_map, IOMMU_MAX_CBS); }; /** * struct msm_iommu_ctx_dev - an IOMMU context bank instance - * name Human-readable name given to this context bank + * of_node node ptr of client device * num Index of this context bank within the hardware * mids List of Machine IDs that are to be mapped into this context * bank, terminated by -1. The MID is a set of signals on the * AXI bus that identifies the function associated with a specific * memory request. (See ARM spec). + * num_mids Total number of mids + * node list head in ctx_list */ struct msm_iommu_ctx_dev { - const char *name; + struct device_node *of_node; int num; int mids[MAX_NUM_MIDS]; + int num_mids; + struct list_head list; }; - -/** - * struct msm_iommu_drvdata - A single IOMMU hardware instance - * @base: IOMMU config port base address (VA) - * @ncb The number of contexts on this IOMMU - * @irq: Interrupt number - * @clk: The bus clock for this IOMMU hardware instance - * @pclk: The clock for the IOMMU bus interconnect - * - * A msm_iommu_drvdata holds the global driver data about a single piece - * of an IOMMU hardware instance. - */ -struct msm_iommu_drvdata { - void __iomem *base; - int irq; - int ncb; - struct clk *clk; - struct clk *pclk; -}; - -/** - * struct msm_iommu_ctx_drvdata - an IOMMU context bank instance - * @num: Hardware context number of this context - * @pdev: Platform device associated wit this HW instance - * @attached_elm: List element for domains to track which devices are - * attached to them - * - * A msm_iommu_ctx_drvdata holds the driver data for a single context bank - * within each IOMMU hardware instance - */ -struct msm_iommu_ctx_drvdata { - int num; - struct platform_device *pdev; - struct list_head attached_elm; -}; - -/* - * Look up an IOMMU context device by its context name. NULL if none found. - * Useful for testing and drivers that do not yet fully have IOMMU stuff in - * their platform devices. - */ -struct device *msm_iommu_get_ctx(const char *ctx_name); - /* * Interrupt handler for the IOMMU context fault interrupt. 
Hooking the * interrupt is not supported in the API yet, but this will print an error diff --git a/drivers/iommu/msm_iommu_dev.c b/drivers/iommu/msm_iommu_dev.c index 4b09e81..be01cc4 100644 --- a/drivers/iommu/msm_iommu_dev.c +++ b/drivers/iommu/msm_iommu_dev.c @@ -30,60 +30,6 @@ #include "msm_iommu_hw-8xxx.h" #include "msm_iommu.h" -struct iommu_ctx_iter_data { - /* input */ - const char *name; - - /* output */ - struct device *dev; -}; - -static struct platform_device *msm_iommu_root_dev; - -static int each_iommu_ctx(struct device *dev, void *data) -{ - struct iommu_ctx_iter_data *res = data; - struct msm_iommu_ctx_dev *c = dev->platform_data; - - if (!res || !c || !c->name || !res->name) - return -EINVAL; - - if (!strcmp(res->name, c->name)) { - res->dev = dev; - return 1; - } - return 0; -} - -static int each_iommu(struct device *dev, void *data) -{ - return device_for_each_child(dev, data, each_iommu_ctx); -} - -struct device *msm_iommu_get_ctx(const char *ctx_name) -{ - struct iommu_ctx_iter_data r; - int found; - - if (!msm_iommu_root_dev) { - pr_err("No root IOMMU device.\n"); - goto fail; - } - - r.name = ctx_name; - found = device_for_each_child(&msm_iommu_root_dev->dev, &r, each_iommu); - - if (!found) { - pr_err("Could not find context <%s>\n", ctx_name); - goto fail; - } - - return r.dev; -fail: - return NULL; -} -EXPORT_SYMBOL(msm_iommu_get_ctx); - static void msm_iommu_reset(void __iomem *base, int ncb) { int ctx; @@ -128,237 +74,122 @@ static void msm_iommu_reset(void __iomem *base, int ncb) static int msm_iommu_probe(struct platform_device *pdev) { struct resource *r; - struct clk *iommu_clk; - struct clk *iommu_pclk; - struct msm_iommu_drvdata *drvdata; - struct msm_iommu_dev *iommu_dev = dev_get_platdata(&pdev->dev); - void __iomem *regs_base; - int ret, irq, par; + struct msm_iommu_dev *iommu; + int ret, par, val; - if (pdev->id == -1) { - msm_iommu_root_dev = pdev; - return 0; - } + iommu = devm_kzalloc(&pdev->dev, sizeof(*iommu), GFP_KERNEL); + if (!iommu) + return -ENODEV; - drvdata = kzalloc(sizeof(*drvdata), GFP_KERNEL); + iommu->dev = &pdev->dev; + INIT_LIST_HEAD(&iommu->ctx_list); - if (!drvdata) { - ret = -ENOMEM; - goto fail; + iommu->pclk = devm_clk_get(iommu->dev, "smmu_pclk"); + if (IS_ERR(iommu->pclk)) { + dev_err(iommu->dev, "could not get smmu_pclk\n"); + return PTR_ERR(iommu->pclk); } - if (!iommu_dev) { - ret = -ENODEV; - goto fail; + ret = clk_prepare(iommu->pclk); + if (ret) { + dev_err(iommu->dev, "could not prepare smmu_pclk\n"); + return ret; } - iommu_pclk = clk_get(NULL, "smmu_pclk"); - if (IS_ERR(iommu_pclk)) { - ret = -ENODEV; - goto fail; + iommu->clk = devm_clk_get(iommu->dev, "iommu_clk"); + if (IS_ERR(iommu->clk)) { + dev_err(iommu->dev, "could not get iommu_clk\n"); + clk_unprepare(iommu->pclk); + return PTR_ERR(iommu->clk); } - ret = clk_prepare_enable(iommu_pclk); - if (ret) - goto fail_enable; - - iommu_clk = clk_get(&pdev->dev, "iommu_clk"); - - if (!IS_ERR(iommu_clk)) { - if (clk_get_rate(iommu_clk) == 0) - clk_set_rate(iommu_clk, 1); - - ret = clk_prepare_enable(iommu_clk); - if (ret) { - clk_put(iommu_clk); - goto fail_pclk; - } - } else - iommu_clk = NULL; + ret = clk_prepare(iommu->clk); + if (ret) { + dev_err(iommu->dev, "could not prepare iommu_clk\n"); + clk_unprepare(iommu->pclk); + return ret; + } - r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "physbase"); - regs_base = devm_ioremap_resource(&pdev->dev, r); - if (IS_ERR(regs_base)) { - ret = PTR_ERR(regs_base); - goto fail_clk; + r = platform_get_resource(pdev, 
IORESOURCE_MEM, 0); + iommu->base = devm_ioremap_resource(iommu->dev, r); + if (IS_ERR(iommu->base)) { + dev_err(iommu->dev, "could not get iommu base\n"); + ret = PTR_ERR(iommu->base); + goto fail; } - irq = platform_get_irq_byname(pdev, "secure_irq"); - if (irq < 0) { + iommu->irq = platform_get_irq(pdev, 0); + if (iommu->irq < 0) { + dev_err(iommu->dev, "could not get iommu irq\n"); ret = -ENODEV; - goto fail_clk; + goto fail; } - msm_iommu_reset(regs_base, iommu_dev->ncb); + ret = of_property_read_u32(iommu->dev->of_node, "ncb", &val); + if (ret) { + dev_err(iommu->dev, "could not get ncb\n"); + goto fail; + } + iommu->ncb = val; - SET_M(regs_base, 0, 1); - SET_PAR(regs_base, 0, 0); - SET_V2PCFG(regs_base, 0, 1); - SET_V2PPR(regs_base, 0, 0); - par = GET_PAR(regs_base, 0); - SET_V2PCFG(regs_base, 0, 0); - SET_M(regs_base, 0, 0); + msm_iommu_reset(iommu->base, iommu->ncb); + SET_M(iommu->base, 0, 1); + SET_PAR(iommu->base, 0, 0); + SET_V2PCFG(iommu->base, 0, 1); + SET_V2PPR(iommu->base, 0, 0); + par = GET_PAR(iommu->base, 0); + SET_V2PCFG(iommu->base, 0, 0); + SET_M(iommu->base, 0, 0); if (!par) { - pr_err("%s: Invalid PAR value detected\n", iommu_dev->name); + pr_err("Invalid PAR value detected\n"); ret = -ENODEV; - goto fail_clk; + goto fail; } - ret = request_irq(irq, msm_iommu_fault_handler, 0, - "msm_iommu_secure_irpt_handler", drvdata); + ret = devm_request_threaded_irq(iommu->dev, iommu->irq, NULL, + msm_iommu_fault_handler, + IRQF_ONESHOT | IRQF_SHARED, + "msm_iommu_secure_irpt_handler", + iommu); if (ret) { - pr_err("Request IRQ %d failed with ret=%d\n", irq, ret); - goto fail_clk; + pr_err("Request IRQ %d failed with ret=%d\n", iommu->irq, ret); + goto fail; } + list_add(&iommu->dev_node, &qcom_iommu_devices); - drvdata->pclk = iommu_pclk; - drvdata->clk = iommu_clk; - drvdata->base = regs_base; - drvdata->irq = irq; - drvdata->ncb = iommu_dev->ncb; - - pr_info("device %s mapped at %p, irq %d with %d ctx banks\n", - iommu_dev->name, regs_base, irq, iommu_dev->ncb); - - platform_set_drvdata(pdev, drvdata); - - clk_disable(iommu_clk); - - clk_disable(iommu_pclk); - - return 0; -fail_clk: - if (iommu_clk) { - clk_disable(iommu_clk); - clk_put(iommu_clk); - } -fail_pclk: - clk_disable_unprepare(iommu_pclk); -fail_enable: - clk_put(iommu_pclk); + pr_info("device mapped at %p, irq %d with %d ctx banks\n", + iommu->base, iommu->irq, iommu->ncb); fail: - kfree(drvdata); + clk_unprepare(iommu->clk); + clk_unprepare(iommu->pclk); return ret; } +static const struct of_device_id msm_iommu_dt_match[] = { + { .compatible = "qcom,apq8064-iommu" }, + {} +}; + static int msm_iommu_remove(struct platform_device *pdev) { - struct msm_iommu_drvdata *drv = NULL; + struct msm_iommu_dev *iommu = platform_get_drvdata(pdev); - drv = platform_get_drvdata(pdev); - if (drv) { - if (drv->clk) { - clk_unprepare(drv->clk); - clk_put(drv->clk); - } - clk_unprepare(drv->pclk); - clk_put(drv->pclk); - memset(drv, 0, sizeof(*drv)); - kfree(drv); - } - return 0; -} - -static int msm_iommu_ctx_probe(struct platform_device *pdev) -{ - struct msm_iommu_ctx_dev *c = dev_get_platdata(&pdev->dev); - struct msm_iommu_drvdata *drvdata; - struct msm_iommu_ctx_drvdata *ctx_drvdata; - int i, ret; - - if (!c || !pdev->dev.parent) - return -EINVAL; - - drvdata = dev_get_drvdata(pdev->dev.parent); - if (!drvdata) - return -ENODEV; - - ctx_drvdata = kzalloc(sizeof(*ctx_drvdata), GFP_KERNEL); - if (!ctx_drvdata) - return -ENOMEM; - - ctx_drvdata->num = c->num; - ctx_drvdata->pdev = pdev; - - 
INIT_LIST_HEAD(&ctx_drvdata->attached_elm); - platform_set_drvdata(pdev, ctx_drvdata); - - ret = clk_prepare_enable(drvdata->pclk); - if (ret) - goto fail; - - if (drvdata->clk) { - ret = clk_prepare_enable(drvdata->clk); - if (ret) { - clk_disable_unprepare(drvdata->pclk); - goto fail; - } - } - - /* Program the M2V tables for this context */ - for (i = 0; i < MAX_NUM_MIDS; i++) { - int mid = c->mids[i]; - if (mid == -1) - break; - - SET_M2VCBR_N(drvdata->base, mid, 0); - SET_CBACR_N(drvdata->base, c->num, 0); - - /* Set VMID = 0 */ - SET_VMID(drvdata->base, mid, 0); - - /* Set the context number for that MID to this context */ - SET_CBNDX(drvdata->base, mid, c->num); - - /* Set MID associated with this context bank to 0*/ - SET_CBVMID(drvdata->base, c->num, 0); - - /* Set the ASID for TLB tagging for this context */ - SET_CONTEXTIDR_ASID(drvdata->base, c->num, c->num); - - /* Set security bit override to be Non-secure */ - SET_NSCFG(drvdata->base, mid, 3); - } - - clk_disable(drvdata->clk); - clk_disable(drvdata->pclk); - - dev_info(&pdev->dev, "context %s using bank %d\n", c->name, c->num); - return 0; -fail: - kfree(ctx_drvdata); - return ret; -} - -static int msm_iommu_ctx_remove(struct platform_device *pdev) -{ - struct msm_iommu_ctx_drvdata *drv = NULL; - drv = platform_get_drvdata(pdev); - if (drv) { - memset(drv, 0, sizeof(struct msm_iommu_ctx_drvdata)); - kfree(drv); - } + clk_unprepare(iommu->clk); + clk_unprepare(iommu->pclk); return 0; } static struct platform_driver msm_iommu_driver = { .driver = { .name = "msm_iommu", + .of_match_table = msm_iommu_dt_match, }, .probe = msm_iommu_probe, .remove = msm_iommu_remove, }; -static struct platform_driver msm_iommu_ctx_driver = { - .driver = { - .name = "msm_iommu_ctx", - }, - .probe = msm_iommu_ctx_probe, - .remove = msm_iommu_ctx_remove, -}; - static struct platform_driver * const drivers[] = { &msm_iommu_driver, &msm_iommu_ctx_driver, -- cgit v0.10.2 From 00c698e09e6b8d1008d0686330c6ec3904ea6bb2 Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Mon, 13 Jun 2016 17:06:03 +0530 Subject: documentation: iommu: Add bindings for msm,iommu-v0 ip The MSM IOMMU is an implementation compatible with the ARM VMSA short descriptor page tables. It provides address translation for bus masters outside of the CPU, each connected to the IOMMU through a port called micro-TLB. Adding the DT bindings for the same. Signed-off-by: Sricharan R Acked-by: Rob Herring Signed-off-by: Joerg Roedel diff --git a/Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt b/Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt new file mode 100644 index 0000000..2023638 --- /dev/null +++ b/Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt @@ -0,0 +1,64 @@ +* QCOM IOMMU + +The MSM IOMMU is an implementation compatible with the ARM VMSA short +descriptor page tables. It provides address translation for bus masters outside +of the CPU, each connected to the IOMMU through a port called micro-TLB. + +Required Properties: + + - compatible: Must contain "qcom,apq8064-iommu". + - reg: Base address and size of the IOMMU registers. + - interrupts: Specifiers for the MMU fault interrupts. For instances that + support secure mode two interrupts must be specified, for non-secure and + secure mode, in that order. For instances that don't support secure mode a + single interrupt must be specified. + - #iommu-cells: The number of cells needed to specify the stream id. This + is always 1. + - qcom,ncb: The total number of context banks in the IOMMU. 
+ - clocks : List of clocks to be used during SMMU register access. See + Documentation/devicetree/bindings/clock/clock-bindings.txt + for information about the format. For each clock specified + here, there must be a corresponding entry in clock-names + (see below). + + - clock-names : List of clock names corresponding to the clocks specified in + the "clocks" property (above). + Should be "smmu_pclk" for specifying the interface clock + required for iommu's register accesses. + Should be "smmu_clk" for specifying the functional clock + required by iommu for bus accesses. + +Each bus master connected to an IOMMU must reference the IOMMU in its device +node with the following property: + + - iommus: A reference to the IOMMU in multiple cells. The first cell is a + phandle to the IOMMU and the second cell is the stream id. + A single master device can be connected to more than one iommu + and multiple contexts in each of the iommu. So multiple entries + are required to list all the iommus and the stream ids that the + master is connected to. + +Example: mdp iommu and its bus master + + mdp_port0: iommu@7500000 { + compatible = "qcom,apq8064-iommu"; + #iommu-cells = <1>; + clock-names = + "smmu_pclk", + "smmu_clk"; + clocks = + <&mmcc SMMU_AHB_CLK>, + <&mmcc MDP_AXI_CLK>; + reg = <0x07500000 0x100000>; + interrupts = + , + ; + qcom,ncb = <2>; + }; + + mdp: qcom,mdp@5100000 { + compatible = "qcom,mdp"; + ... + iommus = <&mdp_port0 0 + &mdp_port0 2>; + }; -- cgit v0.10.2 From f7f125ef0b0210a2eb269148df9a1d641521857b Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Mon, 13 Jun 2016 17:06:04 +0530 Subject: iommu/msm: Move the contents from msm_iommu_dev.c to msm_iommu.c There are only two functions left in msm_iommu_dev.c. Move it to msm_iommu.c and delete the file. 
Signed-off-by: Sricharan R Tested-by: Archit Taneja Tested-by: Srinivas Kandagatla Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index c6edb31..7fe479f 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o obj-$(CONFIG_IOMMU_IOVA) += iova.o obj-$(CONFIG_OF_IOMMU) += of_iommu.o -obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o +obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o obj-$(CONFIG_ARM_SMMU) += arm-smmu.o diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index bc1a4e3..792b352 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -85,6 +86,47 @@ static void __disable_clocks(struct msm_iommu_dev *iommu) clk_disable(iommu->pclk); } +static void msm_iommu_reset(void __iomem *base, int ncb) +{ + int ctx; + + SET_RPUE(base, 0); + SET_RPUEIE(base, 0); + SET_ESRRESTORE(base, 0); + SET_TBE(base, 0); + SET_CR(base, 0); + SET_SPDMBE(base, 0); + SET_TESTBUSCR(base, 0); + SET_TLBRSW(base, 0); + SET_GLOBAL_TLBIALL(base, 0); + SET_RPU_ACR(base, 0); + SET_TLBLKCRWE(base, 1); + + for (ctx = 0; ctx < ncb; ctx++) { + SET_BPRCOSH(base, ctx, 0); + SET_BPRCISH(base, ctx, 0); + SET_BPRCNSH(base, ctx, 0); + SET_BPSHCFG(base, ctx, 0); + SET_BPMTCFG(base, ctx, 0); + SET_ACTLR(base, ctx, 0); + SET_SCTLR(base, ctx, 0); + SET_FSRRESTORE(base, ctx, 0); + SET_TTBR0(base, ctx, 0); + SET_TTBR1(base, ctx, 0); + SET_TTBCR(base, ctx, 0); + SET_BFBCR(base, ctx, 0); + SET_PAR(base, ctx, 0); + SET_FAR(base, ctx, 0); + SET_CTX_TLBIALL(base, ctx, 0); + SET_TLBFLPTER(base, ctx, 0); + SET_TLBSLPTER(base, ctx, 0); + SET_TLBLKCR(base, ctx, 0); + SET_PRRR(base, ctx, 0); + SET_NMRR(base, ctx, 0); + SET_CONTEXTIDR(base, ctx, 0); + } +} + static int __flush_iotlb(struct iommu_domain *domain) { struct msm_priv *priv = to_msm_priv(domain); @@ -708,6 +750,146 @@ static const struct iommu_ops msm_iommu_ops = { .pgsize_bitmap = MSM_IOMMU_PGSIZES, }; +static int msm_iommu_probe(struct platform_device *pdev) +{ + struct resource *r; + struct msm_iommu_dev *iommu; + int ret, par, val; + + iommu = devm_kzalloc(&pdev->dev, sizeof(*iommu), GFP_KERNEL); + if (!iommu) + return -ENODEV; + + iommu->dev = &pdev->dev; + INIT_LIST_HEAD(&iommu->ctx_list); + + iommu->pclk = devm_clk_get(iommu->dev, "smmu_pclk"); + if (IS_ERR(iommu->pclk)) { + dev_err(iommu->dev, "could not get smmu_pclk\n"); + return PTR_ERR(iommu->pclk); + } + + ret = clk_prepare(iommu->pclk); + if (ret) { + dev_err(iommu->dev, "could not prepare smmu_pclk\n"); + return ret; + } + + iommu->clk = devm_clk_get(iommu->dev, "iommu_clk"); + if (IS_ERR(iommu->clk)) { + dev_err(iommu->dev, "could not get iommu_clk\n"); + clk_unprepare(iommu->pclk); + return PTR_ERR(iommu->clk); + } + + ret = clk_prepare(iommu->clk); + if (ret) { + dev_err(iommu->dev, "could not prepare iommu_clk\n"); + clk_unprepare(iommu->pclk); + return ret; + } + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + iommu->base = devm_ioremap_resource(iommu->dev, r); + if (IS_ERR(iommu->base)) { + dev_err(iommu->dev, "could not get iommu base\n"); + ret = PTR_ERR(iommu->base); + goto fail; + } + + iommu->irq = platform_get_irq(pdev, 0); + if (iommu->irq < 0) { + dev_err(iommu->dev, "could not get iommu irq\n"); + ret = -ENODEV; + 
goto fail; + } + + ret = of_property_read_u32(iommu->dev->of_node, "qcom,ncb", &val); + if (ret) { + dev_err(iommu->dev, "could not get ncb\n"); + goto fail; + } + iommu->ncb = val; + + msm_iommu_reset(iommu->base, iommu->ncb); + SET_M(iommu->base, 0, 1); + SET_PAR(iommu->base, 0, 0); + SET_V2PCFG(iommu->base, 0, 1); + SET_V2PPR(iommu->base, 0, 0); + par = GET_PAR(iommu->base, 0); + SET_V2PCFG(iommu->base, 0, 0); + SET_M(iommu->base, 0, 0); + + if (!par) { + pr_err("Invalid PAR value detected\n"); + ret = -ENODEV; + goto fail; + } + + ret = devm_request_threaded_irq(iommu->dev, iommu->irq, NULL, + msm_iommu_fault_handler, + IRQF_ONESHOT | IRQF_SHARED, + "msm_iommu_secure_irpt_handler", + iommu); + if (ret) { + pr_err("Request IRQ %d failed with ret=%d\n", iommu->irq, ret); + goto fail; + } + + list_add(&iommu->dev_node, &qcom_iommu_devices); + + pr_info("device mapped at %p, irq %d with %d ctx banks\n", + iommu->base, iommu->irq, iommu->ncb); + + return ret; +fail: + clk_unprepare(iommu->clk); + clk_unprepare(iommu->pclk); + return ret; +} + +static const struct of_device_id msm_iommu_dt_match[] = { + { .compatible = "qcom,apq8064-iommu" }, + {} +}; + +static int msm_iommu_remove(struct platform_device *pdev) +{ + struct msm_iommu_dev *iommu = platform_get_drvdata(pdev); + + clk_unprepare(iommu->clk); + clk_unprepare(iommu->pclk); + return 0; +} + +static struct platform_driver msm_iommu_driver = { + .driver = { + .name = "msm_iommu", + .of_match_table = msm_iommu_dt_match, + }, + .probe = msm_iommu_probe, + .remove = msm_iommu_remove, +}; + +static int __init msm_iommu_driver_init(void) +{ + int ret; + + ret = platform_driver_register(&msm_iommu_driver); + if (ret != 0) + pr_err("Failed to register IOMMU driver\n"); + + return ret; +} + +static void __exit msm_iommu_driver_exit(void) +{ + platform_driver_unregister(&msm_iommu_driver); +} + +subsys_initcall(msm_iommu_driver_init); +module_exit(msm_iommu_driver_exit); + static int __init get_tex_class(int icp, int ocp, int mt, int nos) { int i = 0; diff --git a/drivers/iommu/msm_iommu_dev.c b/drivers/iommu/msm_iommu_dev.c deleted file mode 100644 index be01cc4..0000000 --- a/drivers/iommu/msm_iommu_dev.c +++ /dev/null @@ -1,212 +0,0 @@ -/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. 
- */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "msm_iommu_hw-8xxx.h" -#include "msm_iommu.h" - -static void msm_iommu_reset(void __iomem *base, int ncb) -{ - int ctx; - - SET_RPUE(base, 0); - SET_RPUEIE(base, 0); - SET_ESRRESTORE(base, 0); - SET_TBE(base, 0); - SET_CR(base, 0); - SET_SPDMBE(base, 0); - SET_TESTBUSCR(base, 0); - SET_TLBRSW(base, 0); - SET_GLOBAL_TLBIALL(base, 0); - SET_RPU_ACR(base, 0); - SET_TLBLKCRWE(base, 1); - - for (ctx = 0; ctx < ncb; ctx++) { - SET_BPRCOSH(base, ctx, 0); - SET_BPRCISH(base, ctx, 0); - SET_BPRCNSH(base, ctx, 0); - SET_BPSHCFG(base, ctx, 0); - SET_BPMTCFG(base, ctx, 0); - SET_ACTLR(base, ctx, 0); - SET_SCTLR(base, ctx, 0); - SET_FSRRESTORE(base, ctx, 0); - SET_TTBR0(base, ctx, 0); - SET_TTBR1(base, ctx, 0); - SET_TTBCR(base, ctx, 0); - SET_BFBCR(base, ctx, 0); - SET_PAR(base, ctx, 0); - SET_FAR(base, ctx, 0); - SET_CTX_TLBIALL(base, ctx, 0); - SET_TLBFLPTER(base, ctx, 0); - SET_TLBSLPTER(base, ctx, 0); - SET_TLBLKCR(base, ctx, 0); - SET_PRRR(base, ctx, 0); - SET_NMRR(base, ctx, 0); - SET_CONTEXTIDR(base, ctx, 0); - } -} - -static int msm_iommu_probe(struct platform_device *pdev) -{ - struct resource *r; - struct msm_iommu_dev *iommu; - int ret, par, val; - - iommu = devm_kzalloc(&pdev->dev, sizeof(*iommu), GFP_KERNEL); - if (!iommu) - return -ENODEV; - - iommu->dev = &pdev->dev; - INIT_LIST_HEAD(&iommu->ctx_list); - - iommu->pclk = devm_clk_get(iommu->dev, "smmu_pclk"); - if (IS_ERR(iommu->pclk)) { - dev_err(iommu->dev, "could not get smmu_pclk\n"); - return PTR_ERR(iommu->pclk); - } - - ret = clk_prepare(iommu->pclk); - if (ret) { - dev_err(iommu->dev, "could not prepare smmu_pclk\n"); - return ret; - } - - iommu->clk = devm_clk_get(iommu->dev, "iommu_clk"); - if (IS_ERR(iommu->clk)) { - dev_err(iommu->dev, "could not get iommu_clk\n"); - clk_unprepare(iommu->pclk); - return PTR_ERR(iommu->clk); - } - - ret = clk_prepare(iommu->clk); - if (ret) { - dev_err(iommu->dev, "could not prepare iommu_clk\n"); - clk_unprepare(iommu->pclk); - return ret; - } - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - iommu->base = devm_ioremap_resource(iommu->dev, r); - if (IS_ERR(iommu->base)) { - dev_err(iommu->dev, "could not get iommu base\n"); - ret = PTR_ERR(iommu->base); - goto fail; - } - - iommu->irq = platform_get_irq(pdev, 0); - if (iommu->irq < 0) { - dev_err(iommu->dev, "could not get iommu irq\n"); - ret = -ENODEV; - goto fail; - } - - ret = of_property_read_u32(iommu->dev->of_node, "ncb", &val); - if (ret) { - dev_err(iommu->dev, "could not get ncb\n"); - goto fail; - } - iommu->ncb = val; - - msm_iommu_reset(iommu->base, iommu->ncb); - SET_M(iommu->base, 0, 1); - SET_PAR(iommu->base, 0, 0); - SET_V2PCFG(iommu->base, 0, 1); - SET_V2PPR(iommu->base, 0, 0); - par = GET_PAR(iommu->base, 0); - SET_V2PCFG(iommu->base, 0, 0); - SET_M(iommu->base, 0, 0); - - if (!par) { - pr_err("Invalid PAR value detected\n"); - ret = -ENODEV; - goto fail; - } - - ret = devm_request_threaded_irq(iommu->dev, iommu->irq, NULL, - msm_iommu_fault_handler, - IRQF_ONESHOT | IRQF_SHARED, - "msm_iommu_secure_irpt_handler", - iommu); - if (ret) { - pr_err("Request IRQ %d failed with ret=%d\n", iommu->irq, ret); - goto fail; - } - - list_add(&iommu->dev_node, &qcom_iommu_devices); - - pr_info("device mapped at %p, irq %d with %d ctx banks\n", - iommu->base, iommu->irq, iommu->ncb); -fail: - clk_unprepare(iommu->clk); - clk_unprepare(iommu->pclk); - return ret; -} - -static 
const struct of_device_id msm_iommu_dt_match[] = { - { .compatible = "qcom,apq8064-iommu" }, - {} -}; - -static int msm_iommu_remove(struct platform_device *pdev) -{ - struct msm_iommu_dev *iommu = platform_get_drvdata(pdev); - - clk_unprepare(iommu->clk); - clk_unprepare(iommu->pclk); - return 0; -} - -static struct platform_driver msm_iommu_driver = { - .driver = { - .name = "msm_iommu", - .of_match_table = msm_iommu_dt_match, - }, - .probe = msm_iommu_probe, - .remove = msm_iommu_remove, -}; - -static struct platform_driver * const drivers[] = { - &msm_iommu_driver, - &msm_iommu_ctx_driver, -}; - -static int __init msm_iommu_driver_init(void) -{ - return platform_register_drivers(drivers, ARRAY_SIZE(drivers)); -} - -static void __exit msm_iommu_driver_exit(void) -{ - platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); -} - -subsys_initcall(msm_iommu_driver_init); -module_exit(msm_iommu_driver_exit); - -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Stepan Moskovchenko "); -- cgit v0.10.2 From f78ebca8ff3d61fb45fef1274595a72d1314d955 Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Mon, 13 Jun 2016 17:06:05 +0530 Subject: iommu/msm: Add support for generic master bindings This adds the xlate callback which gets invoked during device registration from DT. The master devices gets added through this. Signed-off-by: Sricharan R Tested-by: Archit Taneja Tested-by: Srinivas Kandagatla Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 792b352..8ab0643 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -702,6 +703,54 @@ static void print_ctx_regs(void __iomem *base, int ctx) GET_PRRR(base, ctx), GET_NMRR(base, ctx)); } +static void insert_iommu_master(struct device *dev, + struct msm_iommu_dev **iommu, + struct of_phandle_args *spec) +{ + struct msm_iommu_ctx_dev *master = dev->archdata.iommu; + int sid; + + if (list_empty(&(*iommu)->ctx_list)) { + master = kzalloc(sizeof(*master), GFP_ATOMIC); + master->of_node = dev->of_node; + list_add(&master->list, &(*iommu)->ctx_list); + dev->archdata.iommu = master; + } + + for (sid = 0; sid < master->num_mids; sid++) + if (master->mids[sid] == spec->args[0]) { + dev_warn(dev, "Stream ID 0x%hx repeated; ignoring\n", + sid); + return; + } + + master->mids[master->num_mids++] = spec->args[0]; +} + +static int qcom_iommu_of_xlate(struct device *dev, + struct of_phandle_args *spec) +{ + struct msm_iommu_dev *iommu; + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&msm_iommu_lock, flags); + list_for_each_entry(iommu, &qcom_iommu_devices, dev_node) + if (iommu->dev->of_node == spec->np) + break; + + if (!iommu || iommu->dev->of_node != spec->np) { + ret = -ENODEV; + goto fail; + } + + insert_iommu_master(dev, &iommu, spec); +fail: + spin_unlock_irqrestore(&msm_iommu_lock, flags); + + return ret; +} + irqreturn_t msm_iommu_fault_handler(int irq, void *dev_id) { struct msm_iommu_dev *iommu = dev_id; @@ -737,7 +786,7 @@ fail: return 0; } -static const struct iommu_ops msm_iommu_ops = { +static struct iommu_ops msm_iommu_ops = { .capable = msm_iommu_capable, .domain_alloc = msm_iommu_domain_alloc, .domain_free = msm_iommu_domain_free, @@ -748,6 +797,7 @@ static const struct iommu_ops msm_iommu_ops = { .map_sg = default_iommu_map_sg, .iova_to_phys = msm_iommu_iova_to_phys, .pgsize_bitmap = MSM_IOMMU_PGSIZES, + .of_xlate = qcom_iommu_of_xlate, }; static int msm_iommu_probe(struct platform_device *pdev) @@ 
-837,6 +887,7 @@ static int msm_iommu_probe(struct platform_device *pdev) } list_add(&iommu->dev_node, &qcom_iommu_devices); + of_iommu_set_ops(pdev->dev.of_node, &msm_iommu_ops); pr_info("device mapped at %p, irq %d with %d ctx banks\n", iommu->base, iommu->irq, iommu->ncb); @@ -935,7 +986,13 @@ static int __init msm_iommu_init(void) return 0; } -subsys_initcall(msm_iommu_init); +static int __init msm_iommu_of_setup(struct device_node *np) +{ + msm_iommu_init(); + return 0; +} + +IOMMU_OF_DECLARE(msm_iommu_of, "qcom,apq8064-iommu", msm_iommu_of_setup); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Stepan Moskovchenko "); -- cgit v0.10.2 From c9220fbd7741861294dede37465243ee7efdb7bd Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Mon, 13 Jun 2016 17:06:06 +0530 Subject: iommu/msm: use generic ARMV7S short descriptor pagetable ops This iommu uses the armv7 short descriptor format. So use the generic ARMV7S pagetable ops instead of rewriting the same stuff in the driver. Signed-off-by: Sricharan R Tested-by: Archit Taneja Tested-by: Srinivas Kandagatla Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index ad08603..b60e72b 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -91,6 +91,7 @@ config MSM_IOMMU depends on ARCH_MSM8X60 || ARCH_MSM8960 || COMPILE_TEST depends on BROKEN select IOMMU_API + select IOMMU_IO_PGTABLE_ARMV7S help Support for the IOMMUs found on certain Qualcomm SOCs. These IOMMUs allow virtualization of the address space used by most diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 8ab0643..b09692b 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -35,27 +35,27 @@ #include "msm_iommu_hw-8xxx.h" #include "msm_iommu.h" +#include "io-pgtable.h" #define MRC(reg, processor, op1, crn, crm, op2) \ __asm__ __volatile__ ( \ " mrc " #processor "," #op1 ", %0," #crn "," #crm "," #op2 "\n" \ : "=r" (reg)) -#define RCP15_PRRR(reg) MRC(reg, p15, 0, c10, c2, 0) -#define RCP15_NMRR(reg) MRC(reg, p15, 0, c10, c2, 1) - /* bitmap of the page sizes currently supported */ #define MSM_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M) -static int msm_iommu_tex_class[4]; - DEFINE_SPINLOCK(msm_iommu_lock); static LIST_HEAD(qcom_iommu_devices); +static struct iommu_ops msm_iommu_ops; struct msm_priv { - unsigned long *pgtable; struct list_head list_attached; struct iommu_domain domain; + struct io_pgtable_cfg cfg; + struct io_pgtable_ops *iop; + struct device *dev; + spinlock_t pgtlock; /* pagetable lock */ }; static struct msm_priv *to_msm_priv(struct iommu_domain *dom) @@ -122,49 +122,79 @@ static void msm_iommu_reset(void __iomem *base, int ncb) SET_TLBFLPTER(base, ctx, 0); SET_TLBSLPTER(base, ctx, 0); SET_TLBLKCR(base, ctx, 0); - SET_PRRR(base, ctx, 0); - SET_NMRR(base, ctx, 0); SET_CONTEXTIDR(base, ctx, 0); } } -static int __flush_iotlb(struct iommu_domain *domain) +static void __flush_iotlb(void *cookie) { - struct msm_priv *priv = to_msm_priv(domain); + struct msm_priv *priv = cookie; struct msm_iommu_dev *iommu = NULL; struct msm_iommu_ctx_dev *master; int ret = 0; -#ifndef CONFIG_IOMMU_PGTABLES_L2 - unsigned long *fl_table = priv->pgtable; - int i; + list_for_each_entry(iommu, &priv->list_attached, dom_node) { + ret = __enable_clocks(iommu); + if (ret) + goto fail; - if (!list_empty(&priv->list_attached)) { - dmac_flush_range(fl_table, fl_table + SZ_16K); + list_for_each_entry(master, &iommu->ctx_list, list) + SET_CTX_TLBIALL(iommu->base, master->num, 0); - for (i = 0; i < NUM_FL_PTE; i++) - if 
((fl_table[i] & 0x03) == FL_TYPE_TABLE) { - void *sl_table = __va(fl_table[i] & - FL_BASE_MASK); - dmac_flush_range(sl_table, sl_table + SZ_4K); - } + __disable_clocks(iommu); } -#endif +fail: + return; +} + +static void __flush_iotlb_range(unsigned long iova, size_t size, + size_t granule, bool leaf, void *cookie) +{ + struct msm_priv *priv = cookie; + struct msm_iommu_dev *iommu = NULL; + struct msm_iommu_ctx_dev *master; + int ret = 0; + int temp_size; list_for_each_entry(iommu, &priv->list_attached, dom_node) { ret = __enable_clocks(iommu); if (ret) goto fail; - list_for_each_entry(master, &iommu->ctx_list, list) - SET_CTX_TLBIALL(iommu->base, master->num, 0); + list_for_each_entry(master, &iommu->ctx_list, list) { + temp_size = size; + do { + iova &= TLBIVA_VA; + iova |= GET_CONTEXTIDR_ASID(iommu->base, + master->num); + SET_TLBIVA(iommu->base, master->num, iova); + iova += granule; + } while (temp_size -= granule); + } __disable_clocks(iommu); } + fail: - return ret; + return; } +static void __flush_iotlb_sync(void *cookie) +{ + /* + * Nothing is needed here, the barrier to guarantee + * completion of the tlb sync operation is implicitly + * taken care when the iommu client does a writel before + * kick starting the other master. + */ +} + +static const struct iommu_gather_ops msm_iommu_gather_ops = { + .tlb_flush_all = __flush_iotlb, + .tlb_add_flush = __flush_iotlb_range, + .tlb_sync = __flush_iotlb_sync, +}; + static int msm_iommu_alloc_ctx(unsigned long *map, int start, int end) { int idx; @@ -232,15 +262,17 @@ static void __reset_context(void __iomem *base, int ctx) SET_TLBFLPTER(base, ctx, 0); SET_TLBSLPTER(base, ctx, 0); SET_TLBLKCR(base, ctx, 0); - SET_PRRR(base, ctx, 0); - SET_NMRR(base, ctx, 0); } -static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable) +static void __program_context(void __iomem *base, int ctx, + struct msm_priv *priv) { - unsigned int prrr, nmrr; __reset_context(base, ctx); + /* Turn on TEX Remap */ + SET_TRE(base, ctx, 1); + SET_AFE(base, ctx, 1); + /* Set up HTW mode */ /* TLB miss configuration: perform HTW on miss */ SET_TLBMCFG(base, ctx, 0x3); @@ -248,8 +280,13 @@ static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable) /* V2P configuration: HTW for access */ SET_V2PCFG(base, ctx, 0x3); - SET_TTBCR(base, ctx, 0); - SET_TTBR0_PA(base, ctx, (pgtable >> 14)); + SET_TTBCR(base, ctx, priv->cfg.arm_v7s_cfg.tcr); + SET_TTBR0(base, ctx, priv->cfg.arm_v7s_cfg.ttbr[0]); + SET_TTBR1(base, ctx, priv->cfg.arm_v7s_cfg.ttbr[1]); + + /* Set prrr and nmrr */ + SET_PRRR(base, ctx, priv->cfg.arm_v7s_cfg.prrr); + SET_NMRR(base, ctx, priv->cfg.arm_v7s_cfg.nmrr); /* Invalidate the TLB for this context */ SET_CTX_TLBIALL(base, ctx, 0); @@ -268,38 +305,9 @@ static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable) SET_RCOSH(base, ctx, 1); SET_RCNSH(base, ctx, 1); - /* Turn on TEX Remap */ - SET_TRE(base, ctx, 1); - - /* Set TEX remap attributes */ - RCP15_PRRR(prrr); - RCP15_NMRR(nmrr); - SET_PRRR(base, ctx, prrr); - SET_NMRR(base, ctx, nmrr); - /* Turn on BFB prefetch */ SET_BFBDFE(base, ctx, 1); -#ifdef CONFIG_IOMMU_PGTABLES_L2 - /* Configure page tables as inner-cacheable and shareable to reduce - * the TLB miss penalty. 
- */ - SET_TTBR0_SH(base, ctx, 1); - SET_TTBR1_SH(base, ctx, 1); - - SET_TTBR0_NOS(base, ctx, 1); - SET_TTBR1_NOS(base, ctx, 1); - - SET_TTBR0_IRGNH(base, ctx, 0); /* WB, WA */ - SET_TTBR0_IRGNL(base, ctx, 1); - - SET_TTBR1_IRGNH(base, ctx, 0); /* WB, WA */ - SET_TTBR1_IRGNL(base, ctx, 1); - - SET_TTBR0_ORGN(base, ctx, 1); /* WB, WA */ - SET_TTBR1_ORGN(base, ctx, 1); /* WB, WA */ -#endif - /* Enable the MMU */ SET_M(base, ctx, 1); } @@ -316,13 +324,6 @@ static struct iommu_domain *msm_iommu_domain_alloc(unsigned type) goto fail_nomem; INIT_LIST_HEAD(&priv->list_attached); - priv->pgtable = (unsigned long *)__get_free_pages(GFP_KERNEL, - get_order(SZ_16K)); - - if (!priv->pgtable) - goto fail_nomem; - - memset(priv->pgtable, 0, SZ_16K); priv->domain.geometry.aperture_start = 0; priv->domain.geometry.aperture_end = (1ULL << 32) - 1; @@ -339,24 +340,35 @@ static void msm_iommu_domain_free(struct iommu_domain *domain) { struct msm_priv *priv; unsigned long flags; - unsigned long *fl_table; - int i; spin_lock_irqsave(&msm_iommu_lock, flags); priv = to_msm_priv(domain); + kfree(priv); + spin_unlock_irqrestore(&msm_iommu_lock, flags); +} - fl_table = priv->pgtable; +static int msm_iommu_domain_config(struct msm_priv *priv) +{ + spin_lock_init(&priv->pgtlock); - for (i = 0; i < NUM_FL_PTE; i++) - if ((fl_table[i] & 0x03) == FL_TYPE_TABLE) - free_page((unsigned long) __va(((fl_table[i]) & - FL_BASE_MASK))); + priv->cfg = (struct io_pgtable_cfg) { + .quirks = IO_PGTABLE_QUIRK_TLBI_ON_MAP, + .pgsize_bitmap = msm_iommu_ops.pgsize_bitmap, + .ias = 32, + .oas = 32, + .tlb = &msm_iommu_gather_ops, + .iommu_dev = priv->dev, + }; - free_pages((unsigned long)priv->pgtable, get_order(SZ_16K)); - priv->pgtable = NULL; + priv->iop = alloc_io_pgtable_ops(ARM_V7S, &priv->cfg, priv); + if (!priv->iop) { + dev_err(priv->dev, "Failed to allocate pgtable\n"); + return -EINVAL; + } - kfree(priv); - spin_unlock_irqrestore(&msm_iommu_lock, flags); + msm_iommu_ops.pgsize_bitmap = priv->cfg.pgsize_bitmap; + + return 0; } static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -367,6 +379,9 @@ static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) struct msm_priv *priv = to_msm_priv(domain); struct msm_iommu_ctx_dev *master; + priv->dev = dev; + msm_iommu_domain_config(priv); + spin_lock_irqsave(&msm_iommu_lock, flags); list_for_each_entry(iommu, &qcom_iommu_devices, dev_node) { master = list_first_entry(&iommu->ctx_list, @@ -392,14 +407,13 @@ static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) } config_mids(iommu, master); __program_context(iommu->base, master->num, - __pa(priv->pgtable)); + priv); } __disable_clocks(iommu); list_add(&iommu->dom_node, &priv->list_attached); } } - ret = __flush_iotlb(domain); fail: spin_unlock_irqrestore(&msm_iommu_lock, flags); @@ -415,11 +429,9 @@ static void msm_iommu_detach_dev(struct iommu_domain *domain, struct msm_iommu_ctx_dev *master; int ret; - spin_lock_irqsave(&msm_iommu_lock, flags); - ret = __flush_iotlb(domain); - if (ret) - goto fail; + free_io_pgtable_ops(priv->iop); + spin_lock_irqsave(&msm_iommu_lock, flags); list_for_each_entry(iommu, &priv->list_attached, dom_node) { ret = __enable_clocks(iommu); if (ret) @@ -435,190 +447,30 @@ fail: spin_unlock_irqrestore(&msm_iommu_lock, flags); } -static int msm_iommu_map(struct iommu_domain *domain, unsigned long va, +static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t pa, size_t len, int prot) { - struct msm_priv 
*priv; + struct msm_priv *priv = to_msm_priv(domain); unsigned long flags; - unsigned long *fl_table; - unsigned long *fl_pte; - unsigned long fl_offset; - unsigned long *sl_table; - unsigned long *sl_pte; - unsigned long sl_offset; - unsigned int pgprot; - int ret = 0, tex, sh; - - spin_lock_irqsave(&msm_iommu_lock, flags); - - sh = (prot & MSM_IOMMU_ATTR_SH) ? 1 : 0; - tex = msm_iommu_tex_class[prot & MSM_IOMMU_CP_MASK]; - - if (tex < 0 || tex > NUM_TEX_CLASS - 1) { - ret = -EINVAL; - goto fail; - } - - priv = to_msm_priv(domain); - - fl_table = priv->pgtable; - - if (len != SZ_16M && len != SZ_1M && - len != SZ_64K && len != SZ_4K) { - pr_debug("Bad size: %d\n", len); - ret = -EINVAL; - goto fail; - } - - if (!fl_table) { - pr_debug("Null page table\n"); - ret = -EINVAL; - goto fail; - } - - if (len == SZ_16M || len == SZ_1M) { - pgprot = sh ? FL_SHARED : 0; - pgprot |= tex & 0x01 ? FL_BUFFERABLE : 0; - pgprot |= tex & 0x02 ? FL_CACHEABLE : 0; - pgprot |= tex & 0x04 ? FL_TEX0 : 0; - } else { - pgprot = sh ? SL_SHARED : 0; - pgprot |= tex & 0x01 ? SL_BUFFERABLE : 0; - pgprot |= tex & 0x02 ? SL_CACHEABLE : 0; - pgprot |= tex & 0x04 ? SL_TEX0 : 0; - } - - fl_offset = FL_OFFSET(va); /* Upper 12 bits */ - fl_pte = fl_table + fl_offset; /* int pointers, 4 bytes */ - - if (len == SZ_16M) { - int i = 0; - for (i = 0; i < 16; i++) - *(fl_pte+i) = (pa & 0xFF000000) | FL_SUPERSECTION | - FL_AP_READ | FL_AP_WRITE | FL_TYPE_SECT | - FL_SHARED | FL_NG | pgprot; - } - - if (len == SZ_1M) - *fl_pte = (pa & 0xFFF00000) | FL_AP_READ | FL_AP_WRITE | FL_NG | - FL_TYPE_SECT | FL_SHARED | pgprot; - - /* Need a 2nd level table */ - if ((len == SZ_4K || len == SZ_64K) && (*fl_pte) == 0) { - unsigned long *sl; - sl = (unsigned long *) __get_free_pages(GFP_ATOMIC, - get_order(SZ_4K)); - - if (!sl) { - pr_debug("Could not allocate second level table\n"); - ret = -ENOMEM; - goto fail; - } - - memset(sl, 0, SZ_4K); - *fl_pte = ((((int)__pa(sl)) & FL_BASE_MASK) | FL_TYPE_TABLE); - } - - sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK)); - sl_offset = SL_OFFSET(va); - sl_pte = sl_table + sl_offset; - - - if (len == SZ_4K) - *sl_pte = (pa & SL_BASE_MASK_SMALL) | SL_AP0 | SL_AP1 | SL_NG | - SL_SHARED | SL_TYPE_SMALL | pgprot; - - if (len == SZ_64K) { - int i; + int ret; - for (i = 0; i < 16; i++) - *(sl_pte+i) = (pa & SL_BASE_MASK_LARGE) | SL_AP0 | - SL_NG | SL_AP1 | SL_SHARED | SL_TYPE_LARGE | pgprot; - } + spin_lock_irqsave(&priv->pgtlock, flags); + ret = priv->iop->map(priv->iop, iova, pa, len, prot); + spin_unlock_irqrestore(&priv->pgtlock, flags); - ret = __flush_iotlb(domain); -fail: - spin_unlock_irqrestore(&msm_iommu_lock, flags); return ret; } -static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long va, - size_t len) +static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t len) { - struct msm_priv *priv; + struct msm_priv *priv = to_msm_priv(domain); unsigned long flags; - unsigned long *fl_table; - unsigned long *fl_pte; - unsigned long fl_offset; - unsigned long *sl_table; - unsigned long *sl_pte; - unsigned long sl_offset; - int i, ret = 0; - - spin_lock_irqsave(&msm_iommu_lock, flags); - - priv = to_msm_priv(domain); - fl_table = priv->pgtable; + spin_lock_irqsave(&priv->pgtlock, flags); + len = priv->iop->unmap(priv->iop, iova, len); + spin_unlock_irqrestore(&priv->pgtlock, flags); - if (len != SZ_16M && len != SZ_1M && - len != SZ_64K && len != SZ_4K) { - pr_debug("Bad length: %d\n", len); - goto fail; - } - - if (!fl_table) { - pr_debug("Null 
page table\n"); - goto fail; - } - - fl_offset = FL_OFFSET(va); /* Upper 12 bits */ - fl_pte = fl_table + fl_offset; /* int pointers, 4 bytes */ - - if (*fl_pte == 0) { - pr_debug("First level PTE is 0\n"); - goto fail; - } - - /* Unmap supersection */ - if (len == SZ_16M) - for (i = 0; i < 16; i++) - *(fl_pte+i) = 0; - - if (len == SZ_1M) - *fl_pte = 0; - - sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK)); - sl_offset = SL_OFFSET(va); - sl_pte = sl_table + sl_offset; - - if (len == SZ_64K) { - for (i = 0; i < 16; i++) - *(sl_pte+i) = 0; - } - - if (len == SZ_4K) - *sl_pte = 0; - - if (len == SZ_4K || len == SZ_64K) { - int used = 0; - - for (i = 0; i < NUM_SL_PTE; i++) - if (sl_table[i]) - used = 1; - if (!used) { - free_page((unsigned long)sl_table); - *fl_pte = 0; - } - } - - ret = __flush_iotlb(domain); - -fail: - spin_unlock_irqrestore(&msm_iommu_lock, flags); - - /* the IOMMU API requires us to return how many bytes were unmapped */ - len = ret ? 0 : len; return len; } @@ -699,8 +551,6 @@ static void print_ctx_regs(void __iomem *base, int ctx) GET_TTBR0(base, ctx), GET_TTBR1(base, ctx)); pr_err("SCTLR = %08x ACTLR = %08x\n", GET_SCTLR(base, ctx), GET_ACTLR(base, ctx)); - pr_err("PRRR = %08x NMRR = %08x\n", - GET_PRRR(base, ctx), GET_NMRR(base, ctx)); } static void insert_iommu_master(struct device *dev, @@ -941,47 +791,8 @@ static void __exit msm_iommu_driver_exit(void) subsys_initcall(msm_iommu_driver_init); module_exit(msm_iommu_driver_exit); -static int __init get_tex_class(int icp, int ocp, int mt, int nos) -{ - int i = 0; - unsigned int prrr = 0; - unsigned int nmrr = 0; - int c_icp, c_ocp, c_mt, c_nos; - - RCP15_PRRR(prrr); - RCP15_NMRR(nmrr); - - for (i = 0; i < NUM_TEX_CLASS; i++) { - c_nos = PRRR_NOS(prrr, i); - c_mt = PRRR_MT(prrr, i); - c_icp = NMRR_ICP(nmrr, i); - c_ocp = NMRR_OCP(nmrr, i); - - if (icp == c_icp && ocp == c_ocp && c_mt == mt && c_nos == nos) - return i; - } - - return -ENODEV; -} - -static void __init setup_iommu_tex_classes(void) -{ - msm_iommu_tex_class[MSM_IOMMU_ATTR_NONCACHED] = - get_tex_class(CP_NONCACHED, CP_NONCACHED, MT_NORMAL, 1); - - msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_WA] = - get_tex_class(CP_WB_WA, CP_WB_WA, MT_NORMAL, 1); - - msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_NWA] = - get_tex_class(CP_WB_NWA, CP_WB_NWA, MT_NORMAL, 1); - - msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WT] = - get_tex_class(CP_WT, CP_WT, MT_NORMAL, 1); -} - static int __init msm_iommu_init(void) { - setup_iommu_tex_classes(); bus_set_iommu(&platform_bus_type, &msm_iommu_ops); return 0; } -- cgit v0.10.2 From 1cb13f78329ca2f1f7ef4e6024901cc26d611ee1 Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Mon, 13 Jun 2016 17:06:07 +0530 Subject: iommu/msm: Remove driver BROKEN Now that the driver is DT adapted, bus_set_iommu gets called only when on compatible matching. So the driver should not break multiplatform builds now. So remove the BROKEN config. 
Signed-off-by: Sricharan R Tested-by: Archit Taneja Tested-by: Srinivas Kandagatla Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index b60e72b..f5f4d14 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -89,7 +89,6 @@ config MSM_IOMMU bool "MSM IOMMU Support" depends on ARM depends on ARCH_MSM8X60 || ARCH_MSM8960 || COMPILE_TEST - depends on BROKEN select IOMMU_API select IOMMU_IO_PGTABLE_ARMV7S help -- cgit v0.10.2 From fec3b217ff19978650121103d849b5889b04780a Mon Sep 17 00:00:00 2001 From: Simon Xue Date: Fri, 24 Jun 2016 10:13:26 +0800 Subject: iommu/rockchip: Fix devm_{request,free}_irq parameter Even though the IOMMU shares IRQ with its master, the struct device passed to {request,free}_irq is supposed to represent the device that is signalling the interrupt. This patch makes the driver use IOMMU device instead of master's device to make things clear. Signed-off-by: Simon Xue Signed-off-by: Shunqian Zheng Reviewed-by: Douglas Anderson Signed-off-by: Tomasz Figa Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 25b4627..5a9659a 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -807,7 +807,7 @@ static int rk_iommu_attach_device(struct iommu_domain *domain, iommu->domain = domain; - ret = devm_request_irq(dev, iommu->irq, rk_iommu_irq, + ret = devm_request_irq(iommu->dev, iommu->irq, rk_iommu_irq, IRQF_SHARED, dev_name(dev), iommu); if (ret) return ret; @@ -860,7 +860,7 @@ static void rk_iommu_detach_device(struct iommu_domain *domain, } rk_iommu_disable_stall(iommu); - devm_free_irq(dev, iommu->irq, iommu); + devm_free_irq(iommu->dev, iommu->irq, iommu); iommu->domain = NULL; -- cgit v0.10.2 From e6d0f4737c468d1889aba06801c490988cf66ad7 Mon Sep 17 00:00:00 2001 From: Simon Xue Date: Fri, 24 Jun 2016 10:13:27 +0800 Subject: iommu/rockchip: Add map_sg callback for rk_iommu_ops The iommu_dma_alloc() in iommu/dma-iommu.c calls iommu_map_sg() that requires the callback iommu_ops .map_sg(). Adding the default_iommu_map_sg() to Rockchip IOMMU accordingly. Signed-off-by: Simon Xue Signed-off-by: Shunqian Zheng Reviewed-by: Douglas Anderson Signed-off-by: Tomasz Figa Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 5a9659a..53fa0d9 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1022,6 +1022,7 @@ static const struct iommu_ops rk_iommu_ops = { .detach_dev = rk_iommu_detach_device, .map = rk_iommu_map, .unmap = rk_iommu_unmap, + .map_sg = default_iommu_map_sg, .add_device = rk_iommu_add_device, .remove_device = rk_iommu_remove_device, .iova_to_phys = rk_iommu_iova_to_phys, -- cgit v0.10.2 From 3d08f434bd58656ae630376d0b5afd6ca1ffb013 Mon Sep 17 00:00:00 2001 From: Shunqian Zheng Date: Fri, 24 Jun 2016 10:13:28 +0800 Subject: iommu/rockchip: Fix allocation of bases array in driver probe In .probe(), devm_kzalloc() is called with size == 0 and works only by luck, due to internal behavior of the allocator and the fact that the proper allocation size is small. Let's use proper value for calculating the size. 
Fixes: cd6438c5f844 ("iommu/rockchip: Reconstruct to support multi slaves") Signed-off-by: Shunqian Zheng Signed-off-by: Tomasz Figa Reviewed-by: Douglas Anderson Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 53fa0d9..8a5bac7 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1034,6 +1034,7 @@ static int rk_iommu_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct rk_iommu *iommu; struct resource *res; + int num_res = pdev->num_resources; int i; iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL); @@ -1043,12 +1044,13 @@ static int rk_iommu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, iommu); iommu->dev = dev; iommu->num_mmu = 0; - iommu->bases = devm_kzalloc(dev, sizeof(*iommu->bases) * iommu->num_mmu, + + iommu->bases = devm_kzalloc(dev, sizeof(*iommu->bases) * num_res, GFP_KERNEL); if (!iommu->bases) return -ENOMEM; - for (i = 0; i < pdev->num_resources; i++) { + for (i = 0; i < num_res; i++) { res = platform_get_resource(pdev, IORESOURCE_MEM, i); if (!res) continue; -- cgit v0.10.2 From 4f0aba676735c653b4e739b760c1e66cd520d3e3 Mon Sep 17 00:00:00 2001 From: Shunqian Zheng Date: Fri, 24 Jun 2016 10:13:29 +0800 Subject: iommu/rockchip: Use DMA API to manage coherency Use DMA API instead of architecture internal functions like __cpuc_flush_dcache_area() etc. The biggest difficulty here is that dma_map and _sync calls require some struct device, while there is no real 1:1 relation between an IOMMU domain and some device. To overcome this, a simple platform device is registered for each allocated IOMMU domain. With this patch, this driver can be used on both ARM and ARM64 platforms, such as RK3288 and RK3399 respectively. Signed-off-by: Shunqian Zheng Signed-off-by: Tomasz Figa Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 8a5bac7..712ed75 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -4,11 +4,10 @@ * published by the Free Software Foundation. 
*/ -#include -#include #include #include #include +#include #include #include #include @@ -77,7 +76,9 @@ struct rk_iommu_domain { struct list_head iommus; + struct platform_device *pdev; u32 *dt; /* page directory table */ + dma_addr_t dt_dma; spinlock_t iommus_lock; /* lock for iommus list */ spinlock_t dt_lock; /* lock for modifying page directory table */ @@ -93,14 +94,12 @@ struct rk_iommu { struct iommu_domain *domain; /* domain to which iommu is attached */ }; -static inline void rk_table_flush(u32 *va, unsigned int count) +static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma, + unsigned int count) { - phys_addr_t pa_start = virt_to_phys(va); - phys_addr_t pa_end = virt_to_phys(va + count); - size_t size = pa_end - pa_start; + size_t size = count * sizeof(u32); /* count of u32 entry */ - __cpuc_flush_dcache_area(va, size); - outer_flush_range(pa_start, pa_end); + dma_sync_single_for_device(&dom->pdev->dev, dma, size, DMA_TO_DEVICE); } static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom) @@ -183,10 +182,9 @@ static inline bool rk_dte_is_pt_valid(u32 dte) return dte & RK_DTE_PT_VALID; } -static u32 rk_mk_dte(u32 *pt) +static inline u32 rk_mk_dte(dma_addr_t pt_dma) { - phys_addr_t pt_phys = virt_to_phys(pt); - return (pt_phys & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID; + return (pt_dma & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID; } /* @@ -603,13 +601,16 @@ static void rk_iommu_zap_iova_first_last(struct rk_iommu_domain *rk_domain, static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain, dma_addr_t iova) { + struct device *dev = &rk_domain->pdev->dev; u32 *page_table, *dte_addr; - u32 dte; + u32 dte_index, dte; phys_addr_t pt_phys; + dma_addr_t pt_dma; assert_spin_locked(&rk_domain->dt_lock); - dte_addr = &rk_domain->dt[rk_iova_dte_index(iova)]; + dte_index = rk_iova_dte_index(iova); + dte_addr = &rk_domain->dt[dte_index]; dte = *dte_addr; if (rk_dte_is_pt_valid(dte)) goto done; @@ -618,19 +619,27 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain, if (!page_table) return ERR_PTR(-ENOMEM); - dte = rk_mk_dte(page_table); - *dte_addr = dte; + pt_dma = dma_map_single(dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pt_dma)) { + dev_err(dev, "DMA mapping error while allocating page table\n"); + free_page((unsigned long)page_table); + return ERR_PTR(-ENOMEM); + } - rk_table_flush(page_table, NUM_PT_ENTRIES); - rk_table_flush(dte_addr, 1); + dte = rk_mk_dte(pt_dma); + *dte_addr = dte; + rk_table_flush(rk_domain, pt_dma, NUM_PT_ENTRIES); + rk_table_flush(rk_domain, + rk_domain->dt_dma + dte_index * sizeof(u32), 1); done: pt_phys = rk_dte_pt_address(dte); return (u32 *)phys_to_virt(pt_phys); } static size_t rk_iommu_unmap_iova(struct rk_iommu_domain *rk_domain, - u32 *pte_addr, dma_addr_t iova, size_t size) + u32 *pte_addr, dma_addr_t pte_dma, + size_t size) { unsigned int pte_count; unsigned int pte_total = size / SPAGE_SIZE; @@ -645,14 +654,14 @@ static size_t rk_iommu_unmap_iova(struct rk_iommu_domain *rk_domain, pte_addr[pte_count] = rk_mk_pte_invalid(pte); } - rk_table_flush(pte_addr, pte_count); + rk_table_flush(rk_domain, pte_dma, pte_count); return pte_count * SPAGE_SIZE; } static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr, - dma_addr_t iova, phys_addr_t paddr, size_t size, - int prot) + dma_addr_t pte_dma, dma_addr_t iova, + phys_addr_t paddr, size_t size, int prot) { unsigned int pte_count; unsigned int pte_total = size / SPAGE_SIZE; @@ -671,7 +680,7 @@ static 
int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr, paddr += SPAGE_SIZE; } - rk_table_flush(pte_addr, pte_count); + rk_table_flush(rk_domain, pte_dma, pte_total); /* * Zap the first and last iova to evict from iotlb any previously @@ -684,7 +693,8 @@ static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr, return 0; unwind: /* Unmap the range of iovas that we just mapped */ - rk_iommu_unmap_iova(rk_domain, pte_addr, iova, pte_count * SPAGE_SIZE); + rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma, + pte_count * SPAGE_SIZE); iova += pte_count * SPAGE_SIZE; page_phys = rk_pte_page_address(pte_addr[pte_count]); @@ -699,8 +709,9 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova, { struct rk_iommu_domain *rk_domain = to_rk_domain(domain); unsigned long flags; - dma_addr_t iova = (dma_addr_t)_iova; + dma_addr_t pte_dma, iova = (dma_addr_t)_iova; u32 *page_table, *pte_addr; + u32 dte_index, pte_index; int ret; spin_lock_irqsave(&rk_domain->dt_lock, flags); @@ -718,8 +729,13 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova, return PTR_ERR(page_table); } - pte_addr = &page_table[rk_iova_pte_index(iova)]; - ret = rk_iommu_map_iova(rk_domain, pte_addr, iova, paddr, size, prot); + dte_index = rk_domain->dt[rk_iova_dte_index(iova)]; + pte_index = rk_iova_pte_index(iova); + pte_addr = &page_table[pte_index]; + pte_dma = rk_dte_pt_address(dte_index) + pte_index * sizeof(u32); + ret = rk_iommu_map_iova(rk_domain, pte_addr, pte_dma, iova, + paddr, size, prot); + spin_unlock_irqrestore(&rk_domain->dt_lock, flags); return ret; @@ -730,7 +746,7 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova, { struct rk_iommu_domain *rk_domain = to_rk_domain(domain); unsigned long flags; - dma_addr_t iova = (dma_addr_t)_iova; + dma_addr_t pte_dma, iova = (dma_addr_t)_iova; phys_addr_t pt_phys; u32 dte; u32 *pte_addr; @@ -754,7 +770,8 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova, pt_phys = rk_dte_pt_address(dte); pte_addr = (u32 *)phys_to_virt(pt_phys) + rk_iova_pte_index(iova); - unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, iova, size); + pte_dma = pt_phys + rk_iova_pte_index(iova) * sizeof(u32); + unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma, size); spin_unlock_irqrestore(&rk_domain->dt_lock, flags); @@ -787,7 +804,6 @@ static int rk_iommu_attach_device(struct iommu_domain *domain, struct rk_iommu_domain *rk_domain = to_rk_domain(domain); unsigned long flags; int ret, i; - phys_addr_t dte_addr; /* * Allow 'virtual devices' (e.g., drm) to attach to domain. 
@@ -812,9 +828,9 @@ static int rk_iommu_attach_device(struct iommu_domain *domain, if (ret) return ret; - dte_addr = virt_to_phys(rk_domain->dt); for (i = 0; i < iommu->num_mmu; i++) { - rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dte_addr); + rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, + rk_domain->dt_dma); rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE); rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK); } @@ -870,14 +886,30 @@ static void rk_iommu_detach_device(struct iommu_domain *domain, static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) { struct rk_iommu_domain *rk_domain; + struct platform_device *pdev; + struct device *iommu_dev; if (type != IOMMU_DOMAIN_UNMANAGED) return NULL; - rk_domain = kzalloc(sizeof(*rk_domain), GFP_KERNEL); - if (!rk_domain) + /* Register a pdev per domain, so DMA API can base on this *dev + * even some virtual master doesn't have an iommu slave + */ + pdev = platform_device_register_simple("rk_iommu_domain", + PLATFORM_DEVID_AUTO, NULL, 0); + if (IS_ERR(pdev)) return NULL; + rk_domain = devm_kzalloc(&pdev->dev, sizeof(*rk_domain), GFP_KERNEL); + if (!rk_domain) + goto err_unreg_pdev; + + rk_domain->pdev = pdev; + + /* To init the iovad which is required by iommu_dma_init_domain() */ + if (iommu_get_dma_cookie(&rk_domain->domain)) + goto err_unreg_pdev; + /* * rk32xx iommus use a 2 level pagetable. * Each level1 (dt) and level2 (pt) table has 1024 4-byte entries. @@ -885,9 +917,17 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) */ rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32); if (!rk_domain->dt) - goto err_dt; + goto err_put_cookie; + + iommu_dev = &pdev->dev; + rk_domain->dt_dma = dma_map_single(iommu_dev, rk_domain->dt, + SPAGE_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(iommu_dev, rk_domain->dt_dma)) { + dev_err(iommu_dev, "DMA map error for DT\n"); + goto err_free_dt; + } - rk_table_flush(rk_domain->dt, NUM_DT_ENTRIES); + rk_table_flush(rk_domain, rk_domain->dt_dma, NUM_DT_ENTRIES); spin_lock_init(&rk_domain->iommus_lock); spin_lock_init(&rk_domain->dt_lock); @@ -895,8 +935,13 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) return &rk_domain->domain; -err_dt: - kfree(rk_domain); +err_free_dt: + free_page((unsigned long)rk_domain->dt); +err_put_cookie: + iommu_put_dma_cookie(&rk_domain->domain); +err_unreg_pdev: + platform_device_unregister(pdev); + return NULL; } @@ -912,12 +957,19 @@ static void rk_iommu_domain_free(struct iommu_domain *domain) if (rk_dte_is_pt_valid(dte)) { phys_addr_t pt_phys = rk_dte_pt_address(dte); u32 *page_table = phys_to_virt(pt_phys); + dma_unmap_single(&rk_domain->pdev->dev, pt_phys, + SPAGE_SIZE, DMA_TO_DEVICE); free_page((unsigned long)page_table); } } + dma_unmap_single(&rk_domain->pdev->dev, rk_domain->dt_dma, + SPAGE_SIZE, DMA_TO_DEVICE); free_page((unsigned long)rk_domain->dt); - kfree(rk_domain); + + iommu_put_dma_cookie(&rk_domain->domain); + + platform_device_unregister(rk_domain->pdev); } static bool rk_iommu_is_dev_iommu_master(struct device *dev) @@ -1029,6 +1081,30 @@ static const struct iommu_ops rk_iommu_ops = { .pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP, }; +static int rk_iommu_domain_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + + dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms), GFP_KERNEL); + if (!dev->dma_parms) + return -ENOMEM; + + /* Set dma_ops for dev, otherwise it would be dummy_dma_ops */ + arch_setup_dma_ops(dev, 0, DMA_BIT_MASK(32), NULL, false); + + 
dma_set_max_seg_size(dev, DMA_BIT_MASK(32)); + dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32)); + + return 0; +} + +static struct platform_driver rk_iommu_domain_driver = { + .probe = rk_iommu_domain_probe, + .driver = { + .name = "rk_iommu_domain", + }, +}; + static int rk_iommu_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1106,11 +1182,19 @@ static int __init rk_iommu_init(void) if (ret) return ret; - return platform_driver_register(&rk_iommu_driver); + ret = platform_driver_register(&rk_iommu_domain_driver); + if (ret) + return ret; + + ret = platform_driver_register(&rk_iommu_driver); + if (ret) + platform_driver_unregister(&rk_iommu_domain_driver); + return ret; } static void __exit rk_iommu_exit(void) { platform_driver_unregister(&rk_iommu_driver); + platform_driver_unregister(&rk_iommu_domain_driver); } subsys_initcall(rk_iommu_init); -- cgit v0.10.2 From a93db2f22b6b48369acb72f66a0ae47ec17a0b05 Mon Sep 17 00:00:00 2001 From: Shunqian Zheng Date: Fri, 24 Jun 2016 10:13:30 +0800 Subject: iommu/rockchip: Prepare to support generic DMA mapping Set geometry for allocated domains and fix .domain_alloc() callback to work with IOMMU_DOMAIN_DMA domain type, which is used for implicit domains on ARM64. Signed-off-by: Shunqian Zheng Signed-off-by: Tomasz Figa Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 712ed75..9afcbf7 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -889,7 +889,7 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) struct platform_device *pdev; struct device *iommu_dev; - if (type != IOMMU_DOMAIN_UNMANAGED) + if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) return NULL; /* Register a pdev per domain, so DMA API can base on this *dev @@ -906,8 +906,8 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) rk_domain->pdev = pdev; - /* To init the iovad which is required by iommu_dma_init_domain() */ - if (iommu_get_dma_cookie(&rk_domain->domain)) + if (type == IOMMU_DOMAIN_DMA && + iommu_get_dma_cookie(&rk_domain->domain)) goto err_unreg_pdev; /* @@ -933,12 +933,17 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) spin_lock_init(&rk_domain->dt_lock); INIT_LIST_HEAD(&rk_domain->iommus); + rk_domain->domain.geometry.aperture_start = 0; + rk_domain->domain.geometry.aperture_end = DMA_BIT_MASK(32); + rk_domain->domain.geometry.force_aperture = true; + return &rk_domain->domain; err_free_dt: free_page((unsigned long)rk_domain->dt); err_put_cookie: - iommu_put_dma_cookie(&rk_domain->domain); + if (type == IOMMU_DOMAIN_DMA) + iommu_put_dma_cookie(&rk_domain->domain); err_unreg_pdev: platform_device_unregister(pdev); @@ -967,7 +972,8 @@ static void rk_iommu_domain_free(struct iommu_domain *domain) SPAGE_SIZE, DMA_TO_DEVICE); free_page((unsigned long)rk_domain->dt); - iommu_put_dma_cookie(&rk_domain->domain); + if (domain->type == IOMMU_DOMAIN_DMA) + iommu_put_dma_cookie(&rk_domain->domain); platform_device_unregister(rk_domain->pdev); } -- cgit v0.10.2 From 560829b4f6c34adae82082fe86d21e7c6cdc4eaf Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sat, 21 May 2016 13:38:10 +0200 Subject: iommu/arm-smmu: Fix typo in devicetree binding introductory text This may well be the world's most inconsequential patch, but there is a spelling mistake that needs fixing and Andrea was bored enough to write the patch (along with 1528 others...). 
Signed-off-by: Andrea Gelmini Signed-off-by: Will Deacon diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt index 947863a..7b94c88 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt @@ -1,6 +1,6 @@ * ARM SMMUv3 Architecture Implementation -The SMMUv3 architecture is a significant deparature from previous +The SMMUv3 architecture is a significant departure from previous revisions, replacing the MMIO register interface with in-memory command and event queues and adding support for the ATS and PRI components of the PCIe specification. -- cgit v0.10.2 From 112c898b59dd5cfd95ee30dfe7cc4fc11a6d484e Mon Sep 17 00:00:00 2001 From: Wei Chen Date: Mon, 13 Jun 2016 17:20:17 +0800 Subject: iommu/arm-smmu: request pcie devices to enable ACS The PCIe ACS capability will affect the layout of iommu groups. Generally speaking, if the path from root port to the PCIe device is ACS enabled, the iommu will create a single iommu group for this PCIe device. If all PCIe devices on the path are ACS enabled then Linux can determine this path is ACS enabled. Linux use two PCIe configuration registers to determine the ACS status of PCIe devices: ACS Capability Register and ACS Control Register. The first register is used to check the implementation of ACS function of a PCIe device, the second register is used to check the enable status of ACS function. If one PCIe device has implemented and enabled the ACS function then Linux will determine this PCIe device enabled ACS. From the Chapter:6.12 of PCI Express Base Specification Revision 3.1a, we can find that when a PCIe device implements ACS function, the enable status is set to disabled by default and can be enabled by ACS-aware software. ACS will affect the iommu groups topology, so, the iommu driver is ACS-aware software. This patch adds a call to pci_request_acs() to the arm-smmu driver to enable the ACS function in PCIe devices that support it, when they get probed. Reviewed-by: Robin Murphy Reviewed-by: Eric Auger Signed-off-by: Wei Chen Signed-off-by: Will Deacon diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 94b6821..30ea899 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2686,6 +2686,8 @@ static int __init arm_smmu_init(void) if (ret) return ret; + pci_request_acs(); + return bus_set_iommu(&pci_bus_type, &arm_smmu_ops); } diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 9345a3f..ab365ec 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -2096,8 +2096,10 @@ static int __init arm_smmu_init(void) #endif #ifdef CONFIG_PCI - if (!iommu_present(&pci_bus_type)) + if (!iommu_present(&pci_bus_type)) { + pci_request_acs(); bus_set_iommu(&pci_bus_type, &arm_smmu_ops); + } #endif return 0; -- cgit v0.10.2 From 7c6d90e2bb1a98b86d73b9e8ab4d97ed5507e37c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 16 Jun 2016 18:21:19 +0100 Subject: iommu/io-pgtable-arm: Fix iova_to_phys for block entries The implementation of iova_to_phys for the long-descriptor ARM io-pgtable code always masks with the granule size when inserting the low virtual address bits into the physical address determined from the page tables. In cases where the leaf entry is found before the final level of table (i.e. due to a block mapping), this results in rounding down to the bottom page of the block mapping. 
Consequently, the physical address range batching in the vfio_unmap_unpin is defeated and we end up taking the long way home. This patch fixes the problem by masking the virtual address with the appropriate mask for the level at which the leaf descriptor is located. The short-descriptor code already gets this right, so no change is needed there. Cc: Reported-by: Robin Murphy Tested-by: Robin Murphy Signed-off-by: Will Deacon diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index a1ed1b7..f5c90e1 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -576,7 +576,7 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, return 0; found_translation: - iova &= (ARM_LPAE_GRANULE(data) - 1); + iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1); return ((phys_addr_t)iopte_to_pfn(pte,data) << data->pg_shift) | iova; } -- cgit v0.10.2 From efcd94c07665a074fb5e25dfaaba0a9f606248b5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 22 May 2016 11:05:59 +0200 Subject: MAINTAINERS: Add file patterns for iommu device tree bindings Submitters of device tree binding documentation may forget to CC the subsystem maintainer if this is missing. Signed-off-by: Geert Uytterhoeven Cc: Joerg Roedel Cc: iommu@lists.linux-foundation.org Signed-off-by: Joerg Roedel diff --git a/MAINTAINERS b/MAINTAINERS index 1209323..47b8ab3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6145,6 +6145,7 @@ M: Joerg Roedel L: iommu@lists.linux-foundation.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git S: Maintained +F: Documentation/devicetree/bindings/iommu/ F: drivers/iommu/ IP MASQUERADING -- cgit v0.10.2 From bee140044579fbfdad5fd98717c0405d7b492226 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 4 Jul 2016 17:38:22 +0800 Subject: iommu/arm-smmu: Use devm_request_irq and devm_free_irq Use devm_request_irq to simplify error handling path, when probe smmu device. Also devm_{request|free}_irq when init or destroy domain context. Signed-off-by: Peng Fan Cc: Will Deacon Cc: Robin Murphy Signed-off-by: Will Deacon diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index ab365ec..4f49fe2 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -987,8 +987,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, * handler seeing a half-initialised domain state. 
*/ irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; - ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED, - "arm-smmu-context-fault", domain); + ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault, + IRQF_SHARED, "arm-smmu-context-fault", domain); if (ret < 0) { dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n", cfg->irptndx, irq); @@ -1028,7 +1028,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) if (cfg->irptndx != INVALID_IRPTNDX) { irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; - free_irq(irq, domain); + devm_free_irq(smmu->dev, irq, domain); } free_io_pgtable_ops(smmu_domain->pgtbl_ops); @@ -1986,15 +1986,15 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) } for (i = 0; i < smmu->num_global_irqs; ++i) { - err = request_irq(smmu->irqs[i], - arm_smmu_global_fault, - IRQF_SHARED, - "arm-smmu global fault", - smmu); + err = devm_request_irq(smmu->dev, smmu->irqs[i], + arm_smmu_global_fault, + IRQF_SHARED, + "arm-smmu global fault", + smmu); if (err) { dev_err(dev, "failed to request global IRQ %d (%u)\n", i, smmu->irqs[i]); - goto out_free_irqs; + goto out_put_masters; } } @@ -2006,10 +2006,6 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) arm_smmu_device_reset(smmu); return 0; -out_free_irqs: - while (i--) - free_irq(smmu->irqs[i], smmu); - out_put_masters: for (node = rb_first(&smmu->masters); node; node = rb_next(node)) { struct arm_smmu_master *master @@ -2050,7 +2046,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev) dev_err(dev, "removing device with active domains!\n"); for (i = 0; i < smmu->num_global_irqs; ++i) - free_irq(smmu->irqs[i], smmu); + devm_free_irq(smmu->dev, smmu->irqs[i], smmu); /* Turn the thing off */ writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); -- cgit v0.10.2 From 452014d2b4824ed9ca32a3ef6ee745f22431132a Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Fri, 24 Jun 2016 06:13:14 -0700 Subject: iommu/vt-d: Remove unnecassary qi clflushes According to the manual: "Hardware access to ... invalidation queue ... are always coherent." Remove unnecassary clflushes accordingly. Signed-off-by: Nadav Amit Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 2eff7b6..1344e29 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1167,8 +1167,6 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) (unsigned long long)qi->desc[index].high); memcpy(&qi->desc[index], &qi->desc[wait_index], sizeof(struct qi_desc)); - __iommu_flush_cache(iommu, &qi->desc[index], - sizeof(struct qi_desc)); writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG); return -EINVAL; } @@ -1243,9 +1241,6 @@ restart: hw[wait_index] = wait_desc; - __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc)); - __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc)); - qi->free_head = (qi->free_head + 2) % QI_LENGTH; qi->free_cnt -= 2; -- cgit v0.10.2 From e38d1f1312e4e88d1dab1fdf591824e1f3b105a9 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 28 Jun 2016 20:38:36 +0200 Subject: iommu: Simplify init function iommu_group_ida and iommu_group_mutex can be initialized statically. There's no need to do this dynamically in the init function. 
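For reference, the static initialization the message refers to looks like this; the identifier names below are illustrative, and DEFINE_IDA() and DEFINE_MUTEX() come from <linux/idr.h> and <linux/mutex.h>:

#include <linux/idr.h>
#include <linux/mutex.h>

/* Initialized at compile time; no ida_init()/mutex_init() call is needed
 * from an __init function. Names are illustrative only. */
static DEFINE_IDA(example_ida);
static DEFINE_MUTEX(example_lock);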
Signed-off-by: Heiner Kallweit Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3000051..debce45 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -34,8 +34,8 @@ #include static struct kset *iommu_group_kset; -static struct ida iommu_group_ida; -static struct mutex iommu_group_mutex; +static DEFINE_IDA(iommu_group_ida); +static DEFINE_MUTEX(iommu_group_mutex); struct iommu_callback_data { const struct iommu_ops *ops; @@ -1483,9 +1483,6 @@ static int __init iommu_init(void) { iommu_group_kset = kset_create_and_add("iommu_groups", NULL, kernel_kobj); - ida_init(&iommu_group_ida); - mutex_init(&iommu_group_mutex); - BUG_ON(!iommu_group_kset); return 0; -- cgit v0.10.2 From feccf398db631f3b98c4c6572381517d90b5fd87 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 29 Jun 2016 21:13:59 +0200 Subject: iommu: Simplify and fix ida handling Ida handling can be much simplified by using the ida_simple_.. functions. This change also fixes the bug that previously checking for errors returned by ida_get_new() was incomplete. ida_get_new() can return errors other than EAGAIN, e.g. ENOSPC. This case wasn't handled. Signed-off-by: Heiner Kallweit Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index debce45..4d3c4a8 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -35,7 +35,6 @@ static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); -static DEFINE_MUTEX(iommu_group_mutex); struct iommu_callback_data { const struct iommu_ops *ops; @@ -144,9 +143,7 @@ static void iommu_group_release(struct kobject *kobj) if (group->iommu_data_release) group->iommu_data_release(group->iommu_data); - mutex_lock(&iommu_group_mutex); - ida_remove(&iommu_group_ida, group->id); - mutex_unlock(&iommu_group_mutex); + ida_simple_remove(&iommu_group_ida, group->id); if (group->default_domain) iommu_domain_free(group->default_domain); @@ -186,26 +183,17 @@ struct iommu_group *iommu_group_alloc(void) INIT_LIST_HEAD(&group->devices); BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); - mutex_lock(&iommu_group_mutex); - -again: - if (unlikely(0 == ida_pre_get(&iommu_group_ida, GFP_KERNEL))) { + ret = ida_simple_get(&iommu_group_ida, 0, 0, GFP_KERNEL); + if (ret < 0) { kfree(group); - mutex_unlock(&iommu_group_mutex); - return ERR_PTR(-ENOMEM); + return ERR_PTR(ret); } - - if (-EAGAIN == ida_get_new(&iommu_group_ida, &group->id)) - goto again; - - mutex_unlock(&iommu_group_mutex); + group->id = ret; ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, NULL, "%d", group->id); if (ret) { - mutex_lock(&iommu_group_mutex); - ida_remove(&iommu_group_ida, group->id); - mutex_unlock(&iommu_group_mutex); + ida_simple_remove(&iommu_group_ida, group->id); kfree(group); return ERR_PTR(ret); } -- cgit v0.10.2 From b548e786ce47017107765bbeb0f100202525ea83 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 13 Jul 2016 12:35:24 +0200 Subject: iommu/amd: Init unity mappings only for dma_ops domains The default domain for a device might also be identity-mapped. In this case the kernel would crash when unity mappings are defined for the device. Fix that by making sure the domain is a dma_ops domain. 
Fixes: 0bb6e243d7fb ('iommu/amd: Support IOMMU_DOMAIN_DMA type allocation') Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 921111e..b938a4a 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -467,9 +467,11 @@ static void init_iommu_group(struct device *dev) if (!domain) goto out; - dma_domain = to_pdomain(domain)->priv; + if (to_pdomain(domain)->flags == PD_DMA_OPS_MASK) { + dma_domain = to_pdomain(domain)->priv; + init_unity_mappings_for_device(dev, dma_domain); + } - init_unity_mappings_for_device(dev, dma_domain); out: iommu_group_put(group); } -- cgit v0.10.2 From 33b21a6b203f70e2012b02753134e59c3ab38779 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Jul 2016 13:07:53 +0200 Subject: iommu: Add apply_dm_region call-back to iommu-ops This new call-back will be used by the iommu driver to do reserve the given dm_region in its iova space before the mapping is created. The call-back is temporary until the dma-ops implementation is part of the common iommu code. Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3000051..e8d2fb0 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -348,6 +348,9 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, list_for_each_entry(entry, &mappings, list) { dma_addr_t start, end, addr; + if (domain->ops->apply_dm_region) + domain->ops->apply_dm_region(dev, domain, entry); + start = ALIGN(entry->start, pg_size); end = ALIGN(entry->start + entry->length, pg_size); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 664683a..a35fb8b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -152,6 +152,7 @@ struct iommu_dm_region { * @domain_set_attr: Change domain attributes * @get_dm_regions: Request list of direct mapping requirements for a device * @put_dm_regions: Free list of direct mapping requirements for a device + * @apply_dm_region: Temporary helper call-back for iova reserved ranges * @domain_window_enable: Configure and enable a particular window for a domain * @domain_window_disable: Disable a particular window for a domain * @domain_set_windows: Set the number of windows for a domain @@ -186,6 +187,8 @@ struct iommu_ops { /* Request/Free a list of direct mapping requirements for a device */ void (*get_dm_regions)(struct device *dev, struct list_head *list); void (*put_dm_regions)(struct device *dev, struct list_head *list); + void (*apply_dm_region)(struct device *dev, struct iommu_domain *domain, + struct iommu_dm_region *region); /* Window handling functions */ int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr, -- cgit v0.10.2 From a72c4225447bae5d435671446168876bcea4de59 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Jul 2016 11:12:49 +0200 Subject: iommu/amd: Select IOMMU_IOVA for AMD IOMMU Include the generic IOVA code to make use of it in the AMD IOMMU driver too. 
Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index ad08603..0b1aabb 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -111,6 +111,7 @@ config AMD_IOMMU select PCI_PRI select PCI_PASID select IOMMU_API + select IOMMU_IOVA depends on X86_64 && PCI && ACPI ---help--- With this option you can enable support for AMD IOMMU hardware in -- cgit v0.10.2 From 307d5851ef74c6c06a68f50302bf19809966d345 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Jul 2016 11:54:04 +0200 Subject: iommu/amd: Allocate iova_domain for dma_ops_domain Use it later for allocating the IO virtual addresses. Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b938a4a..e7825b2 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -57,6 +58,11 @@ #define LOOP_TIMEOUT 100000 +/* IO virtual address start page frame number */ +#define IOVA_START_PFN (1) +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) +#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) + /* * This bitmap is used to advertise the page sizes our hardware support * to the IOMMU core, which will then use this information to split @@ -158,6 +164,9 @@ struct dma_ops_domain { /* address space relevant data */ struct aperture_range *aperture[APERTURE_MAX_RANGES]; + + /* IOVA RB-Tree */ + struct iova_domain iovad; }; /**************************************************************************** @@ -1969,6 +1978,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) if (!dom) return; + put_iova_domain(&dom->iovad); + free_percpu(dom->next_index); del_domain_from_list(&dom->domain); @@ -2044,6 +2055,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) for_each_possible_cpu(cpu) *per_cpu_ptr(dma_dom->next_index, cpu) = 0; + init_iova_domain(&dma_dom->iovad, PAGE_SIZE, + IOVA_START_PFN, DMA_32BIT_PFN); + return dma_dom; free_dma_dom: @@ -2951,7 +2965,11 @@ static struct dma_map_ops amd_iommu_dma_ops = { int __init amd_iommu_init_api(void) { - int err = 0; + int ret, err = 0; + + ret = iova_cache_get(); + if (ret) + return ret; err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops); if (err) -- cgit v0.10.2 From 81cd07b9c92ad446fcde18db7de961def0dbcfd3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 7 Jul 2016 18:01:10 +0200 Subject: iommu/amd: Create a list of reserved iova addresses Put the MSI-range, the HT-range and the MMIO ranges of PCI devices into that range, so that these addresses are not allocated for DMA. Copy this address list into every created dma_ops_domain. 
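In rough outline, the scheme described above is: build one iova_domain that holds the reservations, then copy it into every per-device address space. A minimal sketch of that pattern using the <linux/iova.h> helpers follows; the ex_-prefixed names are illustrative and only the MSI window is shown as an example reservation:

#include <linux/iova.h>
#include <linux/dma-mapping.h>

#define EX_IOVA_PFN(addr)	((addr) >> PAGE_SHIFT)	/* like the driver's IOVA_PFN() */

static struct iova_domain ex_reserved_ranges;		/* hypothetical global */

static int ex_init_reserved_ranges(void)
{
	init_iova_domain(&ex_reserved_ranges, PAGE_SIZE, 1,
			 EX_IOVA_PFN(DMA_BIT_MASK(32)));

	/* Mark an address window (here the MSI window) as never allocatable. */
	if (!reserve_iova(&ex_reserved_ranges,
			  EX_IOVA_PFN(0xfee00000UL), EX_IOVA_PFN(0xfeefffffUL)))
		return -ENOMEM;

	return 0;
}

/* Every newly created DMA address space inherits those reservations. */
static void ex_init_domain_iova(struct iova_domain *iovad)
{
	init_iova_domain(iovad, PAGE_SIZE, 1, EX_IOVA_PFN(DMA_BIT_MASK(32)));
	copy_reserved_iova(&ex_reserved_ranges, iovad);
}

copy_reserved_iova() clones the reserved nodes into the new tree, so the per-device allocator simply never hands out those ranges and no extra bookkeeping is needed at allocation time.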
Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index e7825b2..1bb59ae 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -63,6 +63,12 @@ #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) +/* Reserved IOVA ranges */ +#define MSI_RANGE_START (0xfee00000) +#define MSI_RANGE_END (0xfeefffff) +#define HT_RANGE_START (0xfd00000000ULL) +#define HT_RANGE_END (0xffffffffffULL) + /* * This bitmap is used to advertise the page sizes our hardware support * to the IOMMU core, which will then use this information to split @@ -169,6 +175,9 @@ struct dma_ops_domain { struct iova_domain iovad; }; +static struct iova_domain reserved_iova_ranges; +static struct lock_class_key reserved_rbtree_key; + /**************************************************************************** * * Helper functions @@ -2058,6 +2067,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN, DMA_32BIT_PFN); + /* Initialize reserved ranges */ + copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad); + return dma_dom; free_dma_dom: @@ -2963,6 +2975,59 @@ static struct dma_map_ops amd_iommu_dma_ops = { .set_dma_mask = set_dma_mask, }; +static int init_reserved_iova_ranges(void) +{ + struct pci_dev *pdev = NULL; + struct iova *val; + + init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, + IOVA_START_PFN, DMA_32BIT_PFN); + + lockdep_set_class(&reserved_iova_ranges.iova_rbtree_lock, + &reserved_rbtree_key); + + /* MSI memory range */ + val = reserve_iova(&reserved_iova_ranges, + IOVA_PFN(MSI_RANGE_START), IOVA_PFN(MSI_RANGE_END)); + if (!val) { + pr_err("Reserving MSI range failed\n"); + return -ENOMEM; + } + + /* HT memory range */ + val = reserve_iova(&reserved_iova_ranges, + IOVA_PFN(HT_RANGE_START), IOVA_PFN(HT_RANGE_END)); + if (!val) { + pr_err("Reserving HT range failed\n"); + return -ENOMEM; + } + + /* + * Memory used for PCI resources + * FIXME: Check whether we can reserve the PCI-hole completly + */ + for_each_pci_dev(pdev) { + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; ++i) { + struct resource *r = &pdev->resource[i]; + + if (!(r->flags & IORESOURCE_MEM)) + continue; + + val = reserve_iova(&reserved_iova_ranges, + IOVA_PFN(r->start), + IOVA_PFN(r->end)); + if (!val) { + pr_err("Reserve pci-resource range failed\n"); + return -ENOMEM; + } + } + } + + return 0; +} + int __init amd_iommu_init_api(void) { int ret, err = 0; @@ -2971,6 +3036,10 @@ int __init amd_iommu_init_api(void) if (ret) return ret; + ret = init_reserved_iova_ranges(); + if (ret) + return ret; + err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops); if (err) return err; -- cgit v0.10.2 From 8d54d6c8b8f3e40dadd15b1dad6f190c2ccf7473 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Jul 2016 13:32:20 +0200 Subject: iommu/amd: Implement apply_dm_region call-back It is used to reserve the dm-regions in the iova-tree. 
Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 1bb59ae..4c9063a 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3385,6 +3385,20 @@ static void amd_iommu_put_dm_regions(struct device *dev, kfree(entry); } +static void amd_iommu_apply_dm_region(struct device *dev, + struct iommu_domain *domain, + struct iommu_dm_region *region) +{ + struct protection_domain *pdomain = to_pdomain(domain); + struct dma_ops_domain *dma_dom = pdomain->priv; + unsigned long start, end; + + start = IOVA_PFN(region->start); + end = IOVA_PFN(region->start + region->length); + + WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL); +} + static const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .domain_alloc = amd_iommu_domain_alloc, @@ -3400,6 +3414,7 @@ static const struct iommu_ops amd_iommu_ops = { .device_group = amd_iommu_device_group, .get_dm_regions = amd_iommu_get_dm_regions, .put_dm_regions = amd_iommu_put_dm_regions, + .apply_dm_region = amd_iommu_apply_dm_region, .pgsize_bitmap = AMD_IOMMU_PGSIZES, }; -- cgit v0.10.2 From b911b89b6d0112b5e81b74ce25d894b03023344e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Jul 2016 14:29:11 +0200 Subject: iommu/amd: Pass gfp-flags to iommu_map_page() Make this function ready to be used in the DMA-API path. Reorder parameters a bit while at it. Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 4c9063a..c421c06 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1418,8 +1418,9 @@ static u64 *fetch_pte(struct protection_domain *domain, static int iommu_map_page(struct protection_domain *dom, unsigned long bus_addr, unsigned long phys_addr, + unsigned long page_size, int prot, - unsigned long page_size) + gfp_t gfp) { u64 __pte, *pte; int i, count; @@ -1431,7 +1432,7 @@ static int iommu_map_page(struct protection_domain *dom, return -EINVAL; count = PAGE_SIZE_PTE_COUNT(page_size); - pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL); + pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp); if (!pte) return -ENOMEM; @@ -3283,7 +3284,7 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, prot |= IOMMU_PROT_IW; mutex_lock(&domain->api_lock); - ret = iommu_map_page(domain, iova, paddr, prot, page_size); + ret = iommu_map_page(domain, iova, paddr, page_size, prot, GFP_KERNEL); mutex_unlock(&domain->api_lock); return ret; -- cgit v0.10.2 From 518d9b450387a3508363af58d1f62db9fc92d438 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Jul 2016 14:39:47 +0200 Subject: iommu/amd: Remove special mapping code for dma_ops path Use the iommu-api map/unmap functions instead. This will be required anyway when IOVA code is used for address allocation. 
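The diff below converts __map_single()/__unmap_single() to the driver's iommu_map_page()/iommu_unmap_page() helpers. Expressed with the generic <linux/iommu.h> interface instead, the same pattern (direction-derived protection bits plus a page-by-page loop with rollback) looks roughly like the sketch below; the ex_ names are illustrative and not part of the patch:

#include <linux/iommu.h>
#include <linux/dma-mapping.h>

/* Translate a DMA direction into IOMMU protection flags: the device reads
 * buffers it transmits from and writes buffers it receives into. */
static int ex_dir_to_prot(enum dma_data_direction dir)
{
	switch (dir) {
	case DMA_TO_DEVICE:
		return IOMMU_READ;
	case DMA_FROM_DEVICE:
		return IOMMU_WRITE;
	case DMA_BIDIRECTIONAL:
		return IOMMU_READ | IOMMU_WRITE;
	default:
		return 0;
	}
}

/* Map a physically contiguous region page by page; undo the pages already
 * mapped if a later mapping fails. */
static int ex_map_region(struct iommu_domain *dom, unsigned long iova,
			 phys_addr_t paddr, size_t size,
			 enum dma_data_direction dir)
{
	int prot = ex_dir_to_prot(dir);
	size_t off;
	int ret;

	for (off = 0; off < size; off += PAGE_SIZE) {
		ret = iommu_map(dom, iova + off, paddr + off, PAGE_SIZE, prot);
		if (ret) {
			if (off)
				iommu_unmap(dom, iova, off);
			return ret;
		}
	}

	return 0;
}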
Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index c421c06..1cd57a3 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2560,94 +2560,6 @@ static void update_domain(struct protection_domain *domain) } /* - * This function fetches the PTE for a given address in the aperture - */ -static u64* dma_ops_get_pte(struct dma_ops_domain *dom, - unsigned long address) -{ - struct aperture_range *aperture; - u64 *pte, *pte_page; - - aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; - if (!aperture) - return NULL; - - pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; - if (!pte) { - pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page, - GFP_ATOMIC); - aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; - } else - pte += PM_LEVEL_INDEX(0, address); - - update_domain(&dom->domain); - - return pte; -} - -/* - * This is the generic map function. It maps one 4kb page at paddr to - * the given address in the DMA address space for the domain. - */ -static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, - unsigned long address, - phys_addr_t paddr, - int direction) -{ - u64 *pte, __pte; - - WARN_ON(address > dom->aperture_size); - - paddr &= PAGE_MASK; - - pte = dma_ops_get_pte(dom, address); - if (!pte) - return DMA_ERROR_CODE; - - __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; - - if (direction == DMA_TO_DEVICE) - __pte |= IOMMU_PTE_IR; - else if (direction == DMA_FROM_DEVICE) - __pte |= IOMMU_PTE_IW; - else if (direction == DMA_BIDIRECTIONAL) - __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW; - - WARN_ON_ONCE(*pte); - - *pte = __pte; - - return (dma_addr_t)address; -} - -/* - * The generic unmapping function for on page in the DMA address space. - */ -static void dma_ops_domain_unmap(struct dma_ops_domain *dom, - unsigned long address) -{ - struct aperture_range *aperture; - u64 *pte; - - if (address >= dom->aperture_size) - return; - - aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; - if (!aperture) - return; - - pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; - if (!pte) - return; - - pte += PM_LEVEL_INDEX(0, address); - - WARN_ON_ONCE(!*pte); - - *pte = 0ULL; -} - -/* * This function contains common code for mapping of a physically * contiguous memory region into DMA address space. It is used by all * mapping functions provided with this IOMMU driver. 
@@ -2657,7 +2569,7 @@ static dma_addr_t __map_single(struct device *dev, struct dma_ops_domain *dma_dom, phys_addr_t paddr, size_t size, - int dir, + int direction, bool align, u64 dma_mask) { @@ -2665,6 +2577,7 @@ static dma_addr_t __map_single(struct device *dev, dma_addr_t address, start, ret; unsigned int pages; unsigned long align_mask = 0; + int prot = 0; int i; pages = iommu_num_pages(paddr, size, PAGE_SIZE); @@ -2679,10 +2592,18 @@ static dma_addr_t __map_single(struct device *dev, if (address == DMA_ERROR_CODE) goto out; + if (direction == DMA_TO_DEVICE) + prot = IOMMU_PROT_IR; + else if (direction == DMA_FROM_DEVICE) + prot = IOMMU_PROT_IW; + else if (direction == DMA_BIDIRECTIONAL) + prot = IOMMU_PROT_IW | IOMMU_PROT_IR; + start = address; for (i = 0; i < pages; ++i) { - ret = dma_ops_domain_map(dma_dom, start, paddr, dir); - if (ret == DMA_ERROR_CODE) + ret = iommu_map_page(&dma_dom->domain, start, paddr, + PAGE_SIZE, prot, GFP_ATOMIC); + if (ret) goto out_unmap; paddr += PAGE_SIZE; @@ -2702,7 +2623,7 @@ out_unmap: for (--i; i >= 0; --i) { start -= PAGE_SIZE; - dma_ops_domain_unmap(dma_dom, start); + iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE); } dma_ops_free_addresses(dma_dom, address, pages); @@ -2733,7 +2654,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, start = dma_addr; for (i = 0; i < pages; ++i) { - dma_ops_domain_unmap(dma_dom, start); + iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE); start += PAGE_SIZE; } -- cgit v0.10.2 From 256e4621c21aa1bf704e1a12e643923fdb732d04 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Jul 2016 14:23:01 +0200 Subject: iommu/amd: Make use of the generic IOVA allocator Remove the old address allocation code and make use of the generic IOVA allocator that is also used by other dma-ops implementations. 
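Condensed from the hunks that follow, the replacement allocation path amounts to two small helpers around alloc_iova_fast()/free_iova_fast(): sizes are rounded up to a power of two, and when the device's DMA mask exceeds 32 bits an address below 4 GB is tried first before falling back to the full mask. The ex_ names here are illustrative, not the patch's:

#include <linux/iova.h>
#include <linux/log2.h>
#include <linux/dma-mapping.h>

/* Allocate a DMA address range; a return value of 0 means failure. */
static unsigned long ex_alloc_dma_range(struct iova_domain *iovad,
					unsigned int pages, u64 dma_mask)
{
	unsigned long pfn = 0;

	pages = __roundup_pow_of_two(pages);

	if (dma_mask > DMA_BIT_MASK(32))
		pfn = alloc_iova_fast(iovad, pages,
				      DMA_BIT_MASK(32) >> PAGE_SHIFT);
	if (!pfn)
		pfn = alloc_iova_fast(iovad, pages, dma_mask >> PAGE_SHIFT);

	return pfn << PAGE_SHIFT;
}

static void ex_free_dma_range(struct iova_domain *iovad,
			      unsigned long addr, unsigned int pages)
{
	free_iova_fast(iovad, addr >> PAGE_SHIFT,
		       __roundup_pow_of_two(pages));
}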
Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 1cd57a3..77be2d0 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1649,167 +1649,32 @@ out_free: return -ENOMEM; } -static dma_addr_t dma_ops_aperture_alloc(struct dma_ops_domain *dom, - struct aperture_range *range, - unsigned long pages, - unsigned long dma_mask, - unsigned long boundary_size, - unsigned long align_mask, - bool trylock) -{ - unsigned long offset, limit, flags; - dma_addr_t address; - bool flush = false; - - offset = range->offset >> PAGE_SHIFT; - limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, - dma_mask >> PAGE_SHIFT); - - if (trylock) { - if (!spin_trylock_irqsave(&range->bitmap_lock, flags)) - return -1; - } else { - spin_lock_irqsave(&range->bitmap_lock, flags); - } - - address = iommu_area_alloc(range->bitmap, limit, range->next_bit, - pages, offset, boundary_size, align_mask); - if (address == -1) { - /* Nothing found, retry one time */ - address = iommu_area_alloc(range->bitmap, limit, - 0, pages, offset, boundary_size, - align_mask); - flush = true; - } - - if (address != -1) - range->next_bit = address + pages; - - spin_unlock_irqrestore(&range->bitmap_lock, flags); - - if (flush) { - domain_flush_tlb(&dom->domain); - domain_flush_complete(&dom->domain); - } - - return address; -} - -static unsigned long dma_ops_area_alloc(struct device *dev, - struct dma_ops_domain *dom, - unsigned int pages, - unsigned long align_mask, - u64 dma_mask) +static unsigned long dma_ops_alloc_iova(struct device *dev, + struct dma_ops_domain *dma_dom, + unsigned int pages, u64 dma_mask) { - unsigned long boundary_size, mask; - unsigned long address = -1; - bool first = true; - u32 start, i; - - preempt_disable(); - - mask = dma_get_seg_boundary(dev); - -again: - start = this_cpu_read(*dom->next_index); - - /* Sanity check - is it really necessary? */ - if (unlikely(start > APERTURE_MAX_RANGES)) { - start = 0; - this_cpu_write(*dom->next_index, 0); - } - - boundary_size = mask + 1 ? 
ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT : - 1UL << (BITS_PER_LONG - PAGE_SHIFT); + unsigned long pfn = 0; - for (i = 0; i < APERTURE_MAX_RANGES; ++i) { - struct aperture_range *range; - int index; - - index = (start + i) % APERTURE_MAX_RANGES; + pages = __roundup_pow_of_two(pages); - range = dom->aperture[index]; - - if (!range || range->offset >= dma_mask) - continue; - - address = dma_ops_aperture_alloc(dom, range, pages, - dma_mask, boundary_size, - align_mask, first); - if (address != -1) { - address = range->offset + (address << PAGE_SHIFT); - this_cpu_write(*dom->next_index, index); - break; - } - } - - if (address == -1 && first) { - first = false; - goto again; - } + if (dma_mask > DMA_BIT_MASK(32)) + pfn = alloc_iova_fast(&dma_dom->iovad, pages, + IOVA_PFN(DMA_BIT_MASK(32))); - preempt_enable(); + if (!pfn) + pfn = alloc_iova_fast(&dma_dom->iovad, pages, IOVA_PFN(dma_mask)); - return address; + return (pfn << PAGE_SHIFT); } -static unsigned long dma_ops_alloc_addresses(struct device *dev, - struct dma_ops_domain *dom, - unsigned int pages, - unsigned long align_mask, - u64 dma_mask) +static void dma_ops_free_iova(struct dma_ops_domain *dma_dom, + unsigned long address, + unsigned int pages) { - unsigned long address = -1; - - while (address == -1) { - address = dma_ops_area_alloc(dev, dom, pages, - align_mask, dma_mask); - - if (address == -1 && alloc_new_range(dom, false, GFP_ATOMIC)) - break; - } - - if (unlikely(address == -1)) - address = DMA_ERROR_CODE; - - WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); - - return address; -} - -/* - * The address free function. - * - * called with domain->lock held - */ -static void dma_ops_free_addresses(struct dma_ops_domain *dom, - unsigned long address, - unsigned int pages) -{ - unsigned i = address >> APERTURE_RANGE_SHIFT; - struct aperture_range *range = dom->aperture[i]; - unsigned long flags; - - BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); - -#ifdef CONFIG_IOMMU_STRESS - if (i < 4) - return; -#endif - - if (amd_iommu_unmap_flush) { - domain_flush_tlb(&dom->domain); - domain_flush_complete(&dom->domain); - } - - address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; - - spin_lock_irqsave(&range->bitmap_lock, flags); - if (address + pages > range->next_bit) - range->next_bit = address + pages; - bitmap_clear(range->bitmap, address, pages); - spin_unlock_irqrestore(&range->bitmap_lock, flags); + pages = __roundup_pow_of_two(pages); + address >>= PAGE_SHIFT; + free_iova_fast(&dma_dom->iovad, address, pages); } /**************************************************************************** @@ -2586,9 +2451,7 @@ static dma_addr_t __map_single(struct device *dev, if (align) align_mask = (1UL << get_order(size)) - 1; - address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, - dma_mask); - + address = dma_ops_alloc_iova(dev, dma_dom, pages, dma_mask); if (address == DMA_ERROR_CODE) goto out; @@ -2626,7 +2489,10 @@ out_unmap: iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE); } - dma_ops_free_addresses(dma_dom, address, pages); + domain_flush_tlb(&dma_dom->domain); + domain_flush_complete(&dma_dom->domain); + + dma_ops_free_iova(dma_dom, address, pages); return DMA_ERROR_CODE; } @@ -2658,7 +2524,10 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, start += PAGE_SIZE; } - dma_ops_free_addresses(dma_dom, dma_addr, pages); + domain_flush_tlb(&dma_dom->domain); + domain_flush_complete(&dma_dom->domain); + + dma_ops_free_iova(dma_dom, dma_addr, pages); } /* -- cgit v0.10.2 From 
2d4c515bf06c9bce87b546279413621f847ef6a3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Jul 2016 16:21:32 +0200 Subject: iommu/amd: Remove other remains of old address allocator There are other remains in the code from the old allocatore. Remove them all. Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 77be2d0..2ba8b46 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -134,43 +134,12 @@ static int protection_domain_init(struct protection_domain *domain); static void detach_device(struct device *dev); /* - * For dynamic growth the aperture size is split into ranges of 128MB of - * DMA address space each. This struct represents one such range. - */ -struct aperture_range { - - spinlock_t bitmap_lock; - - /* address allocation bitmap */ - unsigned long *bitmap; - unsigned long offset; - unsigned long next_bit; - - /* - * Array of PTE pages for the aperture. In this array we save all the - * leaf pages of the domain page table used for the aperture. This way - * we don't need to walk the page table to find a specific PTE. We can - * just calculate its address in constant time. - */ - u64 *pte_pages[64]; -}; - -/* * Data container for a dma_ops specific protection domain */ struct dma_ops_domain { /* generic protection domain information */ struct protection_domain domain; - /* size of the aperture for the mappings */ - unsigned long aperture_size; - - /* aperture index we start searching for free addresses */ - u32 __percpu *next_index; - - /* address space relevant data */ - struct aperture_range *aperture[APERTURE_MAX_RANGES]; - /* IOVA RB-Tree */ struct iova_domain iovad; }; @@ -410,43 +379,6 @@ static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum) } /* - * This function actually applies the mapping to the page table of the - * dma_ops domain. - */ -static void alloc_unity_mapping(struct dma_ops_domain *dma_dom, - struct unity_map_entry *e) -{ - u64 addr; - - for (addr = e->address_start; addr < e->address_end; - addr += PAGE_SIZE) { - if (addr < dma_dom->aperture_size) - __set_bit(addr >> PAGE_SHIFT, - dma_dom->aperture[0]->bitmap); - } -} - -/* - * Inits the unity mappings required for a specific device - */ -static void init_unity_mappings_for_device(struct device *dev, - struct dma_ops_domain *dma_dom) -{ - struct unity_map_entry *e; - int devid; - - devid = get_device_id(dev); - if (devid < 0) - return; - - list_for_each_entry(e, &amd_iommu_unity_map, list) { - if (!(devid >= e->devid_start && devid <= e->devid_end)) - continue; - alloc_unity_mapping(dma_dom, e); - } -} - -/* * This function checks if the driver got a valid device from the caller to * avoid dereferencing invalid pointers. */ @@ -473,24 +405,12 @@ static bool check_device(struct device *dev) static void init_iommu_group(struct device *dev) { - struct dma_ops_domain *dma_domain; - struct iommu_domain *domain; struct iommu_group *group; group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) return; - domain = iommu_group_default_domain(group); - if (!domain) - goto out; - - if (to_pdomain(domain)->flags == PD_DMA_OPS_MASK) { - dma_domain = to_pdomain(domain)->priv; - init_unity_mappings_for_device(dev, dma_domain); - } - -out: iommu_group_put(group); } @@ -1496,158 +1416,10 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom, /**************************************************************************** * * The next functions belong to the address allocator for the dma_ops - * interface functions. 
They work like the allocators in the other IOMMU - * drivers. Its basically a bitmap which marks the allocated pages in - * the aperture. Maybe it could be enhanced in the future to a more - * efficient allocator. + * interface functions. * ****************************************************************************/ -/* - * The address allocator core functions. - * - * called with domain->lock held - */ - -/* - * Used to reserve address ranges in the aperture (e.g. for exclusion - * ranges. - */ -static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, - unsigned long start_page, - unsigned int pages) -{ - unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; - - if (start_page + pages > last_page) - pages = last_page - start_page; - - for (i = start_page; i < start_page + pages; ++i) { - int index = i / APERTURE_RANGE_PAGES; - int page = i % APERTURE_RANGE_PAGES; - __set_bit(page, dom->aperture[index]->bitmap); - } -} - -/* - * This function is used to add a new aperture range to an existing - * aperture in case of dma_ops domain allocation or address allocation - * failure. - */ -static int alloc_new_range(struct dma_ops_domain *dma_dom, - bool populate, gfp_t gfp) -{ - int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; - unsigned long i, old_size, pte_pgsize; - struct aperture_range *range; - struct amd_iommu *iommu; - unsigned long flags; - -#ifdef CONFIG_IOMMU_STRESS - populate = false; -#endif - - if (index >= APERTURE_MAX_RANGES) - return -ENOMEM; - - range = kzalloc(sizeof(struct aperture_range), gfp); - if (!range) - return -ENOMEM; - - range->bitmap = (void *)get_zeroed_page(gfp); - if (!range->bitmap) - goto out_free; - - range->offset = dma_dom->aperture_size; - - spin_lock_init(&range->bitmap_lock); - - if (populate) { - unsigned long address = dma_dom->aperture_size; - int i, num_ptes = APERTURE_RANGE_PAGES / 512; - u64 *pte, *pte_page; - - for (i = 0; i < num_ptes; ++i) { - pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE, - &pte_page, gfp); - if (!pte) - goto out_free; - - range->pte_pages[i] = pte_page; - - address += APERTURE_RANGE_SIZE / 64; - } - } - - spin_lock_irqsave(&dma_dom->domain.lock, flags); - - /* First take the bitmap_lock and then publish the range */ - spin_lock(&range->bitmap_lock); - - old_size = dma_dom->aperture_size; - dma_dom->aperture[index] = range; - dma_dom->aperture_size += APERTURE_RANGE_SIZE; - - /* Reserve address range used for MSI messages */ - if (old_size < MSI_ADDR_BASE_LO && - dma_dom->aperture_size > MSI_ADDR_BASE_LO) { - unsigned long spage; - int pages; - - pages = iommu_num_pages(MSI_ADDR_BASE_LO, 0x10000, PAGE_SIZE); - spage = MSI_ADDR_BASE_LO >> PAGE_SHIFT; - - dma_ops_reserve_addresses(dma_dom, spage, pages); - } - - /* Initialize the exclusion range if necessary */ - for_each_iommu(iommu) { - if (iommu->exclusion_start && - iommu->exclusion_start >= dma_dom->aperture[index]->offset - && iommu->exclusion_start < dma_dom->aperture_size) { - unsigned long startpage; - int pages = iommu_num_pages(iommu->exclusion_start, - iommu->exclusion_length, - PAGE_SIZE); - startpage = iommu->exclusion_start >> PAGE_SHIFT; - dma_ops_reserve_addresses(dma_dom, startpage, pages); - } - } - - /* - * Check for areas already mapped as present in the new aperture - * range and mark those pages as reserved in the allocator. Such - * mappings may already exist as a result of requested unity - * mappings for devices. 
- */ - for (i = dma_dom->aperture[index]->offset; - i < dma_dom->aperture_size; - i += pte_pgsize) { - u64 *pte = fetch_pte(&dma_dom->domain, i, &pte_pgsize); - if (!pte || !IOMMU_PTE_PRESENT(*pte)) - continue; - - dma_ops_reserve_addresses(dma_dom, i >> PAGE_SHIFT, - pte_pgsize >> 12); - } - - update_domain(&dma_dom->domain); - - spin_unlock(&range->bitmap_lock); - - spin_unlock_irqrestore(&dma_dom->domain.lock, flags); - - return 0; - -out_free: - update_domain(&dma_dom->domain); - - free_page((unsigned long)range->bitmap); - - kfree(range); - - return -ENOMEM; -} static unsigned long dma_ops_alloc_iova(struct device *dev, struct dma_ops_domain *dma_dom, @@ -1848,46 +1620,18 @@ static void free_gcr3_table(struct protection_domain *domain) */ static void dma_ops_domain_free(struct dma_ops_domain *dom) { - int i; - if (!dom) return; - put_iova_domain(&dom->iovad); - - free_percpu(dom->next_index); - del_domain_from_list(&dom->domain); - free_pagetable(&dom->domain); + put_iova_domain(&dom->iovad); - for (i = 0; i < APERTURE_MAX_RANGES; ++i) { - if (!dom->aperture[i]) - continue; - free_page((unsigned long)dom->aperture[i]->bitmap); - kfree(dom->aperture[i]); - } + free_pagetable(&dom->domain); kfree(dom); } -static int dma_ops_domain_alloc_apertures(struct dma_ops_domain *dma_dom, - int max_apertures) -{ - int ret, i, apertures; - - apertures = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; - ret = 0; - - for (i = apertures; i < max_apertures; ++i) { - ret = alloc_new_range(dma_dom, false, GFP_KERNEL); - if (ret) - break; - } - - return ret; -} - /* * Allocates a new protection domain usable for the dma_ops functions. * It also initializes the page table and the address allocator data @@ -1896,7 +1640,6 @@ static int dma_ops_domain_alloc_apertures(struct dma_ops_domain *dma_dom, static struct dma_ops_domain *dma_ops_domain_alloc(void) { struct dma_ops_domain *dma_dom; - int cpu; dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); if (!dma_dom) @@ -1905,10 +1648,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) if (protection_domain_init(&dma_dom->domain)) goto free_dma_dom; - dma_dom->next_index = alloc_percpu(u32); - if (!dma_dom->next_index) - goto free_dma_dom; - dma_dom->domain.mode = PAGE_MODE_2_LEVEL; dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); dma_dom->domain.flags = PD_DMA_OPS_MASK; @@ -1916,26 +1655,14 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) if (!dma_dom->domain.pt_root) goto free_dma_dom; - add_domain_to_list(&dma_dom->domain); - - if (alloc_new_range(dma_dom, true, GFP_KERNEL)) - goto free_dma_dom; - - /* - * mark the first page as allocated so we never return 0 as - * a valid dma-address. 
So we can use 0 as error value - */ - dma_dom->aperture[0]->bitmap[0] = 1; - - for_each_possible_cpu(cpu) - *per_cpu_ptr(dma_dom->next_index, cpu) = 0; - init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN, DMA_32BIT_PFN); /* Initialize reserved ranges */ copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad); + add_domain_to_list(&dma_dom->domain); + return dma_dom; free_dma_dom: @@ -2510,10 +2237,6 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, dma_addr_t i, start; unsigned int pages; - if ((dma_addr == DMA_ERROR_CODE) || - (dma_addr + size > dma_dom->aperture_size)) - return; - flush_addr = dma_addr; pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); dma_addr &= PAGE_MASK; @@ -2727,34 +2450,6 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) return check_device(dev); } -static int set_dma_mask(struct device *dev, u64 mask) -{ - struct protection_domain *domain; - int max_apertures = 1; - - domain = get_domain(dev); - if (IS_ERR(domain)) - return PTR_ERR(domain); - - if (mask == DMA_BIT_MASK(64)) - max_apertures = 8; - else if (mask > DMA_BIT_MASK(32)) - max_apertures = 4; - - /* - * To prevent lock contention it doesn't make sense to allocate more - * apertures than online cpus - */ - if (max_apertures > num_online_cpus()) - max_apertures = num_online_cpus(); - - if (dma_ops_domain_alloc_apertures(domain->priv, max_apertures)) - dev_err(dev, "Can't allocate %d iommu apertures\n", - max_apertures); - - return 0; -} - static struct dma_map_ops amd_iommu_dma_ops = { .alloc = alloc_coherent, .free = free_coherent, @@ -2763,7 +2458,6 @@ static struct dma_map_ops amd_iommu_dma_ops = { .map_sg = map_sg, .unmap_sg = unmap_sg, .dma_supported = amd_iommu_dma_supported, - .set_dma_mask = set_dma_mask, }; static int init_reserved_iova_ranges(void) -- cgit v0.10.2 From bda350dbdbc1ad8655ece0ec3d41bebc3ee7a77b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Jul 2016 16:28:02 +0200 Subject: iommu/amd: Remove align-parameter from __map_single() This parameter is not required anymore because the iova-allocations are always aligned to its size. 
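For context, the alignment the removed parameter used to provide now comes from the allocator itself: dma_ops_alloc_iova() (added earlier in this series) rounds the page count up to a power of two and the IOVA allocator hands out size-aligned ranges. A rough sketch of that property follows; the helper name is made up for illustration and the check merely restates what the driver code already guarantees:

	/*
	 * Hypothetical helper, for illustration only: a successful allocation
	 * from dma_ops_alloc_iova() is aligned to its (power-of-two) size,
	 * because the page count is rounded up with __roundup_pow_of_two()
	 * and the IOVA allocator returns size-aligned ranges.
	 */
	static dma_addr_t check_alloc_alignment(struct device *dev,
						struct dma_ops_domain *dma_dom,
						unsigned int pages, u64 dma_mask)
	{
		dma_addr_t addr = dma_ops_alloc_iova(dev, dma_dom, pages, dma_mask);

		if (addr != DMA_ERROR_CODE)
			WARN_ON(!IS_ALIGNED(addr,
					    __roundup_pow_of_two(pages) << PAGE_SHIFT));

		return addr;
	}
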
Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2ba8b46..d218e35 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2162,22 +2162,17 @@ static dma_addr_t __map_single(struct device *dev, phys_addr_t paddr, size_t size, int direction, - bool align, u64 dma_mask) { dma_addr_t offset = paddr & ~PAGE_MASK; dma_addr_t address, start, ret; unsigned int pages; - unsigned long align_mask = 0; int prot = 0; int i; pages = iommu_num_pages(paddr, size, PAGE_SIZE); paddr &= PAGE_MASK; - if (align) - align_mask = (1UL << get_order(size)) - 1; - address = dma_ops_alloc_iova(dev, dma_dom, pages, dma_mask); if (address == DMA_ERROR_CODE) goto out; @@ -2273,8 +2268,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, dma_mask = *dev->dma_mask; - return __map_single(dev, domain->priv, paddr, size, dir, false, - dma_mask); + return __map_single(dev, domain->priv, paddr, size, dir, dma_mask); } /* @@ -2317,8 +2311,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, paddr = sg_phys(s); s->dma_address = __map_single(dev, domain->priv, - paddr, s->length, dir, false, - dma_mask); + paddr, s->length, dir, dma_mask); if (s->dma_address) { s->dma_length = s->length; @@ -2402,7 +2395,7 @@ static void *alloc_coherent(struct device *dev, size_t size, dma_mask = *dev->dma_mask; *dma_addr = __map_single(dev, domain->priv, page_to_phys(page), - size, DMA_BIDIRECTIONAL, true, dma_mask); + size, DMA_BIDIRECTIONAL, dma_mask); if (*dma_addr == DMA_ERROR_CODE) goto out_free; -- cgit v0.10.2 From c5b5da9c79bb2d88fa3c5163ccf1a7a9e89cfa49 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 6 Jul 2016 11:55:37 +0200 Subject: iommu/amd: Set up data structures for flush queue The flush queue is the equivalent to defered-flushing in the Intel VT-d driver. This patch sets up the data structures needed for this. Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index d218e35..38f8a5e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -89,6 +89,22 @@ LIST_HEAD(ioapic_map); LIST_HEAD(hpet_map); LIST_HEAD(acpihid_map); +#define FLUSH_QUEUE_SIZE 256 + +struct flush_queue_entry { + unsigned long iova_pfn; + unsigned long pages; + struct dma_ops_domain *dma_dom; +}; + +struct flush_queue { + spinlock_t lock; + unsigned next; + struct flush_queue_entry *entries; +}; + +DEFINE_PER_CPU(struct flush_queue, flush_queue); + /* * Domain for untranslated devices - only allocated * if iommu=pt passed on kernel cmd line. 
@@ -2508,7 +2524,7 @@ static int init_reserved_iova_ranges(void) int __init amd_iommu_init_api(void) { - int ret, err = 0; + int ret, cpu, err = 0; ret = iova_cache_get(); if (ret) @@ -2518,6 +2534,18 @@ int __init amd_iommu_init_api(void) if (ret) return ret; + for_each_possible_cpu(cpu) { + struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu); + + queue->entries = kzalloc(FLUSH_QUEUE_SIZE * + sizeof(*queue->entries), + GFP_KERNEL); + if (!queue->entries) + goto out_put_iova; + + spin_lock_init(&queue->lock); + } + err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops); if (err) return err; @@ -2530,6 +2558,15 @@ int __init amd_iommu_init_api(void) if (err) return err; return 0; + +out_put_iova: + for_each_possible_cpu(cpu) { + struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu); + + kfree(queue->entries); + } + + return -ENOMEM; } int __init amd_iommu_init_dma_ops(void) @@ -2552,6 +2589,7 @@ int __init amd_iommu_init_dma_ops(void) pr_info("AMD-Vi: Lazy IO/TLB flushing enabled\n"); return 0; + } /***************************************************************************** -- cgit v0.10.2 From f1eae7c58036b30d434d84a34c6c3154bfb5b496 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 6 Jul 2016 12:50:35 +0200 Subject: iommu/amd: Allow NULL pointer parameter for domain_flush_complete() If domain == NULL is passed to the function, it will queue a completion-wait command on all IOMMUs in the system. Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 38f8a5e..0884b12 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1177,7 +1177,7 @@ static void domain_flush_complete(struct protection_domain *domain) int i; for (i = 0; i < amd_iommus_present; ++i) { - if (!domain->dev_iommu[i]) + if (domain && !domain->dev_iommu[i]) continue; /* -- cgit v0.10.2 From b1516a14657acf81a587e9a6e733a881625eee53 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 6 Jul 2016 13:07:22 +0200 Subject: iommu/amd: Implement flush queue With the flush queue the IOMMU TLBs will not be flushed at every dma-ops unmap operation. The unmapped ranges will be queued and flushed at once, when the queue is full. This makes unmapping operations a lot faster (on average) and restores the performance of the old address allocator. 
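In essence the change below turns every dma-ops unmap into a cheap "remember this range" operation and amortises the expensive IOTLB flush over many unmaps. The following condensed sketch shows the pattern only; the example_* names are invented, locking, per-CPU queues and the timer are omitted, and it assumes for simplicity that all queued ranges belong to one domain (the real code flushes every known domain). See the diff below for the actual implementation:

	/* Simplified model of the deferred-flush queue; not the in-tree code. */
	#define EXAMPLE_QUEUE_SIZE 256

	struct example_flush_entry {
		unsigned long iova_pfn;
		unsigned long pages;
		struct dma_ops_domain *dma_dom;
	};

	struct example_flush_queue {
		unsigned int next;
		struct example_flush_entry entries[EXAMPLE_QUEUE_SIZE];
	};

	static void example_queue_add(struct example_flush_queue *queue,
				      struct dma_ops_domain *dma_dom,
				      unsigned long iova_pfn, unsigned long pages)
	{
		if (queue->next == EXAMPLE_QUEUE_SIZE) {
			int idx;

			/* One TLB flush covers all queued ranges ...          */
			/* (simplification: all entries share this domain)     */
			domain_flush_tlb(&dma_dom->domain);
			domain_flush_complete(&dma_dom->domain);

			/* ... after which the IOVA ranges can be recycled.    */
			for (idx = 0; idx < queue->next; ++idx)
				free_iova_fast(&queue->entries[idx].dma_dom->iovad,
					       queue->entries[idx].iova_pfn,
					       queue->entries[idx].pages);

			queue->next = 0;
		}

		queue->entries[queue->next++] = (struct example_flush_entry) {
			.iova_pfn = iova_pfn,
			.pages    = pages,
			.dma_dom  = dma_dom,
		};
	}
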
Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 0884b12..a8e4c5a 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2120,6 +2120,66 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev) * *****************************************************************************/ +static void __queue_flush(struct flush_queue *queue) +{ + struct protection_domain *domain; + unsigned long flags; + int idx; + + /* First flush TLB of all known domains */ + spin_lock_irqsave(&amd_iommu_pd_lock, flags); + list_for_each_entry(domain, &amd_iommu_pd_list, list) + domain_flush_tlb(domain); + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); + + /* Wait until flushes have completed */ + domain_flush_complete(NULL); + + for (idx = 0; idx < queue->next; ++idx) { + struct flush_queue_entry *entry; + + entry = queue->entries + idx; + + free_iova_fast(&entry->dma_dom->iovad, + entry->iova_pfn, + entry->pages); + + /* Not really necessary, just to make sure we catch any bugs */ + entry->dma_dom = NULL; + } + + queue->next = 0; +} + +static void queue_add(struct dma_ops_domain *dma_dom, + unsigned long address, unsigned long pages) +{ + struct flush_queue_entry *entry; + struct flush_queue *queue; + unsigned long flags; + int idx; + + pages = __roundup_pow_of_two(pages); + address >>= PAGE_SHIFT; + + queue = get_cpu_ptr(&flush_queue); + spin_lock_irqsave(&queue->lock, flags); + + if (queue->next == FLUSH_QUEUE_SIZE) + __queue_flush(queue); + + idx = queue->next++; + entry = queue->entries + idx; + + entry->iova_pfn = address; + entry->pages = pages; + entry->dma_dom = dma_dom; + + spin_unlock_irqrestore(&queue->lock, flags); + put_cpu_ptr(&flush_queue); +} + + /* * In the dma_ops path we only have the struct device. This function * finds the corresponding IOMMU, the protection domain and the @@ -2258,10 +2318,13 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, start += PAGE_SIZE; } - domain_flush_tlb(&dma_dom->domain); - domain_flush_complete(&dma_dom->domain); - - dma_ops_free_iova(dma_dom, dma_addr, pages); + if (amd_iommu_unmap_flush) { + dma_ops_free_iova(dma_dom, dma_addr, pages); + domain_flush_tlb(&dma_dom->domain); + domain_flush_complete(&dma_dom->domain); + } else { + queue_add(dma_dom, dma_addr, pages); + } } /* -- cgit v0.10.2 From bb279475db4d0bb07e4dbc99e060362b9f3b5093 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 6 Jul 2016 13:56:36 +0200 Subject: iommu/amd: Implement timeout to flush unmap queues In case the queue doesn't fill up, we flush the TLB at least 10ms after the unmap happened to make sure that the TLB is cleaned up. Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index a8e4c5a..c0b2f4f 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -105,6 +105,9 @@ struct flush_queue { DEFINE_PER_CPU(struct flush_queue, flush_queue); +static atomic_t queue_timer_on; +static struct timer_list queue_timer; + /* * Domain for untranslated devices - only allocated * if iommu=pt passed on kernel cmd line. 
@@ -2151,6 +2154,24 @@ static void __queue_flush(struct flush_queue *queue) queue->next = 0; } +void queue_flush_timeout(unsigned long unsused) +{ + int cpu; + + atomic_set(&queue_timer_on, 0); + + for_each_possible_cpu(cpu) { + struct flush_queue *queue; + unsigned long flags; + + queue = per_cpu_ptr(&flush_queue, cpu); + spin_lock_irqsave(&queue->lock, flags); + if (queue->next > 0) + __queue_flush(queue); + spin_unlock_irqrestore(&queue->lock, flags); + } +} + static void queue_add(struct dma_ops_domain *dma_dom, unsigned long address, unsigned long pages) { @@ -2176,6 +2197,10 @@ static void queue_add(struct dma_ops_domain *dma_dom, entry->dma_dom = dma_dom; spin_unlock_irqrestore(&queue->lock, flags); + + if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0) + mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10)); + put_cpu_ptr(&flush_queue); } @@ -2634,6 +2659,9 @@ out_put_iova: int __init amd_iommu_init_dma_ops(void) { + setup_timer(&queue_timer, queue_flush_timeout, 0); + atomic_set(&queue_timer_on, 0); + swiotlb = iommu_pass_through ? 1 : 0; iommu_detected = 1; -- cgit v0.10.2 From f37f7f33d561901d599e98a72bbf44af1f162732 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 8 Jul 2016 11:47:22 +0200 Subject: iommu/amd: Introduce dir2prot() helper This function converts dma_data_direction to iommu-protection flags. This will be needed on multiple places in the code, so this will save some code. Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index c0b2f4f..281cacb 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2252,6 +2252,17 @@ static void update_domain(struct protection_domain *domain) domain->updated = false; } +static int dir2prot(enum dma_data_direction direction) +{ + if (direction == DMA_TO_DEVICE) + return IOMMU_PROT_IR; + else if (direction == DMA_FROM_DEVICE) + return IOMMU_PROT_IW; + else if (direction == DMA_BIDIRECTIONAL) + return IOMMU_PROT_IW | IOMMU_PROT_IR; + else + return 0; +} /* * This function contains common code for mapping of a physically * contiguous memory region into DMA address space. It is used by all @@ -2262,7 +2273,7 @@ static dma_addr_t __map_single(struct device *dev, struct dma_ops_domain *dma_dom, phys_addr_t paddr, size_t size, - int direction, + enum dma_data_direction direction, u64 dma_mask) { dma_addr_t offset = paddr & ~PAGE_MASK; @@ -2278,12 +2289,7 @@ static dma_addr_t __map_single(struct device *dev, if (address == DMA_ERROR_CODE) goto out; - if (direction == DMA_TO_DEVICE) - prot = IOMMU_PROT_IR; - else if (direction == DMA_FROM_DEVICE) - prot = IOMMU_PROT_IW; - else if (direction == DMA_BIDIRECTIONAL) - prot = IOMMU_PROT_IW | IOMMU_PROT_IR; + prot = dir2prot(direction); start = address; for (i = 0; i < pages; ++i) { -- cgit v0.10.2 From 80187fd39dcb30e3aa39e93a87b2d2f7fc8f4fd5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 6 Jul 2016 17:20:54 +0200 Subject: iommu/amd: Optimize map_sg and unmap_sg Optimize these functions so that they need only one call into the address alloctor. This also saves a couple of io-tlb flushes in the unmap_sg path. 
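The core of the optimisation is to size one IOVA allocation for the whole scatterlist up front and then fill it page by page, so the allocator is entered once per map_sg() call rather than once per element. A condensed sketch of that flow is given here; error unwinding and the segment-boundary handling inside sg_num_pages() are omitted, and the example_ function name is invented (the real code is in the diff below). Note that sg_num_pages() also stores each element's page offset in s->dma_address, which is what the mapping loop relies on:

	/* Condensed map_sg flow; error handling omitted for clarity. */
	static int example_map_sg(struct device *dev, struct dma_ops_domain *dma_dom,
				  struct scatterlist *sglist, int nelems,
				  enum dma_data_direction direction, u64 dma_mask)
	{
		struct protection_domain *domain = &dma_dom->domain;
		unsigned long address;
		struct scatterlist *s;
		int i, j, prot;

		/* One allocation for the complete list ...                     */
		address = dma_ops_alloc_iova(dev, dma_dom,
					     sg_num_pages(dev, sglist, nelems),
					     dma_mask);
		prot = dir2prot(direction);

		/* ... then map each element into its slot of that range.      */
		for_each_sg(sglist, s, nelems, i) {
			int pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);

			for (j = 0; j < pages; ++j)
				iommu_map_page(domain,
					       address + s->dma_address + (j << PAGE_SHIFT),
					       (sg_phys(s) & PAGE_MASK) + (j << PAGE_SHIFT),
					       PAGE_SIZE, prot, GFP_ATOMIC);
		}

		/* Finally publish the dma addresses to the caller.            */
		for_each_sg(sglist, s, nelems, i) {
			s->dma_address += address + s->offset;
			s->dma_length   = s->length;
		}

		return nelems;
	}
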
Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 281cacb..acad37c 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2396,50 +2396,110 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, __unmap_single(domain->priv, dma_addr, size, dir); } +static int sg_num_pages(struct device *dev, + struct scatterlist *sglist, + int nelems) +{ + unsigned long mask, boundary_size; + struct scatterlist *s; + int i, npages = 0; + + mask = dma_get_seg_boundary(dev); + boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT : + 1UL << (BITS_PER_LONG - PAGE_SHIFT); + + for_each_sg(sglist, s, nelems, i) { + int p, n; + + s->dma_address = npages << PAGE_SHIFT; + p = npages % boundary_size; + n = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE); + if (p + n > boundary_size) + npages += boundary_size - p; + npages += n; + } + + return npages; +} + /* * The exported map_sg function for dma_ops (handles scatter-gather * lists). */ static int map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction dir, + int nelems, enum dma_data_direction direction, struct dma_attrs *attrs) { + int mapped_pages = 0, npages = 0, prot = 0, i; struct protection_domain *domain; - int i; + struct dma_ops_domain *dma_dom; struct scatterlist *s; - phys_addr_t paddr; - int mapped_elems = 0; + unsigned long address; u64 dma_mask; domain = get_domain(dev); if (IS_ERR(domain)) return 0; + dma_dom = domain->priv; dma_mask = *dev->dma_mask; + npages = sg_num_pages(dev, sglist, nelems); + + address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask); + if (address == DMA_ERROR_CODE) + goto out_err; + + prot = dir2prot(direction); + + /* Map all sg entries */ for_each_sg(sglist, s, nelems, i) { - paddr = sg_phys(s); + int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE); + + for (j = 0; j < pages; ++j) { + unsigned long bus_addr, phys_addr; + int ret; - s->dma_address = __map_single(dev, domain->priv, - paddr, s->length, dir, dma_mask); + bus_addr = address + s->dma_address + (j << PAGE_SHIFT); + phys_addr = (sg_phys(s) & PAGE_MASK) + (j << PAGE_SHIFT); + ret = iommu_map_page(domain, bus_addr, phys_addr, PAGE_SIZE, prot, GFP_ATOMIC); + if (ret) + goto out_unmap; - if (s->dma_address) { - s->dma_length = s->length; - mapped_elems++; - } else - goto unmap; + mapped_pages += 1; + } } - return mapped_elems; + /* Everything is mapped - write the right values into s->dma_address */ + for_each_sg(sglist, s, nelems, i) { + s->dma_address += address + s->offset; + s->dma_length = s->length; + } + + return nelems; + +out_unmap: + pr_err("%s: IOMMU mapping error in map_sg (io-pages: %d)\n", + dev_name(dev), npages); + + for_each_sg(sglist, s, nelems, i) { + int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE); + + for (j = 0; j < pages; ++j) { + unsigned long bus_addr; -unmap: - for_each_sg(sglist, s, mapped_elems, i) { - if (s->dma_address) - __unmap_single(domain->priv, s->dma_address, - s->dma_length, dir); - s->dma_address = s->dma_length = 0; + bus_addr = address + s->dma_address + (j << PAGE_SHIFT); + iommu_unmap_page(domain, bus_addr, PAGE_SIZE); + + if (--mapped_pages) + goto out_free_iova; + } } +out_free_iova: + free_iova_fast(&dma_dom->iovad, address, npages); + +out_err: return 0; } @@ -2452,18 +2512,17 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, struct dma_attrs *attrs) { struct protection_domain *domain; - struct scatterlist *s; - int i; + 
unsigned long startaddr; + int npages = 2; domain = get_domain(dev); if (IS_ERR(domain)) return; - for_each_sg(sglist, s, nelems, i) { - __unmap_single(domain->priv, s->dma_address, - s->dma_length, dir); - s->dma_address = s->dma_length = 0; - } + startaddr = sg_dma_address(sglist) & PAGE_MASK; + npages = sg_num_pages(dev, sglist, nelems); + + __unmap_single(domain->priv, startaddr, npages << PAGE_SHIFT, dir); } /* -- cgit v0.10.2 From d26592a93ddb7a73cbd82b08e2e0d2985d68d750 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 7 Jul 2016 15:31:13 +0200 Subject: iommu/amd: Use dev_data->domain in get_domain() Using the cached value is much more efficient than calling into the IOMMU core code. Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index acad37c..155a3e6 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2215,16 +2215,11 @@ static void queue_add(struct dma_ops_domain *dma_dom, static struct protection_domain *get_domain(struct device *dev) { struct protection_domain *domain; - struct iommu_domain *io_domain; if (!check_device(dev)) return ERR_PTR(-EINVAL); - io_domain = iommu_get_domain_for_dev(dev); - if (!io_domain) - return NULL; - - domain = to_pdomain(io_domain); + domain = get_dev_data(dev)->domain; if (!dma_ops_domain(domain)) return ERR_PTR(-EBUSY); -- cgit v0.10.2 From cda7005ba2cbd0744fea343dd5b2aa637eba5b9e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 7 Jul 2016 15:57:04 +0200 Subject: iommu/amd: Handle IOMMU_DOMAIN_DMA in ops->domain_free call-back This domain type is not yet handled in the iommu_ops->domain_free() call-back. Fix that. Fixes: 0bb6e243d7fb ('iommu/amd: Support IOMMU_DOMAIN_DMA type allocation') Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 155a3e6..5c72d12 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2863,9 +2863,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) static void amd_iommu_domain_free(struct iommu_domain *dom) { struct protection_domain *domain; - - if (!dom) - return; + struct dma_ops_domain *dma_dom; domain = to_pdomain(dom); @@ -2874,13 +2872,24 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) BUG_ON(domain->dev_cnt != 0); - if (domain->mode != PAGE_MODE_NONE) - free_pagetable(domain); + if (!dom) + return; - if (domain->flags & PD_IOMMUV2_MASK) - free_gcr3_table(domain); + switch (dom->type) { + case IOMMU_DOMAIN_DMA: + dma_dom = domain->priv; + dma_ops_domain_free(dma_dom); + break; + default: + if (domain->mode != PAGE_MODE_NONE) + free_pagetable(domain); - protection_domain_free(domain); + if (domain->flags & PD_IOMMUV2_MASK) + free_gcr3_table(domain); + + protection_domain_free(domain); + break; + } } static void amd_iommu_detach_device(struct iommu_domain *dom, -- cgit v0.10.2 From 281e8ccbff172899a60579773e72ad63d58b3770 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 7 Jul 2016 16:12:02 +0200 Subject: iommu/amd: Flush iova queue before releasing dma_ops_domain Before a dma_ops_domain can be freed, we need to make sure it is not longer referenced by the flush queue. So empty the queue before a dma_ops_domain can be freed. 
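The ordering is the whole point: each queued flush_queue_entry still carries a pointer to the dma_ops_domain and its iova_domain, so freeing the domain first would let a later queue flush call free_iova_fast() on released memory. A minimal sketch of the required teardown order, mirroring the hunk below (the example_ wrapper is invented for illustration):

	/* Sketch of the teardown order; mirrors the change below. */
	static void example_release_dma_ops_domain(struct dma_ops_domain *dma_dom)
	{
		/*
		 * 1. Drain all per-CPU flush queues so that no queued entry
		 *    references dma_dom->iovad any more.
		 */
		queue_flush_all();

		/*
		 * 2. Only now is it safe to destroy the iova_domain and the
		 *    rest of the dma_ops_domain.
		 */
		dma_ops_domain_free(dma_dom);
	}
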
Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 5c72d12..d13a186 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2154,12 +2154,10 @@ static void __queue_flush(struct flush_queue *queue) queue->next = 0; } -void queue_flush_timeout(unsigned long unsused) +static void queue_flush_all(void) { int cpu; - atomic_set(&queue_timer_on, 0); - for_each_possible_cpu(cpu) { struct flush_queue *queue; unsigned long flags; @@ -2172,6 +2170,12 @@ void queue_flush_timeout(unsigned long unsused) } } +static void queue_flush_timeout(unsigned long unsused) +{ + atomic_set(&queue_timer_on, 0); + queue_flush_all(); +} + static void queue_add(struct dma_ops_domain *dma_dom, unsigned long address, unsigned long pages) { @@ -2877,6 +2881,13 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) switch (dom->type) { case IOMMU_DOMAIN_DMA: + /* + * First make sure the domain is no longer referenced from the + * flush queue + */ + queue_flush_all(); + + /* Now release the domain */ dma_dom = domain->priv; dma_ops_domain_free(dma_dom); break; -- cgit v0.10.2 From b3311b061de2e51db683a67092546876839df532 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 8 Jul 2016 13:31:31 +0200 Subject: iommu/amd: Use container_of to get dma_ops_domain This is better than storing an extra pointer in struct protection_domain, because this pointer can now be removed from the struct. Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index d13a186..fb43cc5 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -231,6 +231,12 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) return container_of(dom, struct protection_domain, domain); } +static struct dma_ops_domain* to_dma_ops_domain(struct protection_domain *domain) +{ + BUG_ON(domain->flags != PD_DMA_OPS_MASK); + return container_of(domain, struct dma_ops_domain, domain); +} + static struct iommu_dev_data *alloc_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -1670,7 +1676,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) dma_dom->domain.mode = PAGE_MODE_2_LEVEL; dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); dma_dom->domain.flags = PD_DMA_OPS_MASK; - dma_dom->domain.priv = dma_dom; if (!dma_dom->domain.pt_root) goto free_dma_dom; @@ -2367,6 +2372,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, { phys_addr_t paddr = page_to_phys(page) + offset; struct protection_domain *domain; + struct dma_ops_domain *dma_dom; u64 dma_mask; domain = get_domain(dev); @@ -2376,8 +2382,9 @@ static dma_addr_t map_page(struct device *dev, struct page *page, return DMA_ERROR_CODE; dma_mask = *dev->dma_mask; + dma_dom = to_dma_ops_domain(domain); - return __map_single(dev, domain->priv, paddr, size, dir, dma_mask); + return __map_single(dev, dma_dom, paddr, size, dir, dma_mask); } /* @@ -2387,12 +2394,15 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { struct protection_domain *domain; + struct dma_ops_domain *dma_dom; domain = get_domain(dev); if (IS_ERR(domain)) return; - __unmap_single(domain->priv, dma_addr, size, dir); + dma_dom = to_dma_ops_domain(domain); + + __unmap_single(dma_dom, dma_addr, size, dir); } static int sg_num_pages(struct device *dev, @@ -2440,7 +2450,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, if (IS_ERR(domain)) return 0; - dma_dom = 
domain->priv; + dma_dom = to_dma_ops_domain(domain); dma_mask = *dev->dma_mask; npages = sg_num_pages(dev, sglist, nelems); @@ -2511,6 +2521,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, struct dma_attrs *attrs) { struct protection_domain *domain; + struct dma_ops_domain *dma_dom; unsigned long startaddr; int npages = 2; @@ -2519,9 +2530,10 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, return; startaddr = sg_dma_address(sglist) & PAGE_MASK; + dma_dom = to_dma_ops_domain(domain); npages = sg_num_pages(dev, sglist, nelems); - __unmap_single(domain->priv, startaddr, npages << PAGE_SHIFT, dir); + __unmap_single(dma_dom, startaddr, npages << PAGE_SHIFT, dir); } /* @@ -2533,6 +2545,7 @@ static void *alloc_coherent(struct device *dev, size_t size, { u64 dma_mask = dev->coherent_dma_mask; struct protection_domain *domain; + struct dma_ops_domain *dma_dom; struct page *page; domain = get_domain(dev); @@ -2543,6 +2556,7 @@ static void *alloc_coherent(struct device *dev, size_t size, } else if (IS_ERR(domain)) return NULL; + dma_dom = to_dma_ops_domain(domain); size = PAGE_ALIGN(size); dma_mask = dev->coherent_dma_mask; flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); @@ -2562,7 +2576,7 @@ static void *alloc_coherent(struct device *dev, size_t size, if (!dma_mask) dma_mask = *dev->dma_mask; - *dma_addr = __map_single(dev, domain->priv, page_to_phys(page), + *dma_addr = __map_single(dev, dma_dom, page_to_phys(page), size, DMA_BIDIRECTIONAL, dma_mask); if (*dma_addr == DMA_ERROR_CODE) @@ -2586,6 +2600,7 @@ static void free_coherent(struct device *dev, size_t size, struct dma_attrs *attrs) { struct protection_domain *domain; + struct dma_ops_domain *dma_dom; struct page *page; page = virt_to_page(virt_addr); @@ -2595,7 +2610,9 @@ static void free_coherent(struct device *dev, size_t size, if (IS_ERR(domain)) goto free_mem; - __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); + dma_dom = to_dma_ops_domain(domain); + + __unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL); free_mem: if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) @@ -2888,7 +2905,7 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) queue_flush_all(); /* Now release the domain */ - dma_dom = domain->priv; + dma_dom = to_dma_ops_domain(domain); dma_ops_domain_free(dma_dom); break; default: @@ -3076,8 +3093,7 @@ static void amd_iommu_apply_dm_region(struct device *dev, struct iommu_domain *domain, struct iommu_dm_region *region) { - struct protection_domain *pdomain = to_pdomain(domain); - struct dma_ops_domain *dma_dom = pdomain->priv; + struct dma_ops_domain *dma_dom = to_dma_ops_domain(to_pdomain(domain)); unsigned long start, end; start = IOVA_PFN(region->start); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 590956a..caf5e38 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -421,7 +421,6 @@ struct protection_domain { bool updated; /* complete domain flush required */ unsigned dev_cnt; /* devices assigned to this domain */ unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ - void *priv; /* private data */ }; /* -- cgit v0.10.2 From 5c365d18a73d3979db37006eaacefc0008869c0f Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 13 Jul 2016 13:53:21 +0000 Subject: iommu/vt-d: Return error code in domain_context_mapping_one() In 'commit <55d940430ab9> ("iommu/vt-d: Get rid of domain->iommu_lock")', the error handling path is changed a little, which makes the 
function always return 0. This path fixes this. Signed-off-by: Wei Yang Fixes: 55d940430ab9 ('iommu/vt-d: Get rid of domain->iommu_lock') Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 748e5e4..0a97a6d 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2076,7 +2076,7 @@ out_unlock: spin_unlock(&iommu->lock); spin_unlock_irqrestore(&device_domain_lock, flags); - return 0; + return ret; } struct domain_context_mapping_data { -- cgit v0.10.2 From 3254de6bf74fe94c197c9f819fe62a3a3c36f073 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 26 Jul 2016 15:18:54 +0200 Subject: iommu/amd: Update Alias-DTE in update_device_table() Not doing so might cause IO-Page-Faults when a device uses an alias request-id and the alias-dte is left in a lower page-mode which does not cover the address allocated from the iova-allocator. Fixes: 492667dacc0a ('x86/amd-iommu: Remove amd_iommu_pd_table') Cc: stable@vger.kernel.org Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index fb43cc5..9703913 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2239,8 +2239,15 @@ static void update_device_table(struct protection_domain *domain) { struct iommu_dev_data *dev_data; - list_for_each_entry(dev_data, &domain->dev_list, list) + list_for_each_entry(dev_data, &domain->dev_list, list) { set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled); + + if (dev_data->devid == dev_data->alias) + continue; + + /* There is an alias, update device table entry for it */ + set_dte_entry(dev_data->alias, domain, dev_data->ats.enabled); + } } static void update_domain(struct protection_domain *domain) -- cgit v0.10.2 From ffec219770dadd32171e6dd927e1d83d3218529f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 26 Jul 2016 15:31:23 +0200 Subject: iommu/amd: Initialize dma-ops domains with 3-level page-table A two-level page-table can map up to 1GB of address space. With the IOVA allocator now in use, the allocated addresses are often more closely to 4G, which requires the address space to be increased much more often. Avoid that by using a three-level page-table by default. Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 9703913..33c177b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1673,7 +1673,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) if (protection_domain_init(&dma_dom->domain)) goto free_dma_dom; - dma_dom->domain.mode = PAGE_MODE_2_LEVEL; + dma_dom->domain.mode = PAGE_MODE_3_LEVEL; dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); dma_dom->domain.flags = PD_DMA_OPS_MASK; if (!dma_dom->domain.pt_root) -- cgit v0.10.2
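For reference, the 1GB figure in the last commit message follows from the page-table geometry: assuming the usual layout of 512 entries per table level and 4KB leaf pages, a 2-level table covers 512 * 512 * 4KB = 1GB, while a 3-level table covers 512^3 * 4KB = 512GB, comfortably above the near-4GB addresses the IOVA allocator now hands out. A small standalone check of those numbers:

	/* Worked check of address-space coverage per page-table mode. */
	#include <stdio.h>

	int main(void)
	{
		unsigned long long entries = 512;   /* PTEs per table level */
		unsigned long long page    = 4096;  /* 4KB leaf pages       */

		unsigned long long two_level   = entries * entries * page;
		unsigned long long three_level = entries * entries * entries * page;

		printf("2-level: %llu GB\n", two_level   >> 30);  /* prints 1   */
		printf("3-level: %llu GB\n", three_level >> 30);  /* prints 512 */
		return 0;
	}
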