Merge tag 'powerpc-4.7-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc updates from Michael Ellerman: "Highlights: - Support for Power ISA 3.0 (Power9) Radix Tree MMU from Aneesh Kumar K.V - Live patching support for ppc64le (also merged via livepatching.git) Various cleanups & minor fixes from: - Aaro Koskinen, Alexey Kardashevskiy, Andrew Donnellan, Aneesh Kumar K.V, Chris Smart, Daniel Axtens, Frederic Barrat, Gavin Shan, Ian Munsie, Lennart Sorensen, Madhavan Srinivasan, Mahesh Salgaonkar, Markus Elfring, Michael Ellerman, Oliver O'Halloran, Paul Gortmaker, Paul Mackerras, Rashmica Gupta, Russell Currey, Suraj Jitindar Singh, Thiago Jung Bauermann, Valentin Rothberg, Vipin K Parashar. General: - Update LMB associativity index during DLPAR add/remove from Nathan Fontenot - Fix branching to OOL handlers in relocatable kernel from Hari Bathini - Add support for userspace Power9 copy/paste from Chris Smart - Always use STRICT_MM_TYPECHECKS from Michael Ellerman - Add mask of possible MMU features from Michael Ellerman PCI: - Enable pass through of NVLink to guests from Alexey Kardashevskiy - Cleanups in preparation for powernv PCI hotplug from Gavin Shan - Don't report error in eeh_pe_reset_and_recover() from Gavin Shan - Restore initial state in eeh_pe_reset_and_recover() from Gavin Shan - Revert "powerpc/eeh: Fix crash in eeh_add_device_early() on Cell" from Guilherme G Piccoli - Remove the dependency on EEH struct in DDW mechanism from Guilherme G Piccoli selftests: - Test cp_abort during context switch from Chris Smart - Add several tests for transactional memory support from Rashmica Gupta perf: - Add support for sampling interrupt register state from Anju T - Add support for unwinding perf-stackdump from Chandan Kumar cxl: - Configure the PSL for two CAPI ports on POWER8NVL from Philippe Bergheaud - Allow initialization on timebase sync failures from Frederic Barrat - Increase timeout for detection of AFU mmio hang from Frederic Barrat - Handle num_of_processes larger than can fit in the SPA from Ian Munsie - Ensure PSL interrupt is configured for contexts with no AFU IRQs from Ian Munsie - Add kernel API to allow a context to operate with relocate disabled from Ian Munsie - Check periodically the coherent platform function's state from Christophe Lombard Freescale: - Updates from Scott: "Contains 86xx fixes, minor device tree fixes, an erratum workaround, and a kconfig dependency fix." * tag 'powerpc-4.7-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (192 commits) powerpc/86xx: Fix PCI interrupt map definition powerpc/86xx: Move pci1 definition to the include file powerpc/fsl: Fix build of the dtb embedded kernel images powerpc/fsl: Fix rcpm compatible string powerpc/fsl: Remove FSL_SOC dependency from FSL_LBC powerpc/fsl-pci: Add a workaround for PCI 5 errata powerpc/fsl: Fix SPI compatible on t208xrdb and t1040rdb powerpc/powernv/npu: Add PE to PHB's list powerpc/powernv: Fix insufficient memory allocation powerpc/iommu: Remove the dependency on EEH struct in DDW mechanism Revert "powerpc/eeh: Fix crash in eeh_add_device_early() on Cell" powerpc/eeh: Drop unnecessary label in eeh_pe_change_owner() powerpc/eeh: Ignore handlers in eeh_pe_reset_and_recover() powerpc/eeh: Restore initial state in eeh_pe_reset_and_recover() powerpc/eeh: Don't report error in eeh_pe_reset_and_recover() Revert "powerpc/powernv: Exclude root bus in pnv_pci_reset_secondary_bus()" powerpc/powernv/npu: Enable NVLink pass through powerpc/powernv/npu: Rework TCE Kill handling powerpc/powernv/npu: Add set/unset window helpers powerpc/powernv/ioda2: Export debug helper pe_level_printk() ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-05-20 17:12:41 (GMT)
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-05-20 17:12:41 (GMT)
commit: c04a5880299eab3da8c10547db96ea9cdffd44a6 (patch)
tree: 8708b60e410780ce8ea2074335033b016cab4c4f /arch/powerpc/platforms/powernv/npu-dma.c
parent: a1c28b75a95808161cacbb3531c418abe248994e (diff)
parent: 138a076496e61c68ebc1dcccc088705826bbe26d (diff)
download: linux-c04a5880299eab3da8c10547db96ea9cdffd44a6.tar.xz
1 files changed, 148 insertions, 135 deletions
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 7229acd..0459e10 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -12,6 +12,7 @@
 #include <linux/export.h>
 #include <linux/pci.h>
 #include <linux/memblock.h>
+#include <linux/iommu.h>
 
 #include <asm/iommu.h>
 #include <asm/pnv-pci.h>
@@ -25,8 +26,6 @@
  * Other types of TCE cache invalidation are not functional in the
  * hardware.
  */
-#define TCE_KILL_INVAL_ALL PPC_BIT(0)
-
 static struct pci_dev *get_pci_dev(struct device_node *dn)
 {
 	return PCI_DN(dn)->pcidev;
@@ -138,22 +137,17 @@ static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
 	struct pnv_ioda_pe *pe;
 	struct pci_dn *pdn;
 
-	if (npe->flags & PNV_IODA_PE_PEER) {
-		pe = npe->peers[0];
-		pdev = pe->pdev;
-	} else {
-		pdev = pnv_pci_get_gpu_dev(npe->pdev);
-		if (!pdev)
-			return NULL;
+	pdev = pnv_pci_get_gpu_dev(npe->pdev);
+	if (!pdev)
+		return NULL;
 
-		pdn = pci_get_pdn(pdev);
-		if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
-			return NULL;
+	pdn = pci_get_pdn(pdev);
+	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+		return NULL;
 
-		hose = pci_bus_to_host(pdev->bus);
-		phb = hose->private_data;
-		pe = &phb->ioda.pe_array[pdn->pe_number];
-	}
+	hose = pci_bus_to_host(pdev->bus);
+	phb = hose->private_data;
+	pe = &phb->ioda.pe_array[pdn->pe_number];
 
 	if (gpdev)
 		*gpdev = pdev;
@@ -161,92 +155,70 @@ static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
 	return pe;
 }
 
-void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe)
+long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
+		struct iommu_table *tbl)
 {
 	struct pnv_phb *phb = npe->phb;
+	int64_t rc;
+	const unsigned long size = tbl->it_indirect_levels ?
+		tbl->it_level_size : tbl->it_size;
+	const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
+	const __u64 win_size = tbl->it_size << tbl->it_page_shift;
+
+	pe_info(npe, "Setting up window %llx..%llx pg=%lx\n",
+			start_addr, start_addr + win_size - 1,
+			IOMMU_PAGE_SIZE(tbl));
+
+	rc = opal_pci_map_pe_dma_window(phb->opal_id,
+			npe->pe_number,
+			npe->pe_number,
+			tbl->it_indirect_levels + 1,
+			__pa(tbl->it_base),
+			size << 3,
+			IOMMU_PAGE_SIZE(tbl));
+	if (rc) {
+		pe_err(npe, "Failed to configure TCE table, err %lld\n", rc);
+		return rc;
+	}
+	pnv_pci_ioda2_tce_invalidate_entire(phb, false);
 
-	if (WARN_ON(phb->type != PNV_PHB_NPU ||
-		    !phb->ioda.tce_inval_reg ||
-		    !(npe->flags & PNV_IODA_PE_DEV)))
-		return;
+	/* Add the table to the list so its TCE cache will get invalidated */
+	pnv_pci_link_table_and_group(phb->hose->node, num,
+			tbl, &npe->table_group);
 
-	mb(); /* Ensure previous TCE table stores are visible */
-	__raw_writeq(cpu_to_be64(TCE_KILL_INVAL_ALL),
-		phb->ioda.tce_inval_reg);
+	return 0;
 }
 
-void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe,
-				struct iommu_table *tbl,
-				unsigned long index,
-				unsigned long npages,
-				bool rm)
+long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num)
 {
 	struct pnv_phb *phb = npe->phb;
+	int64_t rc;
 
-	/* We can only invalidate the whole cache on NPU */
-	unsigned long val = TCE_KILL_INVAL_ALL;
-
-	if (WARN_ON(phb->type != PNV_PHB_NPU ||
-		    !phb->ioda.tce_inval_reg ||
-		    !(npe->flags & PNV_IODA_PE_DEV)))
-		return;
-
-	mb(); /* Ensure previous TCE table stores are visible */
-	if (rm)
-		__raw_rm_writeq(cpu_to_be64(val),
-		  (__be64 __iomem *) phb->ioda.tce_inval_reg_phys);
-	else
-		__raw_writeq(cpu_to_be64(val),
-			phb->ioda.tce_inval_reg);
-}
-
-void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe)
-{
-	struct pnv_ioda_pe *gpe;
-	struct pci_dev *gpdev;
-	int i, avail = -1;
-
-	if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
-		return;
-
-	gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
-	if (!gpe)
-		return;
-
-	for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
-		/* Nothing to do if the PE is already connected. */
-		if (gpe->peers[i] == npe)
-			return;
+	pe_info(npe, "Removing DMA window\n");
 
-		if (!gpe->peers[i])
-			avail = i;
+	rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
+			npe->pe_number,
+			0/* levels */, 0/* table address */,
+			0/* table size */, 0/* page size */);
+	if (rc) {
+		pe_err(npe, "Unmapping failed, ret = %lld\n", rc);
+		return rc;
 	}
+	pnv_pci_ioda2_tce_invalidate_entire(phb, false);
 
-	if (WARN_ON(avail < 0))
-		return;
-
-	gpe->peers[avail] = npe;
-	gpe->flags |= PNV_IODA_PE_PEER;
+	pnv_pci_unlink_table_and_group(npe->table_group.tables[num],
+			&npe->table_group);
 
-	/*
-	 * We assume that the NPU devices only have a single peer PE
-	 * (the GPU PCIe device PE).
-	 */
-	npe->peers[0] = gpe;
-	npe->flags |= PNV_IODA_PE_PEER;
+	return 0;
 }
 
 /*
- * For the NPU we want to point the TCE table at the same table as the
- * real PCI device.
+ * Enables 32 bit DMA on NPU.
  */
-static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe)
+static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe)
 {
-	struct pnv_phb *phb = npe->phb;
 	struct pci_dev *gpdev;
 	struct pnv_ioda_pe *gpe;
-	void *addr;
-	unsigned int size;
 	int64_t rc;
 
 	/*
@@ -260,14 +232,7 @@ static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe)
 	if (!gpe)
 		return;
 
-	addr = (void *)gpe->table_group.tables[0]->it_base;
-	size = gpe->table_group.tables[0]->it_size << 3;
-	rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
-					npe->pe_number, 1, __pa(addr),
-					size, 0x1000);
-	if (rc != OPAL_SUCCESS)
-		pr_warn("%s: Error %lld setting DMA window on PHB#%d-PE#%d\n",
-			__func__, rc, phb->hose->global_number, npe->pe_number);
+	rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]);
 
 	/*
 	 * We don't initialise npu_pe->tce32_table as we always use
@@ -277,72 +242,120 @@ static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe)
 }
 
 /*
- * Enable/disable bypass mode on the NPU. The NPU only supports one
+ * Enables bypass mode on the NPU. The NPU only supports one
  * window per link, so bypass needs to be explicitly enabled or
  * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be
  * active at the same time.
  */
-int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enable)
+static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
 {
 	struct pnv_phb *phb = npe->phb;
 	int64_t rc = 0;
+	phys_addr_t top = memblock_end_of_DRAM();
 
 	if (phb->type != PNV_PHB_NPU || !npe->pdev)
 		return -EINVAL;
 
-	if (enable) {
-		/* Enable the bypass window */
-		phys_addr_t top = memblock_end_of_DRAM();
-
-		npe->tce_bypass_base = 0;
-		top = roundup_pow_of_two(top);
-		dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n",
-			 npe->pe_number);
-		rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
-					npe->pe_number, npe->pe_number,
-					npe->tce_bypass_base, top);
-	} else {
-		/*
-		 * Disable the bypass window by replacing it with the
-		 * TCE32 window.
-		 */
-		pnv_npu_disable_bypass(npe);
-	}
+	rc = pnv_npu_unset_window(npe, 0);
+	if (rc != OPAL_SUCCESS)
+		return rc;
+
+	/* Enable the bypass window */
+
+	top = roundup_pow_of_two(top);
+	dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n",
+			npe->pe_number);
+	rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
+			npe->pe_number, npe->pe_number,
+			0 /* bypass base */, top);
+
+	if (rc == OPAL_SUCCESS)
+		pnv_pci_ioda2_tce_invalidate_entire(phb, false);
 
 	return rc;
 }
 
-int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask)
+void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass)
 {
-	struct pci_controller *hose = pci_bus_to_host(npdev->bus);
-	struct pnv_phb *phb = hose->private_data;
-	struct pci_dn *pdn = pci_get_pdn(npdev);
-	struct pnv_ioda_pe *npe, *gpe;
-	struct pci_dev *gpdev;
-	uint64_t top;
-	bool bypass = false;
+	int i;
+	struct pnv_phb *phb;
+	struct pci_dn *pdn;
+	struct pnv_ioda_pe *npe;
+	struct pci_dev *npdev;
 
-	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
-		return -ENXIO;
+	for (i = 0; ; ++i) {
+		npdev = pnv_pci_get_npu_dev(gpdev, i);
 
-	/* We only do bypass if it's enabled on the linked device */
-	npe = &phb->ioda.pe_array[pdn->pe_number];
-	gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
-	if (!gpe)
-		return -ENODEV;
+		if (!npdev)
+			break;
+
+		pdn = pci_get_pdn(npdev);
+		if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+			return;
+
+		phb = pci_bus_to_host(npdev->bus)->private_data;
+
+		/* We only do bypass if it's enabled on the linked device */
+		npe = &phb->ioda.pe_array[pdn->pe_number];
+
+		if (bypass) {
+			dev_info(&npdev->dev,
+					"Using 64-bit DMA iommu bypass\n");
+			pnv_npu_dma_set_bypass(npe);
+		} else {
+			dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
+			pnv_npu_dma_set_32(npe);
+		}
+	}
+}
 
-	if (gpe->tce_bypass_enabled) {
-		top = gpe->tce_bypass_base + memblock_end_of_DRAM() - 1;
-		bypass = (dma_mask >= top);
+/* Switch ownership from platform code to external user (e.g. VFIO) */
+void pnv_npu_take_ownership(struct pnv_ioda_pe *npe)
+{
+	struct pnv_phb *phb = npe->phb;
+	int64_t rc;
+
+	/*
+	 * Note: NPU has just a single TVE in the hardware which means that
+	 * while used by the kernel, it can have either 32bit window or
+	 * DMA bypass but never both. So we deconfigure 32bit window only
+	 * if it was enabled at the moment of ownership change.
+	 */
+	if (npe->table_group.tables[0]) {
+		pnv_npu_unset_window(npe, 0);
+		return;
 	}
 
-	if (bypass)
-		dev_info(&npdev->dev, "Using 64-bit DMA iommu bypass\n");
-	else
-		dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
+	/* Disable bypass */
+	rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
+			npe->pe_number, npe->pe_number,
+			0 /* bypass base */, 0);
+	if (rc) {
+		pe_err(npe, "Failed to disable bypass, err %lld\n", rc);
+		return;
+	}
+	pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false);
+}
 
-	pnv_npu_dma_set_bypass(npe, bypass);
-	*npdev->dev.dma_mask = dma_mask;
+struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
+{
+	struct pnv_phb *phb = npe->phb;
+	struct pci_bus *pbus = phb->hose->bus;
+	struct pci_dev *npdev, *gpdev = NULL, *gptmp;
+	struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
 
-	return 0;
+	if (!gpe || !gpdev)
+		return NULL;
+
+	list_for_each_entry(npdev, &pbus->devices, bus_list) {
+		gptmp = pnv_pci_get_gpu_dev(npdev);
+
+		if (gptmp != gpdev)
+			continue;
+
+		pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev));
+		iommu_group_add_device(gpe->table_group.group, &npdev->dev);
+	}
+
+	return gpe;
 }
author	Linus Torvalds <torvalds@linux-foundation.org>	2016-05-20 17:12:41 (GMT)
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-05-20 17:12:41 (GMT)
commit	c04a5880299eab3da8c10547db96ea9cdffd44a6 (patch)
tree	8708b60e410780ce8ea2074335033b016cab4c4f /arch/powerpc/platforms/powernv/npu-dma.c
parent	a1c28b75a95808161cacbb3531c418abe248994e (diff)
parent	138a076496e61c68ebc1dcccc088705826bbe26d (diff)
download	linux-c04a5880299eab3da8c10547db96ea9cdffd44a6.tar.xz