summaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms/powernv/pci-ioda.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-05-02 17:16:16 (GMT)
committerLinus Torvalds <torvalds@linux-foundation.org>2013-05-02 17:16:16 (GMT)
commit5a148af66932c31814e263366094b5812210b501 (patch)
treec5155ae89d7109533b8b073631bd65a7dd394b9d /arch/powerpc/platforms/powernv/pci-ioda.c
parent99c6bcf46d2233d33e441834e958ed0bc22b190a (diff)
parent54d5999d98f2ab36ad71b9ef4d82cf5f399205f5 (diff)
downloadlinux-fsl-qoriq-5a148af66932c31814e263366094b5812210b501.tar.xz
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
Pull powerpc update from Benjamin Herrenschmidt: "The main highlights this time around are: - A pile of addition POWER8 bits and nits, such as updated performance counter support (Michael Ellerman), new branch history buffer support (Anshuman Khandual), base support for the new PCI host bridge when not using the hypervisor (Gavin Shan) and other random related bits and fixes from various contributors. - Some rework of our page table format by Aneesh Kumar which fixes a thing or two and paves the way for THP support. THP itself will not make it this time around however. - More Freescale updates, including Altivec support on the new e6500 cores, new PCI controller support, and a pile of new boards support and updates. - The usual batch of trivial cleanups & fixes" * 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (156 commits) powerpc: Fix build error for book3e powerpc: Context switch the new EBB SPRs powerpc: Turn on the EBB H/FSCR bits powerpc: Replace CPU_FTR_BCTAR with CPU_FTR_ARCH_207S powerpc: Setup BHRB instructions facility in HFSCR for POWER8 powerpc: Fix interrupt range check on debug exception powerpc: Update tlbie/tlbiel as per ISA doc powerpc: Print page size info during boot powerpc: print both base and actual page size on hash failure powerpc: Fix hpte_decode to use the correct decoding for page sizes powerpc: Decode the pte-lp-encoding bits correctly. powerpc: Use encode avpn where we need only avpn values powerpc: Reduce PTE table memory wastage powerpc: Move the pte free routines from common header powerpc: Reduce the PTE_INDEX_SIZE powerpc: Switch 16GB and 16MB explicit hugepages to a different page table format powerpc: New hugepage directory format powerpc: Don't truncate pgd_index wrongly powerpc: Don't hard code the size of pte page powerpc: Save DAR and DSISR in pt_regs on MCE ...
Diffstat (limited to 'arch/powerpc/platforms/powernv/pci-ioda.c')
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c307
1 files changed, 251 insertions, 56 deletions
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 8e90e89..8c6c9cf 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -26,10 +26,12 @@
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
+#include <asm/msi_bitmap.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include <asm/iommu.h>
#include <asm/tce.h>
+#include <asm/xics.h>
#include "powernv.h"
#include "pci.h"
@@ -87,6 +89,7 @@ static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
return IODA_INVALID_PE;
} while(test_and_set_bit(pe, phb->ioda.pe_alloc));
+ phb->ioda.pe_array[pe].phb = phb;
phb->ioda.pe_array[pe].pe_number = pe;
return pe;
}
@@ -431,22 +434,102 @@ static void pnv_pci_ioda_setup_PEs(void)
}
}
-static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *dev)
+static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
{
- /* We delay DMA setup after we have assigned all PE# */
+ struct pci_dn *pdn = pnv_ioda_get_pdn(pdev);
+ struct pnv_ioda_pe *pe;
+
+ /*
+ * The function can be called while the PE#
+ * hasn't been assigned. Do nothing for the
+ * case.
+ */
+ if (!pdn || pdn->pe_number == IODA_INVALID_PE)
+ return;
+
+ pe = &phb->ioda.pe_array[pdn->pe_number];
+ set_iommu_table_base(&pdev->dev, &pe->tce32_table);
}
-static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
+static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
+ u64 *startp, u64 *endp)
{
- struct pci_dev *dev;
+ u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;
+ unsigned long start, end, inc;
+
+ start = __pa(startp);
+ end = __pa(endp);
+
+ /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
+ if (tbl->it_busno) {
+ start <<= 12;
+ end <<= 12;
+ inc = 128 << 12;
+ start |= tbl->it_busno;
+ end |= tbl->it_busno;
+ } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) {
+ /* p7ioc-style invalidation, 2 TCEs per write */
+ start |= (1ull << 63);
+ end |= (1ull << 63);
+ inc = 16;
+ } else {
+ /* Default (older HW) */
+ inc = 128;
+ }
- list_for_each_entry(dev, &bus->devices, bus_list) {
- set_iommu_table_base(&dev->dev, &pe->tce32_table);
- if (dev->subordinate)
- pnv_ioda_setup_bus_dma(pe, dev->subordinate);
+ end |= inc - 1; /* round up end to be different than start */
+
+ mb(); /* Ensure above stores are visible */
+ while (start <= end) {
+ __raw_writeq(start, invalidate);
+ start += inc;
+ }
+
+ /*
+ * The iommu layer will do another mb() for us on build()
+ * and we don't care on free()
+ */
+}
+
+static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
+ struct iommu_table *tbl,
+ u64 *startp, u64 *endp)
+{
+ unsigned long start, end, inc;
+ u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;
+
+ /* We'll invalidate DMA address in PE scope */
+ start = 0x2ul << 60;
+ start |= (pe->pe_number & 0xFF);
+ end = start;
+
+ /* Figure out the start, end and step */
+ inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
+ start |= (inc << 12);
+ inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
+ end |= (inc << 12);
+ inc = (0x1ul << 12);
+ mb();
+
+ while (start <= end) {
+ __raw_writeq(start, invalidate);
+ start += inc;
}
}
+void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
+ u64 *startp, u64 *endp)
+{
+ struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
+ tce32_table);
+ struct pnv_phb *phb = pe->phb;
+
+ if (phb->type == PNV_PHB_IODA1)
+ pnv_pci_ioda1_tce_invalidate(tbl, startp, endp);
+ else
+ pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp);
+}
+
static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
struct pnv_ioda_pe *pe, unsigned int base,
unsigned int segs)
@@ -518,16 +601,11 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
*/
tbl->it_busno = 0;
tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
- tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE
- | TCE_PCI_SWINV_PAIR;
+ tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE |
+ TCE_PCI_SWINV_PAIR;
}
iommu_init_table(tbl, phb->hose->node);
- if (pe->pdev)
- set_iommu_table_base(&pe->pdev->dev, tbl);
- else
- pnv_ioda_setup_bus_dma(pe, pe->pbus);
-
return;
fail:
/* XXX Failure: Try to fallback to 64-bit only ? */
@@ -537,6 +615,76 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
}
+static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+ struct pnv_ioda_pe *pe)
+{
+ struct page *tce_mem = NULL;
+ void *addr;
+ const __be64 *swinvp;
+ struct iommu_table *tbl;
+ unsigned int tce_table_size, end;
+ int64_t rc;
+
+ /* We shouldn't already have a 32-bit DMA associated */
+ if (WARN_ON(pe->tce32_seg >= 0))
+ return;
+
+ /* The PE will reserve all possible 32-bits space */
+ pe->tce32_seg = 0;
+ end = (1 << ilog2(phb->ioda.m32_pci_base));
+ tce_table_size = (end / 0x1000) * 8;
+ pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
+ end);
+
+ /* Allocate TCE table */
+ tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
+ get_order(tce_table_size));
+ if (!tce_mem) {
+ pe_err(pe, "Failed to allocate a 32-bit TCE memory\n");
+ goto fail;
+ }
+ addr = page_address(tce_mem);
+ memset(addr, 0, tce_table_size);
+
+ /*
+ * Map TCE table through TVT. The TVE index is the PE number
+ * shifted by 1 bit for 32-bits DMA space.
+ */
+ rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
+ pe->pe_number << 1, 1, __pa(addr),
+ tce_table_size, 0x1000);
+ if (rc) {
+ pe_err(pe, "Failed to configure 32-bit TCE table,"
+ " err %ld\n", rc);
+ goto fail;
+ }
+
+ /* Setup linux iommu table */
+ tbl = &pe->tce32_table;
+ pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0);
+
+ /* OPAL variant of PHB3 invalidated TCEs */
+ swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
+ if (swinvp) {
+ /* We need a couple more fields -- an address and a data
+ * to or. Since the bus is only printed out on table free
+ * errors, and on the first pass the data will be a relative
+ * bus number, print that out instead.
+ */
+ tbl->it_busno = 0;
+ tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
+ tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
+ }
+ iommu_init_table(tbl, phb->hose->node);
+
+ return;
+fail:
+ if (pe->tce32_seg >= 0)
+ pe->tce32_seg = -1;
+ if (tce_mem)
+ __free_pages(tce_mem, get_order(tce_table_size));
+}
+
static void pnv_ioda_setup_dma(struct pnv_phb *phb)
{
struct pci_controller *hose = phb->hose;
@@ -579,20 +727,49 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
if (segs > remaining)
segs = remaining;
}
- pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
- pe->dma_weight, segs);
- pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
+
+ /*
+ * For IODA2 compliant PHB3, we needn't care about the weight.
+ * The all available 32-bits DMA space will be assigned to
+ * the specific PE.
+ */
+ if (phb->type == PNV_PHB_IODA1) {
+ pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
+ pe->dma_weight, segs);
+ pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
+ } else {
+ pe_info(pe, "Assign DMA32 space\n");
+ segs = 0;
+ pnv_pci_ioda2_setup_dma_pe(phb, pe);
+ }
+
remaining -= segs;
base += segs;
}
}
#ifdef CONFIG_PCI_MSI
+static void pnv_ioda2_msi_eoi(struct irq_data *d)
+{
+ unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+ struct irq_chip *chip = irq_data_get_irq_chip(d);
+ struct pnv_phb *phb = container_of(chip, struct pnv_phb,
+ ioda.irq_chip);
+ int64_t rc;
+
+ rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
+ WARN_ON_ONCE(rc);
+
+ icp_native_eoi(d);
+}
+
static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
- unsigned int hwirq, unsigned int is_64,
- struct msi_msg *msg)
+ unsigned int hwirq, unsigned int virq,
+ unsigned int is_64, struct msi_msg *msg)
{
struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
+ struct irq_data *idata;
+ struct irq_chip *ichip;
unsigned int xive_num = hwirq - phb->msi_base;
uint64_t addr64;
uint32_t addr32, data;
@@ -637,6 +814,23 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
}
msg->data = data;
+ /*
+ * Change the IRQ chip for the MSI interrupts on PHB3.
+ * The corresponding IRQ chip should be populated for
+ * the first time.
+ */
+ if (phb->type == PNV_PHB_IODA2) {
+ if (!phb->ioda.irq_chip_init) {
+ idata = irq_get_irq_data(virq);
+ ichip = irq_data_get_irq_chip(idata);
+ phb->ioda.irq_chip_init = 1;
+ phb->ioda.irq_chip = *ichip;
+ phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
+ }
+
+ irq_set_chip(virq, &phb->ioda.irq_chip);
+ }
+
pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
" address=%x_%08x data=%x PE# %d\n",
pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
@@ -647,7 +841,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
{
- unsigned int bmap_size;
+ unsigned int count;
const __be32 *prop = of_get_property(phb->hose->dn,
"ibm,opal-msi-ranges", NULL);
if (!prop) {
@@ -658,18 +852,17 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
return;
phb->msi_base = be32_to_cpup(prop);
- phb->msi_count = be32_to_cpup(prop + 1);
- bmap_size = BITS_TO_LONGS(phb->msi_count) * sizeof(unsigned long);
- phb->msi_map = zalloc_maybe_bootmem(bmap_size, GFP_KERNEL);
- if (!phb->msi_map) {
+ count = be32_to_cpup(prop + 1);
+ if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
phb->hose->global_number);
return;
}
+
phb->msi_setup = pnv_pci_ioda_msi_setup;
phb->msi32_support = 1;
pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
- phb->msi_count, phb->msi_base);
+ count, phb->msi_base);
}
#else
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
@@ -852,18 +1045,19 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}
-void __init pnv_pci_init_ioda1_phb(struct device_node *np)
+void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
{
struct pci_controller *hose;
static int primary = 1;
struct pnv_phb *phb;
unsigned long size, m32map_off, iomap_off, pemap_off;
const u64 *prop64;
+ const u32 *prop32;
u64 phb_id;
void *aux;
long rc;
- pr_info(" Initializing IODA OPAL PHB %s\n", np->full_name);
+ pr_info(" Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name);
prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
if (!prop64) {
@@ -890,37 +1084,34 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
hose->last_busno = 0xff;
hose->private_data = phb;
phb->opal_id = phb_id;
- phb->type = PNV_PHB_IODA1;
+ phb->type = ioda_type;
/* Detect specific models for error handling */
if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
phb->model = PNV_PHB_MODEL_P7IOC;
+ else if (of_device_is_compatible(np, "ibm,p8-pciex"))
+ phb->model = PNV_PHB_MODEL_PHB3;
else
phb->model = PNV_PHB_MODEL_UNKNOWN;
- /* We parse "ranges" now since we need to deduce the register base
- * from the IO base
- */
+ /* Parse 32-bit and IO ranges (if any) */
pci_process_bridge_OF_ranges(phb->hose, np, primary);
primary = 0;
- /* Magic formula from Milton */
+ /* Get registers */
phb->regs = of_iomap(np, 0);
if (phb->regs == NULL)
pr_err(" Failed to map registers !\n");
-
- /* XXX This is hack-a-thon. This needs to be changed so that:
- * - we obtain stuff like PE# etc... from device-tree
- * - we properly re-allocate M32 ourselves
- * (the OFW one isn't very good)
- */
-
/* Initialize more IODA stuff */
- phb->ioda.total_pe = 128;
+ prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
+ if (!prop32)
+ phb->ioda.total_pe = 1;
+ else
+ phb->ioda.total_pe = *prop32;
phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
- /* OFW Has already off top 64k of M32 space (MSI space) */
+ /* FW Has already off top 64k of M32 space (MSI space) */
phb->ioda.m32_size += 0x10000;
phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
@@ -930,7 +1121,10 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
phb->ioda.io_pci_base = 0; /* XXX calculate this ? */
- /* Allocate aux data & arrays */
+ /* Allocate aux data & arrays
+ *
+ * XXX TODO: Don't allocate io segmap on PHB3
+ */
size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
m32map_off = size;
size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
@@ -960,7 +1154,7 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
hose->mem_resources[2].start = 0;
hose->mem_resources[2].end = 0;
-#if 0
+#if 0 /* We should really do that ... */
rc = opal_pci_set_phb_mem_window(opal->phb_id,
window_type,
window_num,
@@ -974,16 +1168,6 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
phb->ioda.m32_size, phb->ioda.m32_segsize,
phb->ioda.io_size, phb->ioda.io_segsize);
- if (phb->regs) {
- pr_devel(" BUID = 0x%016llx\n", in_be64(phb->regs + 0x100));
- pr_devel(" PHB2_CR = 0x%016llx\n", in_be64(phb->regs + 0x160));
- pr_devel(" IO_BAR = 0x%016llx\n", in_be64(phb->regs + 0x170));
- pr_devel(" IO_BAMR = 0x%016llx\n", in_be64(phb->regs + 0x178));
- pr_devel(" IO_SAR = 0x%016llx\n", in_be64(phb->regs + 0x180));
- pr_devel(" M32_BAR = 0x%016llx\n", in_be64(phb->regs + 0x190));
- pr_devel(" M32_BAMR = 0x%016llx\n", in_be64(phb->regs + 0x198));
- pr_devel(" M32_SAR = 0x%016llx\n", in_be64(phb->regs + 0x1a0));
- }
phb->hose->ops = &pnv_pci_ops;
/* Setup RID -> PE mapping function */
@@ -1011,7 +1195,18 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
if (rc)
pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);
- opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
+
+ /*
+ * On IODA1 map everything to PE#0, on IODA2 we assume the IODA reset
+ * has cleared the RTT which has the same effect
+ */
+ if (ioda_type == PNV_PHB_IODA1)
+ opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
+}
+
+void pnv_pci_init_ioda2_phb(struct device_node *np)
+{
+ pnv_pci_init_ioda_phb(np, PNV_PHB_IODA2);
}
void __init pnv_pci_init_ioda_hub(struct device_node *np)
@@ -1034,6 +1229,6 @@ void __init pnv_pci_init_ioda_hub(struct device_node *np)
for_each_child_of_node(np, phbn) {
/* Look for IODA1 PHBs */
if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
- pnv_pci_init_ioda1_phb(phbn);
+ pnv_pci_init_ioda_phb(phbn, PNV_PHB_IODA1);
}
}