summaryrefslogtreecommitdiff
path: root/drivers/irqchip/irq-gic-v3-its.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/irqchip/irq-gic-v3-its.c')
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c212
1 files changed, 175 insertions, 37 deletions
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index d8996bd..9687f8a 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -169,7 +169,7 @@ static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr)
static void its_encode_devid(struct its_cmd_block *cmd, u32 devid)
{
- cmd->raw_cmd[0] &= ~(0xffffUL << 32);
+ cmd->raw_cmd[0] &= BIT_ULL(32) - 1;
cmd->raw_cmd[0] |= ((u64)devid) << 32;
}
@@ -416,13 +416,14 @@ static void its_send_single_command(struct its_node *its,
{
struct its_cmd_block *cmd, *sync_cmd, *next_cmd;
struct its_collection *sync_col;
+ unsigned long flags;
- raw_spin_lock(&its->lock);
+ raw_spin_lock_irqsave(&its->lock, flags);
cmd = its_allocate_entry(its);
if (!cmd) { /* We're soooooo screewed... */
pr_err_ratelimited("ITS can't allocate, dropping command\n");
- raw_spin_unlock(&its->lock);
+ raw_spin_unlock_irqrestore(&its->lock, flags);
return;
}
sync_col = builder(cmd, desc);
@@ -442,7 +443,7 @@ static void its_send_single_command(struct its_node *its,
post:
next_cmd = its_post_commands(its);
- raw_spin_unlock(&its->lock);
+ raw_spin_unlock_irqrestore(&its->lock, flags);
its_wait_for_range_completion(its, cmd, next_cmd);
}
@@ -799,21 +800,44 @@ static int its_alloc_tables(struct its_node *its)
{
int err;
int i;
- int psz = PAGE_SIZE;
+ int psz = SZ_64K;
u64 shr = GITS_BASER_InnerShareable;
+ u64 cache = GITS_BASER_WaWb;
for (i = 0; i < GITS_BASER_NR_REGS; i++) {
u64 val = readq_relaxed(its->base + GITS_BASER + i * 8);
u64 type = GITS_BASER_TYPE(val);
u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
+ int order = get_order(psz);
+ int alloc_size;
u64 tmp;
void *base;
if (type == GITS_BASER_TYPE_NONE)
continue;
- /* We're lazy and only allocate a single page for now */
- base = (void *)get_zeroed_page(GFP_KERNEL);
+ /*
+ * Allocate as many entries as required to fit the
+ * range of device IDs that the ITS can grok... The ID
+ * space being incredibly sparse, this results in a
+ * massive waste of memory.
+ *
+ * For other tables, only allocate a single page.
+ */
+ if (type == GITS_BASER_TYPE_DEVICE) {
+ u64 typer = readq_relaxed(its->base + GITS_TYPER);
+ u32 ids = GITS_TYPER_DEVBITS(typer);
+
+ order = get_order((1UL << ids) * entry_size);
+ if (order >= MAX_ORDER) {
+ order = MAX_ORDER - 1;
+ pr_warn("%s: Device Table too large, reduce its page order to %u\n",
+ its->msi_chip.of_node->full_name, order);
+ }
+ }
+
+ alloc_size = (1 << order) * PAGE_SIZE;
+ base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
if (!base) {
err = -ENOMEM;
goto out_free;
@@ -825,7 +849,7 @@ retry_baser:
val = (virt_to_phys(base) |
(type << GITS_BASER_TYPE_SHIFT) |
((entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) |
- GITS_BASER_WaWb |
+ cache |
shr |
GITS_BASER_VALID);
@@ -841,7 +865,7 @@ retry_baser:
break;
}
- val |= (PAGE_SIZE / psz) - 1;
+ val |= (alloc_size / psz) - 1;
writeq_relaxed(val, its->base + GITS_BASER + i * 8);
tmp = readq_relaxed(its->base + GITS_BASER + i * 8);
@@ -851,9 +875,12 @@ retry_baser:
* Shareability didn't stick. Just use
* whatever the read reported, which is likely
* to be the only thing this redistributor
- * supports.
+ * supports. If that's zero, make it
+ * non-cacheable as well.
*/
shr = tmp & GITS_BASER_SHAREABILITY_MASK;
+ if (!shr)
+ cache = GITS_BASER_nC;
goto retry_baser;
}
@@ -882,7 +909,7 @@ retry_baser:
}
pr_info("ITS: allocated %d %s @%lx (psz %dK, shr %d)\n",
- (int)(PAGE_SIZE / entry_size),
+ (int)(alloc_size / entry_size),
its_base_type_string[type],
(unsigned long)virt_to_phys(base),
psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
@@ -957,16 +984,39 @@ static void its_cpu_init_lpis(void)
tmp = readq_relaxed(rbase + GICR_PROPBASER);
if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) {
+ if (!(tmp & GICR_PROPBASER_SHAREABILITY_MASK)) {
+ /*
+ * The HW reports non-shareable, we must
+ * remove the cacheability attributes as
+ * well.
+ */
+ val &= ~(GICR_PROPBASER_SHAREABILITY_MASK |
+ GICR_PROPBASER_CACHEABILITY_MASK);
+ val |= GICR_PROPBASER_nC;
+ writeq_relaxed(val, rbase + GICR_PROPBASER);
+ }
pr_info_once("GIC: using cache flushing for LPI property table\n");
gic_rdists->flags |= RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING;
}
/* set PENDBASE */
val = (page_to_phys(pend_page) |
- GICR_PROPBASER_InnerShareable |
- GICR_PROPBASER_WaWb);
+ GICR_PENDBASER_InnerShareable |
+ GICR_PENDBASER_WaWb);
writeq_relaxed(val, rbase + GICR_PENDBASER);
+ tmp = readq_relaxed(rbase + GICR_PENDBASER);
+
+ if (!(tmp & GICR_PENDBASER_SHAREABILITY_MASK)) {
+ /*
+ * The HW reports non-shareable, we must remove the
+ * cacheability attributes as well.
+ */
+ val &= ~(GICR_PENDBASER_SHAREABILITY_MASK |
+ GICR_PENDBASER_CACHEABILITY_MASK);
+ val |= GICR_PENDBASER_nC;
+ writeq_relaxed(val, rbase + GICR_PENDBASER);
+ }
/* Enable LPIs */
val = readl_relaxed(rbase + GICR_CTLR);
@@ -1003,7 +1053,7 @@ static void its_cpu_init_collection(void)
* This ITS wants a linear CPU number.
*/
target = readq_relaxed(gic_data_rdist_rd_base() + GICR_TYPER);
- target = GICR_TYPER_CPU_NUMBER(target);
+ target = GICR_TYPER_CPU_NUMBER(target) << 16;
}
/* Perform collection mapping */
@@ -1020,8 +1070,9 @@ static void its_cpu_init_collection(void)
static struct its_device *its_find_device(struct its_node *its, u32 dev_id)
{
struct its_device *its_dev = NULL, *tmp;
+ unsigned long flags;
- raw_spin_lock(&its->lock);
+ raw_spin_lock_irqsave(&its->lock, flags);
list_for_each_entry(tmp, &its->its_device_list, entry) {
if (tmp->device_id == dev_id) {
@@ -1030,7 +1081,7 @@ static struct its_device *its_find_device(struct its_node *its, u32 dev_id)
}
}
- raw_spin_unlock(&its->lock);
+ raw_spin_unlock_irqrestore(&its->lock, flags);
return its_dev;
}
@@ -1040,6 +1091,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
{
struct its_device *dev;
unsigned long *lpi_map;
+ unsigned long flags;
void *itt;
int lpi_base;
int nr_lpis;
@@ -1056,7 +1108,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
nr_ites = max(2UL, roundup_pow_of_two(nvecs));
sz = nr_ites * its->ite_size;
sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
- itt = kmalloc(sz, GFP_KERNEL);
+ itt = kzalloc(sz, GFP_KERNEL);
lpi_map = its_lpi_alloc_chunks(nvecs, &lpi_base, &nr_lpis);
if (!dev || !itt || !lpi_map) {
@@ -1075,9 +1127,9 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
dev->device_id = dev_id;
INIT_LIST_HEAD(&dev->entry);
- raw_spin_lock(&its->lock);
+ raw_spin_lock_irqsave(&its->lock, flags);
list_add(&dev->entry, &its->its_device_list);
- raw_spin_unlock(&its->lock);
+ raw_spin_unlock_irqrestore(&its->lock, flags);
/* Bind the device to the first possible CPU */
cpu = cpumask_first(cpu_online_mask);
@@ -1091,9 +1143,11 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
static void its_free_device(struct its_device *its_dev)
{
- raw_spin_lock(&its_dev->its->lock);
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&its_dev->its->lock, flags);
list_del(&its_dev->entry);
- raw_spin_unlock(&its_dev->its->lock);
+ raw_spin_unlock_irqrestore(&its_dev->its->lock, flags);
kfree(its_dev->itt);
kfree(its_dev);
}
@@ -1112,31 +1166,69 @@ static int its_alloc_device_irq(struct its_device *dev, irq_hw_number_t *hwirq)
return 0;
}
+struct its_pci_alias {
+ struct pci_dev *pdev;
+ u32 dev_id;
+ u32 count;
+};
+
+static int its_pci_msi_vec_count(struct pci_dev *pdev)
+{
+ int msi, msix;
+
+ msi = max(pci_msi_vec_count(pdev), 0);
+ msix = max(pci_msix_vec_count(pdev), 0);
+
+ return max(msi, msix);
+}
+
+static int its_get_pci_alias(struct pci_dev *pdev, u16 alias, void *data)
+{
+ struct its_pci_alias *dev_alias = data;
+
+ dev_alias->dev_id = alias;
+ if (pdev != dev_alias->pdev)
+ dev_alias->count += its_pci_msi_vec_count(dev_alias->pdev);
+
+ return 0;
+}
+
static int its_msi_prepare(struct irq_domain *domain, struct device *dev,
int nvec, msi_alloc_info_t *info)
{
struct pci_dev *pdev;
struct its_node *its;
- u32 dev_id;
struct its_device *its_dev;
+ struct its_pci_alias dev_alias;
if (!dev_is_pci(dev))
return -EINVAL;
pdev = to_pci_dev(dev);
- dev_id = PCI_DEVID(pdev->bus->number, pdev->devfn);
+ dev_alias.pdev = pdev;
+ dev_alias.count = nvec;
+
+ pci_for_each_dma_alias(pdev, its_get_pci_alias, &dev_alias);
its = domain->parent->host_data;
- its_dev = its_find_device(its, dev_id);
- if (WARN_ON(its_dev))
- return -EINVAL;
+ its_dev = its_find_device(its, dev_alias.dev_id);
+ if (its_dev) {
+ /*
+ * We already have seen this ID, probably through
+ * another alias (PCI bridge of some sort). No need to
+ * create the device.
+ */
+ dev_dbg(dev, "Reusing ITT for devID %x\n", dev_alias.dev_id);
+ goto out;
+ }
- its_dev = its_create_device(its, dev_id, nvec);
+ its_dev = its_create_device(its, dev_alias.dev_id, dev_alias.count);
if (!its_dev)
return -ENOMEM;
- dev_dbg(&pdev->dev, "ITT %d entries, %d bits\n", nvec, ilog2(nvec));
-
+ dev_dbg(&pdev->dev, "ITT %d entries, %d bits\n",
+ dev_alias.count, ilog2(dev_alias.count));
+out:
info->scratchpad[0].ptr = its_dev;
info->scratchpad[1].ptr = dev;
return 0;
@@ -1255,6 +1347,34 @@ static const struct irq_domain_ops its_domain_ops = {
.deactivate = its_irq_domain_deactivate,
};
+static int its_force_quiescent(void __iomem *base)
+{
+ u32 count = 1000000; /* 1s */
+ u32 val;
+
+ val = readl_relaxed(base + GITS_CTLR);
+ if (val & GITS_CTLR_QUIESCENT)
+ return 0;
+
+ /* Disable the generation of all interrupts to this ITS */
+ val &= ~GITS_CTLR_ENABLE;
+ writel_relaxed(val, base + GITS_CTLR);
+
+ /* Poll GITS_CTLR and wait until ITS becomes quiescent */
+ while (1) {
+ val = readl_relaxed(base + GITS_CTLR);
+ if (val & GITS_CTLR_QUIESCENT)
+ return 0;
+
+ count--;
+ if (!count)
+ return -EBUSY;
+
+ cpu_relax();
+ udelay(1);
+ }
+}
+
static int its_probe(struct device_node *node, struct irq_domain *parent)
{
struct resource res;
@@ -1283,6 +1403,13 @@ static int its_probe(struct device_node *node, struct irq_domain *parent)
goto out_unmap;
}
+ err = its_force_quiescent(its_base);
+ if (err) {
+ pr_warn("%s: failed to quiesce, giving up\n",
+ node->full_name);
+ goto out_unmap;
+ }
+
pr_info("ITS: %s\n", node->full_name);
its = kzalloc(sizeof(*its), GFP_KERNEL);
@@ -1322,14 +1449,26 @@ static int its_probe(struct device_node *node, struct irq_domain *parent)
writeq_relaxed(baser, its->base + GITS_CBASER);
tmp = readq_relaxed(its->base + GITS_CBASER);
- writeq_relaxed(0, its->base + GITS_CWRITER);
- writel_relaxed(1, its->base + GITS_CTLR);
- if ((tmp ^ baser) & GITS_BASER_SHAREABILITY_MASK) {
+ if ((tmp ^ baser) & GITS_CBASER_SHAREABILITY_MASK) {
+ if (!(tmp & GITS_CBASER_SHAREABILITY_MASK)) {
+ /*
+ * The HW reports non-shareable, we must
+ * remove the cacheability attributes as
+ * well.
+ */
+ baser &= ~(GITS_CBASER_SHAREABILITY_MASK |
+ GITS_CBASER_CACHEABILITY_MASK);
+ baser |= GITS_CBASER_nC;
+ writeq_relaxed(baser, its->base + GITS_CBASER);
+ }
pr_info("ITS: using cache flushing for cmd queue\n");
its->flags |= ITS_FLAGS_CMDQ_NEEDS_FLUSHING;
}
+ writeq_relaxed(0, its->base + GITS_CWRITER);
+ writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR);
+
if (of_property_read_bool(its->msi_chip.of_node, "msi-controller")) {
its->domain = irq_domain_add_tree(NULL, &its_domain_ops, its);
if (!its->domain) {
@@ -1382,12 +1521,11 @@ static bool gic_rdists_supports_plpis(void)
int its_cpu_init(void)
{
- if (!gic_rdists_supports_plpis()) {
- pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
- return -ENXIO;
- }
-
if (!list_empty(&its_nodes)) {
+ if (!gic_rdists_supports_plpis()) {
+ pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
+ return -ENXIO;
+ }
its_cpu_init_lpis();
its_cpu_init_collection();
}