From 729de41baf63e2172b9d61de61bbd53f231095ca Mon Sep 17 00:00:00 2001
From: Dinh Nguyen <dinguyen@opensource.altera.com>
Date: Fri, 10 Oct 2014 10:21:14 -0500
Subject: reset: add reset_control_status helper function

There are cases where a system will want to read a reset status bit before
doing any other toggling. Add a reset_control_status helper function to the
reset controller API.

Signed-off-by: Dinh Nguyen <dinguyen@opensource.altera.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>

diff --git a/drivers/reset/core.c b/drivers/reset/core.c
index baeaf82..7955e00 100644
--- a/drivers/reset/core.c
+++ b/drivers/reset/core.c
@@ -126,6 +126,21 @@ int reset_control_deassert(struct reset_control *rstc)
 EXPORT_SYMBOL_GPL(reset_control_deassert);
 
 /**
+ * reset_control_status - query whether a reset line is asserted
+ * @rstc: reset controller
+ *
+ * Returns a negative errno if unsupported, >0 if asserted, 0 if deasserted.
+ */
+int reset_control_status(struct reset_control *rstc)
+{
+	if (rstc->rcdev->ops->status)
+		return rstc->rcdev->ops->status(rstc->rcdev, rstc->id);
+
+	return -ENOSYS;
+}
+EXPORT_SYMBOL_GPL(reset_control_status);
+
+/**
  * of_reset_control_get - Lookup and obtain a reference to a reset controller.
  * @node: device to be reset by the controller
  * @id: reset line name
diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h
index 41a4695f..ce6b962 100644
--- a/include/linux/reset-controller.h
+++ b/include/linux/reset-controller.h
@@ -12,11 +12,13 @@ struct reset_controller_dev;
  *         things to reset the device
  * @assert: manually assert the reset line, if supported
  * @deassert: manually deassert the reset line, if supported
+ * @status: return the status of the reset line, if supported
  */
 struct reset_control_ops {
 	int (*reset)(struct reset_controller_dev *rcdev, unsigned long id);
 	int (*assert)(struct reset_controller_dev *rcdev, unsigned long id);
 	int (*deassert)(struct reset_controller_dev *rcdev, unsigned long id);
+	int (*status)(struct reset_controller_dev *rcdev, unsigned long id);
 };
 
 struct module;
diff --git a/include/linux/reset.h b/include/linux/reset.h
index 349f150..da5602b 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -10,6 +10,7 @@ struct reset_control;
 int reset_control_reset(struct reset_control *rstc);
 int reset_control_assert(struct reset_control *rstc);
 int reset_control_deassert(struct reset_control *rstc);
+int reset_control_status(struct reset_control *rstc);
 
 struct reset_control *reset_control_get(struct device *dev, const char *id);
 void reset_control_put(struct reset_control *rstc);
@@ -57,6 +58,12 @@ static inline int reset_control_deassert(struct reset_control *rstc)
 	return 0;
 }
 
+static inline int reset_control_status(struct reset_control *rstc)
+{
+	WARN_ON(1);
+	return 0;
+}
+
 static inline void reset_control_put(struct reset_control *rstc)
 {
 	WARN_ON(1);
-- 
cgit v0.10.2


From bd13251f71fc86f06b344810835bc4e5e77edef7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Antoine=20T=C3=A9nart?= <antoine.tenart@free-electrons.com>
Date: Wed, 3 Sep 2014 09:48:20 +0200
Subject: reset: add the Berlin reset controller driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a reset controller for Marvell Berlin SoCs which is used by the
USB PHYs drivers (for now).

Signed-off-by: Antoine Ténart <antoine.tenart@free-electrons.com>
Acked-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>

diff --git a/drivers/reset/Makefile b/drivers/reset/Makefile
index 60fed3d..157d421 100644
--- a/drivers/reset/Makefile
+++ b/drivers/reset/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_RESET_CONTROLLER) += core.o
 obj-$(CONFIG_ARCH_SOCFPGA) += reset-socfpga.o
+obj-$(CONFIG_ARCH_BERLIN) += reset-berlin.o
 obj-$(CONFIG_ARCH_SUNXI) += reset-sunxi.o
 obj-$(CONFIG_ARCH_STI) += sti/
diff --git a/drivers/reset/reset-berlin.c b/drivers/reset/reset-berlin.c
new file mode 100644
index 0000000..f8b48a1
--- /dev/null
+++ b/drivers/reset/reset-berlin.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2014 Marvell Technology Group Ltd.
+ *
+ * Antoine Tenart <antoine.tenart@free-electrons.com>
+ * Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/reset-controller.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#define BERLIN_MAX_RESETS	32
+
+#define to_berlin_reset_priv(p)		\
+	container_of((p), struct berlin_reset_priv, rcdev)
+
+struct berlin_reset_priv {
+	void __iomem			*base;
+	unsigned int			size;
+	struct reset_controller_dev	rcdev;
+};
+
+/* Trigger a reset: id packs the register offset (bits 8+) and bit (0-4). */
+static int berlin_reset_reset(struct reset_controller_dev *rcdev,
+			      unsigned long id)
+{
+	struct berlin_reset_priv *priv = to_berlin_reset_priv(rcdev);
+	unsigned int offset = id >> 8;
+	u32 mask = BIT(id & 0x1f);	/* bit 31 does not fit a signed int */
+
+	writel(mask, priv->base + offset);
+
+	/* let the reset be effective */
+	udelay(10);
+	return 0;
+}
+
+static struct reset_control_ops berlin_reset_ops = {
+	.reset	= berlin_reset_reset,
+};
+
+/*
+ * Map a two-cell <offset bit> specifier to the id (offset << 8) | bit,
+ * rejecting offsets >= the mapped size and bits >= BERLIN_MAX_RESETS.
+ */
+static int berlin_reset_xlate(struct reset_controller_dev *rcdev,
+			      const struct of_phandle_args *reset_spec)
+{
+	struct berlin_reset_priv *priv = to_berlin_reset_priv(rcdev);
+	unsigned reg, line;
+
+	if (WARN_ON(reset_spec->args_count != rcdev->of_reset_n_cells))
+		return -EINVAL;
+
+	reg = reset_spec->args[0];
+	line = reset_spec->args[1];
+	if (reg >= priv->size || line >= BERLIN_MAX_RESETS)
+		return -EINVAL;
+
+	return (reg << 8) | line;
+}
+
+/* Map the node's first reg window and register it as a reset controller. */
+static int __berlin_reset_init(struct device_node *np)
+{
+	struct berlin_reset_priv *priv;
+	struct resource res;
+	resource_size_t size;
+	int ret;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	ret = of_address_to_resource(np, 0, &res);
+	if (ret)
+		goto err;
+
+	size = resource_size(&res);
+	priv->base = ioremap(res.start, size);
+	if (!priv->base) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	priv->size = size;
+	priv->rcdev.owner = THIS_MODULE;
+	priv->rcdev.ops = &berlin_reset_ops;
+	priv->rcdev.of_node = np;
+	priv->rcdev.of_reset_n_cells = 2;
+	priv->rcdev.of_xlate = berlin_reset_xlate;
+
+	ret = reset_controller_register(&priv->rcdev);
+	if (!ret)
+		return 0;
+
+	iounmap(priv->base);	/* registration failed: undo the mapping */
+err:
+	kfree(priv);
+	return ret;
+}
+
+static const struct of_device_id berlin_reset_of_match[] __initconst = {
+	{ .compatible = "marvell,berlin2-chip-ctrl" },
+	{ .compatible = "marvell,berlin2cd-chip-ctrl" },
+	{ .compatible = "marvell,berlin2q-chip-ctrl" },
+	{ },
+};
+
+static int __init berlin_reset_init(void)
+{
+	struct device_node *np;
+	int ret = 0;
+
+	for_each_matching_node(np, berlin_reset_of_match) {
+		ret = __berlin_reset_init(np);
+		if (ret)
+			break;
+	}
+	of_node_put(np);	/* drop the ref still held after a break */
+	return ret;
+}
+arch_initcall(berlin_reset_init);
-- 
cgit v0.10.2


From 42813295dfa3689c73e93726e7ebbbdb466dd246 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Mon, 27 Oct 2014 09:37:35 -0700
Subject: soc: ti: Use list_first_entry_or_null() at appropriate places

Use list_first_entry_or_null() for first_region() and first_queue_range().

list_first_entry() expects the list is not empty, so first_region() and
first_queue_range() never return NULL.
Thus use list_first_entry_or_null() instead.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Santosh Shilimkar <ssantosh@kernel.org>

diff --git a/drivers/soc/ti/knav_qmss.h b/drivers/soc/ti/knav_qmss.h
index bc9dcc8..51da234 100644
--- a/drivers/soc/ti/knav_qmss.h
+++ b/drivers/soc/ti/knav_qmss.h
@@ -348,15 +348,15 @@ struct knav_range_info {
 	list_for_each_entry(region, &kdev->regions, list)
 
 #define first_region(kdev)					\
-	list_first_entry(&kdev->regions, \
-			struct knav_region, list)
+	list_first_entry_or_null(&kdev->regions, \
+				 struct knav_region, list)
 
 #define for_each_queue_range(kdev, range)			\
 	list_for_each_entry(range, &kdev->queue_ranges, list)
 
 #define first_queue_range(kdev)					\
-	list_first_entry(&kdev->queue_ranges, \
-			struct knav_range_info, list)
+	list_first_entry_or_null(&kdev->queue_ranges, \
+				 struct knav_range_info, list)
 
 #define for_each_pool(kdev, pool)				\
 	list_for_each_entry(pool, &kdev->pools, list)
-- 
cgit v0.10.2


From ea6d4c07ca872b875b31e9a9bc99c21359c0f19c Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Mon, 27 Oct 2014 09:37:35 -0700
Subject: soc: ti: knav_qmss_queue: Fix unbalanced locking in
 knav_pool_create()

Don't call mutex_unlock() in the error path if the mutex_lock() is not called.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Santosh Shilimkar <ssantosh@kernel.org>

diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c
index 0a2c863..d66aaf2 100644
--- a/drivers/soc/ti/knav_qmss_queue.c
+++ b/drivers/soc/ti/knav_qmss_queue.c
@@ -785,7 +785,7 @@ void *knav_pool_create(const char *name,
 		dev_err(kdev->dev, "out of descs in region(%d) for pool(%s)\n",
 			region_id, name);
 		ret = -ENOMEM;
-		goto err;
+		goto err_unlock;
 	}
 
 	/* Region maintains a sorted (by region offset) list of pools
@@ -815,15 +815,16 @@ void *knav_pool_create(const char *name,
 		dev_err(kdev->dev, "pool(%s) create failed: fragmented desc pool in region(%d)\n",
 			name, region_id);
 		ret = -ENOMEM;
-		goto err;
+		goto err_unlock;
 	}
 
 	mutex_unlock(&knav_dev_lock);
 	kdesc_fill_pool(pool);
 	return pool;
 
-err:
+err_unlock:
 	mutex_unlock(&knav_dev_lock);
+err:
 	kfree(pool->name);
 	devm_kfree(kdev->dev, pool);
 	return ERR_PTR(ret);
-- 
cgit v0.10.2


From 39179cb5b789e0c0275ccb8fa83cfaf9257d1474 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Mon, 27 Oct 2014 09:37:35 -0700
Subject: soc: ti: knav_qmss_queue: Return proper error if devm_kzalloc fails

Return -ENOMEM if devm_kzalloc fails.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Santosh Shilimkar <ssantosh@kernel.org>

diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c
index d66aaf2..6f22d56 100644
--- a/drivers/soc/ti/knav_qmss_queue.c
+++ b/drivers/soc/ti/knav_qmss_queue.c
@@ -1640,7 +1640,7 @@ static int knav_queue_init_queues(struct knav_device *kdev)
 	size = (1 << kdev->inst_shift) * kdev->num_queues_in_use;
 	kdev->instances = devm_kzalloc(kdev->dev, size, GFP_KERNEL);
 	if (!kdev->instances)
-		return -1;
+		return -ENOMEM;
 
 	for_each_queue_range(kdev, range) {
 		if (range->ops && range->ops->init_range)
-- 
cgit v0.10.2


From c6f85cb4305bd80658d19f7b097a7c36ef9912e2 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 30 Jun 2014 12:20:21 +0100
Subject: bus: cci: move away from arm_pmu framework

The ARM CPU PMUs and the ARM CCI PMU are using the same framework
despite being substantially different in programming model, which makes
it difficult to handle either particularly well.

This patch migrates the ARM CCI PMU driver away from the arm_pmu
framework, matching the style of the CCN PMU driver and other 'uncore'
PMU drivers. This will enable refactoring of the arm_pmu framework to
better support CPU PMUs. Event context migration on hotplug is not yet
added due to a race on event->ctx in the core perf code.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Punit Agrawal <punit.agrawal@arm.com>
Cc: Pawel Moll <pawel.moll@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
[will: fix whitespace issues]
Signed-off-by: Will Deacon <will.deacon@arm.com>

diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 7af78df..860da40 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -16,17 +16,17 @@
 
 #include <linux/arm-cci.h>
 #include <linux/io.h>
+#include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
+#include <linux/perf_event.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
 #include <asm/cacheflush.h>
-#include <asm/irq_regs.h>
-#include <asm/pmu.h>
 #include <asm/smp_plat.h>
 
 #define DRIVER_NAME		"CCI-400"
@@ -98,6 +98,8 @@ static unsigned long cci_ctrl_phys;
 
 #define CCI_PMU_CNTR_BASE(idx)	((idx) * SZ_4K)
 
+#define CCI_PMU_CNTR_MASK	((1ULL << 32) -1)
+
 /*
  * Instead of an event id to monitor CCI cycles, a dedicated counter is
  * provided. Use 0xff to represent CCI cycles and hope that no future revisions
@@ -170,18 +172,29 @@ static char *const pmu_names[] = {
 	[CCI_REV_R1] = "CCI_400_r1",
 };
 
-struct cci_pmu_drv_data {
+struct cci_pmu_hw_events {
+	struct perf_event *events[CCI_PMU_MAX_HW_EVENTS];
+	unsigned long used_mask[BITS_TO_LONGS(CCI_PMU_MAX_HW_EVENTS)];
+	raw_spinlock_t pmu_lock;
+};
+
+struct cci_pmu {
 	void __iomem *base;
-	struct arm_pmu *cci_pmu;
+	struct pmu pmu;
 	int nr_irqs;
 	int irqs[CCI_PMU_MAX_HW_EVENTS];
 	unsigned long active_irqs;
-	struct perf_event *events[CCI_PMU_MAX_HW_EVENTS];
-	unsigned long used_mask[BITS_TO_LONGS(CCI_PMU_MAX_HW_EVENTS)];
 	struct pmu_port_event_ranges *port_ranges;
-	struct pmu_hw_events hw_events;
+	struct cci_pmu_hw_events hw_events;
+	struct platform_device *plat_device;
+	int num_events;
+	atomic_t active_events;
+	struct mutex reserve_mutex;
+	cpumask_t cpus;
 };
-static struct cci_pmu_drv_data *pmu;
+static struct cci_pmu *pmu;
+
+#define to_cci_pmu(c)	(container_of(c, struct cci_pmu, pmu))
 
 static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs)
 {
@@ -252,7 +265,7 @@ static int pmu_validate_hw_event(u8 hw_event)
 	return -ENOENT;
 }
 
-static int pmu_is_valid_counter(struct arm_pmu *cci_pmu, int idx)
+static int pmu_is_valid_counter(struct cci_pmu *cci_pmu, int idx)
 {
 	return CCI_PMU_CYCLE_CNTR_IDX <= idx &&
 		idx <= CCI_PMU_CNTR_LAST(cci_pmu);
@@ -293,14 +306,9 @@ static u32 pmu_get_max_counters(void)
 	return n_cnts + 1;
 }
 
-static struct pmu_hw_events *pmu_get_hw_events(void)
-{
-	return &pmu->hw_events;
-}
-
-static int pmu_get_event_idx(struct pmu_hw_events *hw, struct perf_event *event)
+static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *event)
 {
-	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
 	struct hw_perf_event *hw_event = &event->hw;
 	unsigned long cci_event = hw_event->config_base & CCI_PMU_EVENT_MASK;
 	int idx;
@@ -336,7 +344,7 @@ static int pmu_map_event(struct perf_event *event)
 	return mapping;
 }
 
-static int pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler)
+static int pmu_request_irq(struct cci_pmu *cci_pmu, irq_handler_t handler)
 {
 	int i;
 	struct platform_device *pmu_device = cci_pmu->plat_device;
@@ -371,17 +379,91 @@ static int pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler)
 	return 0;
 }
 
+static void pmu_free_irq(struct cci_pmu *cci_pmu)
+{
+	int i;
+
+	for (i = 0; i < pmu->nr_irqs; i++) {
+		if (!test_and_clear_bit(i, &pmu->active_irqs))
+			continue;
+
+		free_irq(pmu->irqs[i], cci_pmu);
+	}
+}
+
+static u32 pmu_read_counter(struct perf_event *event)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	struct hw_perf_event *hw_counter = &event->hw;
+	int idx = hw_counter->idx;
+	u32 value;
+
+	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+		return 0;
+	}
+	value = pmu_read_register(idx, CCI_PMU_CNTR);
+
+	return value;
+}
+
+static void pmu_write_counter(struct perf_event *event, u32 value)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	struct hw_perf_event *hw_counter = &event->hw;
+	int idx = hw_counter->idx;
+
+	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx)))
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+	else
+		pmu_write_register(value, idx, CCI_PMU_CNTR);
+}
+
+static u64 pmu_event_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev_raw_count, new_raw_count;
+
+	do {
+		prev_raw_count = local64_read(&hwc->prev_count);
+		new_raw_count = pmu_read_counter(event);
+	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+		 new_raw_count) != prev_raw_count);
+
+	delta = (new_raw_count - prev_raw_count) & CCI_PMU_CNTR_MASK;
+
+	local64_add(delta, &event->count);
+
+	return new_raw_count;
+}
+
+static void pmu_read(struct perf_event *event)
+{
+	pmu_event_update(event);
+}
+
+static void pmu_event_set_period(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	/*
+	 * The CCI PMU counters have a period of 2^32. To account for the
+	 * possibility of extreme interrupt latency we program for a period of
+	 * half that. Hopefully we can handle the interrupt before another 2^31
+	 * events occur and the counter overtakes its previous value.
+	 */
+	u64 val = 1ULL << 31;
+	local64_set(&hwc->prev_count, val);
+	pmu_write_counter(event, val);
+}
+
 static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
 {
 	unsigned long flags;
-	struct arm_pmu *cci_pmu = (struct arm_pmu *)dev;
-	struct pmu_hw_events *events = cci_pmu->get_hw_events();
-	struct perf_sample_data data;
-	struct pt_regs *regs;
+	struct cci_pmu *cci_pmu = dev;
+	struct cci_pmu_hw_events *events = &pmu->hw_events;
 	int idx, handled = IRQ_NONE;
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
-	regs = get_irq_regs();
 	/*
 	 * Iterate over counters and update the corresponding perf events.
 	 * This should work regardless of whether we have per-counter overflow
@@ -403,154 +485,407 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
 
 		pmu_write_register(CCI_PMU_OVRFLW_FLAG, idx, CCI_PMU_OVRFLW);
 
+		pmu_event_update(event);
+		pmu_event_set_period(event);
 		handled = IRQ_HANDLED;
-
-		armpmu_event_update(event);
-		perf_sample_data_init(&data, 0, hw_counter->last_period);
-		if (!armpmu_event_set_period(event))
-			continue;
-
-		if (perf_event_overflow(event, &data, regs))
-			cci_pmu->disable(event);
 	}
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 
 	return IRQ_RETVAL(handled);
 }
 
-static void pmu_free_irq(struct arm_pmu *cci_pmu)
+static int cci_pmu_get_hw(struct cci_pmu *cci_pmu)
 {
-	int i;
+	int ret = pmu_request_irq(cci_pmu, pmu_handle_irq);
+	if (ret) {
+		pmu_free_irq(cci_pmu);
+		return ret;
+	}
+	return 0;
+}
 
-	for (i = 0; i < pmu->nr_irqs; i++) {
-		if (!test_and_clear_bit(i, &pmu->active_irqs))
-			continue;
+static void cci_pmu_put_hw(struct cci_pmu *cci_pmu)
+{
+	pmu_free_irq(cci_pmu);
+}
 
-		free_irq(pmu->irqs[i], cci_pmu);
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	atomic_t *active_events = &cci_pmu->active_events;
+	struct mutex *reserve_mutex = &cci_pmu->reserve_mutex;
+
+	if (atomic_dec_and_mutex_lock(active_events, reserve_mutex)) {
+		cci_pmu_put_hw(cci_pmu);
+		mutex_unlock(reserve_mutex);
 	}
 }
 
-static void pmu_enable_event(struct perf_event *event)
+static void cci_pmu_enable(struct pmu *pmu)
 {
+	struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
+	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+	int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_events);
 	unsigned long flags;
-	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *events = cci_pmu->get_hw_events();
-	struct hw_perf_event *hw_counter = &event->hw;
-	int idx = hw_counter->idx;
+	u32 val;
+
+	if (!enabled)
+		return;
+
+	raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
+
+	/* Enable all the PMU counters. */
+	val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN;
+	writel(val, cci_ctrl_base + CCI_PMCR);
+	raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
+
+}
+
+static void cci_pmu_disable(struct pmu *pmu)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
+	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+	unsigned long flags;
+	u32 val;
+
+	raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
+
+	/* Disable all the PMU counters. */
+	val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN;
+	writel(val, cci_ctrl_base + CCI_PMCR);
+	raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
+}
+
+static void cci_pmu_start(struct perf_event *event, int pmu_flags)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	unsigned long flags;
+
+	/*
+	 * To handle interrupt latency, we always reprogram the period
+	 * regardless of PERF_EF_RELOAD.
+	 */
+	if (pmu_flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
 
 	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
 		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
 		return;
 	}
 
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
 
 	/* Configure the event to count, unless you are counting cycles */
 	if (idx != CCI_PMU_CYCLE_CNTR_IDX)
-		pmu_set_event(idx, hw_counter->config_base);
+		pmu_set_event(idx, hwc->config_base);
 
+	pmu_event_set_period(event);
 	pmu_enable_counter(idx);
 
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
 }
 
-static void pmu_disable_event(struct perf_event *event)
+static void cci_pmu_stop(struct perf_event *event, int pmu_flags)
 {
-	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hw_counter = &event->hw;
-	int idx = hw_counter->idx;
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
 
 	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
 		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
 		return;
 	}
 
+	/*
+	 * We always reprogram the counter, so ignore PERF_EF_UPDATE. See
+	 * cci_pmu_start()
+	 */
 	pmu_disable_counter(idx);
+	pmu_event_update(event);
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
 }
 
-static void pmu_start(struct arm_pmu *cci_pmu)
+static int cci_pmu_add(struct perf_event *event, int flags)
 {
-	u32 val;
-	unsigned long flags;
-	struct pmu_hw_events *events = cci_pmu->get_hw_events();
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+	int err = 0;
 
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	perf_pmu_disable(event->pmu);
 
-	/* Enable all the PMU counters. */
-	val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN;
-	writel(val, cci_ctrl_base + CCI_PMCR);
+	/* If we don't have a space for the counter then finish early. */
+	idx = pmu_get_event_idx(hw_events, event);
+	if (idx < 0) {
+		err = idx;
+		goto out;
+	}
 
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+	event->hw.idx = idx;
+	hw_events->events[idx] = event;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		cci_pmu_start(event, PERF_EF_RELOAD);
+
+	/* Propagate our changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+out:
+	perf_pmu_enable(event->pmu);
+	return err;
 }
 
-static void pmu_stop(struct arm_pmu *cci_pmu)
+static void cci_pmu_del(struct perf_event *event, int flags)
 {
-	u32 val;
-	unsigned long flags;
-	struct pmu_hw_events *events = cci_pmu->get_hw_events();
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
 
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	cci_pmu_stop(event, PERF_EF_UPDATE);
+	hw_events->events[idx] = NULL;
+	clear_bit(idx, hw_events->used_mask);
 
-	/* Disable all the PMU counters. */
-	val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN;
-	writel(val, cci_ctrl_base + CCI_PMCR);
+	perf_event_update_userpage(event);
+}
 
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+static int
+validate_event(struct cci_pmu_hw_events *hw_events,
+	       struct perf_event *event)
+{
+	if (is_software_event(event))
+		return 1;
+
+	if (event->state < PERF_EVENT_STATE_OFF)
+		return 1;
+
+	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
+		return 1;
+
+	return pmu_get_event_idx(hw_events, event) >= 0;
 }
 
-static u32 pmu_read_counter(struct perf_event *event)
+static int
+validate_group(struct perf_event *event)
 {
-	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hw_counter = &event->hw;
-	int idx = hw_counter->idx;
-	u32 value;
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct cci_pmu_hw_events fake_pmu = {
+		/*
+		 * Initialise the fake PMU. We only need to populate the
+		 * used_mask for the purposes of validation.
+		 */
+		.used_mask = CPU_BITS_NONE,
+	};
 
-	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
-		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
-		return 0;
+	if (!validate_event(&fake_pmu, leader))
+		return -EINVAL;
+
+	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+		if (!validate_event(&fake_pmu, sibling))
+			return -EINVAL;
 	}
-	value = pmu_read_register(idx, CCI_PMU_CNTR);
 
-	return value;
+	if (!validate_event(&fake_pmu, event))
+		return -EINVAL;
+
+	return 0;
 }
 
-static void pmu_write_counter(struct perf_event *event, u32 value)
+static int
+__hw_perf_event_init(struct perf_event *event)
 {
-	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hw_counter = &event->hw;
-	int idx = hw_counter->idx;
+	struct hw_perf_event *hwc = &event->hw;
+	int mapping;
 
-	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx)))
-		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
-	else
-		pmu_write_register(value, idx, CCI_PMU_CNTR);
+	mapping = pmu_map_event(event);
+
+	if (mapping < 0) {
+		pr_debug("event %x:%llx not supported\n", event->attr.type,
+			 event->attr.config);
+		return mapping;
+	}
+
+	/*
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet.
+	 */
+	hwc->idx		= -1;
+	hwc->config_base	= 0;
+	hwc->config		= 0;
+	hwc->event_base		= 0;
+
+	/*
+	 * Store the event encoding into the config_base field.
+	 */
+	hwc->config_base	    |= (unsigned long)mapping;
+
+	/*
+	 * Limit the sample_period to half of the counter width. That way, the
+	 * new counter value is far less likely to overtake the previous one
+	 * unless you have some serious IRQ latency issues.
+	 */
+	hwc->sample_period  = CCI_PMU_CNTR_MASK >> 1;
+	hwc->last_period    = hwc->sample_period;
+	local64_set(&hwc->period_left, hwc->sample_period);
+
+	if (event->group_leader != event) {
+		if (validate_group(event) != 0)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Validate a new event, bind it to the PMU's CPU and claim the IRQs. */
+static int cci_pmu_event_init(struct perf_event *event)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	atomic_t *active_events = &cci_pmu->active_events;
+	int cpu, err = 0;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Shared by all CPUs, no meaningful state to sample */
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+
+	/* We have no filtering of any kind */
+	if (event->attr.exclude_user	||
+	    event->attr.exclude_kernel	||
+	    event->attr.exclude_hv	||
+	    event->attr.exclude_idle	||
+	    event->attr.exclude_host	||
+	    event->attr.exclude_guest)
+		return -EINVAL;
+
+	/*
+	 * Like other "uncore" PMUs we accept any CPU and rewrite its affinity
+	 * ourselves instead of having perf core handle cpu == -1 / pid == -1.
+	 * cpumask_first() returns >= nr_cpu_ids if the PMU has no CPU set.
+	 *
+	 * The perf core will pin online CPUs for the duration of this call and
+	 * the event being installed into its context, so the PMU's CPU can't
+	 * change under our feet.
+	 */
+	cpu = cpumask_first(&cci_pmu->cpus);
+	if (event->cpu < 0 || cpu >= nr_cpu_ids)
+		return -EINVAL;
+	event->cpu = cpu;
+
+	event->destroy = hw_perf_event_destroy;
+	if (!atomic_inc_not_zero(active_events)) {
+		mutex_lock(&cci_pmu->reserve_mutex);
+		if (atomic_read(active_events) == 0)
+			err = cci_pmu_get_hw(cci_pmu);
+		if (!err)
+			atomic_inc(active_events);
+		mutex_unlock(&cci_pmu->reserve_mutex);
+	}
+	if (err)
+		return err;
+
+	err = __hw_perf_event_init(event);
+	if (err)
+		hw_perf_event_destroy(event);
+
+	return err;
 }
 
-static int cci_pmu_init(struct arm_pmu *cci_pmu, struct platform_device *pdev)
+static ssize_t pmu_attr_cpumask_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
 {
-	*cci_pmu = (struct arm_pmu){
-		.name		  = pmu_names[probe_cci_revision()],
-		.max_period       = (1LLU << 32) - 1,
-		.get_hw_events    = pmu_get_hw_events,
-		.get_event_idx    = pmu_get_event_idx,
-		.map_event        = pmu_map_event,
-		.request_irq      = pmu_request_irq,
-		.handle_irq       = pmu_handle_irq,
-		.free_irq         = pmu_free_irq,
-		.enable           = pmu_enable_event,
-		.disable          = pmu_disable_event,
-		.start            = pmu_start,
-		.stop             = pmu_stop,
-		.read_counter     = pmu_read_counter,
-		.write_counter    = pmu_write_counter,
+	int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &pmu->cpus);
+
+	buf[n++] = '\n';
+	buf[n] = '\0';
+	return n;
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, pmu_attr_cpumask_show, NULL);
+
+static struct attribute *pmu_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group pmu_attr_group = {
+	.attrs = pmu_attrs,
+};
+
+static const struct attribute_group *pmu_attr_groups[] = {
+	&pmu_attr_group,
+	NULL
+};
+
+static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
+{
+	char *name = pmu_names[probe_cci_revision()];
+	cci_pmu->pmu = (struct pmu) {
+		.name		= pmu_names[probe_cci_revision()],
+		.task_ctx_nr	= perf_invalid_context,
+		.pmu_enable	= cci_pmu_enable,
+		.pmu_disable	= cci_pmu_disable,
+		.event_init	= cci_pmu_event_init,
+		.add		= cci_pmu_add,
+		.del		= cci_pmu_del,
+		.start		= cci_pmu_start,
+		.stop		= cci_pmu_stop,
+		.read		= pmu_read,
+		.attr_groups	= pmu_attr_groups,
 	};
 
 	cci_pmu->plat_device = pdev;
 	cci_pmu->num_events = pmu_get_max_counters();
 
-	return armpmu_register(cci_pmu, -1);
+	return perf_pmu_register(&cci_pmu->pmu, name, -1);
 }
 
+/* CPU hotplug callback: pass the PMU's CPU designation on before it dies. */
+static int cci_pmu_cpu_notifier(struct notifier_block *self,
+				unsigned long action, void *hcpu)
+{
+	unsigned int target, cpu = (long)hcpu;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DOWN_PREPARE:
+		if (!cpumask_test_and_clear_cpu(cpu, &pmu->cpus))
+			break;
+		target = cpumask_any_but(cpu_online_mask, cpu);
+		if (target >= nr_cpu_ids) /* none left: UP, last CPU */
+			break;
+		/*
+		 * TODO: migrate context once core races on event->ctx have
+		 * been fixed.
+		 */
+		cpumask_set_cpu(target, &pmu->cpus);
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cci_pmu_cpu_nb = {
+	.notifier_call	= cci_pmu_cpu_notifier,
+	/*
+	 * to migrate uncore events, our notifier should be executed
+	 * before perf core's notifier.
+	 */
+	.priority	= CPU_PRI_PERF + 1,
+};
+
 static const struct of_device_id arm_cci_pmu_matches[] = {
 	{
 		.compatible = "arm,cci-400-pmu",
@@ -604,15 +939,16 @@ static int cci_pmu_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	pmu->cci_pmu = devm_kzalloc(&pdev->dev, sizeof(*(pmu->cci_pmu)), GFP_KERNEL);
-	if (!pmu->cci_pmu)
-		return -ENOMEM;
-
-	pmu->hw_events.events = pmu->events;
-	pmu->hw_events.used_mask = pmu->used_mask;
 	raw_spin_lock_init(&pmu->hw_events.pmu_lock);
+	mutex_init(&pmu->reserve_mutex);
+	atomic_set(&pmu->active_events, 0);
+	cpumask_set_cpu(smp_processor_id(), &pmu->cpus);
+
+	ret = register_cpu_notifier(&cci_pmu_cpu_nb);
+	if (ret)
+		return ret;
 
-	ret = cci_pmu_init(pmu->cci_pmu, pdev);
+	ret = cci_pmu_init(pmu, pdev);
 	if (ret)
 		return ret;
 
-- 
cgit v0.10.2


From cb6eb108a7be04709f8db0cedde30bee0e0a64ee Mon Sep 17 00:00:00 2001
From: chai wen <chaiw.fnst@cn.fujitsu.com>
Date: Wed, 22 Oct 2014 13:16:49 +0100
Subject: ARM: perf: remove useless return and check of idx in counter handling

The idx sanity check was once implemented separately in each of these
counter handling functions, and their return value was then used to
judge whether idx was valid:
	armv7_pmnc_select_counter()
	armv7_pmnc_enable_counter()
	armv7_pmnc_disable_counter()
	armv7_pmnc_enable_intens()
	armv7_pmnc_disable_intens()
We no longer need to do this, as the idx validation check was moved
out of all these functions by commit 7279adbd9bb8ef8f ("ARM: perf: check
ARMv7 counter validity on a per-pmu basis").
Let's remove the useless return of idx from these functions.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: chai wen <chaiw.fnst@cn.fujitsu.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>

diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 116758b..aaf5314 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -564,13 +564,11 @@ static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx)
 	return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx));
 }
 
-static inline int armv7_pmnc_select_counter(int idx)
+static inline void armv7_pmnc_select_counter(int idx)
 {
 	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
 	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter));
 	isb();
-
-	return idx;
 }
 
 static inline u32 armv7pmu_read_counter(struct perf_event *event)
@@ -580,13 +578,15 @@ static inline u32 armv7pmu_read_counter(struct perf_event *event)
 	int idx = hwc->idx;
 	u32 value = 0;
 
-	if (!armv7_pmnc_counter_valid(cpu_pmu, idx))
+	if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
 		pr_err("CPU%u reading wrong counter %d\n",
 			smp_processor_id(), idx);
-	else if (idx == ARMV7_IDX_CYCLE_COUNTER)
+	} else if (idx == ARMV7_IDX_CYCLE_COUNTER) {
 		asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
-	else if (armv7_pmnc_select_counter(idx) == idx)
+	} else {
+		armv7_pmnc_select_counter(idx);
 		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value));
+	}
 
 	return value;
 }
@@ -597,45 +597,43 @@ static inline void armv7pmu_write_counter(struct perf_event *event, u32 value)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
-	if (!armv7_pmnc_counter_valid(cpu_pmu, idx))
+	if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
 		pr_err("CPU%u writing wrong counter %d\n",
 			smp_processor_id(), idx);
-	else if (idx == ARMV7_IDX_CYCLE_COUNTER)
+	} else if (idx == ARMV7_IDX_CYCLE_COUNTER) {
 		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
-	else if (armv7_pmnc_select_counter(idx) == idx)
+	} else {
+		armv7_pmnc_select_counter(idx);
 		asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
+	}
 }
 
 static inline void armv7_pmnc_write_evtsel(int idx, u32 val)
 {
-	if (armv7_pmnc_select_counter(idx) == idx) {
-		val &= ARMV7_EVTYPE_MASK;
-		asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
-	}
+	armv7_pmnc_select_counter(idx);
+	val &= ARMV7_EVTYPE_MASK;
+	asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
 }
 
-static inline int armv7_pmnc_enable_counter(int idx)
+static inline void armv7_pmnc_enable_counter(int idx)
 {
 	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
 	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter)));
-	return idx;
 }
 
-static inline int armv7_pmnc_disable_counter(int idx)
+static inline void armv7_pmnc_disable_counter(int idx)
 {
 	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
 	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter)));
-	return idx;
 }
 
-static inline int armv7_pmnc_enable_intens(int idx)
+static inline void armv7_pmnc_enable_intens(int idx)
 {
 	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
 	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter)));
-	return idx;
 }
 
-static inline int armv7_pmnc_disable_intens(int idx)
+static inline void armv7_pmnc_disable_intens(int idx)
 {
 	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
 	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter)));
@@ -643,8 +641,6 @@ static inline int armv7_pmnc_disable_intens(int idx)
 	/* Clear the overflow flag in case an interrupt is pending. */
 	asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(counter)));
 	isb();
-
-	return idx;
 }
 
 static inline u32 armv7_pmnc_getreset_flags(void)
-- 
cgit v0.10.2


From 52a5566e7617ce2678c2a35c7982b808cb2b53f6 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 30 Oct 2014 11:26:57 +0000
Subject: ARM: perf: use pr_* instead of printk

There are a few remaining uses of printk in the ARM perf code, so move
them over to the pr_* variants instead.

Reported-by: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Will Deacon <will.deacon@arm.com>

diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index eb2c4d5..a5e808f 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -120,7 +120,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 
 	irqs = min(pmu_device->num_resources, num_possible_cpus());
 	if (irqs < 1) {
-		printk_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
+		pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
 		return 0;
 	}
 
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index aaf5314..d62b27c 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -663,34 +663,34 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
 	u32 val;
 	unsigned int cnt;
 
-	printk(KERN_INFO "PMNC registers dump:\n");
+	pr_info("PMNC registers dump:\n");
 
 	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
-	printk(KERN_INFO "PMNC  =0x%08x\n", val);
+	pr_info("PMNC  =0x%08x\n", val);
 
 	asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
-	printk(KERN_INFO "CNTENS=0x%08x\n", val);
+	pr_info("CNTENS=0x%08x\n", val);
 
 	asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
-	printk(KERN_INFO "INTENS=0x%08x\n", val);
+	pr_info("INTENS=0x%08x\n", val);
 
 	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
-	printk(KERN_INFO "FLAGS =0x%08x\n", val);
+	pr_info("FLAGS =0x%08x\n", val);
 
 	asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
-	printk(KERN_INFO "SELECT=0x%08x\n", val);
+	pr_info("SELECT=0x%08x\n", val);
 
 	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
-	printk(KERN_INFO "CCNT  =0x%08x\n", val);
+	pr_info("CCNT  =0x%08x\n", val);
 
 	for (cnt = ARMV7_IDX_COUNTER0;
 			cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
 		armv7_pmnc_select_counter(cnt);
 		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
-		printk(KERN_INFO "CNT[%d] count =0x%08x\n",
+		pr_info("CNT[%d] count =0x%08x\n",
 			ARMV7_IDX_TO_COUNTER(cnt), val);
 		asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
-		printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
+		pr_info("CNT[%d] evtsel=0x%08x\n",
 			ARMV7_IDX_TO_COUNTER(cnt), val);
 	}
 }
-- 
cgit v0.10.2


From d39976f0fd144d1cef4830d696e2a1e6d8058dc6 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 29 Sep 2014 17:15:32 +0100
Subject: arm: perf: factor out callchain code

The ARM callchain handling code is currently bundled with the ARM PMU
management code, despite the two having no dependency on each other.
This bundling has the unfortunate property of making callchain handling
depend on CONFIG_HW_PERF_EVENTS, even though the callchain handling
could be applied to software events in the absence of PMU hardware
support.

This patch separates the two, placing the callchain handling in
perf_callchain.c and making it depend on CONFIG_PERF_EVENTS rather than
CONFIG_HW_PERF_EVENTS, enabling callchain recording on kernels built
without hardware perf event support.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>

diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index c3a8369..d9cf138 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,7 +12,7 @@
 #ifndef __ARM_PERF_EVENT_H__
 #define __ARM_PERF_EVENT_H__
 
-#ifdef CONFIG_HW_PERF_EVENTS
+#ifdef CONFIG_PERF_EVENTS
 struct pt_regs;
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 38ddd9f..8dcbed5 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -82,7 +82,7 @@ obj-$(CONFIG_CPU_MOHAWK)	+= xscale-cp0.o
 obj-$(CONFIG_CPU_PJ4)		+= pj4-cp0.o
 obj-$(CONFIG_CPU_PJ4B)		+= pj4-cp0.o
 obj-$(CONFIG_IWMMXT)		+= iwmmxt.o
-obj-$(CONFIG_PERF_EVENTS)	+= perf_regs.o
+obj-$(CONFIG_PERF_EVENTS)	+= perf_regs.o perf_callchain.o
 obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o perf_event_cpu.o
 AFLAGS_iwmmxt.o			:= -Wa,-mcpu=iwmmxt
 obj-$(CONFIG_ARM_CPU_TOPOLOGY)  += topology.o
diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c
new file mode 100644
index 0000000..4e02ae5
--- /dev/null
+++ b/arch/arm/kernel/perf_callchain.c
@@ -0,0 +1,136 @@
+/*
+ * ARM callchain support
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * This code is based on the ARM OProfile backtrace code.
+ */
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+
+#include <asm/stacktrace.h>
+
+/*
+ * The registers we're interested in are at the end of the variable
+ * length saved register structure. The fp points at the end of this
+ * structure so the address of this struct is:
+ * (struct frame_tail *)(xxx->fp)-1
+ *
+ * This code has been adapted from the ARM OProfile support.
+ */
+struct frame_tail {
+	struct frame_tail __user *fp;
+	unsigned long sp;
+	unsigned long lr;
+} __attribute__((packed));
+
+/*
+ * Get the return address for a single stackframe and return a pointer to the
+ * next frame tail.
+ */
+static struct frame_tail __user *
+user_backtrace(struct frame_tail __user *tail,
+	       struct perf_callchain_entry *entry)
+{
+	struct frame_tail buftail;
+	unsigned long err;
+
+	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+		return NULL;
+
+	pagefault_disable();
+	err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
+	pagefault_enable();
+
+	if (err)
+		return NULL;
+
+	perf_callchain_store(entry, buftail.lr);
+
+	/*
+	 * Frame pointers should strictly progress back up the stack
+	 * (towards higher addresses).
+	 */
+	if (tail + 1 >= buftail.fp)
+		return NULL;
+
+	return buftail.fp - 1;
+}
+
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+	struct frame_tail __user *tail;
+
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		/* We don't support guest os callchain now */
+		return;
+	}
+
+	perf_callchain_store(entry, regs->ARM_pc);
+
+	if (!current->mm)
+		return;
+
+	tail = (struct frame_tail __user *)regs->ARM_fp - 1;
+
+	while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+	       tail && !((unsigned long)tail & 0x3))
+		tail = user_backtrace(tail, entry);
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called
+ * whilst unwinding the stackframe and is like a subroutine return so we use
+ * the PC.
+ */
+static int
+callchain_trace(struct stackframe *fr,
+		void *data)
+{
+	struct perf_callchain_entry *entry = data;
+	perf_callchain_store(entry, fr->pc);
+	return 0;
+}
+
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+	struct stackframe fr;
+
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		/* We don't support guest os callchain now */
+		return;
+	}
+
+	arm_get_current_stackframe(regs, &fr);
+	walk_stackframe(&fr, callchain_trace, entry);
+}
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+		return perf_guest_cbs->get_guest_ip();
+
+	return instruction_pointer(regs);
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	int misc = 0;
+
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		if (perf_guest_cbs->is_user_mode())
+			misc |= PERF_RECORD_MISC_GUEST_USER;
+		else
+			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+	} else {
+		if (user_mode(regs))
+			misc |= PERF_RECORD_MISC_USER;
+		else
+			misc |= PERF_RECORD_MISC_KERNEL;
+	}
+
+	return misc;
+}
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 266cba4..ae96b98 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -7,21 +7,18 @@
  * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
  *
  * This code is based on the sparc64 perf event code, which is in turn based
- * on the x86 code. Callchain code is based on the ARM OProfile backtrace
- * code.
+ * on the x86 code.
  */
 #define pr_fmt(fmt) "hw perfevents: " fmt
 
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
-#include <linux/uaccess.h>
 #include <linux/irq.h>
 #include <linux/irqdesc.h>
 
 #include <asm/irq_regs.h>
 #include <asm/pmu.h>
-#include <asm/stacktrace.h>
 
 static int
 armpmu_map_cache_event(const unsigned (*cache_map)
@@ -533,130 +530,3 @@ int armpmu_register(struct arm_pmu *armpmu, int type)
 	return perf_pmu_register(&armpmu->pmu, armpmu->name, type);
 }
 
-/*
- * Callchain handling code.
- */
-
-/*
- * The registers we're interested in are at the end of the variable
- * length saved register structure. The fp points at the end of this
- * structure so the address of this struct is:
- * (struct frame_tail *)(xxx->fp)-1
- *
- * This code has been adapted from the ARM OProfile support.
- */
-struct frame_tail {
-	struct frame_tail __user *fp;
-	unsigned long sp;
-	unsigned long lr;
-} __attribute__((packed));
-
-/*
- * Get the return address for a single stackframe and return a pointer to the
- * next frame tail.
- */
-static struct frame_tail __user *
-user_backtrace(struct frame_tail __user *tail,
-	       struct perf_callchain_entry *entry)
-{
-	struct frame_tail buftail;
-	unsigned long err;
-
-	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
-		return NULL;
-
-	pagefault_disable();
-	err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
-	pagefault_enable();
-
-	if (err)
-		return NULL;
-
-	perf_callchain_store(entry, buftail.lr);
-
-	/*
-	 * Frame pointers should strictly progress back up the stack
-	 * (towards higher addresses).
-	 */
-	if (tail + 1 >= buftail.fp)
-		return NULL;
-
-	return buftail.fp - 1;
-}
-
-void
-perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
-{
-	struct frame_tail __user *tail;
-
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-		/* We don't support guest os callchain now */
-		return;
-	}
-
-	perf_callchain_store(entry, regs->ARM_pc);
-
-	if (!current->mm)
-		return;
-
-	tail = (struct frame_tail __user *)regs->ARM_fp - 1;
-
-	while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
-	       tail && !((unsigned long)tail & 0x3))
-		tail = user_backtrace(tail, entry);
-}
-
-/*
- * Gets called by walk_stackframe() for every stackframe. This will be called
- * whist unwinding the stackframe and is like a subroutine return so we use
- * the PC.
- */
-static int
-callchain_trace(struct stackframe *fr,
-		void *data)
-{
-	struct perf_callchain_entry *entry = data;
-	perf_callchain_store(entry, fr->pc);
-	return 0;
-}
-
-void
-perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
-{
-	struct stackframe fr;
-
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-		/* We don't support guest os callchain now */
-		return;
-	}
-
-	arm_get_current_stackframe(regs, &fr);
-	walk_stackframe(&fr, callchain_trace, entry);
-}
-
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
-{
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
-		return perf_guest_cbs->get_guest_ip();
-
-	return instruction_pointer(regs);
-}
-
-unsigned long perf_misc_flags(struct pt_regs *regs)
-{
-	int misc = 0;
-
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-		if (perf_guest_cbs->is_user_mode())
-			misc |= PERF_RECORD_MISC_GUEST_USER;
-		else
-			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
-	} else {
-		if (user_mode(regs))
-			misc |= PERF_RECORD_MISC_USER;
-		else
-			misc |= PERF_RECORD_MISC_KERNEL;
-	}
-
-	return misc;
-}
-- 
cgit v0.10.2


From 0f2a21018a71d8d3fec507f9c55ae8ed03ab9321 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 23 Oct 2014 15:59:35 +0100
Subject: arm: perf: add missing pr_info newlines

Most of the pr_info format strings in perf_event_cpu.c are missing
newlines. Currently we get away with this as the format strings for
subsequent calls to printk (including all pr_* calls) begin with a log
prefix, and the printk core adds the omitted newline for this case.
While this generates the output we expect, we probably should not rely on
the format of successive printk calls in order to get legible output.

This patch adds the missing newlines to pr_info format strings in
perf_event_cpu.c, making them consistent with the format strings for
other pr_info, warn, and pr_err calls, and preventing potentially
illegible output if the next printk/pr_* format string doesn't begin
with a log prefix.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>

diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index a5e808f..8ba23ad 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -299,13 +299,13 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 	int ret = -ENODEV;
 
 	if (cpu_pmu) {
-		pr_info("attempt to register multiple PMU devices!");
+		pr_info("attempt to register multiple PMU devices!\n");
 		return -ENOSPC;
 	}
 
 	pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
 	if (!pmu) {
-		pr_info("failed to allocate PMU device!");
+		pr_info("failed to allocate PMU device!\n");
 		return -ENOMEM;
 	}
 
@@ -320,7 +320,7 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 	}
 
 	if (ret) {
-		pr_info("failed to probe PMU!");
+		pr_info("failed to probe PMU!\n");
 		goto out_free;
 	}
 
@@ -331,7 +331,7 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 		return 0;
 
 out_free:
-	pr_info("failed to register PMU devices!");
+	pr_info("failed to register PMU devices!\n");
 	kfree(pmu);
 	return ret;
 }
-- 
cgit v0.10.2


From 548a86cae4858433cab7e101bca2c6856ab55887 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 23 May 2014 18:11:14 +0100
Subject: arm: perf: make PMU probing data-driven

The current PMU probing logic consists of a single switch statement,
which means that the arm_pmu core in perf_event_cpu.c needs to know
about every CPU PMU variant supported by a driver using the arm_pmu
framework. This makes it rather difficult to decouple the drivers from
the (otherwise generic) probing code.

The patch refactors that switch statement to a table-driven lookup,
separating the logic and knowledge (in the form of the table). Later
patches will split the table across the relevant PMU drivers, which can
pass their tables to the generic probing function.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>

diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index 0b648c5..ff39290 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -15,6 +15,8 @@
 #include <linux/interrupt.h>
 #include <linux/perf_event.h>
 
+#include <asm/cputype.h>
+
 /*
  * struct arm_pmu_platdata - ARM PMU platform data
  *
@@ -127,6 +129,27 @@ int armpmu_map_event(struct perf_event *event,
 						[PERF_COUNT_HW_CACHE_RESULT_MAX],
 		     u32 raw_event_mask);
 
+struct pmu_probe_info {
+	unsigned int cpuid;
+	unsigned int mask;
+	int (*init)(struct arm_pmu *);
+};
+
+#define PMU_PROBE(_cpuid, _mask, _fn)	\
+{					\
+	.cpuid = (_cpuid),		\
+	.mask = (_mask),		\
+	.init = (_fn),			\
+}
+
+#define ARM_PMU_PROBE(_cpuid, _fn) \
+	PMU_PROBE(_cpuid, ARM_CPU_PART_MASK, _fn)
+
+#define ARM_PMU_XSCALE_MASK	((0xff << 24) | ARM_CPU_XSCALE_ARCH_MASK)
+
+#define XSCALE_PMU_PROBE(_version, _fn) \
+	PMU_PROBE(ARM_CPU_IMP_INTEL << 24 | _version, ARM_PMU_XSCALE_MASK, _fn)
+
 #endif /* CONFIG_HW_PERF_EVENTS */
 
 #endif /* __ARM_PMU_H__ */
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 8ba23ad..e7d2652 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -241,48 +241,34 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = {
 	{},
 };
 
+static const struct pmu_probe_info pmu_probe_table[] = {
+	ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init),
+	ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init),
+	ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init),
+	ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init),
+	ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init),
+	ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init),
+	XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init),
+	XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init),
+	{ /* sentinel value */ }
+};
+
 /*
  * CPU PMU identification and probing.
  */
 static int probe_current_pmu(struct arm_pmu *pmu)
 {
 	int cpu = get_cpu();
+	unsigned int cpuid = read_cpuid_id();
 	int ret = -ENODEV;
+	const struct pmu_probe_info *info;
 
 	pr_info("probing PMU on CPU %d\n", cpu);
 
-	switch (read_cpuid_part()) {
-	/* ARM Ltd CPUs. */
-	case ARM_CPU_PART_ARM1136:
-		ret = armv6_1136_pmu_init(pmu);
-		break;
-	case ARM_CPU_PART_ARM1156:
-		ret = armv6_1156_pmu_init(pmu);
-		break;
-	case ARM_CPU_PART_ARM1176:
-		ret = armv6_1176_pmu_init(pmu);
-		break;
-	case ARM_CPU_PART_ARM11MPCORE:
-		ret = armv6mpcore_pmu_init(pmu);
-		break;
-	case ARM_CPU_PART_CORTEX_A8:
-		ret = armv7_a8_pmu_init(pmu);
-		break;
-	case ARM_CPU_PART_CORTEX_A9:
-		ret = armv7_a9_pmu_init(pmu);
-		break;
-
-	default:
-		if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) {
-			switch (xscale_cpu_arch_version()) {
-			case ARM_CPU_XSCALE_ARCH_V1:
-				ret = xscale1pmu_init(pmu);
-				break;
-			case ARM_CPU_XSCALE_ARCH_V2:
-				ret = xscale2pmu_init(pmu);
-				break;
-			}
-		}
+	for (info = pmu_probe_table; info->init != NULL; info++) {
+		if ((cpuid & info->mask) != info->cpuid)
+			continue;
+		ret = info->init(pmu);
 		break;
 	}
 
-- 
cgit v0.10.2


From 67b4305aab0fa993d91fa4c6ea2169cfb3f41c93 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 12 Sep 2012 10:53:23 +0100
Subject: arm: perf: use IDR types for CPU PMUs

For systems with heterogeneous CPUs (e.g. big.LITTLE systems) the PMUs
can be different in each cluster, and not all events can be migrated
between clusters. To allow userspace to deal with this, it must be
possible to address each PMU independently.

This patch changes PMUs to be registered with dynamic (IDR) types,
allowing them to be targeted individually. Each PMU's type can be found
in ${SYSFS_ROOT}/bus/event_source/devices/${PMU_NAME}/type.

From userspace, raw events can be targeted at a specific PMU:
$ perf stat -e ${PMU_NAME}/config=V,config1=V1,.../

Doing this does not break existing tools which use existing perf types:
when perf core can't find a PMU of matching type (in perf_init_event)
it'll iterate over the set of all PMUs. If a compatible PMU exists,
it'll be found eventually. If more than one compatible PMU exists, the
event will be handled by whichever PMU happens to be earlier in the pmus
list (which currently will be the last compatible PMU registered).

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index ae96b98..7ffb267 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -77,8 +77,12 @@ armpmu_map_event(struct perf_event *event,
 		 u32 raw_event_mask)
 {
 	u64 config = event->attr.config;
+	int type = event->attr.type;
 
-	switch (event->attr.type) {
+	if (type == event->pmu->type)
+		return armpmu_map_raw_event(raw_event_mask, config);
+
+	switch (type) {
 	case PERF_TYPE_HARDWARE:
 		return armpmu_map_hw_event(event_map, config);
 	case PERF_TYPE_HW_CACHE:
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index e7d2652..7677d73 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -311,7 +311,7 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 	}
 
 	cpu_pmu_init(cpu_pmu);
-	ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW);
+	ret = armpmu_register(cpu_pmu, -1);
 
 	if (!ret)
 		return 0;
-- 
cgit v0.10.2


From a4560846eba60830a444d9e336c8a18f92e099ee Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 13 May 2014 19:08:19 +0100
Subject: arm: perf: limit size of accounting data

Commit 3fc2c83087 (ARM: perf: remove event limit from pmu_hw_events) got
rid of the upper limit on the number of events an arm_pmu could handle,
but introduced additional complexity and placed a burden on each PMU
driver to allocate accounting data somehow. So far this has not
generally been useful as the only users of arm_pmu are the CPU backend
and the CCI driver.

Now that the CCI driver plugs into the perf subsystem directly, we can
remove some of the complexities that get in the way of supporting
heterogeneous CPU PMUs.

This patch restores the original limits on pmu_hw_events fields such
that the pmu_hw_events data can be allocated as a contiguous block. This
will simplify dynamic pmu_hw_events allocation in later patches.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Tested-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>

diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index ff39290..3d7e30b 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -68,13 +68,13 @@ struct pmu_hw_events {
 	/*
 	 * The events that are active on the PMU for the given index.
 	 */
-	struct perf_event	**events;
+	struct perf_event	*events[ARMPMU_MAX_HWEVENTS];
 
 	/*
 	 * A 1 bit for an index indicates that the counter is being used for
 	 * an event. A 0 means that the counter can be used.
 	 */
-	unsigned long           *used_mask;
+	DECLARE_BITMAP(used_mask, ARMPMU_MAX_HWEVENTS);
 
 	/*
 	 * Hardware lock to serialize accesses to PMU registers. Needed for the
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 7ffb267..8648107 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -275,14 +275,12 @@ validate_group(struct perf_event *event)
 {
 	struct perf_event *sibling, *leader = event->group_leader;
 	struct pmu_hw_events fake_pmu;
-	DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS);
 
 	/*
 	 * Initialise the fake PMU. We only need to populate the
 	 * used_mask for the purposes of validation.
 	 */
-	memset(fake_used_mask, 0, sizeof(fake_used_mask));
-	fake_pmu.used_mask = fake_used_mask;
+	memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
 
 	if (!validate_event(&fake_pmu, leader))
 		return -EINVAL;
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 7677d73..28d0464 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -36,8 +36,6 @@
 static struct arm_pmu *cpu_pmu;
 
 static DEFINE_PER_CPU(struct arm_pmu *, percpu_pmu);
-static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
-static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
 static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
 
 /*
@@ -172,8 +170,6 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
 	int cpu;
 	for_each_possible_cpu(cpu) {
 		struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
-		events->events = per_cpu(hw_events, cpu);
-		events->used_mask = per_cpu(used_mask, cpu);
 		raw_spin_lock_init(&events->pmu_lock);
 		per_cpu(percpu_pmu, cpu) = cpu_pmu;
 	}
-- 
cgit v0.10.2


From 116792508607002896b706fbad8310419fcc5742 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 13 May 2014 19:36:31 +0100
Subject: arm: perf: kill get_hw_events()

Now that the arm pmu code is limited to CPU PMUs the get_hw_events()
function is superfluous, as we'll always have a set of per-cpu
pmu_hw_events structures.

This patch removes the get_hw_events() function, replacing it with
a percpu hw_events pointer. Uses of get_hw_events are updated to use
this_cpu_ptr.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>

diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index 3d7e30b..f273dd2 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -109,7 +109,7 @@ struct arm_pmu {
 	struct mutex	reserve_mutex;
 	u64		max_period;
 	struct platform_device	*plat_device;
-	struct pmu_hw_events	*(*get_hw_events)(void);
+	struct pmu_hw_events	__percpu *hw_events;
 };
 
 #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 8648107..05ac5ee 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -201,7 +201,7 @@ static void
 armpmu_del(struct perf_event *event, int flags)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
@@ -218,7 +218,7 @@ static int
 armpmu_add(struct perf_event *event, int flags)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	int idx;
 	int err = 0;
@@ -467,7 +467,7 @@ static int armpmu_event_init(struct perf_event *event)
 static void armpmu_enable(struct pmu *pmu)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(pmu);
-	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
 	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
 
 	if (enabled)
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 28d0464..fd24ad8 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -67,11 +67,6 @@ EXPORT_SYMBOL_GPL(perf_num_counters);
 #include "perf_event_v6.c"
 #include "perf_event_v7.c"
 
-static struct pmu_hw_events *cpu_pmu_get_cpu_events(void)
-{
-	return this_cpu_ptr(&cpu_hw_events);
-}
-
 static void cpu_pmu_enable_percpu_irq(void *data)
 {
 	int irq = *(int *)data;
@@ -174,7 +169,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
 		per_cpu(percpu_pmu, cpu) = cpu_pmu;
 	}
 
-	cpu_pmu->get_hw_events	= cpu_pmu_get_cpu_events;
+	cpu_pmu->hw_events	= &cpu_hw_events;
 	cpu_pmu->request_irq	= cpu_pmu_request_irq;
 	cpu_pmu->free_irq	= cpu_pmu_free_irq;
 
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index abfeb04..f2ffd5c 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -262,7 +262,7 @@ static void armv6pmu_enable_event(struct perf_event *event)
 	unsigned long val, mask, evt, flags;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
@@ -300,7 +300,7 @@ armv6pmu_handle_irq(int irq_num,
 	unsigned long pmcr = armv6_pmcr_read();
 	struct perf_sample_data data;
 	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
-	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
 	struct pt_regs *regs;
 	int idx;
 
@@ -356,7 +356,7 @@ armv6pmu_handle_irq(int irq_num,
 static void armv6pmu_start(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags, val;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
@@ -368,7 +368,7 @@ static void armv6pmu_start(struct arm_pmu *cpu_pmu)
 static void armv6pmu_stop(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags, val;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
@@ -409,7 +409,7 @@ static void armv6pmu_disable_event(struct perf_event *event)
 	unsigned long val, mask, evt, flags;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
@@ -444,7 +444,7 @@ static void armv6mpcore_pmu_disable_event(struct perf_event *event)
 	unsigned long val, mask, flags, evt = 0;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index d62b27c..8993770 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -701,7 +701,7 @@ static void armv7pmu_enable_event(struct perf_event *event)
 	unsigned long flags;
 	struct hw_perf_event *hwc = &event->hw;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
@@ -747,7 +747,7 @@ static void armv7pmu_disable_event(struct perf_event *event)
 	unsigned long flags;
 	struct hw_perf_event *hwc = &event->hw;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
@@ -779,7 +779,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 	u32 pmnc;
 	struct perf_sample_data data;
 	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
-	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
 	struct pt_regs *regs;
 	int idx;
 
@@ -839,7 +839,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 static void armv7pmu_start(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Enable all counters */
@@ -850,7 +850,7 @@ static void armv7pmu_start(struct arm_pmu *cpu_pmu)
 static void armv7pmu_stop(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Disable all counters */
@@ -1283,7 +1283,7 @@ static void krait_pmu_disable_event(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	/* Disable counter and interrupt */
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
@@ -1309,7 +1309,7 @@ static void krait_pmu_enable_event(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	/*
 	 * Enable counter and interrupt, and set the counter to count
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 08da0af..8af9f1f 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -138,7 +138,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 	unsigned long pmnc;
 	struct perf_sample_data data;
 	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
-	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
 	struct pt_regs *regs;
 	int idx;
 
@@ -198,7 +198,7 @@ static void xscale1pmu_enable_event(struct perf_event *event)
 	unsigned long val, mask, evt, flags;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	switch (idx) {
@@ -234,7 +234,7 @@ static void xscale1pmu_disable_event(struct perf_event *event)
 	unsigned long val, mask, evt, flags;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	switch (idx) {
@@ -287,7 +287,7 @@ xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc,
 static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags, val;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
@@ -299,7 +299,7 @@ static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
 static void xscale1pmu_stop(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags, val;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
@@ -485,7 +485,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 	unsigned long pmnc, of_flags;
 	struct perf_sample_data data;
 	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
-	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
 	struct pt_regs *regs;
 	int idx;
 
@@ -539,7 +539,7 @@ static void xscale2pmu_enable_event(struct perf_event *event)
 	unsigned long flags, ien, evtsel;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	ien = xscale2pmu_read_int_enable();
@@ -585,7 +585,7 @@ static void xscale2pmu_disable_event(struct perf_event *event)
 	unsigned long flags, ien, evtsel, of_flags;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	ien = xscale2pmu_read_int_enable();
@@ -651,7 +651,7 @@ out:
 static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags, val;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
@@ -663,7 +663,7 @@ static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
 static void xscale2pmu_stop(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags, val;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale2pmu_read_pmnc();
-- 
cgit v0.10.2


From 5ebd92003494a19ac5246ae385c073be16de1144 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 13 May 2014 19:46:10 +0100
Subject: arm: perf: fold percpu_pmu into pmu_hw_events

Currently the percpu_pmu pointers used as percpu_irq dev_id values are
defined separately from the other per-cpu accounting data, which makes
dynamically allocating the data (as will be required for systems with
heterogeneous CPUs) difficult.

This patch moves the percpu_pmu pointers into pmu_hw_events (which is
itself allocated per cpu), which will allow for easier dynamic
allocation. Both percpu and regular irqs are requested using percpu_pmu
pointers as tokens, freeing us from having to know whether an irq is
percpu within the handler, and thus avoiding a radix tree lookup on the
handler path.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Tested-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>

diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index f273dd2..cc01498 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -81,6 +81,12 @@ struct pmu_hw_events {
 	 * read/modify/write sequences.
 	 */
 	raw_spinlock_t		pmu_lock;
+
+	/*
+	 * When using percpu IRQs, we need a percpu dev_id. Place it here as we
+	 * already have to allocate this struct per cpu.
+	 */
+	struct arm_pmu		*percpu_pmu;
 };
 
 struct arm_pmu {
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 05ac5ee..e34934f 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -304,17 +304,21 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
 	int ret;
 	u64 start_clock, finish_clock;
 
-	if (irq_is_percpu(irq))
-		dev = *(void **)dev;
-	armpmu = dev;
+	/*
+	 * we request the IRQ with a (possibly percpu) struct arm_pmu**, but
+	 * the handlers expect a struct arm_pmu*. The percpu_irq framework will
+	 * do any necessary shifting, we just need to perform the first
+	 * dereference.
+	 */
+	armpmu = *(void **)dev;
 	plat_device = armpmu->plat_device;
 	plat = dev_get_platdata(&plat_device->dev);
 
 	start_clock = sched_clock();
 	if (plat && plat->handle_irq)
-		ret = plat->handle_irq(irq, dev, armpmu->handle_irq);
+		ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq);
 	else
-		ret = armpmu->handle_irq(irq, dev);
+		ret = armpmu->handle_irq(irq, armpmu);
 	finish_clock = sched_clock();
 
 	perf_sample_event_took(finish_clock - start_clock);
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index fd24ad8..b9391fa 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -35,7 +35,6 @@
 /* Set at runtime when we know what CPU type we are. */
 static struct arm_pmu *cpu_pmu;
 
-static DEFINE_PER_CPU(struct arm_pmu *, percpu_pmu);
 static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
 
 /*
@@ -85,20 +84,21 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
 {
 	int i, irq, irqs;
 	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
 
 	irqs = min(pmu_device->num_resources, num_possible_cpus());
 
 	irq = platform_get_irq(pmu_device, 0);
 	if (irq >= 0 && irq_is_percpu(irq)) {
 		on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
-		free_percpu_irq(irq, &percpu_pmu);
+		free_percpu_irq(irq, &hw_events->percpu_pmu);
 	} else {
 		for (i = 0; i < irqs; ++i) {
 			if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs))
 				continue;
 			irq = platform_get_irq(pmu_device, i);
 			if (irq >= 0)
-				free_irq(irq, cpu_pmu);
+				free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, i));
 		}
 	}
 }
@@ -107,6 +107,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 {
 	int i, err, irq, irqs;
 	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
 
 	if (!pmu_device)
 		return -ENODEV;
@@ -119,7 +120,8 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 
 	irq = platform_get_irq(pmu_device, 0);
 	if (irq >= 0 && irq_is_percpu(irq)) {
-		err = request_percpu_irq(irq, handler, "arm-pmu", &percpu_pmu);
+		err = request_percpu_irq(irq, handler, "arm-pmu",
+					 &hw_events->percpu_pmu);
 		if (err) {
 			pr_err("unable to request IRQ%d for ARM PMU counters\n",
 				irq);
@@ -146,7 +148,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 
 			err = request_irq(irq, handler,
 					  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
-					  cpu_pmu);
+					  per_cpu_ptr(&hw_events->percpu_pmu, i));
 			if (err) {
 				pr_err("unable to request IRQ%d for ARM PMU counters\n",
 					irq);
@@ -166,7 +168,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
 	for_each_possible_cpu(cpu) {
 		struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
 		raw_spin_lock_init(&events->pmu_lock);
-		per_cpu(percpu_pmu, cpu) = cpu_pmu;
+		events->percpu_pmu = cpu_pmu;
 	}
 
 	cpu_pmu->hw_events	= &cpu_hw_events;
-- 
cgit v0.10.2


From abdf655a30b6464fe86c8369de60ccf92f73f589 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 21 Oct 2014 14:11:23 +0100
Subject: arm: perf: dynamically allocate cpu hardware data

To support multiple PMUs, each PMU will need its own accounting data.
As we don't know (in general) how many PMUs we'll have to support at
compile-time, we must allocate the data dynamically at runtime.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>

diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index b9391fa..f0f6c5e 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -35,8 +35,6 @@
 /* Set at runtime when we know what CPU type we are. */
 static struct arm_pmu *cpu_pmu;
 
-static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
-
 /*
  * Despite the names, these two functions are CPU-specific and are used
  * by the OProfile/perf code.
@@ -162,16 +160,22 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 	return 0;
 }
 
-static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
+static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	int cpu;
+	struct pmu_hw_events __percpu *cpu_hw_events;
+
+	cpu_hw_events = alloc_percpu(struct pmu_hw_events);
+	if (!cpu_hw_events)
+		return -ENOMEM;
+
 	for_each_possible_cpu(cpu) {
-		struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
+		struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
 		raw_spin_lock_init(&events->pmu_lock);
 		events->percpu_pmu = cpu_pmu;
 	}
 
-	cpu_pmu->hw_events	= &cpu_hw_events;
+	cpu_pmu->hw_events	= cpu_hw_events;
 	cpu_pmu->request_irq	= cpu_pmu_request_irq;
 	cpu_pmu->free_irq	= cpu_pmu_free_irq;
 
@@ -182,6 +186,13 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
 	/* If no interrupts available, set the corresponding capability flag */
 	if (!platform_get_irq(cpu_pmu->plat_device, 0))
 		cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
+	return 0;
+}
+
+static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
+{
+	free_percpu(cpu_pmu->hw_events);
 }
 
 /*
@@ -303,12 +314,18 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 		goto out_free;
 	}
 
-	cpu_pmu_init(cpu_pmu);
+	ret = cpu_pmu_init(cpu_pmu);
+	if (ret)
+		goto out_free;
+
 	ret = armpmu_register(cpu_pmu, -1);
+	if (ret)
+		goto out_destroy;
 
-	if (!ret)
-		return 0;
+	return 0;
 
+out_destroy:
+	cpu_pmu_destroy(cpu_pmu);
 out_free:
 	pr_info("failed to register PMU devices!\n");
 	kfree(pmu);
-- 
cgit v0.10.2


From af66abfe2ec8bd82211e9e4f036a64c902ff4cdb Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 23 Oct 2014 15:23:35 +0100
Subject: arm: perf: fold hotplug notifier into arm_pmu

Handling multiple PMUs using a single hotplug notifier requires a list
of PMUs to be maintained, with synchronisation in the probe, remove, and
notify paths. This is error-prone and makes the code much harder to
maintain.

Instead of using a single notifier, we can dynamically allocate a
notifier block per-PMU. The end result is the same, but the list of PMUs
is implicit in the hotplug notifier list rather than within a perf-local
data structure, which makes the code far easier to handle.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>

diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index cc01498..b1596bd 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -116,6 +116,7 @@ struct arm_pmu {
 	u64		max_period;
 	struct platform_device	*plat_device;
 	struct pmu_hw_events	__percpu *hw_events;
+	struct notifier_block	hotplug_nb;
 };
 
 #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index f0f6c5e..dd9acc9 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -160,8 +160,31 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 	return 0;
 }
 
+/*
+ * PMU hardware loses all context when a CPU goes offline.
+ * When a CPU is hotplugged back in, since some hardware registers are
+ * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
+ * junk values out of them.
+ */
+static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
+			  void *hcpu)
+{
+	struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb);
+
+	if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
+		return NOTIFY_DONE;
+
+	if (pmu->reset)
+		pmu->reset(pmu);
+	else
+		return NOTIFY_DONE;
+
+	return NOTIFY_OK;
+}
+
 static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
 {
+	int err;
 	int cpu;
 	struct pmu_hw_events __percpu *cpu_hw_events;
 
@@ -169,6 +192,11 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
 	if (!cpu_hw_events)
 		return -ENOMEM;
 
+	cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify;
+	err = register_cpu_notifier(&cpu_pmu->hotplug_nb);
+	if (err)
+		goto out_hw_events;
+
 	for_each_possible_cpu(cpu) {
 		struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
 		raw_spin_lock_init(&events->pmu_lock);
@@ -188,38 +216,19 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
 		cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
 
 	return 0;
+
+out_hw_events:
+	free_percpu(cpu_hw_events);
+	return err;
 }
 
 static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
 {
+	unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
 	free_percpu(cpu_pmu->hw_events);
 }
 
 /*
- * PMU hardware loses all context when a CPU goes offline.
- * When a CPU is hotplugged back in, since some hardware registers are
- * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
- * junk values out of them.
- */
-static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
-			  void *hcpu)
-{
-	if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
-		return NOTIFY_DONE;
-
-	if (cpu_pmu && cpu_pmu->reset)
-		cpu_pmu->reset(cpu_pmu);
-	else
-		return NOTIFY_DONE;
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block cpu_pmu_hotplug_notifier = {
-	.notifier_call = cpu_pmu_notify,
-};
-
-/*
  * PMU platform driver and devicetree bindings.
  */
 static struct of_device_id cpu_pmu_of_device_ids[] = {
@@ -344,16 +353,6 @@ static struct platform_driver cpu_pmu_driver = {
 
 static int __init register_pmu_driver(void)
 {
-	int err;
-
-	err = register_cpu_notifier(&cpu_pmu_hotplug_notifier);
-	if (err)
-		return err;
-
-	err = platform_driver_register(&cpu_pmu_driver);
-	if (err)
-		unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
-
-	return err;
+	return platform_driver_register(&cpu_pmu_driver);
 }
 device_initcall(register_pmu_driver);
-- 
cgit v0.10.2


From c759e5f76b18350ed2417e89588d6358e58e1ad3 Mon Sep 17 00:00:00 2001
From: Maxime Coquelin <maxime.coquelin@st.com>
Date: Fri, 31 Oct 2014 09:47:54 +0100
Subject: reset: stih407: Add reset controllers DT bindings

This patch adds softreset, powerdown and picophy reset controllers
DT bindings for the STiH407 SoC.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Signed-off-by: Peter Griffin <peter.griffin@linaro.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Maxime Coquelin <maxime.coquelin@st.com>

diff --git a/include/dt-bindings/reset-controller/stih407-resets.h b/include/dt-bindings/reset-controller/stih407-resets.h
new file mode 100644
index 0000000..02d4328
--- /dev/null
+++ b/include/dt-bindings/reset-controller/stih407-resets.h
@@ -0,0 +1,61 @@
+/*
+ * This header provides constants for the reset controller based
+ * peripheral powerdown, softreset and picophy reset requests on the
+ * STMicroelectronics STiH407 SoC.
+ */
+#ifndef _DT_BINDINGS_RESET_CONTROLLER_STIH407
+#define _DT_BINDINGS_RESET_CONTROLLER_STIH407
+
+/* Powerdown requests control 0 */
+#define STIH407_EMISS_POWERDOWN		0
+#define STIH407_NAND_POWERDOWN		1
+
+/* Synp GMAC PowerDown */
+#define STIH407_ETH1_POWERDOWN		2
+
+/* Powerdown requests control 1 */
+#define STIH407_USB3_POWERDOWN		3
+#define STIH407_USB2_PORT1_POWERDOWN	4
+#define STIH407_USB2_PORT0_POWERDOWN	5
+#define STIH407_PCIE1_POWERDOWN		6
+#define STIH407_PCIE0_POWERDOWN		7
+#define STIH407_SATA1_POWERDOWN		8
+#define STIH407_SATA0_POWERDOWN		9
+
+/* Reset defines */
+#define STIH407_ETH1_SOFTRESET		0
+#define STIH407_MMC1_SOFTRESET		1
+#define STIH407_PICOPHY_SOFTRESET	2
+#define STIH407_IRB_SOFTRESET		3
+#define STIH407_PCIE0_SOFTRESET		4
+#define STIH407_PCIE1_SOFTRESET		5
+#define STIH407_SATA0_SOFTRESET		6
+#define STIH407_SATA1_SOFTRESET		7
+#define STIH407_MIPHY0_SOFTRESET	8
+#define STIH407_MIPHY1_SOFTRESET	9
+#define STIH407_MIPHY2_SOFTRESET	10
+#define STIH407_SATA0_PWR_SOFTRESET	11
+#define STIH407_SATA1_PWR_SOFTRESET	12
+#define STIH407_DELTA_SOFTRESET		13
+#define STIH407_BLITTER_SOFTRESET	14
+#define STIH407_HDTVOUT_SOFTRESET	15
+#define STIH407_HDQVDP_SOFTRESET	16
+#define STIH407_VDP_AUX_SOFTRESET	17
+#define STIH407_COMPO_SOFTRESET		18
+#define STIH407_HDMI_TX_PHY_SOFTRESET	19
+#define STIH407_JPEG_DEC_SOFTRESET	20
+#define STIH407_VP8_DEC_SOFTRESET	21
+#define STIH407_GPU_SOFTRESET		22
+#define STIH407_HVA_SOFTRESET		23
+#define STIH407_ERAM_HVA_SOFTRESET	24
+#define STIH407_LPM_SOFTRESET		25
+#define STIH407_KEYSCAN_SOFTRESET	26
+#define STIH407_USB2_PORT0_SOFTRESET	27
+#define STIH407_USB2_PORT1_SOFTRESET	28
+
+/* Picophy reset defines */
+#define STIH407_PICOPHY0_RESET		0
+#define STIH407_PICOPHY1_RESET		1
+#define STIH407_PICOPHY2_RESET		2
+
+#endif /* _DT_BINDINGS_RESET_CONTROLLER_STIH407 */
-- 
cgit v0.10.2


From 3ba9204308eb51fe98fb1ab352c17d5319f26724 Mon Sep 17 00:00:00 2001
From: Peter Griffin <peter.griffin@linaro.org>
Date: Wed, 2 Jul 2014 15:08:46 +0100
Subject: reset: stih407: Add softreset, powerdown and picophy controllers

This patch adds softreset, powerdown and picophy reset controllers for
the STiH407 SoC.

With this patch three new devices are registered: -
1. st,stih407-powerdown
2. st,stih407-softreset
3. st,stih407-picophyreset

All three devices use system configuration registers mapped via regmap to
perform the reset or powerdown. The powerdown controller also has
an acknowledgement.

A separate picophy reset controller manages the different reset channels within
the picophy, which have a different polarity to the other system softresets.
Managing these different picophy softreset channels is necessary to correctly
handle resuming from suspend when USB2 devices are plugged into the USB3 port.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Signed-off-by: Peter Griffin <peter.griffin@linaro.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Maxime Coquelin <maxime.coquelin@st.com>

diff --git a/drivers/reset/sti/Kconfig b/drivers/reset/sti/Kconfig
index 88d2d03..f8c15a3 100644
--- a/drivers/reset/sti/Kconfig
+++ b/drivers/reset/sti/Kconfig
@@ -12,4 +12,8 @@ config STIH416_RESET
 	bool
 	select STI_RESET_SYSCFG
 
+config STIH407_RESET
+	bool
+	select STI_RESET_SYSCFG
+
 endif
diff --git a/drivers/reset/sti/Makefile b/drivers/reset/sti/Makefile
index be1c976..dc85dfb 100644
--- a/drivers/reset/sti/Makefile
+++ b/drivers/reset/sti/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_STI_RESET_SYSCFG) += reset-syscfg.o
 
 obj-$(CONFIG_STIH415_RESET) += reset-stih415.o
 obj-$(CONFIG_STIH416_RESET) += reset-stih416.o
+obj-$(CONFIG_STIH407_RESET) += reset-stih407.o
diff --git a/drivers/reset/sti/reset-stih407.c b/drivers/reset/sti/reset-stih407.c
new file mode 100644
index 0000000..d83db5d7
--- /dev/null
+++ b/drivers/reset/sti/reset-stih407.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2014 STMicroelectronics (R&D) Limited
+ * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <dt-bindings/reset-controller/stih407-resets.h>
+#include "reset-syscfg.h"
+
+/* STiH407 Peripheral powerdown definitions. */
+static const char stih407_core[] = "st,stih407-core-syscfg";
+static const char stih407_sbc_reg[] = "st,stih407-sbc-reg-syscfg";
+static const char stih407_lpm[] = "st,stih407-lpm-syscfg";
+
+#define STIH407_PDN_0(_bit) \
+	_SYSCFG_RST_CH(stih407_core, SYSCFG_5000, _bit, SYSSTAT_5500, _bit)
+#define STIH407_PDN_1(_bit) \
+	_SYSCFG_RST_CH(stih407_core, SYSCFG_5001, _bit, SYSSTAT_5501, _bit)
+#define STIH407_PDN_ETH(_bit, _stat) \
+	_SYSCFG_RST_CH(stih407_sbc_reg, SYSCFG_4032, _bit, SYSSTAT_4520, _stat)
+
+/* Powerdown requests control 0 */
+#define SYSCFG_5000	0x0
+#define SYSSTAT_5500	0x7d0
+/* Powerdown requests control 1 (High Speed Links) */
+#define SYSCFG_5001	0x4
+#define SYSSTAT_5501	0x7d4
+
+/* Ethernet powerdown/status/reset */
+#define SYSCFG_4032	0x80
+#define SYSSTAT_4520	0x820
+#define SYSCFG_4002	0x8
+
+static const struct syscfg_reset_channel_data stih407_powerdowns[] = {
+	[STIH407_EMISS_POWERDOWN] = STIH407_PDN_0(1),
+	[STIH407_NAND_POWERDOWN] = STIH407_PDN_0(0),
+	[STIH407_USB3_POWERDOWN] = STIH407_PDN_1(6),
+	[STIH407_USB2_PORT1_POWERDOWN] = STIH407_PDN_1(5),
+	[STIH407_USB2_PORT0_POWERDOWN] = STIH407_PDN_1(4),
+	[STIH407_PCIE1_POWERDOWN] = STIH407_PDN_1(3),
+	[STIH407_PCIE0_POWERDOWN] = STIH407_PDN_1(2),
+	[STIH407_SATA1_POWERDOWN] = STIH407_PDN_1(1),
+	[STIH407_SATA0_POWERDOWN] = STIH407_PDN_1(0),
+	[STIH407_ETH1_POWERDOWN] = STIH407_PDN_ETH(0, 2),
+};
+
+/* Reset Generator control 0/1 */
+#define SYSCFG_5131	0x20c
+#define SYSCFG_5132	0x210
+
+#define LPM_SYSCFG_1	0x4	/* Softreset IRB & SBC UART */
+
+#define STIH407_SRST_CORE(_reg, _bit) \
+	_SYSCFG_RST_CH_NO_ACK(stih407_core, _reg, _bit)
+
+#define STIH407_SRST_SBC(_reg, _bit) \
+	_SYSCFG_RST_CH_NO_ACK(stih407_sbc_reg, _reg, _bit)
+
+#define STIH407_SRST_LPM(_reg, _bit) \
+	_SYSCFG_RST_CH_NO_ACK(stih407_lpm, _reg, _bit)
+
+static const struct syscfg_reset_channel_data stih407_softresets[] = {
+	[STIH407_ETH1_SOFTRESET] = STIH407_SRST_SBC(SYSCFG_4002, 4),
+	[STIH407_MMC1_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5132, 3),
+	[STIH407_USB2_PORT0_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5132, 28),
+	[STIH407_USB2_PORT1_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5132, 29),
+	[STIH407_PICOPHY_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5132, 30),
+	[STIH407_IRB_SOFTRESET] = STIH407_SRST_LPM(LPM_SYSCFG_1, 6),
+	[STIH407_PCIE0_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5132, 6),
+	[STIH407_PCIE1_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5132, 15),
+	[STIH407_SATA0_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5132, 7),
+	[STIH407_SATA1_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5132, 16),
+	[STIH407_MIPHY0_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5132, 4),
+	[STIH407_MIPHY1_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5132, 13),
+	[STIH407_MIPHY2_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5132, 22),
+	[STIH407_SATA0_PWR_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5132, 5),
+	[STIH407_SATA1_PWR_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5132, 14),
+	[STIH407_DELTA_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5131, 3),
+	[STIH407_BLITTER_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5131, 10),
+	[STIH407_HDTVOUT_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5131, 11),
+	[STIH407_HDQVDP_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5131, 12),
+	[STIH407_VDP_AUX_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5131, 14),
+	[STIH407_COMPO_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5131, 15),
+	[STIH407_HDMI_TX_PHY_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5131, 21),
+	[STIH407_JPEG_DEC_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5131, 23),
+	[STIH407_VP8_DEC_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5131, 24),
+	[STIH407_GPU_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5131, 30),
+	[STIH407_HVA_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5132, 0),
+	[STIH407_ERAM_HVA_SOFTRESET] = STIH407_SRST_CORE(SYSCFG_5132, 1),
+	[STIH407_LPM_SOFTRESET] = STIH407_SRST_SBC(SYSCFG_4002, 2),
+	[STIH407_KEYSCAN_SOFTRESET] = STIH407_SRST_LPM(LPM_SYSCFG_1, 8),
+};
+
+/* PicoPHY reset/control */
+#define SYSCFG_5061	0x0f4
+
+static const struct syscfg_reset_channel_data stih407_picophyresets[] = {
+	[STIH407_PICOPHY0_RESET] = STIH407_SRST_CORE(SYSCFG_5061, 5),
+	[STIH407_PICOPHY1_RESET] = STIH407_SRST_CORE(SYSCFG_5061, 6),
+	[STIH407_PICOPHY2_RESET] = STIH407_SRST_CORE(SYSCFG_5061, 7),
+};
+
+static const struct syscfg_reset_controller_data stih407_powerdown_controller = {
+	.wait_for_ack = true,
+	.nr_channels = ARRAY_SIZE(stih407_powerdowns),
+	.channels = stih407_powerdowns,
+};
+
+static const struct syscfg_reset_controller_data stih407_softreset_controller = {
+	.wait_for_ack = false,
+	.active_low = true,
+	.nr_channels = ARRAY_SIZE(stih407_softresets),
+	.channels = stih407_softresets,
+};
+
+static const struct syscfg_reset_controller_data stih407_picophyreset_controller = {
+	.wait_for_ack = false,
+	.nr_channels = ARRAY_SIZE(stih407_picophyresets),
+	.channels = stih407_picophyresets,
+};
+
+static struct of_device_id stih407_reset_match[] = {
+	{
+		.compatible = "st,stih407-powerdown",
+		.data = &stih407_powerdown_controller,
+	},
+	{
+		.compatible = "st,stih407-softreset",
+		.data = &stih407_softreset_controller,
+	},
+	{
+		.compatible = "st,stih407-picophyreset",
+		.data = &stih407_picophyreset_controller,
+	},
+	{ /* sentinel */ },
+};
+
+static struct platform_driver stih407_reset_driver = {
+	.probe = syscfg_reset_probe,
+	.driver = {
+		.name = "reset-stih407",
+		.of_match_table = stih407_reset_match,
+	},
+};
+
+static int __init stih407_reset_init(void)
+{
+	return platform_driver_register(&stih407_reset_driver);
+}
+
+arch_initcall(stih407_reset_init);
-- 
cgit v0.10.2


From 4b171b3d3bdf256b0d75a6b3430d8b73134b1bf7 Mon Sep 17 00:00:00 2001
From: Peter Griffin <peter.griffin@linaro.org>
Date: Wed, 2 Jul 2014 15:08:47 +0100
Subject: reset: sti: Document sti-picophyreset controllers bindings.

Add DT bindings documentation for sti-picophyreset controller.

Signed-off-by: Peter Griffin <peter.griffin@linaro.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Maxime Coquelin <maxime.coquelin@st.com>

diff --git a/Documentation/devicetree/bindings/reset/st,sti-picophyreset.txt b/Documentation/devicetree/bindings/reset/st,sti-picophyreset.txt
new file mode 100644
index 0000000..54ae9f7
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/st,sti-picophyreset.txt
@@ -0,0 +1,42 @@
+STMicroelectronics STi family Sysconfig Picophy SoftReset Controller
+=============================================================================
+
+This binding describes a reset controller device that is used to enable and
+disable on-chip PicoPHY USB2 phy(s) using "softreset" control bits found in
+the STi family SoC system configuration registers.
+
+The actual action taken when softreset is asserted is hardware dependent.
+However, when asserted it may not be possible to access the hardware's
+registers and after an assert/deassert sequence the hardware's previous state
+may no longer be valid.
+
+Please refer to Documentation/devicetree/bindings/reset/reset.txt
+for common reset controller binding usage.
+
+Required properties:
+- compatible: Should be "st,stih407-picophyreset"
+- #reset-cells: 1, see below
+
+Example:
+
+	picophyreset: picophyreset-controller {
+		compatible = "st,stih407-picophyreset";
+		#reset-cells = <1>;
+	};
+
+Specifying picophyreset control of devices
+=======================================
+
+Device nodes should specify the reset channel required in their "resets"
+property, containing a phandle to the picophyreset device node and an
+index specifying which channel to use, as described in
+Documentation/devicetree/bindings/reset/reset.txt.
+
+Example:
+
+	usb2_picophy0: usbpicophy@0 {
+		resets = <&picophyreset STIH407_PICOPHY0_RESET>;
+	};
+
+Macro definitions for the supported reset channels can be found in:
+include/dt-bindings/reset-controller/stih407-resets.h
-- 
cgit v0.10.2


From 148bb0439adeeae466dca31dd4c643e194bee023 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Mon, 3 Nov 2014 10:21:54 -0800
Subject: soc: ti: knav_qmss_queue: Use list_for_each_entry_safe to prevent use
 after free

list_for_each_entry_safe() is necessary if list objects are deleted from
the list while traversing it.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Santosh Shilimkar <ssantosh@kernel.org>

diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c
index 6f22d56..9b8dd67 100644
--- a/drivers/soc/ti/knav_qmss_queue.c
+++ b/drivers/soc/ti/knav_qmss_queue.c
@@ -1306,14 +1306,14 @@ static void knav_free_queue_ranges(struct knav_device *kdev)
 static void knav_queue_free_regions(struct knav_device *kdev)
 {
 	struct knav_region *region;
-	struct knav_pool *pool;
+	struct knav_pool *pool, *tmp;
 	unsigned size;
 
 	for (;;) {
 		region = first_region(kdev);
 		if (!region)
 			break;
-		list_for_each_entry(pool, &region->pools, region_inst)
+		list_for_each_entry_safe(pool, tmp, &region->pools, region_inst)
 			knav_pool_destroy(pool);
 
 		size = region->virt_end - region->virt_start;
-- 
cgit v0.10.2


From f200890f224d9ed0af207145a2279f51c6be230b Mon Sep 17 00:00:00 2001
From: Dinh Nguyen <dinguyen@opensource.altera.com>
Date: Mon, 3 Nov 2014 16:33:05 -0600
Subject: reset: add socfpga_reset_status

Populate the reset_status callback for SOCFPGA.

Signed-off-by: Alan Tull <atull@opensource.altera.com>
Signed-off-by: Dinh Nguyen <dinguyen@opensource.altera.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>

diff --git a/drivers/reset/reset-socfpga.c b/drivers/reset/reset-socfpga.c
index 79c32ca..4058208 100644
--- a/drivers/reset/reset-socfpga.c
+++ b/drivers/reset/reset-socfpga.c
@@ -76,9 +76,24 @@ static int socfpga_reset_deassert(struct reset_controller_dev *rcdev,
 	return 0;
 }
 
+static int socfpga_reset_status(struct reset_controller_dev *rcdev,
+				unsigned long id)
+{
+	struct socfpga_reset_data *data = container_of(rcdev,
+						struct socfpga_reset_data, rcdev);
+	int bank = id / BITS_PER_LONG;
+	int offset = id % BITS_PER_LONG;
+	u32 reg;
+
+	reg = readl(data->membase + OFFSET_MODRST + (bank * NR_BANKS));
+
+	return !(reg & BIT(offset));
+}
+
 static struct reset_control_ops socfpga_reset_ops = {
 	.assert		= socfpga_reset_assert,
 	.deassert	= socfpga_reset_deassert,
+	.status		= socfpga_reset_status,
 };
 
 static int socfpga_reset_probe(struct platform_device *pdev)
-- 
cgit v0.10.2


From 315786ebbf4ad6552b6fd8e0e7b2ea220fcbfdbd Mon Sep 17 00:00:00 2001
From: Olav Haugan <ohaugan@codeaurora.org>
Date: Sat, 25 Oct 2014 09:55:16 -0700
Subject: iommu: Add iommu_map_sg() function

Mapping and unmapping are more often than not in the critical path.
map_sg allows IOMMU driver implementations to optimize the process
of mapping buffers into the IOMMU page tables.

Instead of mapping a buffer one page at a time and requiring potentially
expensive TLB operations for each page, this function allows the driver
to map all pages in one go and defer TLB maintenance until after all
pages have been mapped.

Additionally, the mapping operation would be faster in general since
clients do not have to keep calling the map API over and over again for
each physically contiguous chunk of memory that needs to be mapped to a
virtually contiguous region.

Signed-off-by: Olav Haugan <ohaugan@codeaurora.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 505a9ad..2d84c9e 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3424,6 +3424,7 @@ static const struct iommu_ops amd_iommu_ops = {
 	.detach_dev = amd_iommu_detach_device,
 	.map = amd_iommu_map,
 	.unmap = amd_iommu_unmap,
+	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = amd_iommu_iova_to_phys,
 	.pgsize_bitmap	= AMD_IOMMU_PGSIZES,
 };
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 60558f7..e393ae0 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1652,6 +1652,7 @@ static const struct iommu_ops arm_smmu_ops = {
 	.detach_dev	= arm_smmu_detach_dev,
 	.map		= arm_smmu_map,
 	.unmap		= arm_smmu_unmap,
+	.map_sg		= default_iommu_map_sg,
 	.iova_to_phys	= arm_smmu_iova_to_phys,
 	.add_device	= arm_smmu_add_device,
 	.remove_device	= arm_smmu_remove_device,
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 7423318..28372b8 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1178,6 +1178,7 @@ static const struct iommu_ops exynos_iommu_ops = {
 	.detach_dev = exynos_iommu_detach_device,
 	.map = exynos_iommu_map,
 	.unmap = exynos_iommu_unmap,
+	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = exynos_iommu_iova_to_phys,
 	.add_device = exynos_iommu_add_device,
 	.remove_device = exynos_iommu_remove_device,
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index a27d6cb..02cd26a 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -4467,6 +4467,7 @@ static const struct iommu_ops intel_iommu_ops = {
 	.detach_dev	= intel_iommu_detach_device,
 	.map		= intel_iommu_map,
 	.unmap		= intel_iommu_unmap,
+	.map_sg		= default_iommu_map_sg,
 	.iova_to_phys	= intel_iommu_iova_to_phys,
 	.add_device	= intel_iommu_add_device,
 	.remove_device	= intel_iommu_remove_device,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index ed8b048..46727ce 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1124,6 +1124,31 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
 }
 EXPORT_SYMBOL_GPL(iommu_unmap);
 
+size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+			 struct scatterlist *sg, unsigned int nents, int prot)
+{
+	int ret;
+	size_t mapped = 0;
+	unsigned int i;
+	struct scatterlist *s;
+
+	for_each_sg(sg, s, nents, i) {
+		phys_addr_t phys = page_to_phys(sg_page(s));
+		size_t page_len = s->offset + s->length;
+
+		ret = iommu_map(domain, iova + mapped, phys, page_len, prot);
+		if (ret) {
+			/* undo mappings already done */
+			iommu_unmap(domain, iova, mapped);
+			mapped = 0;
+			break;
+		}
+		mapped += page_len;
+	}
+
+	return mapped;
+}
+EXPORT_SYMBOL_GPL(default_iommu_map_sg);
 
 int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
 			       phys_addr_t paddr, u64 size, int prot)
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 7dab5cb..e509c58 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1127,6 +1127,7 @@ static const struct iommu_ops ipmmu_ops = {
 	.detach_dev = ipmmu_detach_device,
 	.map = ipmmu_map,
 	.unmap = ipmmu_unmap,
+	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = ipmmu_iova_to_phys,
 	.add_device = ipmmu_add_device,
 	.remove_device = ipmmu_remove_device,
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 6e3dcc28..1c7b78e 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -681,6 +681,7 @@ static const struct iommu_ops msm_iommu_ops = {
 	.detach_dev = msm_iommu_detach_dev,
 	.map = msm_iommu_map,
 	.unmap = msm_iommu_unmap,
+	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = msm_iommu_iova_to_phys,
 	.pgsize_bitmap = MSM_IOMMU_PGSIZES,
 };
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 3627887..18003c04 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1288,6 +1288,7 @@ static const struct iommu_ops omap_iommu_ops = {
 	.detach_dev	= omap_iommu_detach_dev,
 	.map		= omap_iommu_map,
 	.unmap		= omap_iommu_unmap,
+	.map_sg		= default_iommu_map_sg,
 	.iova_to_phys	= omap_iommu_iova_to_phys,
 	.add_device	= omap_iommu_add_device,
 	.remove_device	= omap_iommu_remove_device,
diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c
index 1333e6fb..f1b0077 100644
--- a/drivers/iommu/shmobile-iommu.c
+++ b/drivers/iommu/shmobile-iommu.c
@@ -361,6 +361,7 @@ static const struct iommu_ops shmobile_iommu_ops = {
 	.detach_dev = shmobile_iommu_detach_device,
 	.map = shmobile_iommu_map,
 	.unmap = shmobile_iommu_unmap,
+	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = shmobile_iommu_iova_to_phys,
 	.add_device = shmobile_iommu_add_device,
 	.pgsize_bitmap = SZ_1M | SZ_64K | SZ_4K,
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 3afdf43..73e845a 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -955,6 +955,7 @@ static const struct iommu_ops smmu_iommu_ops = {
 	.detach_dev	= smmu_iommu_detach_dev,
 	.map		= smmu_iommu_map,
 	.unmap		= smmu_iommu_unmap,
+	.map_sg		= default_iommu_map_sg,
 	.iova_to_phys	= smmu_iommu_iova_to_phys,
 	.pgsize_bitmap	= SMMU_IOMMU_PGSIZES,
 };
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index e6a7c9f..b29a598 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -22,6 +22,7 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/types.h>
+#include <linux/scatterlist.h>
 #include <trace/events/iommu.h>
 
 #define IOMMU_READ	(1 << 0)
@@ -97,6 +98,8 @@ enum iommu_attr {
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
  * @unmap: unmap a physically contiguous memory region from an iommu domain
+ * @map_sg: map a scatter-gather list of physically contiguous memory chunks
+ * to an iommu domain
  * @iova_to_phys: translate iova to physical address
  * @add_device: add device to iommu grouping
  * @remove_device: remove device from iommu grouping
@@ -114,6 +117,8 @@ struct iommu_ops {
 		   phys_addr_t paddr, size_t size, int prot);
 	size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 		     size_t size);
+	size_t (*map_sg)(struct iommu_domain *domain, unsigned long iova,
+			 struct scatterlist *sg, unsigned int nents, int prot);
 	phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova);
 	int (*add_device)(struct device *dev);
 	void (*remove_device)(struct device *dev);
@@ -156,6 +161,9 @@ extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
 		     phys_addr_t paddr, size_t size, int prot);
 extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
 		       size_t size);
+extern size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+				struct scatterlist *sg,unsigned int nents,
+				int prot);
 extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova);
 extern void iommu_set_fault_handler(struct iommu_domain *domain,
 			iommu_fault_handler_t handler, void *token);
@@ -241,6 +249,13 @@ static inline int report_iommu_fault(struct iommu_domain *domain,
 	return ret;
 }
 
+static inline size_t iommu_map_sg(struct iommu_domain *domain,
+				  unsigned long iova, struct scatterlist *sg,
+				  unsigned int nents, int prot)
+{
+	return domain->ops->map_sg(domain, iova, sg, nents, prot);
+}
+
 #else /* CONFIG_IOMMU_API */
 
 struct iommu_ops {};
@@ -293,6 +308,13 @@ static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
 	return -ENODEV;
 }
 
+static inline size_t iommu_map_sg(struct iommu_domain *domain,
+				  unsigned long iova, struct scatterlist *sg,
+				  unsigned int nents, int prot)
+{
+	return 0; /* size_t is unsigned: report "nothing mapped", not -ENODEV */
+}
+
 static inline int iommu_domain_window_enable(struct iommu_domain *domain,
 					     u32 wnd_nr, phys_addr_t paddr,
 					     u64 size, int prot)
-- 
cgit v0.10.2


From 38ec010d9b04ed94845f8ff6f10d33eb6bbfe180 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Tue, 4 Nov 2014 14:53:51 +0100
Subject: iommu: Do more input validation in iommu_map_sg()

The IOMMU-API works on page boundaries, unlike the DMA-API
which can work with sub-page buffers. The sg->offset
field does not make sense on the IOMMU level, so force it to
be 0. Do some error-path consolidation while at it.

Signed-off-by: Joerg Roedel <jroedel@suse.de>

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 46727ce..08c53c5 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1127,26 +1127,33 @@ EXPORT_SYMBOL_GPL(iommu_unmap);
 size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
 			 struct scatterlist *sg, unsigned int nents, int prot)
 {
-	int ret;
+	struct scatterlist *s;
 	size_t mapped = 0;
 	unsigned int i;
-	struct scatterlist *s;
+	int ret;
 
 	for_each_sg(sg, s, nents, i) {
 		phys_addr_t phys = page_to_phys(sg_page(s));
-		size_t page_len = s->offset + s->length;
 
-		ret = iommu_map(domain, iova + mapped, phys, page_len, prot);
-		if (ret) {
-			/* undo mappings already done */
-			iommu_unmap(domain, iova, mapped);
-			mapped = 0;
-			break;
-		}
-		mapped += page_len;
+		/* We are mapping on page boundarys, so offset must be 0 */
+		if (s->offset)
+			goto out_err;
+
+		ret = iommu_map(domain, iova + mapped, phys, s->length, prot);
+		if (ret)
+			goto out_err;
+
+		mapped += s->length;
 	}
 
 	return mapped;
+
+out_err:
+	/* undo mappings already done */
+	iommu_unmap(domain, iova, mapped);
+
+	return 0;
+
 }
 EXPORT_SYMBOL_GPL(default_iommu_map_sg);
 
-- 
cgit v0.10.2


From d7da6bdc322bb79c4326dff7c2727236a48c4be9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Heiko=20St=C3=BCbner?= <heiko@sntech.de>
Date: Wed, 29 Oct 2014 01:22:56 +0100
Subject: iommu: Improve error handling when setting bus iommu

When some part of bus_set_iommu fails it should undo any made changes
and not simply leave everything as is.

This includes unregistering the bus notifier in iommu_bus_init when
add_iommu_group fails and also setting the bus->iommu_ops back to NULL.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 08c53c5..02e4313 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -818,7 +818,15 @@ static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops)
 		kfree(nb);
 		return err;
 	}
-	return bus_for_each_dev(bus, NULL, &cb, add_iommu_group);
+
+	err = bus_for_each_dev(bus, NULL, &cb, add_iommu_group);
+	if (err) {
+		bus_unregister_notifier(bus, nb);
+		kfree(nb);
+		return err;
+	}
+
+	return 0;
 }
 
 /**
@@ -836,13 +844,19 @@ static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops)
  */
 int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops)
 {
+	int err;
+
 	if (bus->iommu_ops != NULL)
 		return -EBUSY;
 
 	bus->iommu_ops = ops;
 
 	/* Do IOMMU specific setup for this bus-type */
-	return iommu_bus_init(bus, ops);
+	err = iommu_bus_init(bus, ops);
+	if (err)
+		bus->iommu_ops = NULL;
+
+	return err;
 }
 EXPORT_SYMBOL_GPL(bus_set_iommu);
 
-- 
cgit v0.10.2


From 126e31faa12c0d40c3b603adb9ac6d72dd424860 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Sat, 8 Nov 2014 10:30:32 +1100
Subject: w1: omap-hdq: support device probing with device-tree

This driver has no 'compatible' string and so is not found when
using device-tree.

Add one with value to match
		hdqw1w: 1w@480b2000 {
device in omap3.dtsi.

Signed-off-by: NeilBrown <neilb@suse.de>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/Documentation/devicetree/bindings/w1/omap-hdq.txt b/Documentation/devicetree/bindings/w1/omap-hdq.txt
new file mode 100644
index 0000000..fef7947
--- /dev/null
+++ b/Documentation/devicetree/bindings/w1/omap-hdq.txt
@@ -0,0 +1,17 @@
+* OMAP HDQ One wire bus master controller
+
+Required properties:
+- compatible : should be "ti,omap3-1w"
+- reg : Address and length of the register set for the device
+- interrupts : interrupt line.
+- ti,hwmods : "hdq1w"
+
+Example:
+
+- From omap3.dtsi
+  hdqw1w: 1w@480b2000 {
+	compatible = "ti,omap3-1w";
+	reg = <0x480b2000 0x1000>;
+	interrupts = <58>;
+	ti,hwmods = "hdq1w";
+  };
diff --git a/drivers/w1/masters/omap_hdq.c b/drivers/w1/masters/omap_hdq.c
index 9900e8e..03321d6 100644
--- a/drivers/w1/masters/omap_hdq.c
+++ b/drivers/w1/masters/omap_hdq.c
@@ -72,11 +72,18 @@ struct hdq_data {
 static int omap_hdq_probe(struct platform_device *pdev);
 static int omap_hdq_remove(struct platform_device *pdev);
 
+static const struct of_device_id omap_hdq_dt_ids[] = {
+	{ .compatible = "ti,omap3-1w" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, omap_hdq_dt_ids);
+
 static struct platform_driver omap_hdq_driver = {
 	.probe =	omap_hdq_probe,
 	.remove =	omap_hdq_remove,
 	.driver =	{
 		.name =	"omap_hdq",
+		.of_match_table = omap_hdq_dt_ids,
 	},
 };
 
-- 
cgit v0.10.2


From 61b43d4e919e8fa5e10c77ee32ba328da07e0264 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Mon, 10 Nov 2014 23:49:47 +0530
Subject: bus: omap_l3_noc: Add resume hook to restore context

On certain SoCs such as the AM437x SoC, L3_noc error registers are
maintained in a power domain, such as the per domain, which loses context as
part of a low power state such as RTC+DDR mode. On these platforms, when we
mask interrupts which we cannot handle, the source of these interrupts
still remains on resume; however, the flag mux registers now contain
their reset value (unmasked) - this breaks the system with infinite
interrupts, since we do not want these interrupts to take place ever again.

To handle this: restore the masking of interrupts which we have
already recorded in the system as ones we cannot handle.

Fixes: 2100b595b7 ("bus: omap_l3_noc: ignore masked out unclearable targets")
Acked-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/drivers/bus/omap_l3_noc.c b/drivers/bus/omap_l3_noc.c
index 531ae59..b5eac29 100644
--- a/drivers/bus/omap_l3_noc.c
+++ b/drivers/bus/omap_l3_noc.c
@@ -296,11 +296,66 @@ static int omap_l3_probe(struct platform_device *pdev)
 	return ret;
 }
 
+#ifdef	CONFIG_PM
+
+/**
+ * l3_resume_noirq() - resume function for l3_noc
+ * @dev:	pointer to l3_noc device structure
+ *
+ * We have only the resume handler since we
+ * have already maintained the delta register
+ * configuration as part of configuring the system
+ */
+static int l3_resume_noirq(struct device *dev)
+{
+	struct omap_l3 *l3 = dev_get_drvdata(dev);
+	int i;
+	struct l3_flagmux_data *flag_mux;
+	void __iomem *base, *mask_regx = NULL;
+	u32 mask_val;
+
+	for (i = 0; i < l3->num_modules; i++) {
+		base = l3->l3_base[i];
+		flag_mux = l3->l3_flagmux[i];
+		if (!flag_mux->mask_app_bits && !flag_mux->mask_dbg_bits)
+			continue;
+
+		mask_regx = base + flag_mux->offset + L3_FLAGMUX_MASK0 +
+			   (L3_APPLICATION_ERROR << 3);
+		mask_val = readl_relaxed(mask_regx);
+		mask_val &= ~(flag_mux->mask_app_bits);
+
+		writel_relaxed(mask_val, mask_regx);
+		mask_regx = base + flag_mux->offset + L3_FLAGMUX_MASK0 +
+			   (L3_DEBUG_ERROR << 3);
+		mask_val = readl_relaxed(mask_regx);
+		mask_val &= ~(flag_mux->mask_dbg_bits);
+
+		writel_relaxed(mask_val, mask_regx);
+	}
+
+	/* Dummy read to force OCP barrier */
+	if (mask_regx)
+		(void)readl(mask_regx);
+
+	return 0;
+}
+
+static const struct dev_pm_ops l3_dev_pm_ops = {
+	.resume_noirq		= l3_resume_noirq,
+};
+
+#define L3_DEV_PM_OPS (&l3_dev_pm_ops)
+#else
+#define L3_DEV_PM_OPS NULL
+#endif
+
 static struct platform_driver omap_l3_driver = {
 	.probe		= omap_l3_probe,
 	.driver		= {
 		.name		= "omap_l3_noc",
 		.owner		= THIS_MODULE,
+		.pm		= L3_DEV_PM_OPS,
 		.of_match_table = of_match_ptr(l3_noc_match),
 	},
 };
-- 
cgit v0.10.2


From c4cf0935a2d8fe6d186bf4253ea3c4b4a8a8a710 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Mon, 10 Nov 2014 23:49:48 +0530
Subject: bus: omap_l3_noc: Correct returning IRQ_HANDLED unconditionally in
 the irq handler

Correct returning IRQ_HANDLED unconditionally in the irq handler.
Return IRQ_NONE for some interrupt which we do not expect to be
handled in this handler. This prevents kernel stalling with back
to back spurious interrupts.

Fixes: 2722e56de6 ("OMAP4: l3: Introduce l3-interconnect error handling driver")
Acked-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/drivers/bus/omap_l3_noc.c b/drivers/bus/omap_l3_noc.c
index b5eac29..17d8659 100644
--- a/drivers/bus/omap_l3_noc.c
+++ b/drivers/bus/omap_l3_noc.c
@@ -222,10 +222,14 @@ static irqreturn_t l3_interrupt_handler(int irq, void *_l3)
 			}
 
 			/* Error found so break the for loop */
-			break;
+			return IRQ_HANDLED;
 		}
 	}
-	return IRQ_HANDLED;
+
+	dev_err(l3->dev, "L3 %s IRQ not handled!!\n",
+		inttype ? "debug" : "application");
+
+	return IRQ_NONE;
 }
 
 static const struct of_device_id l3_noc_match[] = {
-- 
cgit v0.10.2


From 6575bd7cbcc40dcdffb62e710ab2cd05355396c6 Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Tue, 23 Sep 2014 13:13:29 +0200
Subject: rtc: at91sam9: remove references to mach specific headers

In order to support multi-platform kernels, drivers should not include
machine specific headers.
Copy RTT macros in the driver code and remove any machine specific
headers.

Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Acked-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Johan Hovold <johan@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>

diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c
index 5963743..51f0038 100644
--- a/drivers/rtc/rtc-at91sam9.c
+++ b/drivers/rtc/rtc-at91sam9.c
@@ -22,10 +22,6 @@
 #include <linux/platform_data/atmel.h>
 #include <linux/io.h>
 
-#include <mach/at91_rtt.h>
-#include <mach/cpu.h>
-#include <mach/hardware.h>
-
 /*
  * This driver uses two configurable hardware resources that live in the
  * AT91SAM9 backup power domain (intended to be powered at all times)
@@ -47,6 +43,24 @@
  * registers available, likewise usable for more than "RTC" support.
  */
 
+#define AT91_RTT_MR		0x00			/* Real-time Mode Register */
+#define AT91_RTT_RTPRES		(0xffff << 0)		/* Real-time Timer Prescaler Value */
+#define AT91_RTT_ALMIEN		(1 << 16)		/* Alarm Interrupt Enable */
+#define AT91_RTT_RTTINCIEN	(1 << 17)		/* Real Time Timer Increment Interrupt Enable */
+#define AT91_RTT_RTTRST		(1 << 18)		/* Real Time Timer Restart */
+
+#define AT91_RTT_AR		0x04			/* Real-time Alarm Register */
+#define AT91_RTT_ALMV		(0xffffffff)		/* Alarm Value */
+
+#define AT91_RTT_VR		0x08			/* Real-time Value Register */
+#define AT91_RTT_CRTV		(0xffffffff)		/* Current Real-time Value */
+
+#define AT91_RTT_SR		0x0c			/* Real-time Status Register */
+#define AT91_RTT_ALMS		(1 << 0)		/* Real-time Alarm Status */
+#define AT91_RTT_RTTINC		(1 << 1)		/* Real-time Timer Increment */
+
+#define AT91_SLOW_CLOCK		32768
+
 /*
  * We store ALARM_DISABLED in ALMV to record that no alarm is set.
  * It's also the reset value for that field.
-- 
cgit v0.10.2


From 272f1dfa617ac2eb02a34f534d8d24a6b97eeb08 Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Tue, 23 Sep 2014 13:13:52 +0200
Subject: rtc: at91sam9: use standard readl/writel functions instead of raw
 versions

Raw versions of readl and writel should not be directly used and should
be replaced by their relaxed versions (readl/writel_relaxed), which take
endianness conversion into account.

In this driver we prefer the standard readl/writel function which add the
appropriate memory barrier around the access (the performance penalty is
negligible for this kind of application).

Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Acked-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Johan Hovold <johan@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>

diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c
index 51f0038..74a9ca0 100644
--- a/drivers/rtc/rtc-at91sam9.c
+++ b/drivers/rtc/rtc-at91sam9.c
@@ -77,14 +77,14 @@ struct sam9_rtc {
 };
 
 #define rtt_readl(rtc, field) \
-	__raw_readl((rtc)->rtt + AT91_RTT_ ## field)
+	readl((rtc)->rtt + AT91_RTT_ ## field)
 #define rtt_writel(rtc, field, val) \
-	__raw_writel((val), (rtc)->rtt + AT91_RTT_ ## field)
+	writel((val), (rtc)->rtt + AT91_RTT_ ## field)
 
 #define gpbr_readl(rtc) \
-	__raw_readl((rtc)->gpbr)
+	readl((rtc)->gpbr)
 #define gpbr_writel(rtc, val) \
-	__raw_writel((val), (rtc)->gpbr)
+	writel((val), (rtc)->gpbr)
 
 /*
  * Read current time and date in RTC
-- 
cgit v0.10.2


From d41da3ee1a9c46d175ca4cdca369f35c35f89cdc Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Tue, 23 Sep 2014 13:14:09 +0200
Subject: rtc: at91sam9: replace devm_ioremap by devm_ioremap_resource

Replace devm_ioremap calls by devm_ioremap_resource, which already checks
resource consistency (resource != NULL) and prints an error in case of
failure.

Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Acked-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Johan Hovold <johan@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>

diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c
index 74a9ca0..38a2693 100644
--- a/drivers/rtc/rtc-at91sam9.c
+++ b/drivers/rtc/rtc-at91sam9.c
@@ -306,18 +306,11 @@ static const struct rtc_class_ops at91_rtc_ops = {
  */
 static int at91_rtc_probe(struct platform_device *pdev)
 {
-	struct resource	*r, *r_gpbr;
+	struct resource	*r;
 	struct sam9_rtc	*rtc;
 	int		ret, irq;
 	u32		mr;
 
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	r_gpbr = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	if (!r || !r_gpbr) {
-		dev_err(&pdev->dev, "need 2 ressources\n");
-		return -ENODEV;
-	}
-
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
 		dev_err(&pdev->dev, "failed to get interrupt resource\n");
@@ -335,18 +328,16 @@ static int at91_rtc_probe(struct platform_device *pdev)
 		device_init_wakeup(&pdev->dev, 1);
 
 	platform_set_drvdata(pdev, rtc);
-	rtc->rtt = devm_ioremap(&pdev->dev, r->start, resource_size(r));
-	if (!rtc->rtt) {
-		dev_err(&pdev->dev, "failed to map registers, aborting.\n");
-		return -ENOMEM;
-	}
 
-	rtc->gpbr = devm_ioremap(&pdev->dev, r_gpbr->start,
-				resource_size(r_gpbr));
-	if (!rtc->gpbr) {
-		dev_err(&pdev->dev, "failed to map gpbr registers, aborting.\n");
-		return -ENOMEM;
-	}
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	rtc->rtt = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(rtc->rtt))
+		return PTR_ERR(rtc->rtt);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	rtc->gpbr = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(rtc->gpbr))
+		return PTR_ERR(rtc->rtt);
 
 	mr = rtt_readl(rtc, MR);
 
-- 
cgit v0.10.2


From 07d4d72450ef9bf317dff3d9f3bcad8d1f220f58 Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Tue, 23 Sep 2014 13:14:24 +0200
Subject: rtc: at91sam9: add DT support

Add of_match_table to the existing driver so that rtt nodes defined in at91
DTs can be attached to this driver.

Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Acked-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Johan Hovold <johan@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>

diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c
index 38a2693..d72c34d 100644
--- a/drivers/rtc/rtc-at91sam9.c
+++ b/drivers/rtc/rtc-at91sam9.c
@@ -445,6 +445,14 @@ static int at91_rtc_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(at91_rtc_pm_ops, at91_rtc_suspend, at91_rtc_resume);
 
+#ifdef CONFIG_OF
+static const struct of_device_id at91_rtc_dt_ids[] = {
+	{ .compatible = "atmel,at91sam9260-rtt" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, at91_rtc_dt_ids);
+#endif
+
 static struct platform_driver at91_rtc_driver = {
 	.probe		= at91_rtc_probe,
 	.remove		= at91_rtc_remove,
@@ -453,6 +461,7 @@ static struct platform_driver at91_rtc_driver = {
 		.name	= "rtc-at91sam9",
 		.owner	= THIS_MODULE,
 		.pm	= &at91_rtc_pm_ops,
+		.of_match_table = of_match_ptr(at91_rtc_dt_ids),
 	},
 };
 
-- 
cgit v0.10.2


From 43e112bb3dea87f392871220fcde175c814e26ca Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Tue, 23 Sep 2014 13:14:44 +0200
Subject: rtc: at91sam9: make use of syscon/regmap to access GPBR registers

The GPBR registers are not part of the RTT block and thus should not be
defined in the reg property of the rtt node.

Use syscon to provide a proper DT representation and reference the GPBR
syscon device in a new "atmel,rtt-rtc-time-reg" property which stores both
the syscon device phandle and the register offset within the GPBR block.

When using non DT boards, we won't be able to retrieve the syscon regmap,
hence we need to create our own regmap using the memory region defined
in the 2nd memory resource assigned to the RTT platform device.

Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Acked-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Johan Hovold <johan@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 94ae179..7b1f592 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -1111,6 +1111,7 @@ config RTC_DRV_AT91RM9200
 config RTC_DRV_AT91SAM9
 	tristate "AT91SAM9x/AT91CAP9 RTT as RTC"
 	depends on ARCH_AT91 && !(ARCH_AT91RM9200 || ARCH_AT91X40)
+	select MFD_SYSCON
 	help
 	  RTC driver for the Atmel AT91SAM9x and AT91CAP9 internal RTT
 	  (Real Time Timer). These timers are powered by the backup power
diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c
index d72c34d..be9c28b 100644
--- a/drivers/rtc/rtc-at91sam9.c
+++ b/drivers/rtc/rtc-at91sam9.c
@@ -21,6 +21,8 @@
 #include <linux/slab.h>
 #include <linux/platform_data/atmel.h>
 #include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 /*
  * This driver uses two configurable hardware resources that live in the
@@ -72,7 +74,8 @@ struct sam9_rtc {
 	void __iomem		*rtt;
 	struct rtc_device	*rtcdev;
 	u32			imr;
-	void __iomem		*gpbr;
+	struct regmap		*gpbr;
+	unsigned int		gpbr_offset;
 	int 			irq;
 };
 
@@ -81,10 +84,19 @@ struct sam9_rtc {
 #define rtt_writel(rtc, field, val) \
 	writel((val), (rtc)->rtt + AT91_RTT_ ## field)
 
-#define gpbr_readl(rtc) \
-	readl((rtc)->gpbr)
-#define gpbr_writel(rtc, val) \
-	writel((val), (rtc)->gpbr)
+static inline unsigned int gpbr_readl(struct sam9_rtc *rtc)
+{
+	unsigned int val;
+
+	regmap_read(rtc->gpbr, rtc->gpbr_offset, &val);
+
+	return val;
+}
+
+static inline void gpbr_writel(struct sam9_rtc *rtc, unsigned int val)
+{
+	regmap_write(rtc->gpbr, rtc->gpbr_offset, val);
+}
 
 /*
  * Read current time and date in RTC
@@ -301,6 +313,12 @@ static const struct rtc_class_ops at91_rtc_ops = {
 	.alarm_irq_enable = at91_rtc_alarm_irq_enable,
 };
 
+static struct regmap_config gpbr_regmap_config = {
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_stride = 4,
+};
+
 /*
  * Initialize and install RTC driver
  */
@@ -334,10 +352,38 @@ static int at91_rtc_probe(struct platform_device *pdev)
 	if (IS_ERR(rtc->rtt))
 		return PTR_ERR(rtc->rtt);
 
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	rtc->gpbr = devm_ioremap_resource(&pdev->dev, r);
-	if (IS_ERR(rtc->gpbr))
-		return PTR_ERR(rtc->rtt);
+	if (!pdev->dev.of_node) {
+		/*
+		 * TODO: Remove this code chunk when removing non DT board
+		 * support. Remember to remove the gpbr_regmap_config
+		 * variable too.
+		 */
+		void __iomem *gpbr;
+
+		r = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+		gpbr = devm_ioremap_resource(&pdev->dev, r);
+		if (IS_ERR(gpbr))
+			return PTR_ERR(gpbr);
+
+		rtc->gpbr = regmap_init_mmio(NULL, gpbr,
+					     &gpbr_regmap_config);
+	} else {
+		struct of_phandle_args args;
+
+		ret = of_parse_phandle_with_fixed_args(pdev->dev.of_node,
+						"atmel,rtt-rtc-time-reg", 1, 0,
+						&args);
+		if (ret)
+			return ret;
+
+		rtc->gpbr = syscon_node_to_regmap(args.np);
+		rtc->gpbr_offset = args.args[0];
+	}
+
+	if (IS_ERR(rtc->gpbr)) {
+		dev_err(&pdev->dev, "failed to retrieve gpbr regmap, aborting.\n");
+		return -ENOMEM;
+	}
 
 	mr = rtt_readl(rtc, MR);
 
-- 
cgit v0.10.2


From 3969eb48ad7f3f3bef61f5474b7214e601fd2d75 Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Tue, 23 Sep 2014 13:16:05 +0200
Subject: rtc: at91sam9: rework the Kconfig description

Remove all references to AT91CAP9 SoC which has been removed.
Rework help message to remove any specific references to AT91SAM9 SoCs.
State that RTC_DRV_AT91SAM9_RTT and RTC_DRV_AT91SAM9_GPBR options are only
used when booting non DT boards.

Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Acked-by: Johan Hovold <johan@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 7b1f592..5bdd652 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -1109,17 +1109,18 @@ config RTC_DRV_AT91RM9200
 	  this is powered by the backup power supply.
 
 config RTC_DRV_AT91SAM9
-	tristate "AT91SAM9x/AT91CAP9 RTT as RTC"
+	tristate "AT91SAM9 RTT as RTC"
 	depends on ARCH_AT91 && !(ARCH_AT91RM9200 || ARCH_AT91X40)
 	select MFD_SYSCON
 	help
-	  RTC driver for the Atmel AT91SAM9x and AT91CAP9 internal RTT
-	  (Real Time Timer). These timers are powered by the backup power
-	  supply (such as a small coin cell battery), but do not need to
-	  be used as RTCs.
-
-	  (On AT91SAM9rl and AT91SAM9G45 chips you probably want to use the
-	  dedicated RTC module and leave the RTT available for other uses.)
+	  Some AT91SAM9 SoCs provide an RTT (Real Time Timer) block which
+	  can be used as an RTC thanks to the backup power supply (e.g. a
+	  small coin cell battery) which keeps this block and the GPBR
+	  (General Purpose Backup Registers) block powered when the device
+	  is shutdown.
+	  Some AT91SAM9 SoCs provide a real RTC block, on those ones you'd
+	  probably want to use the real RTC block instead of the "RTT as an
+	  RTC" driver.
 
 config RTC_DRV_AT91SAM9_RTT
 	int
@@ -1128,6 +1129,9 @@ config RTC_DRV_AT91SAM9_RTT
 	prompt "RTT module Number" if ARCH_AT91SAM9263
 	depends on RTC_DRV_AT91SAM9
 	help
+	  This option is only relevant for legacy board support and
+	  won't be used when booting a DT board.
+
 	  More than one RTT module is available. You can choose which
 	  one will be used as an RTC. The default of zero is normally
 	  OK to use, though some systems use that for non-RTC purposes.
@@ -1140,6 +1144,9 @@ config RTC_DRV_AT91SAM9_GPBR
 	prompt "Backup Register Number"
 	depends on RTC_DRV_AT91SAM9
 	help
+	  This option is only relevant for legacy board support and
+	  won't be used when booting a DT board.
+
 	  The RTC driver needs to use one of the General Purpose Backup
 	  Registers (GPBRs) as well as the RTT. You can choose which one
 	  will be used. The default of zero is normally OK to use, but
-- 
cgit v0.10.2


From a98250217997f3b8d47d21ffaedd39b32d1798eb Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Tue, 23 Sep 2014 13:18:33 +0200
Subject: ARM: at91: add clk_lookup entry for RTT devices

First export the clk32k clk.
Then add clk_lookup entries for RTT devices so that rtc-at91sam9 driver
can retrieve and manipulate the slow clk.

Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Acked-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Johan Hovold <johan@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>

diff --git a/arch/arm/mach-at91/at91sam9260.c b/arch/arm/mach-at91/at91sam9260.c
index aab1f96..990b82f 100644
--- a/arch/arm/mach-at91/at91sam9260.c
+++ b/arch/arm/mach-at91/at91sam9260.c
@@ -217,6 +217,7 @@ static struct clk_lookup periph_clocks_lookups[] = {
 	CLKDEV_CON_DEV_ID("pclk", "fffbc000.ssc", &ssc_clk),
 	CLKDEV_CON_DEV_ID(NULL, "i2c-at91sam9260.0", &twi_clk),
 	CLKDEV_CON_DEV_ID(NULL, "i2c-at91sam9g20.0", &twi_clk),
+	CLKDEV_CON_DEV_ID(NULL, "rtc-at91sam9.0", &clk32k),
 	/* more usart lookup table for DT entries */
 	CLKDEV_CON_DEV_ID("usart", "fffff200.serial", &mck),
 	CLKDEV_CON_DEV_ID("usart", "fffb0000.serial", &usart0_clk),
@@ -237,6 +238,7 @@ static struct clk_lookup periph_clocks_lookups[] = {
 	CLKDEV_CON_DEV_ID("mci_clk", "fffa8000.mmc", &mmc_clk),
 	CLKDEV_CON_DEV_ID("spi_clk", "fffc8000.spi", &spi0_clk),
 	CLKDEV_CON_DEV_ID("spi_clk", "fffcc000.spi", &spi1_clk),
+	CLKDEV_CON_DEV_ID(NULL, "fffffd20.rtc", &clk32k),
 	/* fake hclk clock */
 	CLKDEV_CON_DEV_ID("hclk", "at91_ohci", &ohci_clk),
 	CLKDEV_CON_ID("pioA", &pioA_clk),
diff --git a/arch/arm/mach-at91/at91sam9261.c b/arch/arm/mach-at91/at91sam9261.c
index a8bd359..a23b3cf 100644
--- a/arch/arm/mach-at91/at91sam9261.c
+++ b/arch/arm/mach-at91/at91sam9261.c
@@ -192,6 +192,7 @@ static struct clk_lookup periph_clocks_lookups[] = {
 	CLKDEV_CON_ID("pioA", &pioA_clk),
 	CLKDEV_CON_ID("pioB", &pioB_clk),
 	CLKDEV_CON_ID("pioC", &pioC_clk),
+	CLKDEV_CON_DEV_ID(NULL, "rtc-at91sam9.0", &clk32k),
 	/* more lookup table for DT entries */
 	CLKDEV_CON_DEV_ID("usart", "fffff200.serial", &mck),
 	CLKDEV_CON_DEV_ID("usart", "fffb0000.serial", &usart0_clk),
@@ -209,6 +210,7 @@ static struct clk_lookup periph_clocks_lookups[] = {
 	CLKDEV_CON_DEV_ID(NULL, "fffff400.gpio", &pioA_clk),
 	CLKDEV_CON_DEV_ID(NULL, "fffff600.gpio", &pioB_clk),
 	CLKDEV_CON_DEV_ID(NULL, "fffff800.gpio", &pioC_clk),
+	CLKDEV_CON_DEV_ID(NULL, "fffffd20.rtc", &clk32k),
 };
 
 static struct clk_lookup usart_clocks_lookups[] = {
diff --git a/arch/arm/mach-at91/at91sam9263.c b/arch/arm/mach-at91/at91sam9263.c
index fbff228..1082fd4 100644
--- a/arch/arm/mach-at91/at91sam9263.c
+++ b/arch/arm/mach-at91/at91sam9263.c
@@ -201,6 +201,8 @@ static struct clk_lookup periph_clocks_lookups[] = {
 	CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.0", &tcb_clk),
 	CLKDEV_CON_DEV_ID(NULL, "i2c-at91sam9260.0", &twi_clk),
 	CLKDEV_CON_DEV_ID(NULL, "at91sam9rl-pwm", &pwm_clk),
+	CLKDEV_CON_DEV_ID(NULL, "rtc-at91sam9.0", &clk32k),
+	CLKDEV_CON_DEV_ID(NULL, "rtc-at91sam9.1", &clk32k),
 	/* fake hclk clock */
 	CLKDEV_CON_DEV_ID("hclk", "at91_ohci", &ohci_clk),
 	CLKDEV_CON_ID("pioA", &pioA_clk),
@@ -227,6 +229,8 @@ static struct clk_lookup periph_clocks_lookups[] = {
 	CLKDEV_CON_DEV_ID(NULL, "fffff800.gpio", &pioCDE_clk),
 	CLKDEV_CON_DEV_ID(NULL, "fffffa00.gpio", &pioCDE_clk),
 	CLKDEV_CON_DEV_ID(NULL, "fffb8000.pwm", &pwm_clk),
+	CLKDEV_CON_DEV_ID(NULL, "fffffd20.rtc", &clk32k),
+	CLKDEV_CON_DEV_ID(NULL, "fffffd50.rtc", &clk32k),
 };
 
 static struct clk_lookup usart_clocks_lookups[] = {
diff --git a/arch/arm/mach-at91/at91sam9g45.c b/arch/arm/mach-at91/at91sam9g45.c
index 405427e..9c4c4ce 100644
--- a/arch/arm/mach-at91/at91sam9g45.c
+++ b/arch/arm/mach-at91/at91sam9g45.c
@@ -254,6 +254,7 @@ static struct clk_lookup periph_clocks_lookups[] = {
 	CLKDEV_CON_DEV_ID(NULL, "atmel_tdes", &aestdessha_clk),
 	CLKDEV_CON_DEV_ID(NULL, "atmel_aes", &aestdessha_clk),
 	CLKDEV_CON_DEV_ID(NULL, "at91sam9rl-pwm", &pwm_clk),
+	CLKDEV_CON_DEV_ID(NULL, "rtc-at91sam9.0", &clk32k),
 	/* more usart lookup table for DT entries */
 	CLKDEV_CON_DEV_ID("usart", "ffffee00.serial", &mck),
 	CLKDEV_CON_DEV_ID("usart", "fff8c000.serial", &usart0_clk),
@@ -280,6 +281,7 @@ static struct clk_lookup periph_clocks_lookups[] = {
 	CLKDEV_CON_DEV_ID(NULL, "fffff600.gpio", &pioC_clk),
 	CLKDEV_CON_DEV_ID(NULL, "fffff800.gpio", &pioDE_clk),
 	CLKDEV_CON_DEV_ID(NULL, "fffffa00.gpio", &pioDE_clk),
+	CLKDEV_CON_DEV_ID(NULL, "fffffd20.rtc", &clk32k),
 
 	CLKDEV_CON_ID("pioA", &pioA_clk),
 	CLKDEV_CON_ID("pioB", &pioB_clk),
diff --git a/arch/arm/mach-at91/at91sam9rl.c b/arch/arm/mach-at91/at91sam9rl.c
index f553e4e..40c815c 100644
--- a/arch/arm/mach-at91/at91sam9rl.c
+++ b/arch/arm/mach-at91/at91sam9rl.c
@@ -205,6 +205,7 @@ static struct clk_lookup periph_clocks_lookups[] = {
 	CLKDEV_CON_ID("pioB", &pioB_clk),
 	CLKDEV_CON_ID("pioC", &pioC_clk),
 	CLKDEV_CON_ID("pioD", &pioD_clk),
+	CLKDEV_CON_DEV_ID(NULL, "rtc-at91sam9.0", &clk32k),
 	/* more lookup table for DT entries */
 	CLKDEV_CON_DEV_ID("usart", "fffff200.serial", &mck),
 	CLKDEV_CON_DEV_ID("usart", "fffb0000.serial", &usart0_clk),
@@ -223,6 +224,7 @@ static struct clk_lookup periph_clocks_lookups[] = {
 	CLKDEV_CON_DEV_ID(NULL, "fffff600.gpio", &pioB_clk),
 	CLKDEV_CON_DEV_ID(NULL, "fffff800.gpio", &pioC_clk),
 	CLKDEV_CON_DEV_ID(NULL, "fffffa00.gpio", &pioD_clk),
+	CLKDEV_CON_DEV_ID(NULL, "fffffd20.rtc", &clk32k),
 	CLKDEV_CON_ID("adc_clk", &tsc_clk),
 };
 
diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c
index d66f102..f569e48 100644
--- a/arch/arm/mach-at91/clock.c
+++ b/arch/arm/mach-at91/clock.c
@@ -115,7 +115,7 @@ static u32 at91_pllb_usb_init;
  * 48 MHz (unless no USB function clocks are needed).  The main clock and
  * both PLLs are turned off to run in "slow clock mode" (system suspend).
  */
-static struct clk clk32k = {
+struct clk clk32k = {
 	.name		= "clk32k",
 	.rate_hz	= AT91_SLOW_CLOCK,
 	.users		= 1,		/* always on */
diff --git a/arch/arm/mach-at91/clock.h b/arch/arm/mach-at91/clock.h
index a98a39b..6eb825a 100644
--- a/arch/arm/mach-at91/clock.h
+++ b/arch/arm/mach-at91/clock.h
@@ -34,6 +34,7 @@ struct clk {
 extern int __init clk_register(struct clk *clk);
 extern struct clk mck;
 extern struct clk utmi_clk;
+extern struct clk clk32k;
 
 #define CLKDEV_CON_ID(_id, _clk)			\
 	{						\
-- 
cgit v0.10.2


From a975f47f6e9a4e9762c81973cf2305003aa5b7dc Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Tue, 23 Sep 2014 16:41:07 +0200
Subject: rtc: at91sam9: use clk API instead of relying on AT91_SLOW_CLOCK

The RTT block is using the slow clock which is accessible through the clk
API.
Use the clk API to retrieve, enable and get the slow clk rate instead of
the AT91_SLOW_CLOCK macro (which hardcodes the slow clk rate).
Doing this allows us to reference the clk thus preventing the CCF from
disabling it during the "disable unused" phase.

Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Acked-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Johan Hovold <johan@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>

diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c
index be9c28b..abac38a 100644
--- a/drivers/rtc/rtc-at91sam9.c
+++ b/drivers/rtc/rtc-at91sam9.c
@@ -23,6 +23,7 @@
 #include <linux/io.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
+#include <linux/clk.h>
 
 /*
  * This driver uses two configurable hardware resources that live in the
@@ -61,8 +62,6 @@
 #define AT91_RTT_ALMS		(1 << 0)		/* Real-time Alarm Status */
 #define AT91_RTT_RTTINC		(1 << 1)		/* Real-time Timer Increment */
 
-#define AT91_SLOW_CLOCK		32768
-
 /*
  * We store ALARM_DISABLED in ALMV to record that no alarm is set.
  * It's also the reset value for that field.
@@ -77,6 +76,7 @@ struct sam9_rtc {
 	struct regmap		*gpbr;
 	unsigned int		gpbr_offset;
 	int 			irq;
+	struct clk		*sclk;
 };
 
 #define rtt_readl(rtc, field) \
@@ -328,6 +328,7 @@ static int at91_rtc_probe(struct platform_device *pdev)
 	struct sam9_rtc	*rtc;
 	int		ret, irq;
 	u32		mr;
+	unsigned int	sclk_rate;
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
@@ -385,11 +386,27 @@ static int at91_rtc_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
+	rtc->sclk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(rtc->sclk))
+		return PTR_ERR(rtc->sclk);
+
+	sclk_rate = clk_get_rate(rtc->sclk);
+	if (!sclk_rate || sclk_rate > AT91_RTT_RTPRES) {
+		dev_err(&pdev->dev, "Invalid slow clock rate\n");
+		return -EINVAL;
+	}
+
+	ret = clk_prepare_enable(rtc->sclk);
+	if (ret) {
+		dev_err(&pdev->dev, "Could not enable slow clock\n");
+		return ret;
+	}
+
 	mr = rtt_readl(rtc, MR);
 
 	/* unless RTT is counting at 1 Hz, re-initialize it */
-	if ((mr & AT91_RTT_RTPRES) != AT91_SLOW_CLOCK) {
-		mr = AT91_RTT_RTTRST | (AT91_SLOW_CLOCK & AT91_RTT_RTPRES);
+	if ((mr & AT91_RTT_RTPRES) != sclk_rate) {
+		mr = AT91_RTT_RTTRST | (sclk_rate & AT91_RTT_RTPRES);
 		gpbr_writel(rtc, 0);
 	}
 
@@ -434,6 +451,9 @@ static int at91_rtc_remove(struct platform_device *pdev)
 	/* disable all interrupts */
 	rtt_writel(rtc, MR, mr & ~(AT91_RTT_ALMIEN | AT91_RTT_RTTINCIEN));
 
+	if (!IS_ERR(rtc->sclk))
+		clk_disable_unprepare(rtc->sclk);
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From 2c91e61dc97cce57ffd9dd654a6ee284e1f45a1f Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Tue, 23 Sep 2014 16:45:12 +0200
Subject: rtc: at91sam9: add DT bindings documentation

Add RTT bindings documentation.

Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Acked-by: Johan Hovold <johan@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>

diff --git a/Documentation/devicetree/bindings/rtc/atmel,at91sam9-rtc.txt b/Documentation/devicetree/bindings/rtc/atmel,at91sam9-rtc.txt
new file mode 100644
index 0000000..6ae79d1
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/atmel,at91sam9-rtc.txt
@@ -0,0 +1,23 @@
+Atmel AT91SAM9260 Real Time Timer
+
+Required properties:
+- compatible: should be: "atmel,at91sam9260-rtt"
+- reg: should encode the memory region of the RTT controller
+- interrupts: rtt alarm/event interrupt
+- clocks: should contain the 32 KHz slow clk that will drive the RTT block.
+- atmel,rtt-rtc-time-reg: should encode the GPBR register used to store
+	the time base when the RTT is used as an RTC.
+	The first cell should point to the GPBR node and the second one
+	encode the offset within the GPBR block (or in other words, the
+	GPBR register used to store the time base).
+
+
+Example:
+
+rtt@fffffd20 {
+	compatible = "atmel,at91sam9260-rtt";
+	reg = <0xfffffd20 0x10>;
+	interrupts = <1 4 7>;
+	clocks = <&clk32k>;
+	atmel,rtt-rtc-time-reg = <&gpbr 0x0>;
+};
-- 
cgit v0.10.2


From 0690cbd2e55a72a8eae557c389d1a136ed9fa142 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 5 Nov 2014 15:28:30 +0100
Subject: powerpc/iommu: Rename iommu_[un]map_sg functions

The IOMMU-API gained support for a new iommu_map_sg
function. This causes compile failures on powerpc because
the function name is already globally used there.
This patch adds a ppc_ prefix to these functions to
solve the compile problem.

Signed-off-by: Joerg Roedel <jroedel@suse.de>

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 42632c7..9cfa370 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -137,13 +137,16 @@ static inline void set_iommu_table_base_and_group(struct device *dev,
 	iommu_add_device(dev);
 }
 
-extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
-			struct scatterlist *sglist, int nelems,
-			unsigned long mask, enum dma_data_direction direction,
-			struct dma_attrs *attrs);
-extern void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
-			   int nelems, enum dma_data_direction direction,
-			   struct dma_attrs *attrs);
+extern int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
+			    struct scatterlist *sglist, int nelems,
+			    unsigned long mask,
+			    enum dma_data_direction direction,
+			    struct dma_attrs *attrs);
+extern void ppc_iommu_unmap_sg(struct iommu_table *tbl,
+			       struct scatterlist *sglist,
+			       int nelems,
+			       enum dma_data_direction direction,
+			       struct dma_attrs *attrs);
 
 extern void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
 				  size_t size, dma_addr_t *dma_handle,
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index 54d0116..4c68bfe 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -60,16 +60,16 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
 			    int nelems, enum dma_data_direction direction,
 			    struct dma_attrs *attrs)
 {
-	return iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
-			    device_to_mask(dev), direction, attrs);
+	return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
+				device_to_mask(dev), direction, attrs);
 }
 
 static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		int nelems, enum dma_data_direction direction,
 		struct dma_attrs *attrs)
 {
-	iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, direction,
-		       attrs);
+	ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
+			   direction, attrs);
 }
 
 /* We support DMA to/from any memory page via the iommu */
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index a10642a..a83cf5e 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -428,10 +428,10 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
 		ppc_md.tce_flush(tbl);
 }
 
-int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
-		 struct scatterlist *sglist, int nelems,
-		 unsigned long mask, enum dma_data_direction direction,
-		 struct dma_attrs *attrs)
+int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
+		     struct scatterlist *sglist, int nelems,
+		     unsigned long mask, enum dma_data_direction direction,
+		     struct dma_attrs *attrs)
 {
 	dma_addr_t dma_next = 0, dma_addr;
 	struct scatterlist *s, *outs, *segstart;
@@ -539,7 +539,7 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
 
 	DBG("mapped %d elements:\n", outcount);
 
-	/* For the sake of iommu_unmap_sg, we clear out the length in the
+	/* For the sake of ppc_iommu_unmap_sg, we clear out the length in the
 	 * next entry of the sglist if we didn't fill the list completely
 	 */
 	if (outcount < incount) {
@@ -572,9 +572,9 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
 }
 
 
-void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
-		int nelems, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
+			int nelems, enum dma_data_direction direction,
+			struct dma_attrs *attrs)
 {
 	struct scatterlist *sg;
 
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 2b90ff8..c7c8720 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -621,8 +621,9 @@ static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg,
 	if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))
 		return dma_direct_ops.map_sg(dev, sg, nents, direction, attrs);
 	else
-		return iommu_map_sg(dev, cell_get_iommu_table(dev), sg, nents,
-				    device_to_mask(dev), direction, attrs);
+		return ppc_iommu_map_sg(dev, cell_get_iommu_table(dev), sg,
+					nents, device_to_mask(dev),
+					direction, attrs);
 }
 
 static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg,
@@ -632,8 +633,8 @@ static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg,
 	if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))
 		dma_direct_ops.unmap_sg(dev, sg, nents, direction, attrs);
 	else
-		iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents, direction,
-			       attrs);
+		ppc_iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents,
+				   direction, attrs);
 }
 
 static int dma_fixed_dma_supported(struct device *dev, u64 mask)
-- 
cgit v0.10.2


From a2b1175131ccb5d4a15456f4f31836356abbce09 Mon Sep 17 00:00:00 2001
From: Daniel Mack <zonque@gmail.com>
Date: Tue, 26 Aug 2014 10:52:53 +0200
Subject: ARM: common: edma: add suspend resume hook

This patch makes the edma driver resume correctly after suspend. Tested
on an AM33xx platform with cyclic audio streams and omap_hsmmc.

All information can be reconstructed from already known runtime
information.

As we now use some functions that were previously only used from __init
context, annotations had to be dropped.

[nm@ti.com: added error handling for runtime + suspend_late/early_resume]
Signed-off-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Daniel Mack <zonque@gmail.com>
Tested-by: Joel Fernandes <joelf@ti.com>
Acked-by: Joel Fernandes <joelf@ti.com>
[nsekhar@ti.com: remove unneeded pm_runtime_get_sync() from resume]
Signed-off-by: Sekhar Nori <nsekhar@ti.com>

diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index d86771a..79de6a2 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -244,6 +244,8 @@ struct edma {
 	/* list of channels with no even trigger; terminated by "-1" */
 	const s8	*noevent;
 
+	struct edma_soc_info *info;
+
 	/* The edma_inuse bit for each PaRAM slot is clear unless the
 	 * channel is in use ... by ARM or DSP, for QDMA, or whatever.
 	 */
@@ -295,7 +297,7 @@ static void map_dmach_queue(unsigned ctlr, unsigned ch_no,
 			~(0x7 << bit), queue_no << bit);
 }
 
-static void __init assign_priority_to_queue(unsigned ctlr, int queue_no,
+static void assign_priority_to_queue(unsigned ctlr, int queue_no,
 		int priority)
 {
 	int bit = queue_no * 4;
@@ -314,7 +316,7 @@ static void __init assign_priority_to_queue(unsigned ctlr, int queue_no,
  * included in that particular EDMA variant (Eg : dm646x)
  *
  */
-static void __init map_dmach_param(unsigned ctlr)
+static void map_dmach_param(unsigned ctlr)
 {
 	int i;
 	for (i = 0; i < EDMA_MAX_DMACH; i++)
@@ -1792,15 +1794,61 @@ static int edma_probe(struct platform_device *pdev)
 			edma_write_array2(j, EDMA_DRAE, i, 1, 0x0);
 			edma_write_array(j, EDMA_QRAE, i, 0x0);
 		}
+		edma_cc[j]->info = info[j];
 		arch_num_cc++;
 	}
 
 	return 0;
 }
 
+static int edma_pm_resume(struct device *dev)
+{
+	int i, j;
+
+	for (j = 0; j < arch_num_cc; j++) {
+		struct edma *cc = edma_cc[j];
+
+		s8 (*queue_priority_mapping)[2];
+
+		queue_priority_mapping = cc->info->queue_priority_mapping;
+
+		/* Event queue priority mapping */
+		for (i = 0; queue_priority_mapping[i][0] != -1; i++)
+			assign_priority_to_queue(j,
+						 queue_priority_mapping[i][0],
+						 queue_priority_mapping[i][1]);
+
+		/*
+		 * Map the channel to param entry if channel mapping logic
+		 * exist
+		 */
+		if (edma_read(j, EDMA_CCCFG) & CHMAP_EXIST)
+			map_dmach_param(j);
+
+		for (i = 0; i < cc->num_channels; i++) {
+			if (test_bit(i, cc->edma_inuse)) {
+				/* ensure access through shadow region 0 */
+				edma_or_array2(j, EDMA_DRAE, 0, i >> 5,
+					       BIT(i & 0x1f));
+
+				setup_dma_interrupt(i,
+						    cc->intr_data[i].callback,
+						    cc->intr_data[i].data);
+			}
+		}
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops edma_pm_ops = {
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(NULL, edma_pm_resume)
+};
+
 static struct platform_driver edma_driver = {
 	.driver = {
 		.name	= "edma",
+		.pm	= &edma_pm_ops,
 		.of_match_table = edma_of_ids,
 	},
 	.probe = edma_probe,
-- 
cgit v0.10.2


From a850c427029e5b9953fe4d4fe4fef10a61ee165a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 21 Nov 2014 11:48:54 +0100
Subject: ARM: common: edma: edma_pm_resume may be unused

The recently introduced resume hook in the edma driver
is not referenced when CONFIG_PM_SLEEP is not set, which
results in a compile warning in keystone builds.

This adds an appropriate #ifdef.

Cc: Nishanth Menon <nm@ti.com>
Cc: Daniel Mack <zonque@gmail.com>
Cc: Joel Fernandes <joelf@ti.com>
Acked-by: Sekhar Nori <nsekhar@ti.com>
Fixes: a2b1175131: ("ARM: common: edma: add suspend resume hook")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index 79de6a2..e093f2f 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -1801,6 +1801,7 @@ static int edma_probe(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int edma_pm_resume(struct device *dev)
 {
 	int i, j;
@@ -1840,6 +1841,7 @@ static int edma_pm_resume(struct device *dev)
 
 	return 0;
 }
+#endif
 
 static const struct dev_pm_ops edma_pm_ops = {
 	SET_LATE_SYSTEM_SLEEP_PM_OPS(NULL, edma_pm_resume)
-- 
cgit v0.10.2


From 2e8a29a1c9aaa41f72a71bb81c3df66da8156c1e Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 20 Nov 2014 10:14:46 -0800
Subject: bus: brcmstb_gisb: resolve section mismatch

Commit f1bee783dd37 moved the call to hook_fault_code in
brcmstb_gisb_arb_probe() which now calls a function annotated with __init, so
this one must also be annotated with __init.

In order to avoid introducing another section mismatch, call
platform_driver_probe() manually and remove the .probe assignment from
brcmstb_gisb_arb_driver. This is very similar to what
drivers/pci/host/pci-imx6.c does since we basically have the same constraints
here.

Fixes: f1bee783dd37 ("bus: brcmstb_gisb: register the fault code hook")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/drivers/bus/brcmstb_gisb.c b/drivers/bus/brcmstb_gisb.c
index f2cd6a2d..b801234 100644
--- a/drivers/bus/brcmstb_gisb.c
+++ b/drivers/bus/brcmstb_gisb.c
@@ -192,7 +192,7 @@ static struct attribute_group gisb_arb_sysfs_attr_group = {
 	.attrs = gisb_arb_sysfs_attrs,
 };
 
-static int brcmstb_gisb_arb_probe(struct platform_device *pdev)
+static int __init brcmstb_gisb_arb_probe(struct platform_device *pdev)
 {
 	struct device_node *dn = pdev->dev.of_node;
 	struct brcmstb_gisb_arb_device *gdev;
@@ -273,7 +273,6 @@ static const struct of_device_id brcmstb_gisb_arb_of_match[] = {
 };
 
 static struct platform_driver brcmstb_gisb_arb_driver = {
-	.probe	= brcmstb_gisb_arb_probe,
 	.driver = {
 		.name	= "brcm-gisb-arb",
 		.owner	= THIS_MODULE,
@@ -283,7 +282,8 @@ static struct platform_driver brcmstb_gisb_arb_driver = {
 
 static int __init brcm_gisb_driver_init(void)
 {
-	return platform_driver_register(&brcmstb_gisb_arb_driver);
+	return platform_driver_probe(&brcmstb_gisb_arb_driver,
+				     brcmstb_gisb_arb_probe);
 }
 
 module_init(brcm_gisb_driver_init);
-- 
cgit v0.10.2


From edeeec85f7145fe8f2a5ffe250a8ee6b1fe4ab28 Mon Sep 17 00:00:00 2001
From: Pankaj Dubey <pankaj.dubey@samsung.com>
Date: Sat, 22 Nov 2014 00:09:25 +0900
Subject: serial: samsung: Fix serial config dependencies for exynos7

Exynos7 has a similar serial controller to that present in older Samsung
SoCs. To re-use the existing serial driver on Exynos7 we need to have
SERIAL_SAMSUNG_UARTS_4 and SERIAL_SAMSUNG_UARTS selected. This is not
possible because these symbols are dependent on PLAT_SAMSUNG which is
not present for the ARMv8 based exynos7.

Change the dependency of these symbols from PLAT_SAMSUNG to the serial
driver thus making it available on exynos7. As the existing platform
specific code making use of these symbols is related to uart driver this
change in dependency should not cause any issues.

Signed-off-by: Pankaj Dubey <pankaj.dubey@samsung.com>
Signed-off-by: Naveen Krishna Chatradhi <ch.naveen@samsung.com>
Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 649b784..98f8bca 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -249,14 +249,14 @@ config SERIAL_SAMSUNG
 
 config SERIAL_SAMSUNG_UARTS_4
 	bool
-	depends on PLAT_SAMSUNG
+	depends on SERIAL_SAMSUNG
 	default y if !(CPU_S3C2410 || CPU_S3C2412 || CPU_S3C2440 || CPU_S3C2442)
 	help
 	  Internal node for the common case of 4 Samsung compatible UARTs
 
 config SERIAL_SAMSUNG_UARTS
 	int
-	depends on PLAT_SAMSUNG
+	depends on SERIAL_SAMSUNG
 	default 4 if SERIAL_SAMSUNG_UARTS_4 || CPU_S3C2416
 	default 3
 	help
-- 
cgit v0.10.2


From 4f4f85fa0b96a35429ebb4bc278d70ae0f72113c Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 29 Jul 2014 10:17:53 +0200
Subject: clk: tegra: Implement memory-controller clock

The memory controller clock runs either at half or the same frequency as
the EMC clock.

Reviewed-By: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Acked-by: Mike Turquette <mturquette@linaro.org>
Signed-off-by: Thierry Reding <treding@nvidia.com>

diff --git a/drivers/clk/tegra/clk-divider.c b/drivers/clk/tegra/clk-divider.c
index 290f9c1..59a5714 100644
--- a/drivers/clk/tegra/clk-divider.c
+++ b/drivers/clk/tegra/clk-divider.c
@@ -185,3 +185,16 @@ struct clk *tegra_clk_register_divider(const char *name,
 
 	return clk;
 }
+
+static const struct clk_div_table mc_div_table[] = {
+	{ .val = 0, .div = 2 },
+	{ .val = 1, .div = 1 },
+	{ .val = 0, .div = 0 },
+};
+
+struct clk *tegra_clk_register_mc(const char *name, const char *parent_name,
+				  void __iomem *reg, spinlock_t *lock)
+{
+	return clk_register_divider_table(NULL, name, parent_name, 0, reg,
+					  16, 1, 0, mc_div_table, lock);
+}
diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c
index f760f31..0b03d2c 100644
--- a/drivers/clk/tegra/clk-tegra114.c
+++ b/drivers/clk/tegra/clk-tegra114.c
@@ -173,6 +173,7 @@ static DEFINE_SPINLOCK(pll_d_lock);
 static DEFINE_SPINLOCK(pll_d2_lock);
 static DEFINE_SPINLOCK(pll_u_lock);
 static DEFINE_SPINLOCK(pll_re_lock);
+static DEFINE_SPINLOCK(emc_lock);
 
 static struct div_nmp pllxc_nmp = {
 	.divm_shift = 0,
@@ -1228,7 +1229,11 @@ static __init void tegra114_periph_clk_init(void __iomem *clk_base,
 			       ARRAY_SIZE(mux_pllmcp_clkm),
 			       CLK_SET_RATE_NO_REPARENT,
 			       clk_base + CLK_SOURCE_EMC,
-			       29, 3, 0, NULL);
+			       29, 3, 0, &emc_lock);
+
+	clk = tegra_clk_register_mc("mc", "emc_mux", clk_base + CLK_SOURCE_EMC,
+				    &emc_lock);
+	clks[TEGRA114_CLK_MC] = clk;
 
 	for (i = 0; i < ARRAY_SIZE(tegra_periph_clk_list); i++) {
 		data = &tegra_periph_clk_list[i];
diff --git a/drivers/clk/tegra/clk-tegra124.c b/drivers/clk/tegra/clk-tegra124.c
index e3a8584..f5f9bac 100644
--- a/drivers/clk/tegra/clk-tegra124.c
+++ b/drivers/clk/tegra/clk-tegra124.c
@@ -132,6 +132,7 @@ static DEFINE_SPINLOCK(pll_d2_lock);
 static DEFINE_SPINLOCK(pll_e_lock);
 static DEFINE_SPINLOCK(pll_re_lock);
 static DEFINE_SPINLOCK(pll_u_lock);
+static DEFINE_SPINLOCK(emc_lock);
 
 /* possible OSC frequencies in Hz */
 static unsigned long tegra124_input_freq[] = {
@@ -1127,7 +1128,11 @@ static __init void tegra124_periph_clk_init(void __iomem *clk_base,
 	clk = clk_register_mux(NULL, "emc_mux", mux_pllmcp_clkm,
 			       ARRAY_SIZE(mux_pllmcp_clkm), 0,
 			       clk_base + CLK_SOURCE_EMC,
-			       29, 3, 0, NULL);
+			       29, 3, 0, &emc_lock);
+
+	clk = tegra_clk_register_mc("mc", "emc_mux", clk_base + CLK_SOURCE_EMC,
+				    &emc_lock);
+	clks[TEGRA124_CLK_MC] = clk;
 
 	/* cml0 */
 	clk = clk_register_gate(NULL, "cml0", "pll_e", 0, clk_base + PLLE_AUX,
diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c
index dace2b1..41272dc 100644
--- a/drivers/clk/tegra/clk-tegra20.c
+++ b/drivers/clk/tegra/clk-tegra20.c
@@ -140,6 +140,8 @@ static struct cpu_clk_suspend_context {
 static void __iomem *clk_base;
 static void __iomem *pmc_base;
 
+static DEFINE_SPINLOCK(emc_lock);
+
 #define TEGRA_INIT_DATA_MUX(_name, _parents, _offset,	\
 			    _clk_num, _gate_flags, _clk_id)	\
 	TEGRA_INIT_DATA(_name, NULL, NULL, _parents, _offset,	\
@@ -819,11 +821,15 @@ static void __init tegra20_periph_clk_init(void)
 			       ARRAY_SIZE(mux_pllmcp_clkm),
 			       CLK_SET_RATE_NO_REPARENT,
 			       clk_base + CLK_SOURCE_EMC,
-			       30, 2, 0, NULL);
+			       30, 2, 0, &emc_lock);
 	clk = tegra_clk_register_periph_gate("emc", "emc_mux", 0, clk_base, 0,
 				    57, periph_clk_enb_refcnt);
 	clks[TEGRA20_CLK_EMC] = clk;
 
+	clk = tegra_clk_register_mc("mc", "emc_mux", clk_base + CLK_SOURCE_EMC,
+				    &emc_lock);
+	clks[TEGRA20_CLK_MC] = clk;
+
 	/* dsi */
 	clk = tegra_clk_register_periph_gate("dsi", "pll_d", 0, clk_base, 0,
 				    48, periph_clk_enb_refcnt);
diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c
index 5bbacd0..4b9d8bd 100644
--- a/drivers/clk/tegra/clk-tegra30.c
+++ b/drivers/clk/tegra/clk-tegra30.c
@@ -177,6 +177,7 @@ static unsigned long input_freq;
 
 static DEFINE_SPINLOCK(cml_lock);
 static DEFINE_SPINLOCK(pll_d_lock);
+static DEFINE_SPINLOCK(emc_lock);
 
 #define TEGRA_INIT_DATA_MUX(_name, _parents, _offset,	\
 			    _clk_num, _gate_flags, _clk_id)	\
@@ -1157,11 +1158,15 @@ static void __init tegra30_periph_clk_init(void)
 			       ARRAY_SIZE(mux_pllmcp_clkm),
 			       CLK_SET_RATE_NO_REPARENT,
 			       clk_base + CLK_SOURCE_EMC,
-			       30, 2, 0, NULL);
+			       30, 2, 0, &emc_lock);
 	clk = tegra_clk_register_periph_gate("emc", "emc_mux", 0, clk_base, 0,
 				    57, periph_clk_enb_refcnt);
 	clks[TEGRA30_CLK_EMC] = clk;
 
+	clk = tegra_clk_register_mc("mc", "emc_mux", clk_base + CLK_SOURCE_EMC,
+				    &emc_lock);
+	clks[TEGRA30_CLK_MC] = clk;
+
 	/* cml0 */
 	clk = clk_register_gate(NULL, "cml0", "pll_e", 0, clk_base + PLLE_AUX,
 				0, 0, &cml_lock);
diff --git a/drivers/clk/tegra/clk.h b/drivers/clk/tegra/clk.h
index 16ec8d6..4e458aa 100644
--- a/drivers/clk/tegra/clk.h
+++ b/drivers/clk/tegra/clk.h
@@ -86,6 +86,8 @@ struct clk *tegra_clk_register_divider(const char *name,
 		const char *parent_name, void __iomem *reg,
 		unsigned long flags, u8 clk_divider_flags, u8 shift, u8 width,
 		u8 frac_width, spinlock_t *lock);
+struct clk *tegra_clk_register_mc(const char *name, const char *parent_name,
+				  void __iomem *reg, spinlock_t *lock);
 
 /*
  * Tegra PLL:
diff --git a/include/dt-bindings/clock/tegra114-car.h b/include/dt-bindings/clock/tegra114-car.h
index fc12621..534c03f 100644
--- a/include/dt-bindings/clock/tegra114-car.h
+++ b/include/dt-bindings/clock/tegra114-car.h
@@ -49,7 +49,7 @@
 #define TEGRA114_CLK_I2S0 30
 /* 31 */
 
-/* 32 */
+#define TEGRA114_CLK_MC 32
 /* 33 */
 #define TEGRA114_CLK_APBDMA 34
 /* 35 */
diff --git a/include/dt-bindings/clock/tegra124-car.h b/include/dt-bindings/clock/tegra124-car.h
index 6bac637..af9bc9a 100644
--- a/include/dt-bindings/clock/tegra124-car.h
+++ b/include/dt-bindings/clock/tegra124-car.h
@@ -48,7 +48,7 @@
 #define TEGRA124_CLK_I2S0 30
 /* 31 */
 
-/* 32 */
+#define TEGRA124_CLK_MC 32
 /* 33 */
 #define TEGRA124_CLK_APBDMA 34
 /* 35 */
diff --git a/include/dt-bindings/clock/tegra20-car.h b/include/dt-bindings/clock/tegra20-car.h
index 9406207..04500b2 100644
--- a/include/dt-bindings/clock/tegra20-car.h
+++ b/include/dt-bindings/clock/tegra20-car.h
@@ -49,7 +49,7 @@
 /* 30 */
 #define TEGRA20_CLK_CACHE2 31
 
-#define TEGRA20_CLK_MEM 32
+#define TEGRA20_CLK_MC 32
 #define TEGRA20_CLK_AHBDMA 33
 #define TEGRA20_CLK_APBDMA 34
 /* 35 */
-- 
cgit v0.10.2


From d075f4a2b8b9b3531a00fa359fdc4c8eb3fad97b Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 2 Oct 2014 14:57:43 +0200
Subject: amba: Add Kconfig file

Rather than duplicate the ARM_AMBA Kconfig symbol in both 32-bit and
64-bit ARM architectures, move the common definition to drivers/amba
where dependent drivers will be located.

Signed-off-by: Thierry Reding <treding@nvidia.com>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 89c4b5c..77f8ca5 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1259,9 +1259,6 @@ source "arch/arm/common/Kconfig"
 
 menu "Bus support"
 
-config ARM_AMBA
-	bool
-
 config ISA
 	bool
 	help
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9532f8d..db1aa54 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -166,9 +166,6 @@ endmenu
 
 menu "Bus support"
 
-config ARM_AMBA
-	bool
-
 config PCI
 	bool "PCI support"
 	help
diff --git a/drivers/amba/Kconfig b/drivers/amba/Kconfig
new file mode 100644
index 0000000..d1cba6a
--- /dev/null
+++ b/drivers/amba/Kconfig
@@ -0,0 +1,2 @@
+config ARM_AMBA
+	bool
-- 
cgit v0.10.2


From bd968d59ad1bf0a21dfadda01e842c477712097d Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 29 Jul 2014 16:24:25 +0200
Subject: ARM: tegra: Move AHB Kconfig to drivers/amba

This will allow the Kconfig option to be shared among 32-bit and 64-bit
ARM.

Signed-off-by: Thierry Reding <treding@nvidia.com>

diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig
index 0953996..d0be9a1 100644
--- a/arch/arm/mach-tegra/Kconfig
+++ b/arch/arm/mach-tegra/Kconfig
@@ -2,6 +2,7 @@ menuconfig ARCH_TEGRA
 	bool "NVIDIA Tegra" if ARCH_MULTI_V7
 	select ARCH_REQUIRE_GPIOLIB
 	select ARCH_SUPPORTS_TRUSTED_FOUNDATIONS
+	select ARM_AMBA
 	select ARM_GIC
 	select CLKSRC_MMIO
 	select HAVE_ARM_SCU if SMP
@@ -59,12 +60,4 @@ config ARCH_TEGRA_124_SOC
 	  Support for NVIDIA Tegra T124 processor family, based on the
 	  ARM CortexA15MP CPU
 
-config TEGRA_AHB
-	bool "Enable AHB driver for NVIDIA Tegra SoCs"
-	default y
-	help
-	  Adds AHB configuration functionality for NVIDIA Tegra SoCs,
-	  which controls AHB bus master arbitration and some
-	  performance parameters(priority, prefech size).
-
 endif
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 1a693d3..af02a8a 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -1,5 +1,7 @@
 menu "Device Drivers"
 
+source "drivers/amba/Kconfig"
+
 source "drivers/base/Kconfig"
 
 source "drivers/bus/Kconfig"
diff --git a/drivers/amba/Kconfig b/drivers/amba/Kconfig
index d1cba6a..4a5c9d2 100644
--- a/drivers/amba/Kconfig
+++ b/drivers/amba/Kconfig
@@ -1,2 +1,14 @@
 config ARM_AMBA
 	bool
+
+if ARM_AMBA
+
+config TEGRA_AHB
+	bool "Enable AHB driver for NVIDIA Tegra SoCs"
+	default y if ARCH_TEGRA
+	help
+	  Adds AHB configuration functionality for NVIDIA Tegra SoCs,
+	  which controls AHB bus master arbitration and some performance
+	  parameters (priority, prefetch size).
+
+endif
-- 
cgit v0.10.2


From 4bc567dd60a1cfa9abd8484cff2de31cdf51649d Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 26 Jun 2014 21:18:53 +0200
Subject: of: Add NVIDIA Tegra memory controller binding

The memory controller on NVIDIA Tegra exposes various knobs that can be
used to tune the behaviour of the clients attached to it.

In addition, the memory controller implements an SMMU (IOMMU) which can
translate I/O virtual addresses to physical addresses for clients. This
is useful for scatter-gather operation on devices that don't support it
natively and for virtualization or process separation.

Signed-off-by: Thierry Reding <treding@nvidia.com>

diff --git a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra-mc.txt b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra-mc.txt
new file mode 100644
index 0000000..f3db93c
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra-mc.txt
@@ -0,0 +1,36 @@
+NVIDIA Tegra Memory Controller device tree bindings
+===================================================
+
+Required properties:
+- compatible: Should be "nvidia,tegra<chip>-mc"
+- reg: Physical base address and length of the controller's registers.
+- clocks: Must contain an entry for each entry in clock-names.
+  See ../clocks/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+  - mc: the module's clock input
+- interrupts: The interrupt outputs from the controller.
+- #iommu-cells: Should be 1. The single cell of the IOMMU specifier defines
+  the SWGROUP of the master.
+
+This device implements an IOMMU that complies with the generic IOMMU binding.
+See ../iommu/iommu.txt for details.
+
+Example:
+--------
+
+	mc: memory-controller@0,70019000 {
+		compatible = "nvidia,tegra124-mc";
+		reg = <0x0 0x70019000 0x0 0x1000>;
+		clocks = <&tegra_car TEGRA124_CLK_MC>;
+		clock-names = "mc";
+
+		interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+
+		#iommu-cells = <1>;
+	};
+
+	sdhci@0,700b0000 {
+		compatible = "nvidia,tegra124-sdhci";
+		...
+		iommus = <&mc TEGRA_SWGROUP_SDMMC1A>;
+	};
-- 
cgit v0.10.2


From dd1d78a11aecd68f5c688c3259c48b8ea4130aaa Mon Sep 17 00:00:00 2001
From: Kevin Cernekee <cernekee@gmail.com>
Date: Tue, 25 Nov 2014 16:49:49 -0800
Subject: bus: brcmstb_gisb: Make the driver buildable on MIPS

BCM7xxx ARM and MIPS platforms share a similar hardware block for
reporting GISB errors, so they both benefit from the use of this driver.
Conditionally compile the ARM-specific bus error handler so that the
GISB error IRQ handler works on other architectures.

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>

diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index 603eb1b..b99729e 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -6,7 +6,7 @@ menu "Bus devices"
 
 config BRCMSTB_GISB_ARB
 	bool "Broadcom STB GISB bus arbiter"
-	depends on ARM
+	depends on ARM || MIPS
 	help
 	  Driver for the Broadcom Set Top Box System-on-a-chip internal bus
 	  arbiter. This driver provides timeout and target abort error handling
diff --git a/drivers/bus/brcmstb_gisb.c b/drivers/bus/brcmstb_gisb.c
index f2cd6a2d..5da935a 100644
--- a/drivers/bus/brcmstb_gisb.c
+++ b/drivers/bus/brcmstb_gisb.c
@@ -24,8 +24,10 @@
 #include <linux/of.h>
 #include <linux/bitops.h>
 
+#ifdef CONFIG_ARM
 #include <asm/bug.h>
 #include <asm/signal.h>
+#endif
 
 #define ARB_TIMER			0x008
 #define ARB_ERR_CAP_CLR			0x7e4
@@ -141,6 +143,7 @@ static int brcmstb_gisb_arb_decode_addr(struct brcmstb_gisb_arb_device *gdev,
 	return 0;
 }
 
+#ifdef CONFIG_ARM
 static int brcmstb_bus_error_handler(unsigned long addr, unsigned int fsr,
 				     struct pt_regs *regs)
 {
@@ -165,6 +168,7 @@ void __init brcmstb_hook_fault_code(void)
 	hook_fault_code(22, brcmstb_bus_error_handler, SIGBUS, 0,
 			"imprecise external abort");
 }
+#endif
 
 static irqreturn_t brcmstb_gisb_timeout_handler(int irq, void *dev_id)
 {
-- 
cgit v0.10.2


From 2b53eadcea05b680278f8d078b166e1e295e2a4f Mon Sep 17 00:00:00 2001
From: Kevin Cernekee <cernekee@gmail.com>
Date: Tue, 25 Nov 2014 16:49:50 -0800
Subject: bus: brcmstb_gisb: Introduce wrapper functions for MMIO accesses

These will be used to abstract out chip-to-chip differences.

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>

diff --git a/drivers/bus/brcmstb_gisb.c b/drivers/bus/brcmstb_gisb.c
index 5da935a..8ff403d 100644
--- a/drivers/bus/brcmstb_gisb.c
+++ b/drivers/bus/brcmstb_gisb.c
@@ -54,6 +54,16 @@ struct brcmstb_gisb_arb_device {
 
 static LIST_HEAD(brcmstb_gisb_arb_device_list);
 
+static u32 gisb_read(struct brcmstb_gisb_arb_device *gdev, int reg)
+{
+	return ioread32(gdev->base + reg);
+}
+
+static void gisb_write(struct brcmstb_gisb_arb_device *gdev, u32 val, int reg)
+{
+	iowrite32(val, gdev->base + reg);
+}
+
 static ssize_t gisb_arb_get_timeout(struct device *dev,
 				    struct device_attribute *attr,
 				    char *buf)
@@ -63,7 +73,7 @@ static ssize_t gisb_arb_get_timeout(struct device *dev,
 	u32 timeout;
 
 	mutex_lock(&gdev->lock);
-	timeout = ioread32(gdev->base + ARB_TIMER);
+	timeout = gisb_read(gdev, ARB_TIMER);
 	mutex_unlock(&gdev->lock);
 
 	return sprintf(buf, "%d", timeout);
@@ -85,7 +95,7 @@ static ssize_t gisb_arb_set_timeout(struct device *dev,
 		return -EINVAL;
 
 	mutex_lock(&gdev->lock);
-	iowrite32(val, gdev->base + ARB_TIMER);
+	gisb_write(gdev, val, ARB_TIMER);
 	mutex_unlock(&gdev->lock);
 
 	return count;
@@ -112,18 +122,18 @@ static int brcmstb_gisb_arb_decode_addr(struct brcmstb_gisb_arb_device *gdev,
 	const char *m_name;
 	char m_fmt[11];
 
-	cap_status = ioread32(gdev->base + ARB_ERR_CAP_STATUS);
+	cap_status = gisb_read(gdev, ARB_ERR_CAP_STATUS);
 
 	/* Invalid captured address, bail out */
 	if (!(cap_status & ARB_ERR_CAP_STATUS_VALID))
 		return 1;
 
 	/* Read the address and master */
-	arb_addr = ioread32(gdev->base + ARB_ERR_CAP_ADDR) & 0xffffffff;
+	arb_addr = gisb_read(gdev, ARB_ERR_CAP_ADDR) & 0xffffffff;
 #if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
-	arb_addr |= (u64)ioread32(gdev->base + ARB_ERR_CAP_HI_ADDR) << 32;
+	arb_addr |= (u64)gisb_read(gdev, ARB_ERR_CAP_HI_ADDR) << 32;
 #endif
-	master = ioread32(gdev->base + ARB_ERR_CAP_MASTER);
+	master = gisb_read(gdev, ARB_ERR_CAP_MASTER);
 
 	m_name = brcmstb_gisb_master_to_str(gdev, master);
 	if (!m_name) {
@@ -138,7 +148,7 @@ static int brcmstb_gisb_arb_decode_addr(struct brcmstb_gisb_arb_device *gdev,
 		m_name);
 
 	/* clear the GISB error */
-	iowrite32(ARB_ERR_CAP_CLEAR, gdev->base + ARB_ERR_CAP_CLR);
+	gisb_write(gdev, ARB_ERR_CAP_CLEAR, ARB_ERR_CAP_CLR);
 
 	return 0;
 }
-- 
cgit v0.10.2


From f80835875d3d1a4764711a90f6cc2669f037f527 Mon Sep 17 00:00:00 2001
From: Kevin Cernekee <cernekee@gmail.com>
Date: Tue, 25 Nov 2014 16:49:51 -0800
Subject: bus: brcmstb_gisb: Look up register offsets in a table

There are at least 4 incompatible variations of this hardware block,
so let's use the ARB_* constants as a table index instead of hardcoding
specific register offsets.  Also, allow for the possibility of adding
old devices that are missing some of the registers.

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>

diff --git a/drivers/bus/brcmstb_gisb.c b/drivers/bus/brcmstb_gisb.c
index 8ff403d..ef1e423 100644
--- a/drivers/bus/brcmstb_gisb.c
+++ b/drivers/bus/brcmstb_gisb.c
@@ -29,23 +29,37 @@
 #include <asm/signal.h>
 #endif
 
-#define ARB_TIMER			0x008
-#define ARB_ERR_CAP_CLR			0x7e4
 #define  ARB_ERR_CAP_CLEAR		(1 << 0)
-#define ARB_ERR_CAP_HI_ADDR		0x7e8
-#define ARB_ERR_CAP_ADDR		0x7ec
-#define ARB_ERR_CAP_DATA		0x7f0
-#define ARB_ERR_CAP_STATUS		0x7f4
 #define  ARB_ERR_CAP_STATUS_TIMEOUT	(1 << 12)
 #define  ARB_ERR_CAP_STATUS_TEA		(1 << 11)
 #define  ARB_ERR_CAP_STATUS_BS_SHIFT	(1 << 2)
 #define  ARB_ERR_CAP_STATUS_BS_MASK	0x3c
 #define  ARB_ERR_CAP_STATUS_WRITE	(1 << 1)
 #define  ARB_ERR_CAP_STATUS_VALID	(1 << 0)
-#define ARB_ERR_CAP_MASTER		0x7f8
+
+enum {
+	ARB_TIMER,
+	ARB_ERR_CAP_CLR,
+	ARB_ERR_CAP_HI_ADDR,
+	ARB_ERR_CAP_ADDR,
+	ARB_ERR_CAP_DATA,
+	ARB_ERR_CAP_STATUS,
+	ARB_ERR_CAP_MASTER,
+};
+
+static const int gisb_offsets_bcm7445[] = {
+	[ARB_TIMER]		= 0x008,
+	[ARB_ERR_CAP_CLR]	= 0x7e4,
+	[ARB_ERR_CAP_HI_ADDR]	= 0x7e8,
+	[ARB_ERR_CAP_ADDR]	= 0x7ec,
+	[ARB_ERR_CAP_DATA]	= 0x7f0,
+	[ARB_ERR_CAP_STATUS]	= 0x7f4,
+	[ARB_ERR_CAP_MASTER]	= 0x7f8,
+};
 
 struct brcmstb_gisb_arb_device {
 	void __iomem	*base;
+	const int	*gisb_offsets;
 	struct mutex	lock;
 	struct list_head next;
 	u32 valid_mask;
@@ -56,11 +70,21 @@ static LIST_HEAD(brcmstb_gisb_arb_device_list);
 
 static u32 gisb_read(struct brcmstb_gisb_arb_device *gdev, int reg)
 {
-	return ioread32(gdev->base + reg);
+	int offset = gdev->gisb_offsets[reg];
+
+	/* absent register: ARB_ERR_CAP_MASTER reads as 1, anything else as 0 */
+	if (offset == -1)
+		return reg == ARB_ERR_CAP_MASTER ? 1 : 0;
+
+	return ioread32(gdev->base + offset);
 }
 
 static void gisb_write(struct brcmstb_gisb_arb_device *gdev, u32 val, int reg)
 {
+	int offset = gdev->gisb_offsets[reg];
+
+	if (offset == -1)
+		return;	/* register not present on this chip: drop the write */
-	iowrite32(val, gdev->base + reg);
+	iowrite32(val, gdev->base + offset);
 }
 
@@ -230,6 +254,8 @@ static int brcmstb_gisb_arb_probe(struct platform_device *pdev)
 	if (IS_ERR(gdev->base))
 		return PTR_ERR(gdev->base);
 
+	gdev->gisb_offsets = gisb_offsets_bcm7445;
+
 	err = devm_request_irq(&pdev->dev, timeout_irq,
 				brcmstb_gisb_timeout_handler, 0, pdev->name,
 				gdev);
-- 
cgit v0.10.2


From d1d6786846e1c40f780edb83569597a8a7769e95 Mon Sep 17 00:00:00 2001
From: Kevin Cernekee <cernekee@gmail.com>
Date: Tue, 25 Nov 2014 16:49:52 -0800
Subject: bus: brcmstb_gisb: Add register offset tables for older chips

This will select the appropriate register layout based on the DT
"compatible" string.

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>

diff --git a/Documentation/devicetree/bindings/bus/brcm,gisb-arb.txt b/Documentation/devicetree/bindings/bus/brcm,gisb-arb.txt
index e2d501d..1eceefb 100644
--- a/Documentation/devicetree/bindings/bus/brcm,gisb-arb.txt
+++ b/Documentation/devicetree/bindings/bus/brcm,gisb-arb.txt
@@ -2,7 +2,11 @@ Broadcom GISB bus Arbiter controller
 
 Required properties:
 
-- compatible: should be "brcm,gisb-arb"
+- compatible:
+    "brcm,gisb-arb" or "brcm,bcm7445-gisb-arb" for 28nm chips
+    "brcm,bcm7435-gisb-arb" for newer 40nm chips
+    "brcm,bcm7400-gisb-arb" for older 40nm chips and all 65nm chips
+    "brcm,bcm7038-gisb-arb" for 130nm chips
 - reg: specifies the base physical address and size of the registers
 - interrupt-parent: specifies the phandle to the parent interrupt controller
   this arbiter gets interrupt line from
diff --git a/drivers/bus/brcmstb_gisb.c b/drivers/bus/brcmstb_gisb.c
index ef1e423..172908d 100644
--- a/drivers/bus/brcmstb_gisb.c
+++ b/drivers/bus/brcmstb_gisb.c
@@ -47,6 +47,36 @@ enum {
 	ARB_ERR_CAP_MASTER,
 };
 
+static const int gisb_offsets_bcm7038[] = {
+	[ARB_TIMER]		= 0x00c,
+	[ARB_ERR_CAP_CLR]	= 0x0c4,
+	[ARB_ERR_CAP_HI_ADDR]	= -1,	/* register not present */
+	[ARB_ERR_CAP_ADDR]	= 0x0c8,
+	[ARB_ERR_CAP_DATA]	= 0x0cc,
+	[ARB_ERR_CAP_STATUS]	= 0x0d0,
+	[ARB_ERR_CAP_MASTER]	= -1,	/* register not present */
+};
+
+static const int gisb_offsets_bcm7400[] = {
+	[ARB_TIMER]		= 0x00c,
+	[ARB_ERR_CAP_CLR]	= 0x0c8,
+	[ARB_ERR_CAP_HI_ADDR]	= -1,	/* register not present */
+	[ARB_ERR_CAP_ADDR]	= 0x0cc,
+	[ARB_ERR_CAP_DATA]	= 0x0d0,
+	[ARB_ERR_CAP_STATUS]	= 0x0d4,
+	[ARB_ERR_CAP_MASTER]	= 0x0d8,
+};
+
+static const int gisb_offsets_bcm7435[] = {
+	[ARB_TIMER]		= 0x00c,
+	[ARB_ERR_CAP_CLR]	= 0x168,
+	[ARB_ERR_CAP_HI_ADDR]	= -1,	/* register not present */
+	[ARB_ERR_CAP_ADDR]	= 0x16c,
+	[ARB_ERR_CAP_DATA]	= 0x170,
+	[ARB_ERR_CAP_STATUS]	= 0x174,
+	[ARB_ERR_CAP_MASTER]	= 0x178,
+};
+
 static const int gisb_offsets_bcm7445[] = {
 	[ARB_TIMER]		= 0x008,
 	[ARB_ERR_CAP_CLR]	= 0x7e4,
@@ -230,10 +260,20 @@ static struct attribute_group gisb_arb_sysfs_attr_group = {
 	.attrs = gisb_arb_sysfs_attrs,
 };
 
+static const struct of_device_id brcmstb_gisb_arb_of_match[] = {
+	{ .compatible = "brcm,gisb-arb",         .data = gisb_offsets_bcm7445 },
+	{ .compatible = "brcm,bcm7445-gisb-arb", .data = gisb_offsets_bcm7445 },
+	{ .compatible = "brcm,bcm7435-gisb-arb", .data = gisb_offsets_bcm7435 },
+	{ .compatible = "brcm,bcm7400-gisb-arb", .data = gisb_offsets_bcm7400 },
+	{ .compatible = "brcm,bcm7038-gisb-arb", .data = gisb_offsets_bcm7038 },
+	{ },
+};
+
 static int brcmstb_gisb_arb_probe(struct platform_device *pdev)
 {
 	struct device_node *dn = pdev->dev.of_node;
 	struct brcmstb_gisb_arb_device *gdev;
+	const struct of_device_id *of_id;
 	struct resource *r;
 	int err, timeout_irq, tea_irq;
 	unsigned int num_masters, j = 0;
@@ -254,7 +294,12 @@ static int brcmstb_gisb_arb_probe(struct platform_device *pdev)
 	if (IS_ERR(gdev->base))
 		return PTR_ERR(gdev->base);
 
-	gdev->gisb_offsets = gisb_offsets_bcm7445;
+	of_id = of_match_node(brcmstb_gisb_arb_of_match, dn);
+	if (!of_id) {
+		pr_err("failed to look up compatible string\n");
+		return -EINVAL;
+	}
+	gdev->gisb_offsets = of_id->data;
 
 	err = devm_request_irq(&pdev->dev, timeout_irq,
 				brcmstb_gisb_timeout_handler, 0, pdev->name,
@@ -307,11 +352,6 @@ static int brcmstb_gisb_arb_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static const struct of_device_id brcmstb_gisb_arb_of_match[] = {
-	{ .compatible = "brcm,gisb-arb" },
-	{ },
-};
-
 static struct platform_driver brcmstb_gisb_arb_driver = {
 	.probe	= brcmstb_gisb_arb_probe,
 	.driver = {
-- 
cgit v0.10.2


From 8918465163171322c77a19d5258a95f56d89d2e4 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 16 Apr 2014 09:24:44 +0200
Subject: memory: Add NVIDIA Tegra memory controller support

The memory controller on NVIDIA Tegra exposes various knobs that can be
used to tune the behaviour of the clients attached to it.

Currently this driver sets up the latency allowance registers to the HW
defaults. Eventually an API should be exported by this driver (via a
custom API or a generic subsystem) to allow clients to register latency
requirements.

This driver also registers an IOMMU (SMMU) that's implemented by the
memory controller. It is supported on Tegra30, Tegra114 and Tegra124
currently. Tegra20 has a GART instead.

The Tegra SMMU operates on memory clients and SWGROUPs. A memory client
is a unidirectional, special-purpose DMA master. A SWGROUP represents a
set of memory clients that form a logical functional unit corresponding
to a single device. Typically a device has two clients: one client for
read transactions and one client for write transactions, but there are
also devices that have only read clients, albeit many of them (such as the
display controllers).

Because there is no 1:1 relationship between memory clients and devices
the driver keeps a table of memory clients and the SWGROUPs that they
belong to per SoC. Note that this is an exception and due to the fact
that the SMMU is tightly integrated with the rest of the Tegra SoC. The
use of these tables is discouraged in drivers for generic IOMMU devices
such as the ARM SMMU because the same IOMMU could be used in any number
of SoCs and keeping such tables for each SoC would not scale.

Acked-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Thierry Reding <treding@nvidia.com>

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index dd51122..6dbfbc2 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -163,14 +163,14 @@ config TEGRA_IOMMU_GART
 	  hardware included on Tegra SoCs.
 
 config TEGRA_IOMMU_SMMU
-	bool "Tegra SMMU IOMMU Support"
-	depends on ARCH_TEGRA && TEGRA_AHB
+	bool "NVIDIA Tegra SMMU Support"
+	depends on ARCH_TEGRA
+	depends on TEGRA_AHB
+	depends on TEGRA_MC
 	select IOMMU_API
 	help
-	  Enables support for remapping discontiguous physical memory
-	  shared with the operating system into contiguous I/O virtual
-	  space through the SMMU (System Memory Management Unit)
-	  hardware included on Tegra SoCs.
+	  This driver supports the IOMMU hardware (SMMU) found on NVIDIA Tegra
+	  SoCs (Tegra30 up to Tegra124).
 
 config EXYNOS_IOMMU
 	bool "Exynos IOMMU Support"
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 73e845a..6e134c7 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -1,1296 +1,732 @@
 /*
- * IOMMU API for SMMU in Tegra30
+ * Copyright (C) 2011-2014 NVIDIA CORPORATION.  All rights reserved.
  *
- * Copyright (c) 2011-2013, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
  */
 
-#define pr_fmt(fmt)	"%s(): " fmt, __func__
-
 #include <linux/err.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/device.h>
-#include <linux/sched.h>
 #include <linux/iommu.h>
-#include <linux/io.h>
+#include <linux/kernel.h>
 #include <linux/of.h>
-#include <linux/of_iommu.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
 
 #include <soc/tegra/ahb.h>
+#include <soc/tegra/mc.h>
 
-#include <asm/page.h>
-#include <asm/cacheflush.h>
-
-enum smmu_hwgrp {
-	HWGRP_AFI,
-	HWGRP_AVPC,
-	HWGRP_DC,
-	HWGRP_DCB,
-	HWGRP_EPP,
-	HWGRP_G2,
-	HWGRP_HC,
-	HWGRP_HDA,
-	HWGRP_ISP,
-	HWGRP_MPE,
-	HWGRP_NV,
-	HWGRP_NV2,
-	HWGRP_PPCS,
-	HWGRP_SATA,
-	HWGRP_VDE,
-	HWGRP_VI,
-
-	HWGRP_COUNT,
-
-	HWGRP_END = ~0,
-};
+struct tegra_smmu {
+	void __iomem *regs;
+	struct device *dev;
 
-#define HWG_AFI		(1 << HWGRP_AFI)
-#define HWG_AVPC	(1 << HWGRP_AVPC)
-#define HWG_DC		(1 << HWGRP_DC)
-#define HWG_DCB		(1 << HWGRP_DCB)
-#define HWG_EPP		(1 << HWGRP_EPP)
-#define HWG_G2		(1 << HWGRP_G2)
-#define HWG_HC		(1 << HWGRP_HC)
-#define HWG_HDA		(1 << HWGRP_HDA)
-#define HWG_ISP		(1 << HWGRP_ISP)
-#define HWG_MPE		(1 << HWGRP_MPE)
-#define HWG_NV		(1 << HWGRP_NV)
-#define HWG_NV2		(1 << HWGRP_NV2)
-#define HWG_PPCS	(1 << HWGRP_PPCS)
-#define HWG_SATA	(1 << HWGRP_SATA)
-#define HWG_VDE		(1 << HWGRP_VDE)
-#define HWG_VI		(1 << HWGRP_VI)
-
-/* bitmap of the page sizes currently supported */
-#define SMMU_IOMMU_PGSIZES	(SZ_4K)
-
-#define SMMU_CONFIG				0x10
-#define SMMU_CONFIG_DISABLE			0
-#define SMMU_CONFIG_ENABLE			1
-
-/* REVISIT: To support multiple MCs */
-enum {
-	_MC = 0,
-};
+	struct tegra_mc *mc;
+	const struct tegra_smmu_soc *soc;
 
-enum {
-	_TLB = 0,
-	_PTC,
-};
+	unsigned long *asids;
+	struct mutex lock;
 
-#define SMMU_CACHE_CONFIG_BASE			0x14
-#define __SMMU_CACHE_CONFIG(mc, cache)		(SMMU_CACHE_CONFIG_BASE + 4 * cache)
-#define SMMU_CACHE_CONFIG(cache)		__SMMU_CACHE_CONFIG(_MC, cache)
-
-#define SMMU_CACHE_CONFIG_STATS_SHIFT		31
-#define SMMU_CACHE_CONFIG_STATS_ENABLE		(1 << SMMU_CACHE_CONFIG_STATS_SHIFT)
-#define SMMU_CACHE_CONFIG_STATS_TEST_SHIFT	30
-#define SMMU_CACHE_CONFIG_STATS_TEST		(1 << SMMU_CACHE_CONFIG_STATS_TEST_SHIFT)
-
-#define SMMU_TLB_CONFIG_HIT_UNDER_MISS__ENABLE	(1 << 29)
-#define SMMU_TLB_CONFIG_ACTIVE_LINES__VALUE	0x10
-#define SMMU_TLB_CONFIG_RESET_VAL		0x20000010
-
-#define SMMU_PTC_CONFIG_CACHE__ENABLE		(1 << 29)
-#define SMMU_PTC_CONFIG_INDEX_MAP__PATTERN	0x3f
-#define SMMU_PTC_CONFIG_RESET_VAL		0x2000003f
-
-#define SMMU_PTB_ASID				0x1c
-#define SMMU_PTB_ASID_CURRENT_SHIFT		0
-
-#define SMMU_PTB_DATA				0x20
-#define SMMU_PTB_DATA_RESET_VAL			0
-#define SMMU_PTB_DATA_ASID_NONSECURE_SHIFT	29
-#define SMMU_PTB_DATA_ASID_WRITABLE_SHIFT	30
-#define SMMU_PTB_DATA_ASID_READABLE_SHIFT	31
-
-#define SMMU_TLB_FLUSH				0x30
-#define SMMU_TLB_FLUSH_VA_MATCH_ALL		0
-#define SMMU_TLB_FLUSH_VA_MATCH_SECTION		2
-#define SMMU_TLB_FLUSH_VA_MATCH_GROUP		3
-#define SMMU_TLB_FLUSH_ASID_SHIFT		29
-#define SMMU_TLB_FLUSH_ASID_MATCH_DISABLE	0
-#define SMMU_TLB_FLUSH_ASID_MATCH_ENABLE	1
-#define SMMU_TLB_FLUSH_ASID_MATCH_SHIFT		31
-
-#define SMMU_PTC_FLUSH				0x34
-#define SMMU_PTC_FLUSH_TYPE_ALL			0
-#define SMMU_PTC_FLUSH_TYPE_ADR			1
-#define SMMU_PTC_FLUSH_ADR_SHIFT		4
-
-#define SMMU_ASID_SECURITY			0x38
-
-#define SMMU_STATS_CACHE_COUNT_BASE		0x1f0
-
-#define SMMU_STATS_CACHE_COUNT(mc, cache, hitmiss)		\
-	(SMMU_STATS_CACHE_COUNT_BASE + 8 * cache + 4 * hitmiss)
-
-#define SMMU_TRANSLATION_ENABLE_0		0x228
-#define SMMU_TRANSLATION_ENABLE_1		0x22c
-#define SMMU_TRANSLATION_ENABLE_2		0x230
-
-#define SMMU_AFI_ASID	0x238   /* PCIE */
-#define SMMU_AVPC_ASID	0x23c   /* AVP */
-#define SMMU_DC_ASID	0x240   /* Display controller */
-#define SMMU_DCB_ASID	0x244   /* Display controller B */
-#define SMMU_EPP_ASID	0x248   /* Encoder pre-processor */
-#define SMMU_G2_ASID	0x24c   /* 2D engine */
-#define SMMU_HC_ASID	0x250   /* Host1x */
-#define SMMU_HDA_ASID	0x254   /* High-def audio */
-#define SMMU_ISP_ASID	0x258   /* Image signal processor */
-#define SMMU_MPE_ASID	0x264   /* MPEG encoder */
-#define SMMU_NV_ASID	0x268   /* (3D) */
-#define SMMU_NV2_ASID	0x26c   /* (3D) */
-#define SMMU_PPCS_ASID	0x270   /* AHB */
-#define SMMU_SATA_ASID	0x278   /* SATA */
-#define SMMU_VDE_ASID	0x27c   /* Video decoder */
-#define SMMU_VI_ASID	0x280   /* Video input */
-
-#define SMMU_PDE_NEXT_SHIFT		28
-
-#define SMMU_TLB_FLUSH_VA_SECTION__MASK		0xffc00000
-#define SMMU_TLB_FLUSH_VA_SECTION__SHIFT	12 /* right shift */
-#define SMMU_TLB_FLUSH_VA_GROUP__MASK		0xffffc000
-#define SMMU_TLB_FLUSH_VA_GROUP__SHIFT		12 /* right shift */
-#define SMMU_TLB_FLUSH_VA(iova, which)	\
-	((((iova) & SMMU_TLB_FLUSH_VA_##which##__MASK) >> \
-		SMMU_TLB_FLUSH_VA_##which##__SHIFT) |	\
-	SMMU_TLB_FLUSH_VA_MATCH_##which)
-#define SMMU_PTB_ASID_CUR(n)	\
-		((n) << SMMU_PTB_ASID_CURRENT_SHIFT)
-#define SMMU_TLB_FLUSH_ASID_MATCH_disable		\
-		(SMMU_TLB_FLUSH_ASID_MATCH_DISABLE <<	\
-			SMMU_TLB_FLUSH_ASID_MATCH_SHIFT)
-#define SMMU_TLB_FLUSH_ASID_MATCH__ENABLE		\
-		(SMMU_TLB_FLUSH_ASID_MATCH_ENABLE <<	\
-			SMMU_TLB_FLUSH_ASID_MATCH_SHIFT)
-
-#define SMMU_PAGE_SHIFT 12
-#define SMMU_PAGE_SIZE	(1 << SMMU_PAGE_SHIFT)
-#define SMMU_PAGE_MASK	((1 << SMMU_PAGE_SHIFT) - 1)
-
-#define SMMU_PDIR_COUNT	1024
-#define SMMU_PDIR_SIZE	(sizeof(unsigned long) * SMMU_PDIR_COUNT)
-#define SMMU_PTBL_COUNT	1024
-#define SMMU_PTBL_SIZE	(sizeof(unsigned long) * SMMU_PTBL_COUNT)
-#define SMMU_PDIR_SHIFT	12
-#define SMMU_PDE_SHIFT	12
-#define SMMU_PTE_SHIFT	12
-#define SMMU_PFN_MASK	0x000fffff
-
-#define SMMU_ADDR_TO_PFN(addr)	((addr) >> 12)
-#define SMMU_ADDR_TO_PDN(addr)	((addr) >> 22)
-#define SMMU_PDN_TO_ADDR(pdn)	((pdn) << 22)
-
-#define _READABLE	(1 << SMMU_PTB_DATA_ASID_READABLE_SHIFT)
-#define _WRITABLE	(1 << SMMU_PTB_DATA_ASID_WRITABLE_SHIFT)
-#define _NONSECURE	(1 << SMMU_PTB_DATA_ASID_NONSECURE_SHIFT)
-#define _PDE_NEXT	(1 << SMMU_PDE_NEXT_SHIFT)
-#define _MASK_ATTR	(_READABLE | _WRITABLE | _NONSECURE)
-
-#define _PDIR_ATTR	(_READABLE | _WRITABLE | _NONSECURE)
-
-#define _PDE_ATTR	(_READABLE | _WRITABLE | _NONSECURE)
-#define _PDE_ATTR_N	(_PDE_ATTR | _PDE_NEXT)
-#define _PDE_VACANT(pdn)	(((pdn) << 10) | _PDE_ATTR)
-
-#define _PTE_ATTR	(_READABLE | _WRITABLE | _NONSECURE)
-#define _PTE_VACANT(addr)	(((addr) >> SMMU_PAGE_SHIFT) | _PTE_ATTR)
-
-#define SMMU_MK_PDIR(page, attr)	\
-		((page_to_phys(page) >> SMMU_PDIR_SHIFT) | (attr))
-#define SMMU_MK_PDE(page, attr)		\
-		(unsigned long)((page_to_phys(page) >> SMMU_PDE_SHIFT) | (attr))
-#define SMMU_EX_PTBL_PAGE(pde)		\
-		pfn_to_page((unsigned long)(pde) & SMMU_PFN_MASK)
-#define SMMU_PFN_TO_PTE(pfn, attr)	(unsigned long)((pfn) | (attr))
-
-#define SMMU_ASID_ENABLE(asid)	((asid) | (1 << 31))
-#define SMMU_ASID_DISABLE	0
-#define SMMU_ASID_ASID(n)	((n) & ~SMMU_ASID_ENABLE(0))
-
-#define NUM_SMMU_REG_BANKS	3
-
-#define smmu_client_enable_hwgrp(c, m)	smmu_client_set_hwgrp(c, m, 1)
-#define smmu_client_disable_hwgrp(c)	smmu_client_set_hwgrp(c, 0, 0)
-#define __smmu_client_enable_hwgrp(c, m) __smmu_client_set_hwgrp(c, m, 1)
-#define __smmu_client_disable_hwgrp(c)	__smmu_client_set_hwgrp(c, 0, 0)
-
-#define HWGRP_INIT(client) [HWGRP_##client] = SMMU_##client##_ASID
-
-static const u32 smmu_hwgrp_asid_reg[] = {
-	HWGRP_INIT(AFI),
-	HWGRP_INIT(AVPC),
-	HWGRP_INIT(DC),
-	HWGRP_INIT(DCB),
-	HWGRP_INIT(EPP),
-	HWGRP_INIT(G2),
-	HWGRP_INIT(HC),
-	HWGRP_INIT(HDA),
-	HWGRP_INIT(ISP),
-	HWGRP_INIT(MPE),
-	HWGRP_INIT(NV),
-	HWGRP_INIT(NV2),
-	HWGRP_INIT(PPCS),
-	HWGRP_INIT(SATA),
-	HWGRP_INIT(VDE),
-	HWGRP_INIT(VI),
+	struct list_head list;
 };
-#define HWGRP_ASID_REG(x) (smmu_hwgrp_asid_reg[x])
 
-/*
- * Per client for address space
- */
-struct smmu_client {
-	struct device		*dev;
-	struct list_head	list;
-	struct smmu_as		*as;
-	u32			hwgrp;
+struct tegra_smmu_as {
+	struct iommu_domain *domain;
+	struct tegra_smmu *smmu;
+	unsigned int use_count;
+	struct page *count;
+	struct page *pd;
+	unsigned id;
+	u32 attr;
 };
 
-/*
- * Per address space
- */
-struct smmu_as {
-	struct smmu_device	*smmu;	/* back pointer to container */
-	unsigned int		asid;
-	spinlock_t		lock;	/* for pagetable */
-	struct page		*pdir_page;
-	unsigned long		pdir_attr;
-	unsigned long		pde_attr;
-	unsigned long		pte_attr;
-	unsigned int		*pte_count;
-
-	struct list_head	client;
-	spinlock_t		client_lock; /* for client list */
-};
+static inline void smmu_writel(struct tegra_smmu *smmu, u32 value,
+			       unsigned long offset)
+{
+	writel(value, smmu->regs + offset);
+}
 
-struct smmu_debugfs_info {
-	struct smmu_device *smmu;
-	int mc;
-	int cache;
-};
+static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset)
+{
+	return readl(smmu->regs + offset);
+}
 
-/*
- * Per SMMU device - IOMMU device
- */
-struct smmu_device {
-	void __iomem	*regbase;	/* register offset base */
-	void __iomem	**regs;		/* register block start address array */
-	void __iomem	**rege;		/* register block end address array */
-	int		nregs;		/* number of register blocks */
-
-	unsigned long	iovmm_base;	/* remappable base address */
-	unsigned long	page_count;	/* total remappable size */
-	spinlock_t	lock;
-	char		*name;
-	struct device	*dev;
-	struct page *avp_vector_page;	/* dummy page shared by all AS's */
+#define SMMU_CONFIG 0x010
+#define  SMMU_CONFIG_ENABLE (1 << 0)
 
-	/*
-	 * Register image savers for suspend/resume
-	 */
-	unsigned long translation_enable_0;
-	unsigned long translation_enable_1;
-	unsigned long translation_enable_2;
-	unsigned long asid_security;
+#define SMMU_TLB_CONFIG 0x14
+#define  SMMU_TLB_CONFIG_HIT_UNDER_MISS (1 << 29)
+#define  SMMU_TLB_CONFIG_ROUND_ROBIN_ARBITRATION (1 << 28)
+#define  SMMU_TLB_CONFIG_ACTIVE_LINES(x) ((x) & 0x3f)
 
-	struct dentry *debugfs_root;
-	struct smmu_debugfs_info *debugfs_info;
+#define SMMU_PTC_CONFIG 0x18
+#define  SMMU_PTC_CONFIG_ENABLE (1 << 29)
+#define  SMMU_PTC_CONFIG_REQ_LIMIT(x) (((x) & 0x0f) << 24)
+#define  SMMU_PTC_CONFIG_INDEX_MAP(x) ((x) & 0x3f)
 
-	struct device_node *ahb;
+#define SMMU_PTB_ASID 0x01c
+#define  SMMU_PTB_ASID_VALUE(x) ((x) & 0x7f)
 
-	int		num_as;
-	struct smmu_as	as[0];		/* Run-time allocated array */
-};
+#define SMMU_PTB_DATA 0x020
+#define  SMMU_PTB_DATA_VALUE(page, attr) (page_to_phys(page) >> 12 | (attr))
 
-static struct smmu_device *smmu_handle; /* unique for a system */
+#define SMMU_MK_PDE(page, attr) (page_to_phys(page) >> SMMU_PTE_SHIFT | (attr))
 
-/*
- *	SMMU register accessors
- */
-static bool inline smmu_valid_reg(struct smmu_device *smmu,
-				  void __iomem *addr)
-{
-	int i;
+#define SMMU_TLB_FLUSH 0x030
+#define  SMMU_TLB_FLUSH_VA_MATCH_ALL     (0 << 0)
+#define  SMMU_TLB_FLUSH_VA_MATCH_SECTION (2 << 0)
+#define  SMMU_TLB_FLUSH_VA_MATCH_GROUP   (3 << 0)
+#define  SMMU_TLB_FLUSH_ASID(x)          (((x) & 0x7f) << 24)
+#define  SMMU_TLB_FLUSH_VA_SECTION(addr) ((((addr) & 0xffc00000) >> 12) | \
+					  SMMU_TLB_FLUSH_VA_MATCH_SECTION)
+#define  SMMU_TLB_FLUSH_VA_GROUP(addr)   ((((addr) & 0xffffc000) >> 12) | \
+					  SMMU_TLB_FLUSH_VA_MATCH_GROUP)
+#define  SMMU_TLB_FLUSH_ASID_MATCH       (1 << 31)
 
-	for (i = 0; i < smmu->nregs; i++) {
-		if (addr < smmu->regs[i])
-			break;
-		if (addr <= smmu->rege[i])
-			return true;
-	}
+#define SMMU_PTC_FLUSH 0x034
+#define  SMMU_PTC_FLUSH_TYPE_ALL (0 << 0)
+#define  SMMU_PTC_FLUSH_TYPE_ADR (1 << 0)
 
-	return false;
-}
+#define SMMU_PTC_FLUSH_HI 0x9b8
+#define  SMMU_PTC_FLUSH_HI_MASK 0x3
 
-static inline u32 smmu_read(struct smmu_device *smmu, size_t offs)
-{
-	void __iomem *addr = smmu->regbase + offs;
+/* per-SWGROUP SMMU_*_ASID register */
+#define SMMU_ASID_ENABLE (1 << 31)
+#define SMMU_ASID_MASK 0x7f
+#define SMMU_ASID_VALUE(x) ((x) & SMMU_ASID_MASK)
 
-	BUG_ON(!smmu_valid_reg(smmu, addr));
+/* page table definitions */
+#define SMMU_NUM_PDE 1024
+#define SMMU_NUM_PTE 1024
 
-	return readl(addr);
-}
+#define SMMU_SIZE_PD (SMMU_NUM_PDE * 4)
+#define SMMU_SIZE_PT (SMMU_NUM_PTE * 4)
 
-static inline void smmu_write(struct smmu_device *smmu, u32 val, size_t offs)
-{
-	void __iomem *addr = smmu->regbase + offs;
+#define SMMU_PDE_SHIFT 22
+#define SMMU_PTE_SHIFT 12
 
-	BUG_ON(!smmu_valid_reg(smmu, addr));
+#define SMMU_PFN_MASK 0x000fffff
 
-	writel(val, addr);
-}
+#define SMMU_PD_READABLE	(1 << 31)
+#define SMMU_PD_WRITABLE	(1 << 30)
+#define SMMU_PD_NONSECURE	(1 << 29)
 
-#define VA_PAGE_TO_PA(va, page)	\
-	(page_to_phys(page) + ((unsigned long)(va) & ~PAGE_MASK))
+#define SMMU_PDE_READABLE	(1 << 31)
+#define SMMU_PDE_WRITABLE	(1 << 30)
+#define SMMU_PDE_NONSECURE	(1 << 29)
+#define SMMU_PDE_NEXT		(1 << 28)
 
-#define FLUSH_CPU_DCACHE(va, page, size)	\
-	do {	\
-		unsigned long _pa_ = VA_PAGE_TO_PA(va, page);		\
-		__cpuc_flush_dcache_area((void *)(va), (size_t)(size));	\
-		outer_flush_range(_pa_, _pa_+(size_t)(size));		\
-	} while (0)
+#define SMMU_PTE_READABLE	(1 << 31)
+#define SMMU_PTE_WRITABLE	(1 << 30)
+#define SMMU_PTE_NONSECURE	(1 << 29)
 
-/*
- * Any interaction between any block on PPSB and a block on APB or AHB
- * must have these read-back barriers to ensure the APB/AHB bus
- * transaction is complete before initiating activity on the PPSB
- * block.
- */
-#define FLUSH_SMMU_REGS(smmu)	smmu_read(smmu, SMMU_CONFIG)
+#define SMMU_PDE_ATTR		(SMMU_PDE_READABLE | SMMU_PDE_WRITABLE | \
+				 SMMU_PDE_NONSECURE)
+#define SMMU_PTE_ATTR		(SMMU_PTE_READABLE | SMMU_PTE_WRITABLE | \
+				 SMMU_PTE_NONSECURE)
 
-#define smmu_client_hwgrp(c) (u32)((c)->dev->platform_data)
-
-static int __smmu_client_set_hwgrp(struct smmu_client *c,
-				   unsigned long map, int on)
+static inline void smmu_flush_ptc(struct tegra_smmu *smmu, struct page *page,
+				  unsigned long offset)
 {
-	int i;
-	struct smmu_as *as = c->as;
-	u32 val, offs, mask = SMMU_ASID_ENABLE(as->asid);
-	struct smmu_device *smmu = as->smmu;
-
-	WARN_ON(!on && map);
-	if (on && !map)
-		return -EINVAL;
-	if (!on)
-		map = smmu_client_hwgrp(c);
-
-	for_each_set_bit(i, &map, HWGRP_COUNT) {
-		offs = HWGRP_ASID_REG(i);
-		val = smmu_read(smmu, offs);
-		if (on) {
-			if (WARN_ON(val & mask))
-				goto err_hw_busy;
-			val |= mask;
-		} else {
-			WARN_ON((val & mask) == mask);
-			val &= ~mask;
+	phys_addr_t phys = page ? page_to_phys(page) : 0;
+	u32 value;
+
+	if (page) {
+		offset &= ~(smmu->mc->soc->atom_size - 1);
+
+		if (smmu->mc->soc->num_address_bits > 32) {
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+			value = (phys >> 32) & SMMU_PTC_FLUSH_HI_MASK;
+#else
+			value = 0;
+#endif
+			smmu_writel(smmu, value, SMMU_PTC_FLUSH_HI);
 		}
-		smmu_write(smmu, val, offs);
-	}
-	FLUSH_SMMU_REGS(smmu);
-	c->hwgrp = map;
-	return 0;
 
-err_hw_busy:
-	for_each_set_bit(i, &map, HWGRP_COUNT) {
-		offs = HWGRP_ASID_REG(i);
-		val = smmu_read(smmu, offs);
-		val &= ~mask;
-		smmu_write(smmu, val, offs);
+		value = (phys + offset) | SMMU_PTC_FLUSH_TYPE_ADR;
+	} else {
+		value = SMMU_PTC_FLUSH_TYPE_ALL;
 	}
-	return -EBUSY;
+
+	smmu_writel(smmu, value, SMMU_PTC_FLUSH);
 }
 
-static int smmu_client_set_hwgrp(struct smmu_client *c, u32 map, int on)
+static inline void smmu_flush_tlb(struct tegra_smmu *smmu)
 {
-	u32 val;
-	unsigned long flags;
-	struct smmu_as *as = c->as;
-	struct smmu_device *smmu = as->smmu;
-
-	spin_lock_irqsave(&smmu->lock, flags);
-	val = __smmu_client_set_hwgrp(c, map, on);
-	spin_unlock_irqrestore(&smmu->lock, flags);
-	return val;
+	smmu_writel(smmu, SMMU_TLB_FLUSH_VA_MATCH_ALL, SMMU_TLB_FLUSH);
 }
 
-/*
- * Flush all TLB entries and all PTC entries
- * Caller must lock smmu
- */
-static void smmu_flush_regs(struct smmu_device *smmu, int enable)
+static inline void smmu_flush_tlb_asid(struct tegra_smmu *smmu,
+				       unsigned long asid)
 {
-	u32 val;
-
-	smmu_write(smmu, SMMU_PTC_FLUSH_TYPE_ALL, SMMU_PTC_FLUSH);
-	FLUSH_SMMU_REGS(smmu);
-	val = SMMU_TLB_FLUSH_VA_MATCH_ALL |
-		SMMU_TLB_FLUSH_ASID_MATCH_disable;
-	smmu_write(smmu, val, SMMU_TLB_FLUSH);
+	u32 value;
 
-	if (enable)
-		smmu_write(smmu, SMMU_CONFIG_ENABLE, SMMU_CONFIG);
-	FLUSH_SMMU_REGS(smmu);
+	value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) |
+		SMMU_TLB_FLUSH_VA_MATCH_ALL;
+	smmu_writel(smmu, value, SMMU_TLB_FLUSH);
 }
 
-static int smmu_setup_regs(struct smmu_device *smmu)
+static inline void smmu_flush_tlb_section(struct tegra_smmu *smmu,
+					  unsigned long asid,
+					  unsigned long iova)
 {
-	int i;
-	u32 val;
+	u32 value;
 
-	for (i = 0; i < smmu->num_as; i++) {
-		struct smmu_as *as = &smmu->as[i];
-		struct smmu_client *c;
-
-		smmu_write(smmu, SMMU_PTB_ASID_CUR(as->asid), SMMU_PTB_ASID);
-		val = as->pdir_page ?
-			SMMU_MK_PDIR(as->pdir_page, as->pdir_attr) :
-			SMMU_PTB_DATA_RESET_VAL;
-		smmu_write(smmu, val, SMMU_PTB_DATA);
-
-		list_for_each_entry(c, &as->client, list)
-			__smmu_client_set_hwgrp(c, c->hwgrp, 1);
-	}
-
-	smmu_write(smmu, smmu->translation_enable_0, SMMU_TRANSLATION_ENABLE_0);
-	smmu_write(smmu, smmu->translation_enable_1, SMMU_TRANSLATION_ENABLE_1);
-	smmu_write(smmu, smmu->translation_enable_2, SMMU_TRANSLATION_ENABLE_2);
-	smmu_write(smmu, smmu->asid_security, SMMU_ASID_SECURITY);
-	smmu_write(smmu, SMMU_TLB_CONFIG_RESET_VAL, SMMU_CACHE_CONFIG(_TLB));
-	smmu_write(smmu, SMMU_PTC_CONFIG_RESET_VAL, SMMU_CACHE_CONFIG(_PTC));
-
-	smmu_flush_regs(smmu, 1);
-
-	return tegra_ahb_enable_smmu(smmu->ahb);
+	value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) |
+		SMMU_TLB_FLUSH_VA_SECTION(iova);
+	smmu_writel(smmu, value, SMMU_TLB_FLUSH);
 }
 
-static void flush_ptc_and_tlb(struct smmu_device *smmu,
-		      struct smmu_as *as, dma_addr_t iova,
-		      unsigned long *pte, struct page *page, int is_pde)
+static inline void smmu_flush_tlb_group(struct tegra_smmu *smmu,
+					unsigned long asid,
+					unsigned long iova)
 {
-	u32 val;
-	unsigned long tlb_flush_va = is_pde
-		?  SMMU_TLB_FLUSH_VA(iova, SECTION)
-		:  SMMU_TLB_FLUSH_VA(iova, GROUP);
-
-	val = SMMU_PTC_FLUSH_TYPE_ADR | VA_PAGE_TO_PA(pte, page);
-	smmu_write(smmu, val, SMMU_PTC_FLUSH);
-	FLUSH_SMMU_REGS(smmu);
-	val = tlb_flush_va |
-		SMMU_TLB_FLUSH_ASID_MATCH__ENABLE |
-		(as->asid << SMMU_TLB_FLUSH_ASID_SHIFT);
-	smmu_write(smmu, val, SMMU_TLB_FLUSH);
-	FLUSH_SMMU_REGS(smmu);
-}
+	u32 value;
 
-static void free_ptbl(struct smmu_as *as, dma_addr_t iova)
-{
-	unsigned long pdn = SMMU_ADDR_TO_PDN(iova);
-	unsigned long *pdir = (unsigned long *)page_address(as->pdir_page);
-
-	if (pdir[pdn] != _PDE_VACANT(pdn)) {
-		dev_dbg(as->smmu->dev, "pdn: %lx\n", pdn);
-
-		ClearPageReserved(SMMU_EX_PTBL_PAGE(pdir[pdn]));
-		__free_page(SMMU_EX_PTBL_PAGE(pdir[pdn]));
-		pdir[pdn] = _PDE_VACANT(pdn);
-		FLUSH_CPU_DCACHE(&pdir[pdn], as->pdir_page, sizeof pdir[pdn]);
-		flush_ptc_and_tlb(as->smmu, as, iova, &pdir[pdn],
-				  as->pdir_page, 1);
-	}
+	value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) |
+		SMMU_TLB_FLUSH_VA_GROUP(iova);
+	smmu_writel(smmu, value, SMMU_TLB_FLUSH);
 }
 
-static void free_pdir(struct smmu_as *as)
+static inline void smmu_flush(struct tegra_smmu *smmu)
 {
-	unsigned addr;
-	int count;
-	struct device *dev = as->smmu->dev;
-
-	if (!as->pdir_page)
-		return;
-
-	addr = as->smmu->iovmm_base;
-	count = as->smmu->page_count;
-	while (count-- > 0) {
-		free_ptbl(as, addr);
-		addr += SMMU_PAGE_SIZE * SMMU_PTBL_COUNT;
-	}
-	ClearPageReserved(as->pdir_page);
-	__free_page(as->pdir_page);
-	as->pdir_page = NULL;
-	devm_kfree(dev, as->pte_count);
-	as->pte_count = NULL;
+	smmu_readl(smmu, SMMU_CONFIG);
 }
 
-/*
- * Maps PTBL for given iova and returns the PTE address
- * Caller must unmap the mapped PTBL returned in *ptbl_page_p
- */
-static unsigned long *locate_pte(struct smmu_as *as,
-				 dma_addr_t iova, bool allocate,
-				 struct page **ptbl_page_p,
-				 unsigned int **count)
+static int tegra_smmu_alloc_asid(struct tegra_smmu *smmu, unsigned int *idp)
 {
-	unsigned long ptn = SMMU_ADDR_TO_PFN(iova);
-	unsigned long pdn = SMMU_ADDR_TO_PDN(iova);
-	unsigned long *pdir = page_address(as->pdir_page);
-	unsigned long *ptbl;
-
-	if (pdir[pdn] != _PDE_VACANT(pdn)) {
-		/* Mapped entry table already exists */
-		*ptbl_page_p = SMMU_EX_PTBL_PAGE(pdir[pdn]);
-		ptbl = page_address(*ptbl_page_p);
-	} else if (!allocate) {
-		return NULL;
-	} else {
-		int pn;
-		unsigned long addr = SMMU_PDN_TO_ADDR(pdn);
+	unsigned long id;
 
-		/* Vacant - allocate a new page table */
-		dev_dbg(as->smmu->dev, "New PTBL pdn: %lx\n", pdn);
+	mutex_lock(&smmu->lock);
 
-		*ptbl_page_p = alloc_page(GFP_ATOMIC);
-		if (!*ptbl_page_p) {
-			dev_err(as->smmu->dev,
-				"failed to allocate smmu_device page table\n");
-			return NULL;
-		}
-		SetPageReserved(*ptbl_page_p);
-		ptbl = (unsigned long *)page_address(*ptbl_page_p);
-		for (pn = 0; pn < SMMU_PTBL_COUNT;
-		     pn++, addr += SMMU_PAGE_SIZE) {
-			ptbl[pn] = _PTE_VACANT(addr);
-		}
-		FLUSH_CPU_DCACHE(ptbl, *ptbl_page_p, SMMU_PTBL_SIZE);
-		pdir[pdn] = SMMU_MK_PDE(*ptbl_page_p,
-					as->pde_attr | _PDE_NEXT);
-		FLUSH_CPU_DCACHE(&pdir[pdn], as->pdir_page, sizeof pdir[pdn]);
-		flush_ptc_and_tlb(as->smmu, as, iova, &pdir[pdn],
-				  as->pdir_page, 1);
+	id = find_first_zero_bit(smmu->asids, smmu->soc->num_asids);
+	if (id >= smmu->soc->num_asids) {
+		mutex_unlock(&smmu->lock);
+		return -ENOSPC;
 	}
-	*count = &as->pte_count[pdn];
 
-	return &ptbl[ptn % SMMU_PTBL_COUNT];
+	set_bit(id, smmu->asids);
+	*idp = id;
+
+	mutex_unlock(&smmu->lock);
+	return 0;
 }
 
-#ifdef CONFIG_SMMU_SIG_DEBUG
-static void put_signature(struct smmu_as *as,
-			  dma_addr_t iova, unsigned long pfn)
+static void tegra_smmu_free_asid(struct tegra_smmu *smmu, unsigned int id)
 {
-	struct page *page;
-	unsigned long *vaddr;
-
-	page = pfn_to_page(pfn);
-	vaddr = page_address(page);
-	if (!vaddr)
-		return;
-
-	vaddr[0] = iova;
-	vaddr[1] = pfn << PAGE_SHIFT;
-	FLUSH_CPU_DCACHE(vaddr, page, sizeof(vaddr[0]) * 2);
+	mutex_lock(&smmu->lock);
+	clear_bit(id, smmu->asids);
+	mutex_unlock(&smmu->lock);
 }
-#else
-static inline void put_signature(struct smmu_as *as,
-				 unsigned long addr, unsigned long pfn)
+
+static bool tegra_smmu_capable(enum iommu_cap cap)
 {
+	return false;
 }
-#endif
 
-/*
- * Caller must not hold as->lock
- */
-static int alloc_pdir(struct smmu_as *as)
+static int tegra_smmu_domain_init(struct iommu_domain *domain)
 {
-	unsigned long *pdir, flags;
-	int pdn, err = 0;
-	u32 val;
-	struct smmu_device *smmu = as->smmu;
-	struct page *page;
-	unsigned int *cnt;
+	struct tegra_smmu_as *as;
+	unsigned int i;
+	uint32_t *pd;
 
-	/*
-	 * do the allocation, then grab as->lock
-	 */
-	cnt = devm_kzalloc(smmu->dev,
-			   sizeof(cnt[0]) * SMMU_PDIR_COUNT,
-			   GFP_KERNEL);
-	page = alloc_page(GFP_KERNEL | __GFP_DMA);
+	as = kzalloc(sizeof(*as), GFP_KERNEL);
+	if (!as)
+		return -ENOMEM;
 
-	spin_lock_irqsave(&as->lock, flags);
+	as->attr = SMMU_PD_READABLE | SMMU_PD_WRITABLE | SMMU_PD_NONSECURE;
+	as->domain = domain;
 
-	if (as->pdir_page) {
-		/* We raced, free the redundant */
-		err = -EAGAIN;
-		goto err_out;
+	as->pd = alloc_page(GFP_KERNEL | __GFP_DMA);
+	if (!as->pd) {
+		kfree(as);
+		return -ENOMEM;
 	}
 
-	if (!page || !cnt) {
-		dev_err(smmu->dev, "failed to allocate at %s\n", __func__);
-		err = -ENOMEM;
-		goto err_out;
+	as->count = alloc_page(GFP_KERNEL);
+	if (!as->count) {
+		__free_page(as->pd);
+		kfree(as);
+		return -ENOMEM;
 	}
 
-	as->pdir_page = page;
-	as->pte_count = cnt;
+	/* clear PDEs */
+	pd = page_address(as->pd);
+	SetPageReserved(as->pd);
 
-	SetPageReserved(as->pdir_page);
-	pdir = page_address(as->pdir_page);
+	for (i = 0; i < SMMU_NUM_PDE; i++)
+		pd[i] = 0;
 
-	for (pdn = 0; pdn < SMMU_PDIR_COUNT; pdn++)
-		pdir[pdn] = _PDE_VACANT(pdn);
-	FLUSH_CPU_DCACHE(pdir, as->pdir_page, SMMU_PDIR_SIZE);
-	val = SMMU_PTC_FLUSH_TYPE_ADR | VA_PAGE_TO_PA(pdir, as->pdir_page);
-	smmu_write(smmu, val, SMMU_PTC_FLUSH);
-	FLUSH_SMMU_REGS(as->smmu);
-	val = SMMU_TLB_FLUSH_VA_MATCH_ALL |
-		SMMU_TLB_FLUSH_ASID_MATCH__ENABLE |
-		(as->asid << SMMU_TLB_FLUSH_ASID_SHIFT);
-	smmu_write(smmu, val, SMMU_TLB_FLUSH);
-	FLUSH_SMMU_REGS(as->smmu);
+	/* clear PDE usage counters */
+	pd = page_address(as->count);
+	SetPageReserved(as->count);
 
-	spin_unlock_irqrestore(&as->lock, flags);
-
-	return 0;
+	for (i = 0; i < SMMU_NUM_PDE; i++)
+		pd[i] = 0;
 
-err_out:
-	spin_unlock_irqrestore(&as->lock, flags);
+	domain->priv = as;
 
-	devm_kfree(smmu->dev, cnt);
-	if (page)
-		__free_page(page);
-	return err;
+	return 0;
 }
 
-static void __smmu_iommu_unmap(struct smmu_as *as, dma_addr_t iova)
+static void tegra_smmu_domain_destroy(struct iommu_domain *domain)
 {
-	unsigned long *pte;
-	struct page *page;
-	unsigned int *count;
+	struct tegra_smmu_as *as = domain->priv;
 
-	pte = locate_pte(as, iova, false, &page, &count);
-	if (WARN_ON(!pte))
-		return;
+	/* TODO: free page directory and page tables */
+	ClearPageReserved(as->pd);
 
-	if (WARN_ON(*pte == _PTE_VACANT(iova)))
-		return;
-
-	*pte = _PTE_VACANT(iova);
-	FLUSH_CPU_DCACHE(pte, page, sizeof(*pte));
-	flush_ptc_and_tlb(as->smmu, as, iova, pte, page, 0);
-	if (!--(*count))
-		free_ptbl(as, iova);
+	kfree(as);
 }
 
-static void __smmu_iommu_map_pfn(struct smmu_as *as, dma_addr_t iova,
-				 unsigned long pfn)
+static const struct tegra_smmu_swgroup *
+tegra_smmu_find_swgroup(struct tegra_smmu *smmu, unsigned int swgroup)
 {
-	struct smmu_device *smmu = as->smmu;
-	unsigned long *pte;
-	unsigned int *count;
-	struct page *page;
+	const struct tegra_smmu_swgroup *group = NULL;
+	unsigned int i;
 
-	pte = locate_pte(as, iova, true, &page, &count);
-	if (WARN_ON(!pte))
-		return;
+	for (i = 0; i < smmu->soc->num_swgroups; i++) {
+		if (smmu->soc->swgroups[i].swgroup == swgroup) {
+			group = &smmu->soc->swgroups[i];
+			break;
+		}
+	}
 
-	if (*pte == _PTE_VACANT(iova))
-		(*count)++;
-	*pte = SMMU_PFN_TO_PTE(pfn, as->pte_attr);
-	if (unlikely((*pte == _PTE_VACANT(iova))))
-		(*count)--;
-	FLUSH_CPU_DCACHE(pte, page, sizeof(*pte));
-	flush_ptc_and_tlb(smmu, as, iova, pte, page, 0);
-	put_signature(as, iova, pfn);
+	return group;
 }
 
-static int smmu_iommu_map(struct iommu_domain *domain, unsigned long iova,
-			  phys_addr_t pa, size_t bytes, int prot)
+static void tegra_smmu_enable(struct tegra_smmu *smmu, unsigned int swgroup,
+			      unsigned int asid)
 {
-	struct smmu_as *as = domain->priv;
-	unsigned long pfn = __phys_to_pfn(pa);
-	unsigned long flags;
+	const struct tegra_smmu_swgroup *group;
+	unsigned int i;
+	u32 value;
 
-	dev_dbg(as->smmu->dev, "[%d] %08lx:%pa\n", as->asid, iova, &pa);
+	for (i = 0; i < smmu->soc->num_clients; i++) {
+		const struct tegra_mc_client *client = &smmu->soc->clients[i];
 
-	if (!pfn_valid(pfn))
-		return -ENOMEM;
-
-	spin_lock_irqsave(&as->lock, flags);
-	__smmu_iommu_map_pfn(as, iova, pfn);
-	spin_unlock_irqrestore(&as->lock, flags);
-	return 0;
-}
-
-static size_t smmu_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
-			       size_t bytes)
-{
-	struct smmu_as *as = domain->priv;
-	unsigned long flags;
+		if (client->swgroup != swgroup)
+			continue;
 
-	dev_dbg(as->smmu->dev, "[%d] %08lx\n", as->asid, iova);
+		value = smmu_readl(smmu, client->smmu.reg);
+		value |= BIT(client->smmu.bit);
+		smmu_writel(smmu, value, client->smmu.reg);
+	}
 
-	spin_lock_irqsave(&as->lock, flags);
-	__smmu_iommu_unmap(as, iova);
-	spin_unlock_irqrestore(&as->lock, flags);
-	return SMMU_PAGE_SIZE;
+	group = tegra_smmu_find_swgroup(smmu, swgroup);
+	if (group) {
+		value = smmu_readl(smmu, group->reg);
+		value &= ~SMMU_ASID_MASK;
+		value |= SMMU_ASID_VALUE(asid);
+		value |= SMMU_ASID_ENABLE;
+		smmu_writel(smmu, value, group->reg);
+	}
 }
 
-static phys_addr_t smmu_iommu_iova_to_phys(struct iommu_domain *domain,
-					   dma_addr_t iova)
+static void tegra_smmu_disable(struct tegra_smmu *smmu, unsigned int swgroup,
+			       unsigned int asid)
 {
-	struct smmu_as *as = domain->priv;
-	unsigned long *pte;
-	unsigned int *count;
-	struct page *page;
-	unsigned long pfn;
-	unsigned long flags;
+	const struct tegra_smmu_swgroup *group;
+	unsigned int i;
+	u32 value;
 
-	spin_lock_irqsave(&as->lock, flags);
+	group = tegra_smmu_find_swgroup(smmu, swgroup);
+	if (group) {
+		value = smmu_readl(smmu, group->reg);
+		value &= ~SMMU_ASID_MASK;
+		value |= SMMU_ASID_VALUE(asid);
+		value &= ~SMMU_ASID_ENABLE;
+		smmu_writel(smmu, value, group->reg);
+	}
 
-	pte = locate_pte(as, iova, true, &page, &count);
-	pfn = *pte & SMMU_PFN_MASK;
-	WARN_ON(!pfn_valid(pfn));
-	dev_dbg(as->smmu->dev,
-		"iova:%08llx pfn:%08lx asid:%d\n", (unsigned long long)iova,
-		 pfn, as->asid);
+	for (i = 0; i < smmu->soc->num_clients; i++) {
+		const struct tegra_mc_client *client = &smmu->soc->clients[i];
 
-	spin_unlock_irqrestore(&as->lock, flags);
-	return PFN_PHYS(pfn);
-}
+		if (client->swgroup != swgroup)
+			continue;
 
-static bool smmu_iommu_capable(enum iommu_cap cap)
-{
-	return false;
+		value = smmu_readl(smmu, client->smmu.reg);
+		value &= ~BIT(client->smmu.bit);
+		smmu_writel(smmu, value, client->smmu.reg);
+	}
 }
 
-static int smmu_iommu_attach_dev(struct iommu_domain *domain,
-				 struct device *dev)
+static int tegra_smmu_as_prepare(struct tegra_smmu *smmu,
+				 struct tegra_smmu_as *as)
 {
-	struct smmu_as *as = domain->priv;
-	struct smmu_device *smmu = as->smmu;
-	struct smmu_client *client, *c;
-	u32 map;
+	u32 value;
 	int err;
 
-	client = devm_kzalloc(smmu->dev, sizeof(*c), GFP_KERNEL);
-	if (!client)
-		return -ENOMEM;
-	client->dev = dev;
-	client->as = as;
-	map = (unsigned long)dev->platform_data;
-	if (!map)
-		return -EINVAL;
-
-	err = smmu_client_enable_hwgrp(client, map);
-	if (err)
-		goto err_hwgrp;
-
-	spin_lock(&as->client_lock);
-	list_for_each_entry(c, &as->client, list) {
-		if (c->dev == dev) {
-			dev_err(smmu->dev,
-				"%s is already attached\n", dev_name(c->dev));
-			err = -EINVAL;
-			goto err_client;
-		}
+	if (as->use_count > 0) {
+		as->use_count++;
+		return 0;
 	}
-	list_add(&client->list, &as->client);
-	spin_unlock(&as->client_lock);
 
-	/*
-	 * Reserve "page zero" for AVP vectors using a common dummy
-	 * page.
-	 */
-	if (map & HWG_AVPC) {
-		struct page *page;
+	err = tegra_smmu_alloc_asid(smmu, &as->id);
+	if (err < 0)
+		return err;
 
-		page = as->smmu->avp_vector_page;
-		__smmu_iommu_map_pfn(as, 0, page_to_pfn(page));
+	smmu->soc->ops->flush_dcache(as->pd, 0, SMMU_SIZE_PD);
+	smmu_flush_ptc(smmu, as->pd, 0);
+	smmu_flush_tlb_asid(smmu, as->id);
 
-		pr_info("Reserve \"page zero\" for AVP vectors using a common dummy\n");
-	}
+	smmu_writel(smmu, as->id & 0x7f, SMMU_PTB_ASID);
+	value = SMMU_PTB_DATA_VALUE(as->pd, as->attr);
+	smmu_writel(smmu, value, SMMU_PTB_DATA);
+	smmu_flush(smmu);
 
-	dev_dbg(smmu->dev, "%s is attached\n", dev_name(dev));
-	return 0;
+	as->smmu = smmu;
+	as->use_count++;
 
-err_client:
-	smmu_client_disable_hwgrp(client);
-	spin_unlock(&as->client_lock);
-err_hwgrp:
-	devm_kfree(smmu->dev, client);
-	return err;
+	return 0;
 }
 
-static void smmu_iommu_detach_dev(struct iommu_domain *domain,
-				  struct device *dev)
+static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu,
+				    struct tegra_smmu_as *as)
 {
-	struct smmu_as *as = domain->priv;
-	struct smmu_device *smmu = as->smmu;
-	struct smmu_client *c;
-
-	spin_lock(&as->client_lock);
-
-	list_for_each_entry(c, &as->client, list) {
-		if (c->dev == dev) {
-			smmu_client_disable_hwgrp(c);
-			list_del(&c->list);
-			devm_kfree(smmu->dev, c);
-			c->as = NULL;
-			dev_dbg(smmu->dev,
-				"%s is detached\n", dev_name(c->dev));
-			goto out;
-		}
-	}
-	dev_err(smmu->dev, "Couldn't find %s\n", dev_name(dev));
-out:
-	spin_unlock(&as->client_lock);
+	if (--as->use_count > 0)
+		return;
+
+	tegra_smmu_free_asid(smmu, as->id);
+	as->smmu = NULL;
 }
 
-static int smmu_iommu_domain_init(struct iommu_domain *domain)
+static int tegra_smmu_attach_dev(struct iommu_domain *domain,
+				 struct device *dev)
 {
-	int i, err = -EAGAIN;
-	unsigned long flags;
-	struct smmu_as *as;
-	struct smmu_device *smmu = smmu_handle;
+	struct tegra_smmu *smmu = dev->archdata.iommu;
+	struct tegra_smmu_as *as = domain->priv;
+	struct device_node *np = dev->of_node;
+	struct of_phandle_args args;
+	unsigned int index = 0, count = 0;
+	int err = 0;
 
-	/* Look for a free AS with lock held */
-	for  (i = 0; i < smmu->num_as; i++) {
-		as = &smmu->as[i];
+	while (!of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index,
+					   &args)) {
+		unsigned int swgroup = args.args[0];
 
-		if (as->pdir_page)
+		index++;	/* advance before the skip path below */
+		if (args.np != smmu->dev->of_node) {
+			of_node_put(args.np);
 			continue;
+		}
 
-		err = alloc_pdir(as);
-		if (!err)
-			goto found;
+		of_node_put(args.np);
 
-		if (err != -EAGAIN)
-			break;
+		err = tegra_smmu_as_prepare(smmu, as);
+		if (err < 0)
+			return err;
+		tegra_smmu_enable(smmu, swgroup, as->id);
+		count++;	/* specifiers that target this SMMU */
 	}
-	if (i == smmu->num_as)
-		dev_err(smmu->dev,  "no free AS\n");
-	return err;
 
-found:
-	spin_lock_irqsave(&smmu->lock, flags);
+	if (count == 0)
+		return -ENODEV;
 
-	/* Update PDIR register */
-	smmu_write(smmu, SMMU_PTB_ASID_CUR(as->asid), SMMU_PTB_ASID);
-	smmu_write(smmu,
-		   SMMU_MK_PDIR(as->pdir_page, as->pdir_attr), SMMU_PTB_DATA);
-	FLUSH_SMMU_REGS(smmu);
+	return 0;
+}
 
-	spin_unlock_irqrestore(&smmu->lock, flags);
+static void tegra_smmu_detach_dev(struct iommu_domain *domain, struct device *dev)
+{
+	struct tegra_smmu_as *as = domain->priv;
+	struct device_node *np = dev->of_node;
+	struct tegra_smmu *smmu = as->smmu;
+	struct of_phandle_args args;
+	unsigned int index = 0;
 
-	domain->priv = as;
+	while (!of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index,
+					   &args)) {
+		unsigned int swgroup = args.args[0];
 
-	domain->geometry.aperture_start = smmu->iovmm_base;
-	domain->geometry.aperture_end   = smmu->iovmm_base +
-		smmu->page_count * SMMU_PAGE_SIZE - 1;
-	domain->geometry.force_aperture = true;
+		index++;	/* advance before the skip path below */
+		if (args.np != smmu->dev->of_node) {
+			of_node_put(args.np);
+			continue;
+		}
 
-	dev_dbg(smmu->dev, "smmu_as@%p\n", as);
+		of_node_put(args.np);
 
-	return 0;
+		tegra_smmu_disable(smmu, swgroup, as->id);
+		tegra_smmu_as_unprepare(smmu, as);
+	}
 }
 
-static void smmu_iommu_domain_destroy(struct iommu_domain *domain)
+static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova,
+		       struct page **pagep)
 {
-	struct smmu_as *as = domain->priv;
-	struct smmu_device *smmu = as->smmu;
-	unsigned long flags;
+	u32 *pd = page_address(as->pd), *pt, *count;
+	u32 pde = (iova >> SMMU_PDE_SHIFT) & 0x3ff;
+	u32 pte = (iova >> SMMU_PTE_SHIFT) & 0x3ff;
+	struct tegra_smmu *smmu = as->smmu;
+	struct page *page;
+	unsigned int i;
+
+	if (pd[pde] == 0) {
+		page = alloc_page(GFP_KERNEL | __GFP_DMA);
+		if (!page)
+			return NULL;
 
-	spin_lock_irqsave(&as->lock, flags);
+		pt = page_address(page);
+		SetPageReserved(page);
 
-	if (as->pdir_page) {
-		spin_lock(&smmu->lock);
-		smmu_write(smmu, SMMU_PTB_ASID_CUR(as->asid), SMMU_PTB_ASID);
-		smmu_write(smmu, SMMU_PTB_DATA_RESET_VAL, SMMU_PTB_DATA);
-		FLUSH_SMMU_REGS(smmu);
-		spin_unlock(&smmu->lock);
+		for (i = 0; i < SMMU_NUM_PTE; i++)
+			pt[i] = 0;
 
-		free_pdir(as);
-	}
+		smmu->soc->ops->flush_dcache(page, 0, SMMU_SIZE_PT);
 
-	if (!list_empty(&as->client)) {
-		struct smmu_client *c;
+		pd[pde] = SMMU_MK_PDE(page, SMMU_PDE_ATTR | SMMU_PDE_NEXT);
 
-		list_for_each_entry(c, &as->client, list)
-			smmu_iommu_detach_dev(domain, c->dev);
+		smmu->soc->ops->flush_dcache(as->pd, pde << 2, 4);
+		smmu_flush_ptc(smmu, as->pd, pde << 2);
+		smmu_flush_tlb_section(smmu, as->id, iova);
+		smmu_flush(smmu);
+	} else {
+		page = pfn_to_page(pd[pde] & SMMU_PFN_MASK);
+		pt = page_address(page);
 	}
 
-	spin_unlock_irqrestore(&as->lock, flags);
+	*pagep = page;
 
-	domain->priv = NULL;
-	dev_dbg(smmu->dev, "smmu_as@%p\n", as);
-}
+	/* Keep track of entries in this page table. */
+	count = page_address(as->count);
+	if (pt[pte] == 0)
+		count[pde]++;
 
-static const struct iommu_ops smmu_iommu_ops = {
-	.capable	= smmu_iommu_capable,
-	.domain_init	= smmu_iommu_domain_init,
-	.domain_destroy	= smmu_iommu_domain_destroy,
-	.attach_dev	= smmu_iommu_attach_dev,
-	.detach_dev	= smmu_iommu_detach_dev,
-	.map		= smmu_iommu_map,
-	.unmap		= smmu_iommu_unmap,
-	.map_sg		= default_iommu_map_sg,
-	.iova_to_phys	= smmu_iommu_iova_to_phys,
-	.pgsize_bitmap	= SMMU_IOMMU_PGSIZES,
-};
-
-/* Should be in the order of enum */
-static const char * const smmu_debugfs_mc[] = { "mc", };
-static const char * const smmu_debugfs_cache[] = {  "tlb", "ptc", };
+	return &pt[pte];
+}
 
-static ssize_t smmu_debugfs_stats_write(struct file *file,
-					const char __user *buffer,
-					size_t count, loff_t *pos)
+static void as_put_pte(struct tegra_smmu_as *as, dma_addr_t iova)
 {
-	struct smmu_debugfs_info *info;
-	struct smmu_device *smmu;
-	int i;
-	enum {
-		_OFF = 0,
-		_ON,
-		_RESET,
-	};
-	const char * const command[] = {
-		[_OFF]		= "off",
-		[_ON]		= "on",
-		[_RESET]	= "reset",
-	};
-	char str[] = "reset";
-	u32 val;
-	size_t offs;
+	u32 pde = (iova >> SMMU_PDE_SHIFT) & 0x3ff;
+	u32 pte = (iova >> SMMU_PTE_SHIFT) & 0x3ff;
+	u32 *count = page_address(as->count);
+	u32 *pd = page_address(as->pd), *pt;
+	struct page *page;
 
-	count = min_t(size_t, count, sizeof(str));
-	if (copy_from_user(str, buffer, count))
-		return -EINVAL;
+	page = pfn_to_page(pd[pde] & SMMU_PFN_MASK);
+	pt = page_address(page);
 
-	for (i = 0; i < ARRAY_SIZE(command); i++)
-		if (strncmp(str, command[i],
-			    strlen(command[i])) == 0)
-			break;
+	/*
+	 * Clear the entry first; once the table's last entry is gone, unhook
+	 * it from the page directory and return the page to the system.
+	 */
+	if (pt[pte] != 0) {
+		pt[pte] = 0;	/* must precede __free_page() below */
 
-	if (i == ARRAY_SIZE(command))
-		return -EINVAL;
-
-	info = file_inode(file)->i_private;
-	smmu = info->smmu;
-
-	offs = SMMU_CACHE_CONFIG(info->cache);
-	val = smmu_read(smmu, offs);
-	switch (i) {
-	case _OFF:
-		val &= ~SMMU_CACHE_CONFIG_STATS_ENABLE;
-		val &= ~SMMU_CACHE_CONFIG_STATS_TEST;
-		smmu_write(smmu, val, offs);
-		break;
-	case _ON:
-		val |= SMMU_CACHE_CONFIG_STATS_ENABLE;
-		val &= ~SMMU_CACHE_CONFIG_STATS_TEST;
-		smmu_write(smmu, val, offs);
-		break;
-	case _RESET:
-		val |= SMMU_CACHE_CONFIG_STATS_TEST;
-		smmu_write(smmu, val, offs);
-		val &= ~SMMU_CACHE_CONFIG_STATS_TEST;
-		smmu_write(smmu, val, offs);
-		break;
-	default:
-		BUG();
-		break;
+		if (--count[pde] == 0) {
+			pd[pde] = 0;
+			ClearPageReserved(page);
+			__free_page(page);
+		}
 	}
-
-	dev_dbg(smmu->dev, "%s() %08x, %08x @%08x\n", __func__,
-		val, smmu_read(smmu, offs), offs);
-
-	return count;
 }
 
-static int smmu_debugfs_stats_show(struct seq_file *s, void *v)
+static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
+			  phys_addr_t paddr, size_t size, int prot)
 {
-	struct smmu_debugfs_info *info = s->private;
-	struct smmu_device *smmu = info->smmu;
-	int i;
-	const char * const stats[] = { "hit", "miss", };
+	struct tegra_smmu_as *as = domain->priv;
+	struct tegra_smmu *smmu = as->smmu;
+	unsigned long offset;
+	struct page *page;
+	u32 *pte;
 
+	pte = as_get_pte(as, iova, &page);
+	if (!pte)
+		return -ENOMEM;
 
-	for (i = 0; i < ARRAY_SIZE(stats); i++) {
-		u32 val;
-		size_t offs;
+	*pte = __phys_to_pfn(paddr) | SMMU_PTE_ATTR;
+	offset = offset_in_page(pte);
 
-		offs = SMMU_STATS_CACHE_COUNT(info->mc, info->cache, i);
-		val = smmu_read(smmu, offs);
-		seq_printf(s, "%s:%08x ", stats[i], val);
+	smmu->soc->ops->flush_dcache(page, offset, 4);
+	smmu_flush_ptc(smmu, page, offset);
+	smmu_flush_tlb_group(smmu, as->id, iova);
+	smmu_flush(smmu);
 
-		dev_dbg(smmu->dev, "%s() %s %08x @%08x\n", __func__,
-			stats[i], val, offs);
-	}
-	seq_printf(s, "\n");
 	return 0;
 }
 
-static int smmu_debugfs_stats_open(struct inode *inode, struct file *file)
+static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
+			       size_t size)
 {
-	return single_open(file, smmu_debugfs_stats_show, inode->i_private);
-}
+	struct tegra_smmu_as *as = domain->priv;
+	struct tegra_smmu *smmu = as->smmu;
+	unsigned long offset;
+	struct page *page;
+	u32 *pte;
 
-static const struct file_operations smmu_debugfs_stats_fops = {
-	.open		= smmu_debugfs_stats_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= smmu_debugfs_stats_write,
-};
+	pte = as_get_pte(as, iova, &page);
+	if (!pte)
+		return 0;
 
-static void smmu_debugfs_delete(struct smmu_device *smmu)
-{
-	debugfs_remove_recursive(smmu->debugfs_root);
-	kfree(smmu->debugfs_info);
+	offset = offset_in_page(pte);
+	as_put_pte(as, iova);
+
+	smmu->soc->ops->flush_dcache(page, offset, 4);
+	smmu_flush_ptc(smmu, page, offset);
+	smmu_flush_tlb_group(smmu, as->id, iova);
+	smmu_flush(smmu);
+
+	return size;
 }
 
-static void smmu_debugfs_create(struct smmu_device *smmu)
+/* Translate @iova to a physical address; returns 0 if there is no mapping. */
+static phys_addr_t tegra_smmu_iova_to_phys(struct iommu_domain *domain,
+					   dma_addr_t iova)
 {
-	int i;
-	size_t bytes;
-	struct dentry *root;
-
-	bytes = ARRAY_SIZE(smmu_debugfs_mc) * ARRAY_SIZE(smmu_debugfs_cache) *
-		sizeof(*smmu->debugfs_info);
-	smmu->debugfs_info = kmalloc(bytes, GFP_KERNEL);
-	if (!smmu->debugfs_info)
-		return;
-
-	root = debugfs_create_dir(dev_name(smmu->dev), NULL);
-	if (!root)
-		goto err_out;
-	smmu->debugfs_root = root;
-
-	for (i = 0; i < ARRAY_SIZE(smmu_debugfs_mc); i++) {
-		int j;
-		struct dentry *mc;
-
-		mc = debugfs_create_dir(smmu_debugfs_mc[i], root);
-		if (!mc)
-			goto err_out;
-
-		for (j = 0; j < ARRAY_SIZE(smmu_debugfs_cache); j++) {
-			struct dentry *cache;
-			struct smmu_debugfs_info *info;
-
-			info = smmu->debugfs_info;
-			info += i * ARRAY_SIZE(smmu_debugfs_mc) + j;
-			info->smmu = smmu;
-			info->mc = i;
-			info->cache = j;
-
-			cache = debugfs_create_file(smmu_debugfs_cache[j],
-						    S_IWUGO | S_IRUGO, mc,
-						    (void *)info,
-						    &smmu_debugfs_stats_fops);
-			if (!cache)
-				goto err_out;
-		}
-	}
+	struct tegra_smmu_as *as = domain->priv;
+	struct page *page;
+	u32 *pte;
 
-	return;
+	/* the lookup can fail (map/unmap check for NULL too): not mapped */
+	pte = as_get_pte(as, iova, &page);
 
-err_out:
-	smmu_debugfs_delete(smmu);
+	return pte ? PFN_PHYS(*pte & SMMU_PFN_MASK) : 0;
 }
 
-static int tegra_smmu_suspend(struct device *dev)
+/* Return the SMMU of the memory controller at @np, or NULL if none. */
+static struct tegra_smmu *tegra_smmu_find(struct device_node *np)
 {
-	struct smmu_device *smmu = dev_get_drvdata(dev);
+	struct platform_device *pdev = of_find_device_by_node(np);
+	struct tegra_mc *mc = pdev ? platform_get_drvdata(pdev) : NULL;
 
-	smmu->translation_enable_0 = smmu_read(smmu, SMMU_TRANSLATION_ENABLE_0);
-	smmu->translation_enable_1 = smmu_read(smmu, SMMU_TRANSLATION_ENABLE_1);
-	smmu->translation_enable_2 = smmu_read(smmu, SMMU_TRANSLATION_ENABLE_2);
-	smmu->asid_security = smmu_read(smmu, SMMU_ASID_SECURITY);
-	return 0;
+	if (mc)
+		return mc->smmu;
+
+	/* the device lookup took a reference; drop it if no drvdata is set */
+	if (pdev)
+		put_device(&pdev->dev);
+
+	return NULL;
 }
 
-static int tegra_smmu_resume(struct device *dev)
+/* Attach the first SMMU found via @dev's "iommus" property, if any. */
+static int tegra_smmu_add_device(struct device *dev)
 {
-	struct smmu_device *smmu = dev_get_drvdata(dev);
-	unsigned long flags;
-	int err;
+	struct device_node *np = dev->of_node;
+	struct of_phandle_args args;
+	unsigned int index = 0;
 
-	spin_lock_irqsave(&smmu->lock, flags);
-	err = smmu_setup_regs(smmu);
-	spin_unlock_irqrestore(&smmu->lock, flags);
-	return err;
+	while (of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index,
+					  &args) == 0) {
+		struct tegra_smmu *smmu;
+
+		smmu = tegra_smmu_find(args.np);
+		/* the parser took a reference on args.np; release it */
+		of_node_put(args.np);
+
+		if (smmu) {
+			/* only a single IOMMU master interface is supported */
+			dev->archdata.iommu = smmu;
+			break;
+		}
+
+		index++;
+	}
+
+	return 0;
 }
 
-static int tegra_smmu_probe(struct platform_device *pdev)
+static void tegra_smmu_remove_device(struct device *dev)
 {
-	struct smmu_device *smmu;
-	struct device *dev = &pdev->dev;
-	int i, asids, err = 0;
-	dma_addr_t uninitialized_var(base);
-	size_t bytes, uninitialized_var(size);
+	dev->archdata.iommu = NULL;
+}
 
-	if (smmu_handle)
-		return -EIO;
+static const struct iommu_ops tegra_smmu_ops = {
+	.capable = tegra_smmu_capable,
+	.domain_init = tegra_smmu_domain_init,
+	.domain_destroy = tegra_smmu_domain_destroy,
+	.attach_dev = tegra_smmu_attach_dev,
+	.detach_dev = tegra_smmu_detach_dev,
+	.add_device = tegra_smmu_add_device,
+	.remove_device = tegra_smmu_remove_device,
+	.map = tegra_smmu_map,
+	.unmap = tegra_smmu_unmap,
+	.map_sg = default_iommu_map_sg,
+	.iova_to_phys = tegra_smmu_iova_to_phys,
 
-	BUILD_BUG_ON(PAGE_SHIFT != SMMU_PAGE_SHIFT);
+	.pgsize_bitmap = SZ_4K,
+};
 
-	if (of_property_read_u32(dev->of_node, "nvidia,#asids", &asids))
-		return -ENODEV;
+static void tegra_smmu_ahb_enable(void)
+{
+	static const struct of_device_id ahb_match[] = {
+		{ .compatible = "nvidia,tegra30-ahb", },
+		{ }
+	};
+	struct device_node *ahb;
 
-	bytes = sizeof(*smmu) + asids * sizeof(*smmu->as);
-	smmu = devm_kzalloc(dev, bytes, GFP_KERNEL);
-	if (!smmu) {
-		dev_err(dev, "failed to allocate smmu_device\n");
-		return -ENOMEM;
+	ahb = of_find_matching_node(NULL, ahb_match);
+	if (ahb) {
+		tegra_ahb_enable_smmu(ahb);
+		of_node_put(ahb);
 	}
+}
 
-	smmu->nregs = pdev->num_resources;
-	smmu->regs = devm_kzalloc(dev, 2 * smmu->nregs * sizeof(*smmu->regs),
-				  GFP_KERNEL);
-	smmu->rege = smmu->regs + smmu->nregs;
-	if (!smmu->regs)
-		return -ENOMEM;
-	for (i = 0; i < smmu->nregs; i++) {
-		struct resource *res;
-
-		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
-		smmu->regs[i] = devm_ioremap_resource(&pdev->dev, res);
-		if (IS_ERR(smmu->regs[i]))
-			return PTR_ERR(smmu->regs[i]);
-		smmu->rege[i] = smmu->regs[i] + resource_size(res) - 1;
-	}
-	/* Same as "mc" 1st regiter block start address */
-	smmu->regbase = (void __iomem *)((u32)smmu->regs[0] & PAGE_MASK);
+struct tegra_smmu *tegra_smmu_probe(struct device *dev,
+				    const struct tegra_smmu_soc *soc,
+				    struct tegra_mc *mc)
+{
+	struct tegra_smmu *smmu;
+	size_t size;
+	u32 value;
+	int err;
 
-	err = of_get_dma_window(dev->of_node, NULL, 0, NULL, &base, &size);
-	if (err)
-		return -ENODEV;
+	/* This can happen on Tegra20 which doesn't have an SMMU */
+	if (!soc)
+		return NULL;
 
-	if (size & SMMU_PAGE_MASK)
-		return -EINVAL;
+	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
+	if (!smmu)
+		return ERR_PTR(-ENOMEM);
 
-	size >>= SMMU_PAGE_SHIFT;
-	if (!size)
-		return -EINVAL;
+	/*
+	 * This is a bit of a hack. Ideally we'd want to simply return this
+	 * value. However the IOMMU registration process will attempt to add
+	 * all devices to the IOMMU when bus_set_iommu() is called. In order
+	 * not to rely on global variables to track the IOMMU instance, we
+	 * set it here so that it can be looked up from the .add_device()
+	 * callback via the IOMMU device's .drvdata field.
+	 */
+	mc->smmu = smmu;
 
-	smmu->ahb = of_parse_phandle(dev->of_node, "nvidia,ahb", 0);
-	if (!smmu->ahb)
-		return -ENODEV;
+	size = BITS_TO_LONGS(soc->num_asids) * sizeof(long);
 
-	smmu->dev = dev;
-	smmu->num_as = asids;
-	smmu->iovmm_base = base;
-	smmu->page_count = size;
-
-	smmu->translation_enable_0 = ~0;
-	smmu->translation_enable_1 = ~0;
-	smmu->translation_enable_2 = ~0;
-	smmu->asid_security = 0;
-
-	for (i = 0; i < smmu->num_as; i++) {
-		struct smmu_as *as = &smmu->as[i];
-
-		as->smmu = smmu;
-		as->asid = i;
-		as->pdir_attr = _PDIR_ATTR;
-		as->pde_attr = _PDE_ATTR;
-		as->pte_attr = _PTE_ATTR;
-
-		spin_lock_init(&as->lock);
-		spin_lock_init(&as->client_lock);
-		INIT_LIST_HEAD(&as->client);
-	}
-	spin_lock_init(&smmu->lock);
-	err = smmu_setup_regs(smmu);
-	if (err)
-		return err;
-	platform_set_drvdata(pdev, smmu);
+	smmu->asids = devm_kzalloc(dev, size, GFP_KERNEL);
+	if (!smmu->asids)
+		return ERR_PTR(-ENOMEM);
 
-	smmu->avp_vector_page = alloc_page(GFP_KERNEL);
-	if (!smmu->avp_vector_page)
-		return -ENOMEM;
+	mutex_init(&smmu->lock);
 
-	smmu_debugfs_create(smmu);
-	smmu_handle = smmu;
-	bus_set_iommu(&platform_bus_type, &smmu_iommu_ops);
-	return 0;
-}
+	smmu->regs = mc->regs;
+	smmu->soc = soc;
+	smmu->dev = dev;
+	smmu->mc = mc;
 
-static int tegra_smmu_remove(struct platform_device *pdev)
-{
-	struct smmu_device *smmu = platform_get_drvdata(pdev);
-	int i;
+	value = SMMU_PTC_CONFIG_ENABLE | SMMU_PTC_CONFIG_INDEX_MAP(0x3f);
 
-	smmu_debugfs_delete(smmu);
+	if (soc->supports_request_limit)
+		value |= SMMU_PTC_CONFIG_REQ_LIMIT(8);
 
-	smmu_write(smmu, SMMU_CONFIG_DISABLE, SMMU_CONFIG);
-	for (i = 0; i < smmu->num_as; i++)
-		free_pdir(&smmu->as[i]);
-	__free_page(smmu->avp_vector_page);
-	smmu_handle = NULL;
-	return 0;
-}
+	smmu_writel(smmu, value, SMMU_PTC_CONFIG);
 
-static const struct dev_pm_ops tegra_smmu_pm_ops = {
-	.suspend	= tegra_smmu_suspend,
-	.resume		= tegra_smmu_resume,
-};
+	value = SMMU_TLB_CONFIG_HIT_UNDER_MISS |
+		SMMU_TLB_CONFIG_ACTIVE_LINES(0x20);
 
-static const struct of_device_id tegra_smmu_of_match[] = {
-	{ .compatible = "nvidia,tegra30-smmu", },
-	{ },
-};
-MODULE_DEVICE_TABLE(of, tegra_smmu_of_match);
-
-static struct platform_driver tegra_smmu_driver = {
-	.probe		= tegra_smmu_probe,
-	.remove		= tegra_smmu_remove,
-	.driver = {
-		.owner	= THIS_MODULE,
-		.name	= "tegra-smmu",
-		.pm	= &tegra_smmu_pm_ops,
-		.of_match_table = tegra_smmu_of_match,
-	},
-};
+	if (soc->supports_round_robin_arbitration)
+		value |= SMMU_TLB_CONFIG_ROUND_ROBIN_ARBITRATION;
 
-static int tegra_smmu_init(void)
-{
-	return platform_driver_register(&tegra_smmu_driver);
-}
+	smmu_writel(smmu, value, SMMU_TLB_CONFIG);
 
-static void __exit tegra_smmu_exit(void)
-{
-	platform_driver_unregister(&tegra_smmu_driver);
-}
+	smmu_flush_ptc(smmu, NULL, 0);
+	smmu_flush_tlb(smmu);
+	smmu_writel(smmu, SMMU_CONFIG_ENABLE, SMMU_CONFIG);
+	smmu_flush(smmu);
+
+	tegra_smmu_ahb_enable();
 
-subsys_initcall(tegra_smmu_init);
-module_exit(tegra_smmu_exit);
+	err = bus_set_iommu(&platform_bus_type, &tegra_smmu_ops);
+	if (err < 0)
+		return ERR_PTR(err);
 
-MODULE_DESCRIPTION("IOMMU API for SMMU in Tegra30");
-MODULE_AUTHOR("Hiroshi DOYU <hdoyu@nvidia.com>");
-MODULE_ALIAS("platform:tegra-smmu");
-MODULE_LICENSE("GPL v2");
+	return smmu;
+}
diff --git a/drivers/memory/Kconfig b/drivers/memory/Kconfig
index 6d91c27..08bd4cf 100644
--- a/drivers/memory/Kconfig
+++ b/drivers/memory/Kconfig
@@ -61,16 +61,6 @@ config TEGRA20_MC
 	  analysis, especially for IOMMU/GART(Graphics Address
 	  Relocation Table) module.
 
-config TEGRA30_MC
-	bool "Tegra30 Memory Controller(MC) driver"
-	default y
-	depends on ARCH_TEGRA_3x_SOC
-	help
-	  This driver is for the Memory Controller(MC) module available
-	  in Tegra30 SoCs, mainly for a address translation fault
-	  analysis, especially for IOMMU/SMMU(System Memory Management
-	  Unit) module.
-
 config FSL_CORENET_CF
 	tristate "Freescale CoreNet Error Reporting"
 	depends on FSL_SOC_BOOKE
@@ -85,4 +75,6 @@ config FSL_IFC
 	bool
 	depends on FSL_SOC
 
+source "drivers/memory/tegra/Kconfig"
+
 endif
diff --git a/drivers/memory/Makefile b/drivers/memory/Makefile
index c32d319..ad98bb2 100644
--- a/drivers/memory/Makefile
+++ b/drivers/memory/Makefile
@@ -12,4 +12,5 @@ obj-$(CONFIG_FSL_CORENET_CF)	+= fsl-corenet-cf.o
 obj-$(CONFIG_FSL_IFC)		+= fsl_ifc.o
 obj-$(CONFIG_MVEBU_DEVBUS)	+= mvebu-devbus.o
 obj-$(CONFIG_TEGRA20_MC)	+= tegra20-mc.o
-obj-$(CONFIG_TEGRA30_MC)	+= tegra30-mc.o
+
+obj-$(CONFIG_TEGRA_MC)		+= tegra/
diff --git a/drivers/memory/tegra/Kconfig b/drivers/memory/tegra/Kconfig
new file mode 100644
index 0000000..5710876
--- /dev/null
+++ b/drivers/memory/tegra/Kconfig
@@ -0,0 +1,7 @@
+config TEGRA_MC
+	bool "NVIDIA Tegra Memory Controller support"
+	default y
+	depends on ARCH_TEGRA
+	help
+	  This driver supports the Memory Controller (MC) hardware found on
+	  NVIDIA Tegra SoCs.
diff --git a/drivers/memory/tegra/Makefile b/drivers/memory/tegra/Makefile
new file mode 100644
index 0000000..0d9f497
--- /dev/null
+++ b/drivers/memory/tegra/Makefile
@@ -0,0 +1,7 @@
+tegra-mc-y := mc.o
+
+tegra-mc-$(CONFIG_ARCH_TEGRA_3x_SOC)  += tegra30.o
+tegra-mc-$(CONFIG_ARCH_TEGRA_114_SOC) += tegra114.o
+tegra-mc-$(CONFIG_ARCH_TEGRA_124_SOC) += tegra124.o
+
+obj-$(CONFIG_TEGRA_MC) += tegra-mc.o
diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
new file mode 100644
index 0000000..fe3c44e
--- /dev/null
+++ b/drivers/memory/tegra/mc.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright (C) 2014 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "mc.h"
+
+#define MC_INTSTATUS 0x000
+#define  MC_INT_DECERR_MTS (1 << 16)
+#define  MC_INT_SECERR_SEC (1 << 13)
+#define  MC_INT_DECERR_VPR (1 << 12)
+#define  MC_INT_INVALID_APB_ASID_UPDATE (1 << 11)
+#define  MC_INT_INVALID_SMMU_PAGE (1 << 10)
+#define  MC_INT_ARBITRATION_EMEM (1 << 9)
+#define  MC_INT_SECURITY_VIOLATION (1 << 8)
+#define  MC_INT_DECERR_EMEM (1 << 6)
+
+#define MC_INTMASK 0x004
+
+#define MC_ERR_STATUS 0x08
+#define  MC_ERR_STATUS_TYPE_SHIFT 28
+#define  MC_ERR_STATUS_TYPE_INVALID_SMMU_PAGE (6 << MC_ERR_STATUS_TYPE_SHIFT)
+#define  MC_ERR_STATUS_TYPE_MASK (0x7 << MC_ERR_STATUS_TYPE_SHIFT)
+#define  MC_ERR_STATUS_READABLE (1 << 27)
+#define  MC_ERR_STATUS_WRITABLE (1 << 26)
+#define  MC_ERR_STATUS_NONSECURE (1 << 25)
+#define  MC_ERR_STATUS_ADR_HI_SHIFT 20
+#define  MC_ERR_STATUS_ADR_HI_MASK 0x3
+#define  MC_ERR_STATUS_SECURITY (1 << 17)
+#define  MC_ERR_STATUS_RW (1 << 16)
+#define  MC_ERR_STATUS_CLIENT_MASK 0x7f
+
+#define MC_ERR_ADR 0x0c
+
+#define MC_EMEM_ARB_CFG 0x90
+#define  MC_EMEM_ARB_CFG_CYCLES_PER_UPDATE(x)	(((x) & 0x1ff) << 0)
+#define  MC_EMEM_ARB_CFG_CYCLES_PER_UPDATE_MASK	0x1ff
+#define MC_EMEM_ARB_MISC0 0xd8
+
+static const struct of_device_id tegra_mc_of_match[] = {
+#ifdef CONFIG_ARCH_TEGRA_3x_SOC
+	{ .compatible = "nvidia,tegra30-mc", .data = &tegra30_mc_soc },
+#endif
+#ifdef CONFIG_ARCH_TEGRA_114_SOC
+	{ .compatible = "nvidia,tegra114-mc", .data = &tegra114_mc_soc },
+#endif
+#ifdef CONFIG_ARCH_TEGRA_124_SOC
+	{ .compatible = "nvidia,tegra124-mc", .data = &tegra124_mc_soc },
+#endif
+	{ }
+};
+MODULE_DEVICE_TABLE(of, tegra_mc_of_match);
+
+/* Program the arbitration tick length and per-client latency allowances. */
+static int tegra_mc_setup_latency_allowance(struct tegra_mc *mc)
+{
+	unsigned long long tick;
+	unsigned int i;
+	u32 value;
+
+	/* number of MC clock cycles per tick; computed in 64 bits */
+	tick = (unsigned long long)mc->tick * clk_get_rate(mc->clk);
+	do_div(tick, NSEC_PER_SEC);
+
+	value = readl(mc->regs + MC_EMEM_ARB_CFG);
+	value &= ~MC_EMEM_ARB_CFG_CYCLES_PER_UPDATE_MASK;
+	value |= MC_EMEM_ARB_CFG_CYCLES_PER_UPDATE(tick);
+	writel(value, mc->regs + MC_EMEM_ARB_CFG);
+
+	/* write latency allowance defaults */
+	for (i = 0; i < mc->soc->num_clients; i++) {
+		const struct tegra_mc_la *la = &mc->soc->clients[i].la;
+
+		value = readl(mc->regs + la->reg);
+		value &= ~(la->mask << la->shift);
+		value |= (la->def & la->mask) << la->shift;
+		writel(value, mc->regs + la->reg);
+	}
+
+	return 0;
+}
+
+static const char *const status_names[32] = {
+	[ 1] = "External interrupt",
+	[ 6] = "EMEM address decode error",
+	[ 8] = "Security violation",
+	[ 9] = "EMEM arbitration error",
+	[10] = "Page fault",
+	[11] = "Invalid APB ASID update",
+	[12] = "VPR violation",
+	[13] = "Secure carveout violation",
+	[16] = "MTS carveout violation",
+};
+
+static const char *const error_names[8] = {
+	[2] = "EMEM decode error",
+	[3] = "TrustZone violation",
+	[4] = "Carveout violation",
+	[6] = "SMMU translation error",
+};
+
+/*
+ * Interrupt handler: log every enabled, pending MC interrupt together with
+ * the decoded contents of the error status registers, then acknowledge it.
+ *
+ * The IRQ is requested with IRQF_SHARED, so IRQ_NONE is returned when none
+ * of the enabled interrupts is pending.
+ */
+static irqreturn_t tegra_mc_irq(int irq, void *data)
+{
+	struct tegra_mc *mc = data;
+	unsigned long status;
+	unsigned int bit;
+
+	/* only consider interrupts that are enabled in the interrupt mask */
+	status = mc_readl(mc, MC_INTSTATUS) & mc_readl(mc, MC_INTMASK);
+	if (!status)
+		return IRQ_NONE;
+
+	for_each_set_bit(bit, &status, 32) {
+		const char *error = status_names[bit] ?: "unknown";
+		const char *client = "unknown", *desc;
+		const char *direction, *secure;
+		phys_addr_t addr = 0;
+		unsigned int i;
+		char perm[7];
+		u8 id, type;
+		u32 value;
+
+		value = mc_readl(mc, MC_ERR_STATUS);
+
+		/* the upper address bits only fit into a 64-bit phys_addr_t */
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+		if (mc->soc->num_address_bits > 32) {
+			addr = ((value >> MC_ERR_STATUS_ADR_HI_SHIFT) &
+				MC_ERR_STATUS_ADR_HI_MASK);
+			addr <<= 32;
+		}
+#endif
+
+		if (value & MC_ERR_STATUS_RW)
+			direction = "write";
+		else
+			direction = "read";
+
+		if (value & MC_ERR_STATUS_SECURITY)
+			secure = "secure ";
+		else
+			secure = "";
+
+		id = value & MC_ERR_STATUS_CLIENT_MASK;
+
+		/* translate the client ID into a name, if it is known */
+		for (i = 0; i < mc->soc->num_clients; i++) {
+			if (mc->soc->clients[i].id == id) {
+				client = mc->soc->clients[i].name;
+				break;
+			}
+		}
+
+		type = (value & MC_ERR_STATUS_TYPE_MASK) >>
+		       MC_ERR_STATUS_TYPE_SHIFT;
+		/* not every error type has a name; never pass NULL to %s */
+		desc = error_names[type] ?: "unknown";
+
+		switch (value & MC_ERR_STATUS_TYPE_MASK) {
+		case MC_ERR_STATUS_TYPE_INVALID_SMMU_PAGE:
+			/* also decode the readable/writable/nonsecure bits */
+			perm[0] = ' ';
+			perm[1] = '[';
+			perm[2] = (value & MC_ERR_STATUS_READABLE) ? 'R' : '-';
+			perm[3] = (value & MC_ERR_STATUS_WRITABLE) ? 'W' : '-';
+			perm[4] = (value & MC_ERR_STATUS_NONSECURE) ? '-' : 'S';
+			perm[5] = ']';
+			perm[6] = '\0';
+			break;
+
+		default:
+			perm[0] = '\0';
+			break;
+		}
+
+		/* MC_ERR_ADR supplies the low 32 bits of the error address */
+		value = mc_readl(mc, MC_ERR_ADR);
+		addr |= value;
+
+		dev_err_ratelimited(mc->dev, "%s: %s%s @%pa: %s (%s%s)\n",
+				    client, secure, direction, &addr, error,
+				    desc, perm);
+	}
+
+	/* clear interrupts */
+	mc_writel(mc, status, MC_INTSTATUS);
+
+	return IRQ_HANDLED;
+}
+
+static int tegra_mc_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	struct resource *res;
+	struct tegra_mc *mc;
+	u32 value;
+	int err;
+
+	match = of_match_node(tegra_mc_of_match, pdev->dev.of_node);
+	if (!match)
+		return -ENODEV;
+
+	mc = devm_kzalloc(&pdev->dev, sizeof(*mc), GFP_KERNEL);
+	if (!mc)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, mc);
+	mc->soc = match->data;
+	mc->dev = &pdev->dev;
+	mc->tick = 30; /* length of MC tick in nanoseconds */
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	mc->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(mc->regs))
+		return PTR_ERR(mc->regs);
+
+	mc->clk = devm_clk_get(&pdev->dev, "mc");
+	if (IS_ERR(mc->clk)) {
+		dev_err(&pdev->dev, "failed to get MC clock: %ld\n",
+			PTR_ERR(mc->clk));
+		return PTR_ERR(mc->clk);
+	}
+
+	err = tegra_mc_setup_latency_allowance(mc);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to setup latency allowance: %d\n",
+			err);
+		return err;
+	}
+
+	if (IS_ENABLED(CONFIG_TEGRA_IOMMU_SMMU)) {
+		mc->smmu = tegra_smmu_probe(&pdev->dev, mc->soc->smmu, mc);
+		if (IS_ERR(mc->smmu)) {
+			dev_err(&pdev->dev, "failed to probe SMMU: %ld\n",
+				PTR_ERR(mc->smmu));
+			return PTR_ERR(mc->smmu);
+		}
+	}
+
+	/* check the result as a signed int; mc->irq is printed as unsigned */
+	err = platform_get_irq(pdev, 0);
+	if (err < 0) {
+		dev_err(&pdev->dev, "interrupt not specified\n");
+		return err;
+	}
+	mc->irq = err;
+
+	err = devm_request_irq(&pdev->dev, mc->irq, tegra_mc_irq, IRQF_SHARED,
+			       dev_name(&pdev->dev), mc);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to request IRQ#%u: %d\n", mc->irq,
+			err);
+		return err;
+	}
+
+	value = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
+		MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE |
+		MC_INT_ARBITRATION_EMEM | MC_INT_SECURITY_VIOLATION |
+		MC_INT_DECERR_EMEM;
+	mc_writel(mc, value, MC_INTMASK);
+
+	return 0;
+}
+
+static struct platform_driver tegra_mc_driver = {
+	.driver = {
+		.name = "tegra-mc",
+		.of_match_table = tegra_mc_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.prevent_deferred_probe = true,
+	.probe = tegra_mc_probe,
+};
+
+static int tegra_mc_init(void)
+{
+	return platform_driver_register(&tegra_mc_driver);
+}
+arch_initcall(tegra_mc_init);
+
+MODULE_AUTHOR("Thierry Reding <treding@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA Tegra Memory Controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h
new file mode 100644
index 0000000..d5d2114
--- /dev/null
+++ b/drivers/memory/tegra/mc.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MEMORY_TEGRA_MC_H
+#define MEMORY_TEGRA_MC_H
+
+#include <linux/io.h>
+#include <linux/types.h>
+
+#include <soc/tegra/mc.h>
+
+static inline u32 mc_readl(struct tegra_mc *mc, unsigned long offset)
+{
+	return readl(mc->regs + offset);
+}
+
+static inline void mc_writel(struct tegra_mc *mc, u32 value,
+			     unsigned long offset)
+{
+	writel(value, mc->regs + offset);
+}
+
+#ifdef CONFIG_ARCH_TEGRA_3x_SOC
+extern const struct tegra_mc_soc tegra30_mc_soc;
+#endif
+
+#ifdef CONFIG_ARCH_TEGRA_114_SOC
+extern const struct tegra_mc_soc tegra114_mc_soc;
+#endif
+
+#ifdef CONFIG_ARCH_TEGRA_124_SOC
+extern const struct tegra_mc_soc tegra124_mc_soc;
+#endif
+
+#endif /* MEMORY_TEGRA_MC_H */
diff --git a/drivers/memory/tegra/tegra114.c b/drivers/memory/tegra/tegra114.c
new file mode 100644
index 0000000..511e9a2
--- /dev/null
+++ b/drivers/memory/tegra/tegra114.c
@@ -0,0 +1,948 @@
+/*
+ * Copyright (C) 2014 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/of.h>
+#include <linux/mm.h>
+
+#include <asm/cacheflush.h>
+
+#include <dt-bindings/memory/tegra114-mc.h>
+
+#include "mc.h"
+
+static const struct tegra_mc_client tegra114_mc_clients[] = {
+	{
+		.id = 0x00,
+		.name = "ptcr",
+		.swgroup = TEGRA_SWGROUP_PTC,
+	}, {
+		.id = 0x01,
+		.name = "display0a",
+		.swgroup = TEGRA_SWGROUP_DC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 1,
+		},
+		.la = {
+			.reg = 0x2e8,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x4e,
+		},
+	}, {
+		.id = 0x02,
+		.name = "display0ab",
+		.swgroup = TEGRA_SWGROUP_DCB,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 2,
+		},
+		.la = {
+			.reg = 0x2f4,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x4e,
+		},
+	}, {
+		.id = 0x03,
+		.name = "display0b",
+		.swgroup = TEGRA_SWGROUP_DC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 3,
+		},
+		.la = {
+			.reg = 0x2e8,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x4e,
+		},
+	}, {
+		.id = 0x04,
+		.name = "display0bb",
+		.swgroup = TEGRA_SWGROUP_DCB,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 4,
+		},
+		.la = {
+			.reg = 0x2f4,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x4e,
+		},
+	}, {
+		.id = 0x05,
+		.name = "display0c",
+		.swgroup = TEGRA_SWGROUP_DC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 5,
+		},
+		.la = {
+			.reg = 0x2ec,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x4e,
+		},
+	}, {
+		.id = 0x06,
+		.name = "display0cb",
+		.swgroup = TEGRA_SWGROUP_DCB,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 6,
+		},
+		.la = {
+			.reg = 0x2f8,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x4e,
+		},
+	}, {
+		.id = 0x09,
+		.name = "eppup",
+		.swgroup = TEGRA_SWGROUP_EPP,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 9,
+		},
+		.la = {
+			.reg = 0x300,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x33,
+		},
+	}, {
+		.id = 0x0a,
+		.name = "g2pr",
+		.swgroup = TEGRA_SWGROUP_G2,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 10,
+		},
+		.la = {
+			.reg = 0x308,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x09,
+		},
+	}, {
+		.id = 0x0b,
+		.name = "g2sr",
+		.swgroup = TEGRA_SWGROUP_G2,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 11,
+		},
+		.la = {
+			.reg = 0x308,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x09,
+		},
+	}, {
+		.id = 0x0f,
+		.name = "avpcarm7r",
+		.swgroup = TEGRA_SWGROUP_AVPC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 15,
+		},
+		.la = {
+			.reg = 0x2e4,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x04,
+		},
+	}, {
+		.id = 0x10,
+		.name = "displayhc",
+		.swgroup = TEGRA_SWGROUP_DC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 16,
+		},
+		.la = {
+			.reg = 0x2f0,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x68,
+		},
+	}, {
+		.id = 0x11,
+		.name = "displayhcb",
+		.swgroup = TEGRA_SWGROUP_DCB,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 17,
+		},
+		.la = {
+			.reg = 0x2fc,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x68,
+		},
+	}, {
+		.id = 0x12,
+		.name = "fdcdrd",
+		.swgroup = TEGRA_SWGROUP_NV,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 18,
+		},
+		.la = {
+			.reg = 0x334,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x0c,
+		},
+	}, {
+		.id = 0x13,
+		.name = "fdcdrd2",
+		.swgroup = TEGRA_SWGROUP_NV,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 19,
+		},
+		.la = {
+			.reg = 0x33c,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x0c,
+		},
+	}, {
+		.id = 0x14,
+		.name = "g2dr",
+		.swgroup = TEGRA_SWGROUP_G2,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 20,
+		},
+		.la = {
+			.reg = 0x30c,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x0a,
+		},
+	}, {
+		.id = 0x15,
+		.name = "hdar",
+		.swgroup = TEGRA_SWGROUP_HDA,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 21,
+		},
+		.la = {
+			.reg = 0x318,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x16,
+		.name = "host1xdmar",
+		.swgroup = TEGRA_SWGROUP_HC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 22,
+		},
+		.la = {
+			.reg = 0x310,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x10,
+		},
+	}, {
+		.id = 0x17,
+		.name = "host1xr",
+		.swgroup = TEGRA_SWGROUP_HC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 23,
+		},
+		.la = {
+			.reg = 0x310,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0xa5,
+		},
+	}, {
+		.id = 0x18,
+		.name = "idxsrd",
+		.swgroup = TEGRA_SWGROUP_NV,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 24,
+		},
+		.la = {
+			.reg = 0x334,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x0b,
+		},
+	}, {
+		.id = 0x1c,
+		.name = "msencsrd",
+		.swgroup = TEGRA_SWGROUP_MSENC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 28,
+		},
+		.la = {
+			.reg = 0x328,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x1d,
+		.name = "ppcsahbdmar",
+		.swgroup = TEGRA_SWGROUP_PPCS,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 29,
+		},
+		.la = {
+			.reg = 0x344,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x50,
+		},
+	}, {
+		.id = 0x1e,
+		.name = "ppcsahbslvr",
+		.swgroup = TEGRA_SWGROUP_PPCS,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 30,
+		},
+		.la = {
+			.reg = 0x344,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0xe8,
+		},
+	}, {
+		.id = 0x20,
+		.name = "texl2srd",
+		.swgroup = TEGRA_SWGROUP_NV,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 0,
+		},
+		.la = {
+			.reg = 0x338,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x0c,
+		},
+	}, {
+		.id = 0x22,
+		.name = "vdebsevr",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 2,
+		},
+		.la = {
+			.reg = 0x354,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x23,
+		.name = "vdember",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 3,
+		},
+		.la = {
+			.reg = 0x354,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x24,
+		.name = "vdemcer",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 4,
+		},
+		.la = {
+			.reg = 0x358,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xb8,
+		},
+	}, {
+		.id = 0x25,
+		.name = "vdetper",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 5,
+		},
+		.la = {
+			.reg = 0x358,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0xee,
+		},
+	}, {
+		.id = 0x26,
+		.name = "mpcorelpr",
+		.swgroup = TEGRA_SWGROUP_MPCORELP,
+		.la = {
+			.reg = 0x324,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x04,
+		},
+	}, {
+		.id = 0x27,
+		.name = "mpcorer",
+		.swgroup = TEGRA_SWGROUP_MPCORE,
+		.la = {
+			.reg = 0x320,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x04,
+		},
+	}, {
+		.id = 0x28,
+		.name = "eppu",
+		.swgroup = TEGRA_SWGROUP_EPP,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 8,
+		},
+		.la = {
+			.reg = 0x300,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x33,
+		},
+	}, {
+		.id = 0x29,
+		.name = "eppv",
+		.swgroup = TEGRA_SWGROUP_EPP,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 9,
+		},
+		.la = {
+			.reg = 0x304,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x6c,
+		},
+	}, {
+		.id = 0x2a,
+		.name = "eppy",
+		.swgroup = TEGRA_SWGROUP_EPP,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 10,
+		},
+		.la = {
+			.reg = 0x304,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x6c,
+		},
+	}, {
+		.id = 0x2b,
+		.name = "msencswr",
+		.swgroup = TEGRA_SWGROUP_MSENC,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 11,
+		},
+		.la = {
+			.reg = 0x328,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x2c,
+		.name = "viwsb",
+		.swgroup = TEGRA_SWGROUP_VI,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 12,
+		},
+		.la = {
+			.reg = 0x364,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x47,
+		},
+	}, {
+		.id = 0x2d,
+		.name = "viwu",
+		.swgroup = TEGRA_SWGROUP_VI,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 13,
+		},
+		.la = {
+			.reg = 0x368,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x2e,
+		.name = "viwv",
+		.swgroup = TEGRA_SWGROUP_VI,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 14,
+		},
+		.la = {
+			.reg = 0x368,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x2f,
+		.name = "viwy",
+		.swgroup = TEGRA_SWGROUP_VI,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 15,
+		},
+		.la = {
+			.reg = 0x36c,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x47,
+		},
+	}, {
+		.id = 0x30,
+		.name = "g2dw",
+		.swgroup = TEGRA_SWGROUP_G2,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 16,
+		},
+		.la = {
+			.reg = 0x30c,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x9,
+		},
+	}, {
+		.id = 0x32,
+		.name = "avpcarm7w",
+		.swgroup = TEGRA_SWGROUP_AVPC,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 18,
+		},
+		.la = {
+			.reg = 0x2e4,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x0e,
+		},
+	}, {
+		.id = 0x33,
+		.name = "fdcdwr",
+		.swgroup = TEGRA_SWGROUP_NV,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 19,
+		},
+		.la = {
+			.reg = 0x338,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x10,
+		},
+	}, {
+		.id = 0x34,
+		.name = "fdcwr2",
+		.swgroup = TEGRA_SWGROUP_NV,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 20,
+		},
+		.la = {
+			.reg = 0x340,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x10,
+		},
+	}, {
+		.id = 0x35,
+		.name = "hdaw",
+		.swgroup = TEGRA_SWGROUP_HDA,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 21,
+		},
+		.la = {
+			.reg = 0x318,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x36,
+		.name = "host1xw",
+		.swgroup = TEGRA_SWGROUP_HC,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 22,
+		},
+		.la = {
+			.reg = 0x314,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x25,
+		},
+	}, {
+		.id = 0x37,
+		.name = "ispw",
+		.swgroup = TEGRA_SWGROUP_ISP,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 23,
+		},
+		.la = {
+			.reg = 0x31c,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x38,
+		.name = "mpcorelpw",
+		.swgroup = TEGRA_SWGROUP_MPCORELP,
+		.la = {
+			.reg = 0x324,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x39,
+		.name = "mpcorew",
+		.swgroup = TEGRA_SWGROUP_MPCORE,
+		.la = {
+			.reg = 0x320,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x0e,
+		},
+	}, {
+		.id = 0x3b,
+		.name = "ppcsahbdmaw",
+		.swgroup = TEGRA_SWGROUP_PPCS,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 27,
+		},
+		.la = {
+			.reg = 0x348,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xa5,
+		},
+	}, {
+		.id = 0x3c,
+		.name = "ppcsahbslvw",
+		.swgroup = TEGRA_SWGROUP_PPCS,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 28,
+		},
+		.la = {
+			.reg = 0x348,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0xe8,
+		},
+	}, {
+		.id = 0x3e,
+		.name = "vdebsevw",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 30,
+		},
+		.la = {
+			.reg = 0x35c,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x3f,
+		.name = "vdedbgw",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 31,
+		},
+		.la = {
+			.reg = 0x35c,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x40,
+		.name = "vdembew",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 0,
+		},
+		.la = {
+			.reg = 0x360,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x89,
+		},
+	}, {
+		.id = 0x41,
+		.name = "vdetpmw",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 1,
+		},
+		.la = {
+			.reg = 0x360,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x59,
+		},
+	}, {
+		.id = 0x4a,
+		.name = "xusb_hostr",
+		.swgroup = TEGRA_SWGROUP_XUSB_HOST,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 10,
+		},
+		.la = {
+			.reg = 0x37c,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xa5,
+		},
+	}, {
+		.id = 0x4b,
+		.name = "xusb_hostw",
+		.swgroup = TEGRA_SWGROUP_XUSB_HOST,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 11,
+		},
+		.la = {
+			.reg = 0x37c,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0xa5,
+		},
+	}, {
+		.id = 0x4c,
+		.name = "xusb_devr",
+		.swgroup = TEGRA_SWGROUP_XUSB_DEV,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 12,
+		},
+		.la = {
+			.reg = 0x380,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xa5,
+		},
+	}, {
+		.id = 0x4d,
+		.name = "xusb_devw",
+		.swgroup = TEGRA_SWGROUP_XUSB_DEV,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 13,
+		},
+		.la = {
+			.reg = 0x380,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0xa5,
+		},
+	}, {
+		.id = 0x4e,
+		.name = "fdcdwr3",
+		.swgroup = TEGRA_SWGROUP_NV,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 14,
+		},
+		.la = {
+			.reg = 0x388,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x10,
+		},
+	}, {
+		.id = 0x4f,
+		.name = "fdcdrd3",
+		.swgroup = TEGRA_SWGROUP_NV,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 15,
+		},
+		.la = {
+			.reg = 0x384,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x0c,
+		},
+	}, {
+		.id = 0x50,
+		.name = "fdcwr4",
+		.swgroup = TEGRA_SWGROUP_NV,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 16,
+		},
+		.la = {
+			.reg = 0x388,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x10,
+		},
+	}, {
+		.id = 0x51,
+		.name = "fdcrd4",
+		.swgroup = TEGRA_SWGROUP_NV,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 17,
+		},
+		.la = {
+			.reg = 0x384,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x0c,
+		},
+	}, {
+		.id = 0x52,
+		.name = "emucifr",
+		.swgroup = TEGRA_SWGROUP_EMUCIF,
+		.la = {
+			.reg = 0x38c,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x04,
+		},
+	}, {
+		.id = 0x53,
+		.name = "emucifw",
+		.swgroup = TEGRA_SWGROUP_EMUCIF,
+		.la = {
+			.reg = 0x38c,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x0e,
+		},
+	}, {
+		.id = 0x54,
+		.name = "tsecsrd",
+		.swgroup = TEGRA_SWGROUP_TSEC,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 20,
+		},
+		.la = {
+			.reg = 0x390,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x50,
+		},
+	}, {
+		.id = 0x55,
+		.name = "tsecswr",
+		.swgroup = TEGRA_SWGROUP_TSEC,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 21,
+		},
+		.la = {
+			.reg = 0x390,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x50,
+		},
+	},
+};
+
+static const struct tegra_smmu_swgroup tegra114_swgroups[] = { /* swgroup ID -> .reg offset; presumably the group's SMMU ASID register - confirm in mc.h */
+	{ .swgroup = TEGRA_SWGROUP_DC,        .reg = 0x240 },
+	{ .swgroup = TEGRA_SWGROUP_DCB,       .reg = 0x244 },
+	{ .swgroup = TEGRA_SWGROUP_EPP,       .reg = 0x248 },
+	{ .swgroup = TEGRA_SWGROUP_G2,        .reg = 0x24c },
+	{ .swgroup = TEGRA_SWGROUP_AVPC,      .reg = 0x23c },
+	{ .swgroup = TEGRA_SWGROUP_NV,        .reg = 0x268 },
+	{ .swgroup = TEGRA_SWGROUP_HDA,       .reg = 0x254 },
+	{ .swgroup = TEGRA_SWGROUP_HC,        .reg = 0x250 },
+	{ .swgroup = TEGRA_SWGROUP_MSENC,     .reg = 0x264 },
+	{ .swgroup = TEGRA_SWGROUP_PPCS,      .reg = 0x270 },
+	{ .swgroup = TEGRA_SWGROUP_VDE,       .reg = 0x27c },
+	{ .swgroup = TEGRA_SWGROUP_VI,        .reg = 0x280 },
+	{ .swgroup = TEGRA_SWGROUP_ISP,       .reg = 0x258 },
+	{ .swgroup = TEGRA_SWGROUP_XUSB_HOST, .reg = 0x288 },
+	{ .swgroup = TEGRA_SWGROUP_XUSB_DEV,  .reg = 0x28c },
+	{ .swgroup = TEGRA_SWGROUP_TSEC,      .reg = 0x294 },
+};
+
+static void tegra114_flush_dcache(struct page *page, unsigned long offset,
+				  size_t size)
+{
+	void *vaddr = page_address(page) + offset; /* kernel virtual start of the range */
+	phys_addr_t start = page_to_phys(page) + offset, end = start + size;
+
+	__cpuc_flush_dcache_area(vaddr, size); /* CPU data cache, addressed virtually */
+	outer_flush_range(start, end); /* outer cache, addressed physically: [start, end) */
+}
+
+static const struct tegra_smmu_ops tegra114_smmu_ops = { /* SMMU callbacks for Tegra114 */
+	.flush_dcache = tegra114_flush_dcache, /* only callback set here */
+};
+
+static const struct tegra_smmu_soc tegra114_smmu_soc = { /* Tegra114 SMMU description, referenced by tegra114_mc_soc.smmu */
+	.clients = tegra114_mc_clients, /* same client table that tegra114_mc_soc uses */
+	.num_clients = ARRAY_SIZE(tegra114_mc_clients),
+	.swgroups = tegra114_swgroups,
+	.num_swgroups = ARRAY_SIZE(tegra114_swgroups),
+	.supports_round_robin_arbitration = false,
+	.supports_request_limit = false,
+	.num_asids = 4, /* presumably the number of SMMU address spaces - confirm */
+	.ops = &tegra114_smmu_ops,
+};
+
+const struct tegra_mc_soc tegra114_mc_soc = { /* non-static; presumably declared in mc.h for the MC core driver - confirm */
+	.clients = tegra114_mc_clients,
+	.num_clients = ARRAY_SIZE(tegra114_mc_clients),
+	.num_address_bits = 32,
+	.atom_size = 32, /* NOTE(review): unit not visible here (bytes?) - confirm */
+	.smmu = &tegra114_smmu_soc,
+};
diff --git a/drivers/memory/tegra/tegra124.c b/drivers/memory/tegra/tegra124.c
new file mode 100644
index 0000000..278d40b
--- /dev/null
+++ b/drivers/memory/tegra/tegra124.c
@@ -0,0 +1,995 @@
+/*
+ * Copyright (C) 2014 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/of.h>
+#include <linux/mm.h>
+
+#include <asm/cacheflush.h>
+
+#include <dt-bindings/memory/tegra124-mc.h>
+
+#include "mc.h"
+
+static const struct tegra_mc_client tegra124_mc_clients[] = { /* Tegra124 memory clients, sorted by ascending .id */
+	{ /* where .smmu is set below: .reg == 0x228 + 4 * (.id / 32) and .bit == .id % 32 */
+		.id = 0x00,
+		.name = "ptcr",
+		.swgroup = TEGRA_SWGROUP_PTC, /* no .smmu/.la initializers: both are zero-initialized */
+	}, {
+		.id = 0x01,
+		.name = "display0a",
+		.swgroup = TEGRA_SWGROUP_DC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 1,
+		},
+		.la = {
+			.reg = 0x2e8,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xc2,
+		},
+	}, {
+		.id = 0x02,
+		.name = "display0ab",
+		.swgroup = TEGRA_SWGROUP_DCB,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 2,
+		},
+		.la = {
+			.reg = 0x2f4,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xc6,
+		},
+	}, {
+		.id = 0x03,
+		.name = "display0b",
+		.swgroup = TEGRA_SWGROUP_DC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 3,
+		},
+		.la = {
+			.reg = 0x2e8,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x50,
+		},
+	}, {
+		.id = 0x04,
+		.name = "display0bb",
+		.swgroup = TEGRA_SWGROUP_DCB,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 4,
+		},
+		.la = {
+			.reg = 0x2f4,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x50,
+		},
+	}, {
+		.id = 0x05,
+		.name = "display0c",
+		.swgroup = TEGRA_SWGROUP_DC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 5,
+		},
+		.la = {
+			.reg = 0x2ec,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x50,
+		},
+	}, {
+		.id = 0x06,
+		.name = "display0cb",
+		.swgroup = TEGRA_SWGROUP_DCB,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 6,
+		},
+		.la = {
+			.reg = 0x2f8,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x50,
+		},
+	}, {
+		.id = 0x0e,
+		.name = "afir",
+		.swgroup = TEGRA_SWGROUP_AFI,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 14,
+		},
+		.la = {
+			.reg = 0x2e0,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x13,
+		},
+	}, {
+		.id = 0x0f,
+		.name = "avpcarm7r",
+		.swgroup = TEGRA_SWGROUP_AVPC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 15,
+		},
+		.la = {
+			.reg = 0x2e4,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x04,
+		},
+	}, {
+		.id = 0x10,
+		.name = "displayhc",
+		.swgroup = TEGRA_SWGROUP_DC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 16,
+		},
+		.la = {
+			.reg = 0x2f0,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x50,
+		},
+	}, {
+		.id = 0x11,
+		.name = "displayhcb",
+		.swgroup = TEGRA_SWGROUP_DCB,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 17,
+		},
+		.la = {
+			.reg = 0x2fc,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x50,
+		},
+	}, {
+		.id = 0x15,
+		.name = "hdar",
+		.swgroup = TEGRA_SWGROUP_HDA,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 21,
+		},
+		.la = {
+			.reg = 0x318,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x24,
+		},
+	}, {
+		.id = 0x16,
+		.name = "host1xdmar",
+		.swgroup = TEGRA_SWGROUP_HC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 22,
+		},
+		.la = {
+			.reg = 0x310,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x1e,
+		},
+	}, {
+		.id = 0x17,
+		.name = "host1xr",
+		.swgroup = TEGRA_SWGROUP_HC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 23,
+		},
+		.la = {
+			.reg = 0x310,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x50,
+		},
+	}, {
+		.id = 0x1c,
+		.name = "msencsrd",
+		.swgroup = TEGRA_SWGROUP_MSENC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 28,
+		},
+		.la = {
+			.reg = 0x328,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x23,
+		},
+	}, {
+		.id = 0x1d,
+		.name = "ppcsahbdmar",
+		.swgroup = TEGRA_SWGROUP_PPCS,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 29,
+		},
+		.la = {
+			.reg = 0x344,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x49,
+		},
+	}, {
+		.id = 0x1e,
+		.name = "ppcsahbslvr",
+		.swgroup = TEGRA_SWGROUP_PPCS,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 30,
+		},
+		.la = {
+			.reg = 0x344,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x1a,
+		},
+	}, {
+		.id = 0x1f,
+		.name = "satar",
+		.swgroup = TEGRA_SWGROUP_SATA,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 31,
+		},
+		.la = {
+			.reg = 0x350,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x65,
+		},
+	}, {
+		.id = 0x22,
+		.name = "vdebsevr",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 2,
+		},
+		.la = {
+			.reg = 0x354,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x4f,
+		},
+	}, {
+		.id = 0x23,
+		.name = "vdember",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 3,
+		},
+		.la = {
+			.reg = 0x354,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x3d,
+		},
+	}, {
+		.id = 0x24,
+		.name = "vdemcer",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 4,
+		},
+		.la = {
+			.reg = 0x358,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x66,
+		},
+	}, {
+		.id = 0x25,
+		.name = "vdetper",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 5,
+		},
+		.la = {
+			.reg = 0x358,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0xa5,
+		},
+	}, {
+		.id = 0x26,
+		.name = "mpcorelpr",
+		.swgroup = TEGRA_SWGROUP_MPCORELP,
+		.la = {
+			.reg = 0x324,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x04,
+		},
+	}, {
+		.id = 0x27,
+		.name = "mpcorer",
+		.swgroup = TEGRA_SWGROUP_MPCORE,
+		.la = {
+			.reg = 0x320,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x04,
+		},
+	}, {
+		.id = 0x2b,
+		.name = "msencswr",
+		.swgroup = TEGRA_SWGROUP_MSENC,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 11,
+		},
+		.la = {
+			.reg = 0x328,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x31,
+		.name = "afiw",
+		.swgroup = TEGRA_SWGROUP_AFI,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 17,
+		},
+		.la = {
+			.reg = 0x2e0,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x32,
+		.name = "avpcarm7w",
+		.swgroup = TEGRA_SWGROUP_AVPC,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 18,
+		},
+		.la = {
+			.reg = 0x2e4,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x35,
+		.name = "hdaw",
+		.swgroup = TEGRA_SWGROUP_HDA,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 21,
+		},
+		.la = {
+			.reg = 0x318,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x36,
+		.name = "host1xw",
+		.swgroup = TEGRA_SWGROUP_HC,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 22,
+		},
+		.la = {
+			.reg = 0x314,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x38,
+		.name = "mpcorelpw",
+		.swgroup = TEGRA_SWGROUP_MPCORELP,
+		.la = {
+			.reg = 0x324,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x39,
+		.name = "mpcorew",
+		.swgroup = TEGRA_SWGROUP_MPCORE,
+		.la = {
+			.reg = 0x320,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x3b,
+		.name = "ppcsahbdmaw",
+		.swgroup = TEGRA_SWGROUP_PPCS,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 27,
+		},
+		.la = {
+			.reg = 0x348,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x3c,
+		.name = "ppcsahbslvw",
+		.swgroup = TEGRA_SWGROUP_PPCS,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 28,
+		},
+		.la = {
+			.reg = 0x348,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x3d,
+		.name = "sataw",
+		.swgroup = TEGRA_SWGROUP_SATA,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 29,
+		},
+		.la = {
+			.reg = 0x350,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x65,
+		},
+	}, {
+		.id = 0x3e,
+		.name = "vdebsevw",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 30,
+		},
+		.la = {
+			.reg = 0x35c,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x3f,
+		.name = "vdedbgw",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 31,
+		},
+		.la = {
+			.reg = 0x35c,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x40,
+		.name = "vdembew",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 0,
+		},
+		.la = {
+			.reg = 0x360,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x41,
+		.name = "vdetpmw",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 1,
+		},
+		.la = {
+			.reg = 0x360,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x44,
+		.name = "ispra",
+		.swgroup = TEGRA_SWGROUP_ISP2,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 4,
+		},
+		.la = {
+			.reg = 0x370,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x18,
+		},
+	}, {
+		.id = 0x46,
+		.name = "ispwa",
+		.swgroup = TEGRA_SWGROUP_ISP2,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 6,
+		},
+		.la = {
+			.reg = 0x374,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x47,
+		.name = "ispwb",
+		.swgroup = TEGRA_SWGROUP_ISP2,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 7,
+		},
+		.la = {
+			.reg = 0x374,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x4a,
+		.name = "xusb_hostr",
+		.swgroup = TEGRA_SWGROUP_XUSB_HOST,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 10,
+		},
+		.la = {
+			.reg = 0x37c,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x39,
+		},
+	}, {
+		.id = 0x4b,
+		.name = "xusb_hostw",
+		.swgroup = TEGRA_SWGROUP_XUSB_HOST,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 11,
+		},
+		.la = {
+			.reg = 0x37c,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x4c,
+		.name = "xusb_devr",
+		.swgroup = TEGRA_SWGROUP_XUSB_DEV,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 12,
+		},
+		.la = {
+			.reg = 0x380,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x39,
+		},
+	}, {
+		.id = 0x4d,
+		.name = "xusb_devw",
+		.swgroup = TEGRA_SWGROUP_XUSB_DEV,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 13,
+		},
+		.la = {
+			.reg = 0x380,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x4e,
+		.name = "isprab",
+		.swgroup = TEGRA_SWGROUP_ISP2B,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 14,
+		},
+		.la = {
+			.reg = 0x384,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x18,
+		},
+	}, {
+		.id = 0x50,
+		.name = "ispwab",
+		.swgroup = TEGRA_SWGROUP_ISP2B,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 16,
+		},
+		.la = {
+			.reg = 0x388,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x51,
+		.name = "ispwbb",
+		.swgroup = TEGRA_SWGROUP_ISP2B,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 17,
+		},
+		.la = {
+			.reg = 0x388,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x54,
+		.name = "tsecsrd",
+		.swgroup = TEGRA_SWGROUP_TSEC,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 20,
+		},
+		.la = {
+			.reg = 0x390,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x9b,
+		},
+	}, {
+		.id = 0x55,
+		.name = "tsecswr",
+		.swgroup = TEGRA_SWGROUP_TSEC,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 21,
+		},
+		.la = {
+			.reg = 0x390,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x56,
+		.name = "a9avpscr",
+		.swgroup = TEGRA_SWGROUP_A9AVP,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 22,
+		},
+		.la = {
+			.reg = 0x3a4,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x04,
+		},
+	}, {
+		.id = 0x57,
+		.name = "a9avpscw",
+		.swgroup = TEGRA_SWGROUP_A9AVP,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 23,
+		},
+		.la = {
+			.reg = 0x3a4,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x58,
+		.name = "gpusrd",
+		.swgroup = TEGRA_SWGROUP_GPU,
+		.smmu = {
+			/* read-only */
+			.reg = 0x230,
+			.bit = 24,
+		},
+		.la = {
+			.reg = 0x3c8, /* NOTE(review): "displayd" (id 0x73) below uses the same .la.reg and .shift - confirm against the TRM */
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x1a,
+		},
+	}, {
+		.id = 0x59,
+		.name = "gpuswr",
+		.swgroup = TEGRA_SWGROUP_GPU,
+		.smmu = {
+			/* read-only */
+			.reg = 0x230,
+			.bit = 25,
+		},
+		.la = {
+			.reg = 0x3c8,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x5a,
+		.name = "displayt",
+		.swgroup = TEGRA_SWGROUP_DC,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 26,
+		},
+		.la = {
+			.reg = 0x2f0,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x50,
+		},
+	}, {
+		.id = 0x60,
+		.name = "sdmmcra",
+		.swgroup = TEGRA_SWGROUP_SDMMC1A,
+		.smmu = {
+			.reg = 0x234,
+			.bit = 0,
+		},
+		.la = {
+			.reg = 0x3b8,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x49,
+		},
+	}, {
+		.id = 0x61,
+		.name = "sdmmcraa",
+		.swgroup = TEGRA_SWGROUP_SDMMC2A,
+		.smmu = {
+			.reg = 0x234,
+			.bit = 1,
+		},
+		.la = {
+			.reg = 0x3bc,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x49,
+		},
+	}, {
+		.id = 0x62,
+		.name = "sdmmcr",
+		.swgroup = TEGRA_SWGROUP_SDMMC3A,
+		.smmu = {
+			.reg = 0x234,
+			.bit = 2,
+		},
+		.la = {
+			.reg = 0x3c0,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x49,
+		},
+	}, {
+		.id = 0x63,
+		.swgroup = TEGRA_SWGROUP_SDMMC4A,
+		.name = "sdmmcrab",
+		.smmu = {
+			.reg = 0x234,
+			.bit = 3,
+		},
+		.la = {
+			.reg = 0x3c4,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x49,
+		},
+	}, {
+		.id = 0x64,
+		.name = "sdmmcwa",
+		.swgroup = TEGRA_SWGROUP_SDMMC1A,
+		.smmu = {
+			.reg = 0x234,
+			.bit = 4,
+		},
+		.la = {
+			.reg = 0x3b8,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x65,
+		.name = "sdmmcwaa",
+		.swgroup = TEGRA_SWGROUP_SDMMC2A,
+		.smmu = {
+			.reg = 0x234,
+			.bit = 5,
+		},
+		.la = {
+			.reg = 0x3bc,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x66,
+		.name = "sdmmcw",
+		.swgroup = TEGRA_SWGROUP_SDMMC3A,
+		.smmu = {
+			.reg = 0x234,
+			.bit = 6,
+		},
+		.la = {
+			.reg = 0x3c0,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x67,
+		.name = "sdmmcwab",
+		.swgroup = TEGRA_SWGROUP_SDMMC4A,
+		.smmu = {
+			.reg = 0x234,
+			.bit = 7,
+		},
+		.la = {
+			.reg = 0x3c4,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x6c,
+		.name = "vicsrd",
+		.swgroup = TEGRA_SWGROUP_VIC,
+		.smmu = {
+			.reg = 0x234,
+			.bit = 12,
+		},
+		.la = {
+			.reg = 0x394,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x1a,
+		},
+	}, {
+		.id = 0x6d,
+		.name = "vicswr",
+		.swgroup = TEGRA_SWGROUP_VIC,
+		.smmu = {
+			.reg = 0x234,
+			.bit = 13,
+		},
+		.la = {
+			.reg = 0x394,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x72,
+		.name = "viw",
+		.swgroup = TEGRA_SWGROUP_VI,
+		.smmu = {
+			.reg = 0x234,
+			.bit = 18,
+		},
+		.la = {
+			.reg = 0x398,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x73,
+		.name = "displayd",
+		.swgroup = TEGRA_SWGROUP_DC,
+		.smmu = {
+			.reg = 0x234,
+			.bit = 19,
+		},
+		.la = {
+			.reg = 0x3c8,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x50,
+		},
+	},
+};
+
+static const struct tegra_smmu_swgroup tegra124_swgroups[] = { /* swgroup ID -> .reg offset; presumably the group's SMMU ASID register - confirm in mc.h */
+	{ .swgroup = TEGRA_SWGROUP_DC,        .reg = 0x240 },
+	{ .swgroup = TEGRA_SWGROUP_DCB,       .reg = 0x244 },
+	{ .swgroup = TEGRA_SWGROUP_AFI,       .reg = 0x238 },
+	{ .swgroup = TEGRA_SWGROUP_AVPC,      .reg = 0x23c },
+	{ .swgroup = TEGRA_SWGROUP_HDA,       .reg = 0x254 },
+	{ .swgroup = TEGRA_SWGROUP_HC,        .reg = 0x250 },
+	{ .swgroup = TEGRA_SWGROUP_MSENC,     .reg = 0x264 },
+	{ .swgroup = TEGRA_SWGROUP_PPCS,      .reg = 0x270 },
+	{ .swgroup = TEGRA_SWGROUP_SATA,      .reg = 0x274 },
+	{ .swgroup = TEGRA_SWGROUP_VDE,       .reg = 0x27c },
+	{ .swgroup = TEGRA_SWGROUP_ISP2,      .reg = 0x258 },
+	{ .swgroup = TEGRA_SWGROUP_XUSB_HOST, .reg = 0x288 },
+	{ .swgroup = TEGRA_SWGROUP_XUSB_DEV,  .reg = 0x28c },
+	{ .swgroup = TEGRA_SWGROUP_ISP2B,     .reg = 0xaa4 }, /* first of the entries in the 0xa94-0xaac range, apart from the 0x2xx block */
+	{ .swgroup = TEGRA_SWGROUP_TSEC,      .reg = 0x294 },
+	{ .swgroup = TEGRA_SWGROUP_A9AVP,     .reg = 0x290 },
+	{ .swgroup = TEGRA_SWGROUP_GPU,       .reg = 0xaac },
+	{ .swgroup = TEGRA_SWGROUP_SDMMC1A,   .reg = 0xa94 },
+	{ .swgroup = TEGRA_SWGROUP_SDMMC2A,   .reg = 0xa98 },
+	{ .swgroup = TEGRA_SWGROUP_SDMMC3A,   .reg = 0xa9c },
+	{ .swgroup = TEGRA_SWGROUP_SDMMC4A,   .reg = 0xaa0 },
+	{ .swgroup = TEGRA_SWGROUP_VIC,       .reg = 0x284 },
+	{ .swgroup = TEGRA_SWGROUP_VI,        .reg = 0x280 },
+};
+
+#ifdef CONFIG_ARCH_TEGRA_124_SOC
+static void tegra124_flush_dcache(struct page *page, unsigned long offset,
+				  size_t size)
+{
+	void *vaddr = page_address(page) + offset; /* kernel virtual start of the range */
+	phys_addr_t start = page_to_phys(page) + offset, end = start + size;
+
+	__cpuc_flush_dcache_area(vaddr, size); /* CPU data cache, addressed virtually */
+	outer_flush_range(start, end); /* outer cache, addressed physically: [start, end) */
+}
+
+static const struct tegra_smmu_ops tegra124_smmu_ops = { /* SMMU callbacks for Tegra124 */
+	.flush_dcache = tegra124_flush_dcache, /* only callback set here */
+};
+
+static const struct tegra_smmu_soc tegra124_smmu_soc = { /* Tegra124 SMMU description, referenced by tegra124_mc_soc.smmu */
+	.clients = tegra124_mc_clients, /* same client table that tegra124_mc_soc uses */
+	.num_clients = ARRAY_SIZE(tegra124_mc_clients),
+	.swgroups = tegra124_swgroups,
+	.num_swgroups = ARRAY_SIZE(tegra124_swgroups),
+	.supports_round_robin_arbitration = true,
+	.supports_request_limit = true,
+	.num_asids = 128, /* presumably the number of SMMU address spaces - confirm */
+	.ops = &tegra124_smmu_ops,
+};
+
+const struct tegra_mc_soc tegra124_mc_soc = { /* non-static; presumably declared in mc.h for the MC core driver - confirm */
+	.clients = tegra124_mc_clients,
+	.num_clients = ARRAY_SIZE(tegra124_mc_clients),
+	.num_address_bits = 34,
+	.atom_size = 32, /* NOTE(review): unit not visible here (bytes?) - confirm */
+	.smmu = &tegra124_smmu_soc,
+};
+#endif /* CONFIG_ARCH_TEGRA_124_SOC */
diff --git a/drivers/memory/tegra/tegra30.c b/drivers/memory/tegra/tegra30.c
new file mode 100644
index 0000000..71fe937
--- /dev/null
+++ b/drivers/memory/tegra/tegra30.c
@@ -0,0 +1,970 @@
+/*
+ * Copyright (C) 2014 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/of.h>
+#include <linux/mm.h>
+
+#include <asm/cacheflush.h>
+
+#include <dt-bindings/memory/tegra30-mc.h>
+
+#include "mc.h"
+
+static const struct tegra_mc_client tegra30_mc_clients[] = {
+	{
+		.id = 0x00,
+		.name = "ptcr",
+		.swgroup = TEGRA_SWGROUP_PTC,
+	}, {
+		.id = 0x01,
+		.name = "display0a",
+		.swgroup = TEGRA_SWGROUP_DC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 1,
+		},
+		.la = {
+			.reg = 0x2e8,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x4e,
+		},
+	}, {
+		.id = 0x02,
+		.name = "display0ab",
+		.swgroup = TEGRA_SWGROUP_DCB,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 2,
+		},
+		.la = {
+			.reg = 0x2f4,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x4e,
+		},
+	}, {
+		.id = 0x03,
+		.name = "display0b",
+		.swgroup = TEGRA_SWGROUP_DC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 3,
+		},
+		.la = {
+			.reg = 0x2e8,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x4e,
+		},
+	}, {
+		.id = 0x04,
+		.name = "display0bb",
+		.swgroup = TEGRA_SWGROUP_DCB,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 4,
+		},
+		.la = {
+			.reg = 0x2f4,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x4e,
+		},
+	}, {
+		.id = 0x05,
+		.name = "display0c",
+		.swgroup = TEGRA_SWGROUP_DC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 5,
+		},
+		.la = {
+			.reg = 0x2ec,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x4e,
+		},
+	}, {
+		.id = 0x06,
+		.name = "display0cb",
+		.swgroup = TEGRA_SWGROUP_DCB,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 6,
+		},
+		.la = {
+			.reg = 0x2f8,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x4e,
+		},
+	}, {
+		.id = 0x07,
+		.name = "display1b",
+		.swgroup = TEGRA_SWGROUP_DC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 7,
+		},
+		.la = {
+			.reg = 0x2ec,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x4e,
+		},
+	}, {
+		.id = 0x08,
+		.name = "display1bb",
+		.swgroup = TEGRA_SWGROUP_DCB,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 8,
+		},
+		.la = {
+			.reg = 0x2f8,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x4e,
+		},
+	}, {
+		.id = 0x09,
+		.name = "eppup",
+		.swgroup = TEGRA_SWGROUP_EPP,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 9,
+		},
+		.la = {
+			.reg = 0x300,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x17,
+		},
+	}, {
+		.id = 0x0a,
+		.name = "g2pr",
+		.swgroup = TEGRA_SWGROUP_G2,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 10,
+		},
+		.la = {
+			.reg = 0x308,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x09,
+		},
+	}, {
+		.id = 0x0b,
+		.name = "g2sr",
+		.swgroup = TEGRA_SWGROUP_G2,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 11,
+		},
+		.la = {
+			.reg = 0x308,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x09,
+		},
+	}, {
+		.id = 0x0c,
+		.name = "mpeunifbr",
+		.swgroup = TEGRA_SWGROUP_MPE,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 12,
+		},
+		.la = {
+			.reg = 0x328,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x50,
+		},
+	}, {
+		.id = 0x0d,
+		.name = "viruv",
+		.swgroup = TEGRA_SWGROUP_VI,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 13,
+		},
+		.la = {
+			.reg = 0x364,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x2c,
+		},
+	}, {
+		.id = 0x0e,
+		.name = "afir",
+		.swgroup = TEGRA_SWGROUP_AFI,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 14,
+		},
+		.la = {
+			.reg = 0x2e0,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x10,
+		},
+	}, {
+		.id = 0x0f,
+		.name = "avpcarm7r",
+		.swgroup = TEGRA_SWGROUP_AVPC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 15,
+		},
+		.la = {
+			.reg = 0x2e4,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x04,
+		},
+	}, {
+		.id = 0x10,
+		.name = "displayhc",
+		.swgroup = TEGRA_SWGROUP_DC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 16,
+		},
+		.la = {
+			.reg = 0x2f0,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x11,
+		.name = "displayhcb",
+		.swgroup = TEGRA_SWGROUP_DCB,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 17,
+		},
+		.la = {
+			.reg = 0x2fc,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x12,
+		.name = "fdcdrd",
+		.swgroup = TEGRA_SWGROUP_NV,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 18,
+		},
+		.la = {
+			.reg = 0x334,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x0a,
+		},
+	}, {
+		.id = 0x13,
+		.name = "fdcdrd2",
+		.swgroup = TEGRA_SWGROUP_NV2,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 19,
+		},
+		.la = {
+			.reg = 0x33c,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x0a,
+		},
+	}, {
+		.id = 0x14,
+		.name = "g2dr",
+		.swgroup = TEGRA_SWGROUP_G2,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 20,
+		},
+		.la = {
+			.reg = 0x30c,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x0a,
+		},
+	}, {
+		.id = 0x15,
+		.name = "hdar",
+		.swgroup = TEGRA_SWGROUP_HDA,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 21,
+		},
+		.la = {
+			.reg = 0x318,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x16,
+		.name = "host1xdmar",
+		.swgroup = TEGRA_SWGROUP_HC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 22,
+		},
+		.la = {
+			.reg = 0x310,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x05,
+		},
+	}, {
+		.id = 0x17,
+		.name = "host1xr",
+		.swgroup = TEGRA_SWGROUP_HC,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 23,
+		},
+		.la = {
+			.reg = 0x310,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x50,
+		},
+	}, {
+		.id = 0x18,
+		.name = "idxsrd",
+		.swgroup = TEGRA_SWGROUP_NV,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 24,
+		},
+		.la = {
+			.reg = 0x334,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x13,
+		},
+	}, {
+		.id = 0x19,
+		.name = "idxsrd2",
+		.swgroup = TEGRA_SWGROUP_NV2,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 25,
+		},
+		.la = {
+			.reg = 0x33c,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x13,
+		},
+	}, {
+		.id = 0x1a,
+		.name = "mpe_ipred",
+		.swgroup = TEGRA_SWGROUP_MPE,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 26,
+		},
+		.la = {
+			.reg = 0x328,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x80,
+		},
+	}, {
+		.id = 0x1b,
+		.name = "mpeamemrd",
+		.swgroup = TEGRA_SWGROUP_MPE,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 27,
+		},
+		.la = {
+			.reg = 0x32c,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x42,
+		},
+	}, {
+		.id = 0x1c,
+		.name = "mpecsrd",
+		.swgroup = TEGRA_SWGROUP_MPE,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 28,
+		},
+		.la = {
+			.reg = 0x32c,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x1d,
+		.name = "ppcsahbdmar",
+		.swgroup = TEGRA_SWGROUP_PPCS,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 29,
+		},
+		.la = {
+			.reg = 0x344,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x10,
+		},
+	}, {
+		.id = 0x1e,
+		.name = "ppcsahbslvr",
+		.swgroup = TEGRA_SWGROUP_PPCS,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 30,
+		},
+		.la = {
+			.reg = 0x344,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x12,
+		},
+	}, {
+		.id = 0x1f,
+		.name = "satar",
+		.swgroup = TEGRA_SWGROUP_SATA,
+		.smmu = {
+			.reg = 0x228,
+			.bit = 31,
+		},
+		.la = {
+			.reg = 0x350,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x33,
+		},
+	}, {
+		.id = 0x20,
+		.name = "texsrd",
+		.swgroup = TEGRA_SWGROUP_NV,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 0,
+		},
+		.la = {
+			.reg = 0x338,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x13,
+		},
+	}, {
+		.id = 0x21,
+		.name = "texsrd2",
+		.swgroup = TEGRA_SWGROUP_NV2,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 1,
+		},
+		.la = {
+			.reg = 0x340,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x13,
+		},
+	}, {
+		.id = 0x22,
+		.name = "vdebsevr",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 2,
+		},
+		.la = {
+			.reg = 0x354,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x23,
+		.name = "vdember",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 3,
+		},
+		.la = {
+			.reg = 0x354,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0xd0,
+		},
+	}, {
+		.id = 0x24,
+		.name = "vdemcer",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 4,
+		},
+		.la = {
+			.reg = 0x358,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x2a,
+		},
+	}, {
+		.id = 0x25,
+		.name = "vdetper",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 5,
+		},
+		.la = {
+			.reg = 0x358,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x74,
+		},
+	}, {
+		.id = 0x26,
+		.name = "mpcorelpr",
+		.swgroup = TEGRA_SWGROUP_MPCORELP,
+		.la = {
+			.reg = 0x324,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x04,
+		},
+	}, {
+		.id = 0x27,
+		.name = "mpcorer",
+		.swgroup = TEGRA_SWGROUP_MPCORE,
+		.la = {
+			.reg = 0x320,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x04,
+		},
+	}, {
+		.id = 0x28,
+		.name = "eppu",
+		.swgroup = TEGRA_SWGROUP_EPP,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 8,
+		},
+		.la = {
+			.reg = 0x300,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x6c,
+		},
+	}, {
+		.id = 0x29,
+		.name = "eppv",
+		.swgroup = TEGRA_SWGROUP_EPP,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 9,
+		},
+		.la = {
+			.reg = 0x304,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x6c,
+		},
+	}, {
+		.id = 0x2a,
+		.name = "eppy",
+		.swgroup = TEGRA_SWGROUP_EPP,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 10,
+		},
+		.la = {
+			.reg = 0x304,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x6c,
+		},
+	}, {
+		.id = 0x2b,
+		.name = "mpeunifbw",
+		.swgroup = TEGRA_SWGROUP_MPE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 11,
+		},
+		.la = {
+			.reg = 0x330,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x13,
+		},
+	}, {
+		.id = 0x2c,
+		.name = "viwsb",
+		.swgroup = TEGRA_SWGROUP_VI,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 12,
+		},
+		.la = {
+			.reg = 0x364,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x12,
+		},
+	}, {
+		.id = 0x2d,
+		.name = "viwu",
+		.swgroup = TEGRA_SWGROUP_VI,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 13,
+		},
+		.la = {
+			.reg = 0x368,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xb2,
+		},
+	}, {
+		.id = 0x2e,
+		.name = "viwv",
+		.swgroup = TEGRA_SWGROUP_VI,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 14,
+		},
+		.la = {
+			.reg = 0x368,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0xb2,
+		},
+	}, {
+		.id = 0x2f,
+		.name = "viwy",
+		.swgroup = TEGRA_SWGROUP_VI,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 15,
+		},
+		.la = {
+			.reg = 0x36c,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x12,
+		},
+	}, {
+		.id = 0x30,
+		.name = "g2dw",
+		.swgroup = TEGRA_SWGROUP_G2,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 16,
+		},
+		.la = {
+			.reg = 0x30c,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x9,
+		},
+	}, {
+		.id = 0x31,
+		.name = "afiw",
+		.swgroup = TEGRA_SWGROUP_AFI,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 17,
+		},
+		.la = {
+			.reg = 0x2e0,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x0c,
+		},
+	}, {
+		.id = 0x32,
+		.name = "avpcarm7w",
+		.swgroup = TEGRA_SWGROUP_AVPC,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 18,
+		},
+		.la = {
+			.reg = 0x2e4,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x0e,
+		},
+	}, {
+		.id = 0x33,
+		.name = "fdcdwr",
+		.swgroup = TEGRA_SWGROUP_NV,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 19,
+		},
+		.la = {
+			.reg = 0x338,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x0a,
+		},
+	}, {
+		.id = 0x34,
+		.name = "fdcdwr2",
+		.swgroup = TEGRA_SWGROUP_NV2,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 20,
+		},
+		.la = {
+			.reg = 0x340,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x0a,
+		},
+	}, {
+		.id = 0x35,
+		.name = "hdaw",
+		.swgroup = TEGRA_SWGROUP_HDA,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 21,
+		},
+		.la = {
+			.reg = 0x318,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x36,
+		.name = "host1xw",
+		.swgroup = TEGRA_SWGROUP_HC,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 22,
+		},
+		.la = {
+			.reg = 0x314,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x10,
+		},
+	}, {
+		.id = 0x37,
+		.name = "ispw",
+		.swgroup = TEGRA_SWGROUP_ISP,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 23,
+		},
+		.la = {
+			.reg = 0x31c,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x38,
+		.name = "mpcorelpw",
+		.swgroup = TEGRA_SWGROUP_MPCORELP,
+		.la = {
+			.reg = 0x324,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x0e,
+		},
+	}, {
+		.id = 0x39,
+		.name = "mpcorew",
+		.swgroup = TEGRA_SWGROUP_MPCORE,
+		.la = {
+			.reg = 0x320,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x0e,
+		},
+	}, {
+		.id = 0x3a,
+		.name = "mpecswr",
+		.swgroup = TEGRA_SWGROUP_MPE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 26,
+		},
+		.la = {
+			.reg = 0x330,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x3b,
+		.name = "ppcsahbdmaw",
+		.swgroup = TEGRA_SWGROUP_PPCS,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 27,
+		},
+		.la = {
+			.reg = 0x348,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x10,
+		},
+	}, {
+		.id = 0x3c,
+		.name = "ppcsahbslvw",
+		.swgroup = TEGRA_SWGROUP_PPCS,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 28,
+		},
+		.la = {
+			.reg = 0x348,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x06,
+		},
+	}, {
+		.id = 0x3d,
+		.name = "sataw",
+		.swgroup = TEGRA_SWGROUP_SATA,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 29,
+		},
+		.la = {
+			.reg = 0x350,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x33,
+		},
+	}, {
+		.id = 0x3e,
+		.name = "vdebsevw",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 30,
+		},
+		.la = {
+			.reg = 0x35c,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x3f,
+		.name = "vdedbgw",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x22c,
+			.bit = 31,
+		},
+		.la = {
+			.reg = 0x35c,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0xff,
+		},
+	}, {
+		.id = 0x40,
+		.name = "vdembew",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 0,
+		},
+		.la = {
+			.reg = 0x360,
+			.shift = 0,
+			.mask = 0xff,
+			.def = 0x42,
+		},
+	}, {
+		.id = 0x41,
+		.name = "vdetpmw",
+		.swgroup = TEGRA_SWGROUP_VDE,
+		.smmu = {
+			.reg = 0x230,
+			.bit = 1,
+		},
+		.la = {
+			.reg = 0x360,
+			.shift = 16,
+			.mask = 0xff,
+			.def = 0x2a,
+		},
+	},
+};
+
+static const struct tegra_smmu_swgroup tegra30_swgroups[] = {
+	{ .swgroup = TEGRA_SWGROUP_DC,   .reg = 0x240 },
+	{ .swgroup = TEGRA_SWGROUP_DCB,  .reg = 0x244 },
+	{ .swgroup = TEGRA_SWGROUP_EPP,  .reg = 0x248 },
+	{ .swgroup = TEGRA_SWGROUP_G2,   .reg = 0x24c },
+	{ .swgroup = TEGRA_SWGROUP_MPE,  .reg = 0x264 },
+	{ .swgroup = TEGRA_SWGROUP_VI,   .reg = 0x280 },
+	{ .swgroup = TEGRA_SWGROUP_AFI,  .reg = 0x238 },
+	{ .swgroup = TEGRA_SWGROUP_AVPC, .reg = 0x23c },
+	{ .swgroup = TEGRA_SWGROUP_NV,   .reg = 0x268 },
+	{ .swgroup = TEGRA_SWGROUP_NV2,  .reg = 0x26c },
+	{ .swgroup = TEGRA_SWGROUP_HDA,  .reg = 0x254 },
+	{ .swgroup = TEGRA_SWGROUP_HC,   .reg = 0x250 },
+	{ .swgroup = TEGRA_SWGROUP_PPCS, .reg = 0x270 },
+	{ .swgroup = TEGRA_SWGROUP_SATA, .reg = 0x278 },
+	{ .swgroup = TEGRA_SWGROUP_VDE,  .reg = 0x27c },
+	{ .swgroup = TEGRA_SWGROUP_ISP,  .reg = 0x258 },
+};
+
+static void tegra30_flush_dcache(struct page *page, unsigned long offset,
+				 size_t size)
+{
+	phys_addr_t phys = page_to_phys(page) + offset;
+	void *virt = page_address(page) + offset;
+
+	__cpuc_flush_dcache_area(virt, size);
+	outer_flush_range(phys, phys + size);
+}
+
+static const struct tegra_smmu_ops tegra30_smmu_ops = {
+	.flush_dcache = tegra30_flush_dcache,
+};
+
+static const struct tegra_smmu_soc tegra30_smmu_soc = {
+	.clients = tegra30_mc_clients,
+	.num_clients = ARRAY_SIZE(tegra30_mc_clients),
+	.swgroups = tegra30_swgroups,
+	.num_swgroups = ARRAY_SIZE(tegra30_swgroups),
+	.supports_round_robin_arbitration = false,
+	.supports_request_limit = false,
+	.num_asids = 4,
+	.ops = &tegra30_smmu_ops,
+};
+
+const struct tegra_mc_soc tegra30_mc_soc = {
+	.clients = tegra30_mc_clients,
+	.num_clients = ARRAY_SIZE(tegra30_mc_clients),
+	.num_address_bits = 32,
+	.atom_size = 16,
+	.smmu = &tegra30_smmu_soc,
+};
diff --git a/drivers/memory/tegra30-mc.c b/drivers/memory/tegra30-mc.c
deleted file mode 100644
index ef79345..0000000
--- a/drivers/memory/tegra30-mc.c
+++ /dev/null
@@ -1,378 +0,0 @@
-/*
- * Tegra30 Memory Controller
- *
- * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/err.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/ratelimit.h>
-#include <linux/platform_device.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-
-#define DRV_NAME "tegra30-mc"
-
-#define MC_INTSTATUS			0x0
-#define MC_INTMASK			0x4
-
-#define MC_INT_ERR_SHIFT		6
-#define MC_INT_ERR_MASK			(0x1f << MC_INT_ERR_SHIFT)
-#define MC_INT_DECERR_EMEM		BIT(MC_INT_ERR_SHIFT)
-#define MC_INT_SECURITY_VIOLATION	BIT(MC_INT_ERR_SHIFT + 2)
-#define MC_INT_ARBITRATION_EMEM		BIT(MC_INT_ERR_SHIFT + 3)
-#define MC_INT_INVALID_SMMU_PAGE	BIT(MC_INT_ERR_SHIFT + 4)
-
-#define MC_ERR_STATUS			0x8
-#define MC_ERR_ADR			0xc
-
-#define MC_ERR_TYPE_SHIFT		28
-#define MC_ERR_TYPE_MASK		(7 << MC_ERR_TYPE_SHIFT)
-#define MC_ERR_TYPE_DECERR_EMEM		2
-#define MC_ERR_TYPE_SECURITY_TRUSTZONE	3
-#define MC_ERR_TYPE_SECURITY_CARVEOUT	4
-#define MC_ERR_TYPE_INVALID_SMMU_PAGE	6
-
-#define MC_ERR_INVALID_SMMU_PAGE_SHIFT	25
-#define MC_ERR_INVALID_SMMU_PAGE_MASK	(7 << MC_ERR_INVALID_SMMU_PAGE_SHIFT)
-#define MC_ERR_RW_SHIFT			16
-#define MC_ERR_RW			BIT(MC_ERR_RW_SHIFT)
-#define MC_ERR_SECURITY			BIT(MC_ERR_RW_SHIFT + 1)
-
-#define SECURITY_VIOLATION_TYPE		BIT(30)	/* 0=TRUSTZONE, 1=CARVEOUT */
-
-#define MC_EMEM_ARB_CFG			0x90
-#define MC_EMEM_ARB_OUTSTANDING_REQ	0x94
-#define MC_EMEM_ARB_TIMING_RCD		0x98
-#define MC_EMEM_ARB_TIMING_RP		0x9c
-#define MC_EMEM_ARB_TIMING_RC		0xa0
-#define MC_EMEM_ARB_TIMING_RAS		0xa4
-#define MC_EMEM_ARB_TIMING_FAW		0xa8
-#define MC_EMEM_ARB_TIMING_RRD		0xac
-#define MC_EMEM_ARB_TIMING_RAP2PRE	0xb0
-#define MC_EMEM_ARB_TIMING_WAP2PRE	0xb4
-#define MC_EMEM_ARB_TIMING_R2R		0xb8
-#define MC_EMEM_ARB_TIMING_W2W		0xbc
-#define MC_EMEM_ARB_TIMING_R2W		0xc0
-#define MC_EMEM_ARB_TIMING_W2R		0xc4
-
-#define MC_EMEM_ARB_DA_TURNS		0xd0
-#define MC_EMEM_ARB_DA_COVERS		0xd4
-#define MC_EMEM_ARB_MISC0		0xd8
-#define MC_EMEM_ARB_MISC1		0xdc
-
-#define MC_EMEM_ARB_RING3_THROTTLE	0xe4
-#define MC_EMEM_ARB_OVERRIDE		0xe8
-
-#define MC_TIMING_CONTROL		0xfc
-
-#define MC_CLIENT_ID_MASK		0x7f
-
-#define NUM_MC_REG_BANKS		4
-
-struct tegra30_mc {
-	void __iomem *regs[NUM_MC_REG_BANKS];
-	struct device *dev;
-	u32 ctx[0];
-};
-
-static inline u32 mc_readl(struct tegra30_mc *mc, u32 offs)
-{
-	u32 val = 0;
-
-	if (offs < 0x10)
-		val = readl(mc->regs[0] + offs);
-	else if (offs < 0x1f0)
-		val = readl(mc->regs[1] + offs - 0x3c);
-	else if (offs < 0x228)
-		val = readl(mc->regs[2] + offs - 0x200);
-	else if (offs < 0x400)
-		val = readl(mc->regs[3] + offs - 0x284);
-
-	return val;
-}
-
-static inline void mc_writel(struct tegra30_mc *mc, u32 val, u32 offs)
-{
-	if (offs < 0x10)
-		writel(val, mc->regs[0] + offs);
-	else if (offs < 0x1f0)
-		writel(val, mc->regs[1] + offs - 0x3c);
-	else if (offs < 0x228)
-		writel(val, mc->regs[2] + offs - 0x200);
-	else if (offs < 0x400)
-		writel(val, mc->regs[3] + offs - 0x284);
-}
-
-static const char * const tegra30_mc_client[] = {
-	"csr_ptcr",
-	"cbr_display0a",
-	"cbr_display0ab",
-	"cbr_display0b",
-	"cbr_display0bb",
-	"cbr_display0c",
-	"cbr_display0cb",
-	"cbr_display1b",
-	"cbr_display1bb",
-	"cbr_eppup",
-	"cbr_g2pr",
-	"cbr_g2sr",
-	"cbr_mpeunifbr",
-	"cbr_viruv",
-	"csr_afir",
-	"csr_avpcarm7r",
-	"csr_displayhc",
-	"csr_displayhcb",
-	"csr_fdcdrd",
-	"csr_fdcdrd2",
-	"csr_g2dr",
-	"csr_hdar",
-	"csr_host1xdmar",
-	"csr_host1xr",
-	"csr_idxsrd",
-	"csr_idxsrd2",
-	"csr_mpe_ipred",
-	"csr_mpeamemrd",
-	"csr_mpecsrd",
-	"csr_ppcsahbdmar",
-	"csr_ppcsahbslvr",
-	"csr_satar",
-	"csr_texsrd",
-	"csr_texsrd2",
-	"csr_vdebsevr",
-	"csr_vdember",
-	"csr_vdemcer",
-	"csr_vdetper",
-	"csr_mpcorelpr",
-	"csr_mpcorer",
-	"cbw_eppu",
-	"cbw_eppv",
-	"cbw_eppy",
-	"cbw_mpeunifbw",
-	"cbw_viwsb",
-	"cbw_viwu",
-	"cbw_viwv",
-	"cbw_viwy",
-	"ccw_g2dw",
-	"csw_afiw",
-	"csw_avpcarm7w",
-	"csw_fdcdwr",
-	"csw_fdcdwr2",
-	"csw_hdaw",
-	"csw_host1xw",
-	"csw_ispw",
-	"csw_mpcorelpw",
-	"csw_mpcorew",
-	"csw_mpecswr",
-	"csw_ppcsahbdmaw",
-	"csw_ppcsahbslvw",
-	"csw_sataw",
-	"csw_vdebsevw",
-	"csw_vdedbgw",
-	"csw_vdembew",
-	"csw_vdetpmw",
-};
-
-static void tegra30_mc_decode(struct tegra30_mc *mc, int n)
-{
-	u32 err, addr;
-	const char * const mc_int_err[] = {
-		"MC_DECERR",
-		"Unknown",
-		"MC_SECURITY_ERR",
-		"MC_ARBITRATION_EMEM",
-		"MC_SMMU_ERR",
-	};
-	const char * const err_type[] = {
-		"Unknown",
-		"Unknown",
-		"DECERR_EMEM",
-		"SECURITY_TRUSTZONE",
-		"SECURITY_CARVEOUT",
-		"Unknown",
-		"INVALID_SMMU_PAGE",
-		"Unknown",
-	};
-	char attr[6];
-	int cid, perm, type, idx;
-	const char *client = "Unknown";
-
-	idx = n - MC_INT_ERR_SHIFT;
-	if ((idx < 0) || (idx >= ARRAY_SIZE(mc_int_err)) || (idx == 1)) {
-		dev_err_ratelimited(mc->dev, "Unknown interrupt status %08lx\n",
-				    BIT(n));
-		return;
-	}
-
-	err = mc_readl(mc, MC_ERR_STATUS);
-
-	type = (err & MC_ERR_TYPE_MASK) >> MC_ERR_TYPE_SHIFT;
-	perm = (err & MC_ERR_INVALID_SMMU_PAGE_MASK) >>
-		MC_ERR_INVALID_SMMU_PAGE_SHIFT;
-	if (type == MC_ERR_TYPE_INVALID_SMMU_PAGE)
-		sprintf(attr, "%c-%c-%c",
-			(perm & BIT(2)) ? 'R' : '-',
-			(perm & BIT(1)) ? 'W' : '-',
-			(perm & BIT(0)) ? 'S' : '-');
-	else
-		attr[0] = '\0';
-
-	cid = err & MC_CLIENT_ID_MASK;
-	if (cid < ARRAY_SIZE(tegra30_mc_client))
-		client = tegra30_mc_client[cid];
-
-	addr = mc_readl(mc, MC_ERR_ADR);
-
-	dev_err_ratelimited(mc->dev, "%s (0x%08x): 0x%08x %s (%s %s %s %s)\n",
-			   mc_int_err[idx], err, addr, client,
-			   (err & MC_ERR_SECURITY) ? "secure" : "non-secure",
-			   (err & MC_ERR_RW) ? "write" : "read",
-			   err_type[type], attr);
-}
-
-static const u32 tegra30_mc_ctx[] = {
-	MC_EMEM_ARB_CFG,
-	MC_EMEM_ARB_OUTSTANDING_REQ,
-	MC_EMEM_ARB_TIMING_RCD,
-	MC_EMEM_ARB_TIMING_RP,
-	MC_EMEM_ARB_TIMING_RC,
-	MC_EMEM_ARB_TIMING_RAS,
-	MC_EMEM_ARB_TIMING_FAW,
-	MC_EMEM_ARB_TIMING_RRD,
-	MC_EMEM_ARB_TIMING_RAP2PRE,
-	MC_EMEM_ARB_TIMING_WAP2PRE,
-	MC_EMEM_ARB_TIMING_R2R,
-	MC_EMEM_ARB_TIMING_W2W,
-	MC_EMEM_ARB_TIMING_R2W,
-	MC_EMEM_ARB_TIMING_W2R,
-	MC_EMEM_ARB_DA_TURNS,
-	MC_EMEM_ARB_DA_COVERS,
-	MC_EMEM_ARB_MISC0,
-	MC_EMEM_ARB_MISC1,
-	MC_EMEM_ARB_RING3_THROTTLE,
-	MC_EMEM_ARB_OVERRIDE,
-	MC_INTMASK,
-};
-
-#ifdef CONFIG_PM
-static int tegra30_mc_suspend(struct device *dev)
-{
-	int i;
-	struct tegra30_mc *mc = dev_get_drvdata(dev);
-
-	for (i = 0; i < ARRAY_SIZE(tegra30_mc_ctx); i++)
-		mc->ctx[i] = mc_readl(mc, tegra30_mc_ctx[i]);
-	return 0;
-}
-
-static int tegra30_mc_resume(struct device *dev)
-{
-	int i;
-	struct tegra30_mc *mc = dev_get_drvdata(dev);
-
-	for (i = 0; i < ARRAY_SIZE(tegra30_mc_ctx); i++)
-		mc_writel(mc, mc->ctx[i], tegra30_mc_ctx[i]);
-
-	mc_writel(mc, 1, MC_TIMING_CONTROL);
-	/* Read-back to ensure that write reached */
-	mc_readl(mc, MC_TIMING_CONTROL);
-	return 0;
-}
-#endif
-
-static UNIVERSAL_DEV_PM_OPS(tegra30_mc_pm,
-			    tegra30_mc_suspend,
-			    tegra30_mc_resume, NULL);
-
-static const struct of_device_id tegra30_mc_of_match[] = {
-	{ .compatible = "nvidia,tegra30-mc", },
-	{},
-};
-
-static irqreturn_t tegra30_mc_isr(int irq, void *data)
-{
-	u32 stat, mask, bit;
-	struct tegra30_mc *mc = data;
-
-	stat = mc_readl(mc, MC_INTSTATUS);
-	mask = mc_readl(mc, MC_INTMASK);
-	mask &= stat;
-	if (!mask)
-		return IRQ_NONE;
-	while ((bit = ffs(mask)) != 0) {
-		tegra30_mc_decode(mc, bit - 1);
-		mask &= ~BIT(bit - 1);
-	}
-
-	mc_writel(mc, stat, MC_INTSTATUS);
-	return IRQ_HANDLED;
-}
-
-static int tegra30_mc_probe(struct platform_device *pdev)
-{
-	struct resource *irq;
-	struct tegra30_mc *mc;
-	size_t bytes;
-	int err, i;
-	u32 intmask;
-
-	bytes = sizeof(*mc) + sizeof(u32) * ARRAY_SIZE(tegra30_mc_ctx);
-	mc = devm_kzalloc(&pdev->dev, bytes, GFP_KERNEL);
-	if (!mc)
-		return -ENOMEM;
-	mc->dev = &pdev->dev;
-
-	for (i = 0; i < ARRAY_SIZE(mc->regs); i++) {
-		struct resource *res;
-
-		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
-		mc->regs[i] = devm_ioremap_resource(&pdev->dev, res);
-		if (IS_ERR(mc->regs[i]))
-			return PTR_ERR(mc->regs[i]);
-	}
-
-	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!irq)
-		return -ENODEV;
-	err = devm_request_irq(&pdev->dev, irq->start, tegra30_mc_isr,
-			       IRQF_SHARED, dev_name(&pdev->dev), mc);
-	if (err)
-		return -ENODEV;
-
-	platform_set_drvdata(pdev, mc);
-
-	intmask = MC_INT_INVALID_SMMU_PAGE |
-		MC_INT_DECERR_EMEM | MC_INT_SECURITY_VIOLATION;
-	mc_writel(mc, intmask, MC_INTMASK);
-	return 0;
-}
-
-static struct platform_driver tegra30_mc_driver = {
-	.probe = tegra30_mc_probe,
-	.driver = {
-		.name = DRV_NAME,
-		.owner = THIS_MODULE,
-		.of_match_table = tegra30_mc_of_match,
-		.pm = &tegra30_mc_pm,
-	},
-};
-module_platform_driver(tegra30_mc_driver);
-
-MODULE_AUTHOR("Hiroshi DOYU <hdoyu@nvidia.com>");
-MODULE_DESCRIPTION("Tegra30 MC driver");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/include/dt-bindings/memory/tegra114-mc.h b/include/dt-bindings/memory/tegra114-mc.h
new file mode 100644
index 0000000..8f48985
--- /dev/null
+++ b/include/dt-bindings/memory/tegra114-mc.h
@@ -0,0 +1,25 @@
+#ifndef DT_BINDINGS_MEMORY_TEGRA114_MC_H
+#define DT_BINDINGS_MEMORY_TEGRA114_MC_H
+
+#define TEGRA_SWGROUP_PTC	0
+#define TEGRA_SWGROUP_DC	1
+#define TEGRA_SWGROUP_DCB	2
+#define TEGRA_SWGROUP_EPP	3
+#define TEGRA_SWGROUP_G2	4
+#define TEGRA_SWGROUP_AVPC	5
+#define TEGRA_SWGROUP_NV	6
+#define TEGRA_SWGROUP_HDA	7
+#define TEGRA_SWGROUP_HC	8
+#define TEGRA_SWGROUP_MSENC	9
+#define TEGRA_SWGROUP_PPCS	10
+#define TEGRA_SWGROUP_VDE	11
+#define TEGRA_SWGROUP_MPCORELP	12
+#define TEGRA_SWGROUP_MPCORE	13
+#define TEGRA_SWGROUP_VI	14
+#define TEGRA_SWGROUP_ISP	15
+#define TEGRA_SWGROUP_XUSB_HOST	16
+#define TEGRA_SWGROUP_XUSB_DEV	17
+#define TEGRA_SWGROUP_EMUCIF	18
+#define TEGRA_SWGROUP_TSEC	19
+
+#endif
diff --git a/include/dt-bindings/memory/tegra124-mc.h b/include/dt-bindings/memory/tegra124-mc.h
new file mode 100644
index 0000000..7d8ee79
--- /dev/null
+++ b/include/dt-bindings/memory/tegra124-mc.h
@@ -0,0 +1,31 @@
+#ifndef DT_BINDINGS_MEMORY_TEGRA124_MC_H
+#define DT_BINDINGS_MEMORY_TEGRA124_MC_H
+
+#define TEGRA_SWGROUP_PTC	0
+#define TEGRA_SWGROUP_DC	1
+#define TEGRA_SWGROUP_DCB	2
+#define TEGRA_SWGROUP_AFI	3
+#define TEGRA_SWGROUP_AVPC	4
+#define TEGRA_SWGROUP_HDA	5
+#define TEGRA_SWGROUP_HC	6
+#define TEGRA_SWGROUP_MSENC	7
+#define TEGRA_SWGROUP_PPCS	8
+#define TEGRA_SWGROUP_SATA	9
+#define TEGRA_SWGROUP_VDE	10
+#define TEGRA_SWGROUP_MPCORELP	11
+#define TEGRA_SWGROUP_MPCORE	12
+#define TEGRA_SWGROUP_ISP2	13
+#define TEGRA_SWGROUP_XUSB_HOST	14
+#define TEGRA_SWGROUP_XUSB_DEV	15
+#define TEGRA_SWGROUP_ISP2B	16
+#define TEGRA_SWGROUP_TSEC	17
+#define TEGRA_SWGROUP_A9AVP	18
+#define TEGRA_SWGROUP_GPU	19
+#define TEGRA_SWGROUP_SDMMC1A	20
+#define TEGRA_SWGROUP_SDMMC2A	21
+#define TEGRA_SWGROUP_SDMMC3A	22
+#define TEGRA_SWGROUP_SDMMC4A	23
+#define TEGRA_SWGROUP_VIC	24
+#define TEGRA_SWGROUP_VI	25
+
+#endif
diff --git a/include/dt-bindings/memory/tegra30-mc.h b/include/dt-bindings/memory/tegra30-mc.h
new file mode 100644
index 0000000..502beb0
--- /dev/null
+++ b/include/dt-bindings/memory/tegra30-mc.h
@@ -0,0 +1,24 @@
+#ifndef DT_BINDINGS_MEMORY_TEGRA30_MC_H
+#define DT_BINDINGS_MEMORY_TEGRA30_MC_H
+
+#define TEGRA_SWGROUP_PTC	0
+#define TEGRA_SWGROUP_DC	1
+#define TEGRA_SWGROUP_DCB	2
+#define TEGRA_SWGROUP_EPP	3
+#define TEGRA_SWGROUP_G2	4
+#define TEGRA_SWGROUP_MPE	5
+#define TEGRA_SWGROUP_VI	6
+#define TEGRA_SWGROUP_AFI	7
+#define TEGRA_SWGROUP_AVPC	8
+#define TEGRA_SWGROUP_NV	9
+#define TEGRA_SWGROUP_NV2	10
+#define TEGRA_SWGROUP_HDA	11
+#define TEGRA_SWGROUP_HC	12
+#define TEGRA_SWGROUP_PPCS	13
+#define TEGRA_SWGROUP_SATA	14
+#define TEGRA_SWGROUP_VDE	15
+#define TEGRA_SWGROUP_MPCORELP	16
+#define TEGRA_SWGROUP_MPCORE	17
+#define TEGRA_SWGROUP_ISP	18
+
+#endif
diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
new file mode 100644
index 0000000..63deb8d
--- /dev/null
+++ b/include/soc/tegra/mc.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2014 NVIDIA Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __SOC_TEGRA_MC_H__
+#define __SOC_TEGRA_MC_H__
+
+#include <linux/types.h>
+
+struct clk;
+struct device;
+struct page;
+
+struct tegra_smmu_enable {
+	unsigned int reg;
+	unsigned int bit;
+};
+
+/* latency allowance */
+struct tegra_mc_la {
+	unsigned int reg;
+	unsigned int shift;
+	unsigned int mask;
+	unsigned int def;
+};
+
+struct tegra_mc_client {
+	unsigned int id;
+	const char *name;
+	unsigned int swgroup;
+
+	unsigned int fifo_size;
+
+	struct tegra_smmu_enable smmu;
+	struct tegra_mc_la la;
+};
+
+struct tegra_smmu_swgroup {
+	unsigned int swgroup;
+	unsigned int reg;
+};
+
+struct tegra_smmu_ops {
+	void (*flush_dcache)(struct page *page, unsigned long offset,
+			     size_t size);
+};
+
+struct tegra_smmu_soc {
+	const struct tegra_mc_client *clients;
+	unsigned int num_clients;
+
+	const struct tegra_smmu_swgroup *swgroups;
+	unsigned int num_swgroups;
+
+	bool supports_round_robin_arbitration;
+	bool supports_request_limit;
+
+	unsigned int num_asids;
+
+	const struct tegra_smmu_ops *ops;
+};
+
+struct tegra_mc;
+struct tegra_smmu;
+
+#ifdef CONFIG_TEGRA_IOMMU_SMMU
+struct tegra_smmu *tegra_smmu_probe(struct device *dev,
+				    const struct tegra_smmu_soc *soc,
+				    struct tegra_mc *mc);
+#else
+static inline struct tegra_smmu *
+tegra_smmu_probe(struct device *dev, const struct tegra_smmu_soc *soc,
+		 struct tegra_mc *mc)
+{
+	return NULL;
+}
+#endif
+
+struct tegra_mc_soc {
+	const struct tegra_mc_client *clients;
+	unsigned int num_clients;
+
+	const unsigned int *emem_regs;
+	unsigned int num_emem_regs;
+
+	unsigned int num_address_bits;
+	unsigned int atom_size;
+
+	const struct tegra_smmu_soc *smmu;
+};
+
+struct tegra_mc {
+	struct device *dev;
+	struct tegra_smmu *smmu;
+	void __iomem *regs;
+	struct clk *clk;
+	int irq;
+
+	const struct tegra_mc_soc *soc;
+	unsigned long tick;
+};
+
+#endif /* __SOC_TEGRA_MC_H__ */
-- 
cgit v0.10.2


From 0b46b8a718c6e90910a1b1b0fe797be3c167e186 Mon Sep 17 00:00:00 2001
From: Sonny Rao <sonnyrao@chromium.org>
Date: Sun, 23 Nov 2014 23:02:44 -0800
Subject: clocksource: arch_timer: Fix code to use physical timers when
 requested

This is a bug fix for using physical arch timers when
the arch_timer_use_virtual boolean is false.  It restores the
arch_counter_get_cntpct() function after removal in

0d651e4e "clocksource: arch_timer: use virtual counters"

We need this on certain ARMv7 systems which are architected like this:

* The firmware doesn't know and doesn't care about hypervisor mode and
  we don't want to add the complexity of hypervisor there.

* The firmware isn't involved in SMP bringup or resume.

* The ARCH timer comes up with an uninitialized offset between the
  virtual and physical counters.  Each core gets a different random
  offset.

* The device boots in "Secure SVC" mode.

* Nothing has touched the reset value of CNTHCTL.PL1PCEN or
  CNTHCTL.PL1PCTEN (both default to 1 at reset)

One example of such a system is RK3288, where it is much simpler to
use the physical counter since there's nobody managing the offset and
each time a core goes down and comes back up it will get reinitialized
to some other random value.

Fixes: 0d651e4e65e9 ("clocksource: arch_timer: use virtual counters")
Cc: stable@vger.kernel.org
Signed-off-by: Sonny Rao <sonnyrao@chromium.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Olof Johansson <olof@lixom.net>

diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index 92793ba..d4ebf56 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -78,6 +78,15 @@ static inline u32 arch_timer_get_cntfrq(void)
 	return val;
 }
 
+static inline u64 arch_counter_get_cntpct(void)
+{
+	u64 cval;
+
+	isb();
+	asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval));
+	return cval;
+}
+
 static inline u64 arch_counter_get_cntvct(void)
 {
 	u64 cval;
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index f190971..b1fa4e6 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -104,6 +104,15 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl)
 	asm volatile("msr	cntkctl_el1, %0" : : "r" (cntkctl));
 }
 
+static inline u64 arch_counter_get_cntpct(void)
+{
+	/*
+	 * AArch64 kernel and user space mandate the use of CNTVCT.
+	 */
+	BUG();
+	return 0;
+}
+
 static inline u64 arch_counter_get_cntvct(void)
 {
 	u64 cval;
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 2133f9d..55256e4 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -462,7 +462,10 @@ static void __init arch_counter_register(unsigned type)
 
 	/* Register the CP15 based counter if we have one */
 	if (type & ARCH_CP15_TIMER) {
-		arch_timer_read_counter = arch_counter_get_cntvct;
+		if (arch_timer_use_virtual)
+			arch_timer_read_counter = arch_counter_get_cntvct;
+		else
+			arch_timer_read_counter = arch_counter_get_cntpct;
 	} else {
 		arch_timer_read_counter = arch_counter_get_cntvct_mem;
 
-- 
cgit v0.10.2


From 65b5732d241b8b39e07653794eefffd0d8028cbb Mon Sep 17 00:00:00 2001
From: Doug Anderson <dianders@chromium.org>
Date: Wed, 8 Oct 2014 00:33:47 -0700
Subject: clocksource: arch_timer: Allow the device tree to specify
 uninitialized timer registers

Some 32-bit (ARMv7) systems are architected like this:

* The firmware doesn't know and doesn't care about hypervisor mode and
  we don't want to add the complexity of hypervisor there.

* The firmware isn't involved in SMP bringup or resume.

* The ARCH timer comes up with an uninitialized offset (CNTVOFF)
  between the virtual and physical counters.  Each core gets a
  different random offset.

* The device boots in "Secure SVC" mode.

* Nothing has touched the reset value of CNTHCTL.PL1PCEN or
  CNTHCTL.PL1PCTEN (both default to 1 at reset)

On systems like the above, it doesn't make sense to use the virtual
counter.  There's nobody managing the offset and each time a core goes
down and comes back up it will get reinitialized to some other random
value.

This adds an optional property which can inform the kernel of this
situation, and firmware is free to remove the property if it is going
to initialize the CNTVOFF registers when each CPU comes out of reset.

Currently, the best course of action in this case is to use the
physical timer, which is why it is important that CNTHCTL hasn't been
changed from its reset value and it's a reasonable assumption given
that the firmware has never entered HYP mode.

Note that it's been said that on ARMv8 systems the firmware and
kernel really can't be architected as described above.  That means
using the physical timer like this really only makes sense for ARMv7
systems.

Signed-off-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Sonny Rao <sonnyrao@chromium.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Olof Johansson <olof@lixom.net>

diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt
index 37b2caf..256b4d8 100644
--- a/Documentation/devicetree/bindings/arm/arch_timer.txt
+++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
@@ -22,6 +22,14 @@ to deliver its interrupts via SPIs.
 - always-on : a boolean property. If present, the timer is powered through an
   always-on power domain, therefore it never loses context.
 
+** Optional properties:
+
+- arm,cpu-registers-not-fw-configured : Firmware does not initialize
+  any of the generic timer CPU registers, which contain their
+  architecturally-defined reset values. Only supported for 32-bit
+  systems which follow the ARMv7 architected reset values.
+
+
 Example:
 
 	timer {
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 55256e4..6967cb0 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -705,6 +705,14 @@ static void __init arch_timer_init(struct device_node *np)
 	arch_timer_detect_rate(NULL, np);
 
 	/*
+	 * If we cannot rely on firmware initializing the timer registers then
+	 * we should use the physical timers instead.
+	 */
+	if (IS_ENABLED(CONFIG_ARM) &&
+	    of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
+		arch_timer_use_virtual = false;
+
+	/*
 	 * If HYP mode is available, we know that the physical timer
 	 * has been configured to be accessible from PL1. Use it, so
 	 * that a guest can use the virtual timer instead.
-- 
cgit v0.10.2