From cb9d7707cd9be57830f31616233f6a872ca8416d Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Mon, 12 Jul 2010 21:50:59 +0100
Subject: ARM: 6222/1: add memory types for the TCMs

The earlier TCM memory regions were mapped as MT_MEMORY_UNCACHED
which doesn't really work on platforms supporting the new v6
features like the NX bit. Add unique MT_MEMORY_[I|D]TCM types
instead.

Cc: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h
index 742c2aaeb..d2fedb5 100644
--- a/arch/arm/include/asm/mach/map.h
+++ b/arch/arm/include/asm/mach/map.h
@@ -27,6 +27,8 @@ struct map_desc {
 #define MT_MEMORY		9
 #define MT_ROM			10
 #define MT_MEMORY_NONCACHED	11
+#define MT_MEMORY_DTCM		12
+#define MT_MEMORY_ITCM		13
 
 #ifdef CONFIG_MMU
 extern void iotable_init(struct map_desc *, int);
diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c
index e503038..0c62aa2 100644
--- a/arch/arm/kernel/tcm.c
+++ b/arch/arm/kernel/tcm.c
@@ -13,7 +13,6 @@
 #include <linux/ioport.h>
 #include <linux/genalloc.h>
 #include <linux/string.h> /* memcpy */
-#include <asm/page.h> /* PAGE_SHIFT */
 #include <asm/cputype.h>
 #include <asm/mach/map.h>
 #include <mach/memory.h>
@@ -53,7 +52,7 @@ static struct map_desc dtcm_iomap[] __initdata = {
 		.virtual	= DTCM_OFFSET,
 		.pfn		= __phys_to_pfn(DTCM_OFFSET),
 		.length		= (DTCM_END - DTCM_OFFSET + 1),
-		.type		= MT_UNCACHED
+		.type		= MT_MEMORY_DTCM
 	}
 };
 
@@ -62,7 +61,7 @@ static struct map_desc itcm_iomap[] __initdata = {
 		.virtual	= ITCM_OFFSET,
 		.pfn		= __phys_to_pfn(ITCM_OFFSET),
 		.length		= (ITCM_END - ITCM_OFFSET + 1),
-		.type		= MT_UNCACHED
+		.type		= MT_MEMORY_ITCM
 	}
 };
 
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 2858941..e534801 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -258,6 +258,19 @@ static struct mem_type mem_types[] = {
 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
 		.domain    = DOMAIN_KERNEL,
 	},
+	[MT_MEMORY_DTCM] = {
+		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG |
+		                  L_PTE_DIRTY | L_PTE_WRITE,
+		.prot_l1	= PMD_TYPE_TABLE,
+		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
+		.domain		= DOMAIN_KERNEL,
+	},
+	[MT_MEMORY_ITCM] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+				L_PTE_USER | L_PTE_EXEC,
+		.prot_l1   = PMD_TYPE_TABLE,
+		.domain    = DOMAIN_IO,
+	},
 };
 
 const struct mem_type *get_mem_type(unsigned int type)
-- 
cgit v0.10.2


From 598509779e5b8037d371df764d7438744a24b61f Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Mon, 12 Jul 2010 21:51:41 +0100
Subject: ARM: 6223/1: support multiple TCM banks

CPUs v6 and up support multiple TCM banks, for example an ITCM of
8k is supplied in two 4k banks. This makes the TCM work on the
1176JZF-S devchip.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c
index 0c62aa2..f2ead32 100644
--- a/arch/arm/kernel/tcm.c
+++ b/arch/arm/kernel/tcm.c
@@ -92,14 +92,24 @@ void tcm_free(void *addr, size_t len)
 }
 EXPORT_SYMBOL(tcm_free);
 
-
-static void __init setup_tcm_bank(u8 type, u32 offset, u32 expected_size)
+static void __init setup_tcm_bank(u8 type, u8 bank, u8 banks,
+				  u32 offset, u32 expected_size)
 {
 	const int tcm_sizes[16] = { 0, -1, -1, 4, 8, 16, 32, 64, 128,
 				    256, 512, 1024, -1, -1, -1, -1 };
 	u32 tcm_region;
 	int tcm_size;
 
+	/*
+	 * If there are more than one TCM bank of this type,
+	 * select the TCM bank to operate on in the TCM selection
+	 * register.
+	 */
+	if (banks > 1)
+		asm("mcr	p15, 0, %0, c9, c2, 0"
+		    : /* No output operands */
+		    : "r" (bank));
+
 	/* Read the special TCM region register c9, 0 */
 	if (!type)
 		asm("mrc	p15, 0, %0, c9, c1, 0"
@@ -110,21 +120,23 @@ static void __init setup_tcm_bank(u8 type, u32 offset, u32 expected_size)
 
 	tcm_size = tcm_sizes[(tcm_region >> 2) & 0x0f];
 	if (tcm_size < 0) {
-		pr_err("CPU: %sTCM of unknown size!\n",
-			type ? "I" : "D");
+		pr_err("CPU: %sTCM%d of unknown size!\n",
+		       type ? "I" : "D", bank);
 	} else {
-		pr_info("CPU: found %sTCM %dk @ %08x, %senabled\n",
+		pr_info("CPU: found %sTCM%d %dk @ %08x, %senabled\n",
 			type ? "I" : "D",
+			bank,
 			tcm_size,
 			(tcm_region & 0xfffff000U),
 			(tcm_region & 1) ? "" : "not ");
 	}
 
-	if (tcm_size != expected_size) {
-		pr_crit("CPU: %sTCM was detected %dk but expected %dk!\n",
-		       type ? "I" : "D",
-		       tcm_size,
-		       expected_size);
+	if (tcm_size != (expected_size >> 10)) {
+		pr_crit("CPU: %sTCM%d was detected %dk but expected %dk!\n",
+			type ? "I" : "D",
+			bank,
+			tcm_size,
+			(expected_size >> 10));
 		/* Adjust to the expected size? what can we do... */
 	}
 
@@ -140,26 +152,37 @@ static void __init setup_tcm_bank(u8 type, u32 offset, u32 expected_size)
 		    : /* No output operands */
 		    : "r" (tcm_region));
 
-	pr_debug("CPU: moved %sTCM %dk to %08x, enabled\n",
-		 type ? "I" : "D",
-		 tcm_size,
-		 (tcm_region & 0xfffff000U));
+	pr_info("CPU: moved %sTCM%d %dk to %08x, enabled\n",
+		type ? "I" : "D",
+		bank,
+		tcm_size,
+		(tcm_region & 0xfffff000U));
 }
 
+/* We expect to find what is configured for the platform */
+#define DTCM_EXPECTED (DTCM_END - DTCM_OFFSET + 1)
+#define ITCM_EXPECTED (ITCM_END - ITCM_OFFSET + 1)
+
 /*
  * This initializes the TCM memory
  */
 void __init tcm_init(void)
 {
 	u32 tcm_status = read_cpuid_tcmstatus();
+	u8 dtcm_banks = (tcm_status >> 16) & 0x03;
+	u32 dtcm_banksize = DTCM_EXPECTED / dtcm_banks;
+	u8 itcm_banks = (tcm_status & 0x03);
+	u32 itcm_banksize = ITCM_EXPECTED / itcm_banks;
 	char *start;
 	char *end;
 	char *ram;
+	int i;
 
 	/* Setup DTCM if present */
-	if (tcm_status & (1 << 16)) {
-		setup_tcm_bank(0, DTCM_OFFSET,
-			       (DTCM_END - DTCM_OFFSET + 1) >> 10);
+	for (i = 0; i < dtcm_banks; i++) {
+		setup_tcm_bank(0, i, dtcm_banks,
+			       DTCM_OFFSET + (i * dtcm_banksize),
+			       dtcm_banksize);
 		request_resource(&iomem_resource, &dtcm_res);
 		iotable_init(dtcm_iomap, 1);
 		/* Copy data from RAM to DTCM */
@@ -171,9 +194,10 @@ void __init tcm_init(void)
 	}
 
 	/* Setup ITCM if present */
-	if (tcm_status & 1) {
-		setup_tcm_bank(1, ITCM_OFFSET,
-			       (ITCM_END - ITCM_OFFSET + 1) >> 10);
+	for (i = 0; i < itcm_banks; i++) {
+		setup_tcm_bank(1, i, itcm_banks,
+			       ITCM_OFFSET + (i * itcm_banksize),
+			       itcm_banksize);
 		request_resource(&iomem_resource, &itcm_res);
 		iotable_init(itcm_iomap, 1);
 		/* Copy code from RAM to ITCM */
@@ -207,7 +231,7 @@ static int __init setup_tcm_pool(void)
 	pr_debug("Setting up TCM memory pool\n");
 
 	/* Add the rest of DTCM to the TCM pool */
-	if (tcm_status & (1 << 16)) {
+	if (tcm_status & (0x03 << 16)) {
 		if (dtcm_pool_start < DTCM_END) {
 			ret = gen_pool_add(tcm_pool, dtcm_pool_start,
 					   DTCM_END - dtcm_pool_start + 1, -1);
@@ -224,7 +248,7 @@ static int __init setup_tcm_pool(void)
 	}
 
 	/* Add the rest of ITCM to the TCM pool */
-	if (tcm_status & 1) {
+	if (tcm_status & 0x03) {
 		if (itcm_pool_start < ITCM_END) {
 			ret = gen_pool_add(tcm_pool, itcm_pool_start,
 					   ITCM_END - itcm_pool_start + 1, -1);
-- 
cgit v0.10.2


From 07d2a5c721c6aa2bd69812a74c8b3b116abf3e56 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Mon, 12 Jul 2010 21:52:34 +0100
Subject: ARM: 6224/1: print TCM whereabouts in init message

If TCM is in use, we should display it in the virtual memory
layout along with everything else.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index f6a9994..526af48 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -611,6 +611,14 @@ void __init mem_init(void)
 
 	printk(KERN_NOTICE "Virtual kernel memory layout:\n"
 			"    vector  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+#ifdef CONFIG_HAVE_TCM
+#ifdef DTCM_OFFSET
+			"    DTCM    : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+#endif
+#ifdef ITCM_OFFSET
+			"    ITCM    : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+#endif
+#endif
 			"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #ifdef CONFIG_MMU
 			"    DMA     : 0x%08lx - 0x%08lx   (%4ld MB)\n"
@@ -627,6 +635,14 @@ void __init mem_init(void)
 
 			MLK(UL(CONFIG_VECTORS_BASE), UL(CONFIG_VECTORS_BASE) +
 				(PAGE_SIZE)),
+#ifdef CONFIG_HAVE_TCM
+#ifdef DTCM_OFFSET
+			MLK(UL(DTCM_OFFSET), UL(DTCM_END + 1)),
+#endif
+#ifdef ITCM_OFFSET
+			MLK(UL(ITCM_OFFSET), UL(ITCM_END + 1)),
+#endif
+#endif
 			MLK(FIXADDR_START, FIXADDR_TOP),
 #ifdef CONFIG_MMU
 			MLM(CONSISTENT_BASE, CONSISTENT_END),
-- 
cgit v0.10.2


From 1dbd30e9890fd69e50b17edd70ca583546b0fe4e Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Mon, 12 Jul 2010 21:53:28 +0100
Subject: ARM: 6225/1: make TCM allocation static and common for all archs

This changes the TCM handling so that a fixed area is reserved at
0xfffe0000-0xfffeffff for TCM. This areas is used by XScale but
XScale does not have TCM so the mechanisms are mutually exclusive.

This change is needed to make TCM detection more dynamic while
still being able to compile code into it, and is a must for the
unified ARM goals: the current TCM allocation at different places
in memory for each machine would be a nightmare if you want to
compile a single image for more than one machine with TCM so it
has to be nailed down in one place.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/Documentation/arm/memory.txt b/Documentation/arm/memory.txt
index eb0fae1..771d48d 100644
--- a/Documentation/arm/memory.txt
+++ b/Documentation/arm/memory.txt
@@ -33,7 +33,13 @@ ffff0000	ffff0fff	CPU vector page.
 
 fffe0000	fffeffff	XScale cache flush area.  This is used
 				in proc-xscale.S to flush the whole data
-				cache.  Free for other usage on non-XScale.
+				cache. (XScale does not have TCM.)
+
+fffe8000	fffeffff	DTCM mapping area for platforms with
+				DTCM mounted inside the CPU.
+
+fffe0000	fffe7fff	ITCM mapping area for platforms with
+				ITCM mounted inside the CPU.
 
 fff00000	fffdffff	Fixmap mapping region.  Addresses provided
 				by fix_to_virt() will be located here.
diff --git a/Documentation/arm/tcm.txt b/Documentation/arm/tcm.txt
index 77fd937..7c15871 100644
--- a/Documentation/arm/tcm.txt
+++ b/Documentation/arm/tcm.txt
@@ -19,8 +19,8 @@ defines a CPUID_TCM register that you can read out from the
 system control coprocessor. Documentation from ARM can be found
 at http://infocenter.arm.com, search for "TCM Status Register"
 to see documents for all CPUs. Reading this register you can
-determine if ITCM (bit 0) and/or DTCM (bit 16) is present in the
-machine.
+determine if ITCM (bits 1-0) and/or DTCM (bit 17-16) is present
+in the machine.
 
 There is further a TCM region register (search for "TCM Region
 Registers" at the ARM site) that can report and modify the location
@@ -35,7 +35,15 @@ The TCM memory can then be remapped to another address again using
 the MMU, but notice that the TCM if often used in situations where
 the MMU is turned off. To avoid confusion the current Linux
 implementation will map the TCM 1 to 1 from physical to virtual
-memory in the location specified by the machine.
+memory in the location specified by the kernel. Currently Linux
+will map ITCM to 0xfffe0000 and on, and DTCM to 0xfffe8000 and
+on, supporting a maximum of 32KiB of ITCM and 32KiB of DTCM.
+
+Newer versions of the region registers also support dividing these
+TCMs in two separate banks, so for example an 8KiB ITCM is divided
+into two 4KiB banks with its own control registers. The idea is to
+be able to lock and hide one of the banks for use by the secure
+world (TrustZone).
 
 TCM is used for a few things:
 
@@ -65,18 +73,18 @@ in <asm/tcm.h>. Using this interface it is possible to:
   memory. Such a heap is great for things like saving
   device state when shutting off device power domains.
 
-A machine that has TCM memory shall select HAVE_TCM in
-arch/arm/Kconfig for itself, and then the
-rest of the functionality will depend on the physical
-location and size of ITCM and DTCM to be defined in
-mach/memory.h for the machine. Code that needs to use
-TCM shall #include <asm/tcm.h> If the TCM is not located
-at the place given in memory.h it will be moved using
-the TCM Region registers.
+A machine that has TCM memory shall select HAVE_TCM from
+arch/arm/Kconfig for itself. Code that needs to use TCM shall
+#include <asm/tcm.h>
 
 Functions to go into itcm can be tagged like this:
 int __tcmfunc foo(int bar);
 
+Since these are marked to become long_calls and you may want
+to have functions called locally inside the TCM without
+wasting space, there is also the __tcmlocalfunc prefix that
+will make the call relative.
+
 Variables to go into dtcm can be tagged like this:
 int __tcmdata foo;
 
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 4312ee5..ab08d97 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -124,6 +124,15 @@
 #endif /* !CONFIG_MMU */
 
 /*
+ * We fix the TCM memories max 32 KiB ITCM resp DTCM at these
+ * locations
+ */
+#ifdef CONFIG_HAVE_TCM
+#define ITCM_OFFSET	UL(0xfffe0000)
+#define DTCM_OFFSET	UL(0xfffe8000)
+#endif
+
+/*
  * Physical vs virtual RAM address space conversion.  These are
  * private definitions which should NOT be used outside memory.h
  * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.
diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c
index f2ead32..26685c2 100644
--- a/arch/arm/kernel/tcm.c
+++ b/arch/arm/kernel/tcm.c
@@ -18,32 +18,30 @@
 #include <mach/memory.h>
 #include "tcm.h"
 
-/* Scream and warn about misuse */
-#if !defined(ITCM_OFFSET) || !defined(ITCM_END) || \
-    !defined(DTCM_OFFSET) || !defined(DTCM_END)
-#error "TCM support selected but offsets not defined!"
-#endif
-
 static struct gen_pool *tcm_pool;
 
 /* TCM section definitions from the linker */
 extern char __itcm_start, __sitcm_text, __eitcm_text;
 extern char __dtcm_start, __sdtcm_data, __edtcm_data;
 
+/* These will be increased as we run */
+u32 dtcm_end = DTCM_OFFSET;
+u32 itcm_end = ITCM_OFFSET;
+
 /*
  * TCM memory resources
  */
 static struct resource dtcm_res = {
 	.name = "DTCM RAM",
 	.start = DTCM_OFFSET,
-	.end = DTCM_END,
+	.end = DTCM_OFFSET,
 	.flags = IORESOURCE_MEM
 };
 
 static struct resource itcm_res = {
 	.name = "ITCM RAM",
 	.start = ITCM_OFFSET,
-	.end = ITCM_END,
+	.end = ITCM_OFFSET,
 	.flags = IORESOURCE_MEM
 };
 
@@ -51,7 +49,7 @@ static struct map_desc dtcm_iomap[] __initdata = {
 	{
 		.virtual	= DTCM_OFFSET,
 		.pfn		= __phys_to_pfn(DTCM_OFFSET),
-		.length		= (DTCM_END - DTCM_OFFSET + 1),
+		.length		= 0,
 		.type		= MT_MEMORY_DTCM
 	}
 };
@@ -60,7 +58,7 @@ static struct map_desc itcm_iomap[] __initdata = {
 	{
 		.virtual	= ITCM_OFFSET,
 		.pfn		= __phys_to_pfn(ITCM_OFFSET),
-		.length		= (ITCM_END - ITCM_OFFSET + 1),
+		.length		= 0,
 		.type		= MT_MEMORY_ITCM
 	}
 };
@@ -92,8 +90,8 @@ void tcm_free(void *addr, size_t len)
 }
 EXPORT_SYMBOL(tcm_free);
 
-static void __init setup_tcm_bank(u8 type, u8 bank, u8 banks,
-				  u32 offset, u32 expected_size)
+static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks,
+				  u32 *offset)
 {
 	const int tcm_sizes[16] = { 0, -1, -1, 4, 8, 16, 32, 64, 128,
 				    256, 512, 1024, -1, -1, -1, -1 };
@@ -120,8 +118,13 @@ static void __init setup_tcm_bank(u8 type, u8 bank, u8 banks,
 
 	tcm_size = tcm_sizes[(tcm_region >> 2) & 0x0f];
 	if (tcm_size < 0) {
-		pr_err("CPU: %sTCM%d of unknown size!\n",
+		pr_err("CPU: %sTCM%d of unknown size\n",
 		       type ? "I" : "D", bank);
+		return -EINVAL;
+	} else if (tcm_size > 32) {
+		pr_err("CPU: %sTCM%d larger than 32k found\n",
+		       type ? "I" : "D", bank);
+		return -EINVAL;
 	} else {
 		pr_info("CPU: found %sTCM%d %dk @ %08x, %senabled\n",
 			type ? "I" : "D",
@@ -131,17 +134,8 @@ static void __init setup_tcm_bank(u8 type, u8 bank, u8 banks,
 			(tcm_region & 1) ? "" : "not ");
 	}
 
-	if (tcm_size != (expected_size >> 10)) {
-		pr_crit("CPU: %sTCM%d was detected %dk but expected %dk!\n",
-			type ? "I" : "D",
-			bank,
-			tcm_size,
-			(expected_size >> 10));
-		/* Adjust to the expected size? what can we do... */
-	}
-
 	/* Force move the TCM bank to where we want it, enable */
-	tcm_region = offset | (tcm_region & 0x00000ffeU) | 1;
+	tcm_region = *offset | (tcm_region & 0x00000ffeU) | 1;
 
 	if (!type)
 		asm("mcr	p15, 0, %0, c9, c1, 0"
@@ -152,17 +146,17 @@ static void __init setup_tcm_bank(u8 type, u8 bank, u8 banks,
 		    : /* No output operands */
 		    : "r" (tcm_region));
 
+	/* Increase offset */
+	*offset += (tcm_size << 10);
+
 	pr_info("CPU: moved %sTCM%d %dk to %08x, enabled\n",
 		type ? "I" : "D",
 		bank,
 		tcm_size,
 		(tcm_region & 0xfffff000U));
+	return 0;
 }
 
-/* We expect to find what is configured for the platform */
-#define DTCM_EXPECTED (DTCM_END - DTCM_OFFSET + 1)
-#define ITCM_EXPECTED (ITCM_END - ITCM_OFFSET + 1)
-
 /*
  * This initializes the TCM memory
  */
@@ -170,40 +164,51 @@ void __init tcm_init(void)
 {
 	u32 tcm_status = read_cpuid_tcmstatus();
 	u8 dtcm_banks = (tcm_status >> 16) & 0x03;
-	u32 dtcm_banksize = DTCM_EXPECTED / dtcm_banks;
 	u8 itcm_banks = (tcm_status & 0x03);
-	u32 itcm_banksize = ITCM_EXPECTED / itcm_banks;
 	char *start;
 	char *end;
 	char *ram;
+	int ret;
 	int i;
 
 	/* Setup DTCM if present */
-	for (i = 0; i < dtcm_banks; i++) {
-		setup_tcm_bank(0, i, dtcm_banks,
-			       DTCM_OFFSET + (i * dtcm_banksize),
-			       dtcm_banksize);
+	if (dtcm_banks > 0) {
+		for (i = 0; i < dtcm_banks; i++) {
+			ret = setup_tcm_bank(0, i, dtcm_banks, &dtcm_end);
+			if (ret)
+				return;
+		}
+		dtcm_res.end = dtcm_end - 1;
 		request_resource(&iomem_resource, &dtcm_res);
+		dtcm_iomap[0].length = dtcm_end - DTCM_OFFSET;
 		iotable_init(dtcm_iomap, 1);
 		/* Copy data from RAM to DTCM */
 		start = &__sdtcm_data;
 		end   = &__edtcm_data;
 		ram   = &__dtcm_start;
+		/* This means you compiled more code than fits into DTCM */
+		BUG_ON((end - start) > (dtcm_end - DTCM_OFFSET));
 		memcpy(start, ram, (end-start));
 		pr_debug("CPU DTCM: copied data from %p - %p\n", start, end);
 	}
 
 	/* Setup ITCM if present */
-	for (i = 0; i < itcm_banks; i++) {
-		setup_tcm_bank(1, i, itcm_banks,
-			       ITCM_OFFSET + (i * itcm_banksize),
-			       itcm_banksize);
+	if (itcm_banks > 0) {
+		for (i = 0; i < itcm_banks; i++) {
+			ret = setup_tcm_bank(1, i, itcm_banks, &itcm_end);
+			if (ret)
+				return;
+		}
+		itcm_res.end = itcm_end - 1;
 		request_resource(&iomem_resource, &itcm_res);
+		itcm_iomap[0].length = itcm_end - ITCM_OFFSET;
 		iotable_init(itcm_iomap, 1);
 		/* Copy code from RAM to ITCM */
 		start = &__sitcm_text;
 		end   = &__eitcm_text;
 		ram   = &__itcm_start;
+		/* This means you compiled more code than fits into ITCM */
+		BUG_ON((end - start) > (itcm_end - ITCM_OFFSET));
 		memcpy(start, ram, (end-start));
 		pr_debug("CPU ITCM: copied code from %p - %p\n", start, end);
 	}
@@ -232,9 +237,9 @@ static int __init setup_tcm_pool(void)
 
 	/* Add the rest of DTCM to the TCM pool */
 	if (tcm_status & (0x03 << 16)) {
-		if (dtcm_pool_start < DTCM_END) {
+		if (dtcm_pool_start < dtcm_end) {
 			ret = gen_pool_add(tcm_pool, dtcm_pool_start,
-					   DTCM_END - dtcm_pool_start + 1, -1);
+					   dtcm_end - dtcm_pool_start, -1);
 			if (ret) {
 				pr_err("CPU DTCM: could not add DTCM " \
 				       "remainder to pool!\n");
@@ -242,16 +247,16 @@ static int __init setup_tcm_pool(void)
 			}
 			pr_debug("CPU DTCM: Added %08x bytes @ %08x to " \
 				 "the TCM memory pool\n",
-				 DTCM_END - dtcm_pool_start + 1,
+				 dtcm_end - dtcm_pool_start,
 				 dtcm_pool_start);
 		}
 	}
 
 	/* Add the rest of ITCM to the TCM pool */
 	if (tcm_status & 0x03) {
-		if (itcm_pool_start < ITCM_END) {
+		if (itcm_pool_start < itcm_end) {
 			ret = gen_pool_add(tcm_pool, itcm_pool_start,
-					   ITCM_END - itcm_pool_start + 1, -1);
+					   itcm_end - itcm_pool_start, -1);
 			if (ret) {
 				pr_err("CPU ITCM: could not add ITCM " \
 				       "remainder to pool!\n");
@@ -259,7 +264,7 @@ static int __init setup_tcm_pool(void)
 			}
 			pr_debug("CPU ITCM: Added %08x bytes @ %08x to " \
 				 "the TCM memory pool\n",
-				 ITCM_END - itcm_pool_start + 1,
+				 itcm_end - itcm_pool_start,
 				 itcm_pool_start);
 		}
 	}
diff --git a/arch/arm/mach-u300/include/mach/memory.h b/arch/arm/mach-u300/include/mach/memory.h
index ab000df..bf134bc 100644
--- a/arch/arm/mach-u300/include/mach/memory.h
+++ b/arch/arm/mach-u300/include/mach/memory.h
@@ -35,14 +35,6 @@
 #endif
 
 /*
- * TCM memory whereabouts
- */
-#define ITCM_OFFSET	0xffff2000
-#define ITCM_END	0xffff3fff
-#define DTCM_OFFSET	0xffff4000
-#define DTCM_END	0xffff5fff
-
-/*
  * We enable a real big DMA buffer if need be.
  */
 #define CONSISTENT_DMA_SIZE SZ_4M
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 526af48..e00404e 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -529,6 +529,11 @@ void __init mem_init(void)
 {
 	unsigned long reserved_pages, free_pages;
 	int i, node;
+#ifdef CONFIG_HAVE_TCM
+	/* These pointers are filled in on TCM detection */
+	extern u32 dtcm_end;
+	extern u32 itcm_end;
+#endif
 
 #ifndef CONFIG_DISCONTIGMEM
 	max_mapnr   = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
@@ -612,13 +617,9 @@ void __init mem_init(void)
 	printk(KERN_NOTICE "Virtual kernel memory layout:\n"
 			"    vector  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #ifdef CONFIG_HAVE_TCM
-#ifdef DTCM_OFFSET
 			"    DTCM    : 0x%08lx - 0x%08lx   (%4ld kB)\n"
-#endif
-#ifdef ITCM_OFFSET
 			"    ITCM    : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #endif
-#endif
 			"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #ifdef CONFIG_MMU
 			"    DMA     : 0x%08lx - 0x%08lx   (%4ld MB)\n"
@@ -636,12 +637,8 @@ void __init mem_init(void)
 			MLK(UL(CONFIG_VECTORS_BASE), UL(CONFIG_VECTORS_BASE) +
 				(PAGE_SIZE)),
 #ifdef CONFIG_HAVE_TCM
-#ifdef DTCM_OFFSET
-			MLK(UL(DTCM_OFFSET), UL(DTCM_END + 1)),
-#endif
-#ifdef ITCM_OFFSET
-			MLK(UL(ITCM_OFFSET), UL(ITCM_END + 1)),
-#endif
+			MLK(DTCM_OFFSET, (unsigned long) dtcm_end),
+			MLK(ITCM_OFFSET, (unsigned long) itcm_end),
 #endif
 			MLK(FIXADDR_START, FIXADDR_TOP),
 #ifdef CONFIG_MMU
-- 
cgit v0.10.2


From c58bbd39f876955be6e072748fdfe2b671f9d939 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 8 Jul 2010 10:59:59 +0100
Subject: ARM: 6213/1: atomic64_test: add ARM as supported architecture

ARM has support for the atomic64_dec_if_positive operation
so ensure that it is tested by the atomic64_test routine.

Acked-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 250ed11..44524cc 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -114,7 +114,7 @@ static __init int test_atomic64(void)
 	BUG_ON(v.counter != r);
 
 #if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || \
-    defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H)
+    defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H) || defined(CONFIG_ARM)
 	INIT(onestwos);
 	BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1));
 	r -= one;
-- 
cgit v0.10.2


From ec489aa8f993f8d2ec962ce113071faac482aa27 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Wed, 2 Jun 2010 08:13:52 +0100
Subject: ARM: 6157/2: PL011 TX/RX split of LCR for ST-Ericssons derivative

In the ST-Ericsson version of the PL011 the TX and RX have different
control registers.

Cc: Alessandro Rubini <rubini@unipv.it>
Signed-off-by: Marcin Mielczarczyk <marcin.mielczarczyk@tieto.com>
Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index eb4cb48..5644cf2 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -69,9 +69,11 @@
 struct uart_amba_port {
 	struct uart_port	port;
 	struct clk		*clk;
-	unsigned int		im;	/* interrupt mask */
+	unsigned int		im;		/* interrupt mask */
 	unsigned int		old_status;
-	unsigned int		ifls;	/* vendor-specific */
+	unsigned int		ifls;		/* vendor-specific */
+	unsigned int		lcrh_tx;	/* vendor-specific */
+	unsigned int		lcrh_rx;	/* vendor-specific */
 	bool			autorts;
 };
 
@@ -79,16 +81,22 @@ struct uart_amba_port {
 struct vendor_data {
 	unsigned int		ifls;
 	unsigned int		fifosize;
+	unsigned int		lcrh_tx;
+	unsigned int		lcrh_rx;
 };
 
 static struct vendor_data vendor_arm = {
 	.ifls			= UART011_IFLS_RX4_8|UART011_IFLS_TX4_8,
 	.fifosize		= 16,
+	.lcrh_tx		= UART011_LCRH,
+	.lcrh_rx		= UART011_LCRH,
 };
 
 static struct vendor_data vendor_st = {
 	.ifls			= UART011_IFLS_RX_HALF|UART011_IFLS_TX_HALF,
 	.fifosize		= 64,
+	.lcrh_tx		= ST_UART011_LCRH_TX,
+	.lcrh_rx		= ST_UART011_LCRH_RX,
 };
 
 static void pl011_stop_tx(struct uart_port *port)
@@ -327,12 +335,12 @@ static void pl011_break_ctl(struct uart_port *port, int break_state)
 	unsigned int lcr_h;
 
 	spin_lock_irqsave(&uap->port.lock, flags);
-	lcr_h = readw(uap->port.membase + UART011_LCRH);
+	lcr_h = readw(uap->port.membase + uap->lcrh_tx);
 	if (break_state == -1)
 		lcr_h |= UART01x_LCRH_BRK;
 	else
 		lcr_h &= ~UART01x_LCRH_BRK;
-	writew(lcr_h, uap->port.membase + UART011_LCRH);
+	writew(lcr_h, uap->port.membase + uap->lcrh_tx);
 	spin_unlock_irqrestore(&uap->port.lock, flags);
 }
 
@@ -393,7 +401,17 @@ static int pl011_startup(struct uart_port *port)
 	writew(cr, uap->port.membase + UART011_CR);
 	writew(0, uap->port.membase + UART011_FBRD);
 	writew(1, uap->port.membase + UART011_IBRD);
-	writew(0, uap->port.membase + UART011_LCRH);
+	writew(0, uap->port.membase + uap->lcrh_rx);
+	if (uap->lcrh_tx != uap->lcrh_rx) {
+		int i;
+		/*
+		 * Wait 10 PCLKs before writing LCRH_TX register,
+		 * to get this delay write read only register 10 times
+		 */
+		for (i = 0; i < 10; ++i)
+			writew(0xff, uap->port.membase + UART011_MIS);
+		writew(0, uap->port.membase + uap->lcrh_tx);
+	}
 	writew(0, uap->port.membase + UART01x_DR);
 	while (readw(uap->port.membase + UART01x_FR) & UART01x_FR_BUSY)
 		barrier();
@@ -422,10 +440,19 @@ static int pl011_startup(struct uart_port *port)
 	return retval;
 }
 
+static void pl011_shutdown_channel(struct uart_amba_port *uap,
+					unsigned int lcrh)
+{
+      unsigned long val;
+
+      val = readw(uap->port.membase + lcrh);
+      val &= ~(UART01x_LCRH_BRK | UART01x_LCRH_FEN);
+      writew(val, uap->port.membase + lcrh);
+}
+
 static void pl011_shutdown(struct uart_port *port)
 {
 	struct uart_amba_port *uap = (struct uart_amba_port *)port;
-	unsigned long val;
 
 	/*
 	 * disable all interrupts
@@ -450,9 +477,9 @@ static void pl011_shutdown(struct uart_port *port)
 	/*
 	 * disable break condition and fifos
 	 */
-	val = readw(uap->port.membase + UART011_LCRH);
-	val &= ~(UART01x_LCRH_BRK | UART01x_LCRH_FEN);
-	writew(val, uap->port.membase + UART011_LCRH);
+	pl011_shutdown_channel(uap, uap->lcrh_rx);
+	if (uap->lcrh_rx != uap->lcrh_tx)
+		pl011_shutdown_channel(uap, uap->lcrh_tx);
 
 	/*
 	 * Shut down the clock producer
@@ -561,7 +588,17 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios,
 	 * NOTE: MUST BE WRITTEN AFTER UARTLCR_M & UARTLCR_L
 	 * ----------^----------^----------^----------^-----
 	 */
-	writew(lcr_h, port->membase + UART011_LCRH);
+	writew(lcr_h, port->membase + uap->lcrh_rx);
+	if (uap->lcrh_rx != uap->lcrh_tx) {
+		int i;
+		/*
+		 * Wait 10 PCLKs before writing LCRH_TX register,
+		 * to get this delay write read only register 10 times
+		 */
+		for (i = 0; i < 10; ++i)
+			writew(0xff, uap->port.membase + UART011_MIS);
+		writew(lcr_h, port->membase + uap->lcrh_tx);
+	}
 	writew(old_cr, port->membase + UART011_CR);
 
 	spin_unlock_irqrestore(&port->lock, flags);
@@ -688,7 +725,7 @@ pl011_console_get_options(struct uart_amba_port *uap, int *baud,
 	if (readw(uap->port.membase + UART011_CR) & UART01x_CR_UARTEN) {
 		unsigned int lcr_h, ibrd, fbrd;
 
-		lcr_h = readw(uap->port.membase + UART011_LCRH);
+		lcr_h = readw(uap->port.membase + uap->lcrh_tx);
 
 		*parity = 'n';
 		if (lcr_h & UART01x_LCRH_PEN) {
@@ -800,6 +837,8 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id)
 	}
 
 	uap->ifls = vendor->ifls;
+	uap->lcrh_rx = vendor->lcrh_rx;
+	uap->lcrh_tx = vendor->lcrh_tx;
 	uap->port.dev = &dev->dev;
 	uap->port.mapbase = dev->res.start;
 	uap->port.membase = base;
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index 5a5a7fd..93c96a6 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -38,10 +38,12 @@
 #define UART01x_FR		0x18	/* Flag register (Read only). */
 #define UART010_IIR		0x1C	/* Interrupt indentification register (Read). */
 #define UART010_ICR		0x1C	/* Interrupt clear register (Write). */
+#define ST_UART011_LCRH_RX	0x1C    /* Rx line control register. */
 #define UART01x_ILPR		0x20	/* IrDA low power counter register. */
 #define UART011_IBRD		0x24	/* Integer baud rate divisor register. */
 #define UART011_FBRD		0x28	/* Fractional baud rate divisor register. */
 #define UART011_LCRH		0x2c	/* Line control register. */
+#define ST_UART011_LCRH_TX	0x2c    /* Tx Line control register. */
 #define UART011_CR		0x30	/* Control register. */
 #define UART011_IFLS		0x34	/* Interrupt fifo level select. */
 #define UART011_IMSC		0x38	/* Interrupt mask. */
-- 
cgit v0.10.2


From ac3e3fb424d44109dda3b1a3459e1b30fa60ac4a Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Wed, 2 Jun 2010 20:40:22 +0100
Subject: ARM: 6158/2: PL011 baudrate extension for ST-Ericssons derivative

Implementation of the ST-Ericsson baudrate extension in the PL011
block. In this modified variant it is possible to change the
sampling factor from 16 to 8, and thanks to this we can get higher
baudrates while still using the same peripheral clock.

Also replace the simple division to determine the baud divisor
with DIV_ROUND_CLOSEST() rather than a simple integer division.

Cc: Alessandro Rubini <rubini@unipv.it>
Cc: Jerzy Kasenberg <jerzy.kasenberg@tieto.com>
Signed-off-by: Marcin Mielczarczyk <marcin.mielczarczyk@tieto.com>
Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index 5644cf2..f67e09d 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -74,6 +74,7 @@ struct uart_amba_port {
 	unsigned int		ifls;		/* vendor-specific */
 	unsigned int		lcrh_tx;	/* vendor-specific */
 	unsigned int		lcrh_rx;	/* vendor-specific */
+	bool			oversampling;   /* vendor-specific */
 	bool			autorts;
 };
 
@@ -83,6 +84,7 @@ struct vendor_data {
 	unsigned int		fifosize;
 	unsigned int		lcrh_tx;
 	unsigned int		lcrh_rx;
+	bool			oversampling;
 };
 
 static struct vendor_data vendor_arm = {
@@ -90,6 +92,7 @@ static struct vendor_data vendor_arm = {
 	.fifosize		= 16,
 	.lcrh_tx		= UART011_LCRH,
 	.lcrh_rx		= UART011_LCRH,
+	.oversampling		= false,
 };
 
 static struct vendor_data vendor_st = {
@@ -97,6 +100,7 @@ static struct vendor_data vendor_st = {
 	.fifosize		= 64,
 	.lcrh_tx		= ST_UART011_LCRH_TX,
 	.lcrh_rx		= ST_UART011_LCRH_RX,
+	.oversampling		= true,
 };
 
 static void pl011_stop_tx(struct uart_port *port)
@@ -499,8 +503,13 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios,
 	/*
 	 * Ask the core to calculate the divisor for us.
 	 */
-	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16);
-	quot = port->uartclk * 4 / baud;
+	baud = uart_get_baud_rate(port, termios, old, 0,
+				  port->uartclk/(uap->oversampling ? 8 : 16));
+
+	if (baud > port->uartclk/16)
+		quot = DIV_ROUND_CLOSEST(port->uartclk * 8, baud);
+	else
+		quot = DIV_ROUND_CLOSEST(port->uartclk * 4, baud);
 
 	switch (termios->c_cflag & CSIZE) {
 	case CS5:
@@ -579,6 +588,13 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios,
 		uap->autorts = false;
 	}
 
+	if (uap->oversampling) {
+		if (baud > port->uartclk/16)
+			old_cr |= ST_UART011_CR_OVSFACT;
+		else
+			old_cr &= ~ST_UART011_CR_OVSFACT;
+	}
+
 	/* Set baud rate */
 	writew(quot & 0x3f, port->membase + UART011_FBRD);
 	writew(quot >> 6, port->membase + UART011_IBRD);
@@ -744,6 +760,12 @@ pl011_console_get_options(struct uart_amba_port *uap, int *baud,
 		fbrd = readw(uap->port.membase + UART011_FBRD);
 
 		*baud = uap->port.uartclk * 4 / (64 * ibrd + fbrd);
+
+		if (uap->oversampling) {
+			if (readw(uap->port.membase + UART011_CR)
+				  & ST_UART011_CR_OVSFACT)
+				*baud *= 2;
+		}
 	}
 }
 
@@ -839,6 +861,7 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id)
 	uap->ifls = vendor->ifls;
 	uap->lcrh_rx = vendor->lcrh_rx;
 	uap->lcrh_tx = vendor->lcrh_tx;
+	uap->oversampling = vendor->oversampling;
 	uap->port.dev = &dev->dev;
 	uap->port.mapbase = dev->res.start;
 	uap->port.membase = base;
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index 93c96a6..e1b634b 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -86,6 +86,7 @@
 #define UART010_CR_TIE 		0x0020
 #define UART010_CR_RIE 		0x0010
 #define UART010_CR_MSIE		0x0008
+#define ST_UART011_CR_OVSFACT	0x0008	/* Oversampling factor */
 #define UART01x_CR_IIRLP	0x0004	/* SIR low power mode */
 #define UART01x_CR_SIREN	0x0002	/* SIR enable */
 #define UART01x_CR_UARTEN	0x0001	/* UART enable */
-- 
cgit v0.10.2


From d746196361c9c635128249bb6cf13e709ae6abe1 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Mon, 26 Jul 2010 10:29:13 +0100
Subject: ARM: use generic ioremap_page_range()

We don't need our own implementation of this, use the generic
library implementation instead.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 03f1193..ab50627 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -42,78 +42,11 @@
  */
 #define VM_ARM_SECTION_MAPPING	0x80000000
 
-static int remap_area_pte(pmd_t *pmd, unsigned long addr, unsigned long end,
-			  unsigned long phys_addr, const struct mem_type *type)
-{
-	pgprot_t prot = __pgprot(type->prot_pte);
-	pte_t *pte;
-
-	pte = pte_alloc_kernel(pmd, addr);
-	if (!pte)
-		return -ENOMEM;
-
-	do {
-		if (!pte_none(*pte))
-			goto bad;
-
-		set_pte_ext(pte, pfn_pte(phys_addr >> PAGE_SHIFT, prot), 0);
-		phys_addr += PAGE_SIZE;
-	} while (pte++, addr += PAGE_SIZE, addr != end);
-	return 0;
-
- bad:
-	printk(KERN_CRIT "remap_area_pte: page already exists\n");
-	BUG();
-}
-
-static inline int remap_area_pmd(pgd_t *pgd, unsigned long addr,
-				 unsigned long end, unsigned long phys_addr,
-				 const struct mem_type *type)
-{
-	unsigned long next;
-	pmd_t *pmd;
-	int ret = 0;
-
-	pmd = pmd_alloc(&init_mm, pgd, addr);
-	if (!pmd)
-		return -ENOMEM;
-
-	do {
-		next = pmd_addr_end(addr, end);
-		ret = remap_area_pte(pmd, addr, next, phys_addr, type);
-		if (ret)
-			return ret;
-		phys_addr += next - addr;
-	} while (pmd++, addr = next, addr != end);
-	return ret;
-}
-
-static int remap_area_pages(unsigned long start, unsigned long pfn,
-			    size_t size, const struct mem_type *type)
-{
-	unsigned long addr = start;
-	unsigned long next, end = start + size;
-	unsigned long phys_addr = __pfn_to_phys(pfn);
-	pgd_t *pgd;
-	int err = 0;
-
-	BUG_ON(addr >= end);
-	pgd = pgd_offset_k(addr);
-	do {
-		next = pgd_addr_end(addr, end);
-		err = remap_area_pmd(pgd, addr, next, phys_addr, type);
-		if (err)
-			break;
-		phys_addr += next - addr;
-	} while (pgd++, addr = next, addr != end);
-
-	return err;
-}
-
 int ioremap_page(unsigned long virt, unsigned long phys,
 		 const struct mem_type *mtype)
 {
-	return remap_area_pages(virt, __phys_to_pfn(phys), PAGE_SIZE, mtype);
+	return ioremap_page_range(virt, virt + PAGE_SIZE, phys,
+				  __pgprot(mtype->prot_pte));
 }
 EXPORT_SYMBOL(ioremap_page);
 
@@ -300,7 +233,8 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn,
 		err = remap_area_sections(addr, pfn, size, type);
 	} else
 #endif
-		err = remap_area_pages(addr, pfn, size, type);
+		err = ioremap_page_range(addr, addr + size, __pfn_to_phys(pfn),
+					 __pgprot(type->prot_pte));
 
 	if (err) {
  		vunmap((void *)addr);
-- 
cgit v0.10.2


From 5bc23d32d86a132b5636a48dca0fa2528ef69ff9 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sun, 25 Jul 2010 08:57:02 +0100
Subject: ARM: DMA coherent allocator: align remapped addresses

The DMA coherent remap area is used to provide an uncached mapping
of memory for coherency with DMA engines.  Currently, we look for
any free hole which our allocation will fit in with page alignment.

However, this can lead to fragmentation of the area, and allows small
allocations to cross L1 entry boundaries.  This is undesirable as we
want to move towards allocating sections of memory.

Align allocations according to the size, limiting the alignment between
the page and section sizes.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 9e7742f..c704eed 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -183,6 +183,8 @@ static void *
 __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot)
 {
 	struct arm_vmregion *c;
+	size_t align;
+	int bit;
 
 	if (!consistent_pte[0]) {
 		printk(KERN_ERR "%s: not initialised\n", __func__);
@@ -191,9 +193,20 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot)
 	}
 
 	/*
+	 * Align the virtual region allocation - maximum alignment is
+	 * a section size, minimum is a page size.  This helps reduce
+	 * fragmentation of the DMA space, and also prevents allocations
+	 * smaller than a section from crossing a section boundary.
+	 */
+	bit = fls(size - 1) + 1;
+	if (bit > SECTION_SHIFT)
+		bit = SECTION_SHIFT;
+	align = 1 << bit;
+
+	/*
 	 * Allocate a virtual address in the consistent mapping region.
 	 */
-	c = arm_vmregion_alloc(&consistent_head, size,
+	c = arm_vmregion_alloc(&consistent_head, align, size,
 			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
 	if (c) {
 		pte_t *pte;
diff --git a/arch/arm/mm/vmregion.c b/arch/arm/mm/vmregion.c
index 19e09bdb..935993e 100644
--- a/arch/arm/mm/vmregion.c
+++ b/arch/arm/mm/vmregion.c
@@ -35,7 +35,8 @@
  */
 
 struct arm_vmregion *
-arm_vmregion_alloc(struct arm_vmregion_head *head, size_t size, gfp_t gfp)
+arm_vmregion_alloc(struct arm_vmregion_head *head, size_t align,
+		   size_t size, gfp_t gfp)
 {
 	unsigned long addr = head->vm_start, end = head->vm_end - size;
 	unsigned long flags;
@@ -58,7 +59,7 @@ arm_vmregion_alloc(struct arm_vmregion_head *head, size_t size, gfp_t gfp)
 			goto nospc;
 		if ((addr + size) <= c->vm_start)
 			goto found;
-		addr = c->vm_end;
+		addr = ALIGN(c->vm_end, align);
 		if (addr > end)
 			goto nospc;
 	}
diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h
index 6b2cdbd..15e9f04 100644
--- a/arch/arm/mm/vmregion.h
+++ b/arch/arm/mm/vmregion.h
@@ -21,7 +21,7 @@ struct arm_vmregion {
 	int			vm_active;
 };
 
-struct arm_vmregion *arm_vmregion_alloc(struct arm_vmregion_head *, size_t, gfp_t);
+struct arm_vmregion *arm_vmregion_alloc(struct arm_vmregion_head *, size_t, size_t, gfp_t);
 struct arm_vmregion *arm_vmregion_find(struct arm_vmregion_head *, unsigned long);
 struct arm_vmregion *arm_vmregion_find_remove(struct arm_vmregion_head *, unsigned long);
 void arm_vmregion_free(struct arm_vmregion_head *, struct arm_vmregion *);
-- 
cgit v0.10.2


From c1ba6ba3dd4c127dd1b14125ac7feed43d974436 Mon Sep 17 00:00:00 2001
From: eric miao <eric.y.miao@gmail.com>
Date: Thu, 22 Jul 2010 09:55:53 +0100
Subject: ARM: 6251/1: Make SPARSE_IRQ a hidden option

SPARSE_IRQ doesn't need to be a visible option, only those platforms
supporting that will select it.

Signed-off-by: Eric Miao <eric.miao@canonical.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 8ef1e23..88b4f38 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1287,8 +1287,7 @@ config HW_PERF_EVENTS
 	  disabled, perf events will use software events only.
 
 config SPARSE_IRQ
-	bool "Support sparse irq numbering"
-	depends on EXPERIMENTAL
+	def_bool n
 	help
 	  This enables support for sparse irqs. This is useful in general
 	  as most CPUs have a fairly sparse array of IRQ vectors, which
@@ -1296,8 +1295,6 @@ config SPARSE_IRQ
 	  number of off-chip IRQs will want to treat this as
 	  experimental until they have been independently verified.
 
-	  If you don't know what to do here, say N.
-
 source "mm/Kconfig"
 
 config FORCE_MAX_ZONEORDER
-- 
cgit v0.10.2


From 3dc91aff9c3ef54b15cdaf32f61f973489fe69eb Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill@shutemov.name>
Date: Thu, 22 Jul 2010 13:16:49 +0100
Subject: ARM: 6252/1: Use SIGBUS for unaligned access instead of SIGILL

POSIX specify to use signal SIGBUS with code BUS_ADRALN for invalid
address alignment.

Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 6f98c35..53a6096 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -924,8 +924,8 @@ static int __init alignment_init(void)
 		ai_usermode = UM_FIXUP;
 	}
 
-	hook_fault_code(1, do_alignment, SIGILL, "alignment exception");
-	hook_fault_code(3, do_alignment, SIGILL, "alignment exception");
+	hook_fault_code(1, do_alignment, SIGBUS, "alignment exception");
+	hook_fault_code(3, do_alignment, SIGBUS, "alignment exception");
 
 	return 0;
 }
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index cbfb2ed..ce6f3a4 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -463,9 +463,9 @@ static struct fsr_info {
 	 * defines these to be "precise" aborts.
 	 */
 	{ do_bad,		SIGSEGV, 0,		"vector exception"		   },
-	{ do_bad,		SIGILL,	 BUS_ADRALN,	"alignment exception"		   },
+	{ do_bad,		SIGBUS,	 BUS_ADRALN,	"alignment exception"		   },
 	{ do_bad,		SIGKILL, 0,		"terminal exception"		   },
-	{ do_bad,		SIGILL,	 BUS_ADRALN,	"alignment exception"		   },
+	{ do_bad,		SIGBUS,	 BUS_ADRALN,	"alignment exception"		   },
 /* Do we need runtime check ? */
 #if __LINUX_ARM_ARCH__ < 6
 	{ do_bad,		SIGBUS,	 0,		"external abort on linefetch"	   },
-- 
cgit v0.10.2


From 6338a6aa7c082f11d55712251e14178c68bf5869 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill@shutemov.name>
Date: Thu, 22 Jul 2010 13:18:19 +0100
Subject: ARM: 6269/1: Add 'code' parameter for hook_fault_code()

Add one more parameter to hook_fault_code() to be able to set 'code'
field of struct fsr_info.

Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 5f4f480..8ba1ccf 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -83,7 +83,7 @@ void arm_notify_die(const char *str, struct pt_regs *regs, struct siginfo *info,
 
 void hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int,
 				       struct pt_regs *),
-		     int sig, const char *name);
+		     int sig, int code, const char *name);
 
 #define xchg(ptr,x) \
 	((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
diff --git a/arch/arm/mach-integrator/pci_v3.c b/arch/arm/mach-integrator/pci_v3.c
index 9cef059..6467d99 100644
--- a/arch/arm/mach-integrator/pci_v3.c
+++ b/arch/arm/mach-integrator/pci_v3.c
@@ -505,10 +505,10 @@ void __init pci_v3_preinit(void)
 	/*
 	 * Hook in our fault handler for PCI errors
 	 */
-	hook_fault_code(4, v3_pci_fault, SIGBUS, "external abort on linefetch");
-	hook_fault_code(6, v3_pci_fault, SIGBUS, "external abort on linefetch");
-	hook_fault_code(8, v3_pci_fault, SIGBUS, "external abort on non-linefetch");
-	hook_fault_code(10, v3_pci_fault, SIGBUS, "external abort on non-linefetch");
+	hook_fault_code(4, v3_pci_fault, SIGBUS, 0, "external abort on linefetch");
+	hook_fault_code(6, v3_pci_fault, SIGBUS, 0, "external abort on linefetch");
+	hook_fault_code(8, v3_pci_fault, SIGBUS, 0, "external abort on non-linefetch");
+	hook_fault_code(10, v3_pci_fault, SIGBUS, 0, "external abort on non-linefetch");
 
 	spin_lock_irqsave(&v3_lock, flags);
 
diff --git a/arch/arm/mach-iop13xx/pci.c b/arch/arm/mach-iop13xx/pci.c
index 6d5a908..773ea0c 100644
--- a/arch/arm/mach-iop13xx/pci.c
+++ b/arch/arm/mach-iop13xx/pci.c
@@ -987,7 +987,7 @@ void __init iop13xx_pci_init(void)
 		iop13xx_atux_setup();
 	}
 
-	hook_fault_code(16+6, iop13xx_pci_abort, SIGBUS,
+	hook_fault_code(16+6, iop13xx_pci_abort, SIGBUS, 0,
 			"imprecise external abort");
 }
 
diff --git a/arch/arm/mach-ixp2000/pci.c b/arch/arm/mach-ixp2000/pci.c
index 90771ca..f797c5f 100644
--- a/arch/arm/mach-ixp2000/pci.c
+++ b/arch/arm/mach-ixp2000/pci.c
@@ -209,7 +209,7 @@ ixp2000_pci_preinit(void)
 			"the needed workaround has not been configured in");
 #endif
 
-	hook_fault_code(16+6, ixp2000_pci_abort_handler, SIGBUS,
+	hook_fault_code(16+6, ixp2000_pci_abort_handler, SIGBUS, 0,
 				"PCI config cycle to non-existent device");
 }
 
diff --git a/arch/arm/mach-ixp23xx/pci.c b/arch/arm/mach-ixp23xx/pci.c
index 4b0e598..563819a 100644
--- a/arch/arm/mach-ixp23xx/pci.c
+++ b/arch/arm/mach-ixp23xx/pci.c
@@ -229,7 +229,7 @@ void __init ixp23xx_pci_preinit(void)
 {
 	ixp23xx_pci_common_init();
 
-	hook_fault_code(16+6, ixp23xx_pci_abort_handler, SIGBUS,
+	hook_fault_code(16+6, ixp23xx_pci_abort_handler, SIGBUS, 0,
 			"PCI config cycle to non-existent device");
 
 	*IXP23XX_PCI_ADDR_EXT = 0x0000e000;
diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c
index e318153..f4fbb5e 100644
--- a/arch/arm/mach-ixp4xx/common-pci.c
+++ b/arch/arm/mach-ixp4xx/common-pci.c
@@ -382,7 +382,8 @@ void __init ixp4xx_pci_preinit(void)
 
 
 	/* hook in our fault handler for PCI errors */
-	hook_fault_code(16+6, abort_handler, SIGBUS, "imprecise external abort");
+	hook_fault_code(16+6, abort_handler, SIGBUS, 0,
+			"imprecise external abort");
 
 	pr_debug("setup PCI-AHB(inbound) and AHB-PCI(outbound) address mappings\n");
 
diff --git a/arch/arm/mach-ks8695/pci.c b/arch/arm/mach-ks8695/pci.c
index 7849966..5fcd082 100644
--- a/arch/arm/mach-ks8695/pci.c
+++ b/arch/arm/mach-ks8695/pci.c
@@ -268,8 +268,8 @@ static void __init ks8695_pci_preinit(void)
 	__raw_writel(0, KS8695_PCI_VA + KS8695_PIOBAC);
 
 	/* hook in fault handlers */
-	hook_fault_code(8, ks8695_pci_fault, SIGBUS, "external abort on non-linefetch");
-	hook_fault_code(10, ks8695_pci_fault, SIGBUS, "external abort on non-linefetch");
+	hook_fault_code(8, ks8695_pci_fault, SIGBUS, 0, "external abort on non-linefetch");
+	hook_fault_code(10, ks8695_pci_fault, SIGBUS, 0, "external abort on non-linefetch");
 }
 
 static void ks8695_show_pciregs(void)
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 53a6096..77cfdbe 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -924,8 +924,10 @@ static int __init alignment_init(void)
 		ai_usermode = UM_FIXUP;
 	}
 
-	hook_fault_code(1, do_alignment, SIGBUS, "alignment exception");
-	hook_fault_code(3, do_alignment, SIGBUS, "alignment exception");
+	hook_fault_code(1, do_alignment, SIGBUS, BUS_ADRALN,
+			"alignment exception");
+	hook_fault_code(3, do_alignment, SIGBUS, BUS_ADRALN,
+			"alignment exception");
 
 	return 0;
 }
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index ce6f3a4..84131c8 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -508,13 +508,15 @@ static struct fsr_info {
 
 void __init
 hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
-		int sig, const char *name)
+		int sig, int code, const char *name)
 {
-	if (nr >= 0 && nr < ARRAY_SIZE(fsr_info)) {
-		fsr_info[nr].fn   = fn;
-		fsr_info[nr].sig  = sig;
-		fsr_info[nr].name = name;
-	}
+	if (nr < 0 || nr >= ARRAY_SIZE(fsr_info))
+		BUG();
+
+	fsr_info[nr].fn   = fn;
+	fsr_info[nr].sig  = sig;
+	fsr_info[nr].code = code;
+	fsr_info[nr].name = name;
 }
 
 /*
diff --git a/arch/arm/plat-iop/pci.c b/arch/arm/plat-iop/pci.c
index ce31f31..43f2b15 100644
--- a/arch/arm/plat-iop/pci.c
+++ b/arch/arm/plat-iop/pci.c
@@ -359,7 +359,7 @@ static void __init iop3xx_atu_debug(void)
 	DBG("ATU: IOP3XX_ATUCMD=0x%04x\n", *IOP3XX_ATUCMD);
 	DBG("ATU: IOP3XX_ATUCR=0x%08x\n", *IOP3XX_ATUCR);
 
-	hook_fault_code(16+6, iop3xx_pci_abort, SIGBUS, "imprecise external abort");
+	hook_fault_code(16+6, iop3xx_pci_abort, SIGBUS, 0, "imprecise external abort");
 }
 
 /* for platforms that might be host-bus-adapters */
-- 
cgit v0.10.2


From 33a9c41bf5d8adae9d882513e617c4c645195e71 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill@shutemov.name>
Date: Thu, 22 Jul 2010 13:20:22 +0100
Subject: ARM: 6255/1: Workaround infinity loop in handling of translation
 faults

On ARM one Linux PGD entry contains two hardware entries (see page
tables layout in pgtable.h). We normally guarantee that we always
fill both L1 entries. But create_mapping() doesn't follow the rule.
It can create inidividual L1 entries, so here we have to call
pmd_none() check in do_translation_fault() for the entry really
corresponded to address, not for the first of pair.

Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 84131c8..564b1c4 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -413,7 +413,16 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
 	pmd_k = pmd_offset(pgd_k, addr);
 	pmd   = pmd_offset(pgd, addr);
 
-	if (pmd_none(*pmd_k))
+	/*
+	 * On ARM one Linux PGD entry contains two hardware entries (see page
+	 * tables layout in pgtable.h). We normally guarantee that we always
+	 * fill both L1 entries. But create_mapping() doesn't follow the rule.
+	 * It can create inidividual L1 entries, so here we have to call
+	 * pmd_none() check for the entry really corresponded to address, not
+	 * for the first of pair.
+	 */
+	index = (addr >> SECTION_SHIFT) & 1;
+	if (pmd_none(pmd_k[index]))
 		goto bad_area;
 
 	copy_pmd(pmd, pmd_k);
-- 
cgit v0.10.2


From 993bf4ec8c2a2b7979389ab196bf2fe217117158 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill@shutemov.name>
Date: Thu, 22 Jul 2010 13:23:25 +0100
Subject: ARM: 6256/1: Check arch version and modify fsr_info[] depends on it
 at runtime

Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 564b1c4..5835e63 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -475,12 +475,7 @@ static struct fsr_info {
 	{ do_bad,		SIGBUS,	 BUS_ADRALN,	"alignment exception"		   },
 	{ do_bad,		SIGKILL, 0,		"terminal exception"		   },
 	{ do_bad,		SIGBUS,	 BUS_ADRALN,	"alignment exception"		   },
-/* Do we need runtime check ? */
-#if __LINUX_ARM_ARCH__ < 6
 	{ do_bad,		SIGBUS,	 0,		"external abort on linefetch"	   },
-#else
-	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"I-cache maintenance fault"	   },
-#endif
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"section translation fault"	   },
 	{ do_bad,		SIGBUS,	 0,		"external abort on linefetch"	   },
 	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"page translation fault"	   },
@@ -605,3 +600,14 @@ do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
 	arm_notify_die("", regs, &info, ifsr, 0);
 }
 
+static int __init exceptions_init(void)
+{
+	if (cpu_architecture() >= CPU_ARCH_ARMv6) {
+		hook_fault_code(4, do_translation_fault, SIGSEGV, SEGV_MAPERR,
+				"I-cache maintenance fault");
+	}
+
+	return 0;
+}
+
+arch_initcall(exceptions_init);
-- 
cgit v0.10.2


From b8ab5397bcbd92e3fd4a9770e0bf59315fa38dab Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill@shutemov.name>
Date: Mon, 26 Jul 2010 11:20:41 +0100
Subject: ARM: 6268/1: ARMv6K and ARMv7 use fault statuses 3 and 6 as Access
 Flag fault

Statuses 3 (0b00011) and 6 (0x00110) of DFSR are Access Flags faults on
ARMv6K and ARMv7. Let's patch fsr_info[] at runtime if we are on ARMv7
or later.

Unfortunately, we don't have runtime check for 'K' extension, so we
can't check for it.

Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 77cfdbe..d073b64 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -926,8 +926,18 @@ static int __init alignment_init(void)
 
 	hook_fault_code(1, do_alignment, SIGBUS, BUS_ADRALN,
 			"alignment exception");
-	hook_fault_code(3, do_alignment, SIGBUS, BUS_ADRALN,
-			"alignment exception");
+
+	/*
+	 * ARMv6K and ARMv7 use fault status 3 (0b00011) as Access Flag section
+	 * fault, not as alignment error.
+	 *
+	 * TODO: handle ARMv6K properly. Runtime check for 'K' extension is
+	 * needed.
+	 */
+	if (cpu_architecture() <= CPU_ARCH_ARMv6) {
+		hook_fault_code(3, do_alignment, SIGBUS, BUS_ADRALN,
+				"alignment exception");
+	}
 
 	return 0;
 }
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 5835e63..23b0b03 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -607,6 +607,17 @@ static int __init exceptions_init(void)
 				"I-cache maintenance fault");
 	}
 
+	if (cpu_architecture() >= CPU_ARCH_ARMv7) {
+		/*
+		 * TODO: Access flag faults introduced in ARMv6K.
+		 * Runtime check for 'K' extension is needed
+		 */
+		hook_fault_code(3, do_bad, SIGSEGV, SEGV_MAPERR,
+				"section access flag fault");
+		hook_fault_code(6, do_bad, SIGSEGV, SEGV_MAPERR,
+				"section access flag fault");
+	}
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From 9ca03a21e320a6bf44559323527aba704bcc8772 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Mon, 26 Jul 2010 12:22:12 +0100
Subject: ARM: Factor out common code from cpu_proc_fin()

All implementations of cpu_proc_fin() start by disabling interrupts
and then flush caches.  Rather than have every processors proc_fin()
implementation do this, move it out into generic code - and move the
cache flush past setup_mm_for_reboot() (so it can benefit from having
caches still enabled.)

This allows cpu_proc_fin() to become independent of the L1/L2 cache
types, and eventually move the L2 cache flushing into the L2 support
code.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 598ca61..3b4872c 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -74,7 +74,11 @@ void machine_kexec(struct kimage *image)
 			   (unsigned long) reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
 	printk(KERN_INFO "Bye!\n");
 
-	cpu_proc_fin();
+	local_irq_disable();
+	local_fiq_disable();
 	setup_mm_for_reboot(0); /* mode is not used, so just pass 0*/
+	flush_cache_all();
+	cpu_proc_fin();
+	flush_cache_all();
 	cpu_reset(reboot_code_buffer_phys);
 }
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index a4a9cc8..aaf5115 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -29,6 +29,7 @@
 #include <linux/utsname.h>
 #include <linux/uaccess.h>
 
+#include <asm/cacheflush.h>
 #include <asm/leds.h>
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -84,10 +85,9 @@ __setup("hlt", hlt_setup);
 
 void arm_machine_restart(char mode, const char *cmd)
 {
-	/*
-	 * Clean and disable cache, and turn off interrupts
-	 */
-	cpu_proc_fin();
+	/* Disable interrupts first */
+	local_irq_disable();
+	local_fiq_disable();
 
 	/*
 	 * Tell the mm system that we are going to reboot -
@@ -96,6 +96,15 @@ void arm_machine_restart(char mode, const char *cmd)
 	 */
 	setup_mm_for_reboot(mode);
 
+	/* Clean and invalidate caches */
+	flush_cache_all();
+
+	/* Turn off caching */
+	cpu_proc_fin();
+
+	/* Push out any further dirty data, and ensure cache is empty */
+	flush_cache_all();
+
 	/*
 	 * Now call the architecture specific reboot code.
 	 */
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index 72507c6..203a4e9 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -79,15 +79,11 @@ ENTRY(cpu_arm1020_proc_init)
  * cpu_arm1020_proc_fin()
  */
 ENTRY(cpu_arm1020_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	arm1020_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000 		@ ...i............
 	bic	r0, r0, #0x000e 		@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm1020_reset(loc)
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index d278298..1a511e7 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -79,15 +79,11 @@ ENTRY(cpu_arm1020e_proc_init)
  * cpu_arm1020e_proc_fin()
  */
 ENTRY(cpu_arm1020e_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	arm1020e_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000 		@ ...i............
 	bic	r0, r0, #0x000e 		@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm1020e_reset(loc)
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index ce13e4a..1ffa4eb 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -68,15 +68,11 @@ ENTRY(cpu_arm1022_proc_init)
  * cpu_arm1022_proc_fin()
  */
 ENTRY(cpu_arm1022_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	arm1022_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000 		@ ...i............
 	bic	r0, r0, #0x000e 		@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm1022_reset(loc)
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index 636672a..5697c34 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -68,15 +68,11 @@ ENTRY(cpu_arm1026_proc_init)
  * cpu_arm1026_proc_fin()
  */
 ENTRY(cpu_arm1026_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	arm1026_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000 		@ ...i............
 	bic	r0, r0, #0x000e 		@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm1026_reset(loc)
diff --git a/arch/arm/mm/proc-arm6_7.S b/arch/arm/mm/proc-arm6_7.S
index 795dc61..64e0b32 100644
--- a/arch/arm/mm/proc-arm6_7.S
+++ b/arch/arm/mm/proc-arm6_7.S
@@ -184,8 +184,6 @@ ENTRY(cpu_arm7_proc_init)
 
 ENTRY(cpu_arm6_proc_fin)
 ENTRY(cpu_arm7_proc_fin)
-		mov	r0, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-		msr	cpsr_c, r0
 		mov	r0, #0x31			@ ....S..DP...M
 		mcr	p15, 0, r0, c1, c0, 0		@ disable caches
 		mov	pc, lr
diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S
index 0b62de2..9d96824 100644
--- a/arch/arm/mm/proc-arm720.S
+++ b/arch/arm/mm/proc-arm720.S
@@ -54,15 +54,11 @@ ENTRY(cpu_arm720_proc_init)
 		mov	pc, lr
 
 ENTRY(cpu_arm720_proc_fin)
-		stmfd	sp!, {lr}
-		mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-		msr	cpsr_c, ip
 		mrc	p15, 0, r0, c1, c0, 0
 		bic	r0, r0, #0x1000			@ ...i............
 		bic	r0, r0, #0x000e			@ ............wca.
 		mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-		mcr	p15, 0, r1, c7, c7, 0		@ invalidate cache
-		ldmfd	sp!, {pc}
+		mov	pc, lr
 
 /*
  * Function: arm720_proc_do_idle(void)
diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S
index 01860cd..6c1a9ab 100644
--- a/arch/arm/mm/proc-arm740.S
+++ b/arch/arm/mm/proc-arm740.S
@@ -36,15 +36,11 @@ ENTRY(cpu_arm740_switch_mm)
  * cpu_arm740_proc_fin()
  */
 ENTRY(cpu_arm740_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
 	mrc	p15, 0, r0, c1, c0, 0
 	bic	r0, r0, #0x3f000000		@ bank/f/lock/s
 	bic	r0, r0, #0x0000000c		@ w-buffer/cache
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mcr	p15, 0, r0, c7, c0, 0		@ invalidate cache
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm740_reset(loc)
diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S
index 1201b98..6a850db 100644
--- a/arch/arm/mm/proc-arm7tdmi.S
+++ b/arch/arm/mm/proc-arm7tdmi.S
@@ -36,8 +36,6 @@ ENTRY(cpu_arm7tdmi_switch_mm)
  * cpu_arm7tdmi_proc_fin()
  */
 ENTRY(cpu_arm7tdmi_proc_fin)
-		mov	r0, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-		msr	cpsr_c, r0
 		mov	pc, lr
 
 /*
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 8be8199..86f80aa 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -69,19 +69,11 @@ ENTRY(cpu_arm920_proc_init)
  * cpu_arm920_proc_fin()
  */
 ENTRY(cpu_arm920_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-	bl	arm920_flush_kern_cache_all
-#else
-	bl	v4wt_flush_kern_cache_all
-#endif
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm920_reset(loc)
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index c0ff8e4..f76ce9b 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S
@@ -71,19 +71,11 @@ ENTRY(cpu_arm922_proc_init)
  * cpu_arm922_proc_fin()
  */
 ENTRY(cpu_arm922_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-	bl	arm922_flush_kern_cache_all
-#else
-	bl	v4wt_flush_kern_cache_all
-#endif
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm922_reset(loc)
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index 3c6cffe..657bd3f 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -92,15 +92,11 @@ ENTRY(cpu_arm925_proc_init)
  * cpu_arm925_proc_fin()
  */
 ENTRY(cpu_arm925_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	arm925_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm925_reset(loc)
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index 75b707c..73f1f3c 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -61,15 +61,11 @@ ENTRY(cpu_arm926_proc_init)
  * cpu_arm926_proc_fin()
  */
 ENTRY(cpu_arm926_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	arm926_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm926_reset(loc)
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
index 1af1657..fffb061 100644
--- a/arch/arm/mm/proc-arm940.S
+++ b/arch/arm/mm/proc-arm940.S
@@ -37,15 +37,11 @@ ENTRY(cpu_arm940_switch_mm)
  * cpu_arm940_proc_fin()
  */
 ENTRY(cpu_arm940_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	arm940_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x00001000		@ i-cache
 	bic	r0, r0, #0x00000004		@ d-cache
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm940_reset(loc)
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
index 1664b6a..249a605 100644
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S
@@ -44,15 +44,11 @@ ENTRY(cpu_arm946_switch_mm)
  * cpu_arm946_proc_fin()
  */
 ENTRY(cpu_arm946_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	arm946_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x00001000		@ i-cache
 	bic	r0, r0, #0x00000004		@ d-cache
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm946_reset(loc)
diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S
index 28545c2..db47566 100644
--- a/arch/arm/mm/proc-arm9tdmi.S
+++ b/arch/arm/mm/proc-arm9tdmi.S
@@ -36,8 +36,6 @@ ENTRY(cpu_arm9tdmi_switch_mm)
  * cpu_arm9tdmi_proc_fin()
  */
 ENTRY(cpu_arm9tdmi_proc_fin)
-		mov	r0, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-		msr	cpsr_c, r0
 		mov	pc, lr
 
 /*
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
index 08f5ac2..7803fdf 100644
--- a/arch/arm/mm/proc-fa526.S
+++ b/arch/arm/mm/proc-fa526.S
@@ -39,17 +39,13 @@ ENTRY(cpu_fa526_proc_init)
  * cpu_fa526_proc_fin()
  */
 ENTRY(cpu_fa526_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	fa_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
 	nop
 	nop
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_fa526_reset(loc)
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index 53e6323..b304d01 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -75,11 +75,6 @@ ENTRY(cpu_feroceon_proc_init)
  * cpu_feroceon_proc_fin()
  */
 ENTRY(cpu_feroceon_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	feroceon_flush_kern_cache_all
-
 #if defined(CONFIG_CACHE_FEROCEON_L2) && \
 	!defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH)
 	mov	r0, #0
@@ -91,7 +86,7 @@ ENTRY(cpu_feroceon_proc_fin)
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_feroceon_reset(loc)
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
index caa3115..5f6892f 100644
--- a/arch/arm/mm/proc-mohawk.S
+++ b/arch/arm/mm/proc-mohawk.S
@@ -51,15 +51,11 @@ ENTRY(cpu_mohawk_proc_init)
  * cpu_mohawk_proc_fin()
  */
 ENTRY(cpu_mohawk_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	mohawk_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1800			@ ...iz...........
 	bic	r0, r0, #0x0006			@ .............ca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_mohawk_reset(loc)
diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
index 7b706b3..a201eb0 100644
--- a/arch/arm/mm/proc-sa110.S
+++ b/arch/arm/mm/proc-sa110.S
@@ -44,17 +44,13 @@ ENTRY(cpu_sa110_proc_init)
  * cpu_sa110_proc_fin()
  */
 ENTRY(cpu_sa110_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	v4wb_flush_kern_cache_all	@ clean caches
-1:	mov	r0, #0
+	mov	r0, #0
 	mcr	p15, 0, r0, c15, c2, 2		@ Disable clock switching
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_sa110_reset(loc)
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index 5c47760..7ddc480 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -55,16 +55,12 @@ ENTRY(cpu_sa1100_proc_init)
  *  - Clean and turn off caches.
  */
 ENTRY(cpu_sa1100_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	v4wb_flush_kern_cache_all
 	mcr	p15, 0, ip, c15, c2, 2		@ Disable clock switching
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_sa1100_reset(loc)
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 2f5a3c2..22aac85 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -42,14 +42,11 @@ ENTRY(cpu_v6_proc_init)
 	mov	pc, lr
 
 ENTRY(cpu_v6_proc_fin)
-	stmfd	sp!, {lr}
-	cpsid	if				@ disable interrupts
-	bl	v6_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x0006			@ .............ca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  *	cpu_v6_reset(loc)
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 8071bcd..6a8506d 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -45,14 +45,11 @@ ENTRY(cpu_v7_proc_init)
 ENDPROC(cpu_v7_proc_init)
 
 ENTRY(cpu_v7_proc_fin)
-	stmfd	sp!, {lr}
-	cpsid	if				@ disable interrupts
-	bl	v7_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x0006			@ .............ca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 ENDPROC(cpu_v7_proc_fin)
 
 /*
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index e5797f1..361a51e 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -90,15 +90,11 @@ ENTRY(cpu_xsc3_proc_init)
  * cpu_xsc3_proc_fin()
  */
 ENTRY(cpu_xsc3_proc_fin)
-	str	lr, [sp, #-4]!
-	mov	r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
-	msr	cpsr_c, r0
-	bl	xsc3_flush_kern_cache_all	@ clean caches
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1800			@ ...IZ...........
 	bic	r0, r0, #0x0006			@ .............CA.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldr	pc, [sp], #4
+	mov	pc, lr
 
 /*
  * cpu_xsc3_reset(loc)
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index 63037e2..1407597 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -124,15 +124,11 @@ ENTRY(cpu_xscale_proc_init)
  * cpu_xscale_proc_fin()
  */
 ENTRY(cpu_xscale_proc_fin)
-	str	lr, [sp, #-4]!
-	mov	r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
-	msr	cpsr_c, r0
-	bl	xscale_flush_kern_cache_all	@ clean caches
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1800			@ ...IZ...........
 	bic	r0, r0, #0x0006			@ .............CA.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldr	pc, [sp], #4
+	mov	pc, lr
 
 /*
  * cpu_xscale_reset(loc)
-- 
cgit v0.10.2


From 5388a6b266e9c3357353332ba0cd5549082887f1 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Mon, 26 Jul 2010 13:19:43 +0100
Subject: ARM: SMP: Always enable clock event broadcast support

The TWD local timers are unable to wake up the CPU when it is placed
into a low power mode, eg. C3.  Therefore, we need to adapt things
such that the TWD code can cope with this.

We do this by always providing a broadcast tick function, and marking
the fact that the TWD local timer will stop in low power modes.  This
means that when the CPU is placed into a low power mode, the core
timer code marks this fact, and allows an IPI to be given to the core.

Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 88b4f38..157b08a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -56,7 +56,7 @@ config GENERIC_CLOCKEVENTS
 config GENERIC_CLOCKEVENTS_BROADCAST
 	bool
 	depends on GENERIC_CLOCKEVENTS
-	default y if SMP && !LOCAL_TIMERS
+	default y if SMP
 
 config HAVE_TCM
 	bool
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index b8c3d0f..0170e24 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -429,7 +429,11 @@ static void smp_timer_broadcast(const struct cpumask *mask)
 {
 	send_ipi_message(mask, IPI_TIMER);
 }
+#else
+#define smp_timer_broadcast	NULL
+#endif
 
+#ifndef CONFIG_LOCAL_TIMERS
 static void broadcast_timer_set_mode(enum clock_event_mode mode,
 	struct clock_event_device *evt)
 {
@@ -444,7 +448,6 @@ static void local_timer_setup(struct clock_event_device *evt)
 	evt->rating	= 400;
 	evt->mult	= 1;
 	evt->set_mode	= broadcast_timer_set_mode;
-	evt->broadcast	= smp_timer_broadcast;
 
 	clockevents_register_device(evt);
 }
@@ -456,6 +459,7 @@ void __cpuinit percpu_timer_setup(void)
 	struct clock_event_device *evt = &per_cpu(percpu_clockevent, cpu);
 
 	evt->cpumask = cpumask_of(cpu);
+	evt->broadcast = smp_timer_broadcast;
 
 	local_timer_setup(evt);
 }
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index 7c5f0c0..35882fb 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -132,7 +132,8 @@ void __cpuinit twd_timer_setup(struct clock_event_device *clk)
 	twd_calibrate_rate();
 
 	clk->name = "local_timer";
-	clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
+	clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT |
+			CLOCK_EVT_FEAT_C3STOP;
 	clk->rating = 350;
 	clk->set_mode = twd_set_mode;
 	clk->set_next_event = twd_set_next_event;
-- 
cgit v0.10.2


From 3d3f78d752bfada5b6041f2f7bd0833d8bdf7a4a Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Mon, 26 Jul 2010 13:31:27 +0100
Subject: ARM: call machine_shutdown() from machine_halt(), etc

x86 calls machine_shutdown() from the various machine_*() calls which
take the machine down ready for halting, restarting, etc, and uses
this to bring the system safely to a point where those actions can be
performed.  Such actions are stopping the secondary CPUs.

So, change the ARM implementation of these to reflect what x86 does.

This solves kexec problems on ARM SMP platforms, where the secondary
CPUs were left running across the kexec call.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 3b4872c..df5958f 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -37,10 +37,6 @@ void machine_kexec_cleanup(struct kimage *image)
 {
 }
 
-void machine_shutdown(void)
-{
-}
-
 void machine_crash_shutdown(struct pt_regs *regs)
 {
 }
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index aaf5115..2e2ec97 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -198,19 +198,29 @@ int __init reboot_setup(char *str)
 
 __setup("reboot=", reboot_setup);
 
-void machine_halt(void)
+void machine_shutdown(void)
 {
+#ifdef CONFIG_SMP
+	smp_send_stop();
+#endif
 }
 
+void machine_halt(void)
+{
+	machine_shutdown();
+	while (1);
+}
 
 void machine_power_off(void)
 {
+	machine_shutdown();
 	if (pm_power_off)
 		pm_power_off();
 }
 
 void machine_restart(char *cmd)
 {
+	machine_shutdown();
 	arm_pm_restart(reboot_mode, cmd);
 }
 
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 0170e24..40dc74f 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -471,10 +471,13 @@ static DEFINE_SPINLOCK(stop_lock);
  */
 static void ipi_cpu_stop(unsigned int cpu)
 {
-	spin_lock(&stop_lock);
-	printk(KERN_CRIT "CPU%u: stopping\n", cpu);
-	dump_stack();
-	spin_unlock(&stop_lock);
+	if (system_state == SYSTEM_BOOTING ||
+	    system_state == SYSTEM_RUNNING) {
+		spin_lock(&stop_lock);
+		printk(KERN_CRIT "CPU%u: stopping\n", cpu);
+		dump_stack();
+		spin_unlock(&stop_lock);
+	}
 
 	set_cpu_online(cpu, false);
 
-- 
cgit v0.10.2


From 2c39c9e149f45ec15a6985cb06ec8f6d904bb35e Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Tue, 27 Jul 2010 08:50:16 +0100
Subject: ARM: Fix section build warnings for AMBA drivers

Found in the Versatile build:

WARNING: drivers/built-in.o(.data+0x14c): Section mismatch in reference from the variable pl061_gpio_driver to the (unknown reference) .init.data:(unknown)
The variable pl061_gpio_driver references
the (unknown reference) __initdata (unknown)

WARNING: drivers/built-in.o(.data+0x40f8): Section mismatch in reference from the variable pl011_driver to the (unknown reference) .init.data:(unknown)
The variable pl011_driver references
the (unknown reference) __initdata (unknown)

WARNING: drivers/built-in.o(.data+0x5ab4): Section mismatch in reference from the variable pl031_driver to the (unknown reference) .init.data:(unknown)
The variable pl031_driver references
the (unknown reference) __initdata (unknown)

Basically, amba_id structures must not be __initdata.  Also fix:

WARNING: drivers/built-in.o(.data+0x138): Section mismatch in reference from the variable pl061_gpio_driver to the function .init.text:pl061_probe()
The variable pl061_gpio_driver references
the function __init pl061_probe()

which is an incorrectly annotated probe function.  Fix it to reflect
the other AMBA bus probe functions by removing the __init attributation.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/gpio/pl061.c b/drivers/gpio/pl061.c
index ee568c8..5005990 100644
--- a/drivers/gpio/pl061.c
+++ b/drivers/gpio/pl061.c
@@ -232,7 +232,7 @@ static void pl061_irq_handler(unsigned irq, struct irq_desc *desc)
 	desc->chip->unmask(irq);
 }
 
-static int __init pl061_probe(struct amba_device *dev, struct amba_id *id)
+static int pl061_probe(struct amba_device *dev, struct amba_id *id)
 {
 	struct pl061_platform_data *pdata;
 	struct pl061_gpio *chip;
@@ -333,7 +333,7 @@ free_mem:
 	return ret;
 }
 
-static struct amba_id pl061_ids[] __initdata = {
+static struct amba_id pl061_ids[] = {
 	{
 		.id	= 0x00041061,
 		.mask	= 0x000fffff,
diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index 3587d99..71bbefc 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -456,7 +456,7 @@ static struct rtc_class_ops stv2_pl031_ops = {
 	.irq_set_freq = pl031_irq_set_freq,
 };
 
-static struct amba_id pl031_ids[] __initdata = {
+static struct amba_id pl031_ids[] = {
 	{
 		.id = 0x00041031,
 		.mask = 0x000fffff,
diff --git a/drivers/serial/amba-pl010.c b/drivers/serial/amba-pl010.c
index b09a638..50441ff 100644
--- a/drivers/serial/amba-pl010.c
+++ b/drivers/serial/amba-pl010.c
@@ -782,7 +782,7 @@ static int pl010_resume(struct amba_device *dev)
 	return 0;
 }
 
-static struct amba_id pl010_ids[] __initdata = {
+static struct amba_id pl010_ids[] = {
 	{
 		.id	= 0x00041010,
 		.mask	= 0x000fffff,
diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index f67e09d..6ca7a44 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -930,7 +930,7 @@ static int pl011_resume(struct amba_device *dev)
 }
 #endif
 
-static struct amba_id pl011_ids[] __initdata = {
+static struct amba_id pl011_ids[] = {
 	{
 		.id	= 0x00041011,
 		.mask	= 0x000fffff,
-- 
cgit v0.10.2


From 4ce1d6cbf07271ab8f7cc47c3e27edeac08b58a7 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin.vincent@stericsson.com>
Date: Wed, 21 Jul 2010 12:44:58 +0100
Subject: ARM: 6237/1: mmci: use sg_miter API to fix multi-page sg handling

The mmci driver's SG list iteration logic assumes that each SG entry
spans only one page, and only maps and flushes one page of the sg.  This
is not a valid assumption.  Fix it by converting the driver to the
sg_miter API, which correctly handles sgs which span multiple pages.

Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Rabin Vincent <rabin.vincent@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 4917af9..d63d756 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -26,7 +26,6 @@
 #include <linux/amba/mmci.h>
 #include <linux/regulator/consumer.h>
 
-#include <asm/cacheflush.h>
 #include <asm/div64.h>
 #include <asm/io.h>
 #include <asm/sizes.h>
@@ -98,6 +97,18 @@ static void mmci_stop_data(struct mmci_host *host)
 	host->data = NULL;
 }
 
+static void mmci_init_sg(struct mmci_host *host, struct mmc_data *data)
+{
+	unsigned int flags = SG_MITER_ATOMIC;
+
+	if (data->flags & MMC_DATA_READ)
+		flags |= SG_MITER_TO_SG;
+	else
+		flags |= SG_MITER_FROM_SG;
+
+	sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags);
+}
+
 static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 {
 	unsigned int datactrl, timeout, irqmask;
@@ -210,8 +221,17 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 		 * We hit an error condition.  Ensure that any data
 		 * partially written to a page is properly coherent.
 		 */
-		if (host->sg_len && data->flags & MMC_DATA_READ)
-			flush_dcache_page(sg_page(host->sg_ptr));
+		if (data->flags & MMC_DATA_READ) {
+			struct sg_mapping_iter *sg_miter = &host->sg_miter;
+			unsigned long flags;
+
+			local_irq_save(flags);
+			if (sg_miter_next(sg_miter)) {
+				flush_dcache_page(sg_miter->page);
+				sg_miter_stop(sg_miter);
+			}
+			local_irq_restore(flags);
+		}
 	}
 	if (status & MCI_DATAEND) {
 		mmci_stop_data(host);
@@ -314,15 +334,18 @@ static int mmci_pio_write(struct mmci_host *host, char *buffer, unsigned int rem
 static irqreturn_t mmci_pio_irq(int irq, void *dev_id)
 {
 	struct mmci_host *host = dev_id;
+	struct sg_mapping_iter *sg_miter = &host->sg_miter;
 	void __iomem *base = host->base;
+	unsigned long flags;
 	u32 status;
 
 	status = readl(base + MMCISTATUS);
 
 	dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status);
 
+	local_irq_save(flags);
+
 	do {
-		unsigned long flags;
 		unsigned int remain, len;
 		char *buffer;
 
@@ -336,11 +359,11 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id)
 		if (!(status & (MCI_TXFIFOHALFEMPTY|MCI_RXDATAAVLBL)))
 			break;
 
-		/*
-		 * Map the current scatter buffer.
-		 */
-		buffer = mmci_kmap_atomic(host, &flags) + host->sg_off;
-		remain = host->sg_ptr->length - host->sg_off;
+		if (!sg_miter_next(sg_miter))
+			break;
+
+		buffer = sg_miter->addr;
+		remain = sg_miter->length;
 
 		len = 0;
 		if (status & MCI_RXACTIVE)
@@ -348,31 +371,24 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id)
 		if (status & MCI_TXACTIVE)
 			len = mmci_pio_write(host, buffer, remain, status);
 
-		/*
-		 * Unmap the buffer.
-		 */
-		mmci_kunmap_atomic(host, buffer, &flags);
+		sg_miter->consumed = len;
 
-		host->sg_off += len;
 		host->size -= len;
 		remain -= len;
 
 		if (remain)
 			break;
 
-		/*
-		 * If we were reading, and we have completed this
-		 * page, ensure that the data cache is coherent.
-		 */
 		if (status & MCI_RXACTIVE)
-			flush_dcache_page(sg_page(host->sg_ptr));
-
-		if (!mmci_next_sg(host))
-			break;
+			flush_dcache_page(sg_miter->page);
 
 		status = readl(base + MMCISTATUS);
 	} while (1);
 
+	sg_miter_stop(sg_miter);
+
+	local_irq_restore(flags);
+
 	/*
 	 * If we're nearing the end of the read, switch to
 	 * "any data available" mode.
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index d77062e..7cb24ab 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -171,42 +171,9 @@ struct mmci_host {
 	struct timer_list	timer;
 	unsigned int		oldstat;
 
-	unsigned int		sg_len;
-
 	/* pio stuff */
-	struct scatterlist	*sg_ptr;
-	unsigned int		sg_off;
+	struct sg_mapping_iter	sg_miter;
 	unsigned int		size;
 	struct regulator	*vcc;
 };
 
-static inline void mmci_init_sg(struct mmci_host *host, struct mmc_data *data)
-{
-	/*
-	 * Ideally, we want the higher levels to pass us a scatter list.
-	 */
-	host->sg_len = data->sg_len;
-	host->sg_ptr = data->sg;
-	host->sg_off = 0;
-}
-
-static inline int mmci_next_sg(struct mmci_host *host)
-{
-	host->sg_ptr++;
-	host->sg_off = 0;
-	return --host->sg_len;
-}
-
-static inline char *mmci_kmap_atomic(struct mmci_host *host, unsigned long *flags)
-{
-	struct scatterlist *sg = host->sg_ptr;
-
-	local_irq_save(*flags);
-	return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
-}
-
-static inline void mmci_kunmap_atomic(struct mmci_host *host, void *buffer, unsigned long *flags)
-{
-	kunmap_atomic(buffer, KM_BIO_SRC_IRQ);
-	local_irq_restore(*flags);
-}
-- 
cgit v0.10.2


From 528320db013b687c5f0150fd77eb4dc02ca328d1 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin.vincent@stericsson.com>
Date: Wed, 21 Jul 2010 12:49:49 +0100
Subject: ARM: 6238/1: mmci: fix multi block transfers

Fix the data transfer size to allow multi block transfers to work.

Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Rabin Vincent <rabin.vincent@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index d63d756..ddcfc4c 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -120,7 +120,7 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 		data->blksz, data->blocks, data->flags);
 
 	host->data = data;
-	host->size = data->blksz;
+	host->size = data->blksz * data->blocks;
 	host->data_xfered = 0;
 
 	mmci_init_sg(host, data);
-- 
cgit v0.10.2


From f5e2574e734650bbeb801a31cc99e628f9a027af Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin.vincent@stericsson.com>
Date: Wed, 21 Jul 2010 12:50:31 +0100
Subject: ARM: 6239/1: mmci: let core poll for card detection

Use the MMC core's ability to poll for card detection.  This also has
the advantage of doing the gpio_get_value from a workqueue instead of
timer, allowing the gpio to be on a sleeping gpiochip.

Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Rabin Vincent <rabin.vincent@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index ddcfc4c..3eaa0e9 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -567,18 +567,6 @@ static const struct mmc_host_ops mmci_ops = {
 	.get_cd		= mmci_get_cd,
 };
 
-static void mmci_check_status(unsigned long data)
-{
-	struct mmci_host *host = (struct mmci_host *)data;
-	unsigned int status = mmci_get_cd(host->mmc);
-
-	if (status ^ host->oldstat)
-		mmc_detect_change(host->mmc, 0);
-
-	host->oldstat = status;
-	mod_timer(&host->timer, jiffies + HZ);
-}
-
 static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 {
 	struct mmci_platform_data *plat = dev->dev.platform_data;
@@ -685,6 +673,7 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 	if (host->vcc == NULL)
 		mmc->ocr_avail = plat->ocr_mask;
 	mmc->caps = plat->capabilities;
+	mmc->caps |= MMC_CAP_NEEDS_POLL;
 
 	/*
 	 * We can do SGIO
@@ -750,7 +739,6 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 	writel(MCI_IRQENABLE, host->base + MMCIMASK0);
 
 	amba_set_drvdata(dev, mmc);
-	host->oldstat = mmci_get_cd(host->mmc);
 
 	mmc_add_host(mmc);
 
@@ -758,12 +746,6 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 		mmc_hostname(mmc), amba_rev(dev), amba_config(dev),
 		(unsigned long long)dev->res.start, dev->irq[0], dev->irq[1]);
 
-	init_timer(&host->timer);
-	host->timer.data = (unsigned long)host;
-	host->timer.function = mmci_check_status;
-	host->timer.expires = jiffies + HZ;
-	add_timer(&host->timer);
-
 	return 0;
 
  irq0_free:
@@ -797,8 +779,6 @@ static int __devexit mmci_remove(struct amba_device *dev)
 	if (mmc) {
 		struct mmci_host *host = mmc_priv(mmc);
 
-		del_timer_sync(&host->timer);
-
 		mmc_remove_host(mmc);
 
 		writel(0, host->base + MMCIMASK0);
-- 
cgit v0.10.2


From bb8f563c848faa113059973f68c24a3bb6a9585e Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin.vincent@stericsson.com>
Date: Wed, 21 Jul 2010 12:53:57 +0100
Subject: ARM: 6243/1: mmci: pass power_mode to the translate_vdd callback

Platforms may have some external power control which need to be
controlled from board specific code.  Rename the translate_vdd()
callback to vdd_handler() and pass it the power mode.

Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Rabin Vincent <rabin.vincent@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 3eaa0e9..7ae3eee 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -493,16 +493,9 @@ static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 			/* This implicitly enables the regulator */
 			mmc_regulator_set_ocr(host->vcc, ios->vdd);
 #endif
-		/*
-		 * The translate_vdd function is not used if you have
-		 * an external regulator, or your design is really weird.
-		 * Using it would mean sending in power control BOTH using
-		 * a regulator AND the 4 MMCIPWR bits. If we don't have
-		 * a regulator, we might have some other platform specific
-		 * power control behind this translate function.
-		 */
-		if (!host->vcc && host->plat->translate_vdd)
-			pwr |= host->plat->translate_vdd(mmc_dev(mmc), ios->vdd);
+		if (host->plat->vdd_handler)
+			pwr |= host->plat->vdd_handler(mmc_dev(mmc), ios->vdd,
+						       ios->power_mode);
 		/* The ST version does not have this, fall through to POWER_ON */
 		if (host->hw_designer != AMBA_VENDOR_ST) {
 			pwr |= MCI_PWR_UP;
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index 7e466fe..ca84ce7 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -15,9 +15,10 @@
  * @ocr_mask: available voltages on the 4 pins from the block, this
  * is ignored if a regulator is used, see the MMC_VDD_* masks in
  * mmc/host.h
- * @translate_vdd: a callback function to translate a MMC_VDD_*
- * mask into a value to be binary or:ed and written into the
- * MMCIPWR register of the block
+ * @vdd_handler: a callback function to translate a MMC_VDD_*
+ * mask into a value to be binary (or set some other custom bits
+ * in MMCIPWR) or:ed and written into the MMCIPWR register of the
+ * block.  May also control external power based on the power_mode.
  * @status: if no GPIO read function was given to the block in
  * gpio_wp (below) this function will be called to determine
  * whether a card is present in the MMC slot or not
@@ -29,7 +30,8 @@
 struct mmci_platform_data {
 	unsigned int f_max;
 	unsigned int ocr_mask;
-	u32 (*translate_vdd)(struct device *, unsigned int);
+	u32 (*vdd_handler)(struct device *, unsigned int vdd,
+			   unsigned char power_mode);
 	unsigned int (*status)(struct device *);
 	int	gpio_wp;
 	int	gpio_cd;
-- 
cgit v0.10.2


From 4956e10903fd3459306dd9438c1e714ba3068a2a Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin.vincent@stericsson.com>
Date: Wed, 21 Jul 2010 12:54:40 +0100
Subject: ARM: 6244/1: mmci: add variant data and default MCICLOCK support

Add a variant_data structure to handle the differences between the
various variants of this peripheral.  Add a first quirk for a default
MCICLOCK value, required on the Ux500 variant where the enable bit needs
to be always set, since it controls access to some registers.

Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Rabin Vincent <rabin.vincent@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 7ae3eee..fd2e7ac 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -36,12 +36,30 @@
 
 static unsigned int fmax = 515633;
 
+/**
+ * struct variant_data - MMCI variant-specific quirks
+ * @clkreg: default value for MCICLOCK register
+ */
+struct variant_data {
+	unsigned int		clkreg;
+};
+
+static struct variant_data variant_arm = {
+};
+
+static struct variant_data variant_u300 = {
+};
+
+static struct variant_data variant_ux500 = {
+	.clkreg			= MCI_CLK_ENABLE,
+};
 /*
  * This must be called with host->lock held
  */
 static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired)
 {
-	u32 clk = 0;
+	struct variant_data *variant = host->variant;
+	u32 clk = variant->clkreg;
 
 	if (desired) {
 		if (desired >= host->mclk) {
@@ -563,6 +581,7 @@ static const struct mmc_host_ops mmci_ops = {
 static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 {
 	struct mmci_platform_data *plat = dev->dev.platform_data;
+	struct variant_data *variant = id->data;
 	struct mmci_host *host;
 	struct mmc_host *mmc;
 	int ret;
@@ -606,6 +625,7 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 		goto clk_free;
 
 	host->plat = plat;
+	host->variant = variant;
 	host->mclk = clk_get_rate(host->clk);
 	/*
 	 * According to the spec, mclk is max 100 MHz,
@@ -845,19 +865,28 @@ static struct amba_id mmci_ids[] = {
 	{
 		.id	= 0x00041180,
 		.mask	= 0x000fffff,
+		.data	= &variant_arm,
 	},
 	{
 		.id	= 0x00041181,
 		.mask	= 0x000fffff,
+		.data	= &variant_arm,
 	},
 	/* ST Micro variants */
 	{
 		.id     = 0x00180180,
 		.mask   = 0x00ffffff,
+		.data	= &variant_u300,
 	},
 	{
 		.id     = 0x00280180,
 		.mask   = 0x00ffffff,
+		.data	= &variant_u300,
+	},
+	{
+		.id     = 0x00480180,
+		.mask   = 0x00ffffff,
+		.data	= &variant_ux500,
 	},
 	{ 0, 0 },
 };
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index 7cb24ab..b98cc78 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -145,6 +145,7 @@
 #define NR_SG		16
 
 struct clk;
+struct variant_data;
 
 struct mmci_host {
 	void __iomem		*base;
@@ -164,6 +165,7 @@ struct mmci_host {
 	unsigned int		cclk;
 	u32			pwr;
 	struct mmci_platform_data *plat;
+	struct variant_data	*variant;
 
 	u8			hw_designer;
 	u8			hw_revision:4;
-- 
cgit v0.10.2


From 4380c14fd77338bac9d1da4dc5dd9f6eb4966c82 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin.vincent@stericsson.com>
Date: Wed, 21 Jul 2010 12:55:18 +0100
Subject: ARM: 6245/1: mmci: enable hardware flow control on Ux500 variants

Although both the U300 and Ux500 use ST variants, the HWFCEN bits are at
different positions, so use the variant_data to store the information.

Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Rabin Vincent <rabin.vincent@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index fd2e7ac..379af90 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -39,19 +39,23 @@ static unsigned int fmax = 515633;
 /**
  * struct variant_data - MMCI variant-specific quirks
  * @clkreg: default value for MCICLOCK register
+ * @clkreg_enable: enable value for MMCICLOCK register
  */
 struct variant_data {
 	unsigned int		clkreg;
+	unsigned int		clkreg_enable;
 };
 
 static struct variant_data variant_arm = {
 };
 
 static struct variant_data variant_u300 = {
+	.clkreg_enable		= 1 << 13, /* HWFCEN */
 };
 
 static struct variant_data variant_ux500 = {
 	.clkreg			= MCI_CLK_ENABLE,
+	.clkreg_enable		= 1 << 14, /* HWFCEN */
 };
 /*
  * This must be called with host->lock held
@@ -71,8 +75,8 @@ static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired)
 				clk = 255;
 			host->cclk = host->mclk / (2 * (clk + 1));
 		}
-		if (host->hw_designer == AMBA_VENDOR_ST)
-			clk |= MCI_ST_FCEN; /* Bug fix in ST IP block */
+
+		clk |= variant->clkreg_enable;
 		clk |= MCI_CLK_ENABLE;
 		/* This hasn't proven to be worthwhile */
 		/* clk |= MCI_CLK_PWRSAVE; */
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index b98cc78..68970cf 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -28,8 +28,6 @@
 #define MCI_4BIT_BUS		(1 << 11)
 /* 8bit wide buses supported in ST Micro versions */
 #define MCI_ST_8BIT_BUS		(1 << 12)
-/* HW flow control on the ST Micro version */
-#define MCI_ST_FCEN		(1 << 13)
 
 #define MMCIARGUMENT		0x008
 #define MMCICOMMAND		0x00c
-- 
cgit v0.10.2


From 08458ef6eede6cf7d5a33c3a7c8bcdc3943012c2 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin.vincent@stericsson.com>
Date: Wed, 21 Jul 2010 12:55:59 +0100
Subject: ARM: 6246/1: mmci: support larger MMCIDATALENGTH register

The Ux500 variant has a 24-bit MMCIDATALENGTH register, as opposed to
the 16-bit one on the ARM version.

Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Rabin Vincent <rabin.vincent@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 379af90..7edae83 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -40,22 +40,27 @@ static unsigned int fmax = 515633;
  * struct variant_data - MMCI variant-specific quirks
  * @clkreg: default value for MCICLOCK register
  * @clkreg_enable: enable value for MMCICLOCK register
+ * @datalength_bits: number of bits in the MMCIDATALENGTH register
  */
 struct variant_data {
 	unsigned int		clkreg;
 	unsigned int		clkreg_enable;
+	unsigned int		datalength_bits;
 };
 
 static struct variant_data variant_arm = {
+	.datalength_bits	= 16,
 };
 
 static struct variant_data variant_u300 = {
 	.clkreg_enable		= 1 << 13, /* HWFCEN */
+	.datalength_bits	= 16,
 };
 
 static struct variant_data variant_ux500 = {
 	.clkreg			= MCI_CLK_ENABLE,
 	.clkreg_enable		= 1 << 14, /* HWFCEN */
+	.datalength_bits	= 24,
 };
 /*
  * This must be called with host->lock held
@@ -699,10 +704,11 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 	mmc->max_phys_segs = NR_SG;
 
 	/*
-	 * Since we only have a 16-bit data length register, we must
-	 * ensure that we don't exceed 2^16-1 bytes in a single request.
+	 * Since only a certain number of bits are valid in the data length
+	 * register, we must ensure that we don't exceed 2^num-1 bytes in a
+	 * single request.
 	 */
-	mmc->max_req_size = 65535;
+	mmc->max_req_size = (1 << variant->datalength_bits) - 1;
 
 	/*
 	 * Set the maximum segment size.  Since we aren't doing DMA
-- 
cgit v0.10.2