10 files changed, 324 insertions, 316 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 27892e3..3fefb43 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -228,7 +228,6 @@ config CPU_SA1100
 	select CPU_CACHE_V4WB
 	select CPU_CACHE_VIVT
 	select CPU_TLB_V4WB
-	select CPU_MINICACHE
 
 # XScale
 config CPU_XSCALE
@@ -239,7 +238,6 @@ config CPU_XSCALE
 	select CPU_ABRT_EV5T
 	select CPU_CACHE_VIVT
 	select CPU_TLB_V4WBI
-	select CPU_MINICACHE
 
 # ARMv6
 config CPU_V6
@@ -345,11 +343,6 @@ config CPU_TLB_V4WBI
 config CPU_TLB_V6
 	bool
 
-config CPU_MINICACHE
-	bool
-	help
-	  Processor has a minicache.
-
 comment "Processor Features"
 
 config ARM_THUMB
@@ -410,17 +403,30 @@ config CPU_BPREDICT_DISABLE
 	help
 	  Say Y here to disable branch prediction.  If unsure, say N.
 
+config TLS_REG_EMUL
+	bool
+	default y if SMP && (CPU_32v5 || CPU_32v4 || CPU_32v3)
+	help
+	  An SMP system using a pre-ARMv6 processor (there are apparently
+	  a few prototypes like that in existence) and therefore access to
+	  that required register must be emulated.
+
 config HAS_TLS_REG
 	bool
-	depends on CPU_32v6 && !CPU_32v5 && !CPU_32v4 && !CPU_32v3
-	default y
+	depends on !TLS_REG_EMUL
+	default y if SMP || CPU_32v7
 	help
 	  This selects support for the CP15 thread register.
-	  It is defined to be available on ARMv6 or later.  However
-	  if the kernel is configured to support multiple CPUs including
-	  a pre-ARMv6 processors, or if a given ARMv6 processor doesn't
-	  implement the thread register for some reason, then access to
-	  this register from user space must be trapped and emulated.
-	  If user space is relying on the __kuser_get_tls code then
-	  there should not be any impact.
+	  It is defined to be available on some ARMv6 processors (including
+	  all SMP capable ARMv6's) or later processors.  User space may
+	  assume directly accessing that register and always obtain the
+	  expected value only on ARMv7 and above.
+
+config NEEDS_SYSCALL_FOR_CMPXCHG
+	bool
+	default y if SMP && (CPU_32v5 || CPU_32v4 || CPU_32v3)
+	help
+	  SMP on a pre-ARMv6 processor?  Well OK then.
+	  Forget about fast user space cmpxchg support.
+	  It is just not possible.
 
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index ccf316c..59f47d4 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -31,8 +31,6 @@ obj-$(CONFIG_CPU_COPY_V6)	+= copypage-v6.o mmu.o
 obj-$(CONFIG_CPU_SA1100)	+= copypage-v4mc.o
 obj-$(CONFIG_CPU_XSCALE)	+= copypage-xscale.o
 
-obj-$(CONFIG_CPU_MINICACHE)	+= minicache.o
-
 obj-$(CONFIG_CPU_TLB_V3)	+= tlb-v3.o
 obj-$(CONFIG_CPU_TLB_V4WT)	+= tlb-v4.o
 obj-$(CONFIG_CPU_TLB_V4WB)	+= tlb-v4wb.o
diff --git a/arch/arm/mm/copypage-v4mc.S b/arch/arm/mm/copypage-v4mc.S
deleted file mode 100644
index 305af3d..0000000
--- a/arch/arm/mm/copypage-v4mc.S
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- *  linux/arch/arm/lib/copy_page-armv4mc.S
- *
- *  Copyright (C) 1995-2001 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  ASM optimised string functions
- */
-#include <linux/linkage.h>
-#include <linux/init.h>
-#include <asm/constants.h>
-
-	.text
-	.align	5
-/*
- * ARMv4 mini-dcache optimised copy_user_page
- *
- * We flush the destination cache lines just before we write the data into the
- * corresponding address.  Since the Dcache is read-allocate, this removes the
- * Dcache aliasing issue.  The writes will be forwarded to the write buffer,
- * and merged as appropriate.
- *
- * Note: We rely on all ARMv4 processors implementing the "invalidate D line"
- * instruction.  If your processor does not supply this, you have to write your
- * own copy_user_page that does the right thing.
- */
-ENTRY(v4_mc_copy_user_page)
-	stmfd	sp!, {r4, lr}			@ 2
-	mov	r4, r0
-	mov	r0, r1
-	bl	map_page_minicache
-	mov	r1, #PAGE_SZ/64			@ 1
-	ldmia	r0!, {r2, r3, ip, lr}		@ 4
-1:	mcr	p15, 0, r4, c7, c6, 1		@ 1   invalidate D line
-	stmia	r4!, {r2, r3, ip, lr}		@ 4
-	ldmia	r0!, {r2, r3, ip, lr}		@ 4+1
-	stmia	r4!, {r2, r3, ip, lr}		@ 4
-	ldmia	r0!, {r2, r3, ip, lr}		@ 4
-	mcr	p15, 0, r4, c7, c6, 1		@ 1   invalidate D line
-	stmia	r4!, {r2, r3, ip, lr}		@ 4
-	ldmia	r0!, {r2, r3, ip, lr}		@ 4
-	subs	r1, r1, #1			@ 1
-	stmia	r4!, {r2, r3, ip, lr}		@ 4
-	ldmneia	r0!, {r2, r3, ip, lr}		@ 4
-	bne	1b				@ 1
-	ldmfd	sp!, {r4, pc}			@ 3
-
-	.align	5
-/*
- * ARMv4 optimised clear_user_page
- *
- * Same story as above.
- */
-ENTRY(v4_mc_clear_user_page)
-	str	lr, [sp, #-4]!
-	mov	r1, #PAGE_SZ/64			@ 1
-	mov	r2, #0				@ 1
-	mov	r3, #0				@ 1
-	mov	ip, #0				@ 1
-	mov	lr, #0				@ 1
-1:	mcr	p15, 0, r0, c7, c6, 1		@ 1   invalidate D line
-	stmia	r0!, {r2, r3, ip, lr}		@ 4
-	stmia	r0!, {r2, r3, ip, lr}		@ 4
-	mcr	p15, 0, r0, c7, c6, 1		@ 1   invalidate D line
-	stmia	r0!, {r2, r3, ip, lr}		@ 4
-	stmia	r0!, {r2, r3, ip, lr}		@ 4
-	subs	r1, r1, #1			@ 1
-	bne	1b				@ 1
-	ldr	pc, [sp], #4
-
-	__INITDATA
-
-	.type	v4_mc_user_fns, #object
-ENTRY(v4_mc_user_fns)
-	.long	v4_mc_clear_user_page
-	.long	v4_mc_copy_user_page
-	.size	v4_mc_user_fns, . - v4_mc_user_fns
diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c
new file mode 100644
index 0000000..fc69dcc
--- /dev/null
+++ b/arch/arm/mm/copypage-v4mc.c
@@ -0,0 +1,111 @@
+/*
+ *  linux/arch/arm/lib/copypage-armv4mc.S
+ *
+ *  Copyright (C) 1995-2005 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This handles the mini data cache, as found on SA11x0 and XScale
+ * processors.  When we copy a user page page, we map it in such a way
+ * that accesses to this page will not touch the main data cache, but
+ * will be cached in the mini data cache.  This prevents us thrashing
+ * the main data cache on page faults.
+ */
+#include <linux/init.h>
+#include <linux/mm.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+/*
+ * 0xffff8000 to 0xffffffff is reserved for any ARM architecture
+ * specific hacks for copying pages efficiently.
+ */
+#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
+				  L_PTE_CACHEABLE)
+
+#define TOP_PTE(x)	pte_offset_kernel(top_pmd, x)
+
+static DEFINE_SPINLOCK(minicache_lock);
+
+/*
+ * ARMv4 mini-dcache optimised copy_user_page
+ *
+ * We flush the destination cache lines just before we write the data into the
+ * corresponding address.  Since the Dcache is read-allocate, this removes the
+ * Dcache aliasing issue.  The writes will be forwarded to the write buffer,
+ * and merged as appropriate.
+ *
+ * Note: We rely on all ARMv4 processors implementing the "invalidate D line"
+ * instruction.  If your processor does not supply this, you have to write your
+ * own copy_user_page that does the right thing.
+ */
+static void __attribute__((naked))
+mc_copy_user_page(void *from, void *to)
+{
+	asm volatile(
+	"stmfd	sp!, {r4, lr}			@ 2\n\
+	mov	r4, %2				@ 1\n\
+	ldmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+1:	mcr	p15, 0, %1, c7, c6, 1		@ 1   invalidate D line\n\
+	stmia	%1!, {r2, r3, ip, lr}		@ 4\n\
+	ldmia	%0!, {r2, r3, ip, lr}		@ 4+1\n\
+	stmia	%1!, {r2, r3, ip, lr}		@ 4\n\
+	ldmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	mcr	p15, 0, %1, c7, c6, 1		@ 1   invalidate D line\n\
+	stmia	%1!, {r2, r3, ip, lr}		@ 4\n\
+	ldmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	subs	r4, r4, #1			@ 1\n\
+	stmia	%1!, {r2, r3, ip, lr}		@ 4\n\
+	ldmneia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	bne	1b				@ 1\n\
+	ldmfd	sp!, {r4, pc}			@ 3"
+	:
+	: "r" (from), "r" (to), "I" (PAGE_SIZE / 64));
+}
+
+void v4_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr)
+{
+	spin_lock(&minicache_lock);
+
+	set_pte(TOP_PTE(0xffff8000), pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot));
+	flush_tlb_kernel_page(0xffff8000);
+
+	mc_copy_user_page((void *)0xffff8000, kto);
+
+	spin_unlock(&minicache_lock);
+}
+
+/*
+ * ARMv4 optimised clear_user_page
+ */
+void __attribute__((naked))
+v4_mc_clear_user_page(void *kaddr, unsigned long vaddr)
+{
+	asm volatile(
+	"str	lr, [sp, #-4]!\n\
+	mov	r1, %0				@ 1\n\
+	mov	r2, #0				@ 1\n\
+	mov	r3, #0				@ 1\n\
+	mov	ip, #0				@ 1\n\
+	mov	lr, #0				@ 1\n\
+1:	mcr	p15, 0, r0, c7, c6, 1		@ 1   invalidate D line\n\
+	stmia	r0!, {r2, r3, ip, lr}		@ 4\n\
+	stmia	r0!, {r2, r3, ip, lr}		@ 4\n\
+	mcr	p15, 0, r0, c7, c6, 1		@ 1   invalidate D line\n\
+	stmia	r0!, {r2, r3, ip, lr}		@ 4\n\
+	stmia	r0!, {r2, r3, ip, lr}		@ 4\n\
+	subs	r1, r1, #1			@ 1\n\
+	bne	1b				@ 1\n\
+	ldr	pc, [sp], #4"
+	:
+	: "I" (PAGE_SIZE / 64));
+}
+
+struct cpu_user_fns v4_mc_user_fns __initdata = {
+	.cpu_clear_user_page	= v4_mc_clear_user_page, 
+	.cpu_copy_user_page	= v4_mc_copy_user_page,
+};
diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c
index 694ac82..a8c0023 100644
--- a/arch/arm/mm/copypage-v6.c
+++ b/arch/arm/mm/copypage-v6.c
@@ -26,8 +26,8 @@
 #define to_address	(0xffffc000)
 #define to_pgprot	PAGE_KERNEL
 
-static pte_t *from_pte;
-static pte_t *to_pte;
+#define TOP_PTE(x)	pte_offset_kernel(top_pmd, x)
+
 static DEFINE_SPINLOCK(v6_lock);
 
 #define DCACHE_COLOUR(vaddr) ((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT)
@@ -74,8 +74,8 @@ void v6_copy_user_page_aliasing(void *kto, const void *kfrom, unsigned long vadd
 	 */
 	spin_lock(&v6_lock);
 
-	set_pte(from_pte + offset, pfn_pte(__pa(kfrom) >> PAGE_SHIFT, from_pgprot));
-	set_pte(to_pte + offset, pfn_pte(__pa(kto) >> PAGE_SHIFT, to_pgprot));
+	set_pte(TOP_PTE(from_address) + offset, pfn_pte(__pa(kfrom) >> PAGE_SHIFT, from_pgprot));
+	set_pte(TOP_PTE(to_address) + offset, pfn_pte(__pa(kto) >> PAGE_SHIFT, to_pgprot));
 
 	from = from_address + (offset << PAGE_SHIFT);
 	to   = to_address + (offset << PAGE_SHIFT);
@@ -114,7 +114,7 @@ void v6_clear_user_page_aliasing(void *kaddr, unsigned long vaddr)
 	 */
 	spin_lock(&v6_lock);
 
-	set_pte(to_pte + offset, pfn_pte(__pa(kaddr) >> PAGE_SHIFT, to_pgprot));
+	set_pte(TOP_PTE(to_address) + offset, pfn_pte(__pa(kaddr) >> PAGE_SHIFT, to_pgprot));
 	flush_tlb_kernel_page(to);
 	clear_page((void *)to);
 
@@ -129,21 +129,6 @@ struct cpu_user_fns v6_user_fns __initdata = {
 static int __init v6_userpage_init(void)
 {
 	if (cache_is_vipt_aliasing()) {
-		pgd_t *pgd;
-		pmd_t *pmd;
-
-		pgd = pgd_offset_k(from_address);
-		pmd = pmd_alloc(&init_mm, pgd, from_address);
-		if (!pmd)
-			BUG();
-		from_pte = pte_alloc_kernel(&init_mm, pmd, from_address);
-		if (!from_pte)
-			BUG();
-
-		to_pte = pte_alloc_kernel(&init_mm, pmd, to_address);
-		if (!to_pte)
-			BUG();
-
 		cpu_user.cpu_clear_user_page = v6_clear_user_page_aliasing;
 		cpu_user.cpu_copy_user_page = v6_copy_user_page_aliasing;
 	}
@@ -151,5 +136,4 @@ static int __init v6_userpage_init(void)
 	return 0;
 }
 
-__initcall(v6_userpage_init);
-
+core_initcall(v6_userpage_init);
diff --git a/arch/arm/mm/copypage-xscale.S b/arch/arm/mm/copypage-xscale.S
deleted file mode 100644
index bb27731..0000000
--- a/arch/arm/mm/copypage-xscale.S
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *  linux/arch/arm/lib/copypage-xscale.S
- *
- *  Copyright (C) 2001 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/linkage.h>
-#include <linux/init.h>
-#include <asm/constants.h>
-
-/*
- * General note:
- *  We don't really want write-allocate cache behaviour for these functions
- *  since that will just eat through 8K of the cache.
- */
-
-	.text
-	.align	5
-/*
- * XScale optimised copy_user_page
- *  r0 = destination
- *  r1 = source
- *  r2 = virtual user address of ultimate destination page
- *
- * The source page may have some clean entries in the cache already, but we
- * can safely ignore them - break_cow() will flush them out of the cache
- * if we eventually end up using our copied page.
- *
- * What we could do is use the mini-cache to buffer reads from the source
- * page.  We rely on the mini-cache being smaller than one page, so we'll
- * cycle through the complete cache anyway.
- */
-ENTRY(xscale_mc_copy_user_page)
-	stmfd	sp!, {r4, r5, lr}
-	mov	r5, r0
-	mov	r0, r1
-	bl	map_page_minicache
-	mov	r1, r5
-	mov	lr, #PAGE_SZ/64-1
-
-	/*
-	 * Strangely enough, best performance is achieved
-	 * when prefetching destination as well.  (NP)
-	 */
-	pld	[r0, #0]
-	pld	[r0, #32]
-	pld	[r1, #0]
-	pld	[r1, #32]
-
-1:	pld	[r0, #64]
-	pld	[r0, #96]
-	pld	[r1, #64]
-	pld	[r1, #96]
-
-2:	ldrd	r2, [r0], #8
-	ldrd	r4, [r0], #8
-	mov	ip, r1
-	strd	r2, [r1], #8
-	ldrd	r2, [r0], #8
-	strd	r4, [r1], #8
-	ldrd	r4, [r0], #8
-	strd	r2, [r1], #8
-	strd	r4, [r1], #8
-	mcr	p15, 0, ip, c7, c10, 1		@ clean D line
-	ldrd	r2, [r0], #8
-	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line
-	ldrd	r4, [r0], #8
-	mov	ip, r1
-	strd	r2, [r1], #8
-	ldrd	r2, [r0], #8
-	strd	r4, [r1], #8
-	ldrd	r4, [r0], #8
-	strd	r2, [r1], #8
-	strd	r4, [r1], #8
-	mcr	p15, 0, ip, c7, c10, 1		@ clean D line
-	subs	lr, lr, #1
-	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line
-	bgt	1b
-	beq	2b
-
-	ldmfd	sp!, {r4, r5, pc}
-
-	.align	5
-/*
- * XScale optimised clear_user_page
- *  r0 = destination
- *  r1 = virtual user address of ultimate destination page
- */
-ENTRY(xscale_mc_clear_user_page)
-	mov	r1, #PAGE_SZ/32
-	mov	r2, #0
-	mov	r3, #0
-1:	mov	ip, r0
-	strd	r2, [r0], #8
-	strd	r2, [r0], #8
-	strd	r2, [r0], #8
-	strd	r2, [r0], #8
-	mcr	p15, 0, ip, c7, c10, 1		@ clean D line
-	subs	r1, r1, #1
-	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line
-	bne	1b
-	mov	pc, lr
-
-	__INITDATA
-
-	.type	xscale_mc_user_fns, #object
-ENTRY(xscale_mc_user_fns)
-	.long	xscale_mc_clear_user_page
-	.long	xscale_mc_copy_user_page
-	.size	xscale_mc_user_fns, . - xscale_mc_user_fns
diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c
new file mode 100644
index 0000000..42a6ee2
--- /dev/null
+++ b/arch/arm/mm/copypage-xscale.c
@@ -0,0 +1,131 @@
+/*
+ *  linux/arch/arm/lib/copypage-xscale.S
+ *
+ *  Copyright (C) 1995-2005 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This handles the mini data cache, as found on SA11x0 and XScale
+ * processors.  When we copy a user page page, we map it in such a way
+ * that accesses to this page will not touch the main data cache, but
+ * will be cached in the mini data cache.  This prevents us thrashing
+ * the main data cache on page faults.
+ */
+#include <linux/init.h>
+#include <linux/mm.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+/*
+ * 0xffff8000 to 0xffffffff is reserved for any ARM architecture
+ * specific hacks for copying pages efficiently.
+ */
+#define COPYPAGE_MINICACHE	0xffff8000
+
+#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
+				  L_PTE_CACHEABLE)
+
+#define TOP_PTE(x)	pte_offset_kernel(top_pmd, x)
+
+static DEFINE_SPINLOCK(minicache_lock);
+
+/*
+ * XScale mini-dcache optimised copy_user_page
+ *
+ * We flush the destination cache lines just before we write the data into the
+ * corresponding address.  Since the Dcache is read-allocate, this removes the
+ * Dcache aliasing issue.  The writes will be forwarded to the write buffer,
+ * and merged as appropriate.
+ */
+static void __attribute__((naked))
+mc_copy_user_page(void *from, void *to)
+{
+	/*
+	 * Strangely enough, best performance is achieved
+	 * when prefetching destination as well.  (NP)
+	 */
+	asm volatile(
+	"stmfd	sp!, {r4, r5, lr}		\n\
+	mov	lr, %2				\n\
+	pld	[r0, #0]			\n\
+	pld	[r0, #32]			\n\
+	pld	[r1, #0]			\n\
+	pld	[r1, #32]			\n\
+1:	pld	[r0, #64]			\n\
+	pld	[r0, #96]			\n\
+	pld	[r1, #64]			\n\
+	pld	[r1, #96]			\n\
+2:	ldrd	r2, [r0], #8			\n\
+	ldrd	r4, [r0], #8			\n\
+	mov	ip, r1				\n\
+	strd	r2, [r1], #8			\n\
+	ldrd	r2, [r0], #8			\n\
+	strd	r4, [r1], #8			\n\
+	ldrd	r4, [r0], #8			\n\
+	strd	r2, [r1], #8			\n\
+	strd	r4, [r1], #8			\n\
+	mcr	p15, 0, ip, c7, c10, 1		@ clean D line\n\
+	ldrd	r2, [r0], #8			\n\
+	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line\n\
+	ldrd	r4, [r0], #8			\n\
+	mov	ip, r1				\n\
+	strd	r2, [r1], #8			\n\
+	ldrd	r2, [r0], #8			\n\
+	strd	r4, [r1], #8			\n\
+	ldrd	r4, [r0], #8			\n\
+	strd	r2, [r1], #8			\n\
+	strd	r4, [r1], #8			\n\
+	mcr	p15, 0, ip, c7, c10, 1		@ clean D line\n\
+	subs	lr, lr, #1			\n\
+	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line\n\
+	bgt	1b				\n\
+	beq	2b				\n\
+	ldmfd	sp!, {r4, r5, pc}		"
+	:
+	: "r" (from), "r" (to), "I" (PAGE_SIZE / 64 - 1));
+}
+
+void xscale_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr)
+{
+	spin_lock(&minicache_lock);
+
+	set_pte(TOP_PTE(COPYPAGE_MINICACHE), pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot));
+	flush_tlb_kernel_page(COPYPAGE_MINICACHE);
+
+	mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto);
+
+	spin_unlock(&minicache_lock);
+}
+
+/*
+ * XScale optimised clear_user_page
+ */
+void __attribute__((naked))
+xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr)
+{
+	asm volatile(
+	"mov	r1, %0				\n\
+	mov	r2, #0				\n\
+	mov	r3, #0				\n\
+1:	mov	ip, r0				\n\
+	strd	r2, [r0], #8			\n\
+	strd	r2, [r0], #8			\n\
+	strd	r2, [r0], #8			\n\
+	strd	r2, [r0], #8			\n\
+	mcr	p15, 0, ip, c7, c10, 1		@ clean D line\n\
+	subs	r1, r1, #1			\n\
+	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line\n\
+	bne	1b				\n\
+	mov	pc, lr"
+	:
+	: "I" (PAGE_SIZE / 32));
+}
+
+struct cpu_user_fns xscale_mc_user_fns __initdata = {
+	.cpu_clear_user_page	= xscale_mc_clear_user_page, 
+	.cpu_copy_user_page	= xscale_mc_copy_user_page,
+};
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index c6de48d..4085ed9 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -13,6 +13,29 @@
 
 #include <asm/cacheflush.h>
 #include <asm/system.h>
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_CPU_CACHE_VIPT
+#define ALIAS_FLUSH_START	0xffff4000
+
+#define TOP_PTE(x)	pte_offset_kernel(top_pmd, x)
+
+static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr)
+{
+	unsigned long to = ALIAS_FLUSH_START + (CACHE_COLOUR(vaddr) << PAGE_SHIFT);
+
+	set_pte(TOP_PTE(to), pfn_pte(pfn, PAGE_KERNEL));
+	flush_tlb_kernel_page(to);
+
+	asm(	"mcrr	p15, 0, %1, %0, c14\n"
+	"	mcrr	p15, 0, %1, %0, c5\n"
+	    :
+	    : "r" (to), "r" (to + PAGE_SIZE - L1_CACHE_BYTES)
+	    : "cc");
+}
+#else
+#define flush_pfn_alias(pfn,vaddr)	do { } while (0)
+#endif
 
 static void __flush_dcache_page(struct address_space *mapping, struct page *page)
 {
@@ -37,6 +60,18 @@ static void __flush_dcache_page(struct address_space *mapping, struct page *page
 		return;
 
 	/*
+	 * This is a page cache page.  If we have a VIPT cache, we
+	 * only need to do one flush - which would be at the relevant
+	 * userspace colour, which is congruent with page->index.
+	 */
+	if (cache_is_vipt()) {
+		if (cache_is_vipt_aliasing())
+			flush_pfn_alias(page_to_pfn(page),
+					page->index << PAGE_CACHE_SHIFT);
+		return;
+	}
+
+	/*
 	 * There are possible user space mappings of this page:
 	 * - VIVT cache: we need to also write back and invalidate all user
 	 *   data in the current VM view associated with this page.
@@ -57,8 +92,6 @@ static void __flush_dcache_page(struct address_space *mapping, struct page *page
 			continue;
 		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
 		flush_cache_page(mpnt, mpnt->vm_start + offset, page_to_pfn(page));
-		if (cache_is_vipt())
-			break;
 	}
 	flush_dcache_mmap_unlock(mapping);
 }
diff --git a/arch/arm/mm/minicache.c b/arch/arm/mm/minicache.c
deleted file mode 100644
index dedf2ab..0000000
--- a/arch/arm/mm/minicache.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- *  linux/arch/arm/mm/minicache.c
- *
- *  Copyright (C) 2001 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This handles the mini data cache, as found on SA11x0 and XScale
- * processors.  When we copy a user page page, we map it in such a way
- * that accesses to this page will not touch the main data cache, but
- * will be cached in the mini data cache.  This prevents us thrashing
- * the main data cache on page faults.
- */
-#include <linux/init.h>
-#include <linux/mm.h>
-
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-
-/*
- * 0xffff8000 to 0xffffffff is reserved for any ARM architecture
- * specific hacks for copying pages efficiently.
- */
-#define minicache_address (0xffff8000)
-#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
-				  L_PTE_CACHEABLE)
-
-static pte_t *minicache_pte;
-
-/*
- * Note that this is intended to be called only from the copy_user_page
- * asm code; anything else will require special locking to prevent the
- * mini-cache space being re-used.  (Note: probably preempt unsafe).
- *
- * We rely on the fact that the minicache is 2K, and we'll be pushing
- * 4K of data through it, so we don't actually have to specifically
- * flush the minicache when we change the mapping.
- *
- * Note also: assert(PAGE_OFFSET <= virt < high_memory).
- * Unsafe: preempt, kmap.
- */
-unsigned long map_page_minicache(unsigned long virt)
-{
-	set_pte(minicache_pte, pfn_pte(__pa(virt) >> PAGE_SHIFT, minicache_pgprot));
-	flush_tlb_kernel_page(minicache_address);
-
-	return minicache_address;
-}
-
-static int __init minicache_init(void)
-{
-	pgd_t *pgd;
-	pmd_t *pmd;
-
-	spin_lock(&init_mm.page_table_lock);
-
-	pgd = pgd_offset_k(minicache_address);
-	pmd = pmd_alloc(&init_mm, pgd, minicache_address);
-	if (!pmd)
-		BUG();
-	minicache_pte = pte_alloc_kernel(&init_mm, pmd, minicache_address);
-	if (!minicache_pte)
-		BUG();
-
-	spin_unlock(&init_mm.page_table_lock);
-
-	return 0;
-}
-
-core_initcall(minicache_init);
diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c
index 585dfb8..2c2b93d 100644
--- a/arch/arm/mm/mm-armv.c
+++ b/arch/arm/mm/mm-armv.c
@@ -37,6 +37,8 @@ pgprot_t pgprot_kernel;
 
 EXPORT_SYMBOL(pgprot_kernel);
 
+pmd_t *top_pmd;
+
 struct cachepolicy {
 	const char	policy[16];
 	unsigned int	cr_mask;
@@ -142,6 +144,16 @@ __setup("noalign", noalign_setup);
 
 #define FIRST_KERNEL_PGD_NR	(FIRST_USER_PGD_NR + USER_PTRS_PER_PGD)
 
+static inline pmd_t *pmd_off(pgd_t *pgd, unsigned long virt)
+{
+	return pmd_offset(pgd, virt);
+}
+
+static inline pmd_t *pmd_off_k(unsigned long virt)
+{
+	return pmd_off(pgd_offset_k(virt), virt);
+}
+
 /*
  * need to get a 16k page for level 1
  */
@@ -220,7 +232,7 @@ void free_pgd_slow(pgd_t *pgd)
 		return;
 
 	/* pgd is always present and good */
-	pmd = (pmd_t *)pgd;
+	pmd = pmd_off(pgd, 0);
 	if (pmd_none(*pmd))
 		goto free;
 	if (pmd_bad(*pmd)) {
@@ -246,9 +258,8 @@ free:
 static inline void
 alloc_init_section(unsigned long virt, unsigned long phys, int prot)
 {
-	pmd_t *pmdp;
+	pmd_t *pmdp = pmd_off_k(virt);
 
-	pmdp = pmd_offset(pgd_offset_k(virt), virt);
 	if (virt & (1 << 20))
 		pmdp++;
 
@@ -283,11 +294,9 @@ alloc_init_supersection(unsigned long virt, unsigned long phys, int prot)
 static inline void
 alloc_init_page(unsigned long virt, unsigned long phys, unsigned int prot_l1, pgprot_t prot)
 {
-	pmd_t *pmdp;
+	pmd_t *pmdp = pmd_off_k(virt);
 	pte_t *ptep;
 
-	pmdp = pmd_offset(pgd_offset_k(virt), virt);
-
 	if (pmd_none(*pmdp)) {
 		unsigned long pmdval;
 		ptep = alloc_bootmem_low_pages(2 * PTRS_PER_PTE *
@@ -310,7 +319,7 @@ alloc_init_page(unsigned long virt, unsigned long phys, unsigned int prot_l1, pg
  */
 static inline void clear_mapping(unsigned long virt)
 {
-	pmd_clear(pmd_offset(pgd_offset_k(virt), virt));
+	pmd_clear(pmd_off_k(virt));
 }
 
 struct mem_types {
@@ -578,7 +587,7 @@ void setup_mm_for_reboot(char mode)
 			 PMD_TYPE_SECT;
 		if (cpu_arch <= CPU_ARCH_ARMv5)
 			pmdval |= PMD_BIT4;
-		pmd = pmd_offset(pgd + i, i << PGDIR_SHIFT);
+		pmd = pmd_off(pgd, i << PGDIR_SHIFT);
 		pmd[0] = __pmd(pmdval);
 		pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1)));
 		flush_pmd_entry(pmd);
@@ -675,6 +684,8 @@ void __init memtable_init(struct meminfo *mi)
 
 	flush_cache_all();
 	flush_tlb_all();
+
+	top_pmd = pmd_off_k(0xffff0000);
 }
 
 /*