From fe240f11cdab5831d0c4c1ecbaab00b7d302f295 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 9 Jul 2013 15:55:15 +0530
Subject: ARC: Add some .gitignore entries

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/boot/.gitignore b/arch/arc/boot/.gitignore
new file mode 100644
index 0000000..5d65b54
--- /dev/null
+++ b/arch/arc/boot/.gitignore
@@ -0,0 +1 @@
+*.dtb*
diff --git a/arch/arc/kernel/.gitignore b/arch/arc/kernel/.gitignore
new file mode 100644
index 0000000..c5f676c
--- /dev/null
+++ b/arch/arc/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
-- 
cgit v0.10.2


From 37f3ac498c988536f0d42b4659fb5d2c050b2f96 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 9 Jul 2013 15:07:13 +0530
Subject: ARC: Exception Handlers Code consolidation

After the recent cleanups, all the exception handlers now have same
boilerplate prologue code. Move that into common macro.

This reduces readability but helps greatly with sharing / duplicating
entry code with ARCv2 ISA where the handlers are pretty much the same,
just the entry prologue is different (due to hardware assist).

Also while at it, add the missing FAKE_RET_FROM_EXCPN calls in a couple
of places to drop down to pure kernel mode (from exception mode) before
jumping off into "C" code.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index df57611..8840810 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -365,7 +365,7 @@
  * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP).
  *
  * Before saving the full regfile - this reg is restored back, only
- * to be saved again on kernel mode stack, as part of ptregs.
+ * to be saved again on kernel mode stack, as part of pt_regs.
  *-------------------------------------------------------------*/
 .macro EXCPN_PROLOG_FREEUP_REG	reg
 #ifdef CONFIG_SMP
@@ -384,6 +384,28 @@
 .endm
 
 /*--------------------------------------------------------------
+ * Exception Entry prologue
+ * -Switches stack to K mode (if not already)
+ * -Saves the register file
+ *
+ * After this it is safe to call the "C" handlers
+ *-------------------------------------------------------------*/
+.macro EXCEPTION_PROLOGUE
+
+	/* Need at least 1 reg to code the early exception prologue */
+	EXCPN_PROLOG_FREEUP_REG r9
+
+	/* U/K mode at time of exception (stack not switched if already K) */
+	lr  r9, [erstatus]
+
+	/* ARC700 doesn't provide auto-stack switching */
+	SWITCH_TO_KERNEL_STK
+
+	/* save the regfile */
+	SAVE_ALL_SYS
+.endm
+
+/*--------------------------------------------------------------
  * Save all registers used by Exceptions (TLB Miss, Prot-V, Mem err etc)
  * Requires SP to be already switched to kernel mode Stack
  * sp points to the next free element on the stack at exit of this macro.
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 1d71651..059ca94 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -267,12 +267,7 @@ ARC_EXIT handle_interrupt_level1
 
 ARC_ENTRY instr_service
 
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	lr  r0, [efa]
 	mov r1, sp
@@ -289,15 +284,13 @@ ARC_EXIT instr_service
 
 ARC_ENTRY mem_service
 
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	lr  r0, [efa]
 	mov r1, sp
+
+	FAKE_RET_FROM_EXCPN r9
+
 	bl  do_memory_error
 	b   ret_from_exception
 ARC_EXIT mem_service
@@ -308,11 +301,7 @@ ARC_EXIT mem_service
 
 ARC_ENTRY EV_MachineCheck
 
-	EXCPN_PROLOG_FREEUP_REG r9
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	lr  r2, [ecr]
 	lr  r0, [efa]
@@ -342,13 +331,7 @@ ARC_EXIT EV_MachineCheck
 
 ARC_ENTRY EV_TLBProtV
 
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	;Which mode (user/kernel) was the system in when Exception occured
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	;---------(3) Save some more regs-----------------
 	;  vineetg: Mar 6th: Random Seg Fault issue #1
@@ -406,12 +389,7 @@ ARC_EXIT EV_TLBProtV
 ; ---------------------------------------------
 ARC_ENTRY EV_PrivilegeV
 
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	lr  r0, [efa]
 	mov r1, sp
@@ -427,14 +405,13 @@ ARC_EXIT EV_PrivilegeV
 ; ---------------------------------------------
 ARC_ENTRY EV_Extension
 
-	EXCPN_PROLOG_FREEUP_REG r9
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	lr  r0, [efa]
 	mov r1, sp
+
+	FAKE_RET_FROM_EXCPN r9
+
 	bl  do_extension_fault
 	b   ret_from_exception
 ARC_EXIT EV_Extension
@@ -526,14 +503,7 @@ trap_with_param:
 
 ARC_ENTRY EV_Trap
 
-	; Need at least 1 reg to code the early exception prolog
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	;Which mode (user/kernel) was the system in when intr occured
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	;------- (4) What caused the Trap --------------
 	lr     r12, [ecr]
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index 5c5bb23..fc34ebc 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -371,13 +371,7 @@ do_slow_path_pf:
 
 	; Slow path TLB Miss handled as a regular ARC Exception
 	; (stack switching / save the complete reg-file).
-	; That requires freeing up r9
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	; ------- setup args for Linux Page fault Hanlder ---------
 	mov_s r0, sp
-- 
cgit v0.10.2


From fce16bc35ae4a45634f3dc348d8d297a25c277cf Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 9 Jul 2013 17:06:40 +0530
Subject: ARC: Entry Handler tweaks: Optimize away redundant IRQ_DISABLE_SAVE

In the exception return path, for both U/K cases, intr are already
disabled (for various existing reasons). So when we drop down to
@restore_regs, we need not redo that.

There was a subtle issue - interrupts were NOT being disabled for the
ret-to-kernel-but-no-preemption case - now fixed by moving the
IRQ_DISABLE further up in @resume_kernel_mode.

So what do we gain:

* Shaves off a few insn in return path.

* Eliminates the need for IRQ_DISABLE_SAVE assembler macro for ARCv2
  hence allows for entry code sharing.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h
index d99f79b..b68b53f 100644
--- a/arch/arc/include/asm/irqflags.h
+++ b/arch/arc/include/asm/irqflags.h
@@ -157,13 +157,6 @@ static inline void arch_unmask_irq(unsigned int irq)
 	flag	\scratch
 .endm
 
-.macro IRQ_DISABLE_SAVE  scratch, save
-	lr	\scratch, [status32]
-	mov	\save, \scratch		/* Make a copy */
-	bic	\scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
-	flag	\scratch
-.endm
-
 .macro IRQ_ENABLE  scratch
 	lr	\scratch, [status32]
 	or	\scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 059ca94..b908dde 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -612,6 +612,9 @@ resume_kernel_mode:
 
 #ifdef CONFIG_PREEMPT
 
+	; This is a must for preempt_schedule_irq()
+	IRQ_DISABLE	r9
+
 	; Can't preempt if preemption disabled
 	GET_CURR_THR_INFO_FROM_SP   r10
 	ld  r8, [r10, THREAD_INFO_PREEMPT_COUNT]
@@ -621,8 +624,6 @@ resume_kernel_mode:
 	ld  r9, [r10, THREAD_INFO_FLAGS]
 	bbit0  r9, TIF_NEED_RESCHED, restore_regs
 
-	IRQ_DISABLE	r9
-
 	; Invoke PREEMPTION
 	bl      preempt_schedule_irq
 
@@ -635,12 +636,11 @@ resume_kernel_mode:
 ;
 ; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap)
 ; IRQ shd definitely not happen between now and rtie
+; All 2 entry points to here already disable interrupts
 
 restore_regs :
 
-	; Disable Interrupts while restoring reg-file back
-	; XXX can this be optimised out
-	IRQ_DISABLE_SAVE    r9, r10	;@r10 has prisitine (pre-disable) copy
+	lr	r10, [status32]
 
 	; Restore REG File. In case multiple Events outstanding,
 	; use the same priorty as rtie: EXCPN, L2 IRQ, L1 IRQ, None
-- 
cgit v0.10.2


From 4b06ff35fb1dcafbcbdcbe9ce794ab0770f2a843 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Wed, 10 Jul 2013 11:40:27 +0530
Subject: ARC: Code cosmetics (Nothing semantical)

* reduce editor lines taken by pt_regs
* ARCompact ISA specific part of TLB Miss handlers clubbed together
* cleanup some comments

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index c9938e7..1bfeec2 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -20,27 +20,17 @@ struct pt_regs {
 
 	/* Real registers */
 	long bta;	/* bta_l1, bta_l2, erbta */
-	long lp_start;
-	long lp_end;
-	long lp_count;
+
+	long lp_start, lp_end, lp_count;
+
 	long status32;	/* status32_l1, status32_l2, erstatus */
 	long ret;	/* ilink1, ilink2 or eret */
 	long blink;
 	long fp;
 	long r26;	/* gp */
-	long r12;
-	long r11;
-	long r10;
-	long r9;
-	long r8;
-	long r7;
-	long r6;
-	long r5;
-	long r4;
-	long r3;
-	long r2;
-	long r1;
-	long r0;
+
+	long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
+
 	long sp;	/* user/kernel sp depending on where we came from  */
 	long orig_r0;
 
@@ -70,19 +60,7 @@ struct pt_regs {
 /* Callee saved registers - need to be saved only when you are scheduled out */
 
 struct callee_regs {
-	long r25;
-	long r24;
-	long r23;
-	long r22;
-	long r21;
-	long r20;
-	long r19;
-	long r18;
-	long r17;
-	long r16;
-	long r15;
-	long r14;
-	long r13;
+	long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13;
 };
 
 #define instruction_pointer(regs)	((regs)->ret)
diff --git a/arch/arc/include/asm/spinlock_types.h b/arch/arc/include/asm/spinlock_types.h
index 8276bfd..662627c 100644
--- a/arch/arc/include/asm/spinlock_types.h
+++ b/arch/arc/include/asm/spinlock_types.h
@@ -20,9 +20,9 @@ typedef struct {
 #define __ARCH_SPIN_LOCK_LOCKED		{ __ARCH_SPIN_LOCK_LOCKED__ }
 
 /*
- * Unlocked:     0x01_00_00_00
- * Read lock(s): 0x00_FF_00_00 to say 0x01
- * Write lock:   0x0, but only possible if prior value "unlocked" 0x0100_0000
+ * Unlocked     : 0x0100_0000
+ * Read lock(s) : 0x00FF_FFFF to 0x01  (Multiple Readers decrement it)
+ * Write lock   : 0x0, but only if prior value is "unlocked" 0x0100_0000
  */
 typedef struct {
 	volatile unsigned int	counter;
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index f415d85..5a1259c 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -622,12 +622,12 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
 /*
  * General purpose helper to make I and D cache lines consistent.
  * @paddr is phy addr of region
- * @vaddr is typically user or kernel vaddr (vmalloc)
- *    Howver in one instance, flush_icache_range() by kprobe (for a breakpt in
+ * @vaddr is typically user vaddr (breakpoint) or kernel vaddr (vmalloc)
+ *    However in one instance, when called by kprobe (for a breakpt in
  *    builtin kernel code) @vaddr will be paddr only, meaning CDU operation will
  *    use a paddr to index the cache (despite VIPT). This is fine since since a
- *    built-in kernel page will not have any virtual mappings (not even kernel)
- *    kprobe on loadable module is different as it will have kvaddr.
+ *    builtin kernel page will not have any virtual mappings.
+ *    kprobe on loadable module will be kernel vaddr.
  */
 void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len)
 {
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index fc34ebc..9cce00e 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -44,17 +44,36 @@
 #include <asm/arcregs.h>
 #include <asm/cache.h>
 #include <asm/processor.h>
-#if (CONFIG_ARC_MMU_VER == 1)
 #include <asm/tlb-mmu1.h>
-#endif
 
-;--------------------------------------------------------------------------
-; scratch memory to save the registers (r0-r3) used to code TLB refill Handler
-; For details refer to comments before TLBMISS_FREEUP_REGS below
+;-----------------------------------------------------------------
+; ARC700 Exception Handling doesn't auto-switch stack and it only provides
+; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0"
+;
+; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a
+; "global" is used to free-up FIRST core reg to be able to code the rest of
+; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe).
+; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3
+; need to be saved as well by extending the "global" to be 4 words. Hence
+;	".size   ex_saved_reg1, 16"
+; [All of this dance is to avoid stack switching for each TLB Miss, since we
+; only need to save only a handful of regs, as opposed to complete reg file]
+;
+; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST
+; core reg as it will not be SMP safe.
+; Thus scratch AUX reg is used (and no longer used to cache task PGD).
+; To save the rest of 3 regs - per cpu, the global is made "per-cpu".
+; Epilogue thus has to locate the "per-cpu" storage for regs.
+; To avoid cache line bouncing the per-cpu global is aligned/sized per
+; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence
+;	".size   ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)"
+
+; As simple as that....
 ;--------------------------------------------------------------------------
 
+; scratch memory to save [r0-r3] used to code TLB refill Handler
 ARCFP_DATA ex_saved_reg1
-	.align 1 << L1_CACHE_SHIFT	; IMP: Must be Cache Line aligned
+	.align 1 << L1_CACHE_SHIFT
 	.type   ex_saved_reg1, @object
 #ifdef CONFIG_SMP
 	.size   ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)
@@ -66,6 +85,44 @@ ex_saved_reg1:
 	.zero 16
 #endif
 
+.macro TLBMISS_FREEUP_REGS
+#ifdef CONFIG_SMP
+	sr  r0, [ARC_REG_SCRATCH_DATA0]	; freeup r0 to code with
+	GET_CPU_ID  r0			; get to per cpu scratch mem,
+	lsl r0, r0, L1_CACHE_SHIFT	; cache line wide per cpu
+	add r0, @ex_saved_reg1, r0
+#else
+	st    r0, [@ex_saved_reg1]
+	mov_s r0, @ex_saved_reg1
+#endif
+	st_s  r1, [r0, 4]
+	st_s  r2, [r0, 8]
+	st_s  r3, [r0, 12]
+
+	; VERIFY if the ASID in MMU-PID Reg is same as
+	; one in Linux data structures
+
+	DBG_ASID_MISMATCH
+.endm
+
+.macro TLBMISS_RESTORE_REGS
+#ifdef CONFIG_SMP
+	GET_CPU_ID  r0			; get to per cpu scratch mem
+	lsl r0, r0, L1_CACHE_SHIFT	; each is cache line wide
+	add r0, @ex_saved_reg1, r0
+	ld_s  r3, [r0,12]
+	ld_s  r2, [r0, 8]
+	ld_s  r1, [r0, 4]
+	lr    r0, [ARC_REG_SCRATCH_DATA0]
+#else
+	mov_s r0, @ex_saved_reg1
+	ld_s  r3, [r0,12]
+	ld_s  r2, [r0, 8]
+	ld_s  r1, [r0, 4]
+	ld_s  r0, [r0]
+#endif
+.endm
+
 ;============================================================================
 ;  Troubleshooting Stuff
 ;============================================================================
@@ -191,68 +248,6 @@ ex_saved_reg1:
 #endif
 .endm
 
-;-----------------------------------------------------------------
-; ARC700 Exception Handling doesn't auto-switch stack and it only provides
-; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0"
-;
-; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a
-; "global" is used to free-up FIRST core reg to be able to code the rest of
-; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe).
-; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3
-; need to be saved as well by extending the "global" to be 4 words. Hence
-;	".size   ex_saved_reg1, 16"
-; [All of this dance is to avoid stack switching for each TLB Miss, since we
-; only need to save only a handful of regs, as opposed to complete reg file]
-;
-; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST
-; core reg as it will not be SMP safe.
-; Thus scratch AUX reg is used (and no longer used to cache task PGD).
-; To save the rest of 3 regs - per cpu, the global is made "per-cpu".
-; Epilogue thus has to locate the "per-cpu" storage for regs.
-; To avoid cache line bouncing the per-cpu global is aligned/sized per
-; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence
-;	".size   ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)"
-
-; As simple as that....
-
-.macro TLBMISS_FREEUP_REGS
-#ifdef CONFIG_SMP
-	sr  r0, [ARC_REG_SCRATCH_DATA0]	; freeup r0 to code with
-	GET_CPU_ID  r0			; get to per cpu scratch mem,
-	lsl r0, r0, L1_CACHE_SHIFT	; cache line wide per cpu
-	add r0, @ex_saved_reg1, r0
-#else
-	st    r0, [@ex_saved_reg1]
-	mov_s r0, @ex_saved_reg1
-#endif
-	st_s  r1, [r0, 4]
-	st_s  r2, [r0, 8]
-	st_s  r3, [r0, 12]
-
-	; VERIFY if the ASID in MMU-PID Reg is same as
-	; one in Linux data structures
-
-	DBG_ASID_MISMATCH
-.endm
-
-;-----------------------------------------------------------------
-.macro TLBMISS_RESTORE_REGS
-#ifdef CONFIG_SMP
-	GET_CPU_ID  r0			; get to per cpu scratch mem
-	lsl r0, r0, L1_CACHE_SHIFT	; each is cache line wide
-	add r0, @ex_saved_reg1, r0
-	ld_s  r3, [r0,12]
-	ld_s  r2, [r0, 8]
-	ld_s  r1, [r0, 4]
-	lr    r0, [ARC_REG_SCRATCH_DATA0]
-#else
-	mov_s r0, @ex_saved_reg1
-	ld_s  r3, [r0,12]
-	ld_s  r2, [r0, 8]
-	ld_s  r1, [r0, 4]
-	ld_s  r0, [r0]
-#endif
-.endm
 
 ARCFP_CODE	;Fast Path Code, candidate for ICCM
 
-- 
cgit v0.10.2


From 64b703ef276964b160a5e88df0764f254460cafb Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Mon, 17 Jun 2013 18:12:13 +0530
Subject: ARC: MMUv4 preps/1 - Fold PTE K/U access flags

The current ARC VM code has 13 flags in Page Table entry: some software
(accessed/dirty/non-linear-maps) and rest hardware specific. With 8k MMU
page, we need 19 bits for addressing page frame so remaining 13 bits is
just about enough to accommodate the current flags.

In MMUv4 there are 2 additional flags, SZ (normal or super page) and WT
(cache access mode write-thru) - and additionally PFN is 20 bits (vs. 19
before for 8k). Thus these can't be held in current PTE w/o making each
entry 64bit wide.

It seems there is some scope of compressing the current PTE flags (and
freeing up a few bits). Currently PTE contains fully orthogonal distinct
access permissions for kernel and user mode (Kr, Kw, Kx; Ur, Uw, Ux)
which can be folded into one set (R, W, X). The translation of 3 PTE
bits into 6 TLB bits (when programming the MMU) can be done based on
the following prerequisites/assumptions:

1. For kernel-mode-only translations (vmalloc: 0x7000_0000 to
   0x7FFF_FFFF), PTE additionally has PAGE_GLOBAL flag set (and user
   space entries can never be global). Thus such a PTE can translate
   to Kr, Kw, Kx (as appropriate) and zero for User mode counterparts.

2. For non global entries, the PTE flags can be used to create mirrored
   K and U TLB bits. This is true after commit a950549c675f2c8c504
   "ARC: copy_(to|from)_user() to honor usermode-access permissions"
   which ensured that user-space translations _MUST_ have same access
   permissions for both U/K mode accesses so that  copy_{to,from}_user()
   play fair with fault based CoW break and such...

There is no such thing as a free lunch - the cost is slightly inflated
TLB-Miss Handlers.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 4749a0e..99799c9 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -57,27 +57,21 @@
 
 #define _PAGE_ACCESSED      (1<<1)	/* Page is accessed (S) */
 #define _PAGE_CACHEABLE     (1<<2)	/* Page is cached (H) */
-#define _PAGE_U_EXECUTE     (1<<3)	/* Page has user execute perm (H) */
-#define _PAGE_U_WRITE       (1<<4)	/* Page has user write perm (H) */
-#define _PAGE_U_READ        (1<<5)	/* Page has user read perm (H) */
-#define _PAGE_K_EXECUTE     (1<<6)	/* Page has kernel execute perm (H) */
-#define _PAGE_K_WRITE       (1<<7)	/* Page has kernel write perm (H) */
-#define _PAGE_K_READ        (1<<8)	/* Page has kernel perm (H) */
+#define _PAGE_EXECUTE       (1<<3)	/* Page has user execute perm (H) */
+#define _PAGE_WRITE         (1<<4)	/* Page has user write perm (H) */
+#define _PAGE_READ          (1<<5)	/* Page has user read perm (H) */
 #define _PAGE_GLOBAL        (1<<9)	/* Page is global (H) */
 #define _PAGE_MODIFIED      (1<<10)	/* Page modified (dirty) (S) */
 #define _PAGE_FILE          (1<<10)	/* page cache/ swap (S) */
 #define _PAGE_PRESENT       (1<<11)	/* TLB entry is valid (H) */
 
-#else
+#else	/* MMU v3 onwards */
 
 /* PD1 */
 #define _PAGE_CACHEABLE     (1<<0)	/* Page is cached (H) */
-#define _PAGE_U_EXECUTE     (1<<1)	/* Page has user execute perm (H) */
-#define _PAGE_U_WRITE       (1<<2)	/* Page has user write perm (H) */
-#define _PAGE_U_READ        (1<<3)	/* Page has user read perm (H) */
-#define _PAGE_K_EXECUTE     (1<<4)	/* Page has kernel execute perm (H) */
-#define _PAGE_K_WRITE       (1<<5)	/* Page has kernel write perm (H) */
-#define _PAGE_K_READ        (1<<6)	/* Page has kernel perm (H) */
+#define _PAGE_EXECUTE       (1<<1)	/* Page has user execute perm (H) */
+#define _PAGE_WRITE         (1<<2)	/* Page has user write perm (H) */
+#define _PAGE_READ          (1<<3)	/* Page has user read perm (H) */
 #define _PAGE_ACCESSED      (1<<7)	/* Page is accessed (S) */
 
 /* PD0 */
@@ -92,8 +86,8 @@
 #define _PAGE_SHARED_CODE_H (1<<31)	/* Hardware counterpart of above */
 #endif
 
-/* Kernel allowed all permissions for all pages */
-#define _K_PAGE_PERMS  (_PAGE_K_EXECUTE | _PAGE_K_WRITE | _PAGE_K_READ | \
+/* vmalloc permissions */
+#define _K_PAGE_PERMS  (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \
 			_PAGE_GLOBAL | _PAGE_PRESENT)
 
 #ifdef CONFIG_ARC_CACHE_PAGES
@@ -109,10 +103,6 @@
  */
 #define ___DEF (_PAGE_PRESENT | _PAGE_DEF_CACHEABLE)
 
-#define _PAGE_READ	(_PAGE_U_READ    | _PAGE_K_READ)
-#define _PAGE_WRITE	(_PAGE_U_WRITE   | _PAGE_K_WRITE)
-#define _PAGE_EXECUTE	(_PAGE_U_EXECUTE | _PAGE_K_EXECUTE)
-
 /* Set of bits not changed in pte_modify */
 #define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_MODIFIED)
 
@@ -126,8 +116,8 @@
 
 #define PAGE_SHARED	PAGE_U_W_R
 
-/* While kernel runs out of unstrslated space, vmalloc/modules use a chunk of
- * kernel vaddr space - visible in all addr spaces, but kernel mode only
+/* While kernel runs out of unstranslated space, vmalloc/modules use a chunk of
+ * user vaddr space - visible in all addr spaces, but kernel mode only
  * Thus Global, all-kernel-access, no-user-access, cached
  */
 #define PAGE_KERNEL          __pgprot(_K_PAGE_PERMS | _PAGE_DEF_CACHEABLE)
@@ -136,10 +126,9 @@
 #define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS)
 
 /* Masks for actual TLB "PD"s */
-#define PTE_BITS_IN_PD0	(_PAGE_GLOBAL | _PAGE_PRESENT)
-#define PTE_BITS_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE | \
-			 _PAGE_U_EXECUTE | _PAGE_U_WRITE | _PAGE_U_READ | \
-			 _PAGE_K_EXECUTE | _PAGE_K_WRITE | _PAGE_K_READ)
+#define PTE_BITS_IN_PD0		(_PAGE_GLOBAL | _PAGE_PRESENT)
+#define PTE_BITS_RWX		(_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
+#define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE)
 
 /**************************************************************************
  * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 7957dc4..f990834 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -341,7 +341,7 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 {
 	unsigned long flags;
-	unsigned int idx, asid_or_sasid;
+	unsigned int idx, asid_or_sasid, rwx;
 	unsigned long pd0_flags;
 
 	/*
@@ -393,8 +393,23 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 
 	write_aux_reg(ARC_REG_TLBPD0, address | pd0_flags | asid_or_sasid);
 
+	/*
+	 * ARC MMU provides fully orthogonal access bits for K/U mode,
+	 * however Linux only saves 1 set to save PTE real-estate
+	 * Here we convert 3 PTE bits into 6 MMU bits:
+	 * -Kernel only entries have Kr Kw Kx 0 0 0
+	 * -User entries have mirrored K and U bits
+	 */
+	rwx = pte_val(*ptep) & PTE_BITS_RWX;
+
+	if (pte_val(*ptep) & _PAGE_GLOBAL)
+		rwx <<= 3;		/* r w x => Kr Kw Kx 0 0 0 */
+	else
+		rwx |= (rwx << 3);	/* r w x => Kr Kw Kx Ur Uw Ux */
+
 	/* Load remaining info in PD1 (Page Frame Addr and Kx/Kw/Kr Flags) */
-	write_aux_reg(ARC_REG_TLBPD1, (pte_val(*ptep) & PTE_BITS_IN_PD1));
+	write_aux_reg(ARC_REG_TLBPD1,
+		      rwx | (pte_val(*ptep) & PTE_BITS_NON_RWX_IN_PD1));
 
 	/* First verify if entry for this vaddr+ASID already exists */
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe);
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index 9cce00e..ec382e5 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -218,8 +218,15 @@ ex_saved_reg1:
 ; IN: r0 = PTE, r1 = ptr to PTE
 
 .macro CONV_PTE_TO_TLB
-	and r3, r0, PTE_BITS_IN_PD1 ; Extract permission flags+PFN from PTE
-	sr  r3, [ARC_REG_TLBPD1]    ; these go in PD1
+	and    r3, r0, PTE_BITS_RWX	;       r w x
+	lsl    r2, r3, 3		; r w x 0 0 0
+	and.f  0,  r0, _PAGE_GLOBAL
+	or.z   r2, r2, r3		; r w x r w x
+
+	and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE
+	or  r3, r3, r2
+
+	sr  r3, [ARC_REG_TLBPD1]    	; these go in PD1
 
 	and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb
 #if (CONFIG_ARC_MMU_VER <= 2)   /* Neednot be done with v3 onwards */
@@ -272,8 +279,8 @@ ARC_ENTRY EV_TLBMissI
 	;----------------------------------------------------------------
 	; VERIFY_PTE: Check if PTE permissions approp for executing code
 	cmp_s   r2, VMALLOC_START
-	mov.lo  r2, (_PAGE_PRESENT | _PAGE_U_EXECUTE)
-	mov.hs  r2, (_PAGE_PRESENT | _PAGE_K_EXECUTE)
+	mov_s   r2, (_PAGE_PRESENT | _PAGE_EXECUTE)
+	or.hs   r2, r2, _PAGE_GLOBAL
 
 	and     r3, r0, r2  ; Mask out NON Flag bits from PTE
 	xor.f   r3, r3, r2  ; check ( ( pte & flags_test ) == flags_test )
@@ -312,26 +319,21 @@ ARC_ENTRY EV_TLBMissD
 	;----------------------------------------------------------------
 	; VERIFY_PTE: Chk if PTE permissions approp for data access (R/W/R+W)
 
-	mov_s   r2, 0
+	cmp_s	r2, VMALLOC_START
+	mov_s   r2, _PAGE_PRESENT	; common bit for K/U PTE
+	or.hs	r2, r2, _PAGE_GLOBAL	; kernel PTE only
+
+	; Linux PTE [RWX] bits are semantically overloaded:
+	; -If PAGE_GLOBAL set, they refer to kernel-only flags (vmalloc)
+	; -Otherwise they are user-mode permissions, and those are exactly
+	;  same for kernel mode as well (e.g. copy_(to|from)_user)
+
 	lr      r3, [ecr]
 	btst_s  r3, ECR_C_BIT_DTLB_LD_MISS	; Read Access
-	or.nz   r2, r2, _PAGE_U_READ      	; chk for Read flag in PTE
+	or.nz   r2, r2, _PAGE_READ      	; chk for Read flag in PTE
 	btst_s  r3, ECR_C_BIT_DTLB_ST_MISS	; Write Access
-	or.nz   r2, r2, _PAGE_U_WRITE     	; chk for Write flag in PTE
-	; Above laddering takes care of XCHG access
-	;   which is both Read and Write
-
-	; If kernel mode access, ; make _PAGE_xx flags as _PAGE_K_xx
-	; For copy_(to|from)_user, despite exception taken in kernel mode,
-	; this code is not hit, because EFA would still be the user mode
-	; address (EFA < 0x6000_0000).
-	; This code is for legit kernel mode faults, vmalloc specifically
-	; (EFA: 0x7000_0000 to 0x7FFF_FFFF)
-
-	lr      r3, [efa]
-	cmp     r3, VMALLOC_START - 1   ; If kernel mode access
-	asl.hi  r2, r2, 3               ; make _PAGE_xx flags as _PAGE_K_xx
-	or      r2, r2, _PAGE_PRESENT   ; Common flag for K/U mode
+	or.nz   r2, r2, _PAGE_WRITE     	; chk for Write flag in PTE
+	; Above laddering takes care of XCHG access (both R and W)
 
 	; By now, r2 setup with all the Flags we need to check in PTE
 	and     r3, r0, r2              ; Mask out NON Flag bits from PTE
-- 
cgit v0.10.2


From d091fcb97ff48a5cb6de19ad0881fb2c8e76dbc0 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Mon, 17 Jun 2013 19:44:06 +0530
Subject: ARC: MMUv4 preps/2 - Reshuffle PTE bits

With previous commit freeing up PTE bits, reassign them so as to:

- Match the bit to H/w counterpart where possible
  (e.g. MMUv2 GLOBAL/PRESENT, this avoids a shift in create_tlb())
- Avoid holes in _PAGE_xxx definitions

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 99799c9..6b0b7f7e 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -60,30 +60,24 @@
 #define _PAGE_EXECUTE       (1<<3)	/* Page has user execute perm (H) */
 #define _PAGE_WRITE         (1<<4)	/* Page has user write perm (H) */
 #define _PAGE_READ          (1<<5)	/* Page has user read perm (H) */
-#define _PAGE_GLOBAL        (1<<9)	/* Page is global (H) */
-#define _PAGE_MODIFIED      (1<<10)	/* Page modified (dirty) (S) */
-#define _PAGE_FILE          (1<<10)	/* page cache/ swap (S) */
-#define _PAGE_PRESENT       (1<<11)	/* TLB entry is valid (H) */
+#define _PAGE_MODIFIED      (1<<6)	/* Page modified (dirty) (S) */
+#define _PAGE_FILE          (1<<7)	/* page cache/ swap (S) */
+#define _PAGE_GLOBAL        (1<<8)	/* Page is global (H) */
+#define _PAGE_PRESENT       (1<<10)	/* TLB entry is valid (H) */
 
 #else	/* MMU v3 onwards */
 
-/* PD1 */
 #define _PAGE_CACHEABLE     (1<<0)	/* Page is cached (H) */
 #define _PAGE_EXECUTE       (1<<1)	/* Page has user execute perm (H) */
 #define _PAGE_WRITE         (1<<2)	/* Page has user write perm (H) */
 #define _PAGE_READ          (1<<3)	/* Page has user read perm (H) */
-#define _PAGE_ACCESSED      (1<<7)	/* Page is accessed (S) */
-
-/* PD0 */
+#define _PAGE_ACCESSED      (1<<4)	/* Page is accessed (S) */
+#define _PAGE_MODIFIED      (1<<5)	/* Page modified (dirty) (S) */
+#define _PAGE_FILE          (1<<6)	/* page cache/ swap (S) */
 #define _PAGE_GLOBAL        (1<<8)	/* Page is global (H) */
 #define _PAGE_PRESENT       (1<<9)	/* TLB entry is valid (H) */
-#define _PAGE_SHARED_CODE   (1<<10)	/* Shared Code page with cmn vaddr
+#define _PAGE_SHARED_CODE   (1<<11)	/* Shared Code page with cmn vaddr
 					   usable for shared TLB entries (H) */
-
-#define _PAGE_MODIFIED      (1<<11)	/* Page modified (dirty) (S) */
-#define _PAGE_FILE          (1<<12)	/* page cache/ swap (S) */
-
-#define _PAGE_SHARED_CODE_H (1<<31)	/* Hardware counterpart of above */
 #endif
 
 /* vmalloc permissions */
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index f990834..85a8716 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -342,7 +342,6 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 {
 	unsigned long flags;
 	unsigned int idx, asid_or_sasid, rwx;
-	unsigned long pd0_flags;
 
 	/*
 	 * create_tlb() assumes that current->mm == vma->mm, since
@@ -381,17 +380,13 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 	/* update this PTE credentials */
 	pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED);
 
-	/* Create HW TLB entry Flags (in PD0) from PTE Flags */
-#if (CONFIG_ARC_MMU_VER <= 2)
-	pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0) >> 1);
-#else
-	pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0));
-#endif
+	/* Create HW TLB(PD0,PD1) from PTE  */
 
 	/* ASID for this task */
 	asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff;
 
-	write_aux_reg(ARC_REG_TLBPD0, address | pd0_flags | asid_or_sasid);
+	write_aux_reg(ARC_REG_TLBPD0, address | asid_or_sasid |
+				      (pte_val(*ptep) & PTE_BITS_IN_PD0));
 
 	/*
 	 * ARC MMU provides fully orthogonal access bits for K/U mode,
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index ec382e5..50e83ca 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -229,9 +229,6 @@ ex_saved_reg1:
 	sr  r3, [ARC_REG_TLBPD1]    	; these go in PD1
 
 	and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb
-#if (CONFIG_ARC_MMU_VER <= 2)   /* Neednot be done with v3 onwards */
-	lsr r2, r2                  ; shift PTE flags to match layout in PD0
-#endif
 
 	lr  r3,[ARC_REG_TLBPD0]     ; MMU prepares PD0 with vaddr and asid
 
-- 
cgit v0.10.2


From 483e9bcb01432ce66448c214bd0afc231da48b4b Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Mon, 1 Jul 2013 18:12:28 +0530
Subject: ARC: MMUv4 preps/3 - Abstract out TLB Insert/Delete

This reorganizes the current TLB operations into pseudo-ops to better
pair with MMUv4's native Insert/Delete operations.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index 7c03fe6..d14da3d 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -32,6 +32,8 @@
 /* Error code if probe fails */
 #define TLB_LKUP_ERR		0x80000000
 
+#define TLB_DUP_ERR	(TLB_LKUP_ERR | 0x00000001)
+
 /* TLB Commands */
 #define TLBWrite    0x1
 #define TLBRead     0x2
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 85a8716..f58d5f6 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -52,6 +52,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/bug.h>
 #include <asm/arcregs.h>
 #include <asm/setup.h>
 #include <asm/mmu_context.h>
@@ -109,38 +110,41 @@ struct mm_struct *asid_mm_map[NUM_ASID + 1];
 
 /*
  * Utility Routine to erase a J-TLB entry
- * The procedure is to look it up in the MMU. If found, ERASE it by
- *  issuing a TlbWrite CMD with PD0 = PD1 = 0
+ * Caller needs to setup Index Reg (manually or via getIndex)
  */
-
-static void __tlb_entry_erase(void)
+static inline void __tlb_entry_erase(void)
 {
 	write_aux_reg(ARC_REG_TLBPD1, 0);
 	write_aux_reg(ARC_REG_TLBPD0, 0);
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
 }
 
-static void tlb_entry_erase(unsigned int vaddr_n_asid)
+static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid)
 {
 	unsigned int idx;
 
-	/* Locate the TLB entry for this vaddr + ASID */
 	write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid);
+
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe);
 	idx = read_aux_reg(ARC_REG_TLBINDEX);
 
+	return idx;
+}
+
+static void tlb_entry_erase(unsigned int vaddr_n_asid)
+{
+	unsigned int idx;
+
+	/* Locate the TLB entry for this vaddr + ASID */
+	idx = tlb_entry_lkup(vaddr_n_asid);
+
 	/* No error means entry found, zero it out */
 	if (likely(!(idx & TLB_LKUP_ERR))) {
 		__tlb_entry_erase();
-	} else {		/* Some sort of Error */
-
+	} else {
 		/* Duplicate entry error */
-		if (idx & 0x1) {
-			/* TODO we need to handle this case too */
-			pr_emerg("unhandled Duplicate flush for %x\n",
-			       vaddr_n_asid);
-		}
-		/* else entry not found so nothing to do */
+		WARN(idx == TLB_DUP_ERR, "Probe returned Dup PD for %x\n",
+					   vaddr_n_asid);
 	}
 }
 
@@ -159,7 +163,7 @@ static void utlb_invalidate(void)
 {
 #if (CONFIG_ARC_MMU_VER >= 2)
 
-#if (CONFIG_ARC_MMU_VER < 3)
+#if (CONFIG_ARC_MMU_VER == 2)
 	/* MMU v2 introduced the uTLB Flush command.
 	 * There was however an obscure hardware bug, where uTLB flush would
 	 * fail when a prior probe for J-TLB (both totally unrelated) would
@@ -182,6 +186,36 @@ static void utlb_invalidate(void)
 
 }
 
+static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
+{
+	unsigned int idx;
+
+	/*
+	 * First verify if entry for this vaddr+ASID already exists
+	 * This also sets up PD0 (vaddr, ASID..) for final commit
+	 */
+	idx = tlb_entry_lkup(pd0);
+
+	/*
+	 * If Not already present get a free slot from MMU.
+	 * Otherwise, Probe would have located the entry and set INDEX Reg
+	 * with existing location. This will cause Write CMD to over-write
+	 * existing entry with new PD0 and PD1
+	 */
+	if (likely(idx & TLB_LKUP_ERR))
+		write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex);
+
+	/* setup the other half of TLB entry (pfn, rwx..) */
+	write_aux_reg(ARC_REG_TLBPD1, pd1);
+
+	/*
+	 * Commit the Entry to MMU
+	 * It doesnt sound safe to use the TLBWriteNI cmd here
+	 * which doesn't flush uTLBs. I'd rather be safe than sorry.
+	 */
+	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+}
+
 /*
  * Un-conditionally (without lookup) erase the entire MMU contents
  */
@@ -341,7 +375,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 {
 	unsigned long flags;
-	unsigned int idx, asid_or_sasid, rwx;
+	unsigned int asid_or_sasid, rwx;
+	unsigned long pd0, pd1;
 
 	/*
 	 * create_tlb() assumes that current->mm == vma->mm, since
@@ -385,8 +420,7 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 	/* ASID for this task */
 	asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff;
 
-	write_aux_reg(ARC_REG_TLBPD0, address | asid_or_sasid |
-				      (pte_val(*ptep) & PTE_BITS_IN_PD0));
+	pd0 = address | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0);
 
 	/*
 	 * ARC MMU provides fully orthogonal access bits for K/U mode,
@@ -402,29 +436,9 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 	else
 		rwx |= (rwx << 3);	/* r w x => Kr Kw Kx Ur Uw Ux */
 
-	/* Load remaining info in PD1 (Page Frame Addr and Kx/Kw/Kr Flags) */
-	write_aux_reg(ARC_REG_TLBPD1,
-		      rwx | (pte_val(*ptep) & PTE_BITS_NON_RWX_IN_PD1));
-
-	/* First verify if entry for this vaddr+ASID already exists */
-	write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe);
-	idx = read_aux_reg(ARC_REG_TLBINDEX);
-
-	/*
-	 * If Not already present get a free slot from MMU.
-	 * Otherwise, Probe would have located the entry and set INDEX Reg
-	 * with existing location. This will cause Write CMD to over-write
-	 * existing entry with new PD0 and PD1
-	 */
-	if (likely(idx & TLB_LKUP_ERR))
-		write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex);
+	pd1 = rwx | (pte_val(*ptep) & PTE_BITS_NON_RWX_IN_PD1);
 
-	/*
-	 * Commit the Entry to MMU
-	 * It doesnt sound safe to use the TLBWriteNI cmd here
-	 * which doesn't flush uTLBs. I'd rather be safe than sorry.
-	 */
-	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+	tlb_entry_insert(pd0, pd1);
 
 	local_irq_restore(flags);
 }
-- 
cgit v0.10.2


From c0857f5d0e747dbbf53d8f27bcf7d977aac33760 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 29 Aug 2013 17:42:02 +0530
Subject: ARC: No need to flush the TLB in early boot

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index f58d5f6..7646a96 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -577,13 +577,6 @@ void arc_mmu_init(void)
 	if (mmu->pg_sz != PAGE_SIZE)
 		panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
 
-	/*
-	 * ASID mgmt data structures are compile time init
-	 *  asid_cache = FIRST_ASID and asid_mm_map[] all zeroes
-	 */
-
-	local_flush_tlb_all();
-
 	/* Enable the MMU */
 	write_aux_reg(ARC_REG_PID, MMU_ENABLE);
 
-- 
cgit v0.10.2


From ade922f8e269115252d199bf6c524a10379cf716 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 25 Jul 2013 18:11:50 -0700
Subject: ARC: [ASID] Remove legacy/unused debug code

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index d14da3d..7165f25 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -49,9 +49,6 @@
 
 typedef struct {
 	unsigned long asid;	/* Pvt Addr-Space ID for mm */
-#ifdef CONFIG_ARC_TLB_DBG
-	struct task_struct *tsk;
-#endif
 } mm_context_t;
 
 #ifdef CONFIG_ARC_DBG_TLB_PARANOIA
diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index 0d71fb1..a63800f 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -114,14 +114,6 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
 	asid_mm_map[asid_cache] = mm;
 	mm->context.asid = asid_cache;
 
-#ifdef CONFIG_ARC_TLB_DBG
-	pr_info("ARC_TLB_DBG: NewMM=0x%x OldMM=0x%x task_struct=0x%x Task: %s,"
-	       " pid:%u, assigned asid:%lu\n",
-	       (unsigned int)mm, (unsigned int)prev_owner,
-	       (unsigned int)(mm->context.tsk), (mm->context.tsk)->comm,
-	       (mm->context.tsk)->pid, mm->context.asid);
-#endif
-
 	write_aux_reg(ARC_REG_PID, asid_cache | MMU_ENABLE);
 
 	local_irq_restore(flags);
@@ -135,9 +127,6 @@ static inline int
 init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
 	mm->context.asid = NO_ASID;
-#ifdef CONFIG_ARC_TLB_DBG
-	mm->context.tsk = tsk;
-#endif
 	return 0;
 }
 
-- 
cgit v0.10.2


From 5bd87adf9b2ae5fa1bb469c68029b4eec06d6e03 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 23 Aug 2013 17:37:18 +0530
Subject: ARC: [ASID] Refactor the TLB paranoid debug code

- Asm code already has the SW and HW ASID values, so they can be
  passed to the printing routine.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index 7165f25..1639f25 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -52,7 +52,7 @@ typedef struct {
 } mm_context_t;
 
 #ifdef CONFIG_ARC_DBG_TLB_PARANOIA
-void tlb_paranoid_check(unsigned int pid_sw, unsigned long address);
+void tlb_paranoid_check(unsigned int mm_asid, unsigned long address);
 #else
 #define tlb_paranoid_check(a, b)
 #endif
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 7646a96..a4ad68c 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -688,25 +688,27 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
  * Low Level ASM TLB handler calls this if it finds that HW and SW ASIDS
  * don't match
  */
-void print_asid_mismatch(int is_fast_path)
+void print_asid_mismatch(int mm_asid, int mmu_asid, int is_fast_path)
 {
-	int pid_sw, pid_hw;
-	pid_sw = current->active_mm->context.asid;
-	pid_hw = read_aux_reg(ARC_REG_PID) & 0xff;
-
 	pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n",
-	       is_fast_path ? "Fast" : "Slow", pid_sw, pid_hw);
+	       is_fast_path ? "Fast" : "Slow", mm_asid, mmu_asid);
 
 	__asm__ __volatile__("flag 1");
 }
 
-void tlb_paranoid_check(unsigned int pid_sw, unsigned long addr)
+void tlb_paranoid_check(unsigned int mm_asid, unsigned long addr)
 {
-	unsigned int pid_hw;
+	unsigned int mmu_asid;
 
-	pid_hw = read_aux_reg(ARC_REG_PID) & 0xff;
+	mmu_asid = read_aux_reg(ARC_REG_PID) & 0xff;
 
-	if (addr < 0x70000000 && ((pid_hw != pid_sw) || (pid_sw == NO_ASID)))
-		print_asid_mismatch(0);
+	/*
+	 * At the time of a TLB miss/installation
+	 *   - HW version needs to match SW version
+	 *   - SW needs to have a valid ASID
+	 */
+	if (addr < 0x70000000 &&
+	    ((mmu_asid != mm_asid) || (mm_asid == NO_ASID)))
+		print_asid_mismatch(mm_asid, mmu_asid, 0);
 }
 #endif
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index 50e83ca..88897a1 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -102,7 +102,7 @@ ex_saved_reg1:
 	; VERIFY if the ASID in MMU-PID Reg is same as
 	; one in Linux data structures
 
-	DBG_ASID_MISMATCH
+	tlb_paranoid_check_asm
 .endm
 
 .macro TLBMISS_RESTORE_REGS
@@ -133,34 +133,32 @@ ex_saved_reg1:
 ; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble.
 ; So we try to detect this in TLB Mis shandler
 
-
-.macro DBG_ASID_MISMATCH
+.macro tlb_paranoid_check_asm
 
 #ifdef CONFIG_ARC_DBG_TLB_PARANOIA
 
-	; make sure h/w ASID is same as s/w ASID
-
 	GET_CURR_TASK_ON_CPU  r3
 	ld r0, [r3, TASK_ACT_MM]
 	ld r0, [r0, MM_CTXT+MM_CTXT_ASID]
 
 	lr r1, [ARC_REG_PID]
 	and r1, r1, 0xFF
+
 	breq r1, r0, 5f
 
 	; Error if H/w and S/w ASID don't match, but NOT if in kernel mode
-	lr  r0, [erstatus]
-	bbit0 r0, STATUS_U_BIT, 5f
+	lr  r2, [erstatus]
+	bbit0 r2, STATUS_U_BIT, 5f
 
 	; We sure are in troubled waters, Flag the error, but to do so
 	; need to switch to kernel mode stack to call error routine
 	GET_TSK_STACK_BASE   r3, sp
 
 	; Call printk to shoutout aloud
-	mov r0, 1
+	mov r2, 1
 	j print_asid_mismatch
 
-5:   ; ASIDs match so proceed normally
+5:	; ASIDs match so proceed normally
 	nop
 
 #endif
-- 
cgit v0.10.2


From 3daa48d1d9bc44baa079d65e72ef2e3f1139ac03 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Wed, 24 Jul 2013 13:53:45 -0700
Subject: ARC: [ASID] get_new_mmu_context() to conditionally allocate new ASID

ASID allocation changes/1

This patch does 2 things:

(1) get_new_mmu_context() NOW moves mm->ASID to a new value ONLY if it
    was from a prev allocation cycle/generation OR if mm had no ASID
    allocated (previously it would unconditionally move to a new ASID)

    Callers desiring unconditional update of ASID, e.g.local_flush_tlb_mm()
    (for parent's address space invalidation at fork) need to first force
    the parent to an unallocated ASID.

(2) get_new_mmu_context() always sets the MMU PID reg with unchanged/new
    ASID value.

The gains are:
- consolidation of all asid alloc logic into get_new_mmu_context()
- avoiding code duplication in switch_mm() for PID reg setting
- Enables future change to fold activate_mm() into switch_mm()

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index a63800f..7a3ecd2 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -69,8 +69,8 @@ extern struct mm_struct *asid_mm_map[NUM_ASID + 1];
 extern int asid_cache;
 
 /*
- * Assign a new ASID to task. If the task already has an ASID, it is
- * relinquished.
+ * Get a new ASID if task doesn't have a valid one (unalloc or from prev cycle)
+ * Also set the MMU PID register to existing/updated ASID
  */
 static inline void get_new_mmu_context(struct mm_struct *mm)
 {
@@ -80,6 +80,17 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
 	local_irq_save(flags);
 
 	/*
+	 * Move to new ASID if it was not from current alloc-cycle/generation.
+	 *
+	 * Note: Callers needing new ASID unconditionally, independent of
+	 * 	 generation, e.g. local_flush_tlb_mm() for forking  parent,
+	 * 	 first need to destroy the context, setting it to invalid
+	 * 	 value.
+	 */
+	if (mm->context.asid <= asid_cache)
+		goto set_hw;
+
+	/*
 	 * Relinquish the currently owned ASID (if any).
 	 * Doing unconditionally saves a cmp-n-branch; for already unused
 	 * ASID slot, the value was/remains NULL
@@ -99,9 +110,9 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
 	 * task with ASID from prev allocation cycle (before ASID roll-over).
 	 *
 	 * This might look wrong - if we are re-using some other task's ASID,
-	 * won't we use it's stale TLB entries too. Actually switch_mm( ) takes
+	 * won't we use it's stale TLB entries too. Actually the algorithm takes
 	 * care of such a case: it ensures that task with ASID from prev alloc
-	 * cycle, when scheduled will refresh it's ASID: see switch_mm( ) below
+	 * cycle, when scheduled will refresh it's ASID
 	 * The stealing scenario described here will only happen if that task
 	 * didn't get a chance to refresh it's ASID - implying stale entries
 	 * won't exist.
@@ -114,7 +125,8 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
 	asid_mm_map[asid_cache] = mm;
 	mm->context.asid = asid_cache;
 
-	write_aux_reg(ARC_REG_PID, asid_cache | MMU_ENABLE);
+set_hw:
+	write_aux_reg(ARC_REG_PID, mm->context.asid | MMU_ENABLE);
 
 	local_irq_restore(flags);
 }
@@ -141,28 +153,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
 #endif
 
-	/*
-	 * Get a new ASID if task doesn't have a valid one. Possible when
-	 *  -task never had an ASID (fresh after fork)
-	 *  -it's ASID was stolen - past an ASID roll-over.
-	 *  -There's a third obscure scenario (if this task is running for the
-	 *   first time afer an ASID rollover), where despite having a valid
-	 *   ASID, we force a get for new ASID - see comments at top.
-	 *
-	 * Both the non-alloc scenario and first-use-after-rollover can be
-	 * detected using the single condition below:  NO_ASID = 256
-	 * while asid_cache is always a valid ASID value (0-255).
-	 */
-	if (next->context.asid > asid_cache) {
-		get_new_mmu_context(next);
-	} else {
-		/*
-		 * XXX: This will never happen given the chks above
-		 * BUG_ON(next->context.asid > MAX_ASID);
-		 */
-		write_aux_reg(ARC_REG_PID, next->context.asid | MMU_ENABLE);
-	}
-
+	get_new_mmu_context(next);
 }
 
 static inline void destroy_context(struct mm_struct *mm)
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index a4ad68c..b5c5e0a 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -258,13 +258,14 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm)
 		return;
 
 	/*
-	 * Workaround for Android weirdism:
-	 * A binder VMA could end up in a task such that vma->mm != tsk->mm
-	 * old code would cause h/w - s/w ASID to get out of sync
+	 * - Move to a new ASID, but only if the mm is still wired in
+	 *   (Android Binder ended up calling this for vma->mm != tsk->mm,
+	 *    causing h/w - s/w ASID to get out of sync)
+	 * - Also get_new_mmu_context() new implementation allocates a new
+	 *   ASID only if it is not allocated already - so unallocate first
 	 */
-	if (current->mm != mm)
-		destroy_context(mm);
-	else
+	destroy_context(mm);
+	if (current->mm == mm)
 		get_new_mmu_context(mm);
 }
 
-- 
cgit v0.10.2


From c60115537c96d78a884d2a4bd78839a57266d48b Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Wed, 24 Jul 2013 17:31:08 -0700
Subject: ARC: [ASID] activate_mm() == switch_mm()

ASID allocation changes/2

Use the fact that switch_mm() and activate_mm() are now exactly the same
code, while acknowledging the semantic difference in a comment.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index 7a3ecd2..9b09d18 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -156,6 +156,15 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	get_new_mmu_context(next);
 }
 
+/*
+ * Called at the time of execve() to get a new ASID
+ * Note the subtlety here: get_new_mmu_context() behaves differently here
+ * vs. in switch_mm(). Here it always returns a new ASID, because mm has
+ * an unallocated "initial" value, while in latter, it moves to a new ASID,
+ * only if it was unallocated
+ */
+#define activate_mm(prev, next)		switch_mm(prev, next, NULL)
+
 static inline void destroy_context(struct mm_struct *mm)
 {
 	unsigned long flags;
@@ -177,17 +186,6 @@ static inline void destroy_context(struct mm_struct *mm)
  */
 #define deactivate_mm(tsk, mm)   do { } while (0)
 
-static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
-{
-#ifndef CONFIG_SMP
-	write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
-#endif
-
-	/* Unconditionally get a new ASID */
-	get_new_mmu_context(next);
-
-}
-
 #define enter_lazy_tlb(mm, tsk)
 
 #endif /* __ASM_ARC_MMU_CONTEXT_H */
-- 
cgit v0.10.2


From 947bf103fcd2defa3bc4b7ebc6b05d0427bcde2d Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 25 Jul 2013 15:45:50 -0700
Subject: ARC: [ASID] Track ASID allocation cycles/generations

This helps remove asid-to-mm reverse map

While mm->context.asid contains the ASID assigned to a process, our ASID
allocator also used the asid_mm_map[] reverse map. In a new allocation
cycle (mm->ASID >= @asid_cache), the Round Robin ASID allocator used this
to check if the new @asid_cache belonged to some mm2 (from prev cycle).
If so, it could locate that mm using the ASID reverse map, and mark that
mm as having an unallocated ASID, to force it to refresh at the time of
switch_mm().

However, for SMP, the reverse map has to be maintained per CPU, so it
becomes 2 dimensional; hence it is removed.

With reverse map gone, it is NOT possible to reach out to current
assignee. So we track the ASID allocation generation/cycle and
on every switch_mm(), check if the current generation of CPU ASID is
same as mm's ASID; If not it is refreshed.

(Based loosely on arch/sh implementation)

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index 1639f25..c82db8b 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -48,7 +48,7 @@
 #ifndef __ASSEMBLY__
 
 typedef struct {
-	unsigned long asid;	/* Pvt Addr-Space ID for mm */
+	unsigned long asid;	/* 8 bit MMU PID + Generation cycle */
 } mm_context_t;
 
 #ifdef CONFIG_ARC_DBG_TLB_PARANOIA
diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index 9b09d18..43a1b51 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -34,39 +34,22 @@
  * When it reaches max 255, the allocation cycle starts afresh by flushing
  * the entire TLB and wrapping ASID back to zero.
  *
- * For book-keeping, Linux uses a couple of data-structures:
- *  -mm_struct has an @asid field to keep a note of task's ASID (needed at the
- *   time of say switch_mm( )
- *  -An array of mm structs @asid_mm_map[] for asid->mm the reverse mapping,
- *  given an ASID, finding the mm struct associated.
- *
- * The round-robin allocation algorithm allows for ASID stealing.
- * If asid tracker is at "x-1", a new req will allocate "x", even if "x" was
- * already assigned to another (switched-out) task. Obviously the prev owner
- * is marked with an invalid ASID to make it request for a new ASID when it
- * gets scheduled next time. However its TLB entries (with ASID "x") could
- * exist, which must be cleared before the same ASID is used by the new owner.
- * Flushing them would be plausible but costly solution. Instead we force a
- * allocation policy quirk, which ensures that a stolen ASID won't have any
- * TLB entries associates, alleviating the need to flush.
- * The quirk essentially is not allowing ASID allocated in prev cycle
- * to be used past a roll-over in the next cycle.
- * When this happens (i.e. task ASID > asid tracker), task needs to refresh
- * its ASID, aligning it to current value of tracker. If the task doesn't get
- * scheduled past a roll-over, hence its ASID is not yet realigned with
- * tracker, such ASID is anyways safely reusable because it is
- * gauranteed that TLB entries with that ASID wont exist.
+ * A new allocation cycle, post rollover, could potentially reassign an ASID
+ * to a different task. Thus the rule is to refresh the ASID in a new cycle.
+ * The 32 bit @asid_cache (and mm->asid) have 8 bits MMU PID and rest 24 bits
+ * serve as cycle/generation indicator and natural 32 bit unsigned math
+ * automagically increments the generation when lower 8 bits rollover.
  */
 
-#define FIRST_ASID  0
-#define MAX_ASID    255			/* 8 bit PID field in PID Aux reg */
-#define NO_ASID     (MAX_ASID + 1)	/* ASID Not alloc to mmu ctxt */
-#define NUM_ASID    ((MAX_ASID - FIRST_ASID) + 1)
+#define MM_CTXT_ASID_MASK	0x000000ff /* MMU PID reg :8 bit PID */
+#define MM_CTXT_CYCLE_MASK	(~MM_CTXT_ASID_MASK)
+
+#define MM_CTXT_FIRST_CYCLE	(MM_CTXT_ASID_MASK + 1)
+#define MM_CTXT_NO_ASID		0UL
 
-/* ASID to mm struct mapping */
-extern struct mm_struct *asid_mm_map[NUM_ASID + 1];
+#define hw_pid(mm)		(mm->context.asid & MM_CTXT_ASID_MASK)
 
-extern int asid_cache;
+extern unsigned int asid_cache;
 
 /*
  * Get a new ASID if task doesn't have a valid one (unalloc or from prev cycle)
@@ -74,59 +57,42 @@ extern int asid_cache;
  */
 static inline void get_new_mmu_context(struct mm_struct *mm)
 {
-	struct mm_struct *prev_owner;
 	unsigned long flags;
 
 	local_irq_save(flags);
 
 	/*
 	 * Move to new ASID if it was not from current alloc-cycle/generation.
+	 * This is done by ensuring that the generation bits in both mm->ASID
+	 * and cpu's ASID counter are exactly same.
 	 *
 	 * Note: Callers needing new ASID unconditionally, independent of
 	 * 	 generation, e.g. local_flush_tlb_mm() for forking  parent,
 	 * 	 first need to destroy the context, setting it to invalid
 	 * 	 value.
 	 */
-	if (mm->context.asid <= asid_cache)
+	if (!((mm->context.asid ^ asid_cache) & MM_CTXT_CYCLE_MASK))
 		goto set_hw;
 
-	/*
-	 * Relinquish the currently owned ASID (if any).
-	 * Doing unconditionally saves a cmp-n-branch; for already unused
-	 * ASID slot, the value was/remains NULL
-	 */
-	asid_mm_map[mm->context.asid] = (struct mm_struct *)NULL;
+	/* move to new ASID and handle rollover */
+	if (unlikely(!(++asid_cache & MM_CTXT_ASID_MASK))) {
 
-	/* move to new ASID */
-	if (++asid_cache > MAX_ASID) {	/* ASID roll-over */
-		asid_cache = FIRST_ASID;
 		flush_tlb_all();
-	}
 
-	/*
-	 * Is next ASID already owned by some-one else (we are stealing it).
-	 * If so, let the orig owner be aware of this, so when it runs, it
-	 * asks for a brand new ASID. This would only happen for a long-lived
-	 * task with ASID from prev allocation cycle (before ASID roll-over).
-	 *
-	 * This might look wrong - if we are re-using some other task's ASID,
-	 * won't we use it's stale TLB entries too. Actually the algorithm takes
-	 * care of such a case: it ensures that task with ASID from prev alloc
-	 * cycle, when scheduled will refresh it's ASID
-	 * The stealing scenario described here will only happen if that task
-	 * didn't get a chance to refresh it's ASID - implying stale entries
-	 * won't exist.
-	 */
-	prev_owner = asid_mm_map[asid_cache];
-	if (prev_owner)
-		prev_owner->context.asid = NO_ASID;
+		/*
+		 * Above checke for rollover of 8 bit ASID in 32 bit container.
+		 * If the container itself wrapped around, set it to a non zero
+		 * "generation" to distinguish from no context
+		 */
+		if (!asid_cache)
+			asid_cache = MM_CTXT_FIRST_CYCLE;
+	}
 
 	/* Assign new ASID to tsk */
-	asid_mm_map[asid_cache] = mm;
 	mm->context.asid = asid_cache;
 
 set_hw:
-	write_aux_reg(ARC_REG_PID, mm->context.asid | MMU_ENABLE);
+	write_aux_reg(ARC_REG_PID, hw_pid(mm) | MMU_ENABLE);
 
 	local_irq_restore(flags);
 }
@@ -138,7 +104,7 @@ set_hw:
 static inline int
 init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
-	mm->context.asid = NO_ASID;
+	mm->context.asid = MM_CTXT_NO_ASID;
 	return 0;
 }
 
@@ -167,14 +133,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 
 static inline void destroy_context(struct mm_struct *mm)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	asid_mm_map[mm->context.asid] = NULL;
-	mm->context.asid = NO_ASID;
-
-	local_irq_restore(flags);
+	mm->context.asid = MM_CTXT_NO_ASID;
 }
 
 /* it seemed that deactivate_mm( ) is a reasonable place to do book-keeping
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index b5c5e0a..71cb26d 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -100,13 +100,7 @@
 
 
 /* A copy of the ASID from the PID reg is kept in asid_cache */
-int asid_cache = FIRST_ASID;
-
-/* ASID to mm struct mapping. We have one extra entry corresponding to
- * NO_ASID to save us a compare when clearing the mm entry for old asid
- * see get_new_mmu_context (asm-arc/mmu_context.h)
- */
-struct mm_struct *asid_mm_map[NUM_ASID + 1];
+unsigned int asid_cache = MM_CTXT_FIRST_CYCLE;
 
 /*
  * Utility Routine to erase a J-TLB entry
@@ -281,7 +275,6 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			   unsigned long end)
 {
 	unsigned long flags;
-	unsigned int asid;
 
 	/* If range @start to @end is more than 32 TLB entries deep,
 	 * its better to move to a new ASID rather than searching for
@@ -303,11 +296,10 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 	start &= PAGE_MASK;
 
 	local_irq_save(flags);
-	asid = vma->vm_mm->context.asid;
 
-	if (asid != NO_ASID) {
+	if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) {
 		while (start < end) {
-			tlb_entry_erase(start | (asid & 0xff));
+			tlb_entry_erase(start | hw_pid(vma->vm_mm));
 			start += PAGE_SIZE;
 		}
 	}
@@ -361,9 +353,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 	 */
 	local_irq_save(flags);
 
-	if (vma->vm_mm->context.asid != NO_ASID) {
-		tlb_entry_erase((page & PAGE_MASK) |
-				(vma->vm_mm->context.asid & 0xff));
+	if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) {
+		tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm));
 		utlb_invalidate();
 	}
 
@@ -709,7 +700,8 @@ void tlb_paranoid_check(unsigned int mm_asid, unsigned long addr)
 	 *   - SW needs to have a valid ASID
 	 */
 	if (addr < 0x70000000 &&
-	    ((mmu_asid != mm_asid) || (mm_asid == NO_ASID)))
+	    ((mm_asid == MM_CTXT_NO_ASID) ||
+	      (mmu_asid != (mm_asid & MM_CTXT_ASID_MASK))))
 		print_asid_mismatch(mm_asid, mmu_asid, 0);
 }
 #endif
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index 88897a1..cf7d7d9 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -140,12 +140,15 @@ ex_saved_reg1:
 	GET_CURR_TASK_ON_CPU  r3
 	ld r0, [r3, TASK_ACT_MM]
 	ld r0, [r0, MM_CTXT+MM_CTXT_ASID]
+	breq r0, 0, 55f	; Error if no ASID allocated
 
 	lr r1, [ARC_REG_PID]
 	and r1, r1, 0xFF
 
-	breq r1, r0, 5f
+	and r2, r0, 0xFF	; MMU PID bits only for comparison
+	breq r1, r2, 5f
 
+55:
 	; Error if H/w and S/w ASID don't match, but NOT if in kernel mode
 	lr  r2, [erstatus]
 	bbit0 r2, STATUS_U_BIT, 5f
-- 
cgit v0.10.2


From 7d669a193bc0f44c20054687a3bf9ff82ad001a7 Mon Sep 17 00:00:00 2001
From: Noam Camus <noamc@ezchip.com>
Date: Mon, 10 Sep 2012 15:13:19 +0300
Subject: ARC: Handle un-aligned user space access in BE.

Adding endian awareness to un-aligned access exception handling.

Signed-off-by: Noam Camus <noamc@ezchip.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c
index c0f832f..28d1700 100644
--- a/arch/arc/kernel/unaligned.c
+++ b/arch/arc/kernel/unaligned.c
@@ -16,6 +16,16 @@
 #include <linux/uaccess.h>
 #include <asm/disasm.h>
 
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define BE		1
+#define FIRST_BYTE_16	"swap %1, %1\n swape %1, %1\n"
+#define FIRST_BYTE_32	"swape %1, %1\n"
+#else
+#define BE		0
+#define FIRST_BYTE_16
+#define FIRST_BYTE_32
+#endif
+
 #define __get8_unaligned_check(val, addr, err)		\
 	__asm__(					\
 	"1:	ldb.ab	%1, [%2, 1]\n"			\
@@ -36,9 +46,9 @@
 	do {						\
 		unsigned int err = 0, v, a = addr;	\
 		__get8_unaligned_check(v, a, err);	\
-		val =  v ;				\
+		val =  v << ((BE) ? 8 : 0);		\
 		__get8_unaligned_check(v, a, err);	\
-		val |= v << 8;				\
+		val |= v << ((BE) ? 0 : 8);		\
 		if (err)				\
 			goto fault;			\
 	} while (0)
@@ -47,13 +57,13 @@
 	do {						\
 		unsigned int err = 0, v, a = addr;	\
 		__get8_unaligned_check(v, a, err);	\
-		val =  v << 0;				\
+		val =  v << ((BE) ? 24 : 0);		\
 		__get8_unaligned_check(v, a, err);	\
-		val |= v << 8;				\
+		val |= v << ((BE) ? 16 : 8);		\
 		__get8_unaligned_check(v, a, err);	\
-		val |= v << 16;				\
+		val |= v << ((BE) ? 8 : 16);		\
 		__get8_unaligned_check(v, a, err);	\
-		val |= v << 24;				\
+		val |= v << ((BE) ? 0 : 24);		\
 		if (err)				\
 			goto fault;			\
 	} while (0)
@@ -63,6 +73,7 @@
 		unsigned int err = 0, v = val, a = addr;\
 							\
 		__asm__(				\
+		FIRST_BYTE_16				\
 		"1:	stb.ab	%1, [%2, 1]\n"		\
 		"	lsr %1, %1, 8\n"		\
 		"2:	stb	%1, [%2]\n"		\
@@ -87,8 +98,9 @@
 #define put32_unaligned_check(val, addr)		\
 	do {						\
 		unsigned int err = 0, v = val, a = addr;\
-		__asm__(				\
 							\
+		__asm__(				\
+		FIRST_BYTE_32				\
 		"1:	stb.ab	%1, [%2, 1]\n"		\
 		"	lsr %1, %1, 8\n"		\
 		"2:	stb.ab	%1, [%2, 1]\n"		\
-- 
cgit v0.10.2


From 6532b02fe5affb962b267e3c12e87ec16311aebf Mon Sep 17 00:00:00 2001
From: Mischa Jonker <mjonker@synopsys.com>
Date: Wed, 28 Aug 2013 20:32:50 +0200
Subject: ARC: Add read*_relaxed to asm/io.h

Some drivers require these, and ARC didn't have them yet.

Signed-off-by: Mischa Jonker <mjonker@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h
index 473424d..334ce70 100644
--- a/arch/arc/include/asm/io.h
+++ b/arch/arc/include/asm/io.h
@@ -100,6 +100,10 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr)
 
 }
 
+#define readb_relaxed readb
+#define readw_relaxed readw
+#define readl_relaxed readl
+
 #include <asm-generic/io.h>
 
 #endif /* _ASM_ARC_IO_H */
-- 
cgit v0.10.2


From 8508d5653f4ca25cd13f87340121afdfed448e1b Mon Sep 17 00:00:00 2001
From: Mischa Jonker <mjonker@synopsys.com>
Date: Wed, 28 Aug 2013 20:38:18 +0200
Subject: ARC: remove console_verbose() from setup_arch()

It prevents kernel parameters such as 'loglevel' from doing their job.

Signed-off-by: Mischa Jonker <mjonker@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 6b08345..b011f8c 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -357,8 +357,6 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	root_mountflags &= ~MS_RDONLY;
 
-	console_verbose();
-
 #if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
 	conswitchp = &dummy_con;
 #endif
-- 
cgit v0.10.2


From 7efd0da2d17360e1cef91507dbe619db0ee2c691 Mon Sep 17 00:00:00 2001
From: Mischa Jonker <mjonker@synopsys.com>
Date: Fri, 30 Aug 2013 11:56:25 +0200
Subject: ARC: Fix __udelay calculation

Cast usecs to u64, to ensure that the (usecs * 4295 * HZ)
multiplication is 64 bit.

Initially, the (usecs * 4295 * HZ) part was done as a 32 bit
multiplication, with the result cast to 64 bit only afterwards. This
truncated the upper bits of the product, causing a "DMA initialization
error" in the stmmac Ethernet driver, due to a premature timeout.

Signed-off-by: Mischa Jonker <mjonker@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h
index 442ce5d..43de302 100644
--- a/arch/arc/include/asm/delay.h
+++ b/arch/arc/include/asm/delay.h
@@ -53,11 +53,10 @@ static inline void __udelay(unsigned long usecs)
 {
 	unsigned long loops;
 
-	/* (long long) cast ensures 64 bit MPY - real or emulated
+	/* (u64) cast ensures 64 bit MPY - real or emulated
 	 * HZ * 4295 is pre-evaluated by gcc - hence only 2 mpy ops
 	 */
-	loops = ((long long)(usecs * 4295 * HZ) *
-		 (long long)(loops_per_jiffy)) >> 32;
+	loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32;
 
 	__delay(loops);
 }
-- 
cgit v0.10.2


From cc80ae38bfb698559c8f728d9f0c7b20ed4a59e6 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 27 Aug 2013 13:52:51 +0530
Subject: Kconfig.debug: Add FRAME_POINTER anti-dependency for ARC

Frame pointer on ARC doesn't serve the conventional purpose of stack
unwinding due to the way the ABI designates its usage.
Thus its explicit usage on ARC is discouraged (gcc is free to use it
for some tricky stack frames even with -fomit-frame-pointer).

Hence no point enabling it for ARC.

References: http://www.spinics.net/lists/kernel/msg1593937.html
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: "Paul E. McKenney" <paul.mckenney@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: linux-kernel@vger.kernel.org

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1501aa5..c971f3a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -908,7 +908,7 @@ config LOCKDEP
 	bool
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
 	select STACKTRACE
-	select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE
+	select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC
 	select KALLSYMS
 	select KALLSYMS_ALL
 
@@ -1347,7 +1347,7 @@ config FAULT_INJECTION_STACKTRACE_FILTER
 	depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
 	depends on !X86_64
 	select STACKTRACE
-	select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND
+	select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC
 	help
 	  Provide stacktrace filter for fault-injection capabilities
 
@@ -1357,7 +1357,7 @@ config LATENCYTOP
 	depends on DEBUG_KERNEL
 	depends on STACKTRACE_SUPPORT
 	depends on PROC_FS
-	select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND
+	select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC
 	select KALLSYMS
 	select KALLSYMS_ALL
 	select STACKTRACE
-- 
cgit v0.10.2


From 07b9b65147d1d7cc03b9ff1e1f3b1c163ba4d067 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 5 Sep 2013 19:19:06 +0530
Subject: ARC: fix new Section mismatches in build (post __cpuinit cleanup)

--------------->8--------------------
WARNING: vmlinux.o(.text+0x708): Section mismatch in reference from the
function read_arc_build_cfg_regs() to the function
.init.text:read_decode_cache_bcr()

WARNING: vmlinux.o(.text+0x702): Section mismatch in reference from the
function read_arc_build_cfg_regs() to the function
.init.text:read_decode_mmu_bcr()
--------------->8--------------------

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 5802849..e4abdaa 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -57,7 +57,7 @@
 
 extern void arc_cache_init(void);
 extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
-extern void __init read_decode_cache_bcr(void);
+extern void read_decode_cache_bcr(void);
 
 #endif	/* !__ASSEMBLY__ */
 
diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index c82db8b..c2663b3 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -59,7 +59,7 @@ void tlb_paranoid_check(unsigned int mm_asid, unsigned long address);
 
 void arc_mmu_init(void);
 extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
-void __init read_decode_mmu_bcr(void);
+void read_decode_mmu_bcr(void);
 
 #endif	/* !__ASSEMBLY__ */
 
-- 
cgit v0.10.2