From 7b22c03536a539142f931815528d55df455ffe2d Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Thu, 3 Oct 2013 06:47:44 +0100 Subject: arm64: check for number of arguments in syscall_get/set_arguments() In ftrace_syscall_enter(), syscall_get_arguments(..., 0, n, ...) does the equivalent of: if (i == 0) { ...; n--; } memcpy(..., n * sizeof(args[0])); If the number of arguments (n) is zero and the argument index (i) is also zero in syscall_get_arguments(), none of the arguments should be copied by memcpy(). Otherwise 'n--' wraps around to a huge positive number and an unexpected amount of data is copied. Tracing system calls which take no arguments, say sync(void), may hit this case and eventually corrupt the system. This patch fixes the issue in both syscall_get_arguments() and syscall_set_arguments(). Signed-off-by: AKASHI Takahiro Acked-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 89c047f..70ba9d4 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -59,6 +59,9 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned int i, unsigned int n, unsigned long *args) { + if (n == 0) + return; + if (i + n > SYSCALL_MAX_ARGS) { unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i; unsigned int n_bad = n + i - SYSCALL_MAX_ARGS; @@ -82,6 +85,9 @@ static inline void syscall_set_arguments(struct task_struct *task, unsigned int i, unsigned int n, const unsigned long *args) { + if (n == 0) + return; + if (i + n > SYSCALL_MAX_ARGS) { pr_warning("%s called with max args %d, handling only %d\n", __func__, i + n, SYSCALL_MAX_ARGS); -- cgit v0.10.2 From e29a074b44a9110c567fc31cdb12928f8eca7c79 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 24 Oct 2013 12:00:28 +0100 Subject: arm64: Fix memory layout typo Signed-off-by: Catalin Marinas diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt index 78a37712..f28899d 100644 --- a/Documentation/arm64/memory.txt +++ b/Documentation/arm64/memory.txt @@ -39,7 +39,7 @@ ffffffbffbc00000 ffffffbffbdfffff 2MB earlyprintk device ffffffbffbe00000 ffffffbffbe0ffff 64KB PCI I/O space -ffffffbbffff0000 ffffffbcffffffff ~2MB [guard] +ffffffbffbe10000 ffffffbcffffffff ~2MB [guard] ffffffbffc000000 ffffffbfffffffff 64MB modules -- cgit v0.10.2 From 52ea2a560a9dba57fe5fd6b4726b1089751accf2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Oct 2013 15:54:26 +0100 Subject: arm64: locks: introduce ticket-based spinlock implementation This patch introduces a ticket lock implementation for arm64, along the same lines as the implementation for arch/arm/. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 0defa07..525dd53 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -22,17 +22,10 @@ /* * Spinlock implementation. * - * The old value is read exclusively and the new one, if unlocked, is written - * exclusively. In case of failure, the loop is restarted. - * * The memory barriers are implicit with the load-acquire and store-release * instructions. 
- * - * Unlocked value: 0 - * Locked value: 1 */ -#define arch_spin_is_locked(x) ((x)->lock != 0) #define arch_spin_unlock_wait(lock) \ do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) @@ -41,32 +34,51 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int tmp; + arch_spinlock_t lockval, newval; asm volatile( - " sevl\n" - "1: wfe\n" - "2: ldaxr %w0, %1\n" - " cbnz %w0, 1b\n" - " stxr %w0, %w2, %1\n" - " cbnz %w0, 2b\n" - : "=&r" (tmp), "+Q" (lock->lock) - : "r" (1) - : "cc", "memory"); + /* Atomically increment the next ticket. */ +" prfm pstl1strm, %3\n" +"1: ldaxr %w0, %3\n" +" add %w1, %w0, %w5\n" +" stxr %w2, %w1, %3\n" +" cbnz %w2, 1b\n" + /* Did we get the lock? */ +" eor %w1, %w0, %w0, ror #16\n" +" cbz %w1, 3f\n" + /* + * No: spin on the owner. Send a local event to avoid missing an + * unlock before the exclusive load. + */ +" sevl\n" +"2: wfe\n" +" ldaxrh %w2, %4\n" +" eor %w1, %w2, %w0, lsr #16\n" +" cbnz %w1, 2b\n" + /* We got the lock. Critical section starts here. */ +"3:" + : "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock) + : "Q" (lock->owner), "I" (1 << TICKET_SHIFT) + : "memory"); } static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned int tmp; + arch_spinlock_t lockval; asm volatile( - "2: ldaxr %w0, %1\n" - " cbnz %w0, 1f\n" - " stxr %w0, %w2, %1\n" - " cbnz %w0, 2b\n" - "1:\n" - : "=&r" (tmp), "+Q" (lock->lock) - : "r" (1) - : "cc", "memory"); +" prfm pstl1strm, %2\n" +"1: ldaxr %w0, %2\n" +" eor %w1, %w0, %w0, ror #16\n" +" cbnz %w1, 2f\n" +" add %w0, %w0, %3\n" +" stxr %w1, %w0, %2\n" +" cbnz %w1, 1b\n" +"2:" + : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) + : "I" (1 << TICKET_SHIFT) + : "memory"); return !tmp; } @@ -74,9 +86,24 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock) { asm volatile( - " stlr %w1, %0\n" - : "=Q" (lock->lock) : "r" (0) : "memory"); +" stlrh %w1, %0\n" + : "=Q" (lock->owner) + : "r" (lock->owner + 1) + : "memory"); +} + +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + arch_spinlock_t lockval = ACCESS_ONCE(*lock); + return lockval.owner != lockval.next; +} + +static inline int arch_spin_is_contended(arch_spinlock_t *lock) +{ + arch_spinlock_t lockval = ACCESS_ONCE(*lock); + return (lockval.next - lockval.owner) > 1; } +#define arch_spin_is_contended arch_spin_is_contended /* * Write lock implementation. diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h index 9a49434..8769275 100644 --- a/arch/arm64/include/asm/spinlock_types.h +++ b/arch/arm64/include/asm/spinlock_types.h @@ -20,14 +20,14 @@ # error "please don't include this file directly" #endif -/* We only require natural alignment for exclusive accesses. */ -#define __lock_aligned +#define TICKET_SHIFT 16 typedef struct { - volatile unsigned int lock; -} arch_spinlock_t; + u16 owner; + u16 next; +} __aligned(4) arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 , 0 } typedef struct { volatile unsigned int lock; -- cgit v0.10.2 From 5686b06cea34e31ec0a549d9b5ac00776e8e8d6d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Oct 2013 15:54:27 +0100 Subject: arm64: lockref: add support for lockless lockrefs using cmpxchg Our spinlocks are only 32-bit (2x16-bit tickets) and our cmpxchg can deal with 8-bytes (as one would hope!). This patch wires up the cmpxchg-based lockless lockref implementation for arm64. 
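For illustration, the fast path this wires up can be sketched in C: the 32-bit ticket lock and a 32-bit reference count share one 64-bit word, so a single cmpxchg can bump the count while the lock is observed to be unlocked, without ever taking it. This is a hedged sketch only (struct lockref_sketch and lockref_get_fast() are hypothetical names; the real code lives in lib/lockref.c), using the arch_spin_value_unlocked() and cmpxchg64_relaxed() helpers added here and in the following patch:

	struct lockref_sketch {
		union {
			u64 lock_count;			/* lock + count as one word */
			struct {
				arch_spinlock_t lock;	/* 32-bit ticket lock */
				u32 count;		/* reference count */
			};
		};
	};

	/* Returns true if the count was bumped without touching the lock. */
	static bool lockref_get_fast(struct lockref_sketch *lr)
	{
		struct lockref_sketch old, new;

		old.lock_count = ACCESS_ONCE(lr->lock_count);
		while (arch_spin_value_unlocked(old.lock)) {
			new.lock_count = old.lock_count;
			new.count++;
			/* One 64-bit cmpxchg covers both the lock and the count. */
			if (cmpxchg64_relaxed(&lr->lock_count, old.lock_count,
					      new.lock_count) == old.lock_count)
				return true;
			old.lock_count = ACCESS_ONCE(lr->lock_count);
		}
		return false;	/* lock is held: fall back to the spinlock path */
	}

If the cmpxchg fails because another CPU changed either half of the word, the loop re-samples and retries; if the lock is seen held, the caller falls back to taking the spinlock. 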
Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c044548..9e8233b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1,6 +1,7 @@ config ARM64 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 525dd53..3d5cf06 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -92,10 +92,14 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) : "memory"); } +static inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.owner == lock.next; +} + static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - arch_spinlock_t lockval = ACCESS_ONCE(*lock); - return lockval.owner != lockval.next; + return !arch_spin_value_unlocked(ACCESS_ONCE(*lock)); } static inline int arch_spin_is_contended(arch_spinlock_t *lock) -- cgit v0.10.2 From cf10b79a7d88edc689479af989b3a88e9adf07ff Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Oct 2013 15:54:28 +0100 Subject: arm64: cmpxchg: implement cmpxchg64_relaxed This patch introduces cmpxchg64_relaxed for arm64 using the existing cmpxchg_local macro, which performs a cmpxchg operation (up to 64 bits) without barrier semantics. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 8a8ce0e..3914c0d 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -173,4 +173,6 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, #define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n)) #define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n)) +#define cmpxchg64_relaxed(ptr,o,n) cmpxchg_local((ptr),(o),(n)) + #endif /* __ASM_CMPXCHG_H */ -- cgit v0.10.2 From 2a3f912c782f2364f5e5813ab66ca6c92fb43acb Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 7 Oct 2013 16:42:05 +0100 Subject: arm64: Export __copy_in_user() to modules This function may be called from loadable modules, so it needs exporting. Signed-off-by: Catalin Marinas Reported-by: Loc Ho diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 41b4f62..e7ee770 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -39,6 +39,7 @@ EXPORT_SYMBOL(clear_page); EXPORT_SYMBOL(__copy_from_user); EXPORT_SYMBOL(__copy_to_user); EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(__copy_in_user); /* physical memory */ EXPORT_SYMBOL(memstart_addr); -- cgit v0.10.2 From 4fcd6e1416b0424f94ede03cb322a323a640bf4b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 11 Oct 2013 14:52:07 +0100 Subject: Docs: arm64: booting: clarify boot requirements There are a few points in the arm64 booting document which are unclear (such as the initial state of secondary CPUs), and/or have not been documented (PSCI is a supported mechanism for booting secondary CPUs). This patch amends the arm64 boot document to better express the (existing) requirements, and to describe PSCI as a supported booting mechanism. 
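To make the spin-table contract below concrete, the releasing agent (firmware, or the primary CPU) does something like the following. This is an illustrative sketch using kernel-style helpers, not code from this patch:

	/* Hypothetical releasing side of the spin-table protocol. */
	static void release_secondary(__le64 *release_addr, u64 entry)
	{
		*release_addr = cpu_to_le64(entry);	/* single 64-bit little-endian write */
		__flush_dcache_area(release_addr,	/* make it visible to the poller */
				    sizeof(*release_addr));
		sev();					/* wake any wfe-based poll loop */
	}

The secondary spins reading release_addr, converts the value to its native endianness and branches to it, as the amended text below spells out. 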
Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Cc: Catalin Marinas Cc: Dave Martin Cc: Marc Zyngier Cc: Fu Wei Signed-off-by: Catalin Marinas diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt index 98df4a0..a9691cc 100644 --- a/Documentation/arm64/booting.txt +++ b/Documentation/arm64/booting.txt @@ -115,9 +115,10 @@ Before jumping into the kernel, the following conditions must be met: External caches (if present) must be configured and disabled. - Architected timers - CNTFRQ must be programmed with the timer frequency. - If entering the kernel at EL1, CNTHCTL_EL2 must have EL1PCTEN (bit 0) - set where available. + CNTFRQ must be programmed with the timer frequency and CNTVOFF must + be programmed with a consistent value on all CPUs. If entering the + kernel at EL1, CNTHCTL_EL2 must have EL1PCTEN (bit 0) set where + available. - Coherency All CPUs to be booted by the kernel must be part of the same coherency @@ -130,30 +131,46 @@ Before jumping into the kernel, the following conditions must be met: the kernel image will be entered must be initialised by software at a higher exception level to prevent execution in an UNKNOWN state. +The requirements described above for CPU mode, caches, MMUs, architected +timers, coherency and system registers apply to all CPUs. All CPUs must +enter the kernel in the same exception level. + The boot loader is expected to enter the kernel on each CPU in the following manner: - The primary CPU must jump directly to the first instruction of the kernel image. The device tree blob passed by this CPU must contain - for each CPU node: - - 1. An 'enable-method' property. Currently, the only supported value - for this field is the string "spin-table". - - 2. A 'cpu-release-addr' property identifying a 64-bit, - zero-initialised memory location. + an 'enable-method' property for each cpu node. The supported + enable-methods are described below. It is expected that the bootloader will generate these device tree properties and insert them into the blob prior to kernel entry. -- Any secondary CPUs must spin outside of the kernel in a reserved area - of memory (communicated to the kernel by a /memreserve/ region in the +- CPUs with a "spin-table" enable-method must have a 'cpu-release-addr' + property in their cpu node. This property identifies a + naturally-aligned 64-bit zero-initalised memory location. + + These CPUs should spin outside of the kernel in a reserved area of + memory (communicated to the kernel by a /memreserve/ region in the device tree) polling their cpu-release-addr location, which must be contained in the reserved region. A wfe instruction may be inserted to reduce the overhead of the busy-loop and a sev will be issued by the primary CPU. When a read of the location pointed to by the - cpu-release-addr returns a non-zero value, the CPU must jump directly - to this value. + cpu-release-addr returns a non-zero value, the CPU must jump to this + value. The value will be written as a single 64-bit little-endian + value, so CPUs must convert the read value to their native endianness + before jumping to it. + +- CPUs with a "psci" enable method should remain outside of + the kernel (i.e. outside of the regions of memory described to the + kernel in the memory node, or in a reserved area of memory described + to the kernel by a /memreserve/ region in the device tree). 
The + kernel will issue CPU_ON calls as described in ARM document number ARM + DEN 0022A ("Power State Coordination Interface System Software on ARM + processors") to bring CPUs into the kernel. + + The device tree should contain a 'psci' node, as described in + Documentation/devicetree/bindings/arm/psci.txt. - Secondary CPU general-purpose register settings x0 = 0 (reserved for future use) -- cgit v0.10.2 From 00ef54bb97389bfe8894272f48496f4191aae946 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:14 +0100 Subject: arm64: unify smp_psci.c and psci.c The functions in psci.c are only used from smp_psci.c, and smp_psci cannot function without psci.c. Additionally psci.c is built when !SMP, where it's expected that cpu_suspend may be useful. This patch unifies the two files, removing pointless duplication and paving the way for PSCI support in UP systems. Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h index 0604237..e5312ea 100644 --- a/arch/arm64/include/asm/psci.h +++ b/arch/arm64/include/asm/psci.h @@ -14,25 +14,6 @@ #ifndef __ASM_PSCI_H #define __ASM_PSCI_H -#define PSCI_POWER_STATE_TYPE_STANDBY 0 -#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 - -struct psci_power_state { - u16 id; - u8 type; - u8 affinity_level; -}; - -struct psci_operations { - int (*cpu_suspend)(struct psci_power_state state, - unsigned long entry_point); - int (*cpu_off)(struct psci_power_state state); - int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); - int (*migrate)(unsigned long cpuid); -}; - -extern struct psci_operations psci_ops; - int psci_init(void); #endif /* __ASM_PSCI_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7b4b564..75a90c1 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -14,7 +14,7 @@ arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o -arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o +arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 14f73c4..368b787 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -17,12 +17,31 @@ #include #include +#include #include #include #include +#include -struct psci_operations psci_ops; +#define PSCI_POWER_STATE_TYPE_STANDBY 0 +#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 + +struct psci_power_state { + u16 id; + u8 type; + u8 affinity_level; +}; + +struct psci_operations { + int (*cpu_suspend)(struct psci_power_state state, + unsigned long entry_point); + int (*cpu_off)(struct psci_power_state state); + int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); + int (*migrate)(unsigned long cpuid); +}; + +static struct psci_operations psci_ops; static int (*invoke_psci_fn)(u64, u64, u64, u64); @@ -209,3 +228,36 @@ out_put_node: of_node_put(np); return err; } + +#ifdef CONFIG_SMP + +static int __init smp_psci_init_cpu(struct device_node *dn, int cpu) +{ + return 0; +} + +static int __init smp_psci_prepare_cpu(int cpu) +{ + int err; + + if (!psci_ops.cpu_on) { + pr_err("no cpu_on method, not booting CPU%d\n", cpu); + return -ENODEV; + } + + err = psci_ops.cpu_on(cpu_logical_map(cpu), 
__pa(secondary_holding_pen)); + if (err) { + pr_err("failed to boot CPU%d (%d)\n", cpu, err); + return err; + } + + return 0; +} + +const struct smp_enable_ops smp_psci_ops __initconst = { + .name = "psci", + .init_cpu = smp_psci_init_cpu, + .prepare_cpu = smp_psci_prepare_cpu, +}; + +#endif diff --git a/arch/arm64/kernel/smp_psci.c b/arch/arm64/kernel/smp_psci.c deleted file mode 100644 index 0c53330..0000000 --- a/arch/arm64/kernel/smp_psci.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * PSCI SMP initialisation - * - * Copyright (C) 2013 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include - -#include -#include - -static int __init smp_psci_init_cpu(struct device_node *dn, int cpu) -{ - return 0; -} - -static int __init smp_psci_prepare_cpu(int cpu) -{ - int err; - - if (!psci_ops.cpu_on) { - pr_err("psci: no cpu_on method, not booting CPU%d\n", cpu); - return -ENODEV; - } - - err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_holding_pen)); - if (err) { - pr_err("psci: failed to boot CPU%d (%d)\n", cpu, err); - return err; - } - - return 0; -} - -const struct smp_enable_ops smp_psci_ops __initconst = { - .name = "psci", - .init_cpu = smp_psci_init_cpu, - .prepare_cpu = smp_psci_prepare_cpu, -}; -- cgit v0.10.2 From cd1aebf5277a3a154a9e4c0ea4b3acabb62e5cab Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:15 +0100 Subject: arm64: reorganise smp_enable_ops For hotplug support, we're going to want a place to store operations that do more than bring CPUs online, and it makes sense to group these with our current smp_enable_ops. For cpuidle support, we'll want to group additional functions, and we may want them even for UP kernels. This patch renames smp_enable_ops to the more general cpu_operations, and pulls the definitions out of smp code such that they can be used in UP kernels. While we're at it, fix up instances of the cpu parameter to be an unsigned int, drop the init markings and rename the *_cpu functions to cpu_* to reduce future churn when cpu_operations is extended. Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h new file mode 100644 index 0000000..3c60c8d --- /dev/null +++ b/arch/arm64/include/asm/cpu_ops.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +#ifndef __ASM_CPU_OPS_H +#define __ASM_CPU_OPS_H + +#include +#include + +struct device_node; + +struct cpu_operations { + const char *name; + int (*cpu_init)(struct device_node *, unsigned int); + int (*cpu_prepare)(unsigned int); +}; + +extern const struct cpu_operations *cpu_ops[NR_CPUS]; +extern const struct cpu_operations * __init cpu_get_ops(const char *name); + +#endif /* ifndef __ASM_CPU_OPS_H */ diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 4b8023c..7e34295 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -66,15 +66,4 @@ extern volatile unsigned long secondary_holding_pen_release; extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -struct device_node; - -struct smp_enable_ops { - const char *name; - int (*init_cpu)(struct device_node *, int); - int (*prepare_cpu)(int); -}; - -extern const struct smp_enable_ops smp_spin_table_ops; -extern const struct smp_enable_ops smp_psci_ops; - #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 75a90c1..5ba2fd4 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -9,7 +9,7 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o psci.o + hyp-stub.o psci.o cpu_ops.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c new file mode 100644 index 0000000..e2652aa --- /dev/null +++ b/arch/arm64/kernel/cpu_ops.c @@ -0,0 +1,47 @@ +/* + * CPU kernel entry/exit control + * + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include + +extern const struct cpu_operations smp_spin_table_ops; +extern const struct cpu_operations cpu_psci_ops; + +const struct cpu_operations *cpu_ops[NR_CPUS]; + +static const struct cpu_operations *supported_cpu_ops[] __initconst = { +#ifdef CONFIG_SMP + &smp_spin_table_ops, + &cpu_psci_ops, +#endif + NULL, +}; + +const struct cpu_operations * __init cpu_get_ops(const char *name) +{ + const struct cpu_operations **ops = supported_cpu_ops; + + while (*ops) { + if (!strcmp(name, (*ops)->name)) + return *ops; + + ops++; + } + + return NULL; +} diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 368b787..ccec2ca 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -231,12 +232,12 @@ out_put_node: #ifdef CONFIG_SMP -static int __init smp_psci_init_cpu(struct device_node *dn, int cpu) +static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu) { return 0; } -static int __init smp_psci_prepare_cpu(int cpu) +static int __init cpu_psci_cpu_prepare(unsigned int cpu) { int err; @@ -254,10 +255,10 @@ static int __init smp_psci_prepare_cpu(int cpu) return 0; } -const struct smp_enable_ops smp_psci_ops __initconst = { +const struct cpu_operations cpu_psci_ops = { .name = "psci", - .init_cpu = smp_psci_init_cpu, - .prepare_cpu = smp_psci_prepare_cpu, + .cpu_init = cpu_psci_cpu_init, + .cpu_prepare = cpu_psci_cpu_prepare, }; #endif diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 78db90d..8965fb7 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -232,28 +233,6 @@ void __init smp_prepare_boot_cpu(void) static void (*smp_cross_call)(const struct cpumask *, unsigned int); -static const struct smp_enable_ops *enable_ops[] __initconst = { - &smp_spin_table_ops, - &smp_psci_ops, - NULL, -}; - -static const struct smp_enable_ops *smp_enable_ops[NR_CPUS]; - -static const struct smp_enable_ops * __init smp_get_enable_ops(const char *name) -{ - const struct smp_enable_ops **ops = enable_ops; - - while (*ops) { - if (!strcmp(name, (*ops)->name)) - return *ops; - - ops++; - } - - return NULL; -} - /* * Enumerate the possible CPU set from the device tree and build the * cpu logical map array containing MPIDR values related to logical @@ -263,7 +242,7 @@ void __init smp_init_cpus(void) { const char *enable_method; struct device_node *dn = NULL; - int i, cpu = 1; + unsigned int i, cpu = 1; bool bootcpu_valid = false; while ((dn = of_find_node_by_type(dn, "cpu"))) { @@ -342,15 +321,15 @@ void __init smp_init_cpus(void) goto next; } - smp_enable_ops[cpu] = smp_get_enable_ops(enable_method); + cpu_ops[cpu] = cpu_get_ops(enable_method); - if (!smp_enable_ops[cpu]) { + if (!cpu_ops[cpu]) { pr_err("%s: invalid enable-method property: %s\n", dn->full_name, enable_method); goto next; } - if (smp_enable_ops[cpu]->init_cpu(dn, cpu)) + if (cpu_ops[cpu]->cpu_init(dn, cpu)) goto next; pr_debug("cpu logical map 0x%llx\n", hwid); @@ -380,8 +359,8 @@ next: void __init smp_prepare_cpus(unsigned int max_cpus) { - int cpu, err; - unsigned int ncores = num_possible_cpus(); + int err; + unsigned int cpu, ncores = num_possible_cpus(); /* * are we trying to boot more cores than exist? 
@@ -408,10 +387,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if (cpu == smp_processor_id()) continue; - if (!smp_enable_ops[cpu]) + if (!cpu_ops[cpu]) continue; - err = smp_enable_ops[cpu]->prepare_cpu(cpu); + err = cpu_ops[cpu]->cpu_prepare(cpu); if (err) continue; diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 7c35fa6..a8b76e4 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -21,10 +21,11 @@ #include #include +#include static phys_addr_t cpu_release_addr[NR_CPUS]; -static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu) +static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu) { /* * Determine the address from which the CPU is polling. @@ -40,7 +41,7 @@ static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu) return 0; } -static int __init smp_spin_table_prepare_cpu(int cpu) +static int smp_spin_table_cpu_prepare(unsigned int cpu) { void **release_addr; @@ -59,8 +60,8 @@ static int __init smp_spin_table_prepare_cpu(int cpu) return 0; } -const struct smp_enable_ops smp_spin_table_ops __initconst = { +const struct cpu_operations smp_spin_table_ops = { .name = "spin-table", - .init_cpu = smp_spin_table_init_cpu, - .prepare_cpu = smp_spin_table_prepare_cpu, + .cpu_init = smp_spin_table_cpu_init, + .cpu_prepare = smp_spin_table_cpu_prepare, }; -- cgit v0.10.2 From 652af899799354049b273af897b798b8f03fdd88 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:16 +0100 Subject: arm64: factor out spin-table boot method The arm64 kernel has an internal holding pen, which is necessary for some systems where we can't bring CPUs online individually and must hold multiple CPUs in a safe area until the kernel is able to handle them. The current SMP infrastructure for arm64 is closely coupled to this holding pen, and alternative boot methods must launch CPUs into the pen, where they sit before they are launched into the kernel proper. With PSCI (and possibly other future boot methods), we can bring CPUs online individually, and need not perform the secondary_holding_pen dance. Instead, this patch factors the holding pen management code out to the spin-table boot method code, as it is the only boot method requiring the pen. A new entry point for secondaries, secondary_entry is added for other boot methods to use, which bypasses the holding pen and its associated overhead when bringing CPUs online. The smp.pen.text section is also removed, as the pen can live in head.text without problem. The cpu_operations structure is extended with two new functions, cpu_boot and cpu_postboot, for bringing a cpu into the kernel and performing any post-boot cleanup required by a bootmethod (e.g. resetting the secondary_holding_pen_release to INVALID_HWID). Documentation is added for cpu_operations. Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index 3c60c8d..67bc4fd 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -21,10 +21,26 @@ struct device_node; +/** + * struct cpu_operations - Callback operations for hotplugging CPUs. + * + * @name: Name of the property as appears in a devicetree cpu node's + * enable-method property. + * @cpu_init: Reads any data necessary for a specific enable-method from the + * devicetree, for a given cpu node and proposed logical id. 
+ * @cpu_prepare: Early one-time preparation step for a cpu. If there is a + * mechanism for doing so, tests whether it is possible to boot + * the given CPU. + * @cpu_boot: Boots a cpu into the kernel. + * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary + * synchronisation. Called from the cpu being booted. + */ struct cpu_operations { const char *name; int (*cpu_init)(struct device_node *, unsigned int); int (*cpu_prepare)(unsigned int); + int (*cpu_boot)(unsigned int); + void (*cpu_postboot)(void); }; extern const struct cpu_operations *cpu_ops[NR_CPUS]; diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 7e34295..d64187c 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -60,8 +60,7 @@ struct secondary_data { void *stack; }; extern struct secondary_data secondary_data; -extern void secondary_holding_pen(void); -extern volatile unsigned long secondary_holding_pen_release; +extern void secondary_entry(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 7090c12..bf7efdf 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -225,7 +225,6 @@ ENTRY(__boot_cpu_mode) .quad PAGE_OFFSET #ifdef CONFIG_SMP - .pushsection .smp.pen.text, "ax" .align 3 1: .quad . .quad secondary_holding_pen_release @@ -250,7 +249,16 @@ pen: ldr x4, [x3] wfe b pen ENDPROC(secondary_holding_pen) - .popsection + + /* + * Secondary entry point that jumps straight into the kernel. Only to + * be used where CPUs are brought online dynamically by the kernel. + */ +ENTRY(secondary_entry) + bl __calc_phys_offset // x2=phys offset + bl el2_setup // Drop to EL1 + b secondary_startup +ENDPROC(secondary_entry) ENTRY(secondary_startup) /* diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index ccec2ca..fb56b61 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -239,26 +239,28 @@ static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu) static int __init cpu_psci_cpu_prepare(unsigned int cpu) { - int err; - if (!psci_ops.cpu_on) { pr_err("no cpu_on method, not booting CPU%d\n", cpu); return -ENODEV; } - err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_holding_pen)); - if (err) { - pr_err("failed to boot CPU%d (%d)\n", cpu, err); - return err; - } - return 0; } +static int cpu_psci_cpu_boot(unsigned int cpu) +{ + int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry)); + if (err) + pr_err("psci: failed to boot CPU%d (%d)\n", cpu, err); + + return err; +} + const struct cpu_operations cpu_psci_ops = { .name = "psci", .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, + .cpu_boot = cpu_psci_cpu_boot, }; #endif diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 8965fb7..6806bc4 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -55,7 +55,6 @@ * where to place its SVC stack */ struct secondary_data secondary_data; -volatile unsigned long secondary_holding_pen_release = INVALID_HWID; enum ipi_msg_type { IPI_RESCHEDULE, @@ -64,61 +63,16 @@ enum ipi_msg_type { IPI_CPU_STOP, }; -static DEFINE_RAW_SPINLOCK(boot_lock); - -/* - * Write secondary_holding_pen_release in a way that is guaranteed to be - * visible to all observers, irrespective of whether they're taking part - * in coherency or not. This is necessary for the hotplug code to work - * reliably. 
- */ -static void write_pen_release(u64 val) -{ - void *start = (void *)&secondary_holding_pen_release; - unsigned long size = sizeof(secondary_holding_pen_release); - - secondary_holding_pen_release = val; - __flush_dcache_area(start, size); -} - /* * Boot a secondary CPU, and assign it the specified idle task. * This also gives us the initial stack to use for this CPU. */ static int boot_secondary(unsigned int cpu, struct task_struct *idle) { - unsigned long timeout; - - /* - * Set synchronisation state between this boot processor - * and the secondary one - */ - raw_spin_lock(&boot_lock); - - /* - * Update the pen release flag. - */ - write_pen_release(cpu_logical_map(cpu)); + if (cpu_ops[cpu]->cpu_boot) + return cpu_ops[cpu]->cpu_boot(cpu); - /* - * Send an event, causing the secondaries to read pen_release. - */ - sev(); - - timeout = jiffies + (1 * HZ); - while (time_before(jiffies, timeout)) { - if (secondary_holding_pen_release == INVALID_HWID) - break; - udelay(10); - } - - /* - * Now the secondary core is starting up let it run its - * calibrations, then wait for it to finish - */ - raw_spin_unlock(&boot_lock); - - return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0; + return -EOPNOTSUPP; } static DECLARE_COMPLETION(cpu_running); @@ -188,17 +142,8 @@ asmlinkage void secondary_start_kernel(void) preempt_disable(); trace_hardirqs_off(); - /* - * Let the primary processor know we're out of the - * pen, then head off into the C entry point - */ - write_pen_release(INVALID_HWID); - - /* - * Synchronise with the boot thread. - */ - raw_spin_lock(&boot_lock); - raw_spin_unlock(&boot_lock); + if (cpu_ops[cpu]->cpu_postboot) + cpu_ops[cpu]->cpu_postboot(); /* * OK, now it's safe to let the boot CPU continue. Wait for diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index a8b76e4..27f0836 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -16,14 +16,37 @@ * along with this program. If not, see . */ +#include #include #include #include #include #include +#include +#include + +extern void secondary_holding_pen(void); +volatile unsigned long secondary_holding_pen_release = INVALID_HWID; static phys_addr_t cpu_release_addr[NR_CPUS]; +static DEFINE_RAW_SPINLOCK(boot_lock); + +/* + * Write secondary_holding_pen_release in a way that is guaranteed to be + * visible to all observers, irrespective of whether they're taking part + * in coherency or not. This is necessary for the hotplug code to work + * reliably. + */ +static void write_pen_release(u64 val) +{ + void *start = (void *)&secondary_holding_pen_release; + unsigned long size = sizeof(secondary_holding_pen_release); + + secondary_holding_pen_release = val; + __flush_dcache_area(start, size); +} + static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu) { @@ -60,8 +83,60 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) return 0; } +static int smp_spin_table_cpu_boot(unsigned int cpu) +{ + unsigned long timeout; + + /* + * Set synchronisation state between this boot processor + * and the secondary one + */ + raw_spin_lock(&boot_lock); + + /* + * Update the pen release flag. + */ + write_pen_release(cpu_logical_map(cpu)); + + /* + * Send an event, causing the secondaries to read pen_release. 
+ */ + sev(); + + timeout = jiffies + (1 * HZ); + while (time_before(jiffies, timeout)) { + if (secondary_holding_pen_release == INVALID_HWID) + break; + udelay(10); + } + + /* + * Now the secondary core is starting up let it run its + * calibrations, then wait for it to finish + */ + raw_spin_unlock(&boot_lock); + + return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0; +} + +void smp_spin_table_cpu_postboot(void) +{ + /* + * Let the primary processor know we're out of the pen. + */ + write_pen_release(INVALID_HWID); + + /* + * Synchronise with the boot thread. + */ + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); +} + const struct cpu_operations smp_spin_table_ops = { .name = "spin-table", .cpu_init = smp_spin_table_cpu_init, .cpu_prepare = smp_spin_table_cpu_prepare, + .cpu_boot = smp_spin_table_cpu_boot, + .cpu_postboot = smp_spin_table_cpu_postboot, }; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index f8ab9d8..991ffdd 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -54,7 +54,6 @@ SECTIONS } .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ - *(.smp.pen.text) __exception_text_start = .; *(.exception.text) __exception_text_end = .; -- cgit v0.10.2 From e8765b265a69c83504afc6901d6e137b1811d1f0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:17 +0100 Subject: arm64: read enable-method for CPU0 With the advent of CPU_HOTPLUG, the enable-method property for CPU0 may tell us something useful (i.e. how to hotplug it back on), so we must read it along with the enable-methods for all the other CPUs. Even on UP the enable-method may tell us useful information (e.g. if a core has some mechanism that might be usable for cpuidle), so we should always read it. This patch factors out the reading of the enable method, and ensures that CPU0's enable method is read regardless of whether the kernel is built with SMP support. Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index 67bc4fd..1a98dbf 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -44,6 +44,7 @@ struct cpu_operations { }; extern const struct cpu_operations *cpu_ops[NR_CPUS]; -extern const struct cpu_operations * __init cpu_get_ops(const char *name); +extern int __init cpu_read_ops(struct device_node *dn, int cpu); +extern void __init cpu_read_bootcpu_ops(void); #endif /* ifndef __ASM_CPU_OPS_H */ diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index e2652aa..aa0c9e7 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -17,6 +17,9 @@ */ #include +#include +#include +#include #include extern const struct cpu_operations smp_spin_table_ops; @@ -32,7 +35,7 @@ static const struct cpu_operations *supported_cpu_ops[] __initconst = { NULL, }; -const struct cpu_operations * __init cpu_get_ops(const char *name) +static const struct cpu_operations * __init cpu_get_ops(const char *name) { const struct cpu_operations **ops = supported_cpu_ops; @@ -45,3 +48,52 @@ const struct cpu_operations * __init cpu_get_ops(const char *name) return NULL; } + +/* + * Read a cpu's enable method from the device tree and record it in cpu_ops. 
+ */ +int __init cpu_read_ops(struct device_node *dn, int cpu) +{ + const char *enable_method = of_get_property(dn, "enable-method", NULL); + if (!enable_method) { + /* + * The boot CPU may not have an enable method (e.g. when + * spin-table is used for secondaries). Don't warn spuriously. + */ + if (cpu != 0) + pr_err("%s: missing enable-method property\n", + dn->full_name); + return -ENOENT; + } + + cpu_ops[cpu] = cpu_get_ops(enable_method); + if (!cpu_ops[cpu]) { + pr_err("%s: invalid enable-method property: %s\n", + dn->full_name, enable_method); + return -EOPNOTSUPP; + } + + return 0; +} + +void __init cpu_read_bootcpu_ops(void) +{ + struct device_node *dn = NULL; + u64 mpidr = cpu_logical_map(0); + + while ((dn = of_find_node_by_type(dn, "cpu"))) { + u64 hwid; + const __be32 *prop; + + prop = of_get_property(dn, "reg", NULL); + if (!prop) + continue; + + hwid = of_read_number(prop, of_n_addr_cells(dn)); + if (hwid == mpidr) { + cpu_read_ops(dn, 0); + of_node_put(dn); + return; + } + } +} diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 055cfb8..b65c132 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -264,6 +265,7 @@ void __init setup_arch(char **cmdline_p) psci_init(); cpu_logical_map(0) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; + cpu_read_bootcpu_ops(); #ifdef CONFIG_SMP smp_init_cpus(); #endif diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6806bc4..e992734 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -185,7 +185,6 @@ static void (*smp_cross_call)(const struct cpumask *, unsigned int); */ void __init smp_init_cpus(void) { - const char *enable_method; struct device_node *dn = NULL; unsigned int i, cpu = 1; bool bootcpu_valid = false; @@ -256,23 +255,8 @@ void __init smp_init_cpus(void) if (cpu >= NR_CPUS) goto next; - /* - * We currently support only the "spin-table" enable-method. - */ - enable_method = of_get_property(dn, "enable-method", NULL); - if (!enable_method) { - pr_err("%s: missing enable-method property\n", - dn->full_name); + if (cpu_read_ops(dn, cpu) != 0) goto next; - } - - cpu_ops[cpu] = cpu_get_ops(enable_method); - - if (!cpu_ops[cpu]) { - pr_err("%s: invalid enable-method property: %s\n", - dn->full_name, enable_method); - goto next; - } if (cpu_ops[cpu]->cpu_init(dn, cpu)) goto next; -- cgit v0.10.2 From 9327e2c6bb8cb0131b38a07847cd58c78dc095e9 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:18 +0100 Subject: arm64: add CPU_HOTPLUG infrastructure This patch adds the basic infrastructure necessary to support CPU_HOTPLUG on arm64, based on the arm implementation. Actual hotplug support will depend on an implementation's cpu_operations (e.g. PSCI). Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9e8233b..86ebfe4 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -161,6 +161,13 @@ config NR_CPUS default "8" if ARCH_XGENE default "4" +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs" + depends on SMP + help + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu. 
+ source kernel/Kconfig.preempt config HZ diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index 1a98dbf..c4cdb5e 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -34,6 +34,11 @@ struct device_node; * @cpu_boot: Boots a cpu into the kernel. * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary * synchronisation. Called from the cpu being booted. + * @cpu_disable: Prepares a cpu to die. May fail for some mechanism-specific + * reason, which will cause the hot unplug to be aborted. Called + * from the cpu to be killed. + * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the + * cpu being killed. */ struct cpu_operations { const char *name; @@ -41,6 +46,10 @@ struct cpu_operations { int (*cpu_prepare)(unsigned int); int (*cpu_boot)(unsigned int); void (*cpu_postboot)(void); +#ifdef CONFIG_HOTPLUG_CPU + int (*cpu_disable)(unsigned int cpu); + void (*cpu_die)(unsigned int cpu); +#endif }; extern const struct cpu_operations *cpu_ops[NR_CPUS]; diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 0332fc0..e1f7ecd 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -4,6 +4,7 @@ #include extern void (*handle_arch_irq)(struct pt_regs *); +extern void migrate_irqs(void); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); #endif diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index d64187c..a498f2c 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -65,4 +65,9 @@ extern void secondary_entry(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); +extern int __cpu_disable(void); + +extern void __cpu_die(unsigned int cpu); +extern void cpu_die(void); + #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c index 63cfc4a..fd3993c 100644 --- a/arch/arm64/kernel/cputable.c +++ b/arch/arm64/kernel/cputable.c @@ -22,7 +22,7 @@ extern unsigned long __cpu_setup(void); -struct cpu_info __initdata cpu_table[] = { +struct cpu_info cpu_table[] = { { .cpu_id_val = 0x000f0000, .cpu_id_mask = 0x000f0000, diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index ecb3354..473e5db 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -81,3 +81,64 @@ void __init init_IRQ(void) if (!handle_arch_irq) panic("No interrupt controller found."); } + +#ifdef CONFIG_HOTPLUG_CPU +static bool migrate_one_irq(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + const struct cpumask *affinity = d->affinity; + struct irq_chip *c; + bool ret = false; + + /* + * If this is a per-CPU interrupt, or the affinity does not + * include this CPU, then we have nothing to do. + */ + if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) + return false; + + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + affinity = cpu_online_mask; + ret = true; + } + + c = irq_data_get_irq_chip(d); + if (!c->irq_set_affinity) + pr_debug("IRQ%u: unable to set affinity\n", d->irq); + else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret) + cpumask_copy(d->affinity, affinity); + + return ret; +} + +/* + * The current CPU has been marked offline. Migrate IRQs off this CPU. + * If the affinity settings do not allow other CPUs, force them onto any + * available CPU. 
+ * + * Note: we must iterate over all IRQs, whether they have an attached + * action structure or not, as we need to get chained interrupts too. + */ +void migrate_irqs(void) +{ + unsigned int i; + struct irq_desc *desc; + unsigned long flags; + + local_irq_save(flags); + + for_each_irq_desc(i, desc) { + bool affinity_broken; + + raw_spin_lock(&desc->lock); + affinity_broken = migrate_one_irq(desc); + raw_spin_unlock(&desc->lock); + + if (affinity_broken) + pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n", + i, smp_processor_id()); + } + + local_irq_restore(flags); +} +#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 7ae8a1f..de17c89 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -102,6 +102,13 @@ void arch_cpu_idle(void) local_irq_enable(); } +#ifdef CONFIG_HOTPLUG_CPU +void arch_cpu_idle_dead(void) +{ + cpu_die(); +} +#endif + void machine_shutdown(void) { #ifdef CONFIG_SMP diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index e992734..5b4ddbd 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -167,6 +167,102 @@ asmlinkage void secondary_start_kernel(void) cpu_startup_entry(CPUHP_ONLINE); } +#ifdef CONFIG_HOTPLUG_CPU +static int op_cpu_disable(unsigned int cpu) +{ + /* + * If we don't have a cpu_die method, abort before we reach the point + * of no return. CPU0 may not have an cpu_ops, so test for it. + */ + if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die) + return -EOPNOTSUPP; + + /* + * We may need to abort a hot unplug for some other mechanism-specific + * reason. + */ + if (cpu_ops[cpu]->cpu_disable) + return cpu_ops[cpu]->cpu_disable(cpu); + + return 0; +} + +/* + * __cpu_disable runs on the processor to be shutdown. + */ +int __cpu_disable(void) +{ + unsigned int cpu = smp_processor_id(); + int ret; + + ret = op_cpu_disable(cpu); + if (ret) + return ret; + + /* + * Take this CPU offline. Once we clear this, we can't return, + * and we must not schedule until we're ready to give up the cpu. + */ + set_cpu_online(cpu, false); + + /* + * OK - migrate IRQs away from this CPU + */ + migrate_irqs(); + + /* + * Remove this CPU from the vm mask set of all processes. + */ + clear_tasks_mm_cpumask(cpu); + + return 0; +} + +static DECLARE_COMPLETION(cpu_died); + +/* + * called on the thread which is asking for a CPU to be shutdown - + * waits until shutdown has completed, or it is timed out. + */ +void __cpu_die(unsigned int cpu) +{ + if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) { + pr_crit("CPU%u: cpu didn't die\n", cpu); + return; + } + pr_notice("CPU%u: shutdown\n", cpu); +} + +/* + * Called from the idle thread for the CPU which has been shutdown. + * + * Note that we disable IRQs here, but do not re-enable them + * before returning to the caller. This is also the behaviour + * of the other hotplug-cpu capable cores, so presumably coming + * out of idle fixes this. + */ +void cpu_die(void) +{ + unsigned int cpu = smp_processor_id(); + + idle_task_exit(); + + local_irq_disable(); + + /* Tell __cpu_die() that this CPU is now safe to dispose of */ + complete(&cpu_died); + + /* + * Actually shutdown the CPU. This must never fail. The specific hotplug + * mechanism must perform all required cache maintenance to ensure that + * no dirty lines are lost in the process of shutting down the CPU. 
+ */ + cpu_ops[cpu]->cpu_die(cpu); + + BUG(); +} +#endif + void __init smp_cpus_done(unsigned int max_cpus) { pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); -- cgit v0.10.2 From 831ccf79b46e02f31cfd16c66df9d5600f635155 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:19 +0100 Subject: arm64: add PSCI CPU_OFF-based hotplug support This patch adds support for using PSCI CPU_OFF calls for CPU hotplug. With this code it is possible to hot unplug CPUs with "psci" as their boot-method, as long as there's an appropriate cpu_off function id specified in the psci node. Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index fb56b61..4f97db3 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -256,11 +256,41 @@ static int cpu_psci_cpu_boot(unsigned int cpu) return err; } +#ifdef CONFIG_HOTPLUG_CPU +static int cpu_psci_cpu_disable(unsigned int cpu) +{ + /* Fail early if we don't have CPU_OFF support */ + if (!psci_ops.cpu_off) + return -EOPNOTSUPP; + return 0; +} + +static void cpu_psci_cpu_die(unsigned int cpu) +{ + int ret; + /* + * There are no known implementations of PSCI actually using the + * power state field, pass a sensible default for now. + */ + struct psci_power_state state = { + .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, + }; + + ret = psci_ops.cpu_off(state); + + pr_crit("psci: unable to power off CPU%u (%d)\n", cpu, ret); +} +#endif + const struct cpu_operations cpu_psci_ops = { .name = "psci", .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, .cpu_boot = cpu_psci_cpu_boot, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = cpu_psci_cpu_disable, + .cpu_die = cpu_psci_cpu_die, +#endif }; #endif -- cgit v0.10.2 From a0974e6e217aead196033d72f898e2acb575304d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Oct 2013 14:52:08 +0100 Subject: arm64: big-endian: add big-endian support to top-level arch Makefile This patch adds big-endian support to the AArch64 top-level Makefile. This currently just passes the relevant flags to the toolchain and is predicated on a Kconfig option that will be introduced later on. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index d90cf79..2fceb71 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -20,9 +20,15 @@ LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) KBUILD_DEFCONFIG := defconfig KBUILD_CFLAGS += -mgeneral-regs-only +ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) +KBUILD_CPPFLAGS += -mbig-endian +AS += -EB +LD += -EB +else KBUILD_CPPFLAGS += -mlittle-endian AS += -EL LD += -EL +endif comma = , -- cgit v0.10.2 From c194520ada7c8f2eddec5ebf24982483b49736a0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Oct 2013 14:52:09 +0100 Subject: arm64: big-endian: fix byteorder include For big-endian processors, we must include linux/byteorder/big_endian.h to get the relevant definitions for swabbing between CPU order and a defined endianness. 
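With the correct header in place, the generic cpu_to_*/*_to_cpu helpers behave properly on both kernel endiannesses. A small illustrative example (not from the patch):

	#include <asm/byteorder.h>	/* selects the header above via __AARCH64EB__ */

	/* A device descriptor field that is little-endian in memory: */
	static void set_desc_len(__le32 *field, u32 len)
	{
		*field = cpu_to_le32(len);	/* byte-swaps on BE kernels, no-op on LE */
	}

	static u32 get_desc_len(const __le32 *field)
	{
		return le32_to_cpu(*field);	/* byte-swaps on BE kernels, no-op on LE */
	}
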
Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/uapi/asm/byteorder.h b/arch/arm64/include/uapi/asm/byteorder.h index 2b92046..dc19e95 100644 --- a/arch/arm64/include/uapi/asm/byteorder.h +++ b/arch/arm64/include/uapi/asm/byteorder.h @@ -16,6 +16,10 @@ #ifndef __ASM_BYTEORDER_H #define __ASM_BYTEORDER_H +#ifdef __AARCH64EB__ +#include +#else #include +#endif #endif /* __ASM_BYTEORDER_H */ -- cgit v0.10.2 From 5436b5c8305b4ed37c5d11f96c1aaccca63c9ab2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Oct 2013 14:52:10 +0100 Subject: arm64: ELF: add support for big-endian executables This patch adds support for the aarch64_be ELF format to the AArch64 ELF loader. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index e7fa87f..a3e4a55 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -90,11 +90,24 @@ typedef struct user_fpsimd_state elf_fpregset_t; * These are used to set parameters in the core dumps. */ #define ELF_CLASS ELFCLASS64 +#ifdef __AARCH64EB__ +#define ELF_DATA ELFDATA2MSB +#else #define ELF_DATA ELFDATA2LSB +#endif #define ELF_ARCH EM_AARCH64 +/* + * This yields a string that ld.so will use to load implementation + * specific libraries for optimization. This is more specific in + * intent than poking at uname or /proc/cpuinfo. + */ #define ELF_PLATFORM_SIZE 16 +#ifdef __AARCH64EB__ +#define ELF_PLATFORM ("aarch64_be") +#else #define ELF_PLATFORM ("aarch64") +#endif /* * This is used to ensure we don't load something for the wrong architecture. -- cgit v0.10.2 From 94ed1f2cb5d46533f10262b1b760db7dbec9cf10 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Oct 2013 14:52:11 +0100 Subject: arm64: setup: report ELF_PLATFORM as the machine for utsname uname -m reports the machine field from the current utsname, which should reflect the endianness of the system. This patch reports ELF_PLATFORM for the field, so that everything appears consistent from userspace. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index b65c132..fa6faf5 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -119,7 +119,7 @@ static void __init setup_processor(void) printk("CPU: %s [%08x] revision %d\n", cpu_name, read_cpuid_id(), read_cpuid_id() & 15); - sprintf(init_utsname()->machine, "aarch64"); + sprintf(init_utsname()->machine, ELF_PLATFORM); elf_hwcap = 0; } -- cgit v0.10.2 From a795a38eb91cf72c4a05e72a9c84e317ee179a48 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Oct 2013 14:52:12 +0100 Subject: arm64: compat: add support for big-endian (BE8) AArch32 binaries This patch adds support for BE8 AArch32 tasks to the compat layer. 
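BE8 here means AArch32 code with little-endian instructions but big-endian data, so the compat layer's main job is to start such tasks with the CPSR E bit set (plus BE-flavoured platform and machine strings). A simplified sketch of the initial pstate computation, mirroring the compat_start_thread() hunk below (compat_initial_pstate() is a hypothetical helper; the constants match asm/ptrace.h):

	#define COMPAT_PSR_MODE_USR	0x00000010
	#define COMPAT_PSR_T_BIT	0x00000020
	#define COMPAT_PSR_E_BIT	0x00000200	/* big-endian data accesses */

	static u32 compat_initial_pstate(unsigned long pc, bool big_endian)
	{
		u32 pstate = COMPAT_PSR_MODE_USR;

		if (pc & 1)			/* odd entry point: Thumb code */
			pstate |= COMPAT_PSR_T_BIT;
		if (big_endian)			/* BE8: data loads/stores are swapped */
			pstate |= COMPAT_PSR_E_BIT;
		return pstate;
	}
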
Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 899af80..fda2704 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -26,7 +26,11 @@ #include #define COMPAT_USER_HZ 100 +#ifdef __AARCH64EB__ +#define COMPAT_UTS_MACHINE "armv8b\0\0" +#else #define COMPAT_UTS_MACHINE "armv8l\0\0" +#endif typedef u32 compat_size_t; typedef s32 compat_ssize_t; @@ -73,13 +77,23 @@ struct compat_timeval { }; struct compat_stat { +#ifdef __AARCH64EB__ + short st_dev; + short __pad1; +#else compat_dev_t st_dev; +#endif compat_ino_t st_ino; compat_mode_t st_mode; compat_ushort_t st_nlink; __compat_uid16_t st_uid; __compat_gid16_t st_gid; +#ifdef __AARCH64EB__ + short st_rdev; + short __pad2; +#else compat_dev_t st_rdev; +#endif compat_off_t st_size; compat_off_t st_blksize; compat_off_t st_blocks; diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index a3e4a55..01d3aab 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -162,7 +162,12 @@ extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk #ifdef CONFIG_COMPAT + +#ifdef __AARCH64EB__ +#define COMPAT_ELF_PLATFORM ("v8b") +#else #define COMPAT_ELF_PLATFORM ("v8l") +#endif #define COMPAT_ELF_ET_DYN_BASE (randomize_et_dyn(2 * TASK_SIZE_32 / 3)) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ab239b2..45b20cd 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -107,6 +107,11 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, regs->pstate = COMPAT_PSR_MODE_USR; if (pc & 1) regs->pstate |= COMPAT_PSR_T_BIT; + +#ifdef __AARCH64EB__ + regs->pstate |= COMPAT_PSR_E_BIT; +#endif + regs->compat_sp = sp; } #endif diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 0dacbbf..0e7fa49 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -42,6 +42,7 @@ #define COMPAT_PSR_MODE_UND 0x0000001b #define COMPAT_PSR_MODE_SYS 0x0000001f #define COMPAT_PSR_T_BIT 0x00000020 +#define COMPAT_PSR_E_BIT 0x00000200 #define COMPAT_PSR_F_BIT 0x00000040 #define COMPAT_PSR_I_BIT 0x00000080 #define COMPAT_PSR_A_BIT 0x00000100 -- cgit v0.10.2 From 55b89540b0d8d031f90e3d711ec0df3f797ecc61 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Fri, 11 Oct 2013 14:52:13 +0100 Subject: arm64: compat: correct register concatenation for syscall wrappers The arm64 port contains wrappers for arm32 syscalls that pass 64-bit values. These wrappers concatenate the two registers to hold a 64-bit value in a single X register. On BE, however, the lower and higher words are swapped. Create a new assembler macro, regs_to_64, that when on BE systems swaps the registers in the orr instruction. Acked-by: Will Deacon Signed-off-by: Matthew Leach Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 5aceb83..381b935 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -115,3 +115,15 @@ lr .req x30 // link register .align 7 b \label .endm +/* + * Define a macro that constructs a 64-bit value by concatenating two + * 32-bit registers. Note that on big endian systems the order of the + * registers is swapped. 
+ */ +#ifndef CONFIG_CPU_BIG_ENDIAN + .macro regs_to_64, rd, lbits, hbits +#else + .macro regs_to_64, rd, hbits, lbits +#endif + orr \rd, \lbits, \hbits, lsl #32 + .endm diff --git a/arch/arm64/kernel/sys32.S b/arch/arm64/kernel/sys32.S index a1b19ed..423a5b3 100644 --- a/arch/arm64/kernel/sys32.S +++ b/arch/arm64/kernel/sys32.S @@ -59,48 +59,48 @@ ENDPROC(compat_sys_fstatfs64_wrapper) * extension. */ compat_sys_pread64_wrapper: - orr x3, x4, x5, lsl #32 + regs_to_64 x3, x4, x5 b sys_pread64 ENDPROC(compat_sys_pread64_wrapper) compat_sys_pwrite64_wrapper: - orr x3, x4, x5, lsl #32 + regs_to_64 x3, x4, x5 b sys_pwrite64 ENDPROC(compat_sys_pwrite64_wrapper) compat_sys_truncate64_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 b sys_truncate ENDPROC(compat_sys_truncate64_wrapper) compat_sys_ftruncate64_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 b sys_ftruncate ENDPROC(compat_sys_ftruncate64_wrapper) compat_sys_readahead_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 mov w2, w4 b sys_readahead ENDPROC(compat_sys_readahead_wrapper) compat_sys_fadvise64_64_wrapper: mov w6, w1 - orr x1, x2, x3, lsl #32 - orr x2, x4, x5, lsl #32 + regs_to_64 x1, x2, x3 + regs_to_64 x2, x4, x5 mov w3, w6 b sys_fadvise64_64 ENDPROC(compat_sys_fadvise64_64_wrapper) compat_sys_sync_file_range2_wrapper: - orr x2, x2, x3, lsl #32 - orr x3, x4, x5, lsl #32 + regs_to_64 x2, x2, x3 + regs_to_64 x3, x4, x5 b sys_sync_file_range2 ENDPROC(compat_sys_sync_file_range2_wrapper) compat_sys_fallocate_wrapper: - orr x2, x2, x3, lsl #32 - orr x3, x4, x5, lsl #32 + regs_to_64 x2, x2, x3 + regs_to_64 x3, x4, x5 b sys_fallocate ENDPROC(compat_sys_fallocate_wrapper) -- cgit v0.10.2 From a1d5ebaf8ccdd100f45042ce32c591867de04ac3 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Fri, 11 Oct 2013 14:52:14 +0100 Subject: arm64: big-endian: don't treat code as data when copying sigret code Currently the sigreturn compat code is copied to an offset in the vectors table. When using a BE kernel this data will be stored in the wrong endianness so when returning from a signal on a 32-bit BE system, arbitrary code will be executed. Instead of declaring the code inside a struct and copying that, use the assembler's .byte directives to store the code in the correct endianness regardless of platform endianness. Acked-by: Will Deacon Signed-off-by: Matthew Leach Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 8b69ecb..1e4905d 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -27,6 +27,9 @@ * * See Documentation/arm/kernel_user_helpers.txt for formal definitions. */ + +#include <asm/unistd32.h> + .align 5 .globl __kuser_helper_start __kuser_helper_start: @@ -75,3 +78,42 @@ __kuser_helper_version: // 0xffff0ffc .word ((__kuser_helper_end - __kuser_helper_start) >> 5) .globl __kuser_helper_end __kuser_helper_end: + +/* + * AArch32 sigreturn code + * + * For ARM syscalls, the syscall number has to be loaded into r7. + * We do not support an OABI userspace. + * + * For Thumb syscalls, we also pass the syscall number via r7. We therefore + * need two 16-bit instructions.
+ */ + .globl __aarch32_sigret_code_start +__aarch32_sigret_code_start: + + /* + * ARM code + */ + .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn + + /* + * Thumb code + */ + .byte __NR_compat_sigreturn, 0x27 // mov r7, #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0xdf // svc #__NR_compat_sigreturn + + /* + * ARM code + */ + .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn + + /* + * Thumb code + */ + .byte __NR_compat_rt_sigreturn, 0x27 // mov r7, #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0xdf // svc #__NR_compat_rt_sigreturn + + .globl __aarch32_sigret_code_end +__aarch32_sigret_code_end: diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index e393174..e8772c0 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -100,34 +100,6 @@ struct compat_rt_sigframe { #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -/* - * For ARM syscalls, the syscall number has to be loaded into r7. - * We do not support an OABI userspace. - */ -#define MOV_R7_NR_SIGRETURN (0xe3a07000 | __NR_compat_sigreturn) -#define SVC_SYS_SIGRETURN (0xef000000 | __NR_compat_sigreturn) -#define MOV_R7_NR_RT_SIGRETURN (0xe3a07000 | __NR_compat_rt_sigreturn) -#define SVC_SYS_RT_SIGRETURN (0xef000000 | __NR_compat_rt_sigreturn) - -/* - * For Thumb syscalls, we also pass the syscall number via r7. We therefore - * need two 16-bit instructions. - */ -#define SVC_THUMB_SIGRETURN (((0xdf00 | __NR_compat_sigreturn) << 16) | \ - 0x2700 | __NR_compat_sigreturn) -#define SVC_THUMB_RT_SIGRETURN (((0xdf00 | __NR_compat_rt_sigreturn) << 16) | \ - 0x2700 | __NR_compat_rt_sigreturn) - -const compat_ulong_t aarch32_sigret_code[6] = { - /* - * AArch32 sigreturn code. - * We don't construct an OABI SWI - instead we just set the imm24 field - * to the EABI syscall number so that we create a sane disassembly. - */ - MOV_R7_NR_SIGRETURN, SVC_SYS_SIGRETURN, SVC_THUMB_SIGRETURN, - MOV_R7_NR_RT_SIGRETURN, SVC_SYS_RT_SIGRETURN, SVC_THUMB_RT_SIGRETURN, -}; - static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) { compat_sigset_t cset; diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 6a389dc..65d40cf 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -58,7 +58,10 @@ static struct page *vectors_page[1]; static int alloc_vectors_page(void) { extern char __kuser_helper_start[], __kuser_helper_end[]; + extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; + int kuser_sz = __kuser_helper_end - __kuser_helper_start; + int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; unsigned long vpage; vpage = get_zeroed_page(GFP_ATOMIC); @@ -72,7 +75,7 @@ static int alloc_vectors_page(void) /* sigreturn code */ memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET, - aarch32_sigret_code, sizeof(aarch32_sigret_code)); + __aarch32_sigret_code_start, sigret_sz); flush_icache_range(vpage, vpage + PAGE_SIZE); vectors_page[0] = virt_to_page(vpage); -- cgit v0.10.2 From e68bedaa03c950ae8045e7899e7a6b2a97d1bf41 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Fri, 11 Oct 2013 14:52:15 +0100 Subject: arm64: asm: add CPU_LE & CPU_BE assembler helpers Add CPU_LE and CPU_BE to select assembler code in little and big endian configurations respectively.
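The same pattern is easy to model in C, which also makes the earlier regs_to_64 macro more approachable. A hedged sketch, using __AARCH64EB__ as a plain-C stand-in for CONFIG_CPU_BIG_ENDIAN (names are illustrative, not the kernel's):

	#ifdef __AARCH64EB__			/* stand-in for CONFIG_CPU_BIG_ENDIAN */
	#define CPU_BE(code...)	code
	#define CPU_LE(code...)
	#else
	#define CPU_BE(code...)
	#define CPU_LE(code...)	code
	#endif

	/* C model of regs_to_64: a 64-bit AArch32 syscall argument arrives in
	 * a pair of 32-bit registers, and the EABI places the high word in the
	 * first (even) register on BE but in the second register on LE. */
	static inline unsigned long long pair_to_64(unsigned int first,
						    unsigned int second)
	{
	#ifdef __AARCH64EB__
		return ((unsigned long long)first << 32) | second;
	#else
		return ((unsigned long long)second << 32) | first;
	#endif
	}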
Signed-off-by: Matthew Leach Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 381b935..fd3e392 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -115,6 +115,25 @@ lr .req x30 // link register .align 7 b \label .endm + +/* + * Select code when configured for BE. + */ +#ifdef CONFIG_CPU_BIG_ENDIAN +#define CPU_BE(code...) code +#else +#define CPU_BE(code...) +#endif + +/* + * Select code when configured for LE. + */ +#ifdef CONFIG_CPU_BIG_ENDIAN +#define CPU_LE(code...) +#else +#define CPU_LE(code...) code +#endif + /* * Define a macro that constructs a 64-bit value by concatenating two * 32-bit registers. Note that on big endian systems the order of the -- cgit v0.10.2 From 828e9834e9a5b7e61046aa3c5f603a4fecba2fb4 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Fri, 11 Oct 2013 14:52:16 +0100 Subject: arm64: head: create a new function for setting the boot_cpu_mode flag Currently, the code for setting the __cpu_boot_mode flag is munged in with el2_setup. This makes things difficult on a BE bringup as a memory access has to have occurred before el2_setup, which is the place that we'd like to set the endianness on the current EL. Create a new function for setting __cpu_boot_mode and have el2_setup return the mode the CPU booted in. Also define a new constant in virt.h, BOOT_CPU_MODE_EL1, for readability. Acked-by: Marc Zyngier Acked-by: Will Deacon Signed-off-by: Matthew Leach Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 26e310c..130e2be 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -18,7 +18,8 @@ #ifndef __ASM__VIRT_H #define __ASM__VIRT_H -#define BOOT_CPU_MODE_EL2 (0x0e12b007) +#define BOOT_CPU_MODE_EL1 (0xe11) +#define BOOT_CPU_MODE_EL2 (0xe12) #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index bf7efdf..775ecb3 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -123,8 +123,9 @@ ENTRY(stext) mov x21, x0 // x21=FDT + bl el2_setup // Drop to EL1, w20=cpu_boot_mode bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET - bl el2_setup // Drop to EL1 + bl set_cpu_boot_mode_flag mrs x22, midr_el1 // x22=cpuid mov x0, x22 bl lookup_processor_type @@ -150,21 +151,20 @@ ENDPROC(stext) /* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. + * + * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if + * booted in EL1 or EL2 respectively. */ ENTRY(el2_setup) mrs x0, CurrentEL cmp x0, #PSR_MODE_EL2t ccmp x0, #PSR_MODE_EL2h, #0x4, ne - ldr x0, =__boot_cpu_mode // Compute __boot_cpu_mode - add x0, x0, x28 b.eq 1f - str wzr, [x0] // Remember we don't have EL2... + mov w20, #BOOT_CPU_MODE_EL1 // This CPU booted in EL1 ret /* Hyp configuration. */ -1: ldr w1, =BOOT_CPU_MODE_EL2 - str w1, [x0, #4] // This CPU has EL2 - mov x0, #(1 << 31) // 64-bit EL1 +1: mov x0, #(1 << 31) // 64-bit EL1 msr hcr_el2, x0 /* Generic timers. */ @@ -204,10 +204,25 @@ ENTRY(el2_setup) PSR_MODE_EL1h) msr spsr_el2, x0 msr elr_el2, lr + mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 eret ENDPROC(el2_setup) /* + * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed + * in x20. See arch/arm64/include/asm/virt.h for more info.
+ */ +ENTRY(set_cpu_boot_mode_flag) + ldr x1, =__boot_cpu_mode // Compute __boot_cpu_mode + add x1, x1, x28 + cmp w20, #BOOT_CPU_MODE_EL2 + b.ne 1f + add x1, x1, #4 +1: str w20, [x1] // Record the CPU boot mode + ret +ENDPROC(set_cpu_boot_mode_flag) + +/* * We need to find out the CPU boot mode long after boot, so we need to * store it in a writable variable. * @@ -234,8 +249,9 @@ ENTRY(__boot_cpu_mode) * cores are held until we're ready for them to initialise. */ ENTRY(secondary_holding_pen) - bl __calc_phys_offset // x24=phys offset - bl el2_setup // Drop to EL1 + bl el2_setup // Drop to EL1, w20=cpu_boot_mode + bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET + bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 ldr x1, =MPIDR_HWID_BITMASK and x0, x0, x1 -- cgit v0.10.2 From 9cf71728931a4073b9e3a4bcbf9dada86bc98370 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Fri, 11 Oct 2013 14:52:17 +0100 Subject: arm64: big-endian: set correct endianness on kernel entry The endianness of memory accesses at EL2 and EL1 is configured by SCTLR_EL2.EE and SCTLR_EL1.EE respectively. When the kernel is booted, the state of SCTLR_EL{2,1}.EE is unknown, and thus the kernel must ensure that these bits are set correctly before performing any memory accesses. This patch ensures that SCTLR_EL{2,1} are configured appropriately at boot for kernels of either endianness. Acked-by: Will Deacon Signed-off-by: Matthew Leach [catalin.marinas@arm.com: fix SCTLR_EL1.E0E bit setting in head.S] Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 775ecb3..7009387 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -159,12 +159,22 @@ ENTRY(el2_setup) mrs x0, CurrentEL cmp x0, #PSR_MODE_EL2t ccmp x0, #PSR_MODE_EL2h, #0x4, ne - b.eq 1f + b.ne 1f + mrs x0, sctlr_el2 +CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 +CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 + msr sctlr_el2, x0 + b 2f +1: mrs x0, sctlr_el1 +CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 +CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 + msr sctlr_el1, x0 mov w20, #BOOT_CPU_MODE_EL1 // This CPU booted in EL1 + isb ret /* Hyp configuration. */ -1: mov x0, #(1 << 31) // 64-bit EL1 +2: mov x0, #(1 << 31) // 64-bit EL1 msr hcr_el2, x0 /* Generic timers. */ @@ -181,7 +191,8 @@ ENTRY(el2_setup) /* sctlr_el1 */ mov x0, #0x0800 // Set/clear RES{1,0} bits - movk x0, #0x30d0, lsl #16 +CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems +CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr sctlr_el1, x0 /* Coprocessor traps. */ diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index b1b31bb..421b99f 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -162,9 +162,9 @@ ENDPROC(__cpu_setup) * CE0 XWHW CZ ME TEEA S * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM * 0011 0... 1101 ..0. ..0. 10.. .... .... < hardware reserved - * .... .100 .... 01.1 11.1 ..01 0001 1101 < software settings + * .... .1.. .... 
01.1 11.1 ..01 0001 1101 < software settings */ .type crval, #object crval: - .word 0x030802e2 // clear + .word 0x000802e2 // clear .word 0x0405d11d // set -- cgit v0.10.2 From 710be9ac4ea0d2e02a2c4aa625795e65bf3db5b1 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Fri, 11 Oct 2013 14:52:18 +0100 Subject: arm64: big-endian: write CPU holding pen address as LE Currently when CPUs are brought online via a spin-table, the address they should jump to is written to the cpu-release-addr in the kernel's native endianness. As the kernel may switch endianness, secondaries might read the value byte-reversed from what was intended, and they would jump to the wrong address. As the only current arm64 spin-table implementations are little-endian, tighten up the arm64 spin-table definition such that the value written to cpu-release-addr is _always_ little-endian regardless of the endianness of any CPU. If a spinning CPU is operating big-endian, it must byte-reverse the value before jumping to handle this. Signed-off-by: Matthew Leach Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 27f0836..44c2280 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -72,7 +72,16 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) return -ENODEV; release_addr = __va(cpu_release_addr[cpu]); - release_addr[0] = (void *)__pa(secondary_holding_pen); + + /* + * We write the release address as LE regardless of the native + * endianness of the kernel. Therefore, any boot-loaders that + * read this address need to convert this address to the + * boot-loader's endianness before jumping. This is mandated by + * the boot protocol. + */ + release_addr[0] = (void *) cpu_to_le64(__pa(secondary_holding_pen)); + __flush_dcache_area(release_addr, sizeof(release_addr[0])); /* -- cgit v0.10.2 From 4a12cae7ef2612eb094c4b48e8b37cf837e3df55 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 25 Oct 2013 15:48:33 +0100 Subject: arm64: Fix the endianness of arch_spinlock_t The owner and next members of the arch_spinlock_t structure need to be swapped when compiling for big endian. Signed-off-by: Catalin Marinas Reported-by: Matthew Leach Acked-by: Will Deacon diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h index 8769275..b8d3836 100644 --- a/arch/arm64/include/asm/spinlock_types.h +++ b/arch/arm64/include/asm/spinlock_types.h @@ -23,8 +23,13 @@ #define TICKET_SHIFT 16 typedef struct { +#ifdef __AARCH64EB__ + u16 next; + u16 owner; +#else u16 owner; u16 next; +#endif } __aligned(4) arch_spinlock_t; #define __ARCH_SPIN_LOCK_UNLOCKED { 0 , 0 } -- cgit v0.10.2 From a872013d6d03ab63736a01dcd9747580be3a6b70 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Oct 2013 14:52:19 +0100 Subject: arm64: kconfig: allow CPU_BIG_ENDIAN to be selected This patch wires up CONFIG_CPU_BIG_ENDIAN for the AArch64 kernel configuration. Selecting this option builds a big-endian kernel which can boot into a big-endian userspace. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 86ebfe4..bbb5d94 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -139,6 +139,11 @@ config ARM64_64K_PAGES look-up. AArch32 emulation is not available when this feature is enabled. +config CPU_BIG_ENDIAN + bool "Build big-endian kernel" + help + Say Y if you plan on running a kernel in big-endian mode.
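To see why the arch_spinlock_t field order above must flip on big-endian, view the lock as the single 32-bit word the ticket operations load and store: 'next' lives in the high half of that word (TICKET_SHIFT is 16), and which struct member overlays the high half depends on byte order. A small illustrative sketch, not the kernel's definition:

	#include <stdint.h>

	/* Illustrative only: the two 16-bit tickets as one 32-bit word. */
	union ticket_lock_view {
		uint32_t word;			/* what the ldaxr/stxr sequences see */
		struct {
	#ifdef __AARCH64EB__
			uint16_t next;		/* BE: first member = high-order bytes */
			uint16_t owner;
	#else
			uint16_t owner;		/* LE: first member = low-order bytes */
			uint16_t next;
	#endif
		} tickets;
	};
	/* In both layouts, word += (1 << 16) increments tickets.next. */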
+ config SMP bool "Symmetric Multi-Processing" select USE_GENERIC_SMP_HELPERS -- cgit v0.10.2 From c019de3de61387d224ba4738e3d196aa24c88844 Mon Sep 17 00:00:00 2001 From: Vinayak Kale Date: Fri, 18 Oct 2013 13:59:06 +0100 Subject: arm64: perf: fix event number mask This patch fixes the ARMV8_EVTYPE_* macros since the evtCount (event number) field is 10 bits wide in the event selection register. Signed-off-by: Vinayak Kale Acked-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index cea1594..5d14470 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -784,8 +784,8 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] /* * PMXEVTYPER: Event selection reg */ -#define ARMV8_EVTYPE_MASK 0xc80000ff /* Mask for writable bits */ -#define ARMV8_EVTYPE_EVENT 0xff /* Mask for EVENT bits */ +#define ARMV8_EVTYPE_MASK 0xc80003ff /* Mask for writable bits */ +#define ARMV8_EVTYPE_EVENT 0x3ff /* Mask for EVENT bits */ /* * Event filters for PMUv3 @@ -1175,7 +1175,8 @@ static void armv8pmu_reset(void *info) static int armv8_pmuv3_map_event(struct perf_event *event) { return map_cpu_event(event, &armv8_pmuv3_perf_map, - &armv8_pmuv3_perf_cache_map, 0xFF); + &armv8_pmuv3_perf_cache_map, + ARMV8_EVTYPE_EVENT); } static struct arm_pmu armv8pmu = { -- cgit v0.10.2 From d0f38f9130b7683e39611c5a661349e301ee43c8 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 7 Oct 2013 18:30:34 +0100 Subject: arm64: update 32-bit kuser helpers to ARMv8 This patch updates the barrier semantics in the kuser helper functions to take advantage of the ARMv8 additions to AArch32, which are guaranteed to be available in situations where these functions will be called. Note that this slightly changes the cmpxchg functions in that they are no longer necessarily full barriers if they return 1. However, the documentation only states they include their own barriers "as needed", not that they are obligated to act as a full barrier for the caller.
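In C11 atomics terms, the new sequences are roughly acquire/release exchanges (LDAEX/STLEX) rather than exchanges bracketed by full barriers. An approximate model of the updated __kuser_cmpxchg contract, illustrative only and not the kernel's implementation:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>

	/* Returns true on success (the kuser helper itself returns 0 on
	 * success). A failed exchange is no longer a full barrier. */
	static bool kuser_style_cmpxchg(_Atomic uint32_t *ptr,
					uint32_t oldval, uint32_t newval)
	{
		return atomic_compare_exchange_strong_explicit(ptr, &oldval, newval,
							       memory_order_acq_rel,
							       memory_order_acquire);
	}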
Signed-off-by: Robin Murphy Acked-by: Will Deacon CC: Matthew Leach CC: Dave Martin CC: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 1e4905d..63c48ff 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -38,33 +38,30 @@ __kuser_cmpxchg64: // 0xffff0f60 .inst 0xe92d00f0 // push {r4, r5, r6, r7} .inst 0xe1c040d0 // ldrd r4, r5, [r0] .inst 0xe1c160d0 // ldrd r6, r7, [r1] - .inst 0xf57ff05f // dmb sy - .inst 0xe1b20f9f // 1: ldrexd r0, r1, [r2] + .inst 0xe1b20e9f // 1: ldaexd r0, r1, [r2] .inst 0xe0303004 // eors r3, r0, r4 .inst 0x00313005 // eoreqs r3, r1, r5 - .inst 0x01a23f96 // strexdeq r3, r6, [r2] + .inst 0x01a23e96 // stlexdeq r3, r6, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffff9 // beq 1b - .inst 0xf57ff05f // dmb sy .inst 0xe2730000 // rsbs r0, r3, #0 .inst 0xe8bd00f0 // pop {r4, r5, r6, r7} .inst 0xe12fff1e // bx lr .align 5 __kuser_memory_barrier: // 0xffff0fa0 - .inst 0xf57ff05f // dmb sy + .inst 0xf57ff05b // dmb ish .inst 0xe12fff1e // bx lr .align 5 __kuser_cmpxchg: // 0xffff0fc0 - .inst 0xf57ff05f // dmb sy - .inst 0xe1923f9f // 1: ldrex r3, [r2] + .inst 0xe1923e9f // 1: ldaex r3, [r2] .inst 0xe0533000 // subs r3, r3, r0 - .inst 0x01823f91 // strexeq r3, r1, [r2] + .inst 0x01823e91 // stlexeq r3, r1, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffffa // beq 1b .inst 0xe2730000 // rsbs r0, r3, #0 - .inst 0xeaffffef // b <__kuser_memory_barrier> + .inst 0xe12fff1e // bx lr .align 5 __kuser_get_tls: // 0xffff0fe0 -- cgit v0.10.2 From c04e8e2fe5e0163fd37ccec7f538be8fa63be40e Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Thu, 24 Oct 2013 15:54:17 +0100 Subject: arm64: allow ioremap_cache() to use existing RAM mappings Some drivers (ACPI notably) use ioremap_cache() to map an area which could either be outside of kernel RAM or in an already mapped reserved area of RAM. To avoid aliases with different caching attributes, ioremap() does not allow RAM to be remapped. But for ioremap_cache(), the existing kernel mapping may be used. 
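Typical intended usage of the new interface, sketched for a hypothetical ACPI-style table mapping (the function name, address and length are illustrative; error handling trimmed):

	#include <linux/io.h>

	/* If 'pa' lies in RAM, ioremap_cache() below returns the existing
	 * cacheable linear mapping; otherwise it creates a PROT_NORMAL one.
	 * __iounmap() only vunmap()s addresses in the vmalloc range, so
	 * handing back a linear-map address is safe. */
	static void __iomem *map_firmware_table(phys_addr_t pa, size_t len)
	{
		void __iomem *tbl = ioremap_cache(pa, len);

		if (!tbl)
			return NULL;
		/* ... parse the table ... */
		return tbl;		/* caller eventually calls iounmap(tbl) */
	}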
Signed-off-by: Mark Salter Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 1d12f89..b56e5b5 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -224,6 +224,7 @@ extern void __memset_io(volatile void __iomem *, int, size_t); */ extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot); extern void __iounmap(volatile void __iomem *addr); +extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); #define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_DIRTY) #define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE)) @@ -233,7 +234,6 @@ extern void __iounmap(volatile void __iomem *addr); #define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap_nocache(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC)) -#define ioremap_cached(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL)) #define iounmap __iounmap #define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF) diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 1725cd6..2bb1d58 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -77,8 +77,24 @@ EXPORT_SYMBOL(__ioremap); void __iounmap(volatile void __iomem *io_addr) { - void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr); + unsigned long addr = (unsigned long)io_addr & PAGE_MASK; - vunmap(addr); + /* + * We could get an address outside vmalloc range in case + * of ioremap_cache() reusing a RAM mapping. + */ + if (VMALLOC_START <= addr && addr < VMALLOC_END) + vunmap((void *)addr); } EXPORT_SYMBOL(__iounmap); + +void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) +{ + /* For normal memory we already have a cacheable mapping. */ + if (pfn_valid(__phys_to_pfn(phys_addr))) + return (void __iomem *)__phys_to_virt(phys_addr); + + return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL), + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); -- cgit v0.10.2 From 6e15d0e04bfeaa5662a289ee915273307326e45a Mon Sep 17 00:00:00 2001 From: Sudeep KarkadaNagesha Date: Mon, 21 Oct 2013 13:29:42 +0100 Subject: ARM64: DT: define ARM64 specific arch_match_cpu_phys_id The OF/DT core library provides an architecture-specific hook to match the logical cpu index with the corresponding physical identifier. On ARM64, the MPIDR_EL1 contains specific bitfields (MPIDR_EL1.Aff{3..0}) which uniquely identify a CPU, in addition to some non-identifying information and reserved bits. The ARM cpu binding defines the 'reg' property to only contain the affinity bits, and any cpu nodes with other bits set in their 'reg' entry are skipped. This patch overrides the weak definition of arch_match_cpu_phys_id with an ARM64-specific version using MPIDR_EL1.Aff{3..0} as cpu physical identifiers. Signed-off-by: Sudeep KarkadaNagesha Acked-by: Mark Rutland Cc: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index fa6faf5..9cf30f4 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -98,6 +98,11 @@ void __init early_print(const char *str, ...)
printk("%s", buf); } +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == cpu_logical_map(cpu); +} + static void __init setup_processor(void) { struct cpu_info *cpu_info; -- cgit v0.10.2 From 248f0e7f5f0012fb9c1954e582196aa7f32a0c81 Mon Sep 17 00:00:00 2001 From: Sudeep KarkadaNagesha Date: Wed, 30 Oct 2013 13:47:16 +0000 Subject: ARM64: simplify cpu_read_bootcpu_ops using OF/DT helper Once the cpu_logical_map for any logical cpu is populated with the corresponding physical identifier(i.e. mpidr), it's device node can be retrieved using the DT helper 'of_get_cpu_node'. Currently the device tree parsing code to get boot cpu node is duplicated in 'cpu_read_bootcpu_ops'. This patch replaces the code parsing the device tree for the boot cpu with of_get_cpu_node. Signed-off-by: Sudeep KarkadaNagesha Acked-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index aa0c9e7..e2ba274 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -78,22 +78,10 @@ int __init cpu_read_ops(struct device_node *dn, int cpu) void __init cpu_read_bootcpu_ops(void) { - struct device_node *dn = NULL; - u64 mpidr = cpu_logical_map(0); - - while ((dn = of_find_node_by_type(dn, "cpu"))) { - u64 hwid; - const __be32 *prop; - - prop = of_get_property(dn, "reg", NULL); - if (!prop) - continue; - - hwid = of_read_number(prop, of_n_addr_cells(dn)); - if (hwid == mpidr) { - cpu_read_ops(dn, 0); - of_node_put(dn); - return; - } + struct device_node *dn = of_get_cpu_node(0, NULL); + if (!dn) { + pr_err("Failed to find device node for boot cpu\n"); + return; } + cpu_read_ops(dn, 0); } -- cgit v0.10.2 From 264666e62848c19ba9252c80e895ffec9ad1d391 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 31 Oct 2013 16:37:26 +0000 Subject: arm64: Slightly improve the warning on CPU0 enable-method Commit e8765b265a69 (arm64: read enable-method for CPU0) introduced checks for the enable method on CPU0 (to be later used with CPU suspend). However, if the kernel is compiled for UP and a DT file is used with a method like 'spin-table', Linux complains about 'invalid enable method'. This patch turns it into an 'unsupported enable method' warning. Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index e2ba274..d62d12f 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -68,8 +68,8 @@ int __init cpu_read_ops(struct device_node *dn, int cpu) cpu_ops[cpu] = cpu_get_ops(enable_method); if (!cpu_ops[cpu]) { - pr_err("%s: invalid enable-method property: %s\n", - dn->full_name, enable_method); + pr_warn("%s: unsupported enable-method property: %s\n", + dn->full_name, enable_method); return -EOPNOTSUPP; } -- cgit v0.10.2 From 3c620626c0cd4cfca856d70a846398275b48a768 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Mon, 4 Nov 2013 16:38:47 +0000 Subject: arm64: use generic RW_DATA_SECTION macro in linker script The .data section in the arm64 linker script currently lacks a definition for page-aligned data. This leads to a .page_aligned section being placed between the end of data and start of bss. This patch corrects that by using the generic RW_DATA_SECTION macro which includes support for page-aligned data. 
Signed-off-by: Mark Salter Acked-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 991ffdd..5161ad9 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -96,30 +96,13 @@ SECTIONS PERCPU_SECTION(64) __init_end = .; - . = ALIGN(THREAD_SIZE); - __data_loc = .; - - .data : AT(__data_loc) { - _data = .; /* address in memory */ - _sdata = .; - - /* - * first, the init task union, aligned - * to an 8192 byte boundary. - */ - INIT_TASK_DATA(THREAD_SIZE) - NOSAVE_DATA - CACHELINE_ALIGNED_DATA(64) - READ_MOSTLY_DATA(64) - - /* - * and the usual data section - */ - DATA_DATA - CONSTRUCTORS - - _edata = .; - } + + . = ALIGN(PAGE_SIZE); + _data = .; + __data_loc = _data - LOAD_OFFSET; + _sdata = .; + RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) + _edata = .; _edata_loc = __data_loc + SIZEOF(.data); BSS_SECTION(0, 0, 0) -- cgit v0.10.2 From 7ade67b5984d0a0434462fda733ab5138c63aae1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 4 Nov 2013 16:55:22 +0000 Subject: arm64: move enabling of GIC before CPUs are set online Commit 53ae3acd (arm64: Only enable local interrupts after the CPU is marked online) moved the enabling of the GIC after the CPUs are marked online. This has some interesting effects: [...] [] gic_raise_softirq+0xf8/0x160 [] smp_send_reschedule+0x38/0x40 [] resched_task+0x84/0xc0 [] check_preempt_curr+0x58/0x98 [] ttwu_do_wakeup+0x1c/0xf4 [] ttwu_do_activate.constprop.84+0x64/0x70 [] try_to_wake_up+0x1d4/0x2b4 [] default_wake_function+0x10/0x18 [] __wake_up_common+0x60/0xa0 [] complete+0x48/0x64 [] secondary_start_kernel+0xe8/0x110 [...] Here, we end up calling gic_raise_softirq without having initialized the interrupt controller for this CPU. While this goes unnoticed with GICv2 (the distributor is always accessible), it explodes with GICv3. The fix is to move the call to notify_cpu_starting before we set the secondary CPU online. Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5b4ddbd..3abb9e7 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -146,6 +146,11 @@ asmlinkage void secondary_start_kernel(void) cpu_ops[cpu]->cpu_postboot(); /* + * Enable GIC and timers. + */ + notify_cpu_starting(cpu); + + /* * OK, now it's safe to let the boot CPU continue. Wait for * the CPU migration code to notice that the CPU is online * before we continue. @@ -153,11 +158,6 @@ asmlinkage void secondary_start_kernel(void) set_cpu_online(cpu, true); complete(&cpu_running); - /* - * Enable GIC and timers. - */ - notify_cpu_starting(cpu); - local_irq_enable(); local_fiq_enable(); -- cgit v0.10.2 From 717321fcb58ed95169bf344ae47ac6098ba5dfbe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 4 Nov 2013 20:14:58 +0000 Subject: arm64: fix access to preempt_count from assembly code preempt_count is defined as an int. Oddly enough, we access it as a 64-bit value. Things become interesting when running a BE kernel, and looking at the current CPU number, which is stored as an int next to preempt_count. Like in a per-cpu interrupt handler, for example... Using a 32-bit access fixes the issue for good.
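The layout that makes the access width matter, sketched below (illustrative; not the exact arm64 struct thread_info definition):

	struct thread_info_sketch {
		unsigned long	flags;
		/* ... */
		int		preempt_count;	/* 4 bytes at offset TI_PREEMPT */
		int		cpu;		/* the adjacent 4 bytes */
	};

	/*
	 * A 64-bit load at TI_PREEMPT reads both fields:
	 *   LE: x24 = preempt_count | ((u64)cpu << 32)
	 *   BE: x24 = ((u64)preempt_count << 32) | cpu
	 * so on BE a "cbnz x24" also fires on a non-zero CPU number, while
	 * "cbnz w24" tests the 32-bit preempt_count alone on either endianness.
	 */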
Cc: Matthew Leach Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 3881fd1..e116614 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -311,14 +311,14 @@ el1_irq: #endif #ifdef CONFIG_PREEMPT get_thread_info tsk - ldr x24, [tsk, #TI_PREEMPT] // get preempt count - add x0, x24, #1 // increment it - str x0, [tsk, #TI_PREEMPT] + ldr w24, [tsk, #TI_PREEMPT] // get preempt count + add w0, w24, #1 // increment it + str w0, [tsk, #TI_PREEMPT] #endif irq_handler #ifdef CONFIG_PREEMPT - str x24, [tsk, #TI_PREEMPT] // restore preempt count - cbnz x24, 1f // preempt count != 0 + str w24, [tsk, #TI_PREEMPT] // restore preempt count + cbnz w24, 1f // preempt count != 0 ldr x0, [tsk, #TI_FLAGS] // get flags tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? bl el1_preempt @@ -509,15 +509,15 @@ el0_irq_naked: #endif get_thread_info tsk #ifdef CONFIG_PREEMPT - ldr x24, [tsk, #TI_PREEMPT] // get preempt count - add x23, x24, #1 // increment it - str x23, [tsk, #TI_PREEMPT] + ldr w24, [tsk, #TI_PREEMPT] // get preempt count + add w23, w24, #1 // increment it + str w23, [tsk, #TI_PREEMPT] #endif irq_handler #ifdef CONFIG_PREEMPT - ldr x0, [tsk, #TI_PREEMPT] - str x24, [tsk, #TI_PREEMPT] - cmp x0, x23 + ldr w0, [tsk, #TI_PREEMPT] + str w24, [tsk, #TI_PREEMPT] + cmp w0, w23 b.eq 1f mov x1, #0 str x1, [x1] // BUG -- cgit v0.10.2 From dab7ea360967f26337be1f26f56b12771da8c731 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 5 Nov 2013 10:03:53 +0000 Subject: arm64: defconfig: Enable CONFIG_PREEMPT by default This way we can spot early bugs when just testing with the default config. Signed-off-by: Catalin Marinas diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 31c81e9..84139be 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -26,7 +26,7 @@ CONFIG_MODULE_UNLOAD=y CONFIG_ARCH_VEXPRESS=y CONFIG_ARCH_XGENE=y CONFIG_SMP=y -CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_PREEMPT=y CONFIG_CMDLINE="console=ttyAMA0" # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y -- cgit v0.10.2 From 122e2fa0d310d262cb85cf0b003032e5d2bc2ae7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 5 Nov 2013 10:16:52 +0000 Subject: arm64: module: ensure instruction is little-endian before manipulation Relocations that require an instruction immediate to be re-encoded must ensure that the instruction pattern is represented in a little-endian format for the manipulation code to work correctly. This patch converts the loaded instruction into native-endianess prior to encoding and then converts back to little-endian byteorder before updating memory. Signed-off-by: Will Deacon Tested-by: Matthew Leach Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index ca0e3d5..2c28a6c 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -111,6 +111,9 @@ static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm) u32 immlo, immhi, lomask, himask, mask; int shift; + /* The instruction stream is always little endian. 
*/ + insn = le32_to_cpu(insn); + switch (type) { case INSN_IMM_MOVNZ: /* @@ -179,7 +182,7 @@ static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm) insn &= ~(mask << shift); insn |= (imm & mask) << shift; - return insn; + return cpu_to_le32(insn); } static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, -- cgit v0.10.2 From 847264fb7e73ade5b5e4b6eea3daa243a1f5217e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 23 Oct 2013 16:50:07 +0100 Subject: arm64: Use 42-bit address space with 64K pages This patch expands the VA_BITS to 42 when the 64K page configuration is enabled allowing 2TB kernel linear mapping. Linux still uses 2 levels of page tables in this configuration with pgd now being a full page. Signed-off-by: Catalin Marinas Acked-by: Will Deacon Acked-by: Marc Zyngier diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt index f28899d..5e054bf 100644 --- a/Documentation/arm64/memory.txt +++ b/Documentation/arm64/memory.txt @@ -21,7 +21,7 @@ The swapper_pgd_dir address is written to TTBR1 and never written to TTBR0. -AArch64 Linux memory layout: +AArch64 Linux memory layout with 4KB pages: Start End Size Use ----------------------------------------------------------------------- @@ -46,6 +46,31 @@ ffffffbffc000000 ffffffbfffffffff 64MB modules ffffffc000000000 ffffffffffffffff 256GB kernel logical memory map +AArch64 Linux memory layout with 64KB pages: + +Start End Size Use +----------------------------------------------------------------------- +0000000000000000 000003ffffffffff 4TB user + +fffffc0000000000 fffffdfbfffeffff ~2TB vmalloc + +fffffdfbffff0000 fffffdfbffffffff 64KB [guard page] + +fffffdfc00000000 fffffdfdffffffff 8GB vmemmap + +fffffdfe00000000 fffffdfffbbfffff ~8GB [guard, future vmmemap] + +fffffdfffbc00000 fffffdfffbdfffff 2MB earlyprintk device + +fffffdfffbe00000 fffffdfffbe0ffff 64KB PCI I/O space + +fffffdfffbe10000 fffffdfffbffffff ~2MB [guard] + +fffffdfffc000000 fffffdffffffffff 64MB modules + +fffffe0000000000 ffffffffffffffff 2TB kernel logical memory map + + Translation table lookup with 4KB pages: +--------+--------+--------+--------+--------+--------+--------+--------+ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 20925bc..3776217 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -33,18 +33,23 @@ #define UL(x) _AC(x, UL) /* - * PAGE_OFFSET - the virtual address of the start of the kernel image. + * PAGE_OFFSET - the virtual address of the start of the kernel image (top + * (VA_BITS - 1)) * VA_BITS - the maximum number of bits for virtual addresses. * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. * The module space lives between the addresses given by TASK_SIZE * and PAGE_OFFSET - it must be within 128MB of the kernel text. 
*/ -#define PAGE_OFFSET UL(0xffffffc000000000) +#ifdef CONFIG_ARM64_64K_PAGES +#define VA_BITS (42) +#else +#define VA_BITS (39) +#endif +#define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) #define MODULES_END (PAGE_OFFSET) #define MODULES_VADDR (MODULES_END - SZ_64M) #define EARLYCON_IOBASE (MODULES_VADDR - SZ_4M) -#define VA_BITS (39) #define TASK_SIZE_64 (UL(1) << VA_BITS) #ifdef CONFIG_COMPAT diff --git a/arch/arm64/include/asm/pgtable-2level-hwdef.h b/arch/arm64/include/asm/pgtable-2level-hwdef.h index 0a8ed3f..2593b49 100644 --- a/arch/arm64/include/asm/pgtable-2level-hwdef.h +++ b/arch/arm64/include/asm/pgtable-2level-hwdef.h @@ -21,10 +21,10 @@ * 8192 entries of 8 bytes each, occupying a 64KB page. Levels 0 and 1 are not * used. The 2nd level table (PGD for Linux) can cover a range of 4TB, each * entry representing 512MB. The user and kernel address spaces are limited to - * 512GB and therefore we only use 1024 entries in the PGD. + * 4TB in the 64KB page configuration. */ #define PTRS_PER_PTE 8192 -#define PTRS_PER_PGD 1024 +#define PTRS_PER_PGD 8192 /* * PGDIR_SHIFT determines the size a top-level page table entry can map. diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index f0bebc5..17bd3af 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -33,7 +33,7 @@ /* * VMALLOC and SPARSEMEM_VMEMMAP ranges. */ -#define VMALLOC_START UL(0xffffff8000000000) +#define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS) #define VMALLOC_END (PAGE_OFFSET - UL(0x400000000) - SZ_64K) #define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) -- cgit v0.10.2 From aa62c2091129af81a172350b718eb35d5448cebc Mon Sep 17 00:00:00 2001 From: "T.J. Purtell" Date: Tue, 5 Nov 2013 17:07:18 +0000 Subject: arm64: compat: Clear the IT state independent of the 32-bit ARM or Thumb-2 mode The ARM architecture reference specifies that the IT state bits in the PSR must be all zeros in ARM mode or behavior is unspecified. If an ARM function is registered as a signal handler, and that signal is delivered inside a block of instructions following an IT instruction, some of the instructions at the beginning of the signal handler may be skipped if the IT state bits of the Program Status Register are not cleared by the kernel. Signed-off-by: T.J. Purtell [catalin.marinas@arm.com: code comment and commit log updated] Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index e8772c0..e51bbe7 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -446,12 +446,13 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, /* Check if the handler is written for ARM or Thumb */ thumb = handler & 1; - if (thumb) { + if (thumb) spsr |= COMPAT_PSR_T_BIT; - spsr &= ~COMPAT_PSR_IT_MASK; - } else { + else spsr &= ~COMPAT_PSR_T_BIT; - } + + /* The IT state must be cleared for both ARM and Thumb-2 */ + spsr &= ~COMPAT_PSR_IT_MASK; if (ka->sa.sa_flags & SA_RESTORER) { retcode = ptr_to_compat(ka->sa.sa_restorer); -- cgit v0.10.2 From 18ea3dbc9e5c8a53a361b17c4a5676ea6f4bcb72 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 5 Nov 2013 18:29:45 +0000 Subject: arm64: KVM: initialize HYP mode following the kernel endianness Force SCTLR_EL2.EE to 1 if the kernel is compiled as BE. 
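As a cross-check of the memory layout tables in the 42-bit VA patch above, the PAGE_OFFSET formula can be evaluated directly; this stand-alone snippet reproduces the documented values:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long all_ones = 0xffffffffffffffffULL;

		/* 4KB pages:  VA_BITS = 39 -> ffffffc000000000 (256GB linear map) */
		printf("%llx\n", all_ones << (39 - 1));
		/* 64KB pages: VA_BITS = 42 -> fffffe0000000000 (2TB linear map) */
		printf("%llx\n", all_ones << (42 - 1));
		return 0;
	}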
Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index ba84e67..2b0244d 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -74,7 +74,10 @@ __do_hyp_init: msr mair_el2, x4 isb - mov x4, #SCTLR_EL2_FLAGS + mrs x4, sctlr_el2 + and x4, x4, #SCTLR_EL2_EE // preserve endianness of EL2 + ldr x5, =SCTLR_EL2_FLAGS + orr x4, x4, x5 msr sctlr_el2, x4 isb -- cgit v0.10.2 From c5b2c0f5203b3bc678a8967daedf7114029975ae Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 5 Nov 2013 18:29:46 +0000 Subject: arm64: KVM: vgic: byteswap GICv2 access on world switch if BE Ensure that accesses to the GICH_* registers are byteswapped when the kernel is compiled as big-endian. Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 1ac0bbb..3b47c36 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -403,6 +403,14 @@ __kvm_hyp_code_start: ldr w9, [x2, #GICH_ELRSR0] ldr w10, [x2, #GICH_ELRSR1] ldr w11, [x2, #GICH_APR] +CPU_BE( rev w4, w4 ) +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) +CPU_BE( rev w7, w7 ) +CPU_BE( rev w8, w8 ) +CPU_BE( rev w9, w9 ) +CPU_BE( rev w10, w10 ) +CPU_BE( rev w11, w11 ) str w4, [x3, #VGIC_CPU_HCR] str w5, [x3, #VGIC_CPU_VMCR] @@ -421,6 +429,7 @@ __kvm_hyp_code_start: ldr w4, [x3, #VGIC_CPU_NR_LR] add x3, x3, #VGIC_CPU_LR 1: ldr w5, [x2], #4 +CPU_BE( rev w5, w5 ) str w5, [x3], #4 sub w4, w4, #1 cbnz w4, 1b @@ -446,6 +455,9 @@ __kvm_hyp_code_start: ldr w4, [x3, #VGIC_CPU_HCR] ldr w5, [x3, #VGIC_CPU_VMCR] ldr w6, [x3, #VGIC_CPU_APR] +CPU_BE( rev w4, w4 ) +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) str w4, [x2, #GICH_HCR] str w5, [x2, #GICH_VMCR] @@ -456,6 +468,7 @@ __kvm_hyp_code_start: ldr w4, [x3, #VGIC_CPU_NR_LR] add x3, x3, #VGIC_CPU_LR 1: ldr w5, [x3], #4 +CPU_BE( rev w5, w5 ) str w5, [x2], #4 sub w4, w4, #1 cbnz w4, 1b -- cgit v0.10.2 From 61c77e0802719efce8966619cdc4234de7f252c1 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 6 Nov 2013 11:42:41 +0000 Subject: arm64: locks: Remove CONFIG_GENERIC_LOCKBREAK Commit 52ea2a560a9d (arm64: locks: introduce ticket-based spinlock implementation) introduces the arch_spin_is_contended() function making CONFIG_GENERIC_LOCKBREAK unnecessary. Signed-off-by: Catalin Marinas Acked-by: Will Deacon diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index bbb5d94..ce6ae94 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -62,10 +62,6 @@ config LOCKDEP_SUPPORT config TRACE_IRQFLAGS_SUPPORT def_bool y -config GENERIC_LOCKBREAK - def_bool y - depends on SMP && PREEMPT - config RWSEM_GENERIC_SPINLOCK def_bool y -- cgit v0.10.2 From 67317c2689567c24d18e0dd43ab6d409fd42dc6e Mon Sep 17 00:00:00 2001 From: Sudeep KarkadaNagesha Date: Thu, 7 Nov 2013 15:25:44 +0000 Subject: ARM64: /proc/interrupts: display IPIs of online CPUs only The non-IPI interrupts are displayed only for the online cpus from show_interrupts in kernel/irq/proc.c before calling arch_show_interrupts(). As a result, the column headers and the IPI count don't match if any CPU is offline. This patch fixes show_ipi_list to display IPIs for online CPUs only. 
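The underlying rule, in a minimal sketch assuming the surrounding smp.c context (names as in the diff below): the generic show_interrupts() emits one column header per online CPU, so arch_show_interrupts() must walk the same mask:

	/* Illustrative only: per-CPU counters printed under per-online-CPU
	 * column headers must iterate cpu_online_mask, not cpu_present_mask. */
	static void show_ipi_counts(struct seq_file *p, unsigned int ipi)
	{
		int cpu;

		for_each_online_cpu(cpu)
			seq_printf(p, "%10u ", __get_irq_stat(cpu, ipi_irqs[ipi]));
		seq_putc(p, '\n');
	}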
Signed-off-by: Sudeep KarkadaNagesha Cc: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 3abb9e7..a5aeefa 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -455,7 +455,7 @@ void show_ipi_list(struct seq_file *p, int prec) for (i = 0; i < NR_IPI; i++) { seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i + IPI_RESCHEDULE, prec >= 4 ? " " : ""); - for_each_present_cpu(cpu) + for_each_online_cpu(cpu) seq_printf(p, "%10u ", __get_irq_stat(cpu, ipi_irqs[i])); seq_printf(p, " %s\n", ipi_types[i]); -- cgit v0.10.2