From c7db4ff5d2b459a579d348532a92fd5885520ce6 Mon Sep 17 00:00:00 2001
From: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
Date: Tue, 28 Jan 2014 11:20:17 +0000
Subject: arm64: Add macros to manage processor debug state

Add macros to enable and disable to manage PSTATE.D
for debugging. The macros local_dbg_save and local_dbg_restore
are moved to irqflags.h file

KGDB boot tests fail because of PSTATE.D is masked.
unmask it for debugging support

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 6231479..ee9f28e 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -43,23 +43,6 @@ enum debug_el {
 #ifndef __ASSEMBLY__
 struct task_struct;
 
-#define local_dbg_save(flags)							\
-	do {									\
-		typecheck(unsigned long, flags);				\
-		asm volatile(							\
-		"mrs	%0, daif			// local_dbg_save\n"	\
-		"msr	daifset, #8"						\
-		: "=r" (flags) : : "memory");					\
-	} while (0)
-
-#define local_dbg_restore(flags)						\
-	do {									\
-		typecheck(unsigned long, flags);				\
-		asm volatile(							\
-		"msr	daif, %0			// local_dbg_restore\n"	\
-		: : "r" (flags) : "memory");					\
-	} while (0)
-
 #define DBG_ARCH_ID_RESERVED	0	/* In case of ptrace ABI updates. */
 
 #define DBG_HOOK_HANDLED	0
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index b2fcfbc..11cc941 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -90,5 +90,28 @@ static inline int arch_irqs_disabled_flags(unsigned long flags)
 	return flags & PSR_I_BIT;
 }
 
+/*
+ * save and restore debug state
+ */
+#define local_dbg_save(flags)						\
+	do {								\
+		typecheck(unsigned long, flags);			\
+		asm volatile(						\
+		"mrs    %0, daif		// local_dbg_save\n"	\
+		"msr    daifset, #8"					\
+		: "=r" (flags) : : "memory");				\
+	} while (0)
+
+#define local_dbg_restore(flags)					\
+	do {								\
+		typecheck(unsigned long, flags);			\
+		asm volatile(						\
+		"msr    daif, %0		// local_dbg_restore\n"	\
+		: : "r" (flags) : "memory");				\
+	} while (0)
+
+#define local_dbg_enable()	asm("msr	daifclr, #8" : : : "memory")
+#define local_dbg_disable()	asm("msr	daifset, #8" : : : "memory")
+
 #endif
 #endif
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 636ba8b..1a8127d 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -138,6 +138,7 @@ static void clear_os_lock(void *unused)
 {
 	asm volatile("msr oslar_el1, %0" : : "r" (0));
 	isb();
+	local_dbg_enable();
 }
 
 static int os_lock_notify(struct notifier_block *self,
-- 
cgit v0.10.2


From bcf5763b0d58d20e288ac52f96cbd7788e262cac Mon Sep 17 00:00:00 2001
From: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
Date: Tue, 28 Jan 2014 16:50:18 +0530
Subject: arm64: KGDB: Add Basic KGDB support

Add KGDB debug support for kernel debugging.
With this patch, basic KGDB debugging is possible.GDB register
layout is updated and GDB tool can establish connection with
target and can set/clear breakpoints.

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index ee9f28e..6e9b5b3 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -26,6 +26,53 @@
 #define DBG_ESR_EVT_HWWP	0x2
 #define DBG_ESR_EVT_BRK		0x6
 
+/*
+ * Break point instruction encoding
+ */
+#define BREAK_INSTR_SIZE		4
+
+/*
+ * ESR values expected for dynamic and compile time BRK instruction
+ */
+#define DBG_ESR_VAL_BRK(x)	(0xf2000000 | ((x) & 0xfffff))
+
+/*
+ * #imm16 values used for BRK instruction generation
+ * Allowed values for kgbd are 0x400 - 0x7ff
+ * 0x400: for dynamic BRK instruction
+ * 0x401: for compile time BRK instruction
+ */
+#define KGDB_DYN_DGB_BRK_IMM		0x400
+#define KDBG_COMPILED_DBG_BRK_IMM	0x401
+
+/*
+ * BRK instruction encoding
+ * The #imm16 value should be placed at bits[20:5] within BRK ins
+ */
+#define AARCH64_BREAK_MON	0xd4200000
+
+/*
+ * Extract byte from BRK instruction
+ */
+#define KGDB_DYN_DGB_BRK_INS_BYTE(x) \
+	((((AARCH64_BREAK_MON) & 0xffe0001f) >> (x * 8)) & 0xff)
+
+/*
+ * Extract byte from BRK #imm16
+ */
+#define KGBD_DYN_DGB_BRK_IMM_BYTE(x) \
+	(((((KGDB_DYN_DGB_BRK_IMM) & 0xffff) << 5) >> (x * 8)) & 0xff)
+
+#define KGDB_DYN_DGB_BRK_BYTE(x) \
+	(KGDB_DYN_DGB_BRK_INS_BYTE(x) | KGBD_DYN_DGB_BRK_IMM_BYTE(x))
+
+#define  KGDB_DYN_BRK_INS_BYTE0  KGDB_DYN_DGB_BRK_BYTE(0)
+#define  KGDB_DYN_BRK_INS_BYTE1  KGDB_DYN_DGB_BRK_BYTE(1)
+#define  KGDB_DYN_BRK_INS_BYTE2  KGDB_DYN_DGB_BRK_BYTE(2)
+#define  KGDB_DYN_BRK_INS_BYTE3  KGDB_DYN_DGB_BRK_BYTE(3)
+
+#define CACHE_FLUSH_IS_SAFE		1
+
 enum debug_el {
 	DBG_ACTIVE_EL0 = 0,
 	DBG_ACTIVE_EL1,
diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h
new file mode 100644
index 0000000..3c8aafc
--- /dev/null
+++ b/arch/arm64/include/asm/kgdb.h
@@ -0,0 +1,84 @@
+/*
+ * AArch64 KGDB support
+ *
+ * Based on arch/arm/include/kgdb.h
+ *
+ * Copyright (C) 2013 Cavium Inc.
+ * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KGDB_H
+#define __ARM_KGDB_H
+
+#include <linux/ptrace.h>
+#include <asm/debug-monitors.h>
+
+#ifndef	__ASSEMBLY__
+
+static inline void arch_kgdb_breakpoint(void)
+{
+	asm ("brk %0" : : "I" (KDBG_COMPILED_DBG_BRK_IMM));
+}
+
+extern void kgdb_handle_bus_error(void);
+extern int kgdb_fault_expected;
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * gdb is expecting the following registers layout.
+ *
+ * General purpose regs:
+ *     r0-r30: 64 bit
+ *     sp,pc : 64 bit
+ *     pstate  : 64 bit
+ *     Total: 34
+ * FPU regs:
+ *     f0-f31: 128 bit
+ *     Total: 32
+ * Extra regs
+ *     fpsr & fpcr: 32 bit
+ *     Total: 2
+ *
+ */
+
+#define _GP_REGS		34
+#define _FP_REGS		32
+#define _EXTRA_REGS		2
+/*
+ * general purpose registers size in bytes.
+ * pstate is only 4 bytes. subtract 4 bytes
+ */
+#define GP_REG_BYTES		(_GP_REGS * 8)
+#define DBG_MAX_REG_NUM		(_GP_REGS + _FP_REGS + _EXTRA_REGS)
+
+/*
+ * Size of I/O buffer for gdb packet.
+ * considering to hold all register contents, size is set
+ */
+
+#define BUFMAX			2048
+
+/*
+ * Number of bytes required for gdb_regs buffer.
+ * _GP_REGS: 8 bytes, _FP_REGS: 16 bytes and _EXTRA_REGS: 4 bytes each
+ * GDB fails to connect for size beyond this with error
+ * "'g' packet reply is too long"
+ */
+
+#define NUMREGBYTES	((_GP_REGS * 8) + (_FP_REGS * 16) + \
+			(_EXTRA_REGS * 4))
+
+#endif /* __ASM_KGDB_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 2d4554b..e52bcdc 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -20,6 +20,7 @@ arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
 arm64-obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND)	+= sleep.o suspend.o
 arm64-obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
+arm64-obj-$(CONFIG_KGDB)		+= kgdb.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
new file mode 100644
index 0000000..4b7a569
--- /dev/null
+++ b/arch/arm64/kernel/kgdb.c
@@ -0,0 +1,288 @@
+/*
+ * AArch64 KGDB support
+ *
+ * Based on arch/arm/kernel/kgdb.c
+ *
+ * Copyright (C) 2013 Cavium Inc.
+ * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/irq.h>
+#include <linux/kdebug.h>
+#include <linux/kgdb.h>
+#include <asm/traps.h>
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+	{ "x0", 8, offsetof(struct pt_regs, regs[0])},
+	{ "x1", 8, offsetof(struct pt_regs, regs[1])},
+	{ "x2", 8, offsetof(struct pt_regs, regs[2])},
+	{ "x3", 8, offsetof(struct pt_regs, regs[3])},
+	{ "x4", 8, offsetof(struct pt_regs, regs[4])},
+	{ "x5", 8, offsetof(struct pt_regs, regs[5])},
+	{ "x6", 8, offsetof(struct pt_regs, regs[6])},
+	{ "x7", 8, offsetof(struct pt_regs, regs[7])},
+	{ "x8", 8, offsetof(struct pt_regs, regs[8])},
+	{ "x9", 8, offsetof(struct pt_regs, regs[9])},
+	{ "x10", 8, offsetof(struct pt_regs, regs[10])},
+	{ "x11", 8, offsetof(struct pt_regs, regs[11])},
+	{ "x12", 8, offsetof(struct pt_regs, regs[12])},
+	{ "x13", 8, offsetof(struct pt_regs, regs[13])},
+	{ "x14", 8, offsetof(struct pt_regs, regs[14])},
+	{ "x15", 8, offsetof(struct pt_regs, regs[15])},
+	{ "x16", 8, offsetof(struct pt_regs, regs[16])},
+	{ "x17", 8, offsetof(struct pt_regs, regs[17])},
+	{ "x18", 8, offsetof(struct pt_regs, regs[18])},
+	{ "x19", 8, offsetof(struct pt_regs, regs[19])},
+	{ "x20", 8, offsetof(struct pt_regs, regs[20])},
+	{ "x21", 8, offsetof(struct pt_regs, regs[21])},
+	{ "x22", 8, offsetof(struct pt_regs, regs[22])},
+	{ "x23", 8, offsetof(struct pt_regs, regs[23])},
+	{ "x24", 8, offsetof(struct pt_regs, regs[24])},
+	{ "x25", 8, offsetof(struct pt_regs, regs[25])},
+	{ "x26", 8, offsetof(struct pt_regs, regs[26])},
+	{ "x27", 8, offsetof(struct pt_regs, regs[27])},
+	{ "x28", 8, offsetof(struct pt_regs, regs[28])},
+	{ "x29", 8, offsetof(struct pt_regs, regs[29])},
+	{ "x30", 8, offsetof(struct pt_regs, regs[30])},
+	{ "sp", 8, offsetof(struct pt_regs, sp)},
+	{ "pc", 8, offsetof(struct pt_regs, pc)},
+	{ "pstate", 8, offsetof(struct pt_regs, pstate)},
+	{ "v0", 16, -1 },
+	{ "v1", 16, -1 },
+	{ "v2", 16, -1 },
+	{ "v3", 16, -1 },
+	{ "v4", 16, -1 },
+	{ "v5", 16, -1 },
+	{ "v6", 16, -1 },
+	{ "v7", 16, -1 },
+	{ "v8", 16, -1 },
+	{ "v9", 16, -1 },
+	{ "v10", 16, -1 },
+	{ "v11", 16, -1 },
+	{ "v12", 16, -1 },
+	{ "v13", 16, -1 },
+	{ "v14", 16, -1 },
+	{ "v15", 16, -1 },
+	{ "v16", 16, -1 },
+	{ "v17", 16, -1 },
+	{ "v18", 16, -1 },
+	{ "v19", 16, -1 },
+	{ "v20", 16, -1 },
+	{ "v21", 16, -1 },
+	{ "v22", 16, -1 },
+	{ "v23", 16, -1 },
+	{ "v24", 16, -1 },
+	{ "v25", 16, -1 },
+	{ "v26", 16, -1 },
+	{ "v27", 16, -1 },
+	{ "v28", 16, -1 },
+	{ "v29", 16, -1 },
+	{ "v30", 16, -1 },
+	{ "v31", 16, -1 },
+	{ "fpsr", 4, -1 },
+	{ "fpcr", 4, -1 },
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	if (regno >= DBG_MAX_REG_NUM || regno < 0)
+		return NULL;
+
+	if (dbg_reg_def[regno].offset != -1)
+		memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
+		       dbg_reg_def[regno].size);
+	else
+		memset(mem, 0, dbg_reg_def[regno].size);
+	return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	if (regno >= DBG_MAX_REG_NUM || regno < 0)
+		return -EINVAL;
+
+	if (dbg_reg_def[regno].offset != -1)
+		memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
+		       dbg_reg_def[regno].size);
+	return 0;
+}
+
+void
+sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
+{
+	struct pt_regs *thread_regs;
+
+	/* Initialize to zero */
+	memset((char *)gdb_regs, 0, NUMREGBYTES);
+	thread_regs = task_pt_regs(task);
+	memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES);
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+	regs->pc = pc;
+}
+
+static int compiled_break;
+
+int kgdb_arch_handle_exception(int exception_vector, int signo,
+			       int err_code, char *remcom_in_buffer,
+			       char *remcom_out_buffer,
+			       struct pt_regs *linux_regs)
+{
+	unsigned long addr;
+	char *ptr;
+	int err;
+
+	switch (remcom_in_buffer[0]) {
+	case 'D':
+	case 'k':
+		/*
+		 * Packet D (Detach), k (kill). No special handling
+		 * is required here. Handle same as c packet.
+		 */
+	case 'c':
+		/*
+		 * Packet c (Continue) to continue executing.
+		 * Set pc to required address.
+		 * Try to read optional parameter and set pc.
+		 * If this was a compiled breakpoint, we need to move
+		 * to the next instruction else we will just breakpoint
+		 * over and over again.
+		 */
+		ptr = &remcom_in_buffer[1];
+		if (kgdb_hex2long(&ptr, &addr))
+			kgdb_arch_set_pc(linux_regs, addr);
+		else if (compiled_break == 1)
+			kgdb_arch_set_pc(linux_regs, linux_regs->pc + 4);
+
+		compiled_break = 0;
+		err = 0;
+		break;
+	default:
+		err = -1;
+	}
+	return err;
+}
+
+static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+	kgdb_handle_exception(1, SIGTRAP, 0, regs);
+	return 0;
+}
+
+static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+	compiled_break = 1;
+	kgdb_handle_exception(1, SIGTRAP, 0, regs);
+
+	return 0;
+}
+
+static struct break_hook kgdb_brkpt_hook = {
+	.esr_mask	= 0xffffffff,
+	.esr_val	= DBG_ESR_VAL_BRK(KGDB_DYN_DGB_BRK_IMM),
+	.fn		= kgdb_brk_fn
+};
+
+static struct break_hook kgdb_compiled_brkpt_hook = {
+	.esr_mask	= 0xffffffff,
+	.esr_val	= DBG_ESR_VAL_BRK(KDBG_COMPILED_DBG_BRK_IMM),
+	.fn		= kgdb_compiled_brk_fn
+};
+
+static void kgdb_call_nmi_hook(void *ignored)
+{
+	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
+}
+
+void kgdb_roundup_cpus(unsigned long flags)
+{
+	local_irq_enable();
+	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
+	local_irq_disable();
+}
+
+static int __kgdb_notify(struct die_args *args, unsigned long cmd)
+{
+	struct pt_regs *regs = args->regs;
+
+	if (kgdb_handle_exception(1, args->signr, cmd, regs))
+		return NOTIFY_DONE;
+	return NOTIFY_STOP;
+}
+
+static int
+kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+	unsigned long flags;
+	int ret;
+
+	local_irq_save(flags);
+	ret = __kgdb_notify(ptr, cmd);
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+static struct notifier_block kgdb_notifier = {
+	.notifier_call	= kgdb_notify,
+	/*
+	 * Want to be lowest priority
+	 */
+	.priority	= -INT_MAX,
+};
+
+/*
+ * kgdb_arch_init - Perform any architecture specific initalization.
+ * This function will handle the initalization of any architecture
+ * specific callbacks.
+ */
+int kgdb_arch_init(void)
+{
+	int ret = register_die_notifier(&kgdb_notifier);
+
+	if (ret != 0)
+		return ret;
+
+	register_break_hook(&kgdb_brkpt_hook);
+	register_break_hook(&kgdb_compiled_brkpt_hook);
+	return 0;
+}
+
+/*
+ * kgdb_arch_exit - Perform any architecture specific uninitalization.
+ * This function will handle the uninitalization of any architecture
+ * specific callbacks, for dynamic registration and unregistration.
+ */
+void kgdb_arch_exit(void)
+{
+	unregister_break_hook(&kgdb_brkpt_hook);
+	unregister_break_hook(&kgdb_compiled_brkpt_hook);
+	unregister_die_notifier(&kgdb_notifier);
+}
+
+/*
+ * ARM instructions are always in LE.
+ * Break instruction is encoded in LE format
+ */
+struct kgdb_arch arch_kgdb_ops = {
+	.gdb_bpt_instr = {
+		KGDB_DYN_BRK_INS_BYTE0,
+		KGDB_DYN_BRK_INS_BYTE1,
+		KGDB_DYN_BRK_INS_BYTE2,
+		KGDB_DYN_BRK_INS_BYTE3,
+	}
+};
-- 
cgit v0.10.2


From 44679a4f142b69ae0c68ed815a48bbd164827281 Mon Sep 17 00:00:00 2001
From: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
Date: Tue, 28 Jan 2014 11:20:19 +0000
Subject: arm64: KGDB: Add step debugging support

Add KGDB software step debugging support for EL1 debug
in AArch64 mode.

KGDB registers step debug handler with debug monitor.
On receiving 'step' command from GDB tool, target enables
software step debugging and step address is updated in ELR.

Software Step debugging is disabled when 'continue' command
is received

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index 4b7a569..75c9cf1 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -137,13 +137,26 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
 
 static int compiled_break;
 
+static void kgdb_arch_update_addr(struct pt_regs *regs,
+				char *remcom_in_buffer)
+{
+	unsigned long addr;
+	char *ptr;
+
+	ptr = &remcom_in_buffer[1];
+	if (kgdb_hex2long(&ptr, &addr))
+		kgdb_arch_set_pc(regs, addr);
+	else if (compiled_break == 1)
+		kgdb_arch_set_pc(regs, regs->pc + 4);
+
+	compiled_break = 0;
+}
+
 int kgdb_arch_handle_exception(int exception_vector, int signo,
 			       int err_code, char *remcom_in_buffer,
 			       char *remcom_out_buffer,
 			       struct pt_regs *linux_regs)
 {
-	unsigned long addr;
-	char *ptr;
 	int err;
 
 	switch (remcom_in_buffer[0]) {
@@ -162,13 +175,36 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
 		 * to the next instruction else we will just breakpoint
 		 * over and over again.
 		 */
-		ptr = &remcom_in_buffer[1];
-		if (kgdb_hex2long(&ptr, &addr))
-			kgdb_arch_set_pc(linux_regs, addr);
-		else if (compiled_break == 1)
-			kgdb_arch_set_pc(linux_regs, linux_regs->pc + 4);
+		kgdb_arch_update_addr(linux_regs, remcom_in_buffer);
+		atomic_set(&kgdb_cpu_doing_single_step, -1);
+		kgdb_single_step =  0;
+
+		/*
+		 * Received continue command, disable single step
+		 */
+		if (kernel_active_single_step())
+			kernel_disable_single_step();
+
+		err = 0;
+		break;
+	case 's':
+		/*
+		 * Update step address value with address passed
+		 * with step packet.
+		 * On debug exception return PC is copied to ELR
+		 * So just update PC.
+		 * If no step address is passed, resume from the address
+		 * pointed by PC. Do not update PC
+		 */
+		kgdb_arch_update_addr(linux_regs, remcom_in_buffer);
+		atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id());
+		kgdb_single_step =  1;
 
-		compiled_break = 0;
+		/*
+		 * Enable single step handling
+		 */
+		if (!kernel_active_single_step())
+			kernel_enable_single_step(linux_regs);
 		err = 0;
 		break;
 	default:
@@ -191,6 +227,12 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
 	return 0;
 }
 
+static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+	kgdb_handle_exception(1, SIGTRAP, 0, regs);
+	return 0;
+}
+
 static struct break_hook kgdb_brkpt_hook = {
 	.esr_mask	= 0xffffffff,
 	.esr_val	= DBG_ESR_VAL_BRK(KGDB_DYN_DGB_BRK_IMM),
@@ -203,6 +245,10 @@ static struct break_hook kgdb_compiled_brkpt_hook = {
 	.fn		= kgdb_compiled_brk_fn
 };
 
+static struct step_hook kgdb_step_hook = {
+	.fn		= kgdb_step_brk_fn
+};
+
 static void kgdb_call_nmi_hook(void *ignored)
 {
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
@@ -259,6 +305,7 @@ int kgdb_arch_init(void)
 
 	register_break_hook(&kgdb_brkpt_hook);
 	register_break_hook(&kgdb_compiled_brkpt_hook);
+	register_step_hook(&kgdb_step_hook);
 	return 0;
 }
 
@@ -271,6 +318,7 @@ void kgdb_arch_exit(void)
 {
 	unregister_break_hook(&kgdb_brkpt_hook);
 	unregister_break_hook(&kgdb_compiled_brkpt_hook);
+	unregister_step_hook(&kgdb_step_hook);
 	unregister_die_notifier(&kgdb_notifier);
 }
 
-- 
cgit v0.10.2


From d498d4b47fb3050f2f7840cc49251f87f04d1ca9 Mon Sep 17 00:00:00 2001
From: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
Date: Tue, 28 Jan 2014 16:50:20 +0530
Subject: KGDB: make kgdb_breakpoint() as noinline

The function kgdb_breakpoint() sets up break point at
compile time by calling arch_kgdb_breakpoint();
Though this call is surrounded by wmb() barrier,
the compile can still re-order the break point,
because this scheduling barrier is not a code motion
barrier in gcc.

Making kgdb_breakpoint() as noinline solves this problem
of code reording around break point instruction and also
avoids problem of being called as inline function from
other places

More details about discussion on this can be found here
http://comments.gmane.org/gmane.linux.ports.arm.kernel/269732

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 334b398..99982a7 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -1035,7 +1035,7 @@ int dbg_io_get_char(void)
  * otherwise as a quick means to stop program execution and "break" into
  * the debugger.
  */
-void kgdb_breakpoint(void)
+noinline void kgdb_breakpoint(void)
 {
 	atomic_inc(&kgdb_setting_breakpoint);
 	wmb(); /* Sync point before breakpoint */
-- 
cgit v0.10.2


From 58dcc204f18af2821f683b235bb376f9db2557f5 Mon Sep 17 00:00:00 2001
From: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
Date: Tue, 28 Jan 2014 16:50:21 +0530
Subject: misc: debug: remove compilation warnings

typecast instruction_pointer macro to unsigned long to
resolve following compiler warnings like
warning: format '%lx' expects argument of type 'long unsigned int',
but argument 2 has type 'u64' [-Wformat]

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 0e7fa49..5233966 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -164,7 +164,7 @@ static inline int valid_user_regs(struct user_pt_regs *regs)
 	return 0;
 }
 
-#define instruction_pointer(regs)	(regs)->pc
+#define instruction_pointer(regs)	((unsigned long)(regs)->pc)
 
 #ifdef CONFIG_SMP
 extern unsigned long profile_pc(struct pt_regs *regs);
-- 
cgit v0.10.2


From 9529247db9ecfc5a723e17093614e7437ab0d5bd Mon Sep 17 00:00:00 2001
From: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
Date: Tue, 28 Jan 2014 11:20:22 +0000
Subject: arm64: KGDB: Add KGDB config

Add HAVE_ARCH_KGDB for arm64 Kconfig

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 27bbcfc..0cb33a2 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -26,6 +26,7 @@ config ARM64
 	select GENERIC_TIME_VSYSCALL
 	select HARDIRQS_SW_RESEND
 	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DEBUG_BUGVERBOSE
 	select HAVE_DEBUG_KMEMLEAK
-- 
cgit v0.10.2


From d8ed442a009ecfe155b57d58f231db3d6084633d Mon Sep 17 00:00:00 2001
From: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
Date: Fri, 21 Feb 2014 05:13:49 +0000
Subject: arm64: enable processor debug state for secondary cpus

processor debug state PSTATE.D is unmasked in smp call
clear_os_lock for secondary cpus. So debug state is still
masked in normal kernel context.  With this patch, unmask
debug state on secondary boot for the cpus in normal kernel
context. Now kgdb tests passed with multicore.

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 1a8127d..13f87de 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -137,8 +137,6 @@ void disable_debug_monitors(enum debug_el el)
 static void clear_os_lock(void *unused)
 {
 	asm volatile("msr oslar_el1, %0" : : "r" (0));
-	isb();
-	local_dbg_enable();
 }
 
 static int os_lock_notify(struct notifier_block *self,
@@ -157,8 +155,9 @@ static struct notifier_block os_lock_nb = {
 static int debug_monitors_init(void)
 {
 	/* Clear the OS lock. */
-	smp_call_function(clear_os_lock, NULL, 1);
-	clear_os_lock(NULL);
+	on_each_cpu(clear_os_lock, NULL, 1);
+	isb();
+	local_dbg_enable();
 
 	/* Register hotplug handler. */
 	register_cpu_notifier(&os_lock_nb);
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 7cfb92a..5070dc3 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -160,6 +160,7 @@ asmlinkage void secondary_start_kernel(void)
 	set_cpu_online(cpu, true);
 	complete(&cpu_running);
 
+	local_dbg_enable();
 	local_irq_enable();
 	local_async_enable();
 
-- 
cgit v0.10.2


From 22bd1c91fe13d59cff734b69b6757adcfbd8dee9 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 4 Feb 2014 16:37:59 +0000
Subject: arm64: Extend the PCI I/O space to 16MB

The patch moves the PCI I/O space (currently at 64K) before the
earlyprintk mapping and extends it to 16MB.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
index 5e054bf..85e24c4 100644
--- a/Documentation/arm64/memory.txt
+++ b/Documentation/arm64/memory.txt
@@ -35,11 +35,13 @@ ffffffbc00000000	ffffffbdffffffff	   8GB		vmemmap
 
 ffffffbe00000000	ffffffbffbbfffff	  ~8GB		[guard, future vmmemap]
 
-ffffffbffbc00000	ffffffbffbdfffff	   2MB		earlyprintk device
+ffffffbffa000000	ffffffbffaffffff	  16MB		PCI I/O space
+
+ffffffbffb000000	ffffffbffbbfffff	  12MB		[guard]
 
-ffffffbffbe00000	ffffffbffbe0ffff	  64KB		PCI I/O space
+ffffffbffbc00000	ffffffbffbdfffff	   2MB		earlyprintk device
 
-ffffffbffbe10000	ffffffbcffffffff	  ~2MB		[guard]
+ffffffbffbe00000	ffffffbffbffffff	   2MB		[guard]
 
 ffffffbffc000000	ffffffbfffffffff	  64MB		modules
 
@@ -60,11 +62,13 @@ fffffdfc00000000	fffffdfdffffffff	   8GB		vmemmap
 
 fffffdfe00000000	fffffdfffbbfffff	  ~8GB		[guard, future vmmemap]
 
-fffffdfffbc00000	fffffdfffbdfffff	   2MB		earlyprintk device
+fffffdfffa000000	fffffdfffaffffff	  16MB		PCI I/O space
+
+fffffdfffb000000	fffffdfffbbfffff	  12MB		[guard]
 
-fffffdfffbe00000	fffffdfffbe0ffff	  64KB		PCI I/O space
+fffffdfffbc00000	fffffdfffbdfffff	   2MB		earlyprintk device
 
-fffffdfffbe10000	fffffdfffbffffff	  ~2MB		[guard]
+fffffdfffbe00000	fffffdfffbffffff	   2MB		[guard]
 
 fffffdfffc000000	fffffdffffffffff	  64MB		modules
 
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 4cc813e..7846a6b 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -121,7 +121,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
  *  I/O port access primitives.
  */
 #define IO_SPACE_LIMIT		0xffff
-#define PCI_IOBASE		((void __iomem *)(MODULES_VADDR - SZ_2M))
+#define PCI_IOBASE		((void __iomem *)(MODULES_VADDR - SZ_32M))
 
 static inline u8 inb(unsigned long addr)
 {
-- 
cgit v0.10.2


From 020c1427f3ad73c86fd566d86441e673adfe2ecb Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 11 Feb 2014 15:22:01 +0000
Subject: arm64: Convert asm/tlb.h to generic mmu_gather

Over the past couple of years, the generic mmu_gather gained range
tracking - 597e1c3580b7 (mm/mmu_gather: enable tlb flush range in generic
mmu_gather), 2b047252d087 (Fix TLB gather virtual address range
invalidation corner cases) - and tlb_fast_mode() has been removed -
29eb77825cc7 (arch, mm: Remove tlb_fast_mode()).

The new mmu_gather structure is now suitable for arm64 and this patch
converts the arch asm/tlb.h to the generic code. One functional
difference is the shift_arg_pages() case where previously the code was
flushing the full mm (no tlb_start_vma call) but now it flushes the
range given to tlb_gather_mmu() (possibly slightly more efficient
previously).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>

diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 717031a..72cadf5 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -19,115 +19,44 @@
 #ifndef __ASM_TLB_H
 #define __ASM_TLB_H
 
-#include <linux/pagemap.h>
-#include <linux/swap.h>
 
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
-
-#define MMU_GATHER_BUNDLE	8
-
-/*
- * TLB handling.  This allows us to remove pages from the page
- * tables, and efficiently handle the TLB issues.
- */
-struct mmu_gather {
-	struct mm_struct	*mm;
-	unsigned int		fullmm;
-	struct vm_area_struct	*vma;
-	unsigned long		start, end;
-	unsigned long		range_start;
-	unsigned long		range_end;
-	unsigned int		nr;
-	unsigned int		max;
-	struct page		**pages;
-	struct page		*local[MMU_GATHER_BUNDLE];
-};
+#include <asm-generic/tlb.h>
 
 /*
- * This is unnecessarily complex.  There's three ways the TLB shootdown
- * code is used:
+ * There's three ways the TLB shootdown code is used:
  *  1. Unmapping a range of vmas.  See zap_page_range(), unmap_region().
  *     tlb->fullmm = 0, and tlb_start_vma/tlb_end_vma will be called.
- *     tlb->vma will be non-NULL.
  *  2. Unmapping all vmas.  See exit_mmap().
  *     tlb->fullmm = 1, and tlb_start_vma/tlb_end_vma will be called.
- *     tlb->vma will be non-NULL.  Additionally, page tables will be freed.
+ *     Page tables will be freed.
  *  3. Unmapping argument pages.  See shift_arg_pages().
  *     tlb->fullmm = 0, but tlb_start_vma/tlb_end_vma will not be called.
- *     tlb->vma will be NULL.
  */
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
-	if (tlb->fullmm || !tlb->vma)
+	if (tlb->fullmm) {
 		flush_tlb_mm(tlb->mm);
-	else if (tlb->range_end > 0) {
-		flush_tlb_range(tlb->vma, tlb->range_start, tlb->range_end);
-		tlb->range_start = TASK_SIZE;
-		tlb->range_end = 0;
+	} else if (tlb->end > 0) {
+		struct vm_area_struct vma = { .vm_mm = tlb->mm, };
+		flush_tlb_range(&vma, tlb->start, tlb->end);
+		tlb->start = TASK_SIZE;
+		tlb->end = 0;
 	}
 }
 
 static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr)
 {
 	if (!tlb->fullmm) {
-		if (addr < tlb->range_start)
-			tlb->range_start = addr;
-		if (addr + PAGE_SIZE > tlb->range_end)
-			tlb->range_end = addr + PAGE_SIZE;
-	}
-}
-
-static inline void __tlb_alloc_page(struct mmu_gather *tlb)
-{
-	unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
-
-	if (addr) {
-		tlb->pages = (void *)addr;
-		tlb->max = PAGE_SIZE / sizeof(struct page *);
+		tlb->start = min(tlb->start, addr);
+		tlb->end = max(tlb->end, addr + PAGE_SIZE);
 	}
 }
 
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
-{
-	tlb_flush(tlb);
-	free_pages_and_swap_cache(tlb->pages, tlb->nr);
-	tlb->nr = 0;
-	if (tlb->pages == tlb->local)
-		__tlb_alloc_page(tlb);
-}
-
-static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-	tlb->mm = mm;
-	tlb->fullmm = !(start | (end+1));
-	tlb->start = start;
-	tlb->end = end;
-	tlb->vma = NULL;
-	tlb->max = ARRAY_SIZE(tlb->local);
-	tlb->pages = tlb->local;
-	tlb->nr = 0;
-	__tlb_alloc_page(tlb);
-}
-
-static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
-{
-	tlb_flush_mmu(tlb);
-
-	/* keep the page table cache within bounds */
-	check_pgt_cache();
-
-	if (tlb->pages != tlb->local)
-		free_pages((unsigned long)tlb->pages, 0);
-}
-
 /*
  * Memorize the range for the TLB flush.
  */
-static inline void
-tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr)
+static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
+					  unsigned long addr)
 {
 	tlb_add_flush(tlb, addr);
 }
@@ -137,38 +66,24 @@ tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr)
  * case where we're doing a full MM flush.  When we're doing a munmap,
  * the vmas are adjusted to only cover the region to be torn down.
  */
-static inline void
-tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
+static inline void tlb_start_vma(struct mmu_gather *tlb,
+				 struct vm_area_struct *vma)
 {
 	if (!tlb->fullmm) {
-		tlb->vma = vma;
-		tlb->range_start = TASK_SIZE;
-		tlb->range_end = 0;
+		tlb->start = TASK_SIZE;
+		tlb->end = 0;
 	}
 }
 
-static inline void
-tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
+static inline void tlb_end_vma(struct mmu_gather *tlb,
+			       struct vm_area_struct *vma)
 {
 	if (!tlb->fullmm)
 		tlb_flush(tlb);
 }
 
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
-	tlb->pages[tlb->nr++] = page;
-	VM_BUG_ON(tlb->nr > tlb->max);
-	return tlb->max - tlb->nr;
-}
-
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
-	if (!__tlb_remove_page(tlb, page))
-		tlb_flush_mmu(tlb);
-}
-
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
-	unsigned long addr)
+				  unsigned long addr)
 {
 	pgtable_page_dtor(pte);
 	tlb_add_flush(tlb, addr);
@@ -184,16 +99,5 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 }
 #endif
 
-#define pte_free_tlb(tlb, ptep, addr)	__pte_free_tlb(tlb, ptep, addr)
-#define pmd_free_tlb(tlb, pmdp, addr)	__pmd_free_tlb(tlb, pmdp, addr)
-#define pud_free_tlb(tlb, pudp, addr)	pud_free((tlb)->mm, pudp)
-
-#define tlb_migrate_finish(mm)		do { } while (0)
-
-static inline void
-tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
-{
-	tlb_add_flush(tlb, addr);
-}
 
 #endif
-- 
cgit v0.10.2


From ea8c2e1124457f266f82effc3e6558552527943a Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 17 Feb 2014 12:03:25 +0000
Subject: arm64: Extend the idmap to the whole kernel image

This patch changes the idmap page table creation during boot to cover
the whole kernel image, allowing functions like cpu_reset() to be safely
called with the physical address.

This patch also simplifies the create_block_map asm macro to no longer
take an idmap argument and always use the phys/virt/end parameters. For
the idmap case, phys == virt.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 0b281ff..61035d6 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -384,26 +384,18 @@ ENDPROC(__calc_phys_offset)
  * Preserves:	tbl, flags
  * Corrupts:	phys, start, end, pstate
  */
-	.macro	create_block_map, tbl, flags, phys, start, end, idmap=0
+	.macro	create_block_map, tbl, flags, phys, start, end
 	lsr	\phys, \phys, #BLOCK_SHIFT
-	.if	\idmap
-	and	\start, \phys, #PTRS_PER_PTE - 1	// table index
-	.else
 	lsr	\start, \start, #BLOCK_SHIFT
 	and	\start, \start, #PTRS_PER_PTE - 1	// table index
-	.endif
 	orr	\phys, \flags, \phys, lsl #BLOCK_SHIFT	// table entry
-	.ifnc	\start,\end
 	lsr	\end, \end, #BLOCK_SHIFT
 	and	\end, \end, #PTRS_PER_PTE - 1		// table end index
-	.endif
 9999:	str	\phys, [\tbl, \start, lsl #3]		// store the entry
-	.ifnc	\start,\end
 	add	\start, \start, #1			// next entry
 	add	\phys, \phys, #BLOCK_SIZE		// next block
 	cmp	\start, \end
 	b.ls	9999b
-	.endif
 	.endm
 
 /*
@@ -435,9 +427,13 @@ __create_page_tables:
 	 * Create the identity mapping.
 	 */
 	add	x0, x25, #PAGE_SIZE		// section table address
-	adr	x3, __turn_mmu_on		// virtual/physical address
+	ldr	x3, =KERNEL_START
+	add	x3, x3, x28			// __pa(KERNEL_START)
 	create_pgd_entry x25, x0, x3, x5, x6
-	create_block_map x0, x7, x3, x5, x5, idmap=1
+	ldr	x6, =KERNEL_END
+	mov	x5, x3				// __pa(KERNEL_START)
+	add	x6, x6, x28			// __pa(KERNEL_END)
+	create_block_map x0, x7, x3, x5, x6
 
 	/*
 	 * Map the kernel image (starting with PHYS_OFFSET).
@@ -445,7 +441,7 @@ __create_page_tables:
 	add	x0, x26, #PAGE_SIZE		// section table address
 	mov	x5, #PAGE_OFFSET
 	create_pgd_entry x26, x0, x5, x3, x6
-	ldr	x6, =KERNEL_END - 1
+	ldr	x6, =KERNEL_END
 	mov	x3, x24				// phys offset
 	create_block_map x0, x7, x3, x5, x6
 
-- 
cgit v0.10.2


From 09024aa61e1bc994404683e2e5b363484a15dd12 Mon Sep 17 00:00:00 2001
From: Geoff Levand <geoff@infradead.org>
Date: Tue, 17 Dec 2013 00:19:29 +0000
Subject: arm64: Fix the soft_restart routine

Change the soft_restart() routine to call cpu_reset() at its identity mapped
physical address.

The cpu_reset() routine must be called at its identity mapped physical address
so that when the MMU is turned off the instruction pointer will be at the correct
location in physical memory.

Signed-off-by: Geoff Levand <geoff@infradead.org> for Huawei, Linaro
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 1c0a9be..fa6b5bb 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -72,8 +72,17 @@ static void setup_restart(void)
 
 void soft_restart(unsigned long addr)
 {
+	typedef void (*phys_reset_t)(unsigned long);
+	phys_reset_t phys_reset;
+
 	setup_restart();
-	cpu_reset(addr);
+
+	/* Switch to the identity mapping */
+	phys_reset = (phys_reset_t)virt_to_phys(cpu_reset);
+	phys_reset(addr);
+
+	/* Should never get here */
+	BUG();
 }
 
 /*
-- 
cgit v0.10.2


From bb10eb7b4d176f408d45fb492df28bed2981a1f3 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <ritesh.harjani@gmail.com>
Date: Thu, 6 Feb 2014 17:21:51 +0530
Subject: arm64: Change misleading function names in dma-mapping

arm64_swiotlb_alloc/free_coherent name can be misleading
somtimes with CMA support being enabled after this
patch (c2104debc235b745265b64d610237a6833fd53)

Change this name to be more generic:
__dma_alloc/free_coherent

Signed-off-by: Ritesh Harjani <ritesh.harjani@gmail.com>
[catalin.marinas@arm.com: renamed arm64_swiotlb_dma_ops to coherent_swiotlb_dma_ops]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index fbd7678..985fc07 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -30,9 +30,9 @@
 struct dma_map_ops *dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
-static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size,
-					  dma_addr_t *dma_handle, gfp_t flags,
-					  struct dma_attrs *attrs)
+static void *__dma_alloc_coherent(struct device *dev, size_t size,
+				  dma_addr_t *dma_handle, gfp_t flags,
+				  struct dma_attrs *attrs)
 {
 	if (dev == NULL) {
 		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
@@ -58,9 +58,9 @@ static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size,
 	}
 }
 
-static void arm64_swiotlb_free_coherent(struct device *dev, size_t size,
-					void *vaddr, dma_addr_t dma_handle,
-					struct dma_attrs *attrs)
+static void __dma_free_coherent(struct device *dev, size_t size,
+				void *vaddr, dma_addr_t dma_handle,
+				struct dma_attrs *attrs)
 {
 	if (dev == NULL) {
 		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
@@ -78,9 +78,9 @@ static void arm64_swiotlb_free_coherent(struct device *dev, size_t size,
 	}
 }
 
-static struct dma_map_ops arm64_swiotlb_dma_ops = {
-	.alloc = arm64_swiotlb_alloc_coherent,
-	.free = arm64_swiotlb_free_coherent,
+static struct dma_map_ops coherent_swiotlb_dma_ops = {
+	.alloc = __dma_alloc_coherent,
+	.free = __dma_free_coherent,
 	.map_page = swiotlb_map_page,
 	.unmap_page = swiotlb_unmap_page,
 	.map_sg = swiotlb_map_sg_attrs,
@@ -95,7 +95,7 @@ static struct dma_map_ops arm64_swiotlb_dma_ops = {
 
 void __init arm64_swiotlb_init(void)
 {
-	dma_ops = &arm64_swiotlb_dma_ops;
+	dma_ops = &coherent_swiotlb_dma_ops;
 	swiotlb_init(1);
 }
 
-- 
cgit v0.10.2


From 16fb1a9bec6126162560f159df449e4781560807 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <nathan_lynch@mentor.com>
Date: Tue, 11 Feb 2014 22:28:42 +0000
Subject: arm64: vdso: clean up vdso_pagelist initialization

Remove some unnecessary bits that were apparently carried over from
another architecture's implementation:

- No need to get_page() the vdso text/data - these are part of the
  kernel image.
- No need for ClearPageReserved on the vdso text.
- No need to vmap the first text page to check the ELF header - this
  can be done through &vdso_start.

Also some minor cleanup:
- Use kcalloc for vdso_pagelist array allocation.
- Don't print on allocation failure, slab/slub will do that for us.

Signed-off-by: Nathan Lynch <nathan_lynch@mentor.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index a7149ca..50384fe 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -106,49 +106,31 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp)
 
 static int __init vdso_init(void)
 {
-	struct page *pg;
-	char *vbase;
-	int i, ret = 0;
+	int i;
+
+	if (memcmp(&vdso_start, "\177ELF", 4)) {
+		pr_err("vDSO is not a valid ELF object!\n");
+		return -EINVAL;
+	}
 
 	vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT;
 	pr_info("vdso: %ld pages (%ld code, %ld data) at base %p\n",
 		vdso_pages + 1, vdso_pages, 1L, &vdso_start);
 
 	/* Allocate the vDSO pagelist, plus a page for the data. */
-	vdso_pagelist = kzalloc(sizeof(struct page *) * (vdso_pages + 1),
+	vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *),
 				GFP_KERNEL);
-	if (vdso_pagelist == NULL) {
-		pr_err("Failed to allocate vDSO pagelist!\n");
+	if (vdso_pagelist == NULL)
 		return -ENOMEM;
-	}
 
 	/* Grab the vDSO code pages. */
-	for (i = 0; i < vdso_pages; i++) {
-		pg = virt_to_page(&vdso_start + i*PAGE_SIZE);
-		ClearPageReserved(pg);
-		get_page(pg);
-		vdso_pagelist[i] = pg;
-	}
-
-	/* Sanity check the shared object header. */
-	vbase = vmap(vdso_pagelist, 1, 0, PAGE_KERNEL);
-	if (vbase == NULL) {
-		pr_err("Failed to map vDSO pagelist!\n");
-		return -ENOMEM;
-	} else if (memcmp(vbase, "\177ELF", 4)) {
-		pr_err("vDSO is not a valid ELF object!\n");
-		ret = -EINVAL;
-		goto unmap;
-	}
+	for (i = 0; i < vdso_pages; i++)
+		vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE);
 
 	/* Grab the vDSO data page. */
-	pg = virt_to_page(vdso_data);
-	get_page(pg);
-	vdso_pagelist[i] = pg;
+	vdso_pagelist[i] = virt_to_page(vdso_data);
 
-unmap:
-	vunmap(vbase);
-	return ret;
+	return 0;
 }
 arch_initcall(vdso_init);
 
-- 
cgit v0.10.2


From 19e7640d1f2302c20df2733e3e3df49acb17189e Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 27 Feb 2014 12:09:22 +0000
Subject: arm64: Replace ZONE_DMA32 with ZONE_DMA

On arm64 we do not have two DMA zones, so it does not make sense to
implement ZONE_DMA32. This patch changes ZONE_DMA32 with ZONE_DMA, the
latter covering 32-bit dma address space to honour GFP_DMA allocations.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0cb33a2..33e8f84 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -86,7 +86,7 @@ config GENERIC_CSUM
 config GENERIC_CALIBRATE_DELAY
 	def_bool y
 
-config ZONE_DMA32
+config ZONE_DMA
 	def_bool y
 
 config ARCH_DMA_ADDR_T_64BIT
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 985fc07..26b2512c 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -39,9 +39,9 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
 		return NULL;
 	}
 
-	if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
+	if (IS_ENABLED(CONFIG_ZONE_DMA) &&
 	    dev->coherent_dma_mask <= DMA_BIT_MASK(32))
-		flags |= GFP_DMA32;
+		flags |= GFP_DMA;
 	if (IS_ENABLED(CONFIG_DMA_CMA)) {
 		struct page *page;
 
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index d0b4c2e..a61a4d5 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -30,6 +30,7 @@
 #include <linux/memblock.h>
 #include <linux/sort.h>
 #include <linux/of_fdt.h>
+#include <linux/dma-mapping.h>
 #include <linux/dma-contiguous.h>
 
 #include <asm/sections.h>
@@ -59,22 +60,22 @@ static int __init early_initrd(char *p)
 early_param("initrd", early_initrd);
 #endif
 
-#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
-
 static void __init zone_sizes_init(unsigned long min, unsigned long max)
 {
 	struct memblock_region *reg;
 	unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
-	unsigned long max_dma32 = min;
+	unsigned long max_dma = min;
 
 	memset(zone_size, 0, sizeof(zone_size));
 
-#ifdef CONFIG_ZONE_DMA32
 	/* 4GB maximum for 32-bit only capable devices */
-	max_dma32 = max(min, min(max, MAX_DMA32_PFN));
-	zone_size[ZONE_DMA32] = max_dma32 - min;
-#endif
-	zone_size[ZONE_NORMAL] = max - max_dma32;
+	if (IS_ENABLED(CONFIG_ZONE_DMA)) {
+		unsigned long max_dma_phys =
+			(unsigned long)dma_to_phys(NULL, DMA_BIT_MASK(32) + 1);
+		max_dma = max(min, min(max, max_dma_phys >> PAGE_SHIFT));
+		zone_size[ZONE_DMA] = max_dma - min;
+	}
+	zone_size[ZONE_NORMAL] = max - max_dma;
 
 	memcpy(zhole_size, zone_size, sizeof(zhole_size));
 
@@ -84,15 +85,15 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 
 		if (start >= max)
 			continue;
-#ifdef CONFIG_ZONE_DMA32
-		if (start < max_dma32) {
-			unsigned long dma_end = min(end, max_dma32);
-			zhole_size[ZONE_DMA32] -= dma_end - start;
+
+		if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) {
+			unsigned long dma_end = min(end, max_dma);
+			zhole_size[ZONE_DMA] -= dma_end - start;
 		}
-#endif
-		if (end > max_dma32) {
+
+		if (end > max_dma) {
 			unsigned long normal_end = min(end, max);
-			unsigned long normal_start = max(start, max_dma32);
+			unsigned long normal_start = max(start, max_dma);
 			zhole_size[ZONE_NORMAL] -= normal_end - normal_start;
 		}
 	}
-- 
cgit v0.10.2


From 3690951fc6d42f3a0903987677d0e592c49dd8db Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 27 Feb 2014 12:24:57 +0000
Subject: arm64: Use swiotlb late initialisation

Since arm64 does not support ISA, there is no need for early swiotlb
initialisation. This patch switches the DMA mapping code to
swiotlb_tlb_late_init_with_default_size(). A side effect of this is that
GFP_DMA is used for the swiotlb buffer and devices with a 32-bit
coherent mask are correctly supported.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 26b2512c..afa91a2 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -93,11 +93,17 @@ static struct dma_map_ops coherent_swiotlb_dma_ops = {
 	.mapping_error = swiotlb_dma_mapping_error,
 };
 
-void __init arm64_swiotlb_init(void)
+extern int swiotlb_late_init_with_default_size(size_t default_size);
+
+static int __init swiotlb_late_init(void)
 {
+	size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT);
+
 	dma_ops = &coherent_swiotlb_dma_ops;
-	swiotlb_init(1);
+
+	return swiotlb_late_init_with_default_size(swiotlb_size);
 }
+subsys_initcall(swiotlb_late_init);
 
 #define PREALLOC_DMA_DEBUG_ENTRIES	4096
 
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index a61a4d5..88627c4 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -262,8 +262,6 @@ static void __init free_unused_memmap(void)
  */
 void __init mem_init(void)
 {
-	arm64_swiotlb_init();
-
 	max_mapnr   = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
 
 #ifndef CONFIG_SPARSEMEM_VMEMMAP
-- 
cgit v0.10.2


From 7363590d2c4691593fd280f94b3deaeb5e83dbbd Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 21 May 2013 17:35:19 +0100
Subject: arm64: Implement coherent DMA API based on swiotlb

This patch adds support for DMA API cache maintenance on SoCs without
hardware device cache coherency.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 88932498..4c60e64 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -85,6 +85,13 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
 }
 
 /*
+ * Cache maintenance functions used by the DMA API. No to be used directly.
+ */
+extern void __dma_map_area(const void *, size_t, int);
+extern void __dma_unmap_area(const void *, size_t, int);
+extern void __dma_flush_range(const void *, const void *);
+
+/*
  * Copy user data from/to a page which is mapped into a different
  * processes address space.  Really, we want to allow our "user
  * space" model to handle this.
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index fd0c0c0..3a4572e 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -30,6 +30,8 @@
 
 #define DMA_ERROR_CODE	(~(dma_addr_t)0)
 extern struct dma_map_ops *dma_ops;
+extern struct dma_map_ops coherent_swiotlb_dma_ops;
+extern struct dma_map_ops noncoherent_swiotlb_dma_ops;
 
 static inline struct dma_map_ops *__generic_dma_ops(struct device *dev)
 {
@@ -47,6 +49,11 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 		return __generic_dma_ops(dev);
 }
 
+static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
+{
+	dev->archdata.dma_ops = ops;
+}
+
 #include <asm-generic/dma-mapping-common.h>
 
 static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 1ea9f26..97fcef5 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -166,3 +166,81 @@ ENTRY(__flush_dcache_area)
 	dsb	sy
 	ret
 ENDPROC(__flush_dcache_area)
+
+/*
+ *	__dma_inv_range(start, end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+__dma_inv_range:
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+	bic	x1, x1, x3
+1:	dc	ivac, x0			// invalidate D / U line
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(__dma_inv_range)
+
+/*
+ *	__dma_clean_range(start, end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+__dma_clean_range:
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+1:	dc	cvac, x0			// clean D / U line
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(__dma_clean_range)
+
+/*
+ *	__dma_flush_range(start, end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(__dma_flush_range)
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+1:	dc	civac, x0			// clean & invalidate D / U line
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(__dma_flush_range)
+
+/*
+ *	__dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(__dma_map_area)
+	add	x1, x1, x0
+	cmp	w2, #DMA_FROM_DEVICE
+	b.eq	__dma_inv_range
+	b	__dma_clean_range
+ENDPROC(__dma_map_area)
+
+/*
+ *	__dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(__dma_unmap_area)
+	add	x1, x1, x0
+	cmp	w2, #DMA_TO_DEVICE
+	b.ne	__dma_inv_range
+	ret
+ENDPROC(__dma_unmap_area)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index afa91a2..88fbc5e 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -78,7 +78,166 @@ static void __dma_free_coherent(struct device *dev, size_t size,
 	}
 }
 
-static struct dma_map_ops coherent_swiotlb_dma_ops = {
+static void *__dma_alloc_noncoherent(struct device *dev, size_t size,
+				     dma_addr_t *dma_handle, gfp_t flags,
+				     struct dma_attrs *attrs)
+{
+	struct page *page, **map;
+	void *ptr, *coherent_ptr;
+	int order, i;
+
+	size = PAGE_ALIGN(size);
+	order = get_order(size);
+
+	ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
+	if (!ptr)
+		goto no_mem;
+	map = kmalloc(sizeof(struct page *) << order, flags & ~GFP_DMA);
+	if (!map)
+		goto no_map;
+
+	/* remove any dirty cache lines on the kernel alias */
+	__dma_flush_range(ptr, ptr + size);
+
+	/* create a coherent mapping */
+	page = virt_to_page(ptr);
+	for (i = 0; i < (size >> PAGE_SHIFT); i++)
+		map[i] = page + i;
+	coherent_ptr = vmap(map, size >> PAGE_SHIFT, VM_MAP,
+			    pgprot_dmacoherent(pgprot_default));
+	kfree(map);
+	if (!coherent_ptr)
+		goto no_map;
+
+	return coherent_ptr;
+
+no_map:
+	__dma_free_coherent(dev, size, ptr, *dma_handle, attrs);
+no_mem:
+	*dma_handle = ~0;
+	return NULL;
+}
+
+static void __dma_free_noncoherent(struct device *dev, size_t size,
+				   void *vaddr, dma_addr_t dma_handle,
+				   struct dma_attrs *attrs)
+{
+	void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));
+
+	vunmap(vaddr);
+	__dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
+}
+
+static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
+				     unsigned long offset, size_t size,
+				     enum dma_data_direction dir,
+				     struct dma_attrs *attrs)
+{
+	dma_addr_t dev_addr;
+
+	dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
+	__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+
+	return dev_addr;
+}
+
+
+static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
+				 size_t size, enum dma_data_direction dir,
+				 struct dma_attrs *attrs)
+{
+	__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+	swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
+}
+
+static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
+				  int nelems, enum dma_data_direction dir,
+				  struct dma_attrs *attrs)
+{
+	struct scatterlist *sg;
+	int i, ret;
+
+	ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
+	for_each_sg(sgl, sg, ret, i)
+		__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+			       sg->length, dir);
+
+	return ret;
+}
+
+static void __swiotlb_unmap_sg_attrs(struct device *dev,
+				     struct scatterlist *sgl, int nelems,
+				     enum dma_data_direction dir,
+				     struct dma_attrs *attrs)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nelems, i)
+		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+				 sg->length, dir);
+	swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
+}
+
+static void __swiotlb_sync_single_for_cpu(struct device *dev,
+					  dma_addr_t dev_addr, size_t size,
+					  enum dma_data_direction dir)
+{
+	__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+	swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
+}
+
+static void __swiotlb_sync_single_for_device(struct device *dev,
+					     dma_addr_t dev_addr, size_t size,
+					     enum dma_data_direction dir)
+{
+	swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
+	__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+}
+
+static void __swiotlb_sync_sg_for_cpu(struct device *dev,
+				      struct scatterlist *sgl, int nelems,
+				      enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nelems, i)
+		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+				 sg->length, dir);
+	swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
+}
+
+static void __swiotlb_sync_sg_for_device(struct device *dev,
+					 struct scatterlist *sgl, int nelems,
+					 enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
+	for_each_sg(sgl, sg, nelems, i)
+		__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+			       sg->length, dir);
+}
+
+struct dma_map_ops noncoherent_swiotlb_dma_ops = {
+	.alloc = __dma_alloc_noncoherent,
+	.free = __dma_free_noncoherent,
+	.map_page = __swiotlb_map_page,
+	.unmap_page = __swiotlb_unmap_page,
+	.map_sg = __swiotlb_map_sg_attrs,
+	.unmap_sg = __swiotlb_unmap_sg_attrs,
+	.sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
+	.sync_single_for_device = __swiotlb_sync_single_for_device,
+	.sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
+	.sync_sg_for_device = __swiotlb_sync_sg_for_device,
+	.dma_supported = swiotlb_dma_supported,
+	.mapping_error = swiotlb_dma_mapping_error,
+};
+EXPORT_SYMBOL(noncoherent_swiotlb_dma_ops);
+
+struct dma_map_ops coherent_swiotlb_dma_ops = {
 	.alloc = __dma_alloc_coherent,
 	.free = __dma_free_coherent,
 	.map_page = swiotlb_map_page,
@@ -92,6 +251,7 @@ static struct dma_map_ops coherent_swiotlb_dma_ops = {
 	.dma_supported = swiotlb_dma_supported,
 	.mapping_error = swiotlb_dma_mapping_error,
 };
+EXPORT_SYMBOL(coherent_swiotlb_dma_ops);
 
 extern int swiotlb_late_init_with_default_size(size_t default_size);
 
-- 
cgit v0.10.2


From 288ac26cc2334e5e6ecad6416e9bf750691afd84 Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Fri, 28 Feb 2014 09:57:47 +0000
Subject: arm64: remove redundant "psci:" prefixes

Since 652af899799354049b273af897b798b8f03fdd88 "arm64: factor out spin-table
boot method" psci prefix's been introduced. We have a common pr_fmt, so clean
them up.

Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 4f97db3..83ebee8 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -251,7 +251,7 @@ static int cpu_psci_cpu_boot(unsigned int cpu)
 {
 	int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry));
 	if (err)
-		pr_err("psci: failed to boot CPU%d (%d)\n", cpu, err);
+		pr_err("failed to boot CPU%d (%d)\n", cpu, err);
 
 	return err;
 }
@@ -278,7 +278,7 @@ static void cpu_psci_cpu_die(unsigned int cpu)
 
 	ret = psci_ops.cpu_off(state);
 
-	pr_crit("psci: unable to power off CPU%u (%d)\n", cpu, ret);
+	pr_crit("unable to power off CPU%u (%d)\n", cpu, ret);
 }
 #endif
 
-- 
cgit v0.10.2


From 64b4f60f497058f1c6ba118a0260249ee5c091a6 Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Fri, 28 Feb 2014 09:57:33 +0000
Subject: arm64: remove return value form psci_init()

psci_init() is written to return err code if something goes wrong. However,
the single user, setup_arch(), doesn't care about it. Moreover, every error
path is supplied with a clear message which is enough for pleasant debugging.

Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h
index e5312ea..d15ab8b4 100644
--- a/arch/arm64/include/asm/psci.h
+++ b/arch/arm64/include/asm/psci.h
@@ -14,6 +14,6 @@
 #ifndef __ASM_PSCI_H
 #define __ASM_PSCI_H
 
-int psci_init(void);
+void psci_init(void);
 
 #endif /* __ASM_PSCI_H */
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 83ebee8..ea4828a 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -176,22 +176,20 @@ static const struct of_device_id psci_of_match[] __initconst = {
 	{},
 };
 
-int __init psci_init(void)
+void __init psci_init(void)
 {
 	struct device_node *np;
 	const char *method;
 	u32 id;
-	int err = 0;
 
 	np = of_find_matching_node(NULL, psci_of_match);
 	if (!np)
-		return -ENODEV;
+		return;
 
 	pr_info("probing function IDs from device-tree\n");
 
 	if (of_property_read_string(np, "method", &method)) {
 		pr_warning("missing \"method\" property\n");
-		err = -ENXIO;
 		goto out_put_node;
 	}
 
@@ -201,7 +199,6 @@ int __init psci_init(void)
 		invoke_psci_fn = __invoke_psci_fn_smc;
 	} else {
 		pr_warning("invalid \"method\" property: %s\n", method);
-		err = -EINVAL;
 		goto out_put_node;
 	}
 
@@ -227,7 +224,7 @@ int __init psci_init(void)
 
 out_put_node:
 	of_node_put(np);
-	return err;
+	return;
 }
 
 #ifdef CONFIG_SMP
-- 
cgit v0.10.2


From addea9ef055bf5a10f9ec967b1e0443ceb44bd83 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Mon, 24 Feb 2014 02:27:57 +0000
Subject: cpufreq: enable ARM drivers on arm64

Enable cpufreq and power kconfig menus on arm64 along with arm cpufreq
drivers. The power menu is needed for OPP support. At least on Calxeda
systems, the same cpufreq driver is used for arm and arm64 based
systems.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 33e8f84..c2056ca 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -304,6 +304,14 @@ source "drivers/cpuidle/Kconfig"
 
 endmenu
 
+menu "Power management options"
+
+source "kernel/power/Kconfig"
+
+source "drivers/cpufreq/Kconfig"
+
+endmenu
+
 source "net/Kconfig"
 
 source "drivers/Kconfig"
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 4b029c0..1fbe11f 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -200,7 +200,7 @@ source "drivers/cpufreq/Kconfig.x86"
 endmenu
 
 menu "ARM CPU frequency scaling drivers"
-depends on ARM
+depends on ARM || ARM64
 source "drivers/cpufreq/Kconfig.arm"
 endmenu
 
-- 
cgit v0.10.2


From bff705950e2cdcf35641dee35eb14bad9ed49e8f Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 14 Aug 2013 09:54:54 +0100
Subject: arm64: remove unnecessary cache flush at boot

Currently we flush the entire dcache at boot within __cpu_setup, but
this is unnecessary as the booting protocol demands that the dcache is
invalid and off upon entering the kernel. The presence of the cache
flush only serves to hide bugs in bootloaders, and is not safe in the
presence of SMP.

In an SMP boot scenario the CPUs enter coherency outside of the kernel,
and the primary CPU enables its caches before bringing up secondary
CPUs. Therefore if any secondary CPU has an entry in its cache (in
violation of the boot protocol), the primary CPU might snoop it even if
the secondary CPU's cache is disabled. The boot-time cache flush only
serves to hide a firmware bug, and slows down a cpu boot unnecessarily.

This patch removes the unnecessary boot-time cache flush.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
[catalin.marinas@arm.com: make __flush_dcache_all local only]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 97fcef5..c46f48b 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -30,7 +30,7 @@
  *
  *	Corrupted registers: x0-x7, x9-x11
  */
-ENTRY(__flush_dcache_all)
+__flush_dcache_all:
 	dsb	sy				// ensure ordering with previous memory accesses
 	mrs	x0, clidr_el1			// read clidr
 	and	x3, x0, #0x7000000		// extract loc from clidr
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 1333e6f9..e0ef63c 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -173,12 +173,6 @@ ENDPROC(cpu_do_switch_mm)
  *	value of the SCTLR_EL1 register.
  */
 ENTRY(__cpu_setup)
-	/*
-	 * Preserve the link register across the function call.
-	 */
-	mov	x28, lr
-	bl	__flush_dcache_all
-	mov	lr, x28
 	ic	iallu				// I+BTB cache invalidate
 	tlbi	vmalle1is			// invalidate I + D TLBs
 	dsb	sy
-- 
cgit v0.10.2


From 66d6e3b385778c2eef3b172a037235516207393e Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 3 Mar 2014 07:34:44 +0000
Subject: binfmt_elf: add ELF_HWCAP2 to compat auxv entries

Add ELF_HWCAP2 to the set of auxv entries that is passed to
a 32-bit ELF program running in 32-bit compat mode under a
64-bit kernel.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index a81147e..4d24d17 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -88,6 +88,11 @@ static void cputime_to_compat_timeval(const cputime_t cputime,
 #define	ELF_HWCAP		COMPAT_ELF_HWCAP
 #endif
 
+#ifdef	COMPAT_ELF_HWCAP2
+#undef	ELF_HWCAP2
+#define	ELF_HWCAP2		COMPAT_ELF_HWCAP2
+#endif
+
 #ifdef	COMPAT_ARCH_DLINFO
 #undef	ARCH_DLINFO
 #define	ARCH_DLINFO		COMPAT_ARCH_DLINFO
-- 
cgit v0.10.2


From 28964d32d495a0753986d464c48c8e1ae73699be Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 3 Mar 2014 07:34:45 +0000
Subject: arm64: add AT_HWCAP2 support for 32-bit compat

Add support for the ELF auxv entry AT_HWCAP2 when running 32-bit
ELF binaries in compat mode.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 6cddbb0..9a4cbd6 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -41,7 +41,8 @@
 
 #ifdef CONFIG_COMPAT
 #define COMPAT_ELF_HWCAP	(compat_elf_hwcap)
-extern unsigned int compat_elf_hwcap;
+#define COMPAT_ELF_HWCAP2	(compat_elf_hwcap2)
+extern unsigned int compat_elf_hwcap, compat_elf_hwcap2;
 #endif
 
 extern unsigned long elf_hwcap;
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index c8e9eff..349c492 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -69,6 +69,7 @@ EXPORT_SYMBOL_GPL(elf_hwcap);
 				 COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
 				 COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV)
 unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
+unsigned int compat_elf_hwcap2 __read_mostly;
 #endif
 
 static const char *cpu_name;
-- 
cgit v0.10.2


From 4cf761cdccc3b050f768f25dc36342cdfec4efdd Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 3 Mar 2014 07:34:46 +0000
Subject: arm64: advertise ARMv8 extensions to 32-bit compat ELF binaries

This adds support for advertising the presence of ARMv8 Crypto
Extensions in the Aarch32 execution state to 32-bit ELF binaries
running in 32-bit compat mode under the arm64 kernel.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 9a4cbd6..024c461 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -32,6 +32,12 @@
 #define COMPAT_HWCAP_IDIV	(COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT)
 #define COMPAT_HWCAP_EVTSTRM	(1 << 21)
 
+#define COMPAT_HWCAP2_AES	(1 << 0)
+#define COMPAT_HWCAP2_PMULL	(1 << 1)
+#define COMPAT_HWCAP2_SHA1	(1 << 2)
+#define COMPAT_HWCAP2_SHA2	(1 << 3)
+#define COMPAT_HWCAP2_CRC32	(1 << 4)
+
 #ifndef __ASSEMBLY__
 /*
  * This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 349c492..67da307 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -243,6 +243,38 @@ static void __init setup_processor(void)
 	block = (features >> 16) & 0xf;
 	if (block && !(block & 0x8))
 		elf_hwcap |= HWCAP_CRC32;
+
+#ifdef CONFIG_COMPAT
+	/*
+	 * ID_ISAR5_EL1 carries similar information as above, but pertaining to
+	 * the Aarch32 32-bit execution state.
+	 */
+	features = read_cpuid(ID_ISAR5_EL1);
+	block = (features >> 4) & 0xf;
+	if (!(block & 0x8)) {
+		switch (block) {
+		default:
+		case 2:
+			compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL;
+		case 1:
+			compat_elf_hwcap2 |= COMPAT_HWCAP2_AES;
+		case 0:
+			break;
+		}
+	}
+
+	block = (features >> 8) & 0xf;
+	if (block && !(block & 0x8))
+		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1;
+
+	block = (features >> 12) & 0xf;
+	if (block && !(block & 0x8))
+		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2;
+
+	block = (features >> 16) & 0xf;
+	if (block && !(block & 0x8))
+		compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32;
+#endif
 }
 
 static void __init setup_machine_fdt(phys_addr_t dt_phys)
-- 
cgit v0.10.2


From f6e763b93a6cd3411fd8df925344022719bcba62 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Tue, 4 Mar 2014 07:51:17 +0000
Subject: arm64: topology: Implement basic CPU topology support

Add basic CPU topology support to arm64, based on the existing pre-v8
code and some work done by Mark Hambleton.  This patch does not
implement any topology discovery support since that should be based on
information from firmware, it merely implements the scaffolding for
integration of topology support in the architecture.

No locking of the topology data is done since it is only modified during
CPU bringup with external serialisation from the SMP code.

The goal is to separate the architecture hookup for providing topology
information from the DT parsing in order to ease review and avoid
blocking the architecture code (which will be built on by other work)
with the DT code review by providing something simple and basic.

Following patches will implement support for interpreting topology
information from MPIDR and for parsing the DT topology bindings for ARM,
similar patches will be needed for ACPI.

Signed-off-by: Mark Brown <broonie@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
[catalin.marinas@arm.com: removed CONFIG_CPU_TOPOLOGY, always on if SMP]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c2056ca..140cd1a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -165,6 +165,22 @@ config SMP
 
 	  If you don't know what to do here, say N.
 
+config SCHED_MC
+	bool "Multi-core scheduler support"
+	depends on SMP
+	help
+	  Multi-core scheduler support improves the CPU scheduler's decision
+	  making when dealing with multi-core CPU chips at a cost of slightly
+	  increased overhead in some places. If unsure say N here.
+
+config SCHED_SMT
+	bool "SMT scheduler support"
+	depends on SMP
+	help
+	  Improves the CPU scheduler's decision making when dealing with
+	  MultiThreading at a cost of slightly increased overhead in some
+	  places. If unsure say N here.
+
 config NR_CPUS
 	int "Maximum number of CPUs (2-32)"
 	range 2 32
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
new file mode 100644
index 0000000..0172e6d
--- /dev/null
+++ b/arch/arm64/include/asm/topology.h
@@ -0,0 +1,39 @@
+#ifndef __ASM_TOPOLOGY_H
+#define __ASM_TOPOLOGY_H
+
+#ifdef CONFIG_SMP
+
+#include <linux/cpumask.h>
+
+struct cpu_topology {
+	int thread_id;
+	int core_id;
+	int cluster_id;
+	cpumask_t thread_sibling;
+	cpumask_t core_sibling;
+};
+
+extern struct cpu_topology cpu_topology[NR_CPUS];
+
+#define topology_physical_package_id(cpu)	(cpu_topology[cpu].cluster_id)
+#define topology_core_id(cpu)		(cpu_topology[cpu].core_id)
+#define topology_core_cpumask(cpu)	(&cpu_topology[cpu].core_sibling)
+#define topology_thread_cpumask(cpu)	(&cpu_topology[cpu].thread_sibling)
+
+#define mc_capable()	(cpu_topology[0].cluster_id != -1)
+#define smt_capable()	(cpu_topology[0].thread_id != -1)
+
+void init_cpu_topology(void);
+void store_cpu_topology(unsigned int cpuid);
+const struct cpumask *cpu_coregroup_mask(int cpu);
+
+#else
+
+static inline void init_cpu_topology(void) { }
+static inline void store_cpu_topology(unsigned int cpuid) { }
+
+#endif
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index e52bcdc..cfee827 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -14,7 +14,7 @@ arm64-obj-y		:= cputable.o debug-monitors.o entry.o irq.o fpsimd.o	\
 arm64-obj-$(CONFIG_COMPAT)		+= sys32.o kuser32.o signal32.o 	\
 					   sys_compat.o
 arm64-obj-$(CONFIG_MODULES)		+= arm64ksyms.o module.o
-arm64-obj-$(CONFIG_SMP)			+= smp.o smp_spin_table.o
+arm64-obj-$(CONFIG_SMP)			+= smp.o smp_spin_table.o topology.o
 arm64-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o
 arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
 arm64-obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 5070dc3..f0a141d 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -114,6 +114,11 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	return ret;
 }
 
+static void smp_store_cpu_info(unsigned int cpuid)
+{
+	store_cpu_topology(cpuid);
+}
+
 /*
  * This is the secondary CPU boot entry.  We're using this CPUs
  * idle thread stack, but a set of temporary page tables.
@@ -152,6 +157,8 @@ asmlinkage void secondary_start_kernel(void)
 	 */
 	notify_cpu_starting(cpu);
 
+	smp_store_cpu_info(cpu);
+
 	/*
 	 * OK, now it's safe to let the boot CPU continue.  Wait for
 	 * the CPU migration code to notice that the CPU is online
@@ -391,6 +398,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	int err;
 	unsigned int cpu, ncores = num_possible_cpus();
 
+	init_cpu_topology();
+
+	smp_store_cpu_info(smp_processor_id());
+
 	/*
 	 * are we trying to boot more cores than exist?
 	 */
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
new file mode 100644
index 0000000..3e06b0b
--- /dev/null
+++ b/arch/arm64/kernel/topology.c
@@ -0,0 +1,95 @@
+/*
+ * arch/arm64/kernel/topology.c
+ *
+ * Copyright (C) 2011,2013,2014 Linaro Limited.
+ *
+ * Based on the arm32 version written by Vincent Guittot in turn based on
+ * arch/sh/kernel/topology.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/node.h>
+#include <linux/nodemask.h>
+#include <linux/sched.h>
+
+#include <asm/topology.h>
+
+/*
+ * cpu topology table
+ */
+struct cpu_topology cpu_topology[NR_CPUS];
+EXPORT_SYMBOL_GPL(cpu_topology);
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+	return &cpu_topology[cpu].core_sibling;
+}
+
+static void update_siblings_masks(unsigned int cpuid)
+{
+	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
+	int cpu;
+
+	if (cpuid_topo->cluster_id == -1) {
+		/*
+		 * DT does not contain topology information for this cpu
+		 * reset it to default behaviour
+		 */
+		pr_debug("CPU%u: No topology information configured\n", cpuid);
+		cpuid_topo->core_id = 0;
+		cpumask_set_cpu(cpuid, &cpuid_topo->core_sibling);
+		cpumask_set_cpu(cpuid, &cpuid_topo->thread_sibling);
+		return;
+	}
+
+	/* update core and thread sibling masks */
+	for_each_possible_cpu(cpu) {
+		cpu_topo = &cpu_topology[cpu];
+
+		if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
+		if (cpu != cpuid)
+			cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+		if (cpuid_topo->core_id != cpu_topo->core_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
+		if (cpu != cpuid)
+			cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+	}
+}
+
+void store_cpu_topology(unsigned int cpuid)
+{
+	update_siblings_masks(cpuid);
+}
+
+/*
+ * init_cpu_topology is called at boot when only one cpu is running
+ * which prevent simultaneous write access to cpu_topology array
+ */
+void __init init_cpu_topology(void)
+{
+	unsigned int cpu;
+
+	/* init core mask and power*/
+	for_each_possible_cpu(cpu) {
+		struct cpu_topology *cpu_topo = &cpu_topology[cpu];
+
+		cpu_topo->thread_id = -1;
+		cpu_topo->core_id =  -1;
+		cpu_topo->cluster_id = -1;
+		cpumask_clear(&cpu_topo->core_sibling);
+		cpumask_clear(&cpu_topo->thread_sibling);
+	}
+}
-- 
cgit v0.10.2


From d152d22a18c240286c19997a6249ee76ea055926 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 10 Mar 2014 10:36:52 +0000
Subject: arm64: barriers: add dmb barrier

Commit 8adbf57fc429 ("irqchip: gic: use dmb ishst instead of dsb when
raising a softirq") added an explicit dmb(...) call to the GIC driver.

This patch adds a simple dmb() macro to arm64, which expands to a DMB SY
instruction.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 409ca37..66eb764 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -25,6 +25,7 @@
 #define wfi()		asm volatile("wfi" : : : "memory")
 
 #define isb()		asm volatile("isb" : : : "memory")
+#define dmb(opt)	asm volatile("dmb sy" : : : "memory")
 #define dsb(opt)	asm volatile("dsb sy" : : : "memory")
 
 #define mb()		dsb()
-- 
cgit v0.10.2


From de2db7432917a82b62d55bb59635586eeca6d1bd Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 12 Mar 2014 16:07:06 +0000
Subject: arm64: Make DMA coherent and strongly ordered mappings not executable

pgprot_{dmacoherent,writecombine,noncached} don't need to generate
executable mappings with side-effects like __sync_icache_dcache() being
called when the mapping is in user space.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>
Tested-by: Laura Abbott <lauraa@codeaurora.org>
Tested-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>
Cc: <stable@vger.kernel.org>

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b524dcd..2d3cede 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -286,11 +286,11 @@ static inline int has_transparent_hugepage(void)
  * Mark the prot value as uncacheable and unbufferable.
  */
 #define pgprot_noncached(prot) \
-	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE) | PTE_PXN | PTE_UXN)
 #define pgprot_writecombine(prot) \
-	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC))
+	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
 #define pgprot_dmacoherent(prot) \
-	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC))
+	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
 #define __HAVE_PHYS_MEM_ACCESS_PROT
 struct file;
 extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
-- 
cgit v0.10.2


From 71fdb6bf61bf0692f004f9daf5650392c0cfe300 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 12 Mar 2014 16:28:09 +0000
Subject: arm64: Do not synchronise I and D caches for special ptes

Special pte mappings are not intended to be executable and do not even
have an associated struct page. This patch ensures that we do not call
__sync_icache_dcache() on such ptes.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Steve Capper <Steve.Capper@arm.com>
Tested-by: Laura Abbott <lauraa@codeaurora.org>
Tested-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>
Cc: <stable@vger.kernel.org>

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 2d3cede..72c9ac3 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -199,7 +199,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
 {
 	if (pte_valid_user(pte)) {
-		if (pte_exec(pte))
+		if (!pte_special(pte) && pte_exec(pte))
 			__sync_icache_dcache(pte, addr);
 		if (pte_dirty(pte) && pte_write(pte))
 			pte_val(pte) &= ~PTE_RDONLY;
-- 
cgit v0.10.2


From 87366d8cf7b3f6dc34633938aa8766e5a390ce33 Mon Sep 17 00:00:00 2001
From: Radha Mohan Chintakuntla <rchintakuntla@cavium.com>
Date: Fri, 7 Mar 2014 08:49:25 +0000
Subject: arm64: Add boot time configuration of Intermediate Physical Address
 size

ARMv8 supports a range of physical address bit sizes. The PARange bits
from ID_AA64MMFR0_EL1 register are read during boot-time and the
intermediate physical address size bits are written in the translation
control registers (TCR_EL1 and VTCR_EL2).

There is no change in the VA bits and levels of translation.

Signed-off-by: Radha Mohan Chintakuntla <rchintakuntla@cavium.com>
Reviewed-by: Will Deacon <Will.deacon@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 0eb3986..21ef48d 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -106,7 +106,6 @@
 
 /* VTCR_EL2 Registers bits */
 #define VTCR_EL2_PS_MASK	(7 << 16)
-#define VTCR_EL2_PS_40B		(2 << 16)
 #define VTCR_EL2_TG0_MASK	(1 << 14)
 #define VTCR_EL2_TG0_4K		(0 << 14)
 #define VTCR_EL2_TG0_64K	(1 << 14)
@@ -129,10 +128,9 @@
  * 64kB pages (TG0 = 1)
  * 2 level page tables (SL = 1)
  */
-#define VTCR_EL2_FLAGS		(VTCR_EL2_PS_40B | VTCR_EL2_TG0_64K | \
-				 VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
-				 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
-				 VTCR_EL2_T0SZ_40B)
+#define VTCR_EL2_FLAGS		(VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
+				 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
+				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
 #define VTTBR_X		(38 - VTCR_EL2_T0SZ_40B)
 #else
 /*
@@ -142,10 +140,9 @@
  * 4kB pages (TG0 = 0)
  * 3 level page tables (SL = 1)
  */
-#define VTCR_EL2_FLAGS		(VTCR_EL2_PS_40B | VTCR_EL2_TG0_4K | \
-				 VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
-				 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
-				 VTCR_EL2_T0SZ_40B)
+#define VTCR_EL2_FLAGS		(VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
+				 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
+				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
 #define VTTBR_X		(37 - VTCR_EL2_T0SZ_40B)
 #endif
 
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index b1d2e26..f7af66b 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -100,9 +100,9 @@
 #define PTE_HYP			PTE_USER
 
 /*
- * 40-bit physical address supported.
+ * Highest possible physical address supported.
  */
-#define PHYS_MASK_SHIFT		(40)
+#define PHYS_MASK_SHIFT		(48)
 #define PHYS_MASK		((UL(1) << PHYS_MASK_SHIFT) - 1)
 
 /*
@@ -122,7 +122,6 @@
 #define TCR_SHARED		((UL(3) << 12) | (UL(3) << 28))
 #define TCR_TG0_64K		(UL(1) << 14)
 #define TCR_TG1_64K		(UL(1) << 30)
-#define TCR_IPS_40BIT		(UL(2) << 32)
 #define TCR_ASID16		(UL(1) << 36)
 #define TCR_TBI0		(UL(1) << 37)
 
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 2b0244d..d968796 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -68,6 +68,12 @@ __do_hyp_init:
 	msr	tcr_el2, x4
 
 	ldr	x4, =VTCR_EL2_FLAGS
+	/*
+	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
+	 * VTCR_EL2.
+	 */
+	mrs	x5, ID_AA64MMFR0_EL1
+	bfi	x4, x5, #16, #3
 	msr	vtcr_el2, x4
 
 	mrs	x4, mair_el1
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index e0ef63c..e085ee6 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -209,8 +209,14 @@ ENTRY(__cpu_setup)
 	 * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for
 	 * both user and kernel.
 	 */
-	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | TCR_IPS_40BIT | \
+	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | \
 		      TCR_ASID16 | TCR_TBI0 | (1 << 31)
+	/*
+	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
+	 * TCR_EL1.
+	 */
+	mrs	x9, ID_AA64MMFR0_EL1
+	bfi	x10, x9, #32, #3
 #ifdef CONFIG_ARM64_64K_PAGES
 	orr	x10, x10, TCR_TG0_64K
 	orr	x10, x10, TCR_TG1_64K
-- 
cgit v0.10.2


From 2ee0d7fd36a3f87bc5b29b1ec54ad6728deedb41 Mon Sep 17 00:00:00 2001
From: Jean Pihet <jean.pihet@linaro.org>
Date: Mon, 3 Feb 2014 19:18:27 +0100
Subject: ARM64: perf: add support for perf registers API

This patch implements the functions required for the perf registers API,
allowing the perf tool to interface kernel register dumps with libunwind
in order to provide userspace backtracing.
Compat mode is also supported.

Only the general purpose user space registers are exported, i.e.:
 PERF_REG_ARM_X0,
 ...
 PERF_REG_ARM_X28,
 PERF_REG_ARM_FP,
 PERF_REG_ARM_LR,
 PERF_REG_ARM_SP,
 PERF_REG_ARM_PC
and not the PERF_REG_ARM_V* registers.

Signed-off-by: Jean Pihet <jean.pihet@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 140cd1a..507ab6e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -39,6 +39,8 @@ config ARM64
 	select HAVE_MEMBLOCK
 	select HAVE_PATA_PLATFORM
 	select HAVE_PERF_EVENTS
+	select HAVE_PERF_REGS
+	select HAVE_PERF_USER_STACK_DUMP
 	select IRQ_DOMAIN
 	select MODULES_USE_ELF_RELA
 	select NO_BOOTMEM
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 5233966..35ff2f9 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -68,6 +68,7 @@
 
 /* Architecturally defined mapping between AArch32 and AArch64 registers */
 #define compat_usr(x)	regs[(x)]
+#define compat_fp	regs[11]
 #define compat_sp	regs[13]
 #define compat_lr	regs[14]
 #define compat_sp_hyp	regs[15]
diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild
index e4b78bd..942376d 100644
--- a/arch/arm64/include/uapi/asm/Kbuild
+++ b/arch/arm64/include/uapi/asm/Kbuild
@@ -9,6 +9,7 @@ header-y += byteorder.h
 header-y += fcntl.h
 header-y += hwcap.h
 header-y += kvm_para.h
+header-y += perf_regs.h
 header-y += param.h
 header-y += ptrace.h
 header-y += setup.h
diff --git a/arch/arm64/include/uapi/asm/perf_regs.h b/arch/arm64/include/uapi/asm/perf_regs.h
new file mode 100644
index 0000000..172b831
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/perf_regs.h
@@ -0,0 +1,40 @@
+#ifndef _ASM_ARM64_PERF_REGS_H
+#define _ASM_ARM64_PERF_REGS_H
+
+enum perf_event_arm_regs {
+	PERF_REG_ARM64_X0,
+	PERF_REG_ARM64_X1,
+	PERF_REG_ARM64_X2,
+	PERF_REG_ARM64_X3,
+	PERF_REG_ARM64_X4,
+	PERF_REG_ARM64_X5,
+	PERF_REG_ARM64_X6,
+	PERF_REG_ARM64_X7,
+	PERF_REG_ARM64_X8,
+	PERF_REG_ARM64_X9,
+	PERF_REG_ARM64_X10,
+	PERF_REG_ARM64_X11,
+	PERF_REG_ARM64_X12,
+	PERF_REG_ARM64_X13,
+	PERF_REG_ARM64_X14,
+	PERF_REG_ARM64_X15,
+	PERF_REG_ARM64_X16,
+	PERF_REG_ARM64_X17,
+	PERF_REG_ARM64_X18,
+	PERF_REG_ARM64_X19,
+	PERF_REG_ARM64_X20,
+	PERF_REG_ARM64_X21,
+	PERF_REG_ARM64_X22,
+	PERF_REG_ARM64_X23,
+	PERF_REG_ARM64_X24,
+	PERF_REG_ARM64_X25,
+	PERF_REG_ARM64_X26,
+	PERF_REG_ARM64_X27,
+	PERF_REG_ARM64_X28,
+	PERF_REG_ARM64_X29,
+	PERF_REG_ARM64_LR,
+	PERF_REG_ARM64_SP,
+	PERF_REG_ARM64_PC,
+	PERF_REG_ARM64_MAX,
+};
+#endif /* _ASM_ARM64_PERF_REGS_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index cfee827..7d811d9 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -15,8 +15,9 @@ arm64-obj-$(CONFIG_COMPAT)		+= sys32.o kuser32.o signal32.o 	\
 					   sys_compat.o
 arm64-obj-$(CONFIG_MODULES)		+= arm64ksyms.o module.o
 arm64-obj-$(CONFIG_SMP)			+= smp.o smp_spin_table.o topology.o
+arm64-obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o
 arm64-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o
-arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
+arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 arm64-obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND)	+= sleep.o suspend.o
 arm64-obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c
new file mode 100644
index 0000000..f2d6f0a
--- /dev/null
+++ b/arch/arm64/kernel/perf_regs.c
@@ -0,0 +1,44 @@
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/bug.h>
+#include <asm/perf_regs.h>
+#include <asm/ptrace.h>
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+	if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_MAX))
+		return 0;
+
+	/*
+	 * Compat (i.e. 32 bit) mode:
+	 * - PC has been set in the pt_regs struct in kernel_entry,
+	 * - Handle SP and LR here.
+	 */
+	if (compat_user_mode(regs)) {
+		if ((u32)idx == PERF_REG_ARM64_SP)
+			return regs->compat_sp;
+		if ((u32)idx == PERF_REG_ARM64_LR)
+			return regs->compat_lr;
+	}
+
+	return regs->regs[idx];
+}
+
+#define REG_RESERVED (~((1ULL << PERF_REG_ARM64_MAX) - 1))
+
+int perf_reg_validate(u64 mask)
+{
+	if (!mask || mask & REG_RESERVED)
+		return -EINVAL;
+
+	return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+	if (is_compat_thread(task_thread_info(task)))
+		return PERF_SAMPLE_REGS_ABI_32;
+	else
+		return PERF_SAMPLE_REGS_ABI_64;
+}
-- 
cgit v0.10.2


From 23c7d70d55c6d963f225744cd1b996dee68c88d1 Mon Sep 17 00:00:00 2001
From: Jean Pihet <jean.pihet@linaro.org>
Date: Mon, 3 Feb 2014 19:18:28 +0100
Subject: ARM64: perf: add support for frame pointer unwinding in compat mode

When profiling a 32-bit application, user space callchain unwinding
using the frame pointer is performed in compat mode. The code is taken
over from the AARCH32 code and adapted to work on AARCH64.

Signed-off-by: Jean Pihet <jean.pihet@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 5b1cd79..e868c72 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -1348,8 +1348,8 @@ early_initcall(init_hw_perf_events);
  * Callchain handling code.
  */
 struct frame_tail {
-	struct frame_tail   __user *fp;
-	unsigned long	    lr;
+	struct frame_tail	__user *fp;
+	unsigned long		lr;
 } __attribute__((packed));
 
 /*
@@ -1386,22 +1386,80 @@ user_backtrace(struct frame_tail __user *tail,
 	return buftail.fp;
 }
 
+/*
+ * The registers we're interested in are at the end of the variable
+ * length saved register structure. The fp points at the end of this
+ * structure so the address of this struct is:
+ * (struct compat_frame_tail *)(xxx->fp)-1
+ *
+ * This code has been adapted from the ARM OProfile support.
+ */
+struct compat_frame_tail {
+	compat_uptr_t	fp; /* a (struct compat_frame_tail *) in compat mode */
+	u32		sp;
+	u32		lr;
+} __attribute__((packed));
+
+static struct compat_frame_tail __user *
+compat_user_backtrace(struct compat_frame_tail __user *tail,
+		      struct perf_callchain_entry *entry)
+{
+	struct compat_frame_tail buftail;
+	unsigned long err;
+
+	/* Also check accessibility of one struct frame_tail beyond */
+	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+		return NULL;
+
+	pagefault_disable();
+	err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
+	pagefault_enable();
+
+	if (err)
+		return NULL;
+
+	perf_callchain_store(entry, buftail.lr);
+
+	/*
+	 * Frame pointers should strictly progress back up the stack
+	 * (towards higher addresses).
+	 */
+	if (tail + 1 >= (struct compat_frame_tail __user *)
+			compat_ptr(buftail.fp))
+		return NULL;
+
+	return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1;
+}
+
 void perf_callchain_user(struct perf_callchain_entry *entry,
 			 struct pt_regs *regs)
 {
-	struct frame_tail __user *tail;
-
 	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
 		/* We don't support guest os callchain now */
 		return;
 	}
 
 	perf_callchain_store(entry, regs->pc);
-	tail = (struct frame_tail __user *)regs->regs[29];
 
-	while (entry->nr < PERF_MAX_STACK_DEPTH &&
-	       tail && !((unsigned long)tail & 0xf))
-		tail = user_backtrace(tail, entry);
+	if (!compat_user_mode(regs)) {
+		/* AARCH64 mode */
+		struct frame_tail __user *tail;
+
+		tail = (struct frame_tail __user *)regs->regs[29];
+
+		while (entry->nr < PERF_MAX_STACK_DEPTH &&
+		       tail && !((unsigned long)tail & 0xf))
+			tail = user_backtrace(tail, entry);
+	} else {
+		/* AARCH32 compat mode */
+		struct compat_frame_tail __user *tail;
+
+		tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
+
+		while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+			tail && !((unsigned long)tail & 0x3))
+			tail = compat_user_backtrace(tail, entry);
+	}
 }
 
 /*
@@ -1429,6 +1487,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
 	frame.fp = regs->regs[29];
 	frame.sp = regs->sp;
 	frame.pc = regs->pc;
+
 	walk_stackframe(&frame, callchain_trace, entry);
 }
 
-- 
cgit v0.10.2


From 5f888a1d33c48900012e6b4c18296ce7c715dc6c Mon Sep 17 00:00:00 2001
From: Jean Pihet <jean.pihet@linaro.org>
Date: Mon, 3 Feb 2014 19:18:29 +0100
Subject: ARM64: perf: support dwarf unwinding in compat mode

Add support for unwinding using the dwarf information in compat
mode. Using the correct user stack pointer allows perf to record
the frames correctly in the native and compat modes.

Note that although the dwarf frame unwinding works ok using
libunwind in native mode (on ARMv7 & ARMv8), some changes are
required to the libunwind code for the compat mode. Those changes
are posted separately on the libunwind mailing list.

Tested on ARMv8 platform with v8 and compat v7 binaries, the latter
are statically built.

Signed-off-by: Jean Pihet <jean.pihet@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index fda2704..e71f81f 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -228,7 +228,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
 	return (u32)(unsigned long)uptr;
 }
 
-#define compat_user_stack_pointer() (current_pt_regs()->compat_sp)
+#define compat_user_stack_pointer() (user_stack_pointer(current_pt_regs()))
 
 static inline void __user *arch_compat_alloc_user_space(long len)
 {
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 35ff2f9..c7ba261 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -133,7 +133,7 @@ struct pt_regs {
 	(!((regs)->pstate & PSR_F_BIT))
 
 #define user_stack_pointer(regs) \
-	((regs)->sp)
+	(!compat_user_mode(regs)) ? ((regs)->sp) : ((regs)->compat_sp)
 
 /*
  * Are the current registers suitable for user mode? (used to maintain
-- 
cgit v0.10.2


From 242c04bc4be959ae28618772e439c27e87a7d880 Mon Sep 17 00:00:00 2001
From: Jingoo Han <jg1.han@samsung.com>
Date: Wed, 5 Mar 2014 05:34:32 +0000
Subject: arm64: debug: make local symbols static

Make local symbols static, because these are used only in this
file.

Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 13f87de..14ba23c 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -189,7 +189,7 @@ static void clear_regs_spsr_ss(struct pt_regs *regs)
 
 /* EL1 Single Step Handler hooks */
 static LIST_HEAD(step_hook);
-DEFINE_RWLOCK(step_hook_lock);
+static DEFINE_RWLOCK(step_hook_lock);
 
 void register_step_hook(struct step_hook *hook)
 {
@@ -276,7 +276,7 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
  * Use reader/writer locks instead of plain spinlock.
  */
 static LIST_HEAD(break_hook);
-DEFINE_RWLOCK(break_hook_lock);
+static DEFINE_RWLOCK(break_hook_lock);
 
 void register_break_hook(struct break_hook *hook)
 {
-- 
cgit v0.10.2


From 7184659bed3090248e382d98a49a3c1bcfe11174 Mon Sep 17 00:00:00 2001
From: Jingoo Han <jg1.han@samsung.com>
Date: Wed, 5 Mar 2014 05:35:45 +0000
Subject: arm64: smp: make local symbol static

Make smp_spin_table_cpu_postboot() static, because this function
is used only in this file.

Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index 44c2280..7a530d2 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -128,7 +128,7 @@ static int smp_spin_table_cpu_boot(unsigned int cpu)
 	return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0;
 }
 
-void smp_spin_table_cpu_postboot(void)
+static void smp_spin_table_cpu_postboot(void)
 {
 	/*
 	 * Let the primary processor know we're out of the pen.
-- 
cgit v0.10.2


From 3be1a5c4f75989cf457f13f38ff0913dff6d4996 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 4 Mar 2014 01:10:04 +0000
Subject: arm64: enable generic CPU feature modalias matching for this
 architecture

This enables support for the generic CPU feature modalias implementation that
wires up optional CPU features to udev based module autoprobing.

A file <asm/cpufeature.h> is provided that maps CPU feature numbers to
elf_hwcap bits, which is the standard way on arm64 to advertise optional CPU
features both internally and to user space.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[catalin.marinas@arm.com: removed unnecessary "!!"]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 507ab6e..8cabe36 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -16,6 +16,7 @@ config ARM64
 	select DCACHE_WORD_ACCESS
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_IOMAP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
new file mode 100644
index 0000000..cd4ac05
--- /dev/null
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_CPUFEATURE_H
+#define __ASM_CPUFEATURE_H
+
+#include <asm/hwcap.h>
+
+/*
+ * In the arm64 world (as in the ARM world), elf_hwcap is used both internally
+ * in the kernel and for user space to keep track of which optional features
+ * are supported by the current system. So let's map feature 'x' to HWCAP_x.
+ * Note that HWCAP_x constants are bit fields so we need to take the log.
+ */
+
+#define MAX_CPU_FEATURES	(8 * sizeof(elf_hwcap))
+#define cpu_feature(x)		ilog2(HWCAP_ ## x)
+
+static inline bool cpu_have_feature(unsigned int num)
+{
+	return elf_hwcap & (1UL << num);
+}
+
+#endif
-- 
cgit v0.10.2


From e172800e5d3162f97d332b3745e3743ce150ec48 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 14 Mar 2014 17:47:04 +0000
Subject: asm-generic: rwsem: de-PPCify rwsem.h

asm-generic/rwsem.h used to live under arch/powerpc. During its
liberation to common code, a few references to its former home where
preserved, in particular the definition of RWSEM_ACTIVE_MASK is
predicated on CONFIG_PPC64.

This patch updates the ifdefs and comments to architecturally neutral
versions.

Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Richard Kuo <rkuo@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
index bb1e2cd..d48bf5a 100644
--- a/include/asm-generic/rwsem.h
+++ b/include/asm-generic/rwsem.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_POWERPC_RWSEM_H
-#define _ASM_POWERPC_RWSEM_H
+#ifndef _ASM_GENERIC_RWSEM_H
+#define _ASM_GENERIC_RWSEM_H
 
 #ifndef _LINUX_RWSEM_H
 #error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
@@ -8,7 +8,7 @@
 #ifdef __KERNEL__
 
 /*
- * R/W semaphores for PPC using the stuff in lib/rwsem.c.
+ * R/W semaphores originally for PPC using the stuff in lib/rwsem.c.
  * Adapted largely from include/asm-i386/rwsem.h
  * by Paul Mackerras <paulus@samba.org>.
  */
@@ -16,7 +16,7 @@
 /*
  * the semaphore definition
  */
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_64BIT
 # define RWSEM_ACTIVE_MASK		0xffffffffL
 #else
 # define RWSEM_ACTIVE_MASK		0x0000ffffL
@@ -129,4 +129,4 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
 }
 
 #endif	/* __KERNEL__ */
-#endif	/* _ASM_POWERPC_RWSEM_H */
+#endif	/* _ASM_GENERIC_RWSEM_H */
-- 
cgit v0.10.2


From c209f79940ac0c75ae8d2f503a2b9d86255e266c Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 14 Mar 2014 17:47:05 +0000
Subject: arm64: rwsem: use asm-generic rwsem implementation

asm-generic offers an atomic-add based rwsem implementation, which
can avoid the need for heavier, spinlock-based synchronisation on the
fast path.

This patch makes use of the optimised implementation for arm64 CPUs.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8cabe36..4723fc1 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -77,7 +77,7 @@ config LOCKDEP_SUPPORT
 config TRACE_IRQFLAGS_SUPPORT
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
+config RWSEM_XCHGADD_ALGORITHM
 	def_bool y
 
 config GENERIC_HWEIGHT
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 71c53ec..41cde70 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -29,6 +29,7 @@ generic-y += pci.h
 generic-y += poll.h
 generic-y += posix_types.h
 generic-y += resource.h
+generic-y += rwsem.h
 generic-y += scatterlist.h
 generic-y += sections.h
 generic-y += segment.h
-- 
cgit v0.10.2


From 9c7e535fcc1725fc2e2d4f0d9dd14137f0243e23 Mon Sep 17 00:00:00 2001
From: Steve Capper <steve.capper@linaro.org>
Date: Tue, 25 Feb 2014 10:02:13 +0000
Subject: arm64: mm: Route pmd thp functions through pte equivalents

Rather than have separate hugetlb and transparent huge page pmd
manipulation functions, re-wire our thp functions to simply call the
pte equivalents.

This allows THP to take advantage of the new PTE_WRITE logic introduced
in:
  c2c93e5 arm64: mm: Introduce PTE_WRITE

To represent splitting THPs we use the PTE_SPECIAL bit as this is not
used for pmds.

Signed-off-by: Steve Capper <steve.capper@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 72c9ac3..ae10350 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -227,36 +227,36 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 
 #define __HAVE_ARCH_PTE_SPECIAL
 
-/*
- * Software PMD bits for THP
- */
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+	return __pte(pmd_val(pmd));
+}
 
-#define PMD_SECT_DIRTY		(_AT(pmdval_t, 1) << 55)
-#define PMD_SECT_SPLITTING	(_AT(pmdval_t, 1) << 57)
+static inline pmd_t pte_pmd(pte_t pte)
+{
+	return __pmd(pte_val(pte));
+}
 
 /*
  * THP definitions.
  */
-#define pmd_young(pmd)		(pmd_val(pmd) & PMD_SECT_AF)
-
-#define __HAVE_ARCH_PMD_WRITE
-#define pmd_write(pmd)		(!(pmd_val(pmd) & PMD_SECT_RDONLY))
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define pmd_trans_huge(pmd)	(pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
-#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
+#define pmd_trans_splitting(pmd)	pte_special(pmd_pte(pmd))
 #endif
 
-#define PMD_BIT_FUNC(fn,op) \
-static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
+#define pmd_young(pmd)		pte_young(pmd_pte(pmd))
+#define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
+#define pmd_mksplitting(pmd)	pte_pmd(pte_mkspecial(pmd_pte(pmd)))
+#define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
+#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mknotpresent(pmd)	(__pmd(pmd_val(pmd) &= ~PMD_TYPE_MASK))
 
-PMD_BIT_FUNC(wrprotect,	|= PMD_SECT_RDONLY);
-PMD_BIT_FUNC(mkold,	&= ~PMD_SECT_AF);
-PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING);
-PMD_BIT_FUNC(mkwrite,   &= ~PMD_SECT_RDONLY);
-PMD_BIT_FUNC(mkdirty,   |= PMD_SECT_DIRTY);
-PMD_BIT_FUNC(mkyoung,   |= PMD_SECT_AF);
-PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK);
+#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write(pmd)		pte_write(pmd_pte(pmd))
 
 #define pmd_mkhuge(pmd)		(__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
 
@@ -266,15 +266,6 @@ PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK);
 
 #define pmd_page(pmd)           pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
 
-static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
-{
-	const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN |
-			      PMD_SECT_RDONLY | PMD_SECT_PROT_NONE |
-			      PMD_SECT_VALID;
-	pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask);
-	return pmd;
-}
-
 #define set_pmd_at(mm, addr, pmdp, pmd)	set_pmd(pmdp, pmd)
 
 static inline int has_transparent_hugepage(void)
@@ -383,6 +374,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	return pte;
 }
 
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
+}
+
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
 
-- 
cgit v0.10.2


From 74397174989e5f704bcefde7af02e12cc32cb164 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Tue, 11 Mar 2014 13:17:31 +0000
Subject: arm64: Fix duplicated Kconfig entries

Probably due to rebasing over the lengthy time it took to get the patch
merged commit addea9ef055b (cpufreq: enable ARM drivers on arm64) added
a duplicate Power management options section.  Add CPUfreq to the CPU
power management section and remove a duplicate include of the main
power section.

Signed-off-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 4723fc1..516d8a7 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -321,12 +321,6 @@ menu "CPU Power Management"
 
 source "drivers/cpuidle/Kconfig"
 
-endmenu
-
-menu "Power management options"
-
-source "kernel/power/Kconfig"
-
 source "drivers/cpufreq/Kconfig"
 
 endmenu
-- 
cgit v0.10.2


From 31b1e940c5d47ee1a01baeccfb1b2b8890822d1a Mon Sep 17 00:00:00 2001
From: Christopher Covington <cov@codeaurora.org>
Date: Wed, 19 Mar 2014 16:29:37 +0000
Subject: arm64: Fix __range_ok macro

Without this, the following scenario is incorrectly determined
to be invalid.

addr 0x7f_ffffe000 size 8192 addr_limit 0x80_00000000

This behavior was observed while trying to vmsplice the stack
as part of a CRIU dump of a process on a system started with the
norandmaps kernel parameter.

Signed-off-by: Christopher Covington <cov@codeaurora.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 6c0f684..3bf8f4e 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -83,7 +83,7 @@ static inline void set_fs(mm_segment_t fs)
  * Returns 1 if the range is valid, 0 otherwise.
  *
  * This is equivalent to the following test:
- * (u65)addr + (u65)size < (u65)current->addr_limit
+ * (u65)addr + (u65)size <= current->addr_limit
  *
  * This needs 65-bit arithmetic.
  */
@@ -91,7 +91,7 @@ static inline void set_fs(mm_segment_t fs)
 ({									\
 	unsigned long flag, roksum;					\
 	__chk_user_ptr(addr);						\
-	asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, cc"		\
+	asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls"		\
 		: "=&r" (flag), "=&r" (roksum)				\
 		: "1" (addr), "Ir" (size),				\
 		  "r" (current_thread_info()->addr_limit)		\
-- 
cgit v0.10.2


From 6e8d7968e92f7668a2a615773ad3940f0219dcbd Mon Sep 17 00:00:00 2001
From: Laura Abbott <lauraa@codeaurora.org>
Date: Fri, 14 Mar 2014 19:52:23 +0000
Subject: arm64: Implement custom mmap functions for dma mapping

The current dma_ops do not specify an mmap function so maping
falls back to the default implementation. There are at least
two issues with using the default implementation:

1) The pgprot is always pgprot_noncached (strongly ordered)
memory even with coherent operations
2) dma_common_mmap calls virt_to_page on the remapped non-coherent
address which leads to invalid memory being mapped.

Fix both these issue by implementing a custom mmap function which
correctly accounts for remapped addresses and sets vm_pg_prot
appropriately.

Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
[catalin.marinas@arm.com: replaced "arm64_" with "__" prefix for consistency]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 88fbc5e..81eea3d 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -221,9 +221,52 @@ static void __swiotlb_sync_sg_for_device(struct device *dev,
 			       sg->length, dir);
 }
 
+/* vma->vm_page_prot must be set appropriately before calling this function */
+static int __dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
+			     void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+	int ret = -ENXIO;
+	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
+					PAGE_SHIFT;
+	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
+	unsigned long off = vma->vm_pgoff;
+
+	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+		return ret;
+
+	if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
+		ret = remap_pfn_range(vma, vma->vm_start,
+				      pfn + off,
+				      vma->vm_end - vma->vm_start,
+				      vma->vm_page_prot);
+	}
+
+	return ret;
+}
+
+static int __swiotlb_mmap_noncoherent(struct device *dev,
+		struct vm_area_struct *vma,
+		void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		struct dma_attrs *attrs)
+{
+	vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot);
+	return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+
+static int __swiotlb_mmap_coherent(struct device *dev,
+		struct vm_area_struct *vma,
+		void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		struct dma_attrs *attrs)
+{
+	/* Just use whatever page_prot attributes were specified */
+	return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+
 struct dma_map_ops noncoherent_swiotlb_dma_ops = {
 	.alloc = __dma_alloc_noncoherent,
 	.free = __dma_free_noncoherent,
+	.mmap = __swiotlb_mmap_noncoherent,
 	.map_page = __swiotlb_map_page,
 	.unmap_page = __swiotlb_unmap_page,
 	.map_sg = __swiotlb_map_sg_attrs,
@@ -240,6 +283,7 @@ EXPORT_SYMBOL(noncoherent_swiotlb_dma_ops);
 struct dma_map_ops coherent_swiotlb_dma_ops = {
 	.alloc = __dma_alloc_coherent,
 	.free = __dma_free_coherent,
+	.mmap = __swiotlb_mmap_coherent,
 	.map_page = swiotlb_map_page,
 	.unmap_page = swiotlb_unmap_page,
 	.map_sg = swiotlb_map_sg_attrs,
-- 
cgit v0.10.2


From 214fdbe74a096c3aeb7af81d7900e2ab966b10d6 Mon Sep 17 00:00:00 2001
From: Laura Abbott <lauraa@codeaurora.org>
Date: Fri, 14 Mar 2014 19:52:24 +0000
Subject: arm64: Support DMA_ATTR_WRITE_COMBINE

DMA_ATTR_WRITE_COMBINE is currently ignored. Set the pgprot
appropriately for non coherent opperations.

Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 81eea3d..5bba6be 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -30,6 +30,16 @@
 struct dma_map_ops *dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
+static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
+				 bool coherent)
+{
+	if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
+		return pgprot_writecombine(prot);
+	else if (!coherent)
+		return pgprot_dmacoherent(prot);
+	return prot;
+}
+
 static void *__dma_alloc_coherent(struct device *dev, size_t size,
 				  dma_addr_t *dma_handle, gfp_t flags,
 				  struct dma_attrs *attrs)
@@ -104,7 +114,7 @@ static void *__dma_alloc_noncoherent(struct device *dev, size_t size,
 	for (i = 0; i < (size >> PAGE_SHIFT); i++)
 		map[i] = page + i;
 	coherent_ptr = vmap(map, size >> PAGE_SHIFT, VM_MAP,
-			    pgprot_dmacoherent(pgprot_default));
+			    __get_dma_pgprot(attrs, pgprot_default, false));
 	kfree(map);
 	if (!coherent_ptr)
 		goto no_map;
@@ -250,7 +260,7 @@ static int __swiotlb_mmap_noncoherent(struct device *dev,
 		void *cpu_addr, dma_addr_t dma_addr, size_t size,
 		struct dma_attrs *attrs)
 {
-	vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot);
+	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, false);
 	return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
 }
 
-- 
cgit v0.10.2


From 196adf2f3015eacac0567278ba538e3ffdd16d0e Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 24 Mar 2014 10:35:35 +0000
Subject: arm64: Remove pgprot_dmacoherent()

Since this macro is identical to pgprot_writecombine() and is only used
in a single place, remove it completely to avoid confusion. On ARMv7+
processors, the coherent DMA mapping must be Normal NonCacheable (a.k.a.
writecombine) to avoid mismatched hardware attribute aliases (with the
kernel linear mapping as Normal Cacheable).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index ae10350..da92265 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -280,8 +280,6 @@ static inline int has_transparent_hugepage(void)
 	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE) | PTE_PXN | PTE_UXN)
 #define pgprot_writecombine(prot) \
 	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
-#define pgprot_dmacoherent(prot) \
-	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
 #define __HAVE_PHYS_MEM_ACCESS_PROT
 struct file;
 extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 5bba6be..0ba347e 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -33,10 +33,8 @@ EXPORT_SYMBOL(dma_ops);
 static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
 				 bool coherent)
 {
-	if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
+	if (!coherent || dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
 		return pgprot_writecombine(prot);
-	else if (!coherent)
-		return pgprot_dmacoherent(prot);
 	return prot;
 }
 
-- 
cgit v0.10.2