676 files changed, 15044 insertions, 9001 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index bec6666..8a8ea71 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -221,6 +221,10 @@ config ARCH_TASK_STRUCT_ALLOCATOR
 config ARCH_THREAD_INFO_ALLOCATOR
 	bool
 
+# Select if arch wants to size task_struct dynamically via arch_task_struct_size:
+config ARCH_WANTS_DYNAMIC_TASK_STRUCT
+	bool
+
 config HAVE_REGS_AND_STACK_ACCESS_API
 	bool
 	help
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index bf9e9d3..f515a4d 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -3,6 +3,7 @@ config ALPHA
 	default y
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
+	select ARCH_USE_CMPXCHG_LOCKREF
 	select HAVE_AOUT
 	select HAVE_IDE
 	select HAVE_OPROFILE
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index cde23cd..ffd9cf5 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += cputime.h
 generic-y += exec.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += sections.h
 generic-y += trace_clock.h
diff --git a/arch/alpha/include/asm/mm-arch-hooks.h b/arch/alpha/include/asm/mm-arch-hooks.h
deleted file mode 100644
index b07fd86..0000000
--- a/arch/alpha/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_ALPHA_MM_ARCH_HOOKS_H
-#define _ASM_ALPHA_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_ALPHA_MM_ARCH_HOOKS_H */
diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h
index 37b570d..fed9c6f 100644
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@@ -16,6 +16,11 @@
 #define arch_spin_unlock_wait(x) \
 		do { cpu_relax(); } while ((x)->lock)
 
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+        return lock.lock == 0;
+}
+
 static inline void arch_spin_unlock(arch_spinlock_t * lock)
 {
 	mb();
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 82f738e..cded02c 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -242,12 +242,7 @@ pci_restore_srm_config(void)
 
 void pcibios_fixup_bus(struct pci_bus *bus)
 {
-	struct pci_dev *dev = bus->self;
-
-	if (pci_has_flag(PCI_PROBE_ONLY) && dev &&
- 		   (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
- 		pci_read_bridge_bases(bus);
-	} 
+	struct pci_dev *dev;
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		pdev_save_srm_config(dev);
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index e7cee0a..bd4670d 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -115,6 +115,7 @@ if ISA_ARCOMPACT
 
 config ARC_CPU_750D
 	bool "ARC750D"
+	select ARC_CANT_LLSC
 	help
 	  Support for ARC750 core
 
@@ -312,11 +313,11 @@ config ARC_PAGE_SIZE_8K
 
 config ARC_PAGE_SIZE_16K
 	bool "16KB"
-	depends on ARC_MMU_V3
+	depends on ARC_MMU_V3 || ARC_MMU_V4
 
 config ARC_PAGE_SIZE_4K
 	bool "4KB"
-	depends on ARC_MMU_V3
+	depends on ARC_MMU_V3 || ARC_MMU_V4
 
 endchoice
 
@@ -362,7 +363,12 @@ config ARC_CANT_LLSC
 config ARC_HAS_LLSC
 	bool "Insn: LLOCK/SCOND (efficient atomic ops)"
 	default y
-	depends on !ARC_CPU_750D && !ARC_CANT_LLSC
+	depends on !ARC_CANT_LLSC
+
+config ARC_STAR_9000923308
+	bool "Workaround for llock/scond livelock"
+	default y
+	depends on ISA_ARCV2 && SMP && ARC_HAS_LLSC
 
 config ARC_HAS_SWAPE
 	bool "Insn: SWAPE (endian-swap)"
@@ -378,6 +384,10 @@ config ARC_HAS_LL64
 	  dest operands with 2 possible source operands.
 	default y
 
+config ARC_HAS_DIV_REM
+	bool "Insn: div, divu, rem, remu"
+	default y
+
 config ARC_HAS_RTC
 	bool "Local 64-bit r/o cycle counter"
 	default n
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 6107062..8a27a48 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -36,8 +36,16 @@ cflags-$(atleast_gcc44)			+= -fsection-anchors
 cflags-$(CONFIG_ARC_HAS_LLSC)		+= -mlock
 cflags-$(CONFIG_ARC_HAS_SWAPE)		+= -mswape
 
+ifdef CONFIG_ISA_ARCV2
+
 ifndef CONFIG_ARC_HAS_LL64
-cflags-$(CONFIG_ISA_ARCV2)		+= -mno-ll64
+cflags-y				+= -mno-ll64
+endif
+
+ifndef CONFIG_ARC_HAS_DIV_REM
+cflags-y				+= -mno-div-rem
+endif
+
 endif
 
 cflags-$(CONFIG_ARC_DW2_UNWIND)		+= -fasynchronous-unwind-tables
@@ -49,7 +57,8 @@ endif
 
 ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
 # Generic build system uses -O2, we want -O3
-cflags-y  += -O3
+# Note: No need to add to cflags-y as that happens anyways
+ARCH_CFLAGS += -O3
 endif
 
 # small data is default for elf32 tool-chain. If not usable, disable it
diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi
index 15c8d62..1cd5e82 100644
--- a/arch/arc/boot/dts/axc003.dtsi
+++ b/arch/arc/boot/dts/axc003.dtsi
@@ -12,7 +12,7 @@
 
 / {
 	compatible = "snps,arc";
-	clock-frequency = <75000000>;
+	clock-frequency = <90000000>;
 	#address-cells = <1>;
 	#size-cells = <1>;
 
diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi
index 199d428..2f0b332 100644
--- a/arch/arc/boot/dts/axc003_idu.dtsi
+++ b/arch/arc/boot/dts/axc003_idu.dtsi
@@ -12,7 +12,7 @@
 
 / {
 	compatible = "snps,arc";
-	clock-frequency = <75000000>;
+	clock-frequency = <90000000>;
 	#address-cells = <1>;
 	#size-cells = <1>;
 
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 1a80cc9..7611b10 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -22,6 +22,7 @@ generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += param.h
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 070f588..c8f57b8 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -89,11 +89,10 @@
 #define ECR_C_BIT_DTLB_LD_MISS		8
 #define ECR_C_BIT_DTLB_ST_MISS		9
 
-
 /* Auxiliary registers */
 #define AUX_IDENTITY		4
 #define AUX_INTR_VEC_BASE	0x25
-
+#define AUX_NON_VOL		0x5e
 
 /*
  * Floating Pt Registers
@@ -240,9 +239,9 @@ struct bcr_extn_xymem {
 
 struct bcr_perip {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-	unsigned int start:8, pad2:8, sz:8, pad:8;
+	unsigned int start:8, pad2:8, sz:8, ver:8;
 #else
-	unsigned int pad:8, sz:8, pad2:8, start:8;
+	unsigned int ver:8, sz:8, pad2:8, start:8;
 #endif
 };
 
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 03484cb..87d18ae 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -23,33 +23,60 @@
 
 #define atomic_set(v, i) (((v)->counter) = (i))
 
-#ifdef CONFIG_ISA_ARCV2
-#define PREFETCHW	"	prefetchw   [%1]	\n"
-#else
-#define PREFETCHW
+#ifdef CONFIG_ARC_STAR_9000923308
+
+#define SCOND_FAIL_RETRY_VAR_DEF						\
+	unsigned int delay = 1, tmp;						\
+
+#define SCOND_FAIL_RETRY_ASM							\
+	"	bz	4f			\n"				\
+	"   ; --- scond fail delay ---		\n"				\
+	"	mov	%[tmp], %[delay]	\n"	/* tmp = delay */	\
+	"2: 	brne.d	%[tmp], 0, 2b		\n"	/* while (tmp != 0) */	\
+	"	sub	%[tmp], %[tmp], 1	\n"	/* tmp-- */		\
+	"	rol	%[delay], %[delay]	\n"	/* delay *= 2 */	\
+	"	b	1b			\n"	/* start over */	\
+	"4: ; --- success ---			\n"				\
+
+#define SCOND_FAIL_RETRY_VARS							\
+	  ,[delay] "+&r" (delay),[tmp] "=&r"	(tmp)				\
+
+#else	/* !CONFIG_ARC_STAR_9000923308 */
+
+#define SCOND_FAIL_RETRY_VAR_DEF
+
+#define SCOND_FAIL_RETRY_ASM							\
+	"	bnz     1b			\n"				\
+
+#define SCOND_FAIL_RETRY_VARS
+
 #endif
 
 #define ATOMIC_OP(op, c_op, asm_op)					\
 static inline void atomic_##op(int i, atomic_t *v)			\
 {									\
-	unsigned int temp;						\
+	unsigned int val;				                \
+	SCOND_FAIL_RETRY_VAR_DEF                                        \
 									\
 	__asm__ __volatile__(						\
-	"1:				\n"				\
-	PREFETCHW							\
-	"	llock   %0, [%1]	\n"				\
-	"	" #asm_op " %0, %0, %2	\n"				\
-	"	scond   %0, [%1]	\n"				\
-	"	bnz     1b		\n"				\
-	: "=&r"(temp)	/* Early clobber, to prevent reg reuse */	\
-	: "r"(&v->counter), "ir"(i)					\
+	"1:	llock   %[val], [%[ctr]]		\n"		\
+	"	" #asm_op " %[val], %[val], %[i]	\n"		\
+	"	scond   %[val], [%[ctr]]		\n"		\
+	"						\n"		\
+	SCOND_FAIL_RETRY_ASM						\
+									\
+	: [val]	"=&r"	(val) /* Early clobber to prevent reg reuse */	\
+	  SCOND_FAIL_RETRY_VARS						\
+	: [ctr]	"r"	(&v->counter), /* Not "m": llock only supports reg direct addr mode */	\
+	  [i]	"ir"	(i)						\
 	: "cc");							\
 }									\
 
 #define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
 static inline int atomic_##op##_return(int i, atomic_t *v)		\
 {									\
-	unsigned int temp;						\
+	unsigned int val;				                \
+	SCOND_FAIL_RETRY_VAR_DEF                                        \
 									\
 	/*								\
 	 * Explicit full memory barrier needed before/after as		\
@@ -58,19 +85,21 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 	smp_mb();							\
 									\
 	__asm__ __volatile__(						\
-	"1:				\n"				\
-	PREFETCHW							\
-	"	llock   %0, [%1]	\n"				\
-	"	" #asm_op " %0, %0, %2	\n"				\
-	"	scond   %0, [%1]	\n"				\
-	"	bnz     1b		\n"				\
-	: "=&r"(temp)							\
-	: "r"(&v->counter), "ir"(i)					\
+	"1:	llock   %[val], [%[ctr]]		\n"		\
+	"	" #asm_op " %[val], %[val], %[i]	\n"		\
+	"	scond   %[val], [%[ctr]]		\n"		\
+	"						\n"		\
+	SCOND_FAIL_RETRY_ASM						\
+									\
+	: [val]	"=&r"	(val)						\
+	  SCOND_FAIL_RETRY_VARS						\
+	: [ctr]	"r"	(&v->counter),					\
+	  [i]	"ir"	(i)						\
 	: "cc");							\
 									\
 	smp_mb();							\
 									\
-	return temp;							\
+	return val;							\
 }
 
 #else	/* !CONFIG_ARC_HAS_LLSC */
@@ -150,6 +179,9 @@ ATOMIC_OP(and, &=, and)
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
+#undef SCOND_FAIL_RETRY_VAR_DEF
+#undef SCOND_FAIL_RETRY_ASM
+#undef SCOND_FAIL_RETRY_VARS
 
 /**
  * __atomic_add_unless - add unless the number is a given value
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 99fe118..57c1f33 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -50,8 +50,7 @@ static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
 	 * done for const @nr, but no code is generated due to gcc	\
 	 * const prop.							\
 	 */								\
-	if (__builtin_constant_p(nr))					\
-		nr &= 0x1f;						\
+	nr &= 0x1f;							\
 									\
 	__asm__ __volatile__(						\
 	"1:	llock       %0, [%1]		\n"			\
@@ -82,8 +81,7 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *
 									\
 	m += nr >> 5;							\
 									\
-	if (__builtin_constant_p(nr))					\
-		nr &= 0x1f;						\
+	nr &= 0x1f;							\
 									\
 	/*								\
 	 * Explicit full memory barrier needed before/after as		\
@@ -129,16 +127,13 @@ static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
 	unsigned long temp, flags;					\
 	m += nr >> 5;							\
 									\
-	if (__builtin_constant_p(nr))					\
-		nr &= 0x1f;						\
-									\
 	/*								\
 	 * spin lock/unlock provide the needed smp_mb() before/after	\
 	 */								\
 	bitops_lock(flags);						\
 									\
 	temp = *m;							\
-	*m = temp c_op (1UL << nr);					\
+	*m = temp c_op (1UL << (nr & 0x1f));					\
 									\
 	bitops_unlock(flags);						\
 }
@@ -149,17 +144,14 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *
 	unsigned long old, flags;					\
 	m += nr >> 5;							\
 									\
-	if (__builtin_constant_p(nr))					\
-		nr &= 0x1f;						\
-									\
 	bitops_lock(flags);						\
 									\
 	old = *m;							\
-	*m = old c_op (1 << nr);					\
+	*m = old c_op (1UL << (nr & 0x1f));				\
 									\
 	bitops_unlock(flags);						\
 									\
-	return (old & (1 << nr)) != 0;					\
+	return (old & (1UL << (nr & 0x1f))) != 0;			\
 }
 
 #endif /* CONFIG_ARC_HAS_LLSC */
@@ -174,11 +166,8 @@ static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m)	\
 	unsigned long temp;						\
 	m += nr >> 5;							\
 									\
-	if (__builtin_constant_p(nr))					\
-		nr &= 0x1f;						\
-									\
 	temp = *m;							\
-	*m = temp c_op (1UL << nr);					\
+	*m = temp c_op (1UL << (nr & 0x1f));				\
 }
 
 #define __TEST_N_BIT_OP(op, c_op, asm_op)				\
@@ -187,13 +176,10 @@ static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long
 	unsigned long old;						\
 	m += nr >> 5;							\
 									\
-	if (__builtin_constant_p(nr))					\
-		nr &= 0x1f;						\
-									\
 	old = *m;							\
-	*m = old c_op (1 << nr);					\
+	*m = old c_op (1UL << (nr & 0x1f));				\
 									\
-	return (old & (1 << nr)) != 0;					\
+	return (old & (1UL << (nr & 0x1f))) != 0;			\
 }
 
 #define BIT_OPS(op, c_op, asm_op)					\
@@ -224,10 +210,7 @@ test_bit(unsigned int nr, const volatile unsigned long *addr)
 
 	addr += nr >> 5;
 
-	if (__builtin_constant_p(nr))
-		nr &= 0x1f;
-
-	mask = 1 << nr;
+	mask = 1UL << (nr & 0x1f);
 
 	return ((mask & *addr) != 0);
 }
diff --git a/arch/arc/include/asm/futex.h b/arch/arc/include/asm/futex.h
index 05b5aaf..70cfe16 100644
--- a/arch/arc/include/asm/futex.h
+++ b/arch/arc/include/asm/futex.h
@@ -16,12 +16,40 @@
 #include <linux/uaccess.h>
 #include <asm/errno.h>
 
+#ifdef CONFIG_ARC_HAS_LLSC
+
+#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\
+							\
+	__asm__ __volatile__(				\
+	"1:	llock	%1, [%2]		\n"	\
+		insn				"\n"	\
+	"2:	scond	%0, [%2]		\n"	\
+	"	bnz	1b			\n"	\
+	"	mov %0, 0			\n"	\
+	"3:					\n"	\
+	"	.section .fixup,\"ax\"		\n"	\
+	"	.align  4			\n"	\
+	"4:	mov %0, %4			\n"	\
+	"	b   3b				\n"	\
+	"	.previous			\n"	\
+	"	.section __ex_table,\"a\"	\n"	\
+	"	.align  4			\n"	\
+	"	.word   1b, 4b			\n"	\
+	"	.word   2b, 4b			\n"	\
+	"	.previous			\n"	\
+							\
+	: "=&r" (ret), "=&r" (oldval)			\
+	: "r" (uaddr), "r" (oparg), "ir" (-EFAULT)	\
+	: "cc", "memory")
+
+#else	/* !CONFIG_ARC_HAS_LLSC */
+
 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\
 							\
 	__asm__ __volatile__(				\
-	"1:	ld  %1, [%2]			\n"	\
+	"1:	ld	%1, [%2]		\n"	\
 		insn				"\n"	\
-	"2:	st  %0, [%2]			\n"	\
+	"2:	st	%0, [%2]		\n"	\
 	"	mov %0, 0			\n"	\
 	"3:					\n"	\
 	"	.section .fixup,\"ax\"		\n"	\
@@ -39,6 +67,8 @@
 	: "r" (uaddr), "r" (oparg), "ir" (-EFAULT)	\
 	: "cc", "memory")
 
+#endif
+
 static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 {
 	int op = (encoded_op >> 28) & 7;
@@ -123,11 +153,17 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval,
 
 	pagefault_disable();
 
-	/* TBD : can use llock/scond */
 	__asm__ __volatile__(
-	"1:	ld    %0, [%3]	\n"
-	"	brne  %0, %1, 3f	\n"
-	"2:	st    %2, [%3]	\n"
+#ifdef CONFIG_ARC_HAS_LLSC
+	"1:	llock	%0, [%3]		\n"
+	"	brne	%0, %1, 3f		\n"
+	"2:	scond	%2, [%3]		\n"
+	"	bnz	1b			\n"
+#else
+	"1:	ld	%0, [%3]		\n"
+	"	brne	%0, %1, 3f		\n"
+	"2:	st	%2, [%3]		\n"
+#endif
 	"3:	\n"
 	"	.section .fixup,\"ax\"	\n"
 	"4:	mov %0, %4	\n"
diff --git a/arch/arc/include/asm/mm-arch-hooks.h b/arch/arc/include/asm/mm-arch-hooks.h
deleted file mode 100644
index c37541c..0000000
--- a/arch/arc/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_ARC_MM_ARCH_HOOKS_H
-#define _ASM_ARC_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_ARC_MM_ARCH_HOOKS_H */
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index 9175597..69095da 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -20,20 +20,20 @@
 struct pt_regs {
 
 	/* Real registers */
-	long bta;	/* bta_l1, bta_l2, erbta */
+	unsigned long bta;	/* bta_l1, bta_l2, erbta */
 
-	long lp_start, lp_end, lp_count;
+	unsigned long lp_start, lp_end, lp_count;
 
-	long status32;	/* status32_l1, status32_l2, erstatus */
-	long ret;	/* ilink1, ilink2 or eret */
-	long blink;
-	long fp;
-	long r26;	/* gp */
+	unsigned long status32;	/* status32_l1, status32_l2, erstatus */
+	unsigned long ret;	/* ilink1, ilink2 or eret */
+	unsigned long blink;
+	unsigned long fp;
+	unsigned long r26;	/* gp */
 
-	long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
+	unsigned long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
 
-	long sp;	/* user/kernel sp depending on where we came from  */
-	long orig_r0;
+	unsigned long sp;	/* User/Kernel depending on where we came from */
+	unsigned long orig_r0;
 
 	/*
 	 * To distinguish bet excp, syscall, irq
@@ -55,13 +55,13 @@ struct pt_regs {
 		unsigned long event;
 	};
 
-	long user_r25;
+	unsigned long user_r25;
 };
 #else
 
 struct pt_regs {
 
-	long orig_r0;
+	unsigned long orig_r0;
 
 	union {
 		struct {
@@ -76,26 +76,26 @@ struct pt_regs {
 		unsigned long event;
 	};
 
-	long bta;	/* bta_l1, bta_l2, erbta */
+	unsigned long bta;	/* bta_l1, bta_l2, erbta */
 
-	long user_r25;
+	unsigned long user_r25;
 
-	long r26;	/* gp */
-	long fp;
-	long sp;	/* user/kernel sp depending on where we came from  */
+	unsigned long r26;	/* gp */
+	unsigned long fp;
+	unsigned long sp;	/* user/kernel sp depending on where we came from  */
 
-	long r12;
+	unsigned long r12;
 
 	/*------- Below list auto saved by h/w -----------*/
-	long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
+	unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
 
-	long blink;
-	long lp_end, lp_start, lp_count;
+	unsigned long blink;
+	unsigned long lp_end, lp_start, lp_count;
 
-	long ei, ldi, jli;
+	unsigned long ei, ldi, jli;
 
-	long ret;
-	long status32;
+	unsigned long ret;
+	unsigned long status32;
 };
 
 #endif
@@ -103,7 +103,7 @@ struct pt_regs {
 /* Callee saved registers - need to be saved only when you are scheduled out */
 
 struct callee_regs {
-	long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13;
+	unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13;
 };
 
 #define instruction_pointer(regs)	((regs)->ret)
@@ -142,7 +142,7 @@ struct callee_regs {
 
 static inline long regs_return_value(struct pt_regs *regs)
 {
-	return regs->r0;
+	return (long)regs->r0;
 }
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index e1651df..db8c59d 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -18,9 +18,518 @@
 #define arch_spin_unlock_wait(x) \
 	do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0)
 
+#ifdef CONFIG_ARC_HAS_LLSC
+
+/*
+ * A normal LLOCK/SCOND based system, w/o need for livelock workaround
+ */
+#ifndef CONFIG_ARC_STAR_9000923308
+
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
-	unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
+	unsigned int val;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"1:	llock	%[val], [%[slock]]	\n"
+	"	breq	%[val], %[LOCKED], 1b	\n"	/* spin while LOCKED */
+	"	scond	%[LOCKED], [%[slock]]	\n"	/* acquire */
+	"	bnz	1b			\n"
+	"					\n"
+	: [val]		"=&r"	(val)
+	: [slock]	"r"	(&(lock->slock)),
+	  [LOCKED]	"r"	(__ARCH_SPIN_LOCK_LOCKED__)
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	unsigned int val, got_it = 0;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"1:	llock	%[val], [%[slock]]	\n"
+	"	breq	%[val], %[LOCKED], 4f	\n"	/* already LOCKED, just bail */
+	"	scond	%[LOCKED], [%[slock]]	\n"	/* acquire */
+	"	bnz	1b			\n"
+	"	mov	%[got_it], 1		\n"
+	"4:					\n"
+	"					\n"
+	: [val]		"=&r"	(val),
+	  [got_it]	"+&r"	(got_it)
+	: [slock]	"r"	(&(lock->slock)),
+	  [LOCKED]	"r"	(__ARCH_SPIN_LOCK_LOCKED__)
+	: "memory", "cc");
+
+	smp_mb();
+
+	return got_it;
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	smp_mb();
+
+	lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
+
+	smp_mb();
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers but only one writer.
+ * Unfair locking as Writers could be starved indefinitely by Reader(s)
+ */
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	unsigned int val;
+
+	smp_mb();
+
+	/*
+	 * zero means writer holds the lock exclusively, deny Reader.
+	 * Otherwise grant lock to first/subseq reader
+	 *
+	 * 	if (rw->counter > 0) {
+	 *		rw->counter--;
+	 *		ret = 1;
+	 *	}
+	 */
+
+	__asm__ __volatile__(
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	brls	%[val], %[WR_LOCKED], 1b\n"	/* <= 0: spin while write locked */
+	"	sub	%[val], %[val], 1	\n"	/* reader lock */
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bnz	1b			\n"
+	"					\n"
+	: [val]		"=&r"	(val)
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [WR_LOCKED]	"ir"	(0)
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	unsigned int val, got_it = 0;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	brls	%[val], %[WR_LOCKED], 4f\n"	/* <= 0: already write locked, bail */
+	"	sub	%[val], %[val], 1	\n"	/* counter-- */
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bnz	1b			\n"	/* retry if collided with someone */
+	"	mov	%[got_it], 1		\n"
+	"					\n"
+	"4: ; --- done ---			\n"
+
+	: [val]		"=&r"	(val),
+	  [got_it]	"+&r"	(got_it)
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [WR_LOCKED]	"ir"	(0)
+	: "memory", "cc");
+
+	smp_mb();
+
+	return got_it;
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	unsigned int val;
+
+	smp_mb();
+
+	/*
+	 * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
+	 * deny writer. Otherwise if unlocked grant to writer
+	 * Hence the claim that Linux rwlocks are unfair to writers.
+	 * (can be starved for an indefinite time by readers).
+	 *
+	 *	if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) {
+	 *		rw->counter = 0;
+	 *		ret = 1;
+	 *	}
+	 */
+
+	__asm__ __volatile__(
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	brne	%[val], %[UNLOCKED], 1b	\n"	/* while !UNLOCKED spin */
+	"	mov	%[val], %[WR_LOCKED]	\n"
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bnz	1b			\n"
+	"					\n"
+	: [val]		"=&r"	(val)
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [UNLOCKED]	"ir"	(__ARCH_RW_LOCK_UNLOCKED__),
+	  [WR_LOCKED]	"ir"	(0)
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	unsigned int val, got_it = 0;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	brne	%[val], %[UNLOCKED], 4f	\n"	/* !UNLOCKED, bail */
+	"	mov	%[val], %[WR_LOCKED]	\n"
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bnz	1b			\n"	/* retry if collided with someone */
+	"	mov	%[got_it], 1		\n"
+	"					\n"
+	"4: ; --- done ---			\n"
+
+	: [val]		"=&r"	(val),
+	  [got_it]	"+&r"	(got_it)
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [UNLOCKED]	"ir"	(__ARCH_RW_LOCK_UNLOCKED__),
+	  [WR_LOCKED]	"ir"	(0)
+	: "memory", "cc");
+
+	smp_mb();
+
+	return got_it;
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	unsigned int val;
+
+	smp_mb();
+
+	/*
+	 * rw->counter++;
+	 */
+	__asm__ __volatile__(
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	add	%[val], %[val], 1	\n"
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bnz	1b			\n"
+	"					\n"
+	: [val]		"=&r"	(val)
+	: [rwlock]	"r"	(&(rw->counter))
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	smp_mb();
+
+	rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
+
+	smp_mb();
+}
+
+#else	/* CONFIG_ARC_STAR_9000923308 */
+
+/*
+ * HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping
+ * coherency transactions in the SCU. The exclusive line state keeps rotating
+ * among contenting cores leading to a never ending cycle. So break the cycle
+ * by deferring the retry of failed exclusive access (SCOND). The actual delay
+ * needed is function of number of contending cores as well as the unrelated
+ * coherency traffic from other cores. To keep the code simple, start off with
+ * small delay of 1 which would suffice most cases and in case of contention
+ * double the delay. Eventually the delay is sufficient such that the coherency
+ * pipeline is drained, thus a subsequent exclusive access would succeed.
+ */
+
+#define SCOND_FAIL_RETRY_VAR_DEF						\
+	unsigned int delay, tmp;						\
+
+#define SCOND_FAIL_RETRY_ASM							\
+	"   ; --- scond fail delay ---		\n"				\
+	"	mov	%[tmp], %[delay]	\n"	/* tmp = delay */	\
+	"2: 	brne.d	%[tmp], 0, 2b		\n"	/* while (tmp != 0) */	\
+	"	sub	%[tmp], %[tmp], 1	\n"	/* tmp-- */		\
+	"	rol	%[delay], %[delay]	\n"	/* delay *= 2 */	\
+	"	b	1b			\n"	/* start over */	\
+	"					\n"				\
+	"4: ; --- done ---			\n"				\
+
+#define SCOND_FAIL_RETRY_VARS							\
+	  ,[delay] "=&r" (delay), [tmp] "=&r"	(tmp)				\
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	unsigned int val;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[slock]]	\n"
+	"	breq	%[val], %[LOCKED], 0b	\n"	/* spin while LOCKED */
+	"	scond	%[LOCKED], [%[slock]]	\n"	/* acquire */
+	"	bz	4f			\n"	/* done */
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val)
+	  SCOND_FAIL_RETRY_VARS
+	: [slock]	"r"	(&(lock->slock)),
+	  [LOCKED]	"r"	(__ARCH_SPIN_LOCK_LOCKED__)
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	unsigned int val, got_it = 0;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[slock]]	\n"
+	"	breq	%[val], %[LOCKED], 4f	\n"	/* already LOCKED, just bail */
+	"	scond	%[LOCKED], [%[slock]]	\n"	/* acquire */
+	"	bz.d	4f			\n"
+	"	mov.z	%[got_it], 1		\n"	/* got it */
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val),
+	  [got_it]	"+&r"	(got_it)
+	  SCOND_FAIL_RETRY_VARS
+	: [slock]	"r"	(&(lock->slock)),
+	  [LOCKED]	"r"	(__ARCH_SPIN_LOCK_LOCKED__)
+	: "memory", "cc");
+
+	smp_mb();
+
+	return got_it;
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	smp_mb();
+
+	lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
+
+	smp_mb();
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers but only one writer.
+ * Unfair locking as Writers could be starved indefinitely by Reader(s)
+ */
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	unsigned int val;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	/*
+	 * zero means writer holds the lock exclusively, deny Reader.
+	 * Otherwise grant lock to first/subseq reader
+	 *
+	 * 	if (rw->counter > 0) {
+	 *		rw->counter--;
+	 *		ret = 1;
+	 *	}
+	 */
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	brls	%[val], %[WR_LOCKED], 0b\n"	/* <= 0: spin while write locked */
+	"	sub	%[val], %[val], 1	\n"	/* reader lock */
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bz	4f			\n"	/* done */
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val)
+	  SCOND_FAIL_RETRY_VARS
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [WR_LOCKED]	"ir"	(0)
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	unsigned int val, got_it = 0;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	brls	%[val], %[WR_LOCKED], 4f\n"	/* <= 0: already write locked, bail */
+	"	sub	%[val], %[val], 1	\n"	/* counter-- */
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bz.d	4f			\n"
+	"	mov.z	%[got_it], 1		\n"	/* got it */
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val),
+	  [got_it]	"+&r"	(got_it)
+	  SCOND_FAIL_RETRY_VARS
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [WR_LOCKED]	"ir"	(0)
+	: "memory", "cc");
+
+	smp_mb();
+
+	return got_it;
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	unsigned int val;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	/*
+	 * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
+	 * deny writer. Otherwise if unlocked grant to writer
+	 * Hence the claim that Linux rwlocks are unfair to writers.
+	 * (can be starved for an indefinite time by readers).
+	 *
+	 *	if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) {
+	 *		rw->counter = 0;
+	 *		ret = 1;
+	 *	}
+	 */
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	brne	%[val], %[UNLOCKED], 0b	\n"	/* while !UNLOCKED spin */
+	"	mov	%[val], %[WR_LOCKED]	\n"
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bz	4f			\n"
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val)
+	  SCOND_FAIL_RETRY_VARS
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [UNLOCKED]	"ir"	(__ARCH_RW_LOCK_UNLOCKED__),
+	  [WR_LOCKED]	"ir"	(0)
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	unsigned int val, got_it = 0;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	brne	%[val], %[UNLOCKED], 4f	\n"	/* !UNLOCKED, bail */
+	"	mov	%[val], %[WR_LOCKED]	\n"
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bz.d	4f			\n"
+	"	mov.z	%[got_it], 1		\n"	/* got it */
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val),
+	  [got_it]	"+&r"	(got_it)
+	  SCOND_FAIL_RETRY_VARS
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [UNLOCKED]	"ir"	(__ARCH_RW_LOCK_UNLOCKED__),
+	  [WR_LOCKED]	"ir"	(0)
+	: "memory", "cc");
+
+	smp_mb();
+
+	return got_it;
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	unsigned int val;
+
+	smp_mb();
+
+	/*
+	 * rw->counter++;
+	 */
+	__asm__ __volatile__(
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	add	%[val], %[val], 1	\n"
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bnz	1b			\n"
+	"					\n"
+	: [val]		"=&r"	(val)
+	: [rwlock]	"r"	(&(rw->counter))
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	unsigned int val;
+
+	smp_mb();
+
+	/*
+	 * rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
+	 */
+	__asm__ __volatile__(
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	scond	%[UNLOCKED], [%[rwlock]]\n"
+	"	bnz	1b			\n"
+	"					\n"
+	: [val]		"=&r"	(val)
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [UNLOCKED]	"r"	(__ARCH_RW_LOCK_UNLOCKED__)
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+#undef SCOND_FAIL_RETRY_VAR_DEF
+#undef SCOND_FAIL_RETRY_ASM
+#undef SCOND_FAIL_RETRY_VARS
+
+#endif	/* CONFIG_ARC_STAR_9000923308 */
+
+#else	/* !CONFIG_ARC_HAS_LLSC */
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	unsigned int val = __ARCH_SPIN_LOCK_LOCKED__;
 
 	/*
 	 * This smp_mb() is technically superfluous, we only need the one
@@ -33,7 +542,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	__asm__ __volatile__(
 	"1:	ex  %0, [%1]		\n"
 	"	breq  %0, %2, 1b	\n"
-	: "+&r" (tmp)
+	: "+&r" (val)
 	: "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
 	: "memory");
 
@@ -48,26 +557,27 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	smp_mb();
 }
 
+/* 1 - lock taken successfully */
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
-	unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
+	unsigned int val = __ARCH_SPIN_LOCK_LOCKED__;
 
 	smp_mb();
 
 	__asm__ __volatile__(
 	"1:	ex  %0, [%1]		\n"
-	: "+r" (tmp)
+	: "+r" (val)
 	: "r"(&(lock->slock))
 	: "memory");
 
 	smp_mb();
 
-	return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__);
+	return (val == __ARCH_SPIN_LOCK_UNLOCKED__);
 }
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
-	unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__;
+	unsigned int val = __ARCH_SPIN_LOCK_UNLOCKED__;
 
 	/*
 	 * RELEASE barrier: given the instructions avail on ARCv2, full barrier
@@ -77,7 +587,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 
 	__asm__ __volatile__(
 	"	ex  %0, [%1]		\n"
-	: "+r" (tmp)
+	: "+r" (val)
 	: "r"(&(lock->slock))
 	: "memory");
 
@@ -90,19 +600,12 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 
 /*
  * Read-write spinlocks, allowing multiple readers but only one writer.
+ * Unfair locking as Writers could be starved indefinitely by Reader(s)
  *
  * The spinlock itself is contained in @counter and access to it is
  * serialized with @lock_mutex.
- *
- * Unfair locking as Writers could be starved indefinitely by Reader(s)
  */
 
-/* Would read_trylock() succeed? */
-#define arch_read_can_lock(x)	((x)->counter > 0)
-
-/* Would write_trylock() succeed? */
-#define arch_write_can_lock(x)	((x)->counter == __ARCH_RW_LOCK_UNLOCKED__)
-
 /* 1 - lock taken successfully */
 static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
@@ -173,6 +676,11 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 	arch_spin_unlock(&(rw->lock_mutex));
 }
 
+#endif
+
+#define arch_read_can_lock(x)	((x)->counter > 0)
+#define arch_write_can_lock(x)	((x)->counter == __ARCH_RW_LOCK_UNLOCKED__)
+
 #define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
 
diff --git a/arch/arc/include/asm/spinlock_types.h b/arch/arc/include/asm/spinlock_types.h
index 662627c..4e1ef5f 100644
--- a/arch/arc/include/asm/spinlock_types.h
+++ b/arch/arc/include/asm/spinlock_types.h
@@ -26,7 +26,9 @@ typedef struct {
  */
 typedef struct {
 	volatile unsigned int	counter;
+#ifndef CONFIG_ARC_HAS_LLSC
 	arch_spinlock_t		lock_mutex;
+#endif
 } arch_rwlock_t;
 
 #define __ARCH_RW_LOCK_UNLOCKED__	0x01000000
diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h
index 76a7739..0b3ef63 100644
--- a/arch/arc/include/uapi/asm/ptrace.h
+++ b/arch/arc/include/uapi/asm/ptrace.h
@@ -32,20 +32,20 @@
 */
 struct user_regs_struct {
 
-	long pad;
+	unsigned long pad;
 	struct {
-		long bta, lp_start, lp_end, lp_count;
-		long status32, ret, blink, fp, gp;
-		long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
-		long sp;
+		unsigned long bta, lp_start, lp_end, lp_count;
+		unsigned long status32, ret, blink, fp, gp;
+		unsigned long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
+		unsigned long sp;
 	} scratch;
-	long pad2;
+	unsigned long pad2;
 	struct {
-		long r25, r24, r23, r22, r21, r20;
-		long r19, r18, r17, r16, r15, r14, r13;
+		unsigned long r25, r24, r23, r22, r21, r20;
+		unsigned long r19, r18, r17, r16, r15, r14, r13;
 	} callee;
-	long efa;	/* break pt addr, for break points in delay slots */
-	long stop_pc;	/* give dbg stop_pc after ensuring brkpt trap */
+	unsigned long efa;	/* break pt addr, for break points in delay slots */
+	unsigned long stop_pc;	/* give dbg stop_pc after ensuring brkpt trap */
 };
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
index 6208c63..26c1568 100644
--- a/arch/arc/kernel/intc-arcv2.c
+++ b/arch/arc/kernel/intc-arcv2.c
@@ -12,7 +12,6 @@
 #include <linux/of.h>
 #include <linux/irqdomain.h>
 #include <linux/irqchip.h>
-#include "../../drivers/irqchip/irqchip.h"
 #include <asm/irq.h>
 
 /*
diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c
index fcdddb6..039fac3 100644
--- a/arch/arc/kernel/intc-compact.c
+++ b/arch/arc/kernel/intc-compact.c
@@ -12,7 +12,6 @@
 #include <linux/of.h>
 #include <linux/irqdomain.h>
 #include <linux/irqchip.h>
-#include "../../drivers/irqchip/irqchip.h"
 #include <asm/irq.h>
 
 /*
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index 30284e8..2fb8658 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -175,7 +175,6 @@ void mcip_init_early_smp(void)
 #include <linux/irqchip.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
-#include "../../drivers/irqchip/irqchip.h"
 
 /*
  * Set the DEST for @cmn_irq to @cpu_mask (1 bit per core)
@@ -218,11 +217,28 @@ static void idu_irq_unmask(struct irq_data *data)
 	raw_spin_unlock_irqrestore(&mcip_lock, flags);
 }
 
+#ifdef CONFIG_SMP
 static int
-idu_irq_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool f)
+idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask,
+		     bool force)
 {
+	unsigned long flags;
+	cpumask_t online;
+
+	/* errout if no online cpu per @cpumask */
+	if (!cpumask_and(&online, cpumask, cpu_online_mask))
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&mcip_lock, flags);
+
+	idu_set_dest(data->hwirq, cpumask_bits(&online)[0]);
+	idu_set_mode(data->hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR);
+
+	raw_spin_unlock_irqrestore(&mcip_lock, flags);
+
 	return IRQ_SET_MASK_OK;
 }
+#endif
 
 static struct irq_chip idu_irq_chip = {
 	.name			= "MCIP IDU Intc",
@@ -330,8 +346,7 @@ idu_of_init(struct device_node *intc, struct device_node *parent)
 		if (!i)
 			idu_first_irq = irq;
 
-		irq_set_handler_data(irq, domain);
-		irq_set_chained_handler(irq, idu_cascade_isr);
+		irq_set_chained_handler_and_data(irq, idu_cascade_isr, domain);
 	}
 
 	__mcip_cmd(CMD_IDU_ENABLE, 0);
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index a3d1862..cabde9d 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -47,6 +47,7 @@ static void read_arc_build_cfg_regs(void)
 	struct bcr_perip uncached_space;
 	struct bcr_generic bcr;
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
+	unsigned long perip_space;
 	FIX_PTR(cpu);
 
 	READ_BCR(AUX_IDENTITY, cpu->core);
@@ -56,7 +57,12 @@ static void read_arc_build_cfg_regs(void)
 	cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
 
 	READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
-	BUG_ON((uncached_space.start << 24) != ARC_UNCACHED_ADDR_SPACE);
+        if (uncached_space.ver < 3)
+		perip_space = uncached_space.start << 24;
+	else
+		perip_space = read_aux_reg(AUX_NON_VOL) & 0xF0000000;
+
+	BUG_ON(perip_space != ARC_UNCACHED_ADDR_SPACE);
 
 	READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy);
 
@@ -142,17 +148,22 @@ static void read_arc_build_cfg_regs(void)
 }
 
 static const struct cpuinfo_data arc_cpu_tbl[] = {
+#ifdef CONFIG_ISA_ARCOMPACT
 	{ {0x20, "ARC 600"      }, 0x2F},
 	{ {0x30, "ARC 700"      }, 0x33},
 	{ {0x34, "ARC 700 R4.10"}, 0x34},
 	{ {0x35, "ARC 700 R4.11"}, 0x35},
-	{ {0x50, "ARC HS38"	}, 0x51},
+#else
+	{ {0x50, "ARC HS38 R2.0"}, 0x51},
+	{ {0x52, "ARC HS38 R2.1"}, 0x52},
+#endif
 	{ {0x00, NULL		} }
 };
 
-#define IS_AVAIL1(v, str)	((v) ? str : "")
-#define IS_USED(cfg)		(IS_ENABLED(cfg) ? "" : "(not used) ")
-#define IS_AVAIL2(v, str, cfg)  IS_AVAIL1(v, str), IS_AVAIL1(v, IS_USED(cfg))
+#define IS_AVAIL1(v, s)		((v) ? s : "")
+#define IS_USED_RUN(v)		((v) ? "" : "(not used) ")
+#define IS_USED_CFG(cfg)	IS_USED_RUN(IS_ENABLED(cfg))
+#define IS_AVAIL2(v, s, cfg)	IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg))
 
 static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 {
@@ -226,7 +237,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 			n += scnprintf(buf + n, len - n, "mpy[opt %d] ", opt);
 		}
 		n += scnprintf(buf + n, len - n, "%s",
-			       IS_USED(CONFIG_ARC_HAS_HW_MPY));
+			       IS_USED_CFG(CONFIG_ARC_HAS_HW_MPY));
 	}
 
 	n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n",
@@ -325,6 +336,10 @@ static void arc_chk_core_config(void)
 		pr_warn("CONFIG_ARC_FPU_SAVE_RESTORE needed for working apps\n");
 	else if (!cpu->extn.fpu_dp && fpu_enabled)
 		panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n");
+
+	if (is_isa_arcv2() && IS_ENABLED(CONFIG_SMP) && cpu->isa.atomic &&
+	    !IS_ENABLED(CONFIG_ARC_STAR_9000923308))
+		panic("llock/scond livelock workaround missing\n");
 }
 
 /*
diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c
index 3364d2b..4294761 100644
--- a/arch/arc/kernel/time.c
+++ b/arch/arc/kernel/time.c
@@ -203,34 +203,24 @@ static int arc_clkevent_set_next_event(unsigned long delta,
 	return 0;
 }
 
-static void arc_clkevent_set_mode(enum clock_event_mode mode,
-				  struct clock_event_device *dev)
+static int arc_clkevent_set_periodic(struct clock_event_device *dev)
 {
-	switch (mode) {
-	case CLOCK_EVT_MODE_PERIODIC:
-                /*
-                 * At X Hz, 1 sec = 1000ms -> X cycles;
-                 *                    10ms -> X / 100 cycles
-                 */
-		arc_timer_event_setup(arc_get_core_freq() / HZ);
-		break;
-	case CLOCK_EVT_MODE_ONESHOT:
-		break;
-	default:
-		break;
-	}
-
-	return;
+	/*
+	 * At X Hz, 1 sec = 1000ms -> X cycles;
+	 *		      10ms -> X / 100 cycles
+	 */
+	arc_timer_event_setup(arc_get_core_freq() / HZ);
+	return 0;
 }
 
 static DEFINE_PER_CPU(struct clock_event_device, arc_clockevent_device) = {
-	.name		= "ARC Timer0",
-	.features	= CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
-	.mode		= CLOCK_EVT_MODE_UNUSED,
-	.rating		= 300,
-	.irq		= TIMER0_IRQ,	/* hardwired, no need for resources */
-	.set_next_event = arc_clkevent_set_next_event,
-	.set_mode	= arc_clkevent_set_mode,
+	.name			= "ARC Timer0",
+	.features		= CLOCK_EVT_FEAT_ONESHOT |
+				  CLOCK_EVT_FEAT_PERIODIC,
+	.rating			= 300,
+	.irq			= TIMER0_IRQ,	/* hardwired, no need for resources */
+	.set_next_event		= arc_clkevent_set_next_event,
+	.set_state_periodic	= arc_clkevent_set_periodic,
 };
 
 static irqreturn_t timer_irq_handler(int irq, void *dev_id)
@@ -240,7 +230,7 @@ static irqreturn_t timer_irq_handler(int irq, void *dev_id)
 	 * irq_set_chip_and_handler() asked for handle_percpu_devid_irq()
 	 */
 	struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device);
-	int irq_reenable = evt->mode == CLOCK_EVT_MODE_PERIODIC;
+	int irq_reenable = clockevent_state_periodic(evt);
 
 	/*
 	 * Any write to CTRL reg ACks the interrupt, we rewrite the
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index 807f7d6..a6f91e8 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -58,7 +58,6 @@ static void show_callee_regs(struct callee_regs *cregs)
 
 static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
 {
-	struct path path;
 	char *path_nm = NULL;
 	struct mm_struct *mm;
 	struct file *exe_file;
diff --git a/arch/arc/lib/memcpy-archs.S b/arch/arc/lib/memcpy-archs.S
index 1b2b3ac..0cab0b8 100644
--- a/arch/arc/lib/memcpy-archs.S
+++ b/arch/arc/lib/memcpy-archs.S
@@ -206,7 +206,7 @@ unalignedOffby3:
 	ld.ab	r6, [r1, 4]
 	prefetch [r1, 28]	;Prefetch the next read location
 	ld.ab	r8, [r1,4]
-	prefetch [r3, 32]	;Prefetch the next write location
+	prefetchw [r3, 32]	;Prefetch the next write location
 
 	SHIFT_1	(r7, r6, 8)
 	or	r7, r7, r5
diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S
index 92d573c..365b183 100644
--- a/arch/arc/lib/memset-archs.S
+++ b/arch/arc/lib/memset-archs.S
@@ -10,12 +10,6 @@
 
 #undef PREALLOC_NOT_AVAIL
 
-#ifdef PREALLOC_NOT_AVAIL
-#define PREWRITE(A,B)	prefetchw [(A),(B)]
-#else
-#define PREWRITE(A,B)	prealloc [(A),(B)]
-#endif
-
 ENTRY(memset)
 	prefetchw [r0]		; Prefetch the write location
 	mov.f	0, r2
@@ -51,9 +45,15 @@ ENTRY(memset)
 
 ;;; Convert len to Dwords, unfold x8
 	lsr.f	lp_count, lp_count, 6
+
 	lpnz	@.Lset64bytes
 	;; LOOP START
-	PREWRITE(r3, 64)	;Prefetch the next write location
+#ifdef PREALLOC_NOT_AVAIL
+	prefetchw [r3, 64]	;Prefetch the next write location
+#else
+	prealloc  [r3, 64]
+#endif
+#ifdef CONFIG_ARC_HAS_LL64
 	std.ab	r4, [r3, 8]
 	std.ab	r4, [r3, 8]
 	std.ab	r4, [r3, 8]
@@ -62,16 +62,45 @@ ENTRY(memset)
 	std.ab	r4, [r3, 8]
 	std.ab	r4, [r3, 8]
 	std.ab	r4, [r3, 8]
+#else
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+#endif
 .Lset64bytes:
 
 	lsr.f	lp_count, r2, 5 ;Last remaining  max 124 bytes
 	lpnz	.Lset32bytes
 	;; LOOP START
 	prefetchw   [r3, 32]	;Prefetch the next write location
+#ifdef CONFIG_ARC_HAS_LL64
 	std.ab	r4, [r3, 8]
 	std.ab	r4, [r3, 8]
 	std.ab	r4, [r3, 8]
 	std.ab	r4, [r3, 8]
+#else
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+#endif
 .Lset32bytes:
 
 	and.f	lp_count, r2, 0x1F ;Last remaining 31 bytes
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index b29d62e..1cd6695 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -468,10 +468,18 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
 noinline void slc_op(unsigned long paddr, unsigned long sz, const int op)
 {
 #ifdef CONFIG_ISA_ARCV2
+	/*
+	 * SLC is shared between all cores and concurrent aux operations from
+	 * multiple cores need to be serialized using a spinlock
+	 * A concurrent operation can be silently ignored and/or the old/new
+	 * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop
+	 * below)
+	 */
+	static DEFINE_SPINLOCK(lock);
 	unsigned long flags;
 	unsigned int ctrl;
 
-	local_irq_save(flags);
+	spin_lock_irqsave(&lock, flags);
 
 	/*
 	 * The Region Flush operation is specified by CTRL.RGN_OP[11..9]
@@ -504,7 +512,7 @@ noinline void slc_op(unsigned long paddr, unsigned long sz, const int op)
 
 	while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
 
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&lock, flags);
 #endif
 }
 
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 74a637a..57706a9 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -60,8 +60,8 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
 
 	/* This is kernel Virtual address (0x7000_0000 based) */
 	kvaddr = ioremap_nocache((unsigned long)paddr, size);
-	if (kvaddr != NULL)
-		memset(kvaddr, 0, size);
+	if (kvaddr == NULL)
+		return NULL;
 
 	/* This is bus address, platform dependent */
 	*dma_handle = (dma_addr_t)paddr;
diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c
index 99f7da5..e7769c3 100644
--- a/arch/arc/plat-axs10x/axs10x.c
+++ b/arch/arc/plat-axs10x/axs10x.c
@@ -389,6 +389,21 @@ axs103_set_freq(unsigned int id, unsigned int fd, unsigned int od)
 
 static void __init axs103_early_init(void)
 {
+	/*
+	 * AXS103 configurations for SMP/QUAD configurations share device tree
+	 * which defaults to 90 MHz. However recent failures of Quad config
+	 * revealed P&R timing violations so clamp it down to safe 50 MHz
+	 * Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack
+	 *
+	 * This hack is really hacky as of now. Fix it properly by getting the
+	 * number of cores as return value of platform's early SMP callback
+	 */
+#ifdef CONFIG_ARC_MCIP
+	unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F;
+	if (num_cores > 2)
+		arc_set_core_freq(50 * 1000000);
+#endif
+
 	switch (arc_get_core_freq()/1000000) {
 	case 33:
 		axs103_set_freq(1, 1, 1);
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a750c14..1c50210 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1693,6 +1693,12 @@ config HIGHMEM
 config HIGHPTE
 	bool "Allocate 2nd-level pagetables from highmem"
 	depends on HIGHMEM
+	help
+	  The VM uses one page of physical memory for each page table.
+	  For systems with a lot of processes, this can use a lot of
+	  precious low memory, eventually leading to low memory being
+	  consumed by page tables.  Setting this option will allow
+	  user-space 2nd level page tables to reside in high memory.
 
 config HW_PERF_EVENTS
 	bool "Enable hardware performance counter support for perf events"
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index f1b1579..a2e16f9 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -1635,7 +1635,7 @@ config PID_IN_CONTEXTIDR
 
 config DEBUG_SET_MODULE_RONX
 	bool "Set loadable kernel module data as NX and text as RO"
-	depends on MODULES
+	depends on MODULES && MMU
 	---help---
 	  This option helps catch unintended modifications to loadable
 	  kernel module's text and read-only data. It also prevents execution
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 07ab3d2..7451b44 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -312,6 +312,9 @@ INSTALL_TARGETS	= zinstall uinstall install
 
 PHONY += bzImage $(BOOT_TARGETS) $(INSTALL_TARGETS)
 
+bootpImage uImage: zImage
+zImage: Image
+
 $(BOOT_TARGETS): vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@
 
diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts
index 901739f..5c42d25 100644
--- a/arch/arm/boot/dts/am335x-boneblack.dts
+++ b/arch/arm/boot/dts/am335x-boneblack.dts
@@ -80,3 +80,7 @@
 		status = "okay";
 	};
 };
+
+&rtc {
+	system-power-controller;
+};
diff --git a/arch/arm/boot/dts/am335x-pepper.dts b/arch/arm/boot/dts/am335x-pepper.dts
index 0d35ab6..7106114 100644
--- a/arch/arm/boot/dts/am335x-pepper.dts
+++ b/arch/arm/boot/dts/am335x-pepper.dts
@@ -74,6 +74,7 @@
 	audio_codec: tlv320aic3106@1b {
 		compatible = "ti,tlv320aic3106";
 		reg = <0x1b>;
+		ai3x-micbias-vg = <0x2>;
 	};
 
 	accel: lis331dlh@1d {
@@ -153,7 +154,7 @@
 	ti,audio-routing =
 		"Headphone Jack",	"HPLOUT",
 		"Headphone Jack",	"HPROUT",
-		"LINE1L",		"Line In";
+		"MIC3L",		"Mic3L Switch";
 };
 
 &mcasp0 {
@@ -438,41 +439,50 @@
 	regulators {
 		dcdc1_reg: regulator@0 {
 			/* VDD_1V8 system supply */
+			regulator-always-on;
 		};
 
 		dcdc2_reg: regulator@1 {
 			/* VDD_CORE voltage limits 0.95V - 1.26V with +/-4% tolerance */
 			regulator-name = "vdd_core";
 			regulator-min-microvolt = <925000>;
-			regulator-max-microvolt = <1325000>;
+			regulator-max-microvolt = <1150000>;
 			regulator-boot-on;
+			regulator-always-on;
 		};
 
 		dcdc3_reg: regulator@2 {
 			/* VDD_MPU voltage limits 0.95V - 1.1V with +/-4% tolerance */
 			regulator-name = "vdd_mpu";
 			regulator-min-microvolt = <925000>;
-			regulator-max-microvolt = <1150000>;
+			regulator-max-microvolt = <1325000>;
 			regulator-boot-on;
+			regulator-always-on;
 		};
 
 		ldo1_reg: regulator@3 {
 			/* VRTC 1.8V always-on supply */
+			regulator-name = "vrtc,vdds";
 			regulator-always-on;
 		};
 
 		ldo2_reg: regulator@4 {
 			/* 3.3V rail */
+			regulator-name = "vdd_3v3aux";
+			regulator-always-on;
 		};
 
 		ldo3_reg: regulator@5 {
 			/* VDD_3V3A 3.3V rail */
+			regulator-name = "vdd_3v3a";
 			regulator-min-microvolt = <3300000>;
 			regulator-max-microvolt = <3300000>;
 		};
 
 		ldo4_reg: regulator@6 {
 			/* VDD_3V3B 3.3V rail */
+			regulator-name = "vdd_3v3b";
+			regulator-always-on;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
index 21fcc44..b76f9a2 100644
--- a/arch/arm/boot/dts/am33xx.dtsi
+++ b/arch/arm/boot/dts/am33xx.dtsi
@@ -210,7 +210,7 @@
 		};
 
 		uart0: serial@44e09000 {
-			compatible = "ti,omap3-uart";
+			compatible = "ti,am3352-uart", "ti,omap3-uart";
 			ti,hwmods = "uart1";
 			clock-frequency = <48000000>;
 			reg = <0x44e09000 0x2000>;
@@ -221,7 +221,7 @@
 		};
 
 		uart1: serial@48022000 {
-			compatible = "ti,omap3-uart";
+			compatible = "ti,am3352-uart", "ti,omap3-uart";
 			ti,hwmods = "uart2";
 			clock-frequency = <48000000>;
 			reg = <0x48022000 0x2000>;
@@ -232,7 +232,7 @@
 		};
 
 		uart2: serial@48024000 {
-			compatible = "ti,omap3-uart";
+			compatible = "ti,am3352-uart", "ti,omap3-uart";
 			ti,hwmods = "uart3";
 			clock-frequency = <48000000>;
 			reg = <0x48024000 0x2000>;
@@ -243,7 +243,7 @@
 		};
 
 		uart3: serial@481a6000 {
-			compatible = "ti,omap3-uart";
+			compatible = "ti,am3352-uart", "ti,omap3-uart";
 			ti,hwmods = "uart4";
 			clock-frequency = <48000000>;
 			reg = <0x481a6000 0x2000>;
@@ -252,7 +252,7 @@
 		};
 
 		uart4: serial@481a8000 {
-			compatible = "ti,omap3-uart";
+			compatible = "ti,am3352-uart", "ti,omap3-uart";
 			ti,hwmods = "uart5";
 			clock-frequency = <48000000>;
 			reg = <0x481a8000 0x2000>;
@@ -261,7 +261,7 @@
 		};
 
 		uart5: serial@481aa000 {
-			compatible = "ti,omap3-uart";
+			compatible = "ti,am3352-uart", "ti,omap3-uart";
 			ti,hwmods = "uart6";
 			clock-frequency = <48000000>;
 			reg = <0x481aa000 0x2000>;
diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index c80a3e2..ade28c79 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -132,6 +132,12 @@
 			};
 		};
 
+		emif: emif@4c000000 {
+			compatible = "ti,emif-am4372";
+			reg = <0x4c000000 0x1000000>;
+			ti,hwmods = "emif";
+		};
+
 		edma: edma@49000000 {
 			compatible = "ti,edma3";
 			ti,hwmods = "tpcc", "tptc0", "tptc1", "tptc2";
@@ -941,6 +947,7 @@
 				ti,hwmods = "dss_rfbi";
 				clocks = <&disp_clk>;
 				clock-names = "fck";
+				status = "disabled";
 			};
 		};
 
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts
index a42cc37..f9a4b31 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts
@@ -605,6 +605,10 @@
 	phy-supply = <&ldousb_reg>;
 };
 
+&usb2_phy2 {
+	phy-supply = <&ldousb_reg>;
+};
+
 &usb1 {
 	dr_mode = "host";
 	pinctrl-names = "default";
@@ -689,3 +693,7 @@
 		};
 	};
 };
+
+&pcie1 {
+	gpios = <&gpio2 8 GPIO_ACTIVE_LOW>;
+};
diff --git a/arch/arm/boot/dts/atlas7.dtsi b/arch/arm/boot/dts/atlas7.dtsi
index 5dfd3a4..3e21311 100644
--- a/arch/arm/boot/dts/atlas7.dtsi
+++ b/arch/arm/boot/dts/atlas7.dtsi
@@ -135,6 +135,1025 @@
 			compatible = "sirf,atlas7-ioc";
 			reg = <0x18880000 0x1000>,
 				<0x10E40000 0x1000>;
+
+			audio_ac97_pmx: audio_ac97@0 {
+				audio_ac97 {
+					groups = "audio_ac97_grp";
+					function = "audio_ac97";
+				};
+			};
+
+			audio_func_dbg_pmx: audio_func_dbg@0 {
+				audio_func_dbg {
+					groups = "audio_func_dbg_grp";
+					function = "audio_func_dbg";
+				};
+			};
+
+			audio_i2s_pmx: audio_i2s@0 {
+				audio_i2s {
+					groups = "audio_i2s_grp";
+					function = "audio_i2s";
+				};
+			};
+
+			audio_i2s_2ch_pmx: audio_i2s_2ch@0 {
+				audio_i2s_2ch {
+					groups = "audio_i2s_2ch_grp";
+					function = "audio_i2s_2ch";
+				};
+			};
+
+			audio_i2s_extclk_pmx: audio_i2s_extclk@0 {
+				audio_i2s_extclk {
+					groups = "audio_i2s_extclk_grp";
+					function = "audio_i2s_extclk";
+				};
+			};
+
+			audio_uart0_pmx: audio_uart0@0 {
+				audio_uart0 {
+					groups = "audio_uart0_grp";
+					function = "audio_uart0";
+				};
+			};
+
+			audio_uart1_pmx: audio_uart1@0 {
+				audio_uart1 {
+					groups = "audio_uart1_grp";
+					function = "audio_uart1";
+				};
+			};
+
+			audio_uart2_pmx0: audio_uart2@0 {
+				audio_uart2_0 {
+					groups = "audio_uart2_grp0";
+					function = "audio_uart2_m0";
+				};
+			};
+
+			audio_uart2_pmx1: audio_uart2@1 {
+				audio_uart2_1 {
+					groups = "audio_uart2_grp1";
+					function = "audio_uart2_m1";
+				};
+			};
+
+			c_can_trnsvr_pmx: c_can_trnsvr@0 {
+				c_can_trnsvr {
+					groups = "c_can_trnsvr_grp";
+					function = "c_can_trnsvr";
+				};
+			};
+
+			c0_can_pmx0: c0_can@0 {
+				c0_can_0 {
+					groups = "c0_can_grp0";
+					function = "c0_can_m0";
+				};
+			};
+
+			c0_can_pmx1: c0_can@1 {
+				c0_can_1 {
+					groups = "c0_can_grp1";
+					function = "c0_can_m1";
+				};
+			};
+
+			c1_can_pmx0: c1_can@0 {
+				c1_can_0 {
+					groups = "c1_can_grp0";
+					function = "c1_can_m0";
+				};
+			};
+
+			c1_can_pmx1: c1_can@1 {
+				c1_can_1 {
+					groups = "c1_can_grp1";
+					function = "c1_can_m1";
+				};
+			};
+
+			c1_can_pmx2: c1_can@2 {
+				c1_can_2 {
+					groups = "c1_can_grp2";
+					function = "c1_can_m2";
+				};
+			};
+
+			ca_audio_lpc_pmx: ca_audio_lpc@0 {
+				ca_audio_lpc {
+					groups = "ca_audio_lpc_grp";
+					function = "ca_audio_lpc";
+				};
+			};
+
+			ca_bt_lpc_pmx: ca_bt_lpc@0 {
+				ca_bt_lpc {
+					groups = "ca_bt_lpc_grp";
+					function = "ca_bt_lpc";
+				};
+			};
+
+			ca_coex_pmx: ca_coex@0 {
+				ca_coex {
+					groups = "ca_coex_grp";
+					function = "ca_coex";
+				};
+			};
+
+			ca_curator_lpc_pmx: ca_curator_lpc@0 {
+				ca_curator_lpc {
+					groups = "ca_curator_lpc_grp";
+					function = "ca_curator_lpc";
+				};
+			};
+
+			ca_pcm_debug_pmx: ca_pcm_debug@0 {
+				ca_pcm_debug {
+					groups = "ca_pcm_debug_grp";
+					function = "ca_pcm_debug";
+				};
+			};
+
+			ca_pio_pmx: ca_pio@0 {
+				ca_pio {
+					groups = "ca_pio_grp";
+					function = "ca_pio";
+				};
+			};
+
+			ca_sdio_debug_pmx: ca_sdio_debug@0 {
+				ca_sdio_debug {
+					groups = "ca_sdio_debug_grp";
+					function = "ca_sdio_debug";
+				};
+			};
+
+			ca_spi_pmx: ca_spi@0 {
+				ca_spi {
+					groups = "ca_spi_grp";
+					function = "ca_spi";
+				};
+			};
+
+			ca_trb_pmx: ca_trb@0 {
+				ca_trb {
+					groups = "ca_trb_grp";
+					function = "ca_trb";
+				};
+			};
+
+			ca_uart_debug_pmx: ca_uart_debug@0 {
+				ca_uart_debug {
+					groups = "ca_uart_debug_grp";
+					function = "ca_uart_debug";
+				};
+			};
+
+			clkc_pmx0: clkc@0 {
+				clkc_0 {
+					groups = "clkc_grp0";
+					function = "clkc_m0";
+				};
+			};
+
+			clkc_pmx1: clkc@1 {
+				clkc_1 {
+					groups = "clkc_grp1";
+					function = "clkc_m1";
+				};
+			};
+
+			gn_gnss_i2c_pmx: gn_gnss_i2c@0 {
+				gn_gnss_i2c {
+					groups = "gn_gnss_i2c_grp";
+					function = "gn_gnss_i2c";
+				};
+			};
+
+			gn_gnss_uart_nopause_pmx: gn_gnss_uart_nopause@0 {
+				gn_gnss_uart_nopause {
+					groups = "gn_gnss_uart_nopause_grp";
+					function = "gn_gnss_uart_nopause";
+				};
+			};
+
+			gn_gnss_uart_pmx: gn_gnss_uart@0 {
+				gn_gnss_uart {
+					groups = "gn_gnss_uart_grp";
+					function = "gn_gnss_uart";
+				};
+			};
+
+			gn_trg_spi_pmx0: gn_trg_spi@0 {
+				gn_trg_spi_0 {
+					groups = "gn_trg_spi_grp0";
+					function = "gn_trg_spi_m0";
+				};
+			};
+
+			gn_trg_spi_pmx1: gn_trg_spi@1 {
+				gn_trg_spi_1 {
+					groups = "gn_trg_spi_grp1";
+					function = "gn_trg_spi_m1";
+				};
+			};
+
+			cvbs_dbg_pmx: cvbs_dbg@0 {
+				cvbs_dbg {
+					groups = "cvbs_dbg_grp";
+					function = "cvbs_dbg";
+				};
+			};
+
+			cvbs_dbg_test_pmx0: cvbs_dbg_test@0 {
+				cvbs_dbg_test_0 {
+					groups = "cvbs_dbg_test_grp0";
+					function = "cvbs_dbg_test_m0";
+				};
+			};
+
+			cvbs_dbg_test_pmx1: cvbs_dbg_test@1 {
+				cvbs_dbg_test_1 {
+					groups = "cvbs_dbg_test_grp1";
+					function = "cvbs_dbg_test_m1";
+				};
+			};
+
+			cvbs_dbg_test_pmx2: cvbs_dbg_test@2 {
+				cvbs_dbg_test_2 {
+					groups = "cvbs_dbg_test_grp2";
+					function = "cvbs_dbg_test_m2";
+				};
+			};
+
+			cvbs_dbg_test_pmx3: cvbs_dbg_test@3 {
+				cvbs_dbg_test_3 {
+					groups = "cvbs_dbg_test_grp3";
+					function = "cvbs_dbg_test_m3";
+				};
+			};
+
+			cvbs_dbg_test_pmx4: cvbs_dbg_test@4 {
+				cvbs_dbg_test_4 {
+					groups = "cvbs_dbg_test_grp4";
+					function = "cvbs_dbg_test_m4";
+				};
+			};
+
+			cvbs_dbg_test_pmx5: cvbs_dbg_test@5 {
+				cvbs_dbg_test_5 {
+					groups = "cvbs_dbg_test_grp5";
+					function = "cvbs_dbg_test_m5";
+				};
+			};
+
+			cvbs_dbg_test_pmx6: cvbs_dbg_test@6 {
+				cvbs_dbg_test_6 {
+					groups = "cvbs_dbg_test_grp6";
+					function = "cvbs_dbg_test_m6";
+				};
+			};
+
+			cvbs_dbg_test_pmx7: cvbs_dbg_test@7 {
+				cvbs_dbg_test_7 {
+					groups = "cvbs_dbg_test_grp7";
+					function = "cvbs_dbg_test_m7";
+				};
+			};
+
+			cvbs_dbg_test_pmx8: cvbs_dbg_test@8 {
+				cvbs_dbg_test_8 {
+					groups = "cvbs_dbg_test_grp8";
+					function = "cvbs_dbg_test_m8";
+				};
+			};
+
+			cvbs_dbg_test_pmx9: cvbs_dbg_test@9 {
+				cvbs_dbg_test_9 {
+					groups = "cvbs_dbg_test_grp9";
+					function = "cvbs_dbg_test_m9";
+				};
+			};
+
+			cvbs_dbg_test_pmx10: cvbs_dbg_test@10 {
+				cvbs_dbg_test_10 {
+					groups = "cvbs_dbg_test_grp10";
+					function = "cvbs_dbg_test_m10";
+				};
+			};
+
+			cvbs_dbg_test_pmx11: cvbs_dbg_test@11 {
+				cvbs_dbg_test_11 {
+					groups = "cvbs_dbg_test_grp11";
+					function = "cvbs_dbg_test_m11";
+				};
+			};
+
+			cvbs_dbg_test_pmx12: cvbs_dbg_test@12 {
+				cvbs_dbg_test_12 {
+					groups = "cvbs_dbg_test_grp12";
+					function = "cvbs_dbg_test_m12";
+				};
+			};
+
+			cvbs_dbg_test_pmx13: cvbs_dbg_test@13 {
+				cvbs_dbg_test_13 {
+					groups = "cvbs_dbg_test_grp13";
+					function = "cvbs_dbg_test_m13";
+				};
+			};
+
+			cvbs_dbg_test_pmx14: cvbs_dbg_test@14 {
+				cvbs_dbg_test_14 {
+					groups = "cvbs_dbg_test_grp14";
+					function = "cvbs_dbg_test_m14";
+				};
+			};
+
+			cvbs_dbg_test_pmx15: cvbs_dbg_test@15 {
+				cvbs_dbg_test_15 {
+					groups = "cvbs_dbg_test_grp15";
+					function = "cvbs_dbg_test_m15";
+				};
+			};
+
+			gn_gnss_power_pmx: gn_gnss_power@0 {
+				gn_gnss_power {
+					groups = "gn_gnss_power_grp";
+					function = "gn_gnss_power";
+				};
+			};
+
+			gn_gnss_sw_status_pmx: gn_gnss_sw_status@0 {
+				gn_gnss_sw_status {
+					groups = "gn_gnss_sw_status_grp";
+					function = "gn_gnss_sw_status";
+				};
+			};
+
+			gn_gnss_eclk_pmx: gn_gnss_eclk@0 {
+				gn_gnss_eclk {
+					groups = "gn_gnss_eclk_grp";
+					function = "gn_gnss_eclk";
+				};
+			};
+
+			gn_gnss_irq1_pmx0: gn_gnss_irq1@0 {
+				gn_gnss_irq1_0 {
+					groups = "gn_gnss_irq1_grp0";
+					function = "gn_gnss_irq1_m0";
+				};
+			};
+
+			gn_gnss_irq2_pmx0: gn_gnss_irq2@0 {
+				gn_gnss_irq2_0 {
+					groups = "gn_gnss_irq2_grp0";
+					function = "gn_gnss_irq2_m0";
+				};
+			};
+
+			gn_gnss_tm_pmx: gn_gnss_tm@0 {
+				gn_gnss_tm {
+					groups = "gn_gnss_tm_grp";
+					function = "gn_gnss_tm";
+				};
+			};
+
+			gn_gnss_tsync_pmx: gn_gnss_tsync@0 {
+				gn_gnss_tsync {
+					groups = "gn_gnss_tsync_grp";
+					function = "gn_gnss_tsync";
+				};
+			};
+
+			gn_io_gnsssys_sw_cfg_pmx: gn_io_gnsssys_sw_cfg@0 {
+				gn_io_gnsssys_sw_cfg {
+					groups = "gn_io_gnsssys_sw_cfg_grp";
+					function = "gn_io_gnsssys_sw_cfg";
+				};
+			};
+
+			gn_trg_pmx0: gn_trg@0 {
+				gn_trg_0 {
+					groups = "gn_trg_grp0";
+					function = "gn_trg_m0";
+				};
+			};
+
+			gn_trg_pmx1: gn_trg@1 {
+				gn_trg_1 {
+					groups = "gn_trg_grp1";
+					function = "gn_trg_m1";
+				};
+			};
+
+			gn_trg_shutdown_pmx0: gn_trg_shutdown@0 {
+				gn_trg_shutdown_0 {
+					groups = "gn_trg_shutdown_grp0";
+					function = "gn_trg_shutdown_m0";
+				};
+			};
+
+			gn_trg_shutdown_pmx1: gn_trg_shutdown@1 {
+				gn_trg_shutdown_1 {
+					groups = "gn_trg_shutdown_grp1";
+					function = "gn_trg_shutdown_m1";
+				};
+			};
+
+			gn_trg_shutdown_pmx2: gn_trg_shutdown@2 {
+				gn_trg_shutdown_2 {
+					groups = "gn_trg_shutdown_grp2";
+					function = "gn_trg_shutdown_m2";
+				};
+			};
+
+			gn_trg_shutdown_pmx3: gn_trg_shutdown@3 {
+				gn_trg_shutdown_3 {
+					groups = "gn_trg_shutdown_grp3";
+					function = "gn_trg_shutdown_m3";
+				};
+			};
+
+			i2c0_pmx: i2c0@0 {
+				i2c0 {
+					groups = "i2c0_grp";
+					function = "i2c0";
+				};
+			};
+
+			i2c1_pmx: i2c1@0 {
+				i2c1 {
+					groups = "i2c1_grp";
+					function = "i2c1";
+				};
+			};
+
+			jtag_pmx0: jtag@0 {
+				jtag_0 {
+					groups = "jtag_grp0";
+					function = "jtag_m0";
+				};
+			};
+
+			ks_kas_spi_pmx0: ks_kas_spi@0 {
+				ks_kas_spi_0 {
+					groups = "ks_kas_spi_grp0";
+					function = "ks_kas_spi_m0";
+				};
+			};
+
+			ld_ldd_pmx: ld_ldd@0 {
+				ld_ldd {
+					groups = "ld_ldd_grp";
+					function = "ld_ldd";
+				};
+			};
+
+			ld_ldd_16bit_pmx: ld_ldd_16bit@0 {
+				ld_ldd_16bit {
+					groups = "ld_ldd_16bit_grp";
+					function = "ld_ldd_16bit";
+				};
+			};
+
+			ld_ldd_fck_pmx: ld_ldd_fck@0 {
+				ld_ldd_fck {
+					groups = "ld_ldd_fck_grp";
+					function = "ld_ldd_fck";
+				};
+			};
+
+			ld_ldd_lck_pmx: ld_ldd_lck@0 {
+				ld_ldd_lck {
+					groups = "ld_ldd_lck_grp";
+					function = "ld_ldd_lck";
+				};
+			};
+
+			lr_lcdrom_pmx: lr_lcdrom@0 {
+				lr_lcdrom {
+					groups = "lr_lcdrom_grp";
+					function = "lr_lcdrom";
+				};
+			};
+
+			lvds_analog_pmx: lvds_analog@0 {
+				lvds_analog {
+					groups = "lvds_analog_grp";
+					function = "lvds_analog";
+				};
+			};
+
+			nd_df_pmx: nd_df@0 {
+				nd_df {
+					groups = "nd_df_grp";
+					function = "nd_df";
+				};
+			};
+
+			nd_df_nowp_pmx: nd_df_nowp@0 {
+				nd_df_nowp {
+					groups = "nd_df_nowp_grp";
+					function = "nd_df_nowp";
+				};
+			};
+
+			ps_pmx: ps@0 {
+				ps {
+					groups = "ps_grp";
+					function = "ps";
+				};
+			};
+
+			pwc_core_on_pmx: pwc_core_on@0 {
+				pwc_core_on {
+					groups = "pwc_core_on_grp";
+					function = "pwc_core_on";
+				};
+			};
+
+			pwc_ext_on_pmx: pwc_ext_on@0 {
+				pwc_ext_on {
+					groups = "pwc_ext_on_grp";
+					function = "pwc_ext_on";
+				};
+			};
+
+			pwc_gpio3_clk_pmx: pwc_gpio3_clk@0 {
+				pwc_gpio3_clk {
+					groups = "pwc_gpio3_clk_grp";
+					function = "pwc_gpio3_clk";
+				};
+			};
+
+			pwc_io_on_pmx: pwc_io_on@0 {
+				pwc_io_on {
+					groups = "pwc_io_on_grp";
+					function = "pwc_io_on";
+				};
+			};
+
+			pwc_lowbatt_b_pmx0: pwc_lowbatt_b@0 {
+				pwc_lowbatt_b_0 {
+					groups = "pwc_lowbatt_b_grp0";
+					function = "pwc_lowbatt_b_m0";
+				};
+			};
+
+			pwc_mem_on_pmx: pwc_mem_on@0 {
+				pwc_mem_on {
+					groups = "pwc_mem_on_grp";
+					function = "pwc_mem_on";
+				};
+			};
+
+			pwc_on_key_b_pmx0: pwc_on_key_b@0 {
+				pwc_on_key_b_0 {
+					groups = "pwc_on_key_b_grp0";
+					function = "pwc_on_key_b_m0";
+				};
+			};
+
+			pwc_wakeup_src0_pmx: pwc_wakeup_src0@0 {
+				pwc_wakeup_src0 {
+					groups = "pwc_wakeup_src0_grp";
+					function = "pwc_wakeup_src0";
+				};
+			};
+
+			pwc_wakeup_src1_pmx: pwc_wakeup_src1@0 {
+				pwc_wakeup_src1 {
+					groups = "pwc_wakeup_src1_grp";
+					function = "pwc_wakeup_src1";
+				};
+			};
+
+			pwc_wakeup_src2_pmx: pwc_wakeup_src2@0 {
+				pwc_wakeup_src2 {
+					groups = "pwc_wakeup_src2_grp";
+					function = "pwc_wakeup_src2";
+				};
+			};
+
+			pwc_wakeup_src3_pmx: pwc_wakeup_src3@0 {
+				pwc_wakeup_src3 {
+					groups = "pwc_wakeup_src3_grp";
+					function = "pwc_wakeup_src3";
+				};
+			};
+
+			pw_cko0_pmx0: pw_cko0@0 {
+				pw_cko0_0 {
+					groups = "pw_cko0_grp0";
+					function = "pw_cko0_m0";
+				};
+			};
+
+			pw_cko0_pmx1: pw_cko0@1 {
+				pw_cko0_1 {
+					groups = "pw_cko0_grp1";
+					function = "pw_cko0_m1";
+				};
+			};
+
+			pw_cko0_pmx2: pw_cko0@2 {
+				pw_cko0_2 {
+					groups = "pw_cko0_grp2";
+					function = "pw_cko0_m2";
+				};
+			};
+
+			pw_cko1_pmx0: pw_cko1@0 {
+				pw_cko1_0 {
+					groups = "pw_cko1_grp0";
+					function = "pw_cko1_m0";
+				};
+			};
+
+			pw_cko1_pmx1: pw_cko1@1 {
+				pw_cko1_1 {
+					groups = "pw_cko1_grp1";
+					function = "pw_cko1_m1";
+				};
+			};
+
+			pw_i2s01_clk_pmx0: pw_i2s01_clk@0 {
+				pw_i2s01_clk_0 {
+					groups = "pw_i2s01_clk_grp0";
+					function = "pw_i2s01_clk_m0";
+				};
+			};
+
+			pw_i2s01_clk_pmx1: pw_i2s01_clk@1 {
+				pw_i2s01_clk_1 {
+					groups = "pw_i2s01_clk_grp1";
+					function = "pw_i2s01_clk_m1";
+				};
+			};
+
+			pw_pwm0_pmx: pw_pwm0@0 {
+				pw_pwm0 {
+					groups = "pw_pwm0_grp";
+					function = "pw_pwm0";
+				};
+			};
+
+			pw_pwm1_pmx: pw_pwm1@0 {
+				pw_pwm1 {
+					groups = "pw_pwm1_grp";
+					function = "pw_pwm1";
+				};
+			};
+
+			pw_pwm2_pmx0: pw_pwm2@0 {
+				pw_pwm2_0 {
+					groups = "pw_pwm2_grp0";
+					function = "pw_pwm2_m0";
+				};
+			};
+
+			pw_pwm2_pmx1: pw_pwm2@1 {
+				pw_pwm2_1 {
+					groups = "pw_pwm2_grp1";
+					function = "pw_pwm2_m1";
+				};
+			};
+
+			pw_pwm3_pmx0: pw_pwm3@0 {
+				pw_pwm3_0 {
+					groups = "pw_pwm3_grp0";
+					function = "pw_pwm3_m0";
+				};
+			};
+
+			pw_pwm3_pmx1: pw_pwm3@1 {
+				pw_pwm3_1 {
+					groups = "pw_pwm3_grp1";
+					function = "pw_pwm3_m1";
+				};
+			};
+
+			pw_pwm_cpu_vol_pmx0: pw_pwm_cpu_vol@0 {
+				pw_pwm_cpu_vol_0 {
+					groups = "pw_pwm_cpu_vol_grp0";
+					function = "pw_pwm_cpu_vol_m0";
+				};
+			};
+
+			pw_pwm_cpu_vol_pmx1: pw_pwm_cpu_vol@1 {
+				pw_pwm_cpu_vol_1 {
+					groups = "pw_pwm_cpu_vol_grp1";
+					function = "pw_pwm_cpu_vol_m1";
+				};
+			};
+
+			pw_backlight_pmx0: pw_backlight@0 {
+				pw_backlight_0 {
+					groups = "pw_backlight_grp0";
+					function = "pw_backlight_m0";
+				};
+			};
+
+			pw_backlight_pmx1: pw_backlight@1 {
+				pw_backlight_1 {
+					groups = "pw_backlight_grp1";
+					function = "pw_backlight_m1";
+				};
+			};
+
+			rg_eth_mac_pmx: rg_eth_mac@0 {
+				rg_eth_mac {
+					groups = "rg_eth_mac_grp";
+					function = "rg_eth_mac";
+				};
+			};
+
+			rg_gmac_phy_intr_n_pmx: rg_gmac_phy_intr_n@0 {
+				rg_gmac_phy_intr_n {
+					groups = "rg_gmac_phy_intr_n_grp";
+					function = "rg_gmac_phy_intr_n";
+				};
+			};
+
+			rg_rgmii_mac_pmx: rg_rgmii_mac@0 {
+				rg_rgmii_mac {
+					groups = "rg_rgmii_mac_grp";
+					function = "rg_rgmii_mac";
+				};
+			};
+
+			rg_rgmii_phy_ref_clk_pmx0: rg_rgmii_phy_ref_clk@0 {
+				rg_rgmii_phy_ref_clk_0 {
+					groups =
+						"rg_rgmii_phy_ref_clk_grp0";
+					function =
+						"rg_rgmii_phy_ref_clk_m0";
+				};
+			};
+
+			rg_rgmii_phy_ref_clk_pmx1: rg_rgmii_phy_ref_clk@1 {
+				rg_rgmii_phy_ref_clk_1 {
+					groups =
+						"rg_rgmii_phy_ref_clk_grp1";
+					function =
+						"rg_rgmii_phy_ref_clk_m1";
+				};
+			};
+
+			sd0_pmx: sd0@0 {
+				sd0 {
+					groups = "sd0_grp";
+					function = "sd0";
+				};
+			};
+
+			sd0_4bit_pmx: sd0_4bit@0 {
+				sd0_4bit {
+					groups = "sd0_4bit_grp";
+					function = "sd0_4bit";
+				};
+			};
+
+			sd1_pmx: sd1@0 {
+				sd1 {
+					groups = "sd1_grp";
+					function = "sd1";
+				};
+			};
+
+			sd1_4bit_pmx0: sd1_4bit@0 {
+				sd1_4bit_0 {
+					groups = "sd1_4bit_grp0";
+					function = "sd1_4bit_m0";
+				};
+			};
+
+			sd1_4bit_pmx1: sd1_4bit@1 {
+				sd1_4bit_1 {
+					groups = "sd1_4bit_grp1";
+					function = "sd1_4bit_m1";
+				};
+			};
+
+			sd2_pmx0: sd2@0 {
+				sd2_0 {
+					groups = "sd2_grp0";
+					function = "sd2_m0";
+				};
+			};
+
+			sd2_no_cdb_pmx0: sd2_no_cdb@0 {
+				sd2_no_cdb_0 {
+					groups = "sd2_no_cdb_grp0";
+					function = "sd2_no_cdb_m0";
+				};
+			};
+
+			sd3_pmx: sd3@0 {
+				sd3 {
+					groups = "sd3_grp";
+					function = "sd3";
+				};
+			};
+
+			sd5_pmx: sd5@0 {
+				sd5 {
+					groups = "sd5_grp";
+					function = "sd5";
+				};
+			};
+
+			sd6_pmx0: sd6@0 {
+				sd6_0 {
+					groups = "sd6_grp0";
+					function = "sd6_m0";
+				};
+			};
+
+			sd6_pmx1: sd6@1 {
+				sd6_1 {
+					groups = "sd6_grp1";
+					function = "sd6_m1";
+				};
+			};
+
+			sp0_ext_ldo_on_pmx: sp0_ext_ldo_on@0 {
+				sp0_ext_ldo_on {
+					groups = "sp0_ext_ldo_on_grp";
+					function = "sp0_ext_ldo_on";
+				};
+			};
+
+			sp0_qspi_pmx: sp0_qspi@0 {
+				sp0_qspi {
+					groups = "sp0_qspi_grp";
+					function = "sp0_qspi";
+				};
+			};
+
+			sp1_spi_pmx: sp1_spi@0 {
+				sp1_spi {
+					groups = "sp1_spi_grp";
+					function = "sp1_spi";
+				};
+			};
+
+			tpiu_trace_pmx: tpiu_trace@0 {
+				tpiu_trace {
+					groups = "tpiu_trace_grp";
+					function = "tpiu_trace";
+				};
+			};
+
+			uart0_pmx: uart0@0 {
+				uart0 {
+					groups = "uart0_grp";
+					function = "uart0";
+				};
+			};
+
+			uart0_nopause_pmx: uart0_nopause@0 {
+				uart0_nopause {
+					groups = "uart0_nopause_grp";
+					function = "uart0_nopause";
+				};
+			};
+
+			uart1_pmx: uart1@0 {
+				uart1 {
+					groups = "uart1_grp";
+					function = "uart1";
+				};
+			};
+
+			uart2_pmx: uart2@0 {
+				uart2 {
+					groups = "uart2_grp";
+					function = "uart2";
+				};
+			};
+
+			uart3_pmx0: uart3@0 {
+				uart3_0 {
+					groups = "uart3_grp0";
+					function = "uart3_m0";
+				};
+			};
+
+			uart3_pmx1: uart3@1 {
+				uart3_1 {
+					groups = "uart3_grp1";
+					function = "uart3_m1";
+				};
+			};
+
+			uart3_pmx2: uart3@2 {
+				uart3_2 {
+					groups = "uart3_grp2";
+					function = "uart3_m2";
+				};
+			};
+
+			uart3_pmx3: uart3@3 {
+				uart3_3 {
+					groups = "uart3_grp3";
+					function = "uart3_m3";
+				};
+			};
+
+			uart3_nopause_pmx0: uart3_nopause@0 {
+				uart3_nopause_0 {
+					groups = "uart3_nopause_grp0";
+					function = "uart3_nopause_m0";
+				};
+			};
+
+			uart3_nopause_pmx1: uart3_nopause@1 {
+				uart3_nopause_1 {
+					groups = "uart3_nopause_grp1";
+					function = "uart3_nopause_m1";
+				};
+			};
+
+			uart4_pmx0: uart4@0 {
+				uart4_0 {
+					groups = "uart4_grp0";
+					function = "uart4_m0";
+				};
+			};
+
+			uart4_pmx1: uart4@1 {
+				uart4_1 {
+					groups = "uart4_grp1";
+					function = "uart4_m1";
+				};
+			};
+
+			uart4_pmx2: uart4@2 {
+				uart4_2 {
+					groups = "uart4_grp2";
+					function = "uart4_m2";
+				};
+			};
+
+			uart4_nopause_pmx: uart4_nopause@0 {
+				uart4_nopause {
+					groups = "uart4_nopause_grp";
+					function = "uart4_nopause";
+				};
+			};
+
+			usb0_drvvbus_pmx: usb0_drvvbus@0 {
+				usb0_drvvbus {
+					groups = "usb0_drvvbus_grp";
+					function = "usb0_drvvbus";
+				};
+			};
+
+			usb1_drvvbus_pmx: usb1_drvvbus@0 {
+				usb1_drvvbus {
+					groups = "usb1_drvvbus_grp";
+					function = "usb1_drvvbus";
+				};
+			};
+
+			visbus_dout_pmx: visbus_dout@0 {
+				visbus_dout {
+					groups = "visbus_dout_grp";
+					function = "visbus_dout";
+				};
+			};
+
+			vi_vip1_pmx: vi_vip1@0 {
+				vi_vip1 {
+					groups = "vi_vip1_grp";
+					function = "vi_vip1";
+				};
+			};
+
+			vi_vip1_ext_pmx: vi_vip1_ext@0 {
+				vi_vip1_ext {
+					groups = "vi_vip1_ext_grp";
+					function = "vi_vip1_ext";
+				};
+			};
+
+			vi_vip1_low8bit_pmx: vi_vip1_low8bit@0 {
+				vi_vip1_low8bit {
+					groups = "vi_vip1_low8bit_grp";
+					function = "vi_vip1_low8bit";
+				};
+			};
+
+			vi_vip1_high8bit_pmx: vi_vip1_high8bit@0 {
+				vi_vip1_high8bit {
+					groups = "vi_vip1_high8bit_grp";
+					function = "vi_vip1_high8bit";
+				};
+			};
 		};
 
 		pmipc {
@@ -356,6 +1375,12 @@
 				clock-names = "gpio0_io";
 				gpio-controller;
 				interrupt-controller;
+
+				gpio-banks = <2>;
+				gpio-ranges = <&pinctrl 0 0 0>,
+						<&pinctrl 32 0 0>;
+				gpio-ranges-group-names = "lvds_gpio_grp",
+							"uart_nand_gpio_grp";
 			};
 
 			nand@17050000 {
@@ -461,11 +1486,22 @@
 				#interrupt-cells = <2>;
 				compatible = "sirf,atlas7-gpio";
 				reg = <0x13300000 0x1000>;
-				interrupts = <0 43 0>, <0 44 0>, <0 45 0>;
+				interrupts = <0 43 0>, <0 44 0>,
+						<0 45 0>, <0 46 0>;
 				clocks = <&car 84>;
 				clock-names = "gpio1_io";
 				gpio-controller;
 				interrupt-controller;
+
+				gpio-banks = <4>;
+				gpio-ranges = <&pinctrl 0 0 0>,
+						<&pinctrl 32 0 0>,
+						<&pinctrl 64 0 0>,
+						<&pinctrl 96 0 0>;
+				gpio-ranges-group-names = "gnss_gpio_grp",
+							"lcd_vip_gpio_grp",
+							"sdio_i2s_gpio_grp",
+							"sp_rgmii_gpio_grp";
 			};
 
 			sd2: sdhci@14200000 {
@@ -744,6 +1780,10 @@
 				interrupts = <0 47 0>;
 				gpio-controller;
 				interrupt-controller;
+
+				gpio-banks = <1>;
+				gpio-ranges = <&pinctrl 0 0 0>;
+				gpio-ranges-group-names = "rtc_gpio_grp";
 			};
 
 			rtc-iobg@18840000 {
diff --git a/arch/arm/boot/dts/cros-ec-keyboard.dtsi b/arch/arm/boot/dts/cros-ec-keyboard.dtsi
index 9c7fb0a..4e42f30c 100644
--- a/arch/arm/boot/dts/cros-ec-keyboard.dtsi
+++ b/arch/arm/boot/dts/cros-ec-keyboard.dtsi
@@ -22,6 +22,7 @@
 			MATRIX_KEY(0x00, 0x02, KEY_F1)
 			MATRIX_KEY(0x00, 0x03, KEY_B)
 			MATRIX_KEY(0x00, 0x04, KEY_F10)
+			MATRIX_KEY(0x00, 0x05, KEY_RO)
 			MATRIX_KEY(0x00, 0x06, KEY_N)
 			MATRIX_KEY(0x00, 0x08, KEY_EQUAL)
 			MATRIX_KEY(0x00, 0x0a, KEY_RIGHTALT)
@@ -34,6 +35,7 @@
 			MATRIX_KEY(0x01, 0x08, KEY_APOSTROPHE)
 			MATRIX_KEY(0x01, 0x09, KEY_F9)
 			MATRIX_KEY(0x01, 0x0b, KEY_BACKSPACE)
+			MATRIX_KEY(0x01, 0x0c, KEY_HENKAN)
 
 			MATRIX_KEY(0x02, 0x00, KEY_LEFTCTRL)
 			MATRIX_KEY(0x02, 0x01, KEY_TAB)
@@ -45,6 +47,7 @@
 			MATRIX_KEY(0x02, 0x07, KEY_102ND)
 			MATRIX_KEY(0x02, 0x08, KEY_LEFTBRACE)
 			MATRIX_KEY(0x02, 0x09, KEY_F8)
+			MATRIX_KEY(0x02, 0x0a, KEY_YEN)
 
 			MATRIX_KEY(0x03, 0x01, KEY_GRAVE)
 			MATRIX_KEY(0x03, 0x02, KEY_F2)
@@ -53,6 +56,7 @@
 			MATRIX_KEY(0x03, 0x06, KEY_6)
 			MATRIX_KEY(0x03, 0x08, KEY_MINUS)
 			MATRIX_KEY(0x03, 0x0b, KEY_BACKSLASH)
+			MATRIX_KEY(0x03, 0x0c, KEY_MUHENKAN)
 
 			MATRIX_KEY(0x04, 0x00, KEY_RIGHTCTRL)
 			MATRIX_KEY(0x04, 0x01, KEY_A)
diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts
index aa46590..096f68b 100644
--- a/arch/arm/boot/dts/dra7-evm.dts
+++ b/arch/arm/boot/dts/dra7-evm.dts
@@ -686,7 +686,8 @@
 
 &dcan1 {
 	status = "ok";
-	pinctrl-names = "default", "sleep";
-	pinctrl-0 = <&dcan1_pins_default>;
+	pinctrl-names = "default", "sleep", "active";
+	pinctrl-0 = <&dcan1_pins_sleep>;
 	pinctrl-1 = <&dcan1_pins_sleep>;
+	pinctrl-2 = <&dcan1_pins_default>;
 };
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 8f1e25b..b058b31 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -116,7 +116,7 @@
 				ranges = <0 0x2000 0x2000>;
 
 				scm_conf: scm_conf@0 {
-					compatible = "syscon";
+					compatible = "syscon", "simple-bus";
 					reg = <0x0 0x1400>;
 					#address-cells = <1>;
 					#size-cells = <1>;
@@ -211,7 +211,7 @@
 			#address-cells = <1>;
 			ranges = <0x51000000 0x51000000 0x3000
 				  0x0	     0x20000000 0x10000000>;
-			pcie@51000000 {
+			pcie1: pcie@51000000 {
 				compatible = "ti,dra7-pcie";
 				reg = <0x51000000 0x2000>, <0x51002000 0x14c>, <0x1000 0x2000>;
 				reg-names = "rc_dbics", "ti_conf", "config";
@@ -397,7 +397,7 @@
 		};
 
 		uart1: serial@4806a000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x4806a000 0x100>;
 			interrupts-extended = <&crossbar_mpu GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart1";
@@ -408,7 +408,7 @@
 		};
 
 		uart2: serial@4806c000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x4806c000 0x100>;
 			interrupts = <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart2";
@@ -419,7 +419,7 @@
 		};
 
 		uart3: serial@48020000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x48020000 0x100>;
 			interrupts = <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart3";
@@ -430,7 +430,7 @@
 		};
 
 		uart4: serial@4806e000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x4806e000 0x100>;
 			interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart4";
@@ -441,7 +441,7 @@
 		};
 
 		uart5: serial@48066000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x48066000 0x100>;
 			interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart5";
@@ -452,7 +452,7 @@
 		};
 
 		uart6: serial@48068000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x48068000 0x100>;
 			interrupts = <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart6";
@@ -463,7 +463,7 @@
 		};
 
 		uart7: serial@48420000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x48420000 0x100>;
 			interrupts = <GIC_SPI 218 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart7";
@@ -472,7 +472,7 @@
 		};
 
 		uart8: serial@48422000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x48422000 0x100>;
 			interrupts = <GIC_SPI 219 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart8";
@@ -481,7 +481,7 @@
 		};
 
 		uart9: serial@48424000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x48424000 0x100>;
 			interrupts = <GIC_SPI 220 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart9";
@@ -490,7 +490,7 @@
 		};
 
 		uart10: serial@4ae2b000 {
-			compatible = "ti,omap4-uart";
+			compatible = "ti,dra742-uart", "ti,omap4-uart";
 			reg = <0x4ae2b000 0x100>;
 			interrupts = <GIC_SPI 221 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart10";
@@ -1140,6 +1140,7 @@
 				ctrl-module = <&omap_control_sata>;
 				clocks = <&sys_clkin1>, <&sata_ref_clk>;
 				clock-names = "sysclk", "refclk";
+				syscon-pllreset = <&scm_conf 0x3fc>;
 				#phy-cells = <0>;
 			};
 
diff --git a/arch/arm/boot/dts/dra72-evm.dts b/arch/arm/boot/dts/dra72-evm.dts
index 4e1b605..8037384 100644
--- a/arch/arm/boot/dts/dra72-evm.dts
+++ b/arch/arm/boot/dts/dra72-evm.dts
@@ -587,9 +587,10 @@
 
 &dcan1 {
 	status = "ok";
-	pinctrl-names = "default", "sleep";
-	pinctrl-0 = <&dcan1_pins_default>;
+	pinctrl-names = "default", "sleep", "active";
+	pinctrl-0 = <&dcan1_pins_sleep>;
 	pinctrl-1 = <&dcan1_pins_sleep>;
+	pinctrl-2 = <&dcan1_pins_default>;
 };
 
 &qspi {
diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi
index d720133..2db9943 100644
--- a/arch/arm/boot/dts/exynos3250.dtsi
+++ b/arch/arm/boot/dts/exynos3250.dtsi
@@ -138,8 +138,8 @@
 
 		mipi_phy: video-phy@10020710 {
 			compatible = "samsung,s5pv210-mipi-video-phy";
-			reg = <0x10020710 8>;
 			#phy-cells = <1>;
+			syscon = <&pmu_system_controller>;
 		};
 
 		pd_cam: cam-power-domain@10023C00 {
diff --git a/arch/arm/boot/dts/exynos4210-origen.dts b/arch/arm/boot/dts/exynos4210-origen.dts
index e0abfc3..e050d85 100644
--- a/arch/arm/boot/dts/exynos4210-origen.dts
+++ b/arch/arm/boot/dts/exynos4210-origen.dts
@@ -127,6 +127,10 @@
 	};
 };
 
+&cpu0 {
+	cpu0-supply = <&buck1_reg>;
+};
+
 &fimd {
 	pinctrl-0 = <&lcd_en &lcd_clk &lcd_data24 &pwm0_out>;
 	pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/exynos4210-trats.dts b/arch/arm/boot/dts/exynos4210-trats.dts
index 98f3ce6..ba34886 100644
--- a/arch/arm/boot/dts/exynos4210-trats.dts
+++ b/arch/arm/boot/dts/exynos4210-trats.dts
@@ -188,6 +188,10 @@
 	};
 };
 
+&cpu0 {
+	cpu0-supply = <&varm_breg>;
+};
+
 &dsi_0 {
 	vddcore-supply = <&vusb_reg>;
 	vddio-supply = <&vmipi_reg>;
diff --git a/arch/arm/boot/dts/exynos4210-universal_c210.dts b/arch/arm/boot/dts/exynos4210-universal_c210.dts
index d4f2b11..775892b 100644
--- a/arch/arm/boot/dts/exynos4210-universal_c210.dts
+++ b/arch/arm/boot/dts/exynos4210-universal_c210.dts
@@ -548,6 +548,10 @@
 	};
 };
 
+&cpu0 {
+	cpu0-supply = <&vdd_arm_reg>;
+};
+
 &pinctrl_1 {
 	hdmi_hpd: hdmi-hpd {
 		samsung,pins = "gpx3-7";
diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi
index 10d3c17..3e5ba66 100644
--- a/arch/arm/boot/dts/exynos4210.dtsi
+++ b/arch/arm/boot/dts/exynos4210.dtsi
@@ -40,6 +40,18 @@
 			device_type = "cpu";
 			compatible = "arm,cortex-a9";
 			reg = <0x900>;
+			clocks = <&clock CLK_ARM_CLK>;
+			clock-names = "cpu";
+			clock-latency = <160000>;
+
+			operating-points = <
+				1200000 1250000
+				1000000 1150000
+				800000	1075000
+				500000	975000
+				400000	975000
+				200000	950000
+			>;
 			cooling-min-level = <4>;
 			cooling-max-level = <2>;
 			#cooling-cells = <2>; /* min followed by max */
diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi
index c892d58..b995333 100644
--- a/arch/arm/boot/dts/imx23.dtsi
+++ b/arch/arm/boot/dts/imx23.dtsi
@@ -468,6 +468,7 @@
 				interrupts = <36 37 38 39 40 41 42 43 44>;
 				status = "disabled";
 				clocks = <&clks 26>;
+				#io-channel-cells = <1>;
 			};
 
 			spdif@80054000 {
diff --git a/arch/arm/boot/dts/imx25-pdk.dts b/arch/arm/boot/dts/imx25-pdk.dts
index dd45e69..9351296 100644
--- a/arch/arm/boot/dts/imx25-pdk.dts
+++ b/arch/arm/boot/dts/imx25-pdk.dts
@@ -10,6 +10,7 @@
  */
 
 /dts-v1/;
+#include <dt-bindings/gpio/gpio.h>
 #include <dt-bindings/input/input.h>
 #include "imx25.dtsi"
 
@@ -114,8 +115,8 @@
 &esdhc1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_esdhc1>;
-	cd-gpios = <&gpio2 1 0>;
-	wp-gpios = <&gpio2 0 0>;
+	cd-gpios = <&gpio2 1 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio2 0 GPIO_ACTIVE_HIGH>;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi
index bc215e4..b69be5c 100644
--- a/arch/arm/boot/dts/imx27.dtsi
+++ b/arch/arm/boot/dts/imx27.dtsi
@@ -108,7 +108,7 @@
 			};
 
 			gpt1: timer@10003000 {
-				compatible = "fsl,imx27-gpt", "fsl,imx1-gpt";
+				compatible = "fsl,imx27-gpt", "fsl,imx21-gpt";
 				reg = <0x10003000 0x1000>;
 				interrupts = <26>;
 				clocks = <&clks IMX27_CLK_GPT1_IPG_GATE>,
@@ -117,7 +117,7 @@
 			};
 
 			gpt2: timer@10004000 {
-				compatible = "fsl,imx27-gpt", "fsl,imx1-gpt";
+				compatible = "fsl,imx27-gpt", "fsl,imx21-gpt";
 				reg = <0x10004000 0x1000>;
 				interrupts = <25>;
 				clocks = <&clks IMX27_CLK_GPT2_IPG_GATE>,
@@ -126,7 +126,7 @@
 			};
 
 			gpt3: timer@10005000 {
-				compatible = "fsl,imx27-gpt", "fsl,imx1-gpt";
+				compatible = "fsl,imx27-gpt", "fsl,imx21-gpt";
 				reg = <0x10005000 0x1000>;
 				interrupts = <24>;
 				clocks = <&clks IMX27_CLK_GPT3_IPG_GATE>,
@@ -376,7 +376,7 @@
 			};
 
 			gpt4: timer@10019000 {
-				compatible = "fsl,imx27-gpt", "fsl,imx1-gpt";
+				compatible = "fsl,imx27-gpt", "fsl,imx21-gpt";
 				reg = <0x10019000 0x1000>;
 				interrupts = <4>;
 				clocks = <&clks IMX27_CLK_GPT4_IPG_GATE>,
@@ -385,7 +385,7 @@
 			};
 
 			gpt5: timer@1001a000 {
-				compatible = "fsl,imx27-gpt", "fsl,imx1-gpt";
+				compatible = "fsl,imx27-gpt", "fsl,imx21-gpt";
 				reg = <0x1001a000 0x1000>;
 				interrupts = <3>;
 				clocks = <&clks IMX27_CLK_GPT5_IPG_GATE>,
@@ -436,7 +436,7 @@
 			};
 
 			gpt6: timer@1001f000 {
-				compatible = "fsl,imx27-gpt", "fsl,imx1-gpt";
+				compatible = "fsl,imx27-gpt", "fsl,imx21-gpt";
 				reg = <0x1001f000 0x1000>;
 				interrupts = <2>;
 				clocks = <&clks IMX27_CLK_GPT6_IPG_GATE>,
diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi
index b6478e9..e6540b5 100644
--- a/arch/arm/boot/dts/imx35.dtsi
+++ b/arch/arm/boot/dts/imx35.dtsi
@@ -286,8 +286,8 @@
 			can1: can@53fe4000 {
 				compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan";
 				reg = <0x53fe4000 0x1000>;
-				clocks = <&clks 33>;
-				clock-names = "ipg";
+				clocks = <&clks 33>, <&clks 33>;
+				clock-names = "ipg", "per";
 				interrupts = <43>;
 				status = "disabled";
 			};
@@ -295,8 +295,8 @@
 			can2: can@53fe8000 {
 				compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan";
 				reg = <0x53fe8000 0x1000>;
-				clocks = <&clks 34>;
-				clock-names = "ipg";
+				clocks = <&clks 34>, <&clks 34>;
+				clock-names = "ipg", "per";
 				interrupts = <44>;
 				status = "disabled";
 			};
diff --git a/arch/arm/boot/dts/imx51-apf51dev.dts b/arch/arm/boot/dts/imx51-apf51dev.dts
index 93d3ea1..0f3fe29 100644
--- a/arch/arm/boot/dts/imx51-apf51dev.dts
+++ b/arch/arm/boot/dts/imx51-apf51dev.dts
@@ -98,7 +98,7 @@
 &esdhc1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_esdhc1>;
-	cd-gpios = <&gpio2 29 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio2 29 GPIO_ACTIVE_LOW>;
 	bus-width = <4>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx53-ard.dts b/arch/arm/boot/dts/imx53-ard.dts
index e9337ad..3bc1883 100644
--- a/arch/arm/boot/dts/imx53-ard.dts
+++ b/arch/arm/boot/dts/imx53-ard.dts
@@ -103,8 +103,8 @@
 &esdhc1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_esdhc1>;
-	cd-gpios = <&gpio1 1 0>;
-	wp-gpios = <&gpio1 9 0>;
+	cd-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio1 9 GPIO_ACTIVE_HIGH>;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/imx53-m53evk.dts b/arch/arm/boot/dts/imx53-m53evk.dts
index d0e0f57..53f4088 100644
--- a/arch/arm/boot/dts/imx53-m53evk.dts
+++ b/arch/arm/boot/dts/imx53-m53evk.dts
@@ -124,8 +124,8 @@
 &esdhc1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_esdhc1>;
-	cd-gpios = <&gpio1 1 0>;
-	wp-gpios = <&gpio1 9 0>;
+	cd-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio1 9 GPIO_ACTIVE_HIGH>;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi
index 181ae5e..b0d5542 100644
--- a/arch/arm/boot/dts/imx53-qsb-common.dtsi
+++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi
@@ -147,8 +147,8 @@
 &esdhc3 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_esdhc3>;
-	cd-gpios = <&gpio3 11 0>;
-	wp-gpios = <&gpio3 12 0>;
+	cd-gpios = <&gpio3 11 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio3 12 GPIO_ACTIVE_HIGH>;
 	bus-width = <8>;
 	status = "okay";
 };
@@ -295,9 +295,10 @@
 &tve {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_vga_sync>;
+	ddc-i2c-bus = <&i2c2>;
 	fsl,tve-mode = "vga";
-	fsl,hsync-pin = <4>;
-	fsl,vsync-pin = <6>;
+	fsl,hsync-pin = <7>;	/* IPU DI1 PIN7 via EIM_OE */
+	fsl,vsync-pin = <8>;	/* IPU DI1 PIN8 via EIM_RW */
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/imx53-smd.dts b/arch/arm/boot/dts/imx53-smd.dts
index 1d32557..fc89ce1 100644
--- a/arch/arm/boot/dts/imx53-smd.dts
+++ b/arch/arm/boot/dts/imx53-smd.dts
@@ -41,8 +41,8 @@
 &esdhc1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_esdhc1>;
-	cd-gpios = <&gpio3 13 0>;
-	wp-gpios = <&gpio4 11 0>;
+	cd-gpios = <&gpio3 13 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio4 11 GPIO_ACTIVE_HIGH>;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/imx53-tqma53.dtsi b/arch/arm/boot/dts/imx53-tqma53.dtsi
index 4f1f0e2..e03373a 100644
--- a/arch/arm/boot/dts/imx53-tqma53.dtsi
+++ b/arch/arm/boot/dts/imx53-tqma53.dtsi
@@ -41,8 +41,8 @@
 	pinctrl-0 = <&pinctrl_esdhc2>,
 		    <&pinctrl_esdhc2_cdwp>;
 	vmmc-supply = <&reg_3p3v>;
-	wp-gpios = <&gpio1 2 0>;
-	cd-gpios = <&gpio1 4 0>;
+	wp-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
 	status = "disabled";
 };
 
diff --git a/arch/arm/boot/dts/imx53-tx53.dtsi b/arch/arm/boot/dts/imx53-tx53.dtsi
index 704bd72..d3e50b2 100644
--- a/arch/arm/boot/dts/imx53-tx53.dtsi
+++ b/arch/arm/boot/dts/imx53-tx53.dtsi
@@ -183,7 +183,7 @@
 };
 
 &esdhc1 {
-	cd-gpios = <&gpio3 24 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio3 24 GPIO_ACTIVE_LOW>;
 	fsl,wp-controller;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_esdhc1>;
@@ -191,7 +191,7 @@
 };
 
 &esdhc2 {
-	cd-gpios = <&gpio3 25 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio3 25 GPIO_ACTIVE_LOW>;
 	fsl,wp-controller;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_esdhc2>;
diff --git a/arch/arm/boot/dts/imx53-voipac-bsb.dts b/arch/arm/boot/dts/imx53-voipac-bsb.dts
index c17d3ad..fc51b87 100644
--- a/arch/arm/boot/dts/imx53-voipac-bsb.dts
+++ b/arch/arm/boot/dts/imx53-voipac-bsb.dts
@@ -119,8 +119,8 @@
 &esdhc2 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_esdhc2>;
-	cd-gpios = <&gpio3 25 0>;
-	wp-gpios = <&gpio2 19 0>;
+	cd-gpios = <&gpio3 25 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio2 19 GPIO_ACTIVE_HIGH>;
 	vmmc-supply = <&reg_3p3v>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6dl-riotboard.dts b/arch/arm/boot/dts/imx6dl-riotboard.dts
index 43cb3fd..5111f51 100644
--- a/arch/arm/boot/dts/imx6dl-riotboard.dts
+++ b/arch/arm/boot/dts/imx6dl-riotboard.dts
@@ -305,8 +305,8 @@
 &usdhc2 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc2>;
-	cd-gpios = <&gpio1 4 0>;
-	wp-gpios = <&gpio1 2 0>;
+	cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
 	vmmc-supply = <&reg_3p3v>;
 	status = "okay";
 };
@@ -314,8 +314,8 @@
 &usdhc3 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc3>;
-	cd-gpios = <&gpio7 0 0>;
-	wp-gpios = <&gpio7 1 0>;
+	cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>;
 	vmmc-supply = <&reg_3p3v>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6q-arm2.dts b/arch/arm/boot/dts/imx6q-arm2.dts
index 78df05e..d6515f7 100644
--- a/arch/arm/boot/dts/imx6q-arm2.dts
+++ b/arch/arm/boot/dts/imx6q-arm2.dts
@@ -11,6 +11,7 @@
  */
 
 /dts-v1/;
+#include <dt-bindings/gpio/gpio.h>
 #include "imx6q.dtsi"
 
 / {
@@ -196,8 +197,8 @@
 };
 
 &usdhc3 {
-	cd-gpios = <&gpio6 11 0>;
-	wp-gpios = <&gpio6 14 0>;
+	cd-gpios = <&gpio6 11 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio6 14 GPIO_ACTIVE_HIGH>;
 	vmmc-supply = <&reg_3p3v>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc3
diff --git a/arch/arm/boot/dts/imx6q-gk802.dts b/arch/arm/boot/dts/imx6q-gk802.dts
index 703539c..00bd63e 100644
--- a/arch/arm/boot/dts/imx6q-gk802.dts
+++ b/arch/arm/boot/dts/imx6q-gk802.dts
@@ -7,6 +7,7 @@
  */
 
 /dts-v1/;
+#include <dt-bindings/gpio/gpio.h>
 #include "imx6q.dtsi"
 
 / {
@@ -161,7 +162,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc3>;
 	bus-width = <4>;
-	cd-gpios = <&gpio6 11 0>;
+	cd-gpios = <&gpio6 11 GPIO_ACTIVE_LOW>;
 	vmmc-supply = <&reg_3p3v>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6q-tbs2910.dts b/arch/arm/boot/dts/imx6q-tbs2910.dts
index a43abfa..5645d52 100644
--- a/arch/arm/boot/dts/imx6q-tbs2910.dts
+++ b/arch/arm/boot/dts/imx6q-tbs2910.dts
@@ -251,7 +251,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc2>;
 	bus-width = <4>;
-	cd-gpios = <&gpio2 2 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>;
 	vmmc-supply = <&reg_3p3v>;
 	status = "okay";
 };
@@ -260,7 +260,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc3>;
 	bus-width = <4>;
-	cd-gpios = <&gpio2 0 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>;
 	wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>;
 	vmmc-supply = <&reg_3p3v>;
 	status = "okay";
diff --git a/arch/arm/boot/dts/imx6qdl-aristainetos.dtsi b/arch/arm/boot/dts/imx6qdl-aristainetos.dtsi
index e6d9195..f4d6ae5 100644
--- a/arch/arm/boot/dts/imx6qdl-aristainetos.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-aristainetos.dtsi
@@ -173,7 +173,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc1>;
 	vmmc-supply = <&reg_3p3v>;
-	cd-gpios = <&gpio4 7 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio4 7 GPIO_ACTIVE_LOW>;
 	status = "okay";
 };
 
@@ -181,7 +181,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc2>;
 	vmmc-supply = <&reg_3p3v>;
-	cd-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio4 8 GPIO_ACTIVE_LOW>;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi b/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi
index 1d85de2..a47a039 100644
--- a/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi
@@ -392,7 +392,7 @@
 &usdhc1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc1>;
-	cd-gpios = <&gpio1 27 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>;
 	no-1-8-v;
 	status = "okay";
 };
@@ -400,7 +400,7 @@
 &usdhc2 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc2>;
-	cd-gpios = <&gpio4 5 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio4 5 GPIO_ACTIVE_LOW>;
 	wp-gpios = <&gpio2 10 GPIO_ACTIVE_HIGH>;
 	no-1-8-v;
 	status = "okay";
diff --git a/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi b/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi
index 59e5d15..ff41f83 100644
--- a/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi
@@ -258,6 +258,6 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_cubox_i_usdhc2_aux &pinctrl_cubox_i_usdhc2>;
 	vmmc-supply = <&reg_3p3v>;
-	cd-gpios = <&gpio1 4 0>;
+	cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6qdl-dfi-fs700-m60.dtsi b/arch/arm/boot/dts/imx6qdl-dfi-fs700-m60.dtsi
index 2c253d6..45e7c39 100644
--- a/arch/arm/boot/dts/imx6qdl-dfi-fs700-m60.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-dfi-fs700-m60.dtsi
@@ -1,3 +1,5 @@
+#include <dt-bindings/gpio/gpio.h>
+
 / {
 	regulators {
 		compatible = "simple-bus";
@@ -181,7 +183,7 @@
 &usdhc2 { /* module slot */
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc2>;
-	cd-gpios = <&gpio2 2 0>;
+	cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi
index b5756c2..4493f6e 100644
--- a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi
@@ -318,7 +318,7 @@
 &usdhc3 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc3>;
-	cd-gpios = <&gpio7 0 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>;
 	vmmc-supply = <&reg_3p3v>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi
index 86f03c1..a857d12 100644
--- a/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi
@@ -324,7 +324,7 @@
 &usdhc3 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc3>;
-	cd-gpios = <&gpio7 0 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>;
 	vmmc-supply = <&reg_3p3v>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi
index 4a8d97f..1afe338 100644
--- a/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi
@@ -417,7 +417,7 @@
 &usdhc3 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc3>;
-	cd-gpios = <&gpio7 0 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>;
 	vmmc-supply = <&reg_3p3v>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi b/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi
index 62a82f3..6dd0b76 100644
--- a/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi
@@ -299,6 +299,6 @@
 		&pinctrl_hummingboard_usdhc2
 	>;
 	vmmc-supply = <&reg_3p3v>;
-	cd-gpios = <&gpio1 4 0>;
+	cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi
index 3af16df..d7fe667 100644
--- a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi
@@ -453,7 +453,7 @@
 &usdhc3 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc3>;
-	cd-gpios = <&gpio7 0 0>;
+	cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>;
 	vmmc-supply = <&reg_3p3v>;
 	status = "okay";
 };
@@ -461,7 +461,7 @@
 &usdhc4 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc4>;
-	cd-gpios = <&gpio2 6 0>;
+	cd-gpios = <&gpio2 6 GPIO_ACTIVE_LOW>;
 	vmmc-supply = <&reg_3p3v>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi
index 1ce6133..9e6ecd9 100644
--- a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi
@@ -409,8 +409,8 @@
 &usdhc2 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc2>;
-	cd-gpios = <&gpio1 4 0>;
-	wp-gpios = <&gpio1 2 0>;
+	cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
 	status = "disabled";
 };
 
@@ -418,7 +418,7 @@
         pinctrl-names = "default";
         pinctrl-0 = <&pinctrl_usdhc3
 		     &pinctrl_usdhc3_cdwp>;
-        cd-gpios = <&gpio1 27 0>;
-        wp-gpios = <&gpio1 29 0>;
+	cd-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio1 29 GPIO_ACTIVE_HIGH>;
         status = "disabled";
 };
diff --git a/arch/arm/boot/dts/imx6qdl-rex.dtsi b/arch/arm/boot/dts/imx6qdl-rex.dtsi
index 488a640..3373fd9 100644
--- a/arch/arm/boot/dts/imx6qdl-rex.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-rex.dtsi
@@ -342,7 +342,7 @@
 	pinctrl-0 = <&pinctrl_usdhc2>;
 	bus-width = <4>;
 	cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>;
-	wp-gpios = <&gpio2 3 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio2 3 GPIO_ACTIVE_HIGH>;
 	status = "okay";
 };
 
@@ -351,6 +351,6 @@
 	pinctrl-0 = <&pinctrl_usdhc3>;
 	bus-width = <4>;
 	cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>;
-	wp-gpios = <&gpio2 1 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
index 3b24b126..e329ca5 100644
--- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
@@ -467,8 +467,8 @@
 	pinctrl-0 = <&pinctrl_usdhc3>;
 	pinctrl-1 = <&pinctrl_usdhc3_100mhz>;
 	pinctrl-2 = <&pinctrl_usdhc3_200mhz>;
-	cd-gpios = <&gpio6 15 0>;
-	wp-gpios = <&gpio1 13 0>;
+	cd-gpios = <&gpio6 15 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio1 13 GPIO_ACTIVE_HIGH>;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi b/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi
index e00c44f..7823793 100644
--- a/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi
@@ -448,8 +448,8 @@
 &usdhc3 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc3>;
-	cd-gpios = <&gpio7 0 0>;
-	wp-gpios = <&gpio7 1 0>;
+	cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>;
 	vmmc-supply = <&reg_3p3v>;
 	status = "okay";
 };
@@ -457,7 +457,7 @@
 &usdhc4 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc4>;
-	cd-gpios = <&gpio2 6 0>;
+	cd-gpios = <&gpio2 6 GPIO_ACTIVE_LOW>;
 	vmmc-supply = <&reg_3p3v>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
index a626e6d..944eb81 100644
--- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
@@ -562,8 +562,8 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc2>;
 	bus-width = <8>;
-	cd-gpios = <&gpio2 2 0>;
-	wp-gpios = <&gpio2 3 0>;
+	cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio2 3 GPIO_ACTIVE_HIGH>;
 	status = "okay";
 };
 
@@ -571,8 +571,8 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc3>;
 	bus-width = <8>;
-	cd-gpios = <&gpio2 0 0>;
-	wp-gpios = <&gpio2 1 0>;
+	cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/imx6qdl-tx6.dtsi b/arch/arm/boot/dts/imx6qdl-tx6.dtsi
index f02b80b..da08de3 100644
--- a/arch/arm/boot/dts/imx6qdl-tx6.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-tx6.dtsi
@@ -680,7 +680,7 @@
 	pinctrl-0 = <&pinctrl_usdhc1>;
 	bus-width = <4>;
 	no-1-8-v;
-	cd-gpios = <&gpio7 2 0>;
+	cd-gpios = <&gpio7 2 GPIO_ACTIVE_LOW>;
 	fsl,wp-controller;
 	status = "okay";
 };
@@ -690,7 +690,7 @@
 	pinctrl-0 = <&pinctrl_usdhc2>;
 	bus-width = <4>;
 	no-1-8-v;
-	cd-gpios = <&gpio7 3 0>;
+	cd-gpios = <&gpio7 3 GPIO_ACTIVE_LOW>;
 	fsl,wp-controller;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi
index 5fb0916..9e096d8 100644
--- a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi
@@ -9,6 +9,8 @@
  *
  */
 
+#include <dt-bindings/gpio/gpio.h>
+
 / {
 	regulators {
 		compatible = "simple-bus";
@@ -250,13 +252,13 @@
 &usdhc1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc1>;
-	cd-gpios = <&gpio1 2 0>;
+	cd-gpios = <&gpio1 2 GPIO_ACTIVE_LOW>;
 	status = "okay";
 };
 
 &usdhc3 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc3>;
-	cd-gpios = <&gpio3 9 0>;
+	cd-gpios = <&gpio3 9 GPIO_ACTIVE_LOW>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
index e6d1359..10d0b26 100644
--- a/arch/arm/boot/dts/imx6qdl.dtsi
+++ b/arch/arm/boot/dts/imx6qdl.dtsi
@@ -181,10 +181,10 @@
 			interrupt-names = "msi";
 			#interrupt-cells = <1>;
 			interrupt-map-mask = <0 0 0 0x7>;
-			interrupt-map = <0 0 0 1 &intc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>,
-			                <0 0 0 2 &intc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>,
-			                <0 0 0 3 &intc GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
-			                <0 0 0 4 &intc GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-map = <0 0 0 1 &gpc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>,
+			                <0 0 0 2 &gpc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>,
+			                <0 0 0 3 &gpc GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+			                <0 0 0 4 &gpc GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&clks IMX6QDL_CLK_PCIE_AXI>,
 				 <&clks IMX6QDL_CLK_LVDS1_GATE>,
 				 <&clks IMX6QDL_CLK_PCIE_REF_125M>;
@@ -836,10 +836,31 @@
 			reg = <0x02100000 0x100000>;
 			ranges;
 
-			caam@02100000 {
-				reg = <0x02100000 0x40000>;
-				interrupts = <0 105 IRQ_TYPE_LEVEL_HIGH>,
-					     <0 106 IRQ_TYPE_LEVEL_HIGH>;
+			crypto: caam@2100000 {
+				compatible = "fsl,sec-v4.0";
+				fsl,sec-era = <4>;
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0x2100000 0x10000>;
+				ranges = <0 0x2100000 0x10000>;
+				interrupt-parent = <&intc>;
+				clocks = <&clks IMX6QDL_CLK_CAAM_MEM>,
+					 <&clks IMX6QDL_CLK_CAAM_ACLK>,
+					 <&clks IMX6QDL_CLK_CAAM_IPG>,
+					 <&clks IMX6QDL_CLK_EIM_SLOW>;
+				clock-names = "mem", "aclk", "ipg", "emi_slow";
+
+				sec_jr0: jr0@1000 {
+					compatible = "fsl,sec-v4.0-job-ring";
+					reg = <0x1000 0x1000>;
+					interrupts = <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>;
+				};
+
+				sec_jr1: jr1@2000 {
+					compatible = "fsl,sec-v4.0-job-ring";
+					reg = <0x2000 0x1000>;
+					interrupts = <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>;
+				};
 			};
 
 			aipstz@0217c000 { /* AIPSTZ2 */
diff --git a/arch/arm/boot/dts/imx6sl-evk.dts b/arch/arm/boot/dts/imx6sl-evk.dts
index 945887d..b84dff2 100644
--- a/arch/arm/boot/dts/imx6sl-evk.dts
+++ b/arch/arm/boot/dts/imx6sl-evk.dts
@@ -617,8 +617,8 @@
 	pinctrl-1 = <&pinctrl_usdhc1_100mhz>;
 	pinctrl-2 = <&pinctrl_usdhc1_200mhz>;
 	bus-width = <8>;
-	cd-gpios = <&gpio4 7 0>;
-	wp-gpios = <&gpio4 6 0>;
+	cd-gpios = <&gpio4 7 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio4 6 GPIO_ACTIVE_HIGH>;
 	status = "okay";
 };
 
@@ -627,8 +627,8 @@
 	pinctrl-0 = <&pinctrl_usdhc2>;
 	pinctrl-1 = <&pinctrl_usdhc2_100mhz>;
 	pinctrl-2 = <&pinctrl_usdhc2_200mhz>;
-	cd-gpios = <&gpio5 0 0>;
-	wp-gpios = <&gpio4 29 0>;
+	cd-gpios = <&gpio5 0 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio4 29 GPIO_ACTIVE_HIGH>;
 	status = "okay";
 };
 
@@ -637,6 +637,6 @@
 	pinctrl-0 = <&pinctrl_usdhc3>;
 	pinctrl-1 = <&pinctrl_usdhc3_100mhz>;
 	pinctrl-2 = <&pinctrl_usdhc3_200mhz>;
-	cd-gpios = <&gpio3 22 0>;
+	cd-gpios = <&gpio3 22 GPIO_ACTIVE_LOW>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6sx-sabreauto.dts b/arch/arm/boot/dts/imx6sx-sabreauto.dts
index e3c0b63..115f3fd 100644
--- a/arch/arm/boot/dts/imx6sx-sabreauto.dts
+++ b/arch/arm/boot/dts/imx6sx-sabreauto.dts
@@ -49,7 +49,7 @@
 	pinctrl-1 = <&pinctrl_usdhc3_100mhz>;
 	pinctrl-2 = <&pinctrl_usdhc3_200mhz>;
 	bus-width = <8>;
-	cd-gpios = <&gpio7 10 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio7 10 GPIO_ACTIVE_LOW>;
 	wp-gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>;
 	keep-power-in-suspend;
 	enable-sdio-wakeup;
@@ -61,7 +61,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc4>;
 	bus-width = <8>;
-	cd-gpios = <&gpio7 11 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio7 11 GPIO_ACTIVE_LOW>;
 	no-1-8-v;
 	keep-power-in-suspend;
 	enable-sdio-wakup;
diff --git a/arch/arm/boot/dts/imx6sx-sdb.dtsi b/arch/arm/boot/dts/imx6sx-sdb.dtsi
index cef04ce..ac88c34 100644
--- a/arch/arm/boot/dts/imx6sx-sdb.dtsi
+++ b/arch/arm/boot/dts/imx6sx-sdb.dtsi
@@ -293,7 +293,7 @@
 	pinctrl-1 = <&pinctrl_usdhc3_100mhz>;
 	pinctrl-2 = <&pinctrl_usdhc3_200mhz>;
 	bus-width = <8>;
-	cd-gpios = <&gpio2 10 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio2 10 GPIO_ACTIVE_LOW>;
 	wp-gpios = <&gpio2 15 GPIO_ACTIVE_HIGH>;
 	keep-power-in-suspend;
 	enable-sdio-wakeup;
@@ -304,7 +304,7 @@
 &usdhc4 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc4>;
-	cd-gpios = <&gpio6 21 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&gpio6 21 GPIO_ACTIVE_LOW>;
 	wp-gpios = <&gpio6 20 GPIO_ACTIVE_HIGH>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi
index 708175d..e6223d8 100644
--- a/arch/arm/boot/dts/imx6sx.dtsi
+++ b/arch/arm/boot/dts/imx6sx.dtsi
@@ -738,6 +738,33 @@
 			reg = <0x02100000 0x100000>;
 			ranges;
 
+			crypto: caam@2100000 {
+				compatible = "fsl,sec-v4.0";
+				fsl,sec-era = <4>;
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0x2100000 0x10000>;
+				ranges = <0 0x2100000 0x10000>;
+				interrupt-parent = <&intc>;
+				clocks = <&clks IMX6SX_CLK_CAAM_MEM>,
+					 <&clks IMX6SX_CLK_CAAM_ACLK>,
+					 <&clks IMX6SX_CLK_CAAM_IPG>,
+					 <&clks IMX6SX_CLK_EIM_SLOW>;
+				clock-names = "mem", "aclk", "ipg", "emi_slow";
+
+				sec_jr0: jr0@1000 {
+					compatible = "fsl,sec-v4.0-job-ring";
+					reg = <0x1000 0x1000>;
+					interrupts = <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>;
+				};
+
+				sec_jr1: jr1@2000 {
+					compatible = "fsl,sec-v4.0-job-ring";
+					reg = <0x2000 0x1000>;
+					interrupts = <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>;
+				};
+			};
+
 			usbotg1: usb@02184000 {
 				compatible = "fsl,imx6sx-usb", "fsl,imx27-usb";
 				reg = <0x02184000 0x200>;
diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts
index 4d1a4b9..fdd1d7c 100644
--- a/arch/arm/boot/dts/imx7d-sdb.dts
+++ b/arch/arm/boot/dts/imx7d-sdb.dts
@@ -234,8 +234,8 @@
 &usdhc1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc1>;
-	cd-gpios = <&gpio5 0 0>;
-	wp-gpios = <&gpio5 1 0>;
+	cd-gpios = <&gpio5 0 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio5 1 GPIO_ACTIVE_HIGH>;
 	enable-sdio-wakeup;
 	keep-power-in-suspend;
 	status = "okay";
diff --git a/arch/arm/boot/dts/k2e-clocks.dtsi b/arch/arm/boot/dts/k2e-clocks.dtsi
index 4773d6a..d56d68f 100644
--- a/arch/arm/boot/dts/k2e-clocks.dtsi
+++ b/arch/arm/boot/dts/k2e-clocks.dtsi
@@ -13,9 +13,8 @@ clocks {
 		#clock-cells = <0>;
 		compatible = "ti,keystone,main-pll-clock";
 		clocks = <&refclksys>;
-		reg = <0x02620350 4>, <0x02310110 4>;
-		reg-names = "control", "multiplier";
-		fixed-postdiv = <2>;
+		reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>;
+		reg-names = "control", "multiplier", "post-divider";
 	};
 
 	papllclk: papllclk@2620358 {
diff --git a/arch/arm/boot/dts/k2e.dtsi b/arch/arm/boot/dts/k2e.dtsi
index 50e555e..675fb8e 100644
--- a/arch/arm/boot/dts/k2e.dtsi
+++ b/arch/arm/boot/dts/k2e.dtsi
@@ -86,7 +86,7 @@
 			gpio,syscon-dev = <&devctrl 0x240>;
 		};
 
-		pcie@21020000 {
+		pcie1: pcie@21020000 {
 			compatible = "ti,keystone-pcie","snps,dw-pcie";
 			clocks = <&clkpcie1>;
 			clock-names = "pcie";
@@ -96,6 +96,7 @@
 			ranges = <0x81000000 0 0 0x23260000 0x4000 0x4000
 				0x82000000 0 0x60000000 0x60000000 0 0x10000000>;
 
+			status = "disabled";
 			device_type = "pci";
 			num-lanes = <2>;
 
@@ -130,10 +131,17 @@
 					<GIC_SPI 376 IRQ_TYPE_EDGE_RISING>;
 			};
 		};
+
+		mdio: mdio@24200f00 {
+			compatible	= "ti,keystone_mdio", "ti,davinci_mdio";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x24200f00 0x100>;
+			status = "disabled";
+			clocks = <&clkcpgmac>;
+			clock-names = "fck";
+			bus_freq	= <2500000>;
+		};
 		/include/ "k2e-netcp.dtsi"
 	};
 };
-
-&mdio {
-	reg = <0x24200f00 0x100>;
-};
diff --git a/arch/arm/boot/dts/k2hk-clocks.dtsi b/arch/arm/boot/dts/k2hk-clocks.dtsi
index d5adee3..af9b719 100644
--- a/arch/arm/boot/dts/k2hk-clocks.dtsi
+++ b/arch/arm/boot/dts/k2hk-clocks.dtsi
@@ -22,9 +22,8 @@ clocks {
 		#clock-cells = <0>;
 		compatible = "ti,keystone,main-pll-clock";
 		clocks = <&refclksys>;
-		reg = <0x02620350 4>, <0x02310110 4>;
-		reg-names = "control", "multiplier";
-		fixed-postdiv = <2>;
+		reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>;
+		reg-names = "control", "multiplier", "post-divider";
 	};
 
 	papllclk: papllclk@2620358 {
diff --git a/arch/arm/boot/dts/k2hk.dtsi b/arch/arm/boot/dts/k2hk.dtsi
index ae64724..d0810a5 100644
--- a/arch/arm/boot/dts/k2hk.dtsi
+++ b/arch/arm/boot/dts/k2hk.dtsi
@@ -98,6 +98,17 @@
 			#gpio-cells = <2>;
 			gpio,syscon-dev = <&devctrl 0x25c>;
 		};
+
+		mdio: mdio@02090300 {
+			compatible	= "ti,keystone_mdio", "ti,davinci_mdio";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x02090300 0x100>;
+			status = "disabled";
+			clocks = <&clkcpgmac>;
+			clock-names = "fck";
+			bus_freq	= <2500000>;
+		};
 		/include/ "k2hk-netcp.dtsi"
 	};
 };
diff --git a/arch/arm/boot/dts/k2l-clocks.dtsi b/arch/arm/boot/dts/k2l-clocks.dtsi
index eb1e3e2..ef8464b 100644
--- a/arch/arm/boot/dts/k2l-clocks.dtsi
+++ b/arch/arm/boot/dts/k2l-clocks.dtsi
@@ -22,9 +22,8 @@ clocks {
 		#clock-cells = <0>;
 		compatible = "ti,keystone,main-pll-clock";
 		clocks = <&refclksys>;
-		reg = <0x02620350 4>, <0x02310110 4>;
-		reg-names = "control", "multiplier";
-		fixed-postdiv = <2>;
+		reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>;
+		reg-names = "control", "multiplier", "post-divider";
 	};
 
 	papllclk: papllclk@2620358 {
diff --git a/arch/arm/boot/dts/k2l.dtsi b/arch/arm/boot/dts/k2l.dtsi
index 0e00748..49fd414 100644
--- a/arch/arm/boot/dts/k2l.dtsi
+++ b/arch/arm/boot/dts/k2l.dtsi
@@ -29,7 +29,6 @@
 	};
 
 	soc {
-
 		/include/ "k2l-clocks.dtsi"
 
 		uart2: serial@02348400 {
@@ -79,6 +78,17 @@
 			#gpio-cells = <2>;
 			gpio,syscon-dev = <&devctrl 0x24c>;
 		};
+
+		mdio: mdio@26200f00 {
+			compatible	= "ti,keystone_mdio", "ti,davinci_mdio";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x26200f00 0x100>;
+			status = "disabled";
+			clocks = <&clkcpgmac>;
+			clock-names = "fck";
+			bus_freq	= <2500000>;
+		};
 		/include/ "k2l-netcp.dtsi"
 	};
 };
@@ -96,7 +106,3 @@
        /* Pin muxed. Enabled and configured by Bootloader */
        status = "disabled";
 };
-
-&mdio {
-	reg = <0x26200f00 0x100>;
-};
diff --git a/arch/arm/boot/dts/keystone.dtsi b/arch/arm/boot/dts/keystone.dtsi
index c06542b..72816d6 100644
--- a/arch/arm/boot/dts/keystone.dtsi
+++ b/arch/arm/boot/dts/keystone.dtsi
@@ -267,17 +267,6 @@
 				  1 0 0x21000A00 0x00000100>;
 		};
 
-		mdio: mdio@02090300 {
-			compatible	= "ti,keystone_mdio", "ti,davinci_mdio";
-			#address-cells = <1>;
-			#size-cells = <0>;
-			reg		= <0x02090300 0x100>;
-			status = "disabled";
-			clocks = <&clkpa>;
-			clock-names = "fck";
-			bus_freq	= <2500000>;
-		};
-
 		kirq0: keystone_irq@26202a0 {
 			compatible = "ti,keystone-irq";
 			interrupts = <GIC_SPI 4 IRQ_TYPE_EDGE_RISING>;
@@ -286,7 +275,7 @@
 			ti,syscon-dev = <&devctrl 0x2a0>;
 		};
 
-		pcie@21800000 {
+		pcie0: pcie@21800000 {
 			compatible = "ti,keystone-pcie", "snps,dw-pcie";
 			clocks = <&clkpcie>;
 			clock-names = "pcie";
@@ -296,6 +285,7 @@
 			ranges = <0x81000000 0 0 0x23250000 0 0x4000
 				0x82000000 0 0x50000000 0x50000000 0 0x10000000>;
 
+			status = "disabled";
 			device_type = "pci";
 			num-lanes = <2>;
 
diff --git a/arch/arm/boot/dts/kirkwood-d2net.dts b/arch/arm/boot/dts/kirkwood-d2net.dts
index 6b78560..e1c25c3 100644
--- a/arch/arm/boot/dts/kirkwood-d2net.dts
+++ b/arch/arm/boot/dts/kirkwood-d2net.dts
@@ -10,6 +10,7 @@
 
 /dts-v1/;
 
+#include <dt-bindings/leds/leds-ns2.h>
 #include "kirkwood-netxbig.dtsi"
 
 / {
@@ -28,6 +29,10 @@
 			label = "d2net_v2:blue:sata";
 			slow-gpio = <&gpio0 29 GPIO_ACTIVE_HIGH>;
 			cmd-gpio = <&gpio0 30 GPIO_ACTIVE_HIGH>;
+			modes-map = <NS_V2_LED_OFF  1 0
+				     NS_V2_LED_ON   0 1
+				     NS_V2_LED_ON   1 1
+				     NS_V2_LED_SATA 0 0>;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/kirkwood-is2.dts b/arch/arm/boot/dts/kirkwood-is2.dts
index da674bb..4121674 100644
--- a/arch/arm/boot/dts/kirkwood-is2.dts
+++ b/arch/arm/boot/dts/kirkwood-is2.dts
@@ -1,5 +1,6 @@
 /dts-v1/;
 
+#include <dt-bindings/leds/leds-ns2.h>
 #include "kirkwood-ns2-common.dtsi"
 
 / {
@@ -27,6 +28,10 @@
 			label = "ns2:blue:sata";
 			slow-gpio = <&gpio0 29 0>;
 			cmd-gpio = <&gpio0 30 0>;
+			modes-map = <NS_V2_LED_OFF  1 0
+				     NS_V2_LED_ON   0 1
+				     NS_V2_LED_ON   1 1
+				     NS_V2_LED_SATA 0 0>;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/kirkwood-ns2.dts b/arch/arm/boot/dts/kirkwood-ns2.dts
index 53368d1..190189d 100644
--- a/arch/arm/boot/dts/kirkwood-ns2.dts
+++ b/arch/arm/boot/dts/kirkwood-ns2.dts
@@ -1,5 +1,6 @@
 /dts-v1/;
 
+#include <dt-bindings/leds/leds-ns2.h>
 #include "kirkwood-ns2-common.dtsi"
 
 / {
@@ -27,6 +28,10 @@
 			label = "ns2:blue:sata";
 			slow-gpio = <&gpio0 29 0>;
 			cmd-gpio = <&gpio0 30 0>;
+			modes-map = <NS_V2_LED_OFF  1 0
+				     NS_V2_LED_ON   0 1
+				     NS_V2_LED_ON   1 1
+				     NS_V2_LED_SATA 0 0>;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/kirkwood-ns2max.dts b/arch/arm/boot/dts/kirkwood-ns2max.dts
index 72c78d0..55cc41d 100644
--- a/arch/arm/boot/dts/kirkwood-ns2max.dts
+++ b/arch/arm/boot/dts/kirkwood-ns2max.dts
@@ -1,5 +1,6 @@
 /dts-v1/;
 
+#include <dt-bindings/leds/leds-ns2.h>
 #include "kirkwood-ns2-common.dtsi"
 
 / {
@@ -46,6 +47,10 @@
 			label = "ns2:blue:sata";
 			slow-gpio = <&gpio0 29 0>;
 			cmd-gpio = <&gpio0 30 0>;
+			modes-map = <NS_V2_LED_OFF  1 0
+				     NS_V2_LED_ON   0 1
+				     NS_V2_LED_ON   1 1
+				     NS_V2_LED_SATA 0 0>;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/kirkwood-ns2mini.dts b/arch/arm/boot/dts/kirkwood-ns2mini.dts
index c441bf6..9935f3e 100644
--- a/arch/arm/boot/dts/kirkwood-ns2mini.dts
+++ b/arch/arm/boot/dts/kirkwood-ns2mini.dts
@@ -1,5 +1,6 @@
 /dts-v1/;
 
+#include <dt-bindings/leds/leds-ns2.h>
 #include "kirkwood-ns2-common.dtsi"
 
 / {
@@ -47,6 +48,10 @@
 			label = "ns2:blue:sata";
 			slow-gpio = <&gpio0 29 0>;
 			cmd-gpio = <&gpio0 30 0>;
+			modes-map = <NS_V2_LED_OFF  1 0
+				     NS_V2_LED_ON   0 1
+				     NS_V2_LED_ON   1 1
+				     NS_V2_LED_SATA 0 0>;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/omap2430.dtsi b/arch/arm/boot/dts/omap2430.dtsi
index 11a7963..2390f38 100644
--- a/arch/arm/boot/dts/omap2430.dtsi
+++ b/arch/arm/boot/dts/omap2430.dtsi
@@ -51,7 +51,8 @@
 				};
 
 				scm_conf: scm_conf@270 {
-					compatible = "syscon";
+					compatible = "syscon",
+						     "simple-bus";
 					reg = <0x270 0x240>;
 					#address-cells = <1>;
 					#size-cells = <1>;
diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi
index 233c69e..df8908a 100644
--- a/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi
@@ -120,7 +120,7 @@
 
 	lcd0: display@0 {
 		compatible = "lgphilips,lb035q02";
-		label = "lcd";
+		label = "lcd35";
 
 		reg = <1>;					/* CS1 */
 		spi-max-frequency = <10000000>;
diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi
index f5395b7..048fd21 100644
--- a/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi
@@ -98,7 +98,7 @@
 
 	lcd0: display@0 {
 		compatible = "samsung,lte430wq-f0c", "panel-dpi";
-		label = "lcd";
+		label = "lcd43";
 
 		pinctrl-names = "default";
 		pinctrl-0 = <&lte430_pins>;
diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi
index f884d6a..abc4473 100644
--- a/arch/arm/boot/dts/omap4.dtsi
+++ b/arch/arm/boot/dts/omap4.dtsi
@@ -191,7 +191,8 @@
 				};
 
 				omap4_padconf_global: omap4_padconf_global@5a0 {
-					compatible = "syscon";
+					compatible = "syscon",
+						     "simple-bus";
 					reg = <0x5a0 0x170>;
 					#address-cells = <1>;
 					#size-cells = <1>;
@@ -551,6 +552,7 @@
 			reg = <0x4a066000 0x100>;
 			interrupts = <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "mmu_dsp";
+			#iommu-cells = <0>;
 		};
 
 		mmu_ipu: mmu@55082000 {
@@ -558,6 +560,7 @@
 			reg = <0x55082000 0x100>;
 			interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "mmu_ipu";
+			#iommu-cells = <0>;
 			ti,iommu-bus-err-back;
 		};
 
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index 7d24ae0..b1a1263 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -180,7 +180,8 @@
 				};
 
 				omap5_padconf_global: omap5_padconf_global@5a0 {
-					compatible = "syscon";
+					compatible = "syscon",
+						     "simple-bus";
 					reg = <0x5a0 0xec>;
 					#address-cells = <1>;
 					#size-cells = <1>;
@@ -612,6 +613,7 @@
 			reg = <0x4a066000 0x100>;
 			interrupts = <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "mmu_dsp";
+			#iommu-cells = <0>;
 		};
 
 		mmu_ipu: mmu@55082000 {
@@ -619,6 +621,7 @@
 			reg = <0x55082000 0x100>;
 			interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "mmu_ipu";
+			#iommu-cells = <0>;
 			ti,iommu-bus-err-back;
 		};
 
diff --git a/arch/arm/boot/dts/qcom-msm8974-sony-xperia-honami.dts b/arch/arm/boot/dts/qcom-msm8974-sony-xperia-honami.dts
index bd35b06..9bc72a3 100644
--- a/arch/arm/boot/dts/qcom-msm8974-sony-xperia-honami.dts
+++ b/arch/arm/boot/dts/qcom-msm8974-sony-xperia-honami.dts
@@ -17,3 +17,13 @@
 		status = "ok";
 	};
 };
+
+&spmi_bus {
+	pm8941@0 {
+		coincell@2800 {
+			status = "ok";
+			qcom,rset-ohms = <2100>;
+			qcom,vset-millivolts = <3000>;
+		};
+	};
+};
diff --git a/arch/arm/boot/dts/qcom-pm8941.dtsi b/arch/arm/boot/dts/qcom-pm8941.dtsi
index aa774e6..968f104 100644
--- a/arch/arm/boot/dts/qcom-pm8941.dtsi
+++ b/arch/arm/boot/dts/qcom-pm8941.dtsi
@@ -125,6 +125,12 @@
 			interrupts = <0x0 0x36 0x0 IRQ_TYPE_EDGE_RISING>;
 			qcom,external-resistor-micro-ohms = <10000>;
 		};
+
+		coincell@2800 {
+			compatible = "qcom,pm8941-coincell";
+			reg = <0x2800>;
+			status = "disabled";
+		};
 	};
 
 	usid1: pm8941@1 {
diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts
index 71468a7..5e17fd1 100644
--- a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts
+++ b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts
@@ -60,27 +60,27 @@
 	rxc-skew-ps = <2000>;
 };
 
-&mmc0 {
-	vmmc-supply = <&regulator_3_3v>;
-	vqmmc-supply = <&regulator_3_3v>;
-};
-
-&usb1 {
-	status = "okay";
-};
-
 &gpio2 {
 	status = "okay";
 };
 
-&i2c1{
+&i2c1 {
 	status = "okay";
 
-	accel1: accel1@53{
-		compatible = "adxl34x";
+	accel1: accelerometer@53 {
+		compatible = "adi,adxl345";
 		reg = <0x53>;
 
-		interrupt-parent = < &portc >;
+		interrupt-parent = <&portc>;
 		interrupts = <3 2>;
 	};
 };
+
+&mmc0 {
+	vmmc-supply = <&regulator_3_3v>;
+	vqmmc-supply = <&regulator_3_3v>;
+};
+
+&usb1 {
+	status = "okay";
+};
diff --git a/arch/arm/boot/dts/spear1310-evb.dts b/arch/arm/boot/dts/spear1310-evb.dts
index d42c84b..e488572 100644
--- a/arch/arm/boot/dts/spear1310-evb.dts
+++ b/arch/arm/boot/dts/spear1310-evb.dts
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr1310 Evaluation Baord
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear1310.dtsi b/arch/arm/boot/dts/spear1310.dtsi
index 9d34292..54bc6d3 100644
--- a/arch/arm/boot/dts/spear1310.dtsi
+++ b/arch/arm/boot/dts/spear1310.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for all SPEAr1310 SoCs
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear1340-evb.dts b/arch/arm/boot/dts/spear1340-evb.dts
index b23e05e..c611f56 100644
--- a/arch/arm/boot/dts/spear1340-evb.dts
+++ b/arch/arm/boot/dts/spear1340-evb.dts
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr1340 Evaluation Baord
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear1340.dtsi b/arch/arm/boot/dts/spear1340.dtsi
index 13e1aa3..df2232d 100644
--- a/arch/arm/boot/dts/spear1340.dtsi
+++ b/arch/arm/boot/dts/spear1340.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for all SPEAr1340 SoCs
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi
index 40accc8..14594ce 100644
--- a/arch/arm/boot/dts/spear13xx.dtsi
+++ b/arch/arm/boot/dts/spear13xx.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for all SPEAr13xx SoCs
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear300-evb.dts b/arch/arm/boot/dts/spear300-evb.dts
index 5de1431..e859e82 100644
--- a/arch/arm/boot/dts/spear300-evb.dts
+++ b/arch/arm/boot/dts/spear300-evb.dts
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr300 Evaluation Baord
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear300.dtsi b/arch/arm/boot/dts/spear300.dtsi
index f79b3df..f4e92e5 100644
--- a/arch/arm/boot/dts/spear300.dtsi
+++ b/arch/arm/boot/dts/spear300.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr300 SoC
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear310-evb.dts b/arch/arm/boot/dts/spear310-evb.dts
index b096329..070f2c1 100644
--- a/arch/arm/boot/dts/spear310-evb.dts
+++ b/arch/arm/boot/dts/spear310-evb.dts
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr310 Evaluation Baord
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear310.dtsi b/arch/arm/boot/dts/spear310.dtsi
index 9537208..da210b4 100644
--- a/arch/arm/boot/dts/spear310.dtsi
+++ b/arch/arm/boot/dts/spear310.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr310 SoC
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear320-evb.dts b/arch/arm/boot/dts/spear320-evb.dts
index fdedbb5..1b10344 100644
--- a/arch/arm/boot/dts/spear320-evb.dts
+++ b/arch/arm/boot/dts/spear320-evb.dts
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr320 Evaluation Baord
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear320.dtsi b/arch/arm/boot/dts/spear320.dtsi
index ffea342..22be6e5 100644
--- a/arch/arm/boot/dts/spear320.dtsi
+++ b/arch/arm/boot/dts/spear320.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr320 SoC
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear3xx.dtsi b/arch/arm/boot/dts/spear3xx.dtsi
index f0e3fcf..118135d 100644
--- a/arch/arm/boot/dts/spear3xx.dtsi
+++ b/arch/arm/boot/dts/spear3xx.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for all SPEAr3xx SoCs
  *
- * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
+ * Copyright 2012 Viresh Kumar <vireshk@kernel.org>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/ste-ccu8540.dts b/arch/arm/boot/dts/ste-ccu8540.dts
index 32dd55e..6eaaf63 100644
--- a/arch/arm/boot/dts/ste-ccu8540.dts
+++ b/arch/arm/boot/dts/ste-ccu8540.dts
@@ -17,6 +17,13 @@
 	model = "ST-Ericsson U8540 platform with Device Tree";
 	compatible = "st-ericsson,ccu8540", "st-ericsson,u8540";
 
+	/* This stablilizes the serial port enumeration */
+	aliases {
+		serial0 = &ux500_serial0;
+		serial1 = &ux500_serial1;
+		serial2 = &ux500_serial2;
+	};
+
 	memory@0 {
 		device_type = "memory";
 		reg = <0x20000000 0x1f000000>, <0xc0000000 0x3f000000>;
diff --git a/arch/arm/boot/dts/ste-ccu9540.dts b/arch/arm/boot/dts/ste-ccu9540.dts
index 651c56d..c8b8158 100644
--- a/arch/arm/boot/dts/ste-ccu9540.dts
+++ b/arch/arm/boot/dts/ste-ccu9540.dts
@@ -16,6 +16,13 @@
 	model = "ST-Ericsson CCU9540 platform with Device Tree";
 	compatible = "st-ericsson,ccu9540", "st-ericsson,u9540";
 
+	/* This stablilizes the serial port enumeration */
+	aliases {
+		serial0 = &ux500_serial0;
+		serial1 = &ux500_serial1;
+		serial2 = &ux500_serial2;
+	};
+
 	memory {
 		reg = <0x00000000 0x20000000>;
 	};
diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi
index 853684a..c5fbde3 100644
--- a/arch/arm/boot/dts/ste-dbx5x0.dtsi
+++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi
@@ -15,6 +15,33 @@
 #include "skeleton.dtsi"
 
 / {
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		enable-method = "ste,dbx500-smp";
+
+		cpu-map {
+			cluster0 {
+				core0 {
+					cpu = <&CPU0>;
+				};
+				core1 {
+					cpu = <&CPU1>;
+				};
+			};
+		};
+		CPU0: cpu@300 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a9";
+			reg = <0x300>;
+		};
+		CPU1: cpu@301 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a9";
+			reg = <0x301>;
+		};
+	};
+
 	soc {
 		#address-cells = <1>;
 		#size-cells = <1>;
@@ -22,32 +49,6 @@
 		interrupt-parent = <&intc>;
 		ranges;
 
-		cpus {
-			#address-cells = <1>;
-			#size-cells = <0>;
-
-			cpu-map {
-				cluster0 {
-					core0 {
-						cpu = <&CPU0>;
-					};
-					core1 {
-						cpu = <&CPU1>;
-					};
-				};
-			};
-			CPU0: cpu@0 {
-				device_type = "cpu";
-				compatible = "arm,cortex-a9";
-				reg = <0>;
-			};
-			CPU1: cpu@1 {
-				device_type = "cpu";
-				compatible = "arm,cortex-a9";
-				reg = <1>;
-			};
-		};
-
 		ptm@801ae000 {
 			compatible = "arm,coresight-etm3x", "arm,primecell";
 			reg = <0x801ae000 0x1000>;
@@ -219,6 +220,13 @@
 
 		clocks {
 			compatible = "stericsson,u8500-clks";
+			/*
+			 * Registers for the CLKRST block on peripheral
+			 * groups 1, 2, 3, 5, 6,
+			 */
+			reg = <0x8012f000 0x1000>, <0x8011f000 0x1000>,
+			    <0x8000f000 0x1000>, <0xa03ff000 0x1000>,
+			    <0xa03cf000 0x1000>;
 
 			prcmu_clk: prcmu-clock {
 				#clock-cells = <1>;
@@ -971,7 +979,7 @@
 			power-domains = <&pm_domains DOMAIN_VAPE>;
 		};
 
-		uart@80120000 {
+		ux500_serial0: uart@80120000 {
 			compatible = "arm,pl011", "arm,primecell";
 			reg = <0x80120000 0x1000>;
 			interrupts = <0 11 IRQ_TYPE_LEVEL_HIGH>;
@@ -986,7 +994,7 @@
 			status = "disabled";
 		};
 
-		uart@80121000 {
+		ux500_serial1: uart@80121000 {
 			compatible = "arm,pl011", "arm,primecell";
 			reg = <0x80121000 0x1000>;
 			interrupts = <0 19 IRQ_TYPE_LEVEL_HIGH>;
@@ -1001,7 +1009,7 @@
 			status = "disabled";
 		};
 
-		uart@80007000 {
+		ux500_serial2: uart@80007000 {
 			compatible = "arm,pl011", "arm,primecell";
 			reg = <0x80007000 0x1000>;
 			interrupts = <0 26 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/arm/boot/dts/ste-href.dtsi b/arch/arm/boot/dts/ste-href.dtsi
index 744c1e3..6d8ce15 100644
--- a/arch/arm/boot/dts/ste-href.dtsi
+++ b/arch/arm/boot/dts/ste-href.dtsi
@@ -32,11 +32,11 @@
 			status = "okay";
 		};
 
+		/* This UART is unused and thus left disabled */
 		uart@80121000 {
 			pinctrl-names = "default", "sleep";
 			pinctrl-0 = <&uart1_default_mode>;
 			pinctrl-1 = <&uart1_sleep_mode>;
-			status = "okay";
 		};
 
 		uart@80007000 {
diff --git a/arch/arm/boot/dts/ste-hrefprev60-stuib.dts b/arch/arm/boot/dts/ste-hrefprev60-stuib.dts
index 2b1cb5b..18e9795 100644
--- a/arch/arm/boot/dts/ste-hrefprev60-stuib.dts
+++ b/arch/arm/boot/dts/ste-hrefprev60-stuib.dts
@@ -17,6 +17,13 @@
 	model = "ST-Ericsson HREF (pre-v60) and ST UIB";
 	compatible = "st-ericsson,mop500", "st-ericsson,u8500";
 
+	/* This stablilizes the serial port enumeration */
+	aliases {
+		serial0 = &ux500_serial0;
+		serial1 = &ux500_serial1;
+		serial2 = &ux500_serial2;
+	};
+
 	soc {
 		/* Reset line for the BU21013 touchscreen */
 		i2c@80110000 {
diff --git a/arch/arm/boot/dts/ste-hrefprev60-tvk.dts b/arch/arm/boot/dts/ste-hrefprev60-tvk.dts
index 59523f8..2473991 100644
--- a/arch/arm/boot/dts/ste-hrefprev60-tvk.dts
+++ b/arch/arm/boot/dts/ste-hrefprev60-tvk.dts
@@ -16,4 +16,11 @@
 / {
 	model = "ST-Ericsson HREF (pre-v60) and TVK1281618 UIB";
 	compatible = "st-ericsson,mop500", "st-ericsson,u8500";
+
+	/* This stablilizes the serial port enumeration */
+	aliases {
+		serial0 = &ux500_serial0;
+		serial1 = &ux500_serial1;
+		serial2 = &ux500_serial2;
+	};
 };
diff --git a/arch/arm/boot/dts/ste-hrefprev60.dtsi b/arch/arm/boot/dts/ste-hrefprev60.dtsi
index 7f3975b..b0278f4 100644
--- a/arch/arm/boot/dts/ste-hrefprev60.dtsi
+++ b/arch/arm/boot/dts/ste-hrefprev60.dtsi
@@ -23,6 +23,11 @@
 	};
 
 	soc {
+		/* Enable UART1 on this board */
+		uart@80121000 {
+			status = "okay";
+		};
+
 		i2c@80004000 {
 			tps61052@33 {
 				compatible = "tps61052";
diff --git a/arch/arm/boot/dts/ste-hrefv60plus-stuib.dts b/arch/arm/boot/dts/ste-hrefv60plus-stuib.dts
index 8c6a2de..c2e1ba0 100644
--- a/arch/arm/boot/dts/ste-hrefv60plus-stuib.dts
+++ b/arch/arm/boot/dts/ste-hrefv60plus-stuib.dts
@@ -19,6 +19,13 @@
 	model = "ST-Ericsson HREF (v60+) and ST UIB";
 	compatible = "st-ericsson,hrefv60+", "st-ericsson,u8500";
 
+	/* This stablilizes the serial port enumeration */
+	aliases {
+		serial0 = &ux500_serial0;
+		serial1 = &ux500_serial1;
+		serial2 = &ux500_serial2;
+	};
+
 	soc {
 		/* Reset line for the BU21013 touchscreen */
 		i2c@80110000 {
diff --git a/arch/arm/boot/dts/ste-hrefv60plus-tvk.dts b/arch/arm/boot/dts/ste-hrefv60plus-tvk.dts
index d53cccd..ebd8547 100644
--- a/arch/arm/boot/dts/ste-hrefv60plus-tvk.dts
+++ b/arch/arm/boot/dts/ste-hrefv60plus-tvk.dts
@@ -18,4 +18,11 @@
 / {
 	model = "ST-Ericsson HREF (v60+) and TVK1281618 UIB";
 	compatible = "st-ericsson,hrefv60+", "st-ericsson,u8500";
+
+	/* This stablilizes the serial port enumeration */
+	aliases {
+		serial0 = &ux500_serial0;
+		serial1 = &ux500_serial1;
+		serial2 = &ux500_serial2;
+	};
 };
diff --git a/arch/arm/boot/dts/ste-hrefv60plus.dtsi b/arch/arm/boot/dts/ste-hrefv60plus.dtsi
index a4bc9e7..810cda7 100644
--- a/arch/arm/boot/dts/ste-hrefv60plus.dtsi
+++ b/arch/arm/boot/dts/ste-hrefv60plus.dtsi
@@ -43,15 +43,26 @@
 				  <&vaudio_hf_hrefv60_mode>,
 				  <&gbf_hrefv60_mode>,
 				  <&hdtv_hrefv60_mode>,
-				  <&touch_hrefv60_mode>;
+				  <&touch_hrefv60_mode>,
+				  <&gpios_hrefv60_mode>;
 
 			sdi0 {
-				/* SD card detect GPIO pin, extend default state */
 				sdi0_default_mode: sdi0_default {
+					/* SD card detect GPIO pin, extend default state */
 					default_hrefv60_cfg1 {
 						pins = "GPIO95_E8";
 						ste,config = <&gpio_in_pu>;
 					};
+					/* VMMCI level-shifter enable */
+					default_hrefv60_cfg2 {
+						pins = "GPIO169_D22";
+						ste,config = <&gpio_out_lo>;
+					};
+					/* VMMCI level-shifter voltage select */
+					default_hrefv60_cfg3 {
+						pins = "GPIO5_AG6";
+						ste,config = <&gpio_out_hi>;
+					};
 				};
 			};
 			ipgpio {
@@ -213,6 +224,16 @@
 					};
 				};
 			};
+			gpios {
+				/* Dangling GPIO pins */
+				gpios_hrefv60_mode: gpios_hrefv60 {
+					default_cfg1 {
+						/* Normally UART1 RXD, now dangling */
+						pins = "GPIO4_AH6";
+						ste,config = <&in_pu>;
+					};
+				};
+			};
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/ste-nomadik-nhk15.dts b/arch/arm/boot/dts/ste-nomadik-nhk15.dts
index 3d0b875..3d25dba 100644
--- a/arch/arm/boot/dts/ste-nomadik-nhk15.dts
+++ b/arch/arm/boot/dts/ste-nomadik-nhk15.dts
@@ -17,6 +17,7 @@
 	};
 
 	aliases {
+		serial1 = &uart1;
 		stmpe-i2c0 = &stmpe0;
 		stmpe-i2c1 = &stmpe1;
 	};
diff --git a/arch/arm/boot/dts/ste-nomadik-s8815.dts b/arch/arm/boot/dts/ste-nomadik-s8815.dts
index 85d3b95..3c140d0 100644
--- a/arch/arm/boot/dts/ste-nomadik-s8815.dts
+++ b/arch/arm/boot/dts/ste-nomadik-s8815.dts
@@ -15,6 +15,10 @@
 		bootargs = "root=/dev/ram0 console=ttyAMA1,115200n8 earlyprintk";
 	};
 
+	aliases {
+		serial1 = &uart1;
+	};
+
 	src@101e0000 {
 		/* These chrystal drivers are not used on this board */
 		disable-sxtalo;
diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
index 9a5f2ba..ef794a3 100644
--- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
+++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
@@ -757,6 +757,7 @@
 			clock-names = "uartclk", "apb_pclk";
 			pinctrl-names = "default";
 			pinctrl-0 = <&uart0_default_mux>;
+			status = "disabled";
 		};
 
 		uart1: uart@101fb000 {
diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts
index 9edadc3..32a5ccb 100644
--- a/arch/arm/boot/dts/ste-snowball.dts
+++ b/arch/arm/boot/dts/ste-snowball.dts
@@ -18,6 +18,13 @@
 	model = "Calao Systems Snowball platform with device tree";
 	compatible = "calaosystems,snowball-a9500", "st-ericsson,u9500";
 
+	/* This stablilizes the serial port enumeration */
+	aliases {
+		serial0 = &ux500_serial0;
+		serial1 = &ux500_serial1;
+		serial2 = &ux500_serial2;
+	};
+
 	memory {
 		reg = <0x00000000 0x20000000>;
 	};
@@ -223,11 +230,11 @@
 			status = "okay";
 		};
 
+		/* This UART is unused and thus left disabled */
 		uart@80121000 {
 			pinctrl-names = "default", "sleep";
 			pinctrl-0 = <&uart1_default_mode>;
 			pinctrl-1 = <&uart1_sleep_mode>;
-			status = "okay";
 		};
 
 		uart@80007000 {
@@ -452,7 +459,21 @@
 						pins = "GPIO21_AB3"; /* DAT31DIR */
 						ste,config = <&out_hi>;
 					};
-
+					/* SD card detect GPIO pin, extend default state */
+					snowball_cfg2 {
+						pins = "GPIO218_AH11";
+						ste,config = <&gpio_in_pu>;
+					};
+					/* VMMCI level-shifter enable */
+					snowball_cfg3 {
+						pins = "GPIO217_AH12";
+						ste,config = <&gpio_out_lo>;
+					};
+					/* VMMCI level-shifter voltage select */
+					snowball_cfg4 {
+						pins = "GPIO228_AJ6";
+						ste,config = <&gpio_out_hi>;
+					};
 				};
 			};
 			ssp0 {
diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index 61c03d1..adaa57b 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -241,6 +241,7 @@
 			compatible = "allwinner,sun4i-a10-axi-gates-clk";
 			reg = <0x01c2005c 0x4>;
 			clocks = <&axi>;
+			clock-indices = <0>;
 			clock-output-names = "axi_dram";
 		};
 
@@ -257,17 +258,36 @@
 			compatible = "allwinner,sun4i-a10-ahb-gates-clk";
 			reg = <0x01c20060 0x8>;
 			clocks = <&ahb>;
+			clock-indices = <0>, <1>,
+					<2>, <3>,
+					<4>, <5>, <6>,
+					<7>, <8>, <9>,
+					<10>, <11>, <12>,
+					<13>, <14>, <16>,
+					<17>, <18>, <20>,
+					<21>, <22>, <23>,
+					<24>, <25>, <26>,
+					<32>, <33>, <34>,
+					<35>, <36>, <37>,
+					<40>, <41>, <43>,
+					<44>, <45>,
+					<46>, <47>,
+					<50>, <52>;
 			clock-output-names = "ahb_usb0", "ahb_ehci0",
-				"ahb_ohci0", "ahb_ehci1", "ahb_ohci1", "ahb_ss",
-				"ahb_dma", "ahb_bist", "ahb_mmc0", "ahb_mmc1",
-				"ahb_mmc2", "ahb_mmc3", "ahb_ms", "ahb_nand",
-				"ahb_sdram", "ahb_ace",	"ahb_emac", "ahb_ts",
-				"ahb_spi0", "ahb_spi1", "ahb_spi2", "ahb_spi3",
-				"ahb_pata", "ahb_sata", "ahb_gps", "ahb_ve",
-				"ahb_tvd", "ahb_tve0", "ahb_tve1", "ahb_lcd0",
-				"ahb_lcd1", "ahb_csi0", "ahb_csi1", "ahb_hdmi",
-				"ahb_de_be0", "ahb_de_be1", "ahb_de_fe0",
-				"ahb_de_fe1", "ahb_mp", "ahb_mali400";
+					     "ahb_ohci0", "ahb_ehci1",
+					     "ahb_ohci1", "ahb_ss", "ahb_dma",
+					     "ahb_bist", "ahb_mmc0", "ahb_mmc1",
+					     "ahb_mmc2", "ahb_mmc3", "ahb_ms",
+					     "ahb_nand", "ahb_sdram", "ahb_ace",
+					     "ahb_emac", "ahb_ts", "ahb_spi0",
+					     "ahb_spi1", "ahb_spi2", "ahb_spi3",
+					     "ahb_pata", "ahb_sata", "ahb_gps",
+					     "ahb_ve", "ahb_tvd", "ahb_tve0",
+					     "ahb_tve1", "ahb_lcd0", "ahb_lcd1",
+					     "ahb_csi0", "ahb_csi1", "ahb_hdmi",
+					     "ahb_de_be0", "ahb_de_be1",
+					     "ahb_de_fe0", "ahb_de_fe1",
+					     "ahb_mp", "ahb_mali400";
 		};
 
 		apb0: apb0@01c20054 {
@@ -283,9 +303,14 @@
 			compatible = "allwinner,sun4i-a10-apb0-gates-clk";
 			reg = <0x01c20068 0x4>;
 			clocks = <&apb0>;
+			clock-indices = <0>, <1>,
+					<2>, <3>,
+					<5>, <6>,
+					<7>, <10>;
 			clock-output-names = "apb0_codec", "apb0_spdif",
-				"apb0_ac97", "apb0_iis", "apb0_pio", "apb0_ir0",
-				"apb0_ir1", "apb0_keypad";
+					     "apb0_ac97", "apb0_iis",
+					     "apb0_pio", "apb0_ir0",
+					     "apb0_ir1", "apb0_keypad";
 		};
 
 		apb1: clk@01c20058 {
@@ -301,12 +326,22 @@
 			compatible = "allwinner,sun4i-a10-apb1-gates-clk";
 			reg = <0x01c2006c 0x4>;
 			clocks = <&apb1>;
+			clock-indices = <0>, <1>,
+					<2>, <4>,
+					<5>, <6>,
+					<7>, <16>,
+					<17>, <18>,
+					<19>, <20>,
+					<21>, <22>,
+					<23>;
 			clock-output-names = "apb1_i2c0", "apb1_i2c1",
-				"apb1_i2c2", "apb1_can", "apb1_scr",
-				"apb1_ps20", "apb1_ps21", "apb1_uart0",
-				"apb1_uart1", "apb1_uart2", "apb1_uart3",
-				"apb1_uart4", "apb1_uart5", "apb1_uart6",
-				"apb1_uart7";
+					     "apb1_i2c2", "apb1_can",
+					     "apb1_scr", "apb1_ps20",
+					     "apb1_ps21", "apb1_uart0",
+					     "apb1_uart1", "apb1_uart2",
+					     "apb1_uart3", "apb1_uart4",
+					     "apb1_uart5", "apb1_uart6",
+					     "apb1_uart7";
 		};
 
 		nand_clk: clk@01c20080 {
@@ -643,6 +678,14 @@
 			status = "disabled";
 		};
 
+		crypto: crypto-engine@01c15000 {
+			compatible = "allwinner,sun4i-a10-crypto";
+			reg = <0x01c15000 0x1000>;
+			interrupts = <86>;
+			clocks = <&ahb_gates 5>, <&ss_clk>;
+			clock-names = "ahb", "mod";
+		};
+
 		spi2: spi@01c17000 {
 			compatible = "allwinner,sun4i-a10-spi";
 			reg = <0x01c17000 0x1000>;
diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi
index f11efb7..a513b41 100644
--- a/arch/arm/boot/dts/sun5i-a10s.dtsi
+++ b/arch/arm/boot/dts/sun5i-a10s.dtsi
@@ -85,6 +85,17 @@
 			compatible = "allwinner,sun5i-a10s-ahb-gates-clk";
 			reg = <0x01c20060 0x8>;
 			clocks = <&ahb>;
+			clock-indices = <0>, <1>,
+					<2>, <5>, <6>,
+					<7>, <8>, <9>,
+					<10>, <13>,
+					<14>, <17>, <18>,
+					<20>, <21>, <22>,
+					<26>, <28>, <32>,
+					<34>, <36>, <40>,
+					<43>, <44>,
+					<46>, <51>,
+					<52>;
 			clock-output-names = "ahb_usbotg", "ahb_ehci",
 					     "ahb_ohci", "ahb_ss", "ahb_dma",
 					     "ahb_bist", "ahb_mmc0", "ahb_mmc1",
@@ -103,6 +114,9 @@
 			compatible = "allwinner,sun5i-a10s-apb0-gates-clk";
 			reg = <0x01c20068 0x4>;
 			clocks = <&apb0>;
+			clock-indices = <0>, <3>,
+					<5>, <6>,
+					<10>;
 			clock-output-names = "apb0_codec", "apb0_iis",
 					     "apb0_pio", "apb0_ir",
 					     "apb0_keypad";
@@ -113,9 +127,14 @@
 			compatible = "allwinner,sun5i-a10s-apb1-gates-clk";
 			reg = <0x01c2006c 0x4>;
 			clocks = <&apb1>;
+			clock-indices = <0>, <1>,
+					<2>, <16>,
+					<17>, <18>,
+					<19>;
 			clock-output-names = "apb1_i2c0", "apb1_i2c1",
-				"apb1_i2c2", "apb1_uart0", "apb1_uart1",
-				"apb1_uart2", "apb1_uart3";
+					     "apb1_i2c2", "apb1_uart0",
+					     "apb1_uart1", "apb1_uart2",
+					     "apb1_uart3";
 		};
 	};
 
diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi
index 976d4fa..f3631c9 100644
--- a/arch/arm/boot/dts/sun5i-a13.dtsi
+++ b/arch/arm/boot/dts/sun5i-a13.dtsi
@@ -104,6 +104,16 @@
 			compatible = "allwinner,sun5i-a13-ahb-gates-clk";
 			reg = <0x01c20060 0x8>;
 			clocks = <&ahb>;
+			clock-indices = <0>, <1>,
+					<2>, <5>, <6>,
+					<7>, <8>, <9>,
+					<10>, <13>,
+					<14>, <20>,
+					<21>, <22>,
+				        <28>, <32>, <36>,
+				        <40>, <44>,
+					<46>, <51>,
+					<52>;
 			clock-output-names = "ahb_usbotg", "ahb_ehci",
 					     "ahb_ohci", "ahb_ss", "ahb_dma",
 					     "ahb_bist", "ahb_mmc0", "ahb_mmc1",
@@ -121,6 +131,8 @@
 			compatible = "allwinner,sun5i-a13-apb0-gates-clk";
 			reg = <0x01c20068 0x4>;
 			clocks = <&apb0>;
+			clock-indices = <0>, <5>,
+					<6>;
 			clock-output-names = "apb0_codec", "apb0_pio",
 					     "apb0_ir";
 		};
@@ -130,8 +142,12 @@
 			compatible = "allwinner,sun5i-a13-apb1-gates-clk";
 			reg = <0x01c2006c 0x4>;
 			clocks = <&apb1>;
+			clock-indices = <0>, <1>,
+					<2>, <17>,
+					<19>;
 			clock-output-names = "apb1_i2c0", "apb1_i2c1",
-				"apb1_i2c2", "apb1_uart1", "apb1_uart3";
+					     "apb1_i2c2", "apb1_uart1",
+					     "apb1_uart3";
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/sun5i.dtsi b/arch/arm/boot/dts/sun5i.dtsi
index 54b0978..427c0e7 100644
--- a/arch/arm/boot/dts/sun5i.dtsi
+++ b/arch/arm/boot/dts/sun5i.dtsi
@@ -178,6 +178,7 @@
 			compatible = "allwinner,sun4i-a10-axi-gates-clk";
 			reg = <0x01c2005c 0x4>;
 			clocks = <&axi>;
+			clock-indices = <0>;
 			clock-output-names = "axi_dram";
 		};
 
diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi
index 008047a..e4d3484 100644
--- a/arch/arm/boot/dts/sun6i-a31.dtsi
+++ b/arch/arm/boot/dts/sun6i-a31.dtsi
@@ -252,6 +252,20 @@
 			compatible = "allwinner,sun6i-a31-ahb1-gates-clk";
 			reg = <0x01c20060 0x8>;
 			clocks = <&ahb1>;
+			clock-indices = <1>, <5>,
+					<6>, <8>, <9>,
+					<10>, <11>, <12>,
+					<13>, <14>,
+					<17>, <18>, <19>,
+					<20>, <21>, <22>,
+					<23>, <24>, <26>,
+					<27>, <29>,
+					<30>, <31>, <32>,
+					<36>, <37>, <40>,
+					<43>, <44>, <45>,
+					<46>, <47>, <50>,
+					<52>, <55>, <56>,
+					<57>, <58>;
 			clock-output-names = "ahb1_mipidsi", "ahb1_ss",
 					"ahb1_dma", "ahb1_mmc0", "ahb1_mmc1",
 					"ahb1_mmc2", "ahb1_mmc3", "ahb1_nand1",
@@ -281,6 +295,9 @@
 			compatible = "allwinner,sun6i-a31-apb1-gates-clk";
 			reg = <0x01c20068 0x4>;
 			clocks = <&apb1>;
+			clock-indices = <0>, <4>,
+					<5>, <12>,
+					<13>;
 			clock-output-names = "apb1_codec", "apb1_digital_mic",
 					"apb1_pio", "apb1_daudio0",
 					"apb1_daudio1";
@@ -299,6 +316,10 @@
 			compatible = "allwinner,sun6i-a31-apb2-gates-clk";
 			reg = <0x01c2006c 0x4>;
 			clocks = <&apb2>;
+			clock-indices = <0>, <1>,
+					<2>, <3>, <16>,
+					<17>, <18>, <19>,
+					<20>, <21>;
 			clock-output-names = "apb2_i2c0", "apb2_i2c1",
 					     "apb2_i2c2", "apb2_i2c3",
 					     "apb2_uart0", "apb2_uart1",
@@ -346,6 +367,14 @@
 					     "mmc3_sample";
 		};
 
+		ss_clk: clk@01c2009c {
+			#clock-cells = <0>;
+			compatible = "allwinner,sun4i-a10-mod0-clk";
+			reg = <0x01c2009c 0x4>;
+			clocks = <&osc24M>, <&pll6 0>;
+			clock-output-names = "ss";
+		};
+
 		spi0_clk: clk@01c200a0 {
 			#clock-cells = <0>;
 			compatible = "allwinner,sun4i-a10-mod0-clk";
@@ -384,6 +413,9 @@
 			compatible = "allwinner,sun6i-a31-usb-clk";
 			reg = <0x01c200cc 0x4>;
 			clocks = <&osc24M>;
+			clock-indices = <8>, <9>, <10>,
+					<16>, <17>,
+					<18>;
 			clock-output-names = "usb_phy0", "usb_phy1", "usb_phy2",
 					     "usb_ohci0", "usb_ohci1",
 					     "usb_ohci2";
@@ -870,6 +902,16 @@
 			#size-cells = <0>;
 		};
 
+		crypto: crypto-engine@01c15000 {
+			compatible = "allwinner,sun4i-a10-crypto";
+			reg = <0x01c15000 0x1000>;
+			interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&ahb1_gates 5>, <&ss_clk>;
+			clock-names = "ahb", "mod";
+			resets = <&ahb1_rst 5>;
+			reset-names = "ahb";
+		};
+
 		timer@01c60000 {
 			compatible = "allwinner,sun6i-a31-hstimer",
 				     "allwinner,sun7i-a20-hstimer";
diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index 6a63f30..d3b2f26 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -267,6 +267,19 @@
 			compatible = "allwinner,sun7i-a20-ahb-gates-clk";
 			reg = <0x01c20060 0x8>;
 			clocks = <&ahb>;
+			clock-indices = <0>, <1>,
+					<2>, <3>, <4>,
+					<5>, <6>, <7>, <8>,
+					<9>, <10>, <11>, <12>,
+					<13>, <14>, <16>,
+					<17>, <18>, <20>, <21>,
+					<22>, <23>, <25>,
+					<28>, <32>, <33>, <34>,
+					<35>, <36>, <37>, <40>,
+					<41>, <42>, <43>,
+					<44>, <45>, <46>,
+					<47>, <49>, <50>,
+					<52>;
 			clock-output-names = "ahb_usb0", "ahb_ehci0",
 				"ahb_ohci0", "ahb_ehci1", "ahb_ohci1",
 				"ahb_ss", "ahb_dma", "ahb_bist", "ahb_mmc0",
@@ -295,6 +308,10 @@
 			compatible = "allwinner,sun7i-a20-apb0-gates-clk";
 			reg = <0x01c20068 0x4>;
 			clocks = <&apb0>;
+			clock-indices = <0>, <1>,
+					<2>, <3>, <4>,
+					<5>, <6>, <7>,
+					<8>, <10>;
 			clock-output-names = "apb0_codec", "apb0_spdif",
 				"apb0_ac97", "apb0_iis0", "apb0_iis1",
 				"apb0_pio", "apb0_ir0", "apb0_ir1",
@@ -314,6 +331,12 @@
 			compatible = "allwinner,sun7i-a20-apb1-gates-clk";
 			reg = <0x01c2006c 0x4>;
 			clocks = <&apb1>;
+			clock-indices = <0>, <1>,
+					<2>, <3>, <4>,
+					<5>, <6>, <7>,
+					<15>, <16>, <17>,
+					<18>, <19>, <20>,
+					<21>, <22>, <23>;
 			clock-output-names = "apb1_i2c0", "apb1_i2c1",
 				"apb1_i2c2", "apb1_i2c3", "apb1_can",
 				"apb1_scr", "apb1_ps20", "apb1_ps21",
@@ -731,6 +754,14 @@
 			status = "disabled";
 		};
 
+		crypto: crypto-engine@01c15000 {
+			compatible = "allwinner,sun4i-a10-crypto";
+			reg = <0x01c15000 0x1000>;
+			interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&ahb_gates 5>, <&ss_clk>;
+			clock-names = "ahb", "mod";
+		};
+
 		spi2: spi@01c17000 {
 			compatible = "allwinner,sun4i-a10-spi";
 			reg = <0x01c17000 0x1000>;
diff --git a/arch/arm/boot/dts/sun8i-a23-a33.dtsi b/arch/arm/boot/dts/sun8i-a23-a33.dtsi
index 7abd0ae..c318c77 100644
--- a/arch/arm/boot/dts/sun8i-a23-a33.dtsi
+++ b/arch/arm/boot/dts/sun8i-a23-a33.dtsi
@@ -180,6 +180,15 @@
 			compatible = "allwinner,sun8i-a23-ahb1-gates-clk";
 			reg = <0x01c20060 0x8>;
 			clocks = <&ahb1>;
+			clock-indices = <1>, <6>,
+					<8>, <9>, <10>,
+					<13>, <14>,
+					<19>, <20>,
+					<21>, <24>, <26>,
+					<29>, <32>, <36>,
+					<40>, <44>, <46>,
+					<52>, <54>,
+					<57>;
 			clock-output-names = "ahb1_mipidsi", "ahb1_dma",
 					"ahb1_mmc0", "ahb1_mmc1", "ahb1_mmc2",
 					"ahb1_nand", "ahb1_sdram",
@@ -196,6 +205,8 @@
 			compatible = "allwinner,sun8i-a23-apb1-gates-clk";
 			reg = <0x01c20068 0x4>;
 			clocks = <&apb1>;
+			clock-indices = <0>, <5>,
+					<12>, <13>;
 			clock-output-names = "apb1_codec", "apb1_pio",
 					"apb1_daudio0",	"apb1_daudio1";
 		};
@@ -213,6 +224,10 @@
 			compatible = "allwinner,sun8i-a23-apb2-gates-clk";
 			reg = <0x01c2006c 0x4>;
 			clocks = <&apb2>;
+			clock-indices = <0>, <1>,
+					<2>, <16>,
+					<17>, <18>,
+					<19>, <20>;
 			clock-output-names = "apb2_i2c0", "apb2_i2c1",
 					"apb2_i2c2", "apb2_uart0",
 					"apb2_uart1", "apb2_uart2",
diff --git a/arch/arm/boot/dts/sun9i-a80.dtsi b/arch/arm/boot/dts/sun9i-a80.dtsi
index a43ad77..5908e3d 100644
--- a/arch/arm/boot/dts/sun9i-a80.dtsi
+++ b/arch/arm/boot/dts/sun9i-a80.dtsi
@@ -277,9 +277,12 @@
 			compatible = "allwinner,sun9i-a80-ahb0-gates-clk";
 			reg = <0x06000580 0x4>;
 			clocks = <&ahb0>;
-			clock-indices = <0>, <1>, <3>, <5>, <8>, <12>, <13>,
-					<14>, <15>, <16>, <18>, <20>, <21>,
-					<22>, <23>;
+			clock-indices = <0>, <1>, <3>,
+					<5>, <8>, <12>,
+					<13>, <14>,
+					<15>, <16>, <18>,
+					<20>, <21>, <22>,
+					<23>;
 			clock-output-names = "ahb0_fd", "ahb0_ve", "ahb0_gpu",
 					"ahb0_ss", "ahb0_sd", "ahb0_nand1",
 					"ahb0_nand0", "ahb0_sdram",
@@ -293,7 +296,10 @@
 			compatible = "allwinner,sun9i-a80-ahb1-gates-clk";
 			reg = <0x06000584 0x4>;
 			clocks = <&ahb1>;
-			clock-indices = <0>, <1>, <17>, <21>, <22>, <23>, <24>;
+			clock-indices = <0>, <1>,
+					<17>, <21>,
+					<22>, <23>,
+					<24>;
 			clock-output-names = "ahb1_usbotg", "ahb1_usbhci",
 					"ahb1_gmac", "ahb1_msgbox",
 					"ahb1_spinlock", "ahb1_hstimer",
@@ -305,8 +311,9 @@
 			compatible = "allwinner,sun9i-a80-ahb2-gates-clk";
 			reg = <0x06000588 0x4>;
 			clocks = <&ahb2>;
-			clock-indices = <0>, <1>, <2>, <4>, <5>, <7>, <8>,
-					<11>;
+			clock-indices = <0>, <1>,
+					<2>, <4>, <5>,
+					<7>, <8>, <11>;
 			clock-output-names = "ahb2_lcd0", "ahb2_lcd1",
 					"ahb2_edp", "ahb2_csi", "ahb2_hdmi",
 					"ahb2_de", "ahb2_mp", "ahb2_mipi_dsi";
@@ -317,8 +324,10 @@
 			compatible = "allwinner,sun9i-a80-apb0-gates-clk";
 			reg = <0x06000590 0x4>;
 			clocks = <&apb0>;
-			clock-indices = <1>, <5>, <11>, <12>, <13>, <15>,
-					<17>, <18>, <19>;
+			clock-indices = <1>, <5>,
+					<11>, <12>, <13>,
+					<15>, <17>, <18>,
+					<19>;
 			clock-output-names = "apb0_spdif", "apb0_pio",
 					"apb0_ac97", "apb0_i2s0", "apb0_i2s1",
 					"apb0_lradc", "apb0_gpadc", "apb0_twd",
@@ -330,8 +339,11 @@
 			compatible = "allwinner,sun9i-a80-apb1-gates-clk";
 			reg = <0x06000594 0x4>;
 			clocks = <&apb1>;
-			clock-indices = <0>, <1>, <2>, <3>, <4>,
-					<16>, <17>, <18>, <19>, <20>, <21>;
+			clock-indices = <0>, <1>,
+					<2>, <3>, <4>,
+					<16>, <17>,
+					<18>, <19>,
+					<20>, <21>;
 			clock-output-names = "apb1_i2c0", "apb1_i2c1",
 					"apb1_i2c2", "apb1_i2c3", "apb1_i2c4",
 					"apb1_uart0", "apb1_uart1",
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
index 107395c..17f63f7 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
@@ -150,6 +150,16 @@
 			interface-type = "ace";
 			reg = <0x5000 0x1000>;
 		};
+
+		pmu@9000 {
+			 compatible = "arm,cci-400-pmu,r0";
+			 reg = <0x9000 0x5000>;
+			 interrupts = <0 105 4>,
+				      <0 101 4>,
+				      <0 102 4>,
+				      <0 103 4>,
+				      <0 104 4>;
+		};
 	};
 
 	memory-controller@7ffd0000 {
@@ -187,11 +197,22 @@
 			     <1 10 0xf08>;
 	};
 
-	pmu {
+	pmu_a15 {
 		compatible = "arm,cortex-a15-pmu";
 		interrupts = <0 68 4>,
 			     <0 69 4>;
-		interrupt-affinity = <&cpu0>, <&cpu1>;
+		interrupt-affinity = <&cpu0>,
+				     <&cpu1>;
+	};
+
+	pmu_a7 {
+		compatible = "arm,cortex-a7-pmu";
+		interrupts = <0 128 4>,
+			     <0 129 4>,
+			     <0 130 4>;
+		interrupt-affinity = <&cpu2>,
+				     <&cpu3>,
+				     <&cpu4>;
 	};
 
 	oscclk6a: oscclk6a {
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index b47863d..7569b39 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -354,8 +354,7 @@ CONFIG_PROVE_LOCKING=y
 # CONFIG_FTRACE is not set
 # CONFIG_ARM_UNWIND is not set
 CONFIG_SECURITYFS=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-# CONFIG_CRYPTO_HW is not set
+CONFIG_CRYPTO_DEV_FSL_CAAM=y
 CONFIG_CRC_CCITT=m
 CONFIG_CRC_T10DIF=y
 CONFIG_CRC7=m
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 6d83a1b..5fd8df6 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -353,7 +353,6 @@ CONFIG_POWER_RESET_AS3722=y
 CONFIG_POWER_RESET_GPIO=y
 CONFIG_POWER_RESET_GPIO_RESTART=y
 CONFIG_POWER_RESET_KEYSTONE=y
-CONFIG_POWER_RESET_SUN6I=y
 CONFIG_POWER_RESET_RMOBILE=y
 CONFIG_SENSORS_LM90=y
 CONFIG_SENSORS_LM95245=y
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index 8ecba00..7ebc346b 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -2,6 +2,7 @@ CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_PERF_EVENTS=y
+CONFIG_MODULES=y
 CONFIG_ARCH_SUNXI=y
 CONFIG_SMP=y
 CONFIG_NR_CPUS=8
@@ -77,7 +78,6 @@ CONFIG_SPI_SUN6I=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_POWER_SUPPLY=y
 CONFIG_POWER_RESET=y
-CONFIG_POWER_RESET_SUN6I=y
 CONFIG_THERMAL=y
 CONFIG_CPU_THERMAL=y
 CONFIG_WATCHDOG=y
@@ -87,6 +87,10 @@ CONFIG_REGULATOR=y
 CONFIG_REGULATOR_FIXED_VOLTAGE=y
 CONFIG_REGULATOR_AXP20X=y
 CONFIG_REGULATOR_GPIO=y
+CONFIG_FB=y
+CONFIG_FB_SIMPLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
 CONFIG_USB=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
diff --git a/arch/arm/crypto/.gitignore b/arch/arm/crypto/.gitignore
index 6231d36..31e1f53 100644
--- a/arch/arm/crypto/.gitignore
+++ b/arch/arm/crypto/.gitignore
@@ -1 +1,3 @@
 aesbs-core.S
+sha256-core.S
+sha512-core.S
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 83c5019..30b3bc1 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -13,6 +13,7 @@ generic-y += kdebug.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += msgbuf.h
 generic-y += param.h
 generic-y += parport.h
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 1c3938f..4859820 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -140,16 +140,11 @@ static inline u32 __raw_readl(const volatile void __iomem *addr)
  * The _caller variety takes a __builtin_return_address(0) value for
  * /proc/vmalloc to use - and should only be used in non-inline functions.
  */
-extern void __iomem *__arm_ioremap_pfn_caller(unsigned long, unsigned long,
-	size_t, unsigned int, void *);
 extern void __iomem *__arm_ioremap_caller(phys_addr_t, size_t, unsigned int,
 	void *);
-
 extern void __iomem *__arm_ioremap_pfn(unsigned long, unsigned long, size_t, unsigned int);
-extern void __iomem *__arm_ioremap(phys_addr_t, size_t, unsigned int);
 extern void __iomem *__arm_ioremap_exec(phys_addr_t, size_t, bool cached);
 extern void __iounmap(volatile void __iomem *addr);
-extern void __arm_iounmap(volatile void __iomem *addr);
 
 extern void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t,
 	unsigned int, void *);
@@ -321,21 +316,24 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
 static inline void memset_io(volatile void __iomem *dst, unsigned c,
 	size_t count)
 {
-	memset((void __force *)dst, c, count);
+	extern void mmioset(void *, unsigned int, size_t);
+	mmioset((void __force *)dst, c, count);
 }
 #define memset_io(dst,c,count) memset_io(dst,c,count)
 
 static inline void memcpy_fromio(void *to, const volatile void __iomem *from,
 	size_t count)
 {
-	memcpy(to, (const void __force *)from, count);
+	extern void mmiocpy(void *, const void *, size_t);
+	mmiocpy(to, (const void __force *)from, count);
 }
 #define memcpy_fromio(to,from,count) memcpy_fromio(to,from,count)
 
 static inline void memcpy_toio(volatile void __iomem *to, const void *from,
 	size_t count)
 {
-	memcpy((void __force *)to, from, count);
+	extern void mmiocpy(void *, const void *, size_t);
+	mmiocpy((void __force *)to, from, count);
 }
 #define memcpy_toio(to,from,count) memcpy_toio(to,from,count)
 
@@ -348,18 +346,61 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from,
 #endif	/* readl */
 
 /*
- * ioremap and friends.
+ * ioremap() and friends.
+ *
+ * ioremap() takes a resource address, and size.  Due to the ARM memory
+ * types, it is important to use the correct ioremap() function as each
+ * mapping has specific properties.
+ *
+ * Function		Memory type	Cacheability	Cache hint
+ * ioremap()		Device		n/a		n/a
+ * ioremap_nocache()	Device		n/a		n/a
+ * ioremap_cache()	Normal		Writeback	Read allocate
+ * ioremap_wc()		Normal		Non-cacheable	n/a
+ * ioremap_wt()		Normal		Non-cacheable	n/a
+ *
+ * All device mappings have the following properties:
+ * - no access speculation
+ * - no repetition (eg, on return from an exception)
+ * - number, order and size of accesses are maintained
+ * - unaligned accesses are "unpredictable"
+ * - writes may be delayed before they hit the endpoint device
  *
- * ioremap takes a PCI memory address, as specified in
- * Documentation/io-mapping.txt.
+ * ioremap_nocache() is the same as ioremap() as there are too many device
+ * drivers using this for device registers, and documentation which tells
+ * people to use it for such for this to be any different.  This is not a
+ * safe fallback for memory-like mappings, or memory regions where the
+ * compiler may generate unaligned accesses - eg, via inlining its own
+ * memcpy.
  *
+ * All normal memory mappings have the following properties:
+ * - reads can be repeated with no side effects
+ * - repeated reads return the last value written
+ * - reads can fetch additional locations without side effects
+ * - writes can be repeated (in certain cases) with no side effects
+ * - writes can be merged before accessing the target
+ * - unaligned accesses can be supported
+ * - ordering is not guaranteed without explicit dependencies or barrier
+ *   instructions
+ * - writes may be delayed before they hit the endpoint memory
+ *
+ * The cache hint is only a performance hint: CPUs may alias these hints.
+ * Eg, a CPU not implementing read allocate but implementing write allocate
+ * will provide a write allocate mapping instead.
  */
-#define ioremap(cookie,size)		__arm_ioremap((cookie), (size), MT_DEVICE)
-#define ioremap_nocache(cookie,size)	__arm_ioremap((cookie), (size), MT_DEVICE)
-#define ioremap_cache(cookie,size)	__arm_ioremap((cookie), (size), MT_DEVICE_CACHED)
-#define ioremap_wc(cookie,size)		__arm_ioremap((cookie), (size), MT_DEVICE_WC)
-#define ioremap_wt(cookie,size)		__arm_ioremap((cookie), (size), MT_DEVICE)
-#define iounmap				__arm_iounmap
+void __iomem *ioremap(resource_size_t res_cookie, size_t size);
+#define ioremap ioremap
+#define ioremap_nocache ioremap
+
+void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size);
+#define ioremap_cache ioremap_cache
+
+void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size);
+#define ioremap_wc ioremap_wc
+#define ioremap_wt ioremap_wc
+
+void iounmap(volatile void __iomem *iomem_cookie);
+#define iounmap iounmap
 
 /*
  * io{read,write}{16,32}be() macros
diff --git a/arch/arm/include/asm/mach/pci.h b/arch/arm/include/asm/mach/pci.h
index 28b9bb3..8857d28 100644
--- a/arch/arm/include/asm/mach/pci.h
+++ b/arch/arm/include/asm/mach/pci.h
@@ -19,9 +19,7 @@ struct pci_bus;
 struct device;
 
 struct hw_pci {
-#ifdef CONFIG_PCI_MSI
 	struct msi_controller *msi_ctrl;
-#endif
 	struct pci_ops	*ops;
 	int		nr_controllers;
 	void		**private_data;
@@ -42,9 +40,6 @@ struct hw_pci {
  * Per-controller structure
  */
 struct pci_sys_data {
-#ifdef CONFIG_PCI_MSI
-	struct msi_controller *msi_ctrl;
-#endif
 	struct list_head node;
 	int		busnr;		/* primary bus number			*/
 	u64		mem_offset;	/* bus->cpu memory mapping offset	*/
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 3a72d69..b7f6fb4 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -275,7 +275,7 @@ static inline void *phys_to_virt(phys_addr_t x)
  */
 #define __pa(x)			__virt_to_phys((unsigned long)(x))
 #define __va(x)			((void *)__phys_to_virt((phys_addr_t)(x)))
-#define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
+#define pfn_to_kaddr(pfn)	__va((phys_addr_t)(pfn) << PAGE_SHIFT)
 
 extern phys_addr_t (*arch_virt_to_idmap)(unsigned long x);
 
@@ -286,7 +286,7 @@ extern phys_addr_t (*arch_virt_to_idmap)(unsigned long x);
  */
 static inline phys_addr_t __virt_to_idmap(unsigned long x)
 {
-	if (arch_virt_to_idmap)
+	if (IS_ENABLED(CONFIG_MMU) && arch_virt_to_idmap)
 		return arch_virt_to_idmap(x);
 	else
 		return __virt_to_phys(x);
diff --git a/arch/arm/include/asm/mm-arch-hooks.h b/arch/arm/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 7056660..0000000
--- a/arch/arm/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_ARM_MM_ARCH_HOOKS_H
-#define _ASM_ARM_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_ARM_MM_ARCH_HOOKS_H */
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index bfd662e..aeddd28 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -129,7 +129,36 @@
 
 /*
  * These are the memory types, defined to be compatible with
- * pre-ARMv6 CPUs cacheable and bufferable bits:   XXCB
+ * pre-ARMv6 CPUs cacheable and bufferable bits: n/a,n/a,C,B
+ * ARMv6+ without TEX remapping, they are a table index.
+ * ARMv6+ with TEX remapping, they correspond to n/a,TEX(0),C,B
+ *
+ * MT type		Pre-ARMv6	ARMv6+ type / cacheable status
+ * UNCACHED		Uncached	Strongly ordered
+ * BUFFERABLE		Bufferable	Normal memory / non-cacheable
+ * WRITETHROUGH		Writethrough	Normal memory / write through
+ * WRITEBACK		Writeback	Normal memory / write back, read alloc
+ * MINICACHE		Minicache	N/A
+ * WRITEALLOC		Writeback	Normal memory / write back, write alloc
+ * DEV_SHARED		Uncached	Device memory (shared)
+ * DEV_NONSHARED	Uncached	Device memory (non-shared)
+ * DEV_WC		Bufferable	Normal memory / non-cacheable
+ * DEV_CACHED		Writeback	Normal memory / write back, read alloc
+ * VECTORS		Variable	Normal memory / variable
+ *
+ * All normal memory mappings have the following properties:
+ * - reads can be repeated with no side effects
+ * - repeated reads return the last value written
+ * - reads can fetch additional locations without side effects
+ * - writes can be repeated (in certain cases) with no side effects
+ * - writes can be merged before accessing the target
+ * - unaligned accesses can be supported
+ *
+ * All device mappings have the following properties:
+ * - no access speculation
+ * - no repetition (eg, on return from an exception)
+ * - number, order and size of accesses are maintained
+ * - unaligned accesses are "unpredictable"
  */
 #define L_PTE_MT_UNCACHED	(_AT(pteval_t, 0x00) << 2)	/* 0000 */
 #define L_PTE_MT_BUFFERABLE	(_AT(pteval_t, 0x01) << 2)	/* 0001 */
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index a88671c..5e5a51a 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -50,6 +50,9 @@ extern void __aeabi_ulcmp(void);
 
 extern void fpundefinstr(void);
 
+void mmioset(void *, unsigned int, size_t);
+void mmiocpy(void *, const void *, size_t);
+
 	/* platform dependent support */
 EXPORT_SYMBOL(arm_delay_ops);
 
@@ -88,6 +91,9 @@ EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(memchr);
 EXPORT_SYMBOL(__memzero);
 
+EXPORT_SYMBOL(mmioset);
+EXPORT_SYMBOL(mmiocpy);
+
 #ifdef CONFIG_MMU
 EXPORT_SYMBOL(copy_page);
 
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index fcbbbb1..874e182 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -18,15 +18,6 @@
 
 static int debug_pci;
 
-#ifdef CONFIG_PCI_MSI
-struct msi_controller *pcibios_msi_controller(struct pci_dev *dev)
-{
-	struct pci_sys_data *sysdata = dev->bus->sysdata;
-
-	return sysdata->msi_ctrl;
-}
-#endif
-
 /*
  * We can't use pci_get_device() here since we are
  * called from interrupt context.
@@ -459,12 +450,9 @@ static void pcibios_init_hw(struct device *parent, struct hw_pci *hw,
 
 	for (nr = busnr = 0; nr < hw->nr_controllers; nr++) {
 		sys = kzalloc(sizeof(struct pci_sys_data), GFP_KERNEL);
-		if (!sys)
-			panic("PCI: unable to allocate sys data!");
+		if (WARN(!sys, "PCI: unable to allocate sys data!"))
+			break;
 
-#ifdef CONFIG_PCI_MSI
-		sys->msi_ctrl = hw->msi_ctrl;
-#endif
 		sys->busnr   = busnr;
 		sys->swizzle = hw->swizzle;
 		sys->map_irq = hw->map_irq;
@@ -486,11 +474,14 @@ static void pcibios_init_hw(struct device *parent, struct hw_pci *hw,
 			if (hw->scan)
 				sys->bus = hw->scan(nr, sys);
 			else
-				sys->bus = pci_scan_root_bus(parent, sys->busnr,
-						hw->ops, sys, &sys->resources);
+				sys->bus = pci_scan_root_bus_msi(parent,
+					sys->busnr, hw->ops, sys,
+					&sys->resources, hw->msi_ctrl);
 
-			if (!sys->bus)
-				panic("PCI: unable to scan bus!");
+			if (WARN(!sys->bus, "PCI: unable to scan bus!")) {
+				kfree(sys);
+				break;
+			}
 
 			busnr = sys->bus->busn_res.end + 1;
 
@@ -521,6 +512,8 @@ void pci_common_init_dev(struct device *parent, struct hw_pci *hw)
 		struct pci_bus *bus = sys->bus;
 
 		if (!pci_has_flag(PCI_PROBE_ONLY)) {
+			struct pci_bus *child;
+
 			/*
 			 * Size the bridge windows.
 			 */
@@ -530,25 +523,15 @@ void pci_common_init_dev(struct device *parent, struct hw_pci *hw)
 			 * Assign resources.
 			 */
 			pci_bus_assign_resources(bus);
-		}
 
+			list_for_each_entry(child, &bus->children, node)
+				pcie_bus_configure_settings(child);
+		}
 		/*
 		 * Tell drivers about devices found.
 		 */
 		pci_bus_add_devices(bus);
 	}
-
-	list_for_each_entry(sys, &head, node) {
-		struct pci_bus *bus = sys->bus;
-
-		/* Configure PCI Express settings */
-		if (bus && !pci_has_flag(PCI_PROBE_ONLY)) {
-			struct pci_bus *child;
-
-			list_for_each_entry(child, &bus->children, node)
-				pcie_bus_configure_settings(child);
-		}
-	}
 }
 
 #ifndef CONFIG_PCI_HOST_ITE8152
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 7dac308..cb4fb1e 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -410,7 +410,7 @@ ENDPROC(__fiq_abt)
 	zero_fp
 
 	.if	\trace
-#ifdef CONFIG_IRQSOFF_TRACER
+#ifdef CONFIG_TRACE_IRQFLAGS
 	bl	trace_hardirqs_off
 #endif
 	ct_user_exit save = 0
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 92828a1..b48dd4f 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -61,6 +61,7 @@ work_pending:
 	movlt	scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)
 	ldmia	sp, {r0 - r6}			@ have to reload r0 - r6
 	b	local_restart			@ ... and off we go
+ENDPROC(ret_fast_syscall)
 
 /*
  * "slow" syscall return path.  "why" tells us if this was a real syscall.
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index bd755d9..29e2991 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -399,6 +399,9 @@ ENTRY(secondary_startup)
 	sub	lr, r4, r5			@ mmu has been enabled
 	add	r3, r7, lr
 	ldrd	r4, [r3, #0]			@ get secondary_data.pgdir
+ARM_BE8(eor	r4, r4, r5)			@ Swap r5 and r4 in BE:
+ARM_BE8(eor	r5, r4, r5)			@ it can be done in 3 steps
+ARM_BE8(eor	r4, r4, r5)			@ without using a temp reg.
 	ldr	r8, [r3, #8]			@ get secondary_data.swapper_pg_dir
 	badr	lr, __enable_mmu		@ return address
 	mov	r13, r12			@ __secondary_switched address
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 357f57e..54272e0 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -818,12 +818,13 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu)
 			if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL))
 				break;
 
-		of_node_put(dn);
 		if (cpu >= nr_cpu_ids) {
 			pr_warn("Failed to find logical CPU for %s\n",
 				dn->name);
+			of_node_put(dn);
 			break;
 		}
+		of_node_put(dn);
 
 		irqs[i] = cpu;
 		cpumask_set_cpu(cpu, &pmu->supported_cpus);
diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c
index 1a4d232..3826935 100644
--- a/arch/arm/kernel/reboot.c
+++ b/arch/arm/kernel/reboot.c
@@ -50,7 +50,7 @@ static void __soft_restart(void *addr)
 	flush_cache_all();
 
 	/* Switch to the identity mapping. */
-	phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
+	phys_reset = (phys_reset_t)(unsigned long)virt_to_idmap(cpu_reset);
 	phys_reset((unsigned long)addr);
 
 	/* Should never get here. */
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 90dfbed..3d6b782 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -578,7 +578,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
 	if ((unsigned)ipinr < NR_IPI) {
-		trace_ipi_entry(ipi_types[ipinr]);
+		trace_ipi_entry_rcuidle(ipi_types[ipinr]);
 		__inc_irq_stat(cpu, ipi_irqs[ipinr]);
 	}
 
@@ -637,7 +637,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
 	}
 
 	if ((unsigned)ipinr < NR_IPI)
-		trace_ipi_exit(ipi_types[ipinr]);
+		trace_ipi_exit_rcuidle(ipi_types[ipinr]);
 	set_irq_regs(old_regs);
 }
 
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index efe17dd..54a5aea 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -296,7 +296,6 @@ static bool tk_is_cntvct(const struct timekeeper *tk)
  */
 void update_vsyscall(struct timekeeper *tk)
 {
-	struct timespec xtime_coarse;
 	struct timespec64 *wtm = &tk->wall_to_monotonic;
 
 	if (!cntvct_ok) {
@@ -308,10 +307,10 @@ void update_vsyscall(struct timekeeper *tk)
 
 	vdso_write_begin(vdso_data);
 
-	xtime_coarse = __current_kernel_time();
 	vdso_data->tk_is_cntvct			= tk_is_cntvct(tk);
-	vdso_data->xtime_coarse_sec		= xtime_coarse.tv_sec;
-	vdso_data->xtime_coarse_nsec		= xtime_coarse.tv_nsec;
+	vdso_data->xtime_coarse_sec		= tk->xtime_sec;
+	vdso_data->xtime_coarse_nsec		= (u32)(tk->tkr_mono.xtime_nsec >>
+							tk->tkr_mono.shift);
 	vdso_data->wtm_clock_sec		= wtm->tv_sec;
 	vdso_data->wtm_clock_nsec		= wtm->tv_nsec;
 
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index 7797e81..64111bd 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -61,8 +61,10 @@
 
 /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
 
+ENTRY(mmiocpy)
 ENTRY(memcpy)
 
 #include "copy_template.S"
 
 ENDPROC(memcpy)
+ENDPROC(mmiocpy)
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index a4ee97b..3c65e3b 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -16,6 +16,7 @@
 	.text
 	.align	5
 
+ENTRY(mmioset)
 ENTRY(memset)
 UNWIND( .fnstart         )
 	ands	r3, r0, #3		@ 1 unaligned?
@@ -133,3 +134,4 @@ UNWIND( .fnstart            )
 	b	1b
 UNWIND( .fnend   )
 ENDPROC(memset)
+ENDPROC(mmioset)
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
index 3e58d71..4b39af2 100644
--- a/arch/arm/lib/uaccess_with_memcpy.c
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -96,7 +96,7 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
 	}
 
 	/* the mmap semaphore is taken only if not in an atomic context */
-	atomic = in_atomic();
+	atomic = faulthandler_disabled();
 
 	if (!atomic)
 		down_read(&current->mm->mmap_sem);
diff --git a/arch/arm/mach-at91/at91rm9200.c b/arch/arm/mach-at91/at91rm9200.c
index eaf58f8..685826c 100644
--- a/arch/arm/mach-at91/at91rm9200.c
+++ b/arch/arm/mach-at91/at91rm9200.c
@@ -8,7 +8,6 @@
  * Licensed under GPLv2 or later.
  */
 
-#include <linux/clk-provider.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
 
diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c
index ddfdd82..29e08aa 100644
--- a/arch/arm/mach-davinci/devices-da8xx.c
+++ b/arch/arm/mach-davinci/devices-da8xx.c
@@ -1010,11 +1010,13 @@ static struct davinci_spi_platform_data da8xx_spi_pdata[] = {
 		.version	= SPI_VERSION_2,
 		.intr_line	= 1,
 		.dma_event_q	= EVENTQ_0,
+		.prescaler_limit = 2,
 	},
 	[1] = {
 		.version	= SPI_VERSION_2,
 		.intr_line	= 1,
 		.dma_event_q	= EVENTQ_0,
+		.prescaler_limit = 2,
 	},
 };
 
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 9cbeda7..567dc56 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -411,6 +411,7 @@ static struct davinci_spi_platform_data dm355_spi0_pdata = {
 	.num_chipselect = 2,
 	.cshold_bug	= true,
 	.dma_event_q	= EVENTQ_1,
+	.prescaler_limit = 1,
 };
 static struct platform_device dm355_spi0_device = {
 	.name = "spi_davinci",
diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c
index e3a3c54..6a890a8 100644
--- a/arch/arm/mach-davinci/dm365.c
+++ b/arch/arm/mach-davinci/dm365.c
@@ -646,6 +646,7 @@ static struct davinci_spi_platform_data dm365_spi0_pdata = {
 	.version 	= SPI_VERSION_1,
 	.num_chipselect = 2,
 	.dma_event_q	= EVENTQ_3,
+	.prescaler_limit = 1,
 };
 
 static struct resource dm365_spi0_resources[] = {
diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c
index 6001f1c..4a87e86 100644
--- a/arch/arm/mach-exynos/pm_domains.c
+++ b/arch/arm/mach-exynos/pm_domains.c
@@ -146,9 +146,8 @@ static __init int exynos4_pm_init_power_domain(void)
 		pd->base = of_iomap(np, 0);
 		if (!pd->base) {
 			pr_warn("%s: failed to map memory\n", __func__);
-			kfree(pd->pd.name);
+			kfree_const(pd->pd.name);
 			kfree(pd);
-			of_node_put(np);
 			continue;
 		}
 
diff --git a/arch/arm/mach-hisi/hisilicon.c b/arch/arm/mach-hisi/hisilicon.c
index c6bd7c7..8cc6215 100644
--- a/arch/arm/mach-hisi/hisilicon.c
+++ b/arch/arm/mach-hisi/hisilicon.c
@@ -11,7 +11,6 @@
  * published by the Free Software Foundation.
 */
 
-#include <linux/clk-provider.h>
 #include <linux/clocksource.h>
 #include <linux/irqchip.h>
 
diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c
index 80bad29..8c4467f 100644
--- a/arch/arm/mach-imx/gpc.c
+++ b/arch/arm/mach-imx/gpc.c
@@ -291,8 +291,6 @@ void __init imx_gpc_check_dt(void)
 	}
 }
 
-#ifdef CONFIG_PM_GENERIC_DOMAINS
-
 static void _imx6q_pm_pu_power_off(struct generic_pm_domain *genpd)
 {
 	int iso, iso2sw;
@@ -399,7 +397,6 @@ static struct genpd_onecell_data imx_gpc_onecell_data = {
 static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg)
 {
 	struct clk *clk;
-	bool is_off;
 	int i;
 
 	imx6q_pu_domain.reg = pu_reg;
@@ -416,18 +413,13 @@ static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg)
 	}
 	imx6q_pu_domain.num_clks = i;
 
-	is_off = IS_ENABLED(CONFIG_PM);
-	if (is_off) {
-		_imx6q_pm_pu_power_off(&imx6q_pu_domain.base);
-	} else {
-		/*
-		 * Enable power if compiled without CONFIG_PM in case the
-		 * bootloader disabled it.
-		 */
-		imx6q_pm_pu_power_on(&imx6q_pu_domain.base);
-	}
+	/* Enable power always in case bootloader disabled it. */
+	imx6q_pm_pu_power_on(&imx6q_pu_domain.base);
+
+	if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
+		return 0;
 
-	pm_genpd_init(&imx6q_pu_domain.base, NULL, is_off);
+	pm_genpd_init(&imx6q_pu_domain.base, NULL, false);
 	return of_genpd_add_provider_onecell(dev->of_node,
 					     &imx_gpc_onecell_data);
 
@@ -437,13 +429,6 @@ clk_err:
 	return -EINVAL;
 }
 
-#else
-static inline int imx_gpc_genpd_init(struct device *dev, struct regulator *reg)
-{
-	return 0;
-}
-#endif /* CONFIG_PM_GENERIC_DOMAINS */
-
 static int imx_gpc_probe(struct platform_device *pdev)
 {
 	struct regulator *pu_reg;
diff --git a/arch/arm/mach-keystone/pm_domain.c b/arch/arm/mach-keystone/pm_domain.c
index edea697..e283939 100644
--- a/arch/arm/mach-keystone/pm_domain.c
+++ b/arch/arm/mach-keystone/pm_domain.c
@@ -16,7 +16,6 @@
 #include <linux/pm_runtime.h>
 #include <linux/pm_clock.h>
 #include <linux/platform_device.h>
-#include <linux/clk-provider.h>
 #include <linux/of.h>
 
 static struct dev_pm_domain keystone_pm_domain = {
diff --git a/arch/arm/mach-mvebu/board-v7.c b/arch/arm/mach-mvebu/board-v7.c
index afee908..9f739f3 100644
--- a/arch/arm/mach-mvebu/board-v7.c
+++ b/arch/arm/mach-mvebu/board-v7.c
@@ -14,7 +14,6 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/clk-provider.h>
 #include <linux/of_address.h>
 #include <linux/of_fdt.h>
 #include <linux/of_platform.h>
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index ecc04ff..4a023e8 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -60,6 +60,7 @@ config SOC_AM43XX
 	select ARM_GIC
 	select MACH_OMAP_GENERIC
 	select MIGHT_HAVE_CACHE_L2X0
+	select HAVE_ARM_SCU
 
 config SOC_DRA7XX
 	bool "TI DRA7XX"
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index 903c85b..7892c7d 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -12,8 +12,7 @@ obj-y := id.o io.o control.o mux.o devices.o fb.o serial.o timer.o pm.o \
 
 hwmod-common				= omap_hwmod.o omap_hwmod_reset.o \
 					  omap_hwmod_common_data.o
-clock-common				= clock.o clock_common_data.o \
-					  clkt_dpll.o clkt_clksel.o
+clock-common				= clock.o
 secure-common				= omap-smc.o omap-secure.o
 
 obj-$(CONFIG_ARCH_OMAP2) += $(omap-2-3-common) $(hwmod-common)
@@ -182,24 +181,17 @@ obj-$(CONFIG_SOC_DRA7XX)		+= $(clockdomain-common)
 obj-$(CONFIG_SOC_DRA7XX)		+= clockdomains7xx_data.o
 
 # Clock framework
-obj-$(CONFIG_ARCH_OMAP2)		+= $(clock-common) clock2xxx.o
+obj-$(CONFIG_ARCH_OMAP2)		+= $(clock-common)
 obj-$(CONFIG_ARCH_OMAP2)		+= clkt2xxx_dpllcore.o
 obj-$(CONFIG_ARCH_OMAP2)		+= clkt2xxx_virt_prcm_set.o
-obj-$(CONFIG_ARCH_OMAP2)		+= clkt2xxx_dpll.o clkt_iclk.o
-obj-$(CONFIG_SOC_OMAP2430)		+= clock2430.o
-obj-$(CONFIG_ARCH_OMAP3)		+= $(clock-common) clock3xxx.o
-obj-$(CONFIG_ARCH_OMAP3)		+= clock34xx.o clkt34xx_dpll3m2.o
-obj-$(CONFIG_ARCH_OMAP3)		+= clock3517.o clock36xx.o
-obj-$(CONFIG_ARCH_OMAP3)		+= dpll3xxx.o
-obj-$(CONFIG_ARCH_OMAP3)		+= clkt_iclk.o
+obj-$(CONFIG_ARCH_OMAP2)		+= clkt2xxx_dpll.o
+obj-$(CONFIG_ARCH_OMAP3)		+= $(clock-common)
+obj-$(CONFIG_ARCH_OMAP3)		+= clkt34xx_dpll3m2.o
 obj-$(CONFIG_ARCH_OMAP4)		+= $(clock-common)
-obj-$(CONFIG_ARCH_OMAP4)		+= dpll3xxx.o dpll44xx.o
-obj-$(CONFIG_SOC_AM33XX)		+= $(clock-common) dpll3xxx.o
+obj-$(CONFIG_SOC_AM33XX)		+= $(clock-common)
 obj-$(CONFIG_SOC_OMAP5)			+= $(clock-common)
-obj-$(CONFIG_SOC_OMAP5)			+= dpll3xxx.o dpll44xx.o
 obj-$(CONFIG_SOC_DRA7XX)		+= $(clock-common)
-obj-$(CONFIG_SOC_DRA7XX)		+= dpll3xxx.o dpll44xx.o
-obj-$(CONFIG_SOC_AM43XX)		+= $(clock-common) dpll3xxx.o
+obj-$(CONFIG_SOC_AM43XX)		+= $(clock-common)
 
 # OMAP2 clock rate set data (old "OPP" data)
 obj-$(CONFIG_SOC_OMAP2420)		+= opp2420_data.o
diff --git a/arch/arm/mach-omap2/clkt34xx_dpll3m2.c b/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
index eb69acf..3f65213 100644
--- a/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
+++ b/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
@@ -23,12 +23,13 @@
 
 #include "clock.h"
 #include "clock3xxx.h"
-#include "clock34xx.h"
 #include "sdrc.h"
 #include "sram.h"
 
 #define CYCLES_PER_MHZ			1000000
 
+struct clk *sdrc_ick_p, *arm_fck_p;
+
 /*
  * CORE DPLL (DPLL3) M2 divider rate programming functions
  *
@@ -60,12 +61,14 @@ int omap3_core_dpll_m2_set_rate(struct clk_hw *hw, unsigned long rate,
 	if (!clk || !rate)
 		return -EINVAL;
 
-	validrate = omap2_clksel_round_rate_div(clk, rate, &new_div);
+	new_div = DIV_ROUND_UP(parent_rate, rate);
+	validrate = parent_rate / new_div;
+
 	if (validrate != rate)
 		return -EINVAL;
 
-	sdrcrate = __clk_get_rate(sdrc_ick_p);
-	clkrate = __clk_get_rate(hw->clk);
+	sdrcrate = clk_get_rate(sdrc_ick_p);
+	clkrate = clk_hw_get_rate(hw);
 	if (rate > clkrate)
 		sdrcrate <<= ((rate / clkrate) >> 1);
 	else
@@ -83,7 +86,7 @@ int omap3_core_dpll_m2_set_rate(struct clk_hw *hw, unsigned long rate,
 	/*
 	 * XXX This only needs to be done when the CPU frequency changes
 	 */
-	_mpurate = __clk_get_rate(arm_fck_p) / CYCLES_PER_MHZ;
+	_mpurate = clk_get_rate(arm_fck_p) / CYCLES_PER_MHZ;
 	c = (_mpurate << SDRC_MPURATE_SCALE) >> SDRC_MPURATE_BASE_SHIFT;
 	c += 1;  /* for safety */
 	c *= SDRC_MPURATE_LOOPS;
diff --git a/arch/arm/mach-omap2/clkt_clksel.c b/arch/arm/mach-omap2/clkt_clksel.c
deleted file mode 100644
index 7ee2610..0000000
--- a/arch/arm/mach-omap2/clkt_clksel.c
+++ /dev/null
@@ -1,466 +0,0 @@
-/*
- * clkt_clksel.c - OMAP2/3/4 clksel clock functions
- *
- * Copyright (C) 2005-2008 Texas Instruments, Inc.
- * Copyright (C) 2004-2010 Nokia Corporation
- *
- * Contacts:
- * Richard Woodruff <r-woodruff2@ti.com>
- * Paul Walmsley
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *
- * clksel clocks are clocks that do not have a fixed parent, or that
- * can divide their parent's rate, or possibly both at the same time, based
- * on the contents of a hardware register bitfield.
- *
- * All of the various mux and divider settings can be encoded into
- * struct clksel* data structures, and then these can be autogenerated
- * from some hardware database for each new chip generation.  This
- * should avoid the need to write, review, and validate a lot of new
- * clock code for each new chip, since it can be exported from the SoC
- * design flow.  This is now done on OMAP4.
- *
- * The fusion of mux and divider clocks is a software creation.  In
- * hardware reality, the multiplexer (parent selection) and the
- * divider exist separately.  XXX At some point these clksel clocks
- * should be split into "divider" clocks and "mux" clocks to better
- * match the hardware.
- *
- * (The name "clksel" comes from the name of the corresponding
- * register field in the OMAP2/3 family of SoCs.)
- *
- * XXX Currently these clocks are only used in the OMAP2/3/4 code, but
- * many of the OMAP1 clocks should be convertible to use this
- * mechanism.
- */
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/clk-provider.h>
-#include <linux/io.h>
-#include <linux/bug.h>
-
-#include "clock.h"
-
-/* Private functions */
-
-/**
- * _get_clksel_by_parent() - return clksel struct for a given clk & parent
- * @clk: OMAP struct clk ptr to inspect
- * @src_clk: OMAP struct clk ptr of the parent clk to search for
- *
- * Scan the struct clksel array associated with the clock to find
- * the element associated with the supplied parent clock address.
- * Returns a pointer to the struct clksel on success or NULL on error.
- */
-static const struct clksel *_get_clksel_by_parent(struct clk_hw_omap *clk,
-						  struct clk *src_clk)
-{
-	const struct clksel *clks;
-
-	if (!src_clk)
-		return NULL;
-
-	for (clks = clk->clksel; clks->parent; clks++)
-		if (clks->parent == src_clk)
-			break; /* Found the requested parent */
-
-	if (!clks->parent) {
-		/* This indicates a data problem */
-		WARN(1, "clock: %s: could not find parent clock %s in clksel array\n",
-		     __clk_get_name(clk->hw.clk), __clk_get_name(src_clk));
-		return NULL;
-	}
-
-	return clks;
-}
-
-/**
- * _write_clksel_reg() - program a clock's clksel register in hardware
- * @clk: struct clk * to program
- * @v: clksel bitfield value to program (with LSB at bit 0)
- *
- * Shift the clksel register bitfield value @v to its appropriate
- * location in the clksel register and write it in.  This function
- * will ensure that the write to the clksel_reg reaches its
- * destination before returning -- important since PRM and CM register
- * accesses can be quite slow compared to ARM cycles -- but does not
- * take into account any time the hardware might take to switch the
- * clock source.
- */
-static void _write_clksel_reg(struct clk_hw_omap *clk, u32 field_val)
-{
-	u32 v;
-
-	v = omap2_clk_readl(clk, clk->clksel_reg);
-	v &= ~clk->clksel_mask;
-	v |= field_val << __ffs(clk->clksel_mask);
-	omap2_clk_writel(v, clk, clk->clksel_reg);
-
-	v = omap2_clk_readl(clk, clk->clksel_reg); /* OCP barrier */
-}
-
-/**
- * _clksel_to_divisor() - turn clksel field value into integer divider
- * @clk: OMAP struct clk to use
- * @field_val: register field value to find
- *
- * Given a struct clk of a rate-selectable clksel clock, and a register field
- * value to search for, find the corresponding clock divisor.  The register
- * field value should be pre-masked and shifted down so the LSB is at bit 0
- * before calling.  Returns 0 on error or returns the actual integer divisor
- * upon success.
- */
-static u32 _clksel_to_divisor(struct clk_hw_omap *clk, u32 field_val)
-{
-	const struct clksel *clks;
-	const struct clksel_rate *clkr;
-	struct clk *parent;
-
-	parent = __clk_get_parent(clk->hw.clk);
-
-	clks = _get_clksel_by_parent(clk, parent);
-	if (!clks)
-		return 0;
-
-	for (clkr = clks->rates; clkr->div; clkr++) {
-		if (!(clkr->flags & cpu_mask))
-			continue;
-
-		if (clkr->val == field_val)
-			break;
-	}
-
-	if (!clkr->div) {
-		/* This indicates a data error */
-		WARN(1, "clock: %s: could not find fieldval %d for parent %s\n",
-		     __clk_get_name(clk->hw.clk), field_val,
-		     __clk_get_name(parent));
-		return 0;
-	}
-
-	return clkr->div;
-}
-
-/**
- * _divisor_to_clksel() - turn clksel integer divisor into a field value
- * @clk: OMAP struct clk to use
- * @div: integer divisor to search for
- *
- * Given a struct clk of a rate-selectable clksel clock, and a clock
- * divisor, find the corresponding register field value.  Returns the
- * register field value _before_ left-shifting (i.e., LSB is at bit
- * 0); or returns 0xFFFFFFFF (~0) upon error.
- */
-static u32 _divisor_to_clksel(struct clk_hw_omap *clk, u32 div)
-{
-	const struct clksel *clks;
-	const struct clksel_rate *clkr;
-	struct clk *parent;
-
-	/* should never happen */
-	WARN_ON(div == 0);
-
-	parent = __clk_get_parent(clk->hw.clk);
-	clks = _get_clksel_by_parent(clk, parent);
-	if (!clks)
-		return ~0;
-
-	for (clkr = clks->rates; clkr->div; clkr++) {
-		if (!(clkr->flags & cpu_mask))
-			continue;
-
-		if (clkr->div == div)
-			break;
-	}
-
-	if (!clkr->div) {
-		pr_err("clock: %s: could not find divisor %d for parent %s\n",
-		       __clk_get_name(clk->hw.clk), div,
-		       __clk_get_name(parent));
-		return ~0;
-	}
-
-	return clkr->val;
-}
-
-/**
- * _read_divisor() - get current divisor applied to parent clock (from hdwr)
- * @clk: OMAP struct clk to use.
- *
- * Read the current divisor register value for @clk that is programmed
- * into the hardware, convert it into the actual divisor value, and
- * return it; or return 0 on error.
- */
-static u32 _read_divisor(struct clk_hw_omap *clk)
-{
-	u32 v;
-
-	if (!clk->clksel || !clk->clksel_mask)
-		return 0;
-
-	v = omap2_clk_readl(clk, clk->clksel_reg);
-	v &= clk->clksel_mask;
-	v >>= __ffs(clk->clksel_mask);
-
-	return _clksel_to_divisor(clk, v);
-}
-
-/* Public functions */
-
-/**
- * omap2_clksel_round_rate_div() - find divisor for the given clock and rate
- * @clk: OMAP struct clk to use
- * @target_rate: desired clock rate
- * @new_div: ptr to where we should store the divisor
- *
- * Finds 'best' divider value in an array based on the source and target
- * rates.  The divider array must be sorted with smallest divider first.
- * This function is also used by the DPLL3 M2 divider code.
- *
- * Returns the rounded clock rate or returns 0xffffffff on error.
- */
-u32 omap2_clksel_round_rate_div(struct clk_hw_omap *clk,
-						 unsigned long target_rate,
-				u32 *new_div)
-{
-	unsigned long test_rate;
-	const struct clksel *clks;
-	const struct clksel_rate *clkr;
-	u32 last_div = 0;
-	struct clk *parent;
-	unsigned long parent_rate;
-	const char *clk_name;
-
-	parent = __clk_get_parent(clk->hw.clk);
-	clk_name = __clk_get_name(clk->hw.clk);
-	parent_rate = __clk_get_rate(parent);
-
-	if (!clk->clksel || !clk->clksel_mask)
-		return ~0;
-
-	pr_debug("clock: clksel_round_rate_div: %s target_rate %ld\n",
-		 clk_name, target_rate);
-
-	*new_div = 1;
-
-	clks = _get_clksel_by_parent(clk, parent);
-	if (!clks)
-		return ~0;
-
-	for (clkr = clks->rates; clkr->div; clkr++) {
-		if (!(clkr->flags & cpu_mask))
-			continue;
-
-		/* Sanity check */
-		if (clkr->div <= last_div)
-			pr_err("clock: %s: clksel_rate table not sorted\n",
-			       clk_name);
-
-		last_div = clkr->div;
-
-		test_rate = parent_rate / clkr->div;
-
-		if (test_rate <= target_rate)
-			break; /* found it */
-	}
-
-	if (!clkr->div) {
-		pr_err("clock: %s: could not find divisor for target rate %ld for parent %s\n",
-		       clk_name, target_rate, __clk_get_name(parent));
-		return ~0;
-	}
-
-	*new_div = clkr->div;
-
-	pr_debug("clock: new_div = %d, new_rate = %ld\n", *new_div,
-		 (parent_rate / clkr->div));
-
-	return parent_rate / clkr->div;
-}
-
-/*
- * Clocktype interface functions to the OMAP clock code
- * (i.e., those used in struct clk field function pointers, etc.)
- */
-
-/**
- * omap2_clksel_find_parent_index() - return the array index of the current
- * hardware parent of @hw
- * @hw: struct clk_hw * to find the current hardware parent of
- *
- * Given a struct clk_hw pointer @hw to the 'hw' member of a struct
- * clk_hw_omap record representing a source-selectable hardware clock,
- * read the hardware register and determine what its parent is
- * currently set to.  Intended to be called only by the common clock
- * framework struct clk_hw_ops.get_parent function pointer.  Return
- * the array index of this parent clock upon success -- there is no
- * way to return an error, so if we encounter an error, just WARN()
- * and pretend that we know that we're doing.
- */
-u8 omap2_clksel_find_parent_index(struct clk_hw *hw)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-	const struct clksel *clks;
-	const struct clksel_rate *clkr;
-	u32 r, found = 0;
-	struct clk *parent;
-	const char *clk_name;
-	int ret = 0, f = 0;
-
-	parent = __clk_get_parent(hw->clk);
-	clk_name = __clk_get_name(hw->clk);
-
-	/* XXX should be able to return an error */
-	WARN((!clk->clksel || !clk->clksel_mask),
-	     "clock: %s: attempt to call on a non-clksel clock", clk_name);
-
-	r = omap2_clk_readl(clk, clk->clksel_reg) & clk->clksel_mask;
-	r >>= __ffs(clk->clksel_mask);
-
-	for (clks = clk->clksel; clks->parent && !found; clks++) {
-		for (clkr = clks->rates; clkr->div && !found; clkr++) {
-			if (!(clkr->flags & cpu_mask))
-				continue;
-
-			if (clkr->val == r) {
-				found = 1;
-				ret = f;
-			}
-		}
-		f++;
-	}
-
-	/* This indicates a data error */
-	WARN(!found, "clock: %s: init parent: could not find regval %0x\n",
-	     clk_name, r);
-
-	return ret;
-}
-
-
-/**
- * omap2_clksel_recalc() - function ptr to pass via struct clk .recalc field
- * @clk: struct clk *
- *
- * This function is intended to be called only by the clock framework.
- * Each clksel clock should have its struct clk .recalc field set to this
- * function.  Returns the clock's current rate, based on its parent's rate
- * and its current divisor setting in the hardware.
- */
-unsigned long omap2_clksel_recalc(struct clk_hw *hw, unsigned long parent_rate)
-{
-	unsigned long rate;
-	u32 div = 0;
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-
-	if (!parent_rate)
-		return 0;
-
-	div = _read_divisor(clk);
-	if (!div)
-		rate = parent_rate;
-	else
-		rate = parent_rate / div;
-
-	pr_debug("%s: recalc'd %s's rate to %lu (div %d)\n", __func__,
-		 __clk_get_name(hw->clk), rate, div);
-
-	return rate;
-}
-
-/**
- * omap2_clksel_round_rate() - find rounded rate for the given clock and rate
- * @clk: OMAP struct clk to use
- * @target_rate: desired clock rate
- *
- * This function is intended to be called only by the clock framework.
- * Finds best target rate based on the source clock and possible dividers.
- * rates. The divider array must be sorted with smallest divider first.
- *
- * Returns the rounded clock rate or returns 0xffffffff on error.
- */
-long omap2_clksel_round_rate(struct clk_hw *hw, unsigned long target_rate,
-			unsigned long *parent_rate)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-	u32 new_div;
-
-	return omap2_clksel_round_rate_div(clk, target_rate, &new_div);
-}
-
-/**
- * omap2_clksel_set_rate() - program clock rate in hardware
- * @clk: struct clk * to program rate
- * @rate: target rate to program
- *
- * This function is intended to be called only by the clock framework.
- * Program @clk's rate to @rate in the hardware.  The clock can be
- * either enabled or disabled when this happens, although if the clock
- * is enabled, some downstream devices may glitch or behave
- * unpredictably when the clock rate is changed - this depends on the
- * hardware. This function does not currently check the usecount of
- * the clock, so if multiple drivers are using the clock, and the rate
- * is changed, they will all be affected without any notification.
- * Returns -EINVAL upon error, or 0 upon success.
- */
-int omap2_clksel_set_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long parent_rate)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-	u32 field_val, validrate, new_div = 0;
-
-	if (!clk->clksel || !clk->clksel_mask)
-		return -EINVAL;
-
-	validrate = omap2_clksel_round_rate_div(clk, rate, &new_div);
-	if (validrate != rate)
-		return -EINVAL;
-
-	field_val = _divisor_to_clksel(clk, new_div);
-	if (field_val == ~0)
-		return -EINVAL;
-
-	_write_clksel_reg(clk, field_val);
-
-	pr_debug("clock: %s: set rate to %ld\n", __clk_get_name(hw->clk),
-		 __clk_get_rate(hw->clk));
-
-	return 0;
-}
-
-/*
- * Clksel parent setting function - not passed in struct clk function
- * pointer - instead, the OMAP clock code currently assumes that any
- * parent-setting clock is a clksel clock, and calls
- * omap2_clksel_set_parent() by default
- */
-
-/**
- * omap2_clksel_set_parent() - change a clock's parent clock
- * @clk: struct clk * of the child clock
- * @new_parent: struct clk * of the new parent clock
- *
- * This function is intended to be called only by the clock framework.
- * Change the parent clock of clock @clk to @new_parent.  This is
- * intended to be used while @clk is disabled.  This function does not
- * currently check the usecount of the clock, so if multiple drivers
- * are using the clock, and the parent is changed, they will all be
- * affected without any notification.  Returns -EINVAL upon error, or
- * 0 upon success.
- */
-int omap2_clksel_set_parent(struct clk_hw *hw, u8 field_val)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-
-	if (!clk->clksel || !clk->clksel_mask)
-		return -EINVAL;
-
-	_write_clksel_reg(clk, field_val);
-	return 0;
-}
diff --git a/arch/arm/mach-omap2/clkt_dpll.c b/arch/arm/mach-omap2/clkt_dpll.c
deleted file mode 100644
index f251a14..0000000
--- a/arch/arm/mach-omap2/clkt_dpll.c
+++ /dev/null
@@ -1,370 +0,0 @@
-/*
- * OMAP2/3/4 DPLL clock functions
- *
- * Copyright (C) 2005-2008 Texas Instruments, Inc.
- * Copyright (C) 2004-2010 Nokia Corporation
- *
- * Contacts:
- * Richard Woodruff <r-woodruff2@ti.com>
- * Paul Walmsley
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/clk-provider.h>
-#include <linux/io.h>
-
-#include <asm/div64.h>
-
-#include "clock.h"
-
-/* DPLL rate rounding: minimum DPLL multiplier, divider values */
-#define DPLL_MIN_MULTIPLIER		2
-#define DPLL_MIN_DIVIDER		1
-
-/* Possible error results from _dpll_test_mult */
-#define DPLL_MULT_UNDERFLOW		-1
-
-/*
- * Scale factor to mitigate roundoff errors in DPLL rate rounding.
- * The higher the scale factor, the greater the risk of arithmetic overflow,
- * but the closer the rounded rate to the target rate.  DPLL_SCALE_FACTOR
- * must be a power of DPLL_SCALE_BASE.
- */
-#define DPLL_SCALE_FACTOR		64
-#define DPLL_SCALE_BASE			2
-#define DPLL_ROUNDING_VAL		((DPLL_SCALE_BASE / 2) * \
-					 (DPLL_SCALE_FACTOR / DPLL_SCALE_BASE))
-
-/*
- * DPLL valid Fint frequency range for OMAP36xx and OMAP4xxx.
- * From device data manual section 4.3 "DPLL and DLL Specifications".
- */
-#define OMAP3PLUS_DPLL_FINT_JTYPE_MIN	500000
-#define OMAP3PLUS_DPLL_FINT_JTYPE_MAX	2500000
-
-/* _dpll_test_fint() return codes */
-#define DPLL_FINT_UNDERFLOW		-1
-#define DPLL_FINT_INVALID		-2
-
-/* Private functions */
-
-/*
- * _dpll_test_fint - test whether an Fint value is valid for the DPLL
- * @clk: DPLL struct clk to test
- * @n: divider value (N) to test
- *
- * Tests whether a particular divider @n will result in a valid DPLL
- * internal clock frequency Fint. See the 34xx TRM 4.7.6.2 "DPLL Jitter
- * Correction".  Returns 0 if OK, -1 if the enclosing loop can terminate
- * (assuming that it is counting N upwards), or -2 if the enclosing loop
- * should skip to the next iteration (again assuming N is increasing).
- */
-static int _dpll_test_fint(struct clk_hw_omap *clk, unsigned int n)
-{
-	struct dpll_data *dd;
-	long fint, fint_min, fint_max;
-	int ret = 0;
-
-	dd = clk->dpll_data;
-
-	/* DPLL divider must result in a valid jitter correction val */
-	fint = __clk_get_rate(__clk_get_parent(clk->hw.clk)) / n;
-
-	if (dd->flags & DPLL_J_TYPE) {
-		fint_min = OMAP3PLUS_DPLL_FINT_JTYPE_MIN;
-		fint_max = OMAP3PLUS_DPLL_FINT_JTYPE_MAX;
-	} else {
-		fint_min = ti_clk_features.fint_min;
-		fint_max = ti_clk_features.fint_max;
-	}
-
-	if (!fint_min || !fint_max) {
-		WARN(1, "No fint limits available!\n");
-		return DPLL_FINT_INVALID;
-	}
-
-	if (fint < ti_clk_features.fint_min) {
-		pr_debug("rejecting n=%d due to Fint failure, lowering max_divider\n",
-			 n);
-		dd->max_divider = n;
-		ret = DPLL_FINT_UNDERFLOW;
-	} else if (fint > ti_clk_features.fint_max) {
-		pr_debug("rejecting n=%d due to Fint failure, boosting min_divider\n",
-			 n);
-		dd->min_divider = n;
-		ret = DPLL_FINT_INVALID;
-	} else if (fint > ti_clk_features.fint_band1_max &&
-		   fint < ti_clk_features.fint_band2_min) {
-		pr_debug("rejecting n=%d due to Fint failure\n", n);
-		ret = DPLL_FINT_INVALID;
-	}
-
-	return ret;
-}
-
-static unsigned long _dpll_compute_new_rate(unsigned long parent_rate,
-					    unsigned int m, unsigned int n)
-{
-	unsigned long long num;
-
-	num = (unsigned long long)parent_rate * m;
-	do_div(num, n);
-	return num;
-}
-
-/*
- * _dpll_test_mult - test a DPLL multiplier value
- * @m: pointer to the DPLL m (multiplier) value under test
- * @n: current DPLL n (divider) value under test
- * @new_rate: pointer to storage for the resulting rounded rate
- * @target_rate: the desired DPLL rate
- * @parent_rate: the DPLL's parent clock rate
- *
- * This code tests a DPLL multiplier value, ensuring that the
- * resulting rate will not be higher than the target_rate, and that
- * the multiplier value itself is valid for the DPLL.  Initially, the
- * integer pointed to by the m argument should be prescaled by
- * multiplying by DPLL_SCALE_FACTOR.  The code will replace this with
- * a non-scaled m upon return.  This non-scaled m will result in a
- * new_rate as close as possible to target_rate (but not greater than
- * target_rate) given the current (parent_rate, n, prescaled m)
- * triple. Returns DPLL_MULT_UNDERFLOW in the event that the
- * non-scaled m attempted to underflow, which can allow the calling
- * function to bail out early; or 0 upon success.
- */
-static int _dpll_test_mult(int *m, int n, unsigned long *new_rate,
-			   unsigned long target_rate,
-			   unsigned long parent_rate)
-{
-	int r = 0, carry = 0;
-
-	/* Unscale m and round if necessary */
-	if (*m % DPLL_SCALE_FACTOR >= DPLL_ROUNDING_VAL)
-		carry = 1;
-	*m = (*m / DPLL_SCALE_FACTOR) + carry;
-
-	/*
-	 * The new rate must be <= the target rate to avoid programming
-	 * a rate that is impossible for the hardware to handle
-	 */
-	*new_rate = _dpll_compute_new_rate(parent_rate, *m, n);
-	if (*new_rate > target_rate) {
-		(*m)--;
-		*new_rate = 0;
-	}
-
-	/* Guard against m underflow */
-	if (*m < DPLL_MIN_MULTIPLIER) {
-		*m = DPLL_MIN_MULTIPLIER;
-		*new_rate = 0;
-		r = DPLL_MULT_UNDERFLOW;
-	}
-
-	if (*new_rate == 0)
-		*new_rate = _dpll_compute_new_rate(parent_rate, *m, n);
-
-	return r;
-}
-
-/**
- * _omap2_dpll_is_in_bypass - check if DPLL is in bypass mode or not
- * @v: bitfield value of the DPLL enable
- *
- * Checks given DPLL enable bitfield to see whether the DPLL is in bypass
- * mode or not. Returns 1 if the DPLL is in bypass, 0 otherwise.
- */
-static int _omap2_dpll_is_in_bypass(u32 v)
-{
-	u8 mask, val;
-
-	mask = ti_clk_features.dpll_bypass_vals;
-
-	/*
-	 * Each set bit in the mask corresponds to a bypass value equal
-	 * to the bitshift. Go through each set-bit in the mask and
-	 * compare against the given register value.
-	 */
-	while (mask) {
-		val = __ffs(mask);
-		mask ^= (1 << val);
-		if (v == val)
-			return 1;
-	}
-
-	return 0;
-}
-
-/* Public functions */
-u8 omap2_init_dpll_parent(struct clk_hw *hw)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-	u32 v;
-	struct dpll_data *dd;
-
-	dd = clk->dpll_data;
-	if (!dd)
-		return -EINVAL;
-
-	v = omap2_clk_readl(clk, dd->control_reg);
-	v &= dd->enable_mask;
-	v >>= __ffs(dd->enable_mask);
-
-	/* Reparent the struct clk in case the dpll is in bypass */
-	if (_omap2_dpll_is_in_bypass(v))
-		return 1;
-
-	return 0;
-}
-
-/**
- * omap2_get_dpll_rate - returns the current DPLL CLKOUT rate
- * @clk: struct clk * of a DPLL
- *
- * DPLLs can be locked or bypassed - basically, enabled or disabled.
- * When locked, the DPLL output depends on the M and N values.  When
- * bypassed, on OMAP2xxx, the output rate is either the 32KiHz clock
- * or sys_clk.  Bypass rates on OMAP3 depend on the DPLL: DPLLs 1 and
- * 2 are bypassed with dpll1_fclk and dpll2_fclk respectively
- * (generated by DPLL3), while DPLL 3, 4, and 5 bypass rates are sys_clk.
- * Returns the current DPLL CLKOUT rate (*not* CLKOUTX2) if the DPLL is
- * locked, or the appropriate bypass rate if the DPLL is bypassed, or 0
- * if the clock @clk is not a DPLL.
- */
-unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk)
-{
-	long long dpll_clk;
-	u32 dpll_mult, dpll_div, v;
-	struct dpll_data *dd;
-
-	dd = clk->dpll_data;
-	if (!dd)
-		return 0;
-
-	/* Return bypass rate if DPLL is bypassed */
-	v = omap2_clk_readl(clk, dd->control_reg);
-	v &= dd->enable_mask;
-	v >>= __ffs(dd->enable_mask);
-
-	if (_omap2_dpll_is_in_bypass(v))
-		return __clk_get_rate(dd->clk_bypass);
-
-	v = omap2_clk_readl(clk, dd->mult_div1_reg);
-	dpll_mult = v & dd->mult_mask;
-	dpll_mult >>= __ffs(dd->mult_mask);
-	dpll_div = v & dd->div1_mask;
-	dpll_div >>= __ffs(dd->div1_mask);
-
-	dpll_clk = (long long) __clk_get_rate(dd->clk_ref) * dpll_mult;
-	do_div(dpll_clk, dpll_div + 1);
-
-	return dpll_clk;
-}
-
-/* DPLL rate rounding code */
-
-/**
- * omap2_dpll_round_rate - round a target rate for an OMAP DPLL
- * @clk: struct clk * for a DPLL
- * @target_rate: desired DPLL clock rate
- *
- * Given a DPLL and a desired target rate, round the target rate to a
- * possible, programmable rate for this DPLL.  Attempts to select the
- * minimum possible n.  Stores the computed (m, n) in the DPLL's
- * dpll_data structure so set_rate() will not need to call this
- * (expensive) function again.  Returns ~0 if the target rate cannot
- * be rounded, or the rounded rate upon success.
- */
-long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate,
-		unsigned long *parent_rate)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-	int m, n, r, scaled_max_m;
-	int min_delta_m = INT_MAX, min_delta_n = INT_MAX;
-	unsigned long scaled_rt_rp;
-	unsigned long new_rate = 0;
-	struct dpll_data *dd;
-	unsigned long ref_rate;
-	long delta;
-	long prev_min_delta = LONG_MAX;
-	const char *clk_name;
-
-	if (!clk || !clk->dpll_data)
-		return ~0;
-
-	dd = clk->dpll_data;
-
-	ref_rate = __clk_get_rate(dd->clk_ref);
-	clk_name = __clk_get_name(hw->clk);
-	pr_debug("clock: %s: starting DPLL round_rate, target rate %lu\n",
-		 clk_name, target_rate);
-
-	scaled_rt_rp = target_rate / (ref_rate / DPLL_SCALE_FACTOR);
-	scaled_max_m = dd->max_multiplier * DPLL_SCALE_FACTOR;
-
-	dd->last_rounded_rate = 0;
-
-	for (n = dd->min_divider; n <= dd->max_divider; n++) {
-
-		/* Is the (input clk, divider) pair valid for the DPLL? */
-		r = _dpll_test_fint(clk, n);
-		if (r == DPLL_FINT_UNDERFLOW)
-			break;
-		else if (r == DPLL_FINT_INVALID)
-			continue;
-
-		/* Compute the scaled DPLL multiplier, based on the divider */
-		m = scaled_rt_rp * n;
-
-		/*
-		 * Since we're counting n up, a m overflow means we
-		 * can bail out completely (since as n increases in
-		 * the next iteration, there's no way that m can
-		 * increase beyond the current m)
-		 */
-		if (m > scaled_max_m)
-			break;
-
-		r = _dpll_test_mult(&m, n, &new_rate, target_rate,
-				    ref_rate);
-
-		/* m can't be set low enough for this n - try with a larger n */
-		if (r == DPLL_MULT_UNDERFLOW)
-			continue;
-
-		/* skip rates above our target rate */
-		delta = target_rate - new_rate;
-		if (delta < 0)
-			continue;
-
-		if (delta < prev_min_delta) {
-			prev_min_delta = delta;
-			min_delta_m = m;
-			min_delta_n = n;
-		}
-
-		pr_debug("clock: %s: m = %d: n = %d: new_rate = %lu\n",
-			 clk_name, m, n, new_rate);
-
-		if (delta == 0)
-			break;
-	}
-
-	if (prev_min_delta == LONG_MAX) {
-		pr_debug("clock: %s: cannot round to rate %lu\n",
-			 clk_name, target_rate);
-		return ~0;
-	}
-
-	dd->last_rounded_m = min_delta_m;
-	dd->last_rounded_n = min_delta_n;
-	dd->last_rounded_rate = target_rate - prev_min_delta;
-
-	return dd->last_rounded_rate;
-}
-
diff --git a/arch/arm/mach-omap2/clkt_iclk.c b/arch/arm/mach-omap2/clkt_iclk.c
deleted file mode 100644
index 55eb579..0000000
--- a/arch/arm/mach-omap2/clkt_iclk.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * OMAP2/3 interface clock control
- *
- * Copyright (C) 2011 Nokia Corporation
- * Paul Walmsley
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/clk-provider.h>
-#include <linux/io.h>
-
-#include "clock.h"
-
-/* Register offsets */
-#define CM_AUTOIDLE			0x30
-#define CM_ICLKEN			0x10
-
-/* Private functions */
-
-/* XXX */
-void omap2_clkt_iclk_allow_idle(struct clk_hw_omap *clk)
-{
-	u32 v;
-	void __iomem *r;
-
-	r = (__force void __iomem *)
-		((__force u32)clk->enable_reg ^ (CM_AUTOIDLE ^ CM_ICLKEN));
-
-	v = omap2_clk_readl(clk, r);
-	v |= (1 << clk->enable_bit);
-	omap2_clk_writel(v, clk, r);
-}
-
-/* XXX */
-void omap2_clkt_iclk_deny_idle(struct clk_hw_omap *clk)
-{
-	u32 v;
-	void __iomem *r;
-
-	r = (__force void __iomem *)
-		((__force u32)clk->enable_reg ^ (CM_AUTOIDLE ^ CM_ICLKEN));
-
-	v = omap2_clk_readl(clk, r);
-	v &= ~(1 << clk->enable_bit);
-	omap2_clk_writel(v, clk, r);
-}
-
-/* Public data */
-
-const struct clk_hw_omap_ops clkhwops_iclk = {
-	.allow_idle	= omap2_clkt_iclk_allow_idle,
-	.deny_idle	= omap2_clkt_iclk_deny_idle,
-};
-
-const struct clk_hw_omap_ops clkhwops_iclk_wait = {
-	.allow_idle	= omap2_clkt_iclk_allow_idle,
-	.deny_idle	= omap2_clkt_iclk_deny_idle,
-	.find_idlest	= omap2_clk_dflt_find_idlest,
-	.find_companion	= omap2_clk_dflt_find_companion,
-};
-
-
-
diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c
index a699d71..acb60ed 100644
--- a/arch/arm/mach-omap2/clock.c
+++ b/arch/arm/mach-omap2/clock.c
@@ -20,12 +20,11 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/delay.h>
+#include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/io.h>
 #include <linux/bitops.h>
-#include <linux/regmap.h>
 #include <linux/of_address.h>
-#include <linux/bootmem.h>
 #include <asm/cpu.h>
 
 #include <trace/events/power.h>
@@ -40,19 +39,8 @@
 #include "cm-regbits-34xx.h"
 #include "common.h"
 
-/*
- * MAX_MODULE_ENABLE_WAIT: maximum of number of microseconds to wait
- * for a module to indicate that it is no longer in idle
- */
-#define MAX_MODULE_ENABLE_WAIT		100000
-
 u16 cpu_mask;
 
-/*
- * Clock features setup. Used instead of CPU type checks.
- */
-struct ti_clk_features ti_clk_features;
-
 /* DPLL valid Fint frequency band limits - from 34xx TRM Section 4.7.6.2 */
 #define OMAP3430_DPLL_FINT_BAND1_MIN	750000
 #define OMAP3430_DPLL_FINT_BAND1_MAX	2100000
@@ -66,119 +54,24 @@ struct ti_clk_features ti_clk_features;
 #define OMAP3PLUS_DPLL_FINT_MIN		32000
 #define OMAP3PLUS_DPLL_FINT_MAX		52000000
 
-/*
- * clkdm_control: if true, then when a clock is enabled in the
- * hardware, its clockdomain will first be enabled; and when a clock
- * is disabled in the hardware, its clockdomain will be disabled
- * afterwards.
- */
-static bool clkdm_control = true;
-
-static LIST_HEAD(clk_hw_omap_clocks);
-
-struct clk_iomap {
-	struct regmap *regmap;
-	void __iomem *mem;
-};
-
-static struct clk_iomap *clk_memmaps[CLK_MAX_MEMMAPS];
-
-static void clk_memmap_writel(u32 val, void __iomem *reg)
-{
-	struct clk_omap_reg *r = (struct clk_omap_reg *)&reg;
-	struct clk_iomap *io = clk_memmaps[r->index];
-
-	if (io->regmap)
-		regmap_write(io->regmap, r->offset, val);
-	else
-		writel_relaxed(val, io->mem + r->offset);
-}
-
-static u32 clk_memmap_readl(void __iomem *reg)
-{
-	u32 val;
-	struct clk_omap_reg *r = (struct clk_omap_reg *)&reg;
-	struct clk_iomap *io = clk_memmaps[r->index];
-
-	if (io->regmap)
-		regmap_read(io->regmap, r->offset, &val);
-	else
-		val = readl_relaxed(io->mem + r->offset);
-
-	return val;
-}
-
-void omap2_clk_writel(u32 val, struct clk_hw_omap *clk, void __iomem *reg)
-{
-	if (WARN_ON_ONCE(!(clk->flags & MEMMAP_ADDRESSING)))
-		writel_relaxed(val, reg);
-	else
-		clk_memmap_writel(val, reg);
-}
-
-u32 omap2_clk_readl(struct clk_hw_omap *clk, void __iomem *reg)
-{
-	if (WARN_ON_ONCE(!(clk->flags & MEMMAP_ADDRESSING)))
-		return readl_relaxed(reg);
-	else
-		return clk_memmap_readl(reg);
-}
-
 static struct ti_clk_ll_ops omap_clk_ll_ops = {
-	.clk_readl = clk_memmap_readl,
-	.clk_writel = clk_memmap_writel,
+	.clkdm_clk_enable = clkdm_clk_enable,
+	.clkdm_clk_disable = clkdm_clk_disable,
+	.cm_wait_module_ready = omap_cm_wait_module_ready,
+	.cm_split_idlest_reg = cm_split_idlest_reg,
 };
 
 /**
- * omap2_clk_provider_init - initialize a clock provider
- * @match_table: DT device table to match for devices to init
- * @np: device node pointer for the this clock provider
- * @index: index for the clock provider
- + @syscon: syscon regmap pointer
- * @mem: iomem pointer for the clock provider memory area, only used if
- *	 syscon is not provided
+ * omap2_clk_setup_ll_ops - setup clock driver low-level ops
  *
- * Initializes a clock provider module (CM/PRM etc.), registering
- * the memory mapping at specified index and initializing the
- * low level driver infrastructure. Returns 0 in success.
+ * Sets up clock driver low-level platform ops. These are needed
+ * for register accesses and various other misc platform operations.
+ * Returns 0 on success, -EBUSY if low level ops have been registered
+ * already.
  */
-int __init omap2_clk_provider_init(struct device_node *np, int index,
-				   struct regmap *syscon, void __iomem *mem)
+int __init omap2_clk_setup_ll_ops(void)
 {
-	struct clk_iomap *io;
-
-	ti_clk_ll_ops = &omap_clk_ll_ops;
-
-	io = kzalloc(sizeof(*io), GFP_KERNEL);
-
-	io->regmap = syscon;
-	io->mem = mem;
-
-	clk_memmaps[index] = io;
-
-	ti_dt_clk_init_provider(np, index);
-
-	return 0;
-}
-
-/**
- * omap2_clk_legacy_provider_init - initialize a legacy clock provider
- * @index: index for the clock provider
- * @mem: iomem pointer for the clock provider memory area
- *
- * Initializes a legacy clock provider memory mapping.
- */
-void __init omap2_clk_legacy_provider_init(int index, void __iomem *mem)
-{
-	struct clk_iomap *io;
-
-	ti_clk_ll_ops = &omap_clk_ll_ops;
-
-	io = memblock_virt_alloc(sizeof(*io), 0);
-
-	io->mem = mem;
-
-	clk_memmaps[index] = io;
+	return ti_clk_setup_ll_ops(&omap_clk_ll_ops);
 }
 
 /*
@@ -187,77 +80,6 @@ void __init omap2_clk_legacy_provider_init(int index, void __iomem *mem)
 
 /* Private functions */
 
-
-/**
- * _wait_idlest_generic - wait for a module to leave the idle state
- * @clk: module clock to wait for (needed for register offsets)
- * @reg: virtual address of module IDLEST register
- * @mask: value to mask against to determine if the module is active
- * @idlest: idle state indicator (0 or 1) for the clock
- * @name: name of the clock (for printk)
- *
- * Wait for a module to leave idle, where its idle-status register is
- * not inside the CM module.  Returns 1 if the module left idle
- * promptly, or 0 if the module did not leave idle before the timeout
- * elapsed.  XXX Deprecated - should be moved into drivers for the
- * individual IP block that the IDLEST register exists in.
- */
-static int _wait_idlest_generic(struct clk_hw_omap *clk, void __iomem *reg,
-				u32 mask, u8 idlest, const char *name)
-{
-	int i = 0, ena = 0;
-
-	ena = (idlest) ? 0 : mask;
-
-	omap_test_timeout(((omap2_clk_readl(clk, reg) & mask) == ena),
-			  MAX_MODULE_ENABLE_WAIT, i);
-
-	if (i < MAX_MODULE_ENABLE_WAIT)
-		pr_debug("omap clock: module associated with clock %s ready after %d loops\n",
-			 name, i);
-	else
-		pr_err("omap clock: module associated with clock %s didn't enable in %d tries\n",
-		       name, MAX_MODULE_ENABLE_WAIT);
-
-	return (i < MAX_MODULE_ENABLE_WAIT) ? 1 : 0;
-};
-
-/**
- * _omap2_module_wait_ready - wait for an OMAP module to leave IDLE
- * @clk: struct clk * belonging to the module
- *
- * If the necessary clocks for the OMAP hardware IP block that
- * corresponds to clock @clk are enabled, then wait for the module to
- * indicate readiness (i.e., to leave IDLE).  This code does not
- * belong in the clock code and will be moved in the medium term to
- * module-dependent code.  No return value.
- */
-static void _omap2_module_wait_ready(struct clk_hw_omap *clk)
-{
-	void __iomem *companion_reg, *idlest_reg;
-	u8 other_bit, idlest_bit, idlest_val, idlest_reg_id;
-	s16 prcm_mod;
-	int r;
-
-	/* Not all modules have multiple clocks that their IDLEST depends on */
-	if (clk->ops->find_companion) {
-		clk->ops->find_companion(clk, &companion_reg, &other_bit);
-		if (!(omap2_clk_readl(clk, companion_reg) & (1 << other_bit)))
-			return;
-	}
-
-	clk->ops->find_idlest(clk, &idlest_reg, &idlest_bit, &idlest_val);
-	r = cm_split_idlest_reg(idlest_reg, &prcm_mod, &idlest_reg_id);
-	if (r) {
-		/* IDLEST register not in the CM module */
-		_wait_idlest_generic(clk, idlest_reg, (1 << idlest_bit),
-				     idlest_val, __clk_get_name(clk->hw.clk));
-	} else {
-		omap_cm_wait_module_ready(0, prcm_mod, idlest_reg_id,
-					  idlest_bit);
-	};
-}
-
 /* Public functions */
 
 /**
@@ -290,279 +112,6 @@ void omap2_init_clk_clkdm(struct clk_hw *hw)
 	}
 }
 
-/**
- * omap2_clk_disable_clkdm_control - disable clkdm control on clk enable/disable
- *
- * Prevent the OMAP clock code from calling into the clockdomain code
- * when a hardware clock in that clockdomain is enabled or disabled.
- * Intended to be called at init time from omap*_clk_init().  No
- * return value.
- */
-void __init omap2_clk_disable_clkdm_control(void)
-{
-	clkdm_control = false;
-}
-
-/**
- * omap2_clk_dflt_find_companion - find companion clock to @clk
- * @clk: struct clk * to find the companion clock of
- * @other_reg: void __iomem ** to return the companion clock CM_*CLKEN va in
- * @other_bit: u8 ** to return the companion clock bit shift in
- *
- * Note: We don't need special code here for INVERT_ENABLE for the
- * time being since INVERT_ENABLE only applies to clocks enabled by
- * CM_CLKEN_PLL
- *
- * Convert CM_ICLKEN* <-> CM_FCLKEN*.  This conversion assumes it's
- * just a matter of XORing the bits.
- *
- * Some clocks don't have companion clocks.  For example, modules with
- * only an interface clock (such as MAILBOXES) don't have a companion
- * clock.  Right now, this code relies on the hardware exporting a bit
- * in the correct companion register that indicates that the
- * nonexistent 'companion clock' is active.  Future patches will
- * associate this type of code with per-module data structures to
- * avoid this issue, and remove the casts.  No return value.
- */
-void omap2_clk_dflt_find_companion(struct clk_hw_omap *clk,
-			void __iomem **other_reg, u8 *other_bit)
-{
-	u32 r;
-
-	/*
-	 * Convert CM_ICLKEN* <-> CM_FCLKEN*.  This conversion assumes
-	 * it's just a matter of XORing the bits.
-	 */
-	r = ((__force u32)clk->enable_reg ^ (CM_FCLKEN ^ CM_ICLKEN));
-
-	*other_reg = (__force void __iomem *)r;
-	*other_bit = clk->enable_bit;
-}
-
-/**
- * omap2_clk_dflt_find_idlest - find CM_IDLEST reg va, bit shift for @clk
- * @clk: struct clk * to find IDLEST info for
- * @idlest_reg: void __iomem ** to return the CM_IDLEST va in
- * @idlest_bit: u8 * to return the CM_IDLEST bit shift in
- * @idlest_val: u8 * to return the idle status indicator
- *
- * Return the CM_IDLEST register address and bit shift corresponding
- * to the module that "owns" this clock.  This default code assumes
- * that the CM_IDLEST bit shift is the CM_*CLKEN bit shift, and that
- * the IDLEST register address ID corresponds to the CM_*CLKEN
- * register address ID (e.g., that CM_FCLKEN2 corresponds to
- * CM_IDLEST2).  This is not true for all modules.  No return value.
- */
-void omap2_clk_dflt_find_idlest(struct clk_hw_omap *clk,
-		void __iomem **idlest_reg, u8 *idlest_bit, u8 *idlest_val)
-{
-	u32 r;
-
-	r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20);
-	*idlest_reg = (__force void __iomem *)r;
-	*idlest_bit = clk->enable_bit;
-
-	/*
-	 * 24xx uses 0 to indicate not ready, and 1 to indicate ready.
-	 * 34xx reverses this, just to keep us on our toes
-	 * AM35xx uses both, depending on the module.
-	 */
-	*idlest_val = ti_clk_features.cm_idlest_val;
-}
-
-/**
- * omap2_dflt_clk_enable - enable a clock in the hardware
- * @hw: struct clk_hw * of the clock to enable
- *
- * Enable the clock @hw in the hardware.  We first call into the OMAP
- * clockdomain code to "enable" the corresponding clockdomain if this
- * is the first enabled user of the clockdomain.  Then program the
- * hardware to enable the clock.  Then wait for the IP block that uses
- * this clock to leave idle (if applicable).  Returns the error value
- * from clkdm_clk_enable() if it terminated with an error, or -EINVAL
- * if @hw has a null clock enable_reg, or zero upon success.
- */
-int omap2_dflt_clk_enable(struct clk_hw *hw)
-{
-	struct clk_hw_omap *clk;
-	u32 v;
-	int ret = 0;
-
-	clk = to_clk_hw_omap(hw);
-
-	if (clkdm_control && clk->clkdm) {
-		ret = clkdm_clk_enable(clk->clkdm, hw->clk);
-		if (ret) {
-			WARN(1, "%s: could not enable %s's clockdomain %s: %d\n",
-			     __func__, __clk_get_name(hw->clk),
-			     clk->clkdm->name, ret);
-			return ret;
-		}
-	}
-
-	if (unlikely(clk->enable_reg == NULL)) {
-		pr_err("%s: %s missing enable_reg\n", __func__,
-		       __clk_get_name(hw->clk));
-		ret = -EINVAL;
-		goto err;
-	}
-
-	/* FIXME should not have INVERT_ENABLE bit here */
-	v = omap2_clk_readl(clk, clk->enable_reg);
-	if (clk->flags & INVERT_ENABLE)
-		v &= ~(1 << clk->enable_bit);
-	else
-		v |= (1 << clk->enable_bit);
-	omap2_clk_writel(v, clk, clk->enable_reg);
-	v = omap2_clk_readl(clk, clk->enable_reg); /* OCP barrier */
-
-	if (clk->ops && clk->ops->find_idlest)
-		_omap2_module_wait_ready(clk);
-
-	return 0;
-
-err:
-	if (clkdm_control && clk->clkdm)
-		clkdm_clk_disable(clk->clkdm, hw->clk);
-	return ret;
-}
-
-/**
- * omap2_dflt_clk_disable - disable a clock in the hardware
- * @hw: struct clk_hw * of the clock to disable
- *
- * Disable the clock @hw in the hardware, and call into the OMAP
- * clockdomain code to "disable" the corresponding clockdomain if all
- * clocks/hwmods in that clockdomain are now disabled.  No return
- * value.
- */
-void omap2_dflt_clk_disable(struct clk_hw *hw)
-{
-	struct clk_hw_omap *clk;
-	u32 v;
-
-	clk = to_clk_hw_omap(hw);
-	if (!clk->enable_reg) {
-		/*
-		 * 'independent' here refers to a clock which is not
-		 * controlled by its parent.
-		 */
-		pr_err("%s: independent clock %s has no enable_reg\n",
-		       __func__, __clk_get_name(hw->clk));
-		return;
-	}
-
-	v = omap2_clk_readl(clk, clk->enable_reg);
-	if (clk->flags & INVERT_ENABLE)
-		v |= (1 << clk->enable_bit);
-	else
-		v &= ~(1 << clk->enable_bit);
-	omap2_clk_writel(v, clk, clk->enable_reg);
-	/* No OCP barrier needed here since it is a disable operation */
-
-	if (clkdm_control && clk->clkdm)
-		clkdm_clk_disable(clk->clkdm, hw->clk);
-}
-
-/**
- * omap2_clkops_enable_clkdm - increment usecount on clkdm of @hw
- * @hw: struct clk_hw * of the clock being enabled
- *
- * Increment the usecount of the clockdomain of the clock pointed to
- * by @hw; if the usecount is 1, the clockdomain will be "enabled."
- * Only needed for clocks that don't use omap2_dflt_clk_enable() as
- * their enable function pointer.  Passes along the return value of
- * clkdm_clk_enable(), -EINVAL if @hw is not associated with a
- * clockdomain, or 0 if clock framework-based clockdomain control is
- * not implemented.
- */
-int omap2_clkops_enable_clkdm(struct clk_hw *hw)
-{
-	struct clk_hw_omap *clk;
-	int ret = 0;
-
-	clk = to_clk_hw_omap(hw);
-
-	if (unlikely(!clk->clkdm)) {
-		pr_err("%s: %s: no clkdm set ?!\n", __func__,
-		       __clk_get_name(hw->clk));
-		return -EINVAL;
-	}
-
-	if (unlikely(clk->enable_reg))
-		pr_err("%s: %s: should use dflt_clk_enable ?!\n", __func__,
-		       __clk_get_name(hw->clk));
-
-	if (!clkdm_control) {
-		pr_err("%s: %s: clkfw-based clockdomain control disabled ?!\n",
-		       __func__, __clk_get_name(hw->clk));
-		return 0;
-	}
-
-	ret = clkdm_clk_enable(clk->clkdm, hw->clk);
-	WARN(ret, "%s: could not enable %s's clockdomain %s: %d\n",
-	     __func__, __clk_get_name(hw->clk), clk->clkdm->name, ret);
-
-	return ret;
-}
-
-/**
- * omap2_clkops_disable_clkdm - decrement usecount on clkdm of @hw
- * @hw: struct clk_hw * of the clock being disabled
- *
- * Decrement the usecount of the clockdomain of the clock pointed to
- * by @hw; if the usecount is 0, the clockdomain will be "disabled."
- * Only needed for clocks that don't use omap2_dflt_clk_disable() as their
- * disable function pointer.  No return value.
- */
-void omap2_clkops_disable_clkdm(struct clk_hw *hw)
-{
-	struct clk_hw_omap *clk;
-
-	clk = to_clk_hw_omap(hw);
-
-	if (unlikely(!clk->clkdm)) {
-		pr_err("%s: %s: no clkdm set ?!\n", __func__,
-		       __clk_get_name(hw->clk));
-		return;
-	}
-
-	if (unlikely(clk->enable_reg))
-		pr_err("%s: %s: should use dflt_clk_disable ?!\n", __func__,
-		       __clk_get_name(hw->clk));
-
-	if (!clkdm_control) {
-		pr_err("%s: %s: clkfw-based clockdomain control disabled ?!\n",
-		       __func__, __clk_get_name(hw->clk));
-		return;
-	}
-
-	clkdm_clk_disable(clk->clkdm, hw->clk);
-}
-
-/**
- * omap2_dflt_clk_is_enabled - is clock enabled in the hardware?
- * @hw: struct clk_hw * to check
- *
- * Return 1 if the clock represented by @hw is enabled in the
- * hardware, or 0 otherwise.  Intended for use in the struct
- * clk_ops.is_enabled function pointer.
- */
-int omap2_dflt_clk_is_enabled(struct clk_hw *hw)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-	u32 v;
-
-	v = omap2_clk_readl(clk, clk->enable_reg);
-
-	if (clk->flags & INVERT_ENABLE)
-		v ^= BIT(clk->enable_bit);
-
-	v &= BIT(clk->enable_bit);
-
-	return v ? 1 : 0;
-}
-
 static int __initdata mpurate;
 
 /*
@@ -584,178 +133,6 @@ static int __init omap_clk_setup(char *str)
 __setup("mpurate=", omap_clk_setup);
 
 /**
- * omap2_init_clk_hw_omap_clocks - initialize an OMAP clock
- * @clk: struct clk * to initialize
- *
- * Add an OMAP clock @clk to the internal list of OMAP clocks.  Used
- * temporarily for autoidle handling, until this support can be
- * integrated into the common clock framework code in some way.  No
- * return value.
- */
-void omap2_init_clk_hw_omap_clocks(struct clk *clk)
-{
-	struct clk_hw_omap *c;
-
-	if (__clk_get_flags(clk) & CLK_IS_BASIC)
-		return;
-
-	c = to_clk_hw_omap(__clk_get_hw(clk));
-	list_add(&c->node, &clk_hw_omap_clocks);
-}
-
-/**
- * omap2_clk_enable_autoidle_all - enable autoidle on all OMAP clocks that
- * support it
- *
- * Enable clock autoidle on all OMAP clocks that have allow_idle
- * function pointers associated with them.  This function is intended
- * to be temporary until support for this is added to the common clock
- * code.  Returns 0.
- */
-int omap2_clk_enable_autoidle_all(void)
-{
-	struct clk_hw_omap *c;
-
-	list_for_each_entry(c, &clk_hw_omap_clocks, node)
-		if (c->ops && c->ops->allow_idle)
-			c->ops->allow_idle(c);
-
-	of_ti_clk_allow_autoidle_all();
-
-	return 0;
-}
-
-/**
- * omap2_clk_disable_autoidle_all - disable autoidle on all OMAP clocks that
- * support it
- *
- * Disable clock autoidle on all OMAP clocks that have allow_idle
- * function pointers associated with them.  This function is intended
- * to be temporary until support for this is added to the common clock
- * code.  Returns 0.
- */
-int omap2_clk_disable_autoidle_all(void)
-{
-	struct clk_hw_omap *c;
-
-	list_for_each_entry(c, &clk_hw_omap_clocks, node)
-		if (c->ops && c->ops->deny_idle)
-			c->ops->deny_idle(c);
-
-	of_ti_clk_deny_autoidle_all();
-
-	return 0;
-}
-
-/**
- * omap2_clk_deny_idle - disable autoidle on an OMAP clock
- * @clk: struct clk * to disable autoidle for
- *
- * Disable autoidle on an OMAP clock.
- */
-int omap2_clk_deny_idle(struct clk *clk)
-{
-	struct clk_hw_omap *c;
-
-	if (__clk_get_flags(clk) & CLK_IS_BASIC)
-		return -EINVAL;
-
-	c = to_clk_hw_omap(__clk_get_hw(clk));
-	if (c->ops && c->ops->deny_idle)
-		c->ops->deny_idle(c);
-	return 0;
-}
-
-/**
- * omap2_clk_allow_idle - enable autoidle on an OMAP clock
- * @clk: struct clk * to enable autoidle for
- *
- * Enable autoidle on an OMAP clock.
- */
-int omap2_clk_allow_idle(struct clk *clk)
-{
-	struct clk_hw_omap *c;
-
-	if (__clk_get_flags(clk) & CLK_IS_BASIC)
-		return -EINVAL;
-
-	c = to_clk_hw_omap(__clk_get_hw(clk));
-	if (c->ops && c->ops->allow_idle)
-		c->ops->allow_idle(c);
-	return 0;
-}
-
-/**
- * omap2_clk_enable_init_clocks - prepare & enable a list of clocks
- * @clk_names: ptr to an array of strings of clock names to enable
- * @num_clocks: number of clock names in @clk_names
- *
- * Prepare and enable a list of clocks, named by @clk_names.  No
- * return value. XXX Deprecated; only needed until these clocks are
- * properly claimed and enabled by the drivers or core code that uses
- * them.  XXX What code disables & calls clk_put on these clocks?
- */
-void omap2_clk_enable_init_clocks(const char **clk_names, u8 num_clocks)
-{
-	struct clk *init_clk;
-	int i;
-
-	for (i = 0; i < num_clocks; i++) {
-		init_clk = clk_get(NULL, clk_names[i]);
-		if (WARN(IS_ERR(init_clk), "could not find init clock %s\n",
-				clk_names[i]))
-			continue;
-		clk_prepare_enable(init_clk);
-	}
-}
-
-const struct clk_hw_omap_ops clkhwops_wait = {
-	.find_idlest	= omap2_clk_dflt_find_idlest,
-	.find_companion	= omap2_clk_dflt_find_companion,
-};
-
-/**
- * omap2_clk_switch_mpurate_at_boot - switch ARM MPU rate by boot-time argument
- * @mpurate_ck_name: clk name of the clock to change rate
- *
- * Change the ARM MPU clock rate to the rate specified on the command
- * line, if one was specified.  @mpurate_ck_name should be
- * "virt_prcm_set" on OMAP2xxx and "dpll1_ck" on OMAP34xx/OMAP36xx.
- * XXX Does not handle voltage scaling - on OMAP2xxx this is currently
- * handled by the virt_prcm_set clock, but this should be handled by
- * the OPP layer.  XXX This is intended to be handled by the OPP layer
- * code in the near future and should be removed from the clock code.
- * Returns -EINVAL if 'mpurate' is zero or if clk_set_rate() rejects
- * the rate, -ENOENT if the struct clk referred to by @mpurate_ck_name
- * cannot be found, or 0 upon success.
- */
-int __init omap2_clk_switch_mpurate_at_boot(const char *mpurate_ck_name)
-{
-	struct clk *mpurate_ck;
-	int r;
-
-	if (!mpurate)
-		return -EINVAL;
-
-	mpurate_ck = clk_get(NULL, mpurate_ck_name);
-	if (WARN(IS_ERR(mpurate_ck), "Failed to get %s.\n", mpurate_ck_name))
-		return -ENOENT;
-
-	r = clk_set_rate(mpurate_ck, mpurate);
-	if (r < 0) {
-		WARN(1, "clock: %s: unable to set MPU rate to %d: %d\n",
-		     mpurate_ck_name, mpurate, r);
-		clk_put(mpurate_ck);
-		return -EINVAL;
-	}
-
-	calibrate_delay();
-	clk_put(mpurate_ck);
-
-	return 0;
-}
-
-/**
  * omap2_clk_print_new_rates - print summary of current clock tree rates
  * @hfclkin_ck_name: clk name for the off-chip HF oscillator
  * @core_ck_name: clk name for the on-chip CORE_CLK
@@ -801,29 +178,30 @@ void __init omap2_clk_print_new_rates(const char *hfclkin_ck_name,
  */
 void __init ti_clk_init_features(void)
 {
+	struct ti_clk_features features = { 0 };
 	/* Fint setup for DPLLs */
 	if (cpu_is_omap3430()) {
-		ti_clk_features.fint_min = OMAP3430_DPLL_FINT_BAND1_MIN;
-		ti_clk_features.fint_max = OMAP3430_DPLL_FINT_BAND2_MAX;
-		ti_clk_features.fint_band1_max = OMAP3430_DPLL_FINT_BAND1_MAX;
-		ti_clk_features.fint_band2_min = OMAP3430_DPLL_FINT_BAND2_MIN;
+		features.fint_min = OMAP3430_DPLL_FINT_BAND1_MIN;
+		features.fint_max = OMAP3430_DPLL_FINT_BAND2_MAX;
+		features.fint_band1_max = OMAP3430_DPLL_FINT_BAND1_MAX;
+		features.fint_band2_min = OMAP3430_DPLL_FINT_BAND2_MIN;
 	} else {
-		ti_clk_features.fint_min = OMAP3PLUS_DPLL_FINT_MIN;
-		ti_clk_features.fint_max = OMAP3PLUS_DPLL_FINT_MAX;
+		features.fint_min = OMAP3PLUS_DPLL_FINT_MIN;
+		features.fint_max = OMAP3PLUS_DPLL_FINT_MAX;
 	}
 
 	/* Bypass value setup for DPLLs */
 	if (cpu_is_omap24xx()) {
-		ti_clk_features.dpll_bypass_vals |=
+		features.dpll_bypass_vals |=
 			(1 << OMAP2XXX_EN_DPLL_LPBYPASS) |
 			(1 << OMAP2XXX_EN_DPLL_FRBYPASS);
 	} else if (cpu_is_omap34xx()) {
-		ti_clk_features.dpll_bypass_vals |=
+		features.dpll_bypass_vals |=
 			(1 << OMAP3XXX_EN_DPLL_LPBYPASS) |
 			(1 << OMAP3XXX_EN_DPLL_FRBYPASS);
 	} else if (soc_is_am33xx() || cpu_is_omap44xx() || soc_is_am43xx() ||
 		   soc_is_omap54xx() || soc_is_dra7xx()) {
-		ti_clk_features.dpll_bypass_vals |=
+		features.dpll_bypass_vals |=
 			(1 << OMAP4XXX_EN_DPLL_LPBYPASS) |
 			(1 << OMAP4XXX_EN_DPLL_FRBYPASS) |
 			(1 << OMAP4XXX_EN_DPLL_MNBYPASS);
@@ -831,7 +209,7 @@ void __init ti_clk_init_features(void)
 
 	/* Jitter correction only available on OMAP343X */
 	if (cpu_is_omap343x())
-		ti_clk_features.flags |= TI_CLK_DPLL_HAS_FREQSEL;
+		features.flags |= TI_CLK_DPLL_HAS_FREQSEL;
 
 	/* Idlest value for interface clocks.
 	 * 24xx uses 0 to indicate not ready, and 1 to indicate ready.
@@ -839,11 +217,13 @@ void __init ti_clk_init_features(void)
 	 * AM35xx uses both, depending on the module.
 	 */
 	if (cpu_is_omap24xx())
-		ti_clk_features.cm_idlest_val = OMAP24XX_CM_IDLEST_VAL;
+		features.cm_idlest_val = OMAP24XX_CM_IDLEST_VAL;
 	else if (cpu_is_omap34xx())
-		ti_clk_features.cm_idlest_val = OMAP34XX_CM_IDLEST_VAL;
+		features.cm_idlest_val = OMAP34XX_CM_IDLEST_VAL;
 
 	/* On OMAP3430 ES1.0, DPLL4 can't be re-programmed */
 	if (omap_rev() == OMAP3430_REV_ES1_0)
-		ti_clk_features.flags |= TI_CLK_DPLL4_DENY_REPROGRAM;
+		features.flags |= TI_CLK_DPLL4_DENY_REPROGRAM;
+
+	ti_clk_setup_features(&features);
 }
diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h
index 652ed0a..67da640 100644
--- a/arch/arm/mach-omap2/clock.h
+++ b/arch/arm/mach-omap2/clock.h
@@ -23,90 +23,6 @@
 #include <linux/clk-provider.h>
 #include <linux/clk/ti.h>
 
-struct omap_clk {
-	u16				cpu;
-	struct clk_lookup		lk;
-};
-
-#define CLK(dev, con, ck)		\
-	{				\
-		.lk = {			\
-			.dev_id = dev,	\
-			.con_id = con,	\
-			.clk = ck,	\
-		},			\
-	}
-
-struct clockdomain;
-
-#define DEFINE_STRUCT_CLK(_name, _parent_array_name, _clkops_name)	\
-	static struct clk_core _name##_core = {			\
-		.name = #_name,					\
-		.hw = &_name##_hw.hw,				\
-		.parent_names = _parent_array_name,		\
-		.num_parents = ARRAY_SIZE(_parent_array_name),	\
-		.ops = &_clkops_name,				\
-	};							\
-	static struct clk _name = {				\
-		.core = &_name##_core,				\
-	};
-
-#define DEFINE_STRUCT_CLK_FLAGS(_name, _parent_array_name,	\
-				_clkops_name, _flags)		\
-	static struct clk_core _name##_core = {			\
-		.name = #_name,					\
-		.hw = &_name##_hw.hw,				\
-		.parent_names = _parent_array_name,		\
-		.num_parents = ARRAY_SIZE(_parent_array_name),	\
-		.ops = &_clkops_name,				\
-		.flags = _flags,				\
-	};							\
-	static struct clk _name = {				\
-		.core = &_name##_core,				\
-	};
-
-#define DEFINE_STRUCT_CLK_HW_OMAP(_name, _clkdm_name)		\
-	static struct clk_hw_omap _name##_hw = {		\
-		.hw = {						\
-			.clk = &_name,				\
-		},						\
-		.clkdm_name = _clkdm_name,			\
-	};
-
-#define DEFINE_CLK_OMAP_MUX(_name, _clkdm_name, _clksel,	\
-			    _clksel_reg, _clksel_mask,		\
-			    _parent_names, _ops)		\
-	static struct clk _name;				\
-	static struct clk_hw_omap _name##_hw = {		\
-		.hw = {						\
-			.clk = &_name,				\
-		},						\
-		.clksel		= _clksel,			\
-		.clksel_reg	= _clksel_reg,			\
-		.clksel_mask	= _clksel_mask,			\
-		.clkdm_name	= _clkdm_name,			\
-	};							\
-	DEFINE_STRUCT_CLK(_name, _parent_names, _ops);
-
-#define DEFINE_CLK_OMAP_MUX_GATE(_name, _clkdm_name, _clksel,	\
-				 _clksel_reg, _clksel_mask,	\
-				 _enable_reg, _enable_bit,	\
-				 _hwops, _parent_names, _ops)	\
-	static struct clk _name;				\
-	static struct clk_hw_omap _name##_hw = {		\
-		.hw = {						\
-			.clk = &_name,				\
-		},						\
-		.ops		= _hwops,			\
-		.enable_reg	= _enable_reg,			\
-		.enable_bit	= _enable_bit,			\
-		.clksel		= _clksel,			\
-		.clksel_reg	= _clksel_reg,			\
-		.clksel_mask	= _clksel_mask,			\
-		.clkdm_name	= _clkdm_name,			\
-	};							\
-	DEFINE_STRUCT_CLK(_name, _parent_names, _ops);
-
 /* struct clksel_rate.flags possibilities */
 #define RATE_IN_242X		(1 << 0)
 #define RATE_IN_243X		(1 << 1)
@@ -127,38 +43,6 @@ struct clockdomain;
 /* RATE_IN_3430ES2PLUS_36XX includes 34xx/35xx with ES >=2, and all 36xx/37xx */
 #define RATE_IN_3430ES2PLUS_36XX	(RATE_IN_3430ES2PLUS | RATE_IN_36XX)
 
-
-/**
- * struct clksel_rate - register bitfield values corresponding to clk divisors
- * @val: register bitfield value (shifted to bit 0)
- * @div: clock divisor corresponding to @val
- * @flags: (see "struct clksel_rate.flags possibilities" above)
- *
- * @val should match the value of a read from struct clk.clksel_reg
- * AND'ed with struct clk.clksel_mask, shifted right to bit 0.
- *
- * @div is the divisor that should be applied to the parent clock's rate
- * to produce the current clock's rate.
- */
-struct clksel_rate {
-	u32			val;
-	u8			div;
-	u16			flags;
-};
-
-/**
- * struct clksel - available parent clocks, and a pointer to their divisors
- * @parent: struct clk * to a possible parent clock
- * @rates: available divisors for this parent clock
- *
- * A struct clksel is always associated with one or more struct clks
- * and one or more struct clksel_rates.
- */
-struct clksel {
-	struct clk		 *parent;
-	const struct clksel_rate *rates;
-};
-
 /* CM_CLKSEL2_PLL.CORE_CLK_SRC bits (2XXX) */
 #define CORE_CLK_SRC_32K		0x0
 #define CORE_CLK_SRC_DPLL		0x1
@@ -180,105 +64,18 @@ struct clksel {
 #define OMAP4XXX_EN_DPLL_FRBYPASS		0x6
 #define OMAP4XXX_EN_DPLL_LOCKED			0x7
 
-u32 omap3_dpll_autoidle_read(struct clk_hw_omap *clk);
-void omap3_dpll_allow_idle(struct clk_hw_omap *clk);
-void omap3_dpll_deny_idle(struct clk_hw_omap *clk);
-void omap4_dpllmx_allow_gatectrl(struct clk_hw_omap *clk);
-void omap4_dpllmx_deny_gatectrl(struct clk_hw_omap *clk);
-
-void __init omap2_clk_disable_clkdm_control(void);
-
-/* clkt_clksel.c public functions */
-u32 omap2_clksel_round_rate_div(struct clk_hw_omap *clk,
-				unsigned long target_rate,
-				u32 *new_div);
-u8 omap2_clksel_find_parent_index(struct clk_hw *hw);
-unsigned long omap2_clksel_recalc(struct clk_hw *hw, unsigned long parent_rate);
-long omap2_clksel_round_rate(struct clk_hw *hw, unsigned long target_rate,
-				unsigned long *parent_rate);
-int omap2_clksel_set_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long parent_rate);
-int omap2_clksel_set_parent(struct clk_hw *hw, u8 field_val);
-
-/* clkt_iclk.c public functions */
-extern void omap2_clkt_iclk_allow_idle(struct clk_hw_omap *clk);
-extern void omap2_clkt_iclk_deny_idle(struct clk_hw_omap *clk);
-
-unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk);
-
-void omap2_clk_dflt_find_companion(struct clk_hw_omap *clk,
-				   void __iomem **other_reg,
-				   u8 *other_bit);
-void omap2_clk_dflt_find_idlest(struct clk_hw_omap *clk,
-				void __iomem **idlest_reg,
-				u8 *idlest_bit, u8 *idlest_val);
-int omap2_clk_enable_autoidle_all(void);
-int omap2_clk_allow_idle(struct clk *clk);
-int omap2_clk_deny_idle(struct clk *clk);
-int omap2_clk_switch_mpurate_at_boot(const char *mpurate_ck_name);
 void omap2_clk_print_new_rates(const char *hfclkin_ck_name,
 			       const char *core_ck_name,
 			       const char *mpu_ck_name);
 
-u32 omap2_clk_readl(struct clk_hw_omap *clk, void __iomem *reg);
-void omap2_clk_writel(u32 val, struct clk_hw_omap *clk, void __iomem *reg);
-
 extern u16 cpu_mask;
 
-/*
- * Clock features setup. Used instead of CPU type checks.
- */
-struct ti_clk_features {
-	u32 flags;
-	long fint_min;
-	long fint_max;
-	long fint_band1_max;
-	long fint_band2_min;
-	u8 dpll_bypass_vals;
-	u8 cm_idlest_val;
-};
-
-#define TI_CLK_DPLL_HAS_FREQSEL		(1 << 0)
-#define TI_CLK_DPLL4_DENY_REPROGRAM	(1 << 1)
-
-extern struct ti_clk_features ti_clk_features;
-
 extern const struct clkops clkops_omap2_dflt_wait;
 extern const struct clkops clkops_omap2_dflt;
 
 extern struct clk_functions omap2_clk_functions;
 
-extern const struct clksel_rate gpt_32k_rates[];
-extern const struct clksel_rate gpt_sys_rates[];
-extern const struct clksel_rate gfx_l3_rates[];
-extern const struct clksel_rate dsp_ick_rates[];
-
-extern const struct clk_hw_omap_ops clkhwops_iclk_wait;
-extern const struct clk_hw_omap_ops clkhwops_wait;
-extern const struct clk_hw_omap_ops clkhwops_omap3430es2_ssi_wait;
-extern const struct clk_hw_omap_ops clkhwops_omap3430es2_dss_usbhost_wait;
-extern const struct clk_hw_omap_ops clkhwops_omap3430es2_hsotgusb_wait;
-extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait;
-extern const struct clk_hw_omap_ops clkhwops_apll54;
-extern const struct clk_hw_omap_ops clkhwops_apll96;
-
-/* clksel_rate blocks shared between OMAP44xx and AM33xx */
-extern const struct clksel_rate div_1_0_rates[];
-extern const struct clksel_rate div3_1to4_rates[];
-extern const struct clksel_rate div_1_1_rates[];
-extern const struct clksel_rate div_1_2_rates[];
-extern const struct clksel_rate div_1_3_rates[];
-extern const struct clksel_rate div_1_4_rates[];
-extern const struct clksel_rate div31_1to31_rates[];
-
-extern int omap2_clkops_enable_clkdm(struct clk_hw *hw);
-extern void omap2_clkops_disable_clkdm(struct clk_hw *hw);
-
-struct regmap;
-
-int __init omap2_clk_provider_init(struct device_node *np, int index,
-				   struct regmap *syscon, void __iomem *mem);
-void __init omap2_clk_legacy_provider_init(int index, void __iomem *mem);
+int __init omap2_clk_setup_ll_ops(void);
 
 void __init ti_clk_init_features(void);
 #endif
diff --git a/arch/arm/mach-omap2/clock2430.c b/arch/arm/mach-omap2/clock2430.c
deleted file mode 100644
index cef0c8d..0000000
--- a/arch/arm/mach-omap2/clock2430.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * clock2430.c - OMAP2430-specific clock integration code
- *
- * Copyright (C) 2005-2008 Texas Instruments, Inc.
- * Copyright (C) 2004-2010 Nokia Corporation
- *
- * Contacts:
- * Richard Woodruff <r-woodruff2@ti.com>
- * Paul Walmsley
- *
- * Based on earlier work by Tuukka Tikkanen, Tony Lindgren,
- * Gordon McNutt and RidgeRun, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-
-#include "soc.h"
-#include "iomap.h"
-#include "clock.h"
-#include "clock2xxx.h"
-#include "cm2xxx.h"
-#include "cm-regbits-24xx.h"
-
-/**
- * omap2430_clk_i2chs_find_idlest - return CM_IDLEST info for 2430 I2CHS
- * @clk: struct clk * being enabled
- * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into
- * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into
- * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator
- *
- * OMAP2430 I2CHS CM_IDLEST bits are in CM_IDLEST1_CORE, but the
- * CM_*CLKEN bits are in CM_{I,F}CLKEN2_CORE.  This custom function
- * passes back the correct CM_IDLEST register address for I2CHS
- * modules.  No return value.
- */
-static void omap2430_clk_i2chs_find_idlest(struct clk_hw_omap *clk,
-					   void __iomem **idlest_reg,
-					   u8 *idlest_bit,
-					   u8 *idlest_val)
-{
-	*idlest_reg = OMAP2430_CM_REGADDR(CORE_MOD, CM_IDLEST);
-	*idlest_bit = clk->enable_bit;
-	*idlest_val = OMAP24XX_CM_IDLEST_VAL;
-}
-
-/* 2430 I2CHS has non-standard IDLEST register */
-const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait = {
-	.find_idlest	= omap2430_clk_i2chs_find_idlest,
-	.find_companion	= omap2_clk_dflt_find_companion,
-};
diff --git a/arch/arm/mach-omap2/clock2xxx.c b/arch/arm/mach-omap2/clock2xxx.c
deleted file mode 100644
index b870f6a..0000000
--- a/arch/arm/mach-omap2/clock2xxx.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * clock2xxx.c - OMAP2xxx-specific clock integration code
- *
- * Copyright (C) 2005-2008 Texas Instruments, Inc.
- * Copyright (C) 2004-2010 Nokia Corporation
- *
- * Contacts:
- * Richard Woodruff <r-woodruff2@ti.com>
- * Paul Walmsley
- *
- * Based on earlier work by Tuukka Tikkanen, Tony Lindgren,
- * Gordon McNutt and RidgeRun, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-
-#include "soc.h"
-#include "clock.h"
-#include "clock2xxx.h"
-#include "cm.h"
-#include "cm-regbits-24xx.h"
-
-struct clk_hw *dclk_hw;
-/*
- * Omap24xx specific clock functions
- */
-
-/*
- * Switch the MPU rate if specified on cmdline.  We cannot do this
- * early until cmdline is parsed.  XXX This should be removed from the
- * clock code and handled by the OPP layer code in the near future.
- */
-static int __init omap2xxx_clk_arch_init(void)
-{
-	int ret;
-
-	if (!cpu_is_omap24xx())
-		return 0;
-
-	ret = omap2_clk_switch_mpurate_at_boot("virt_prcm_set");
-	if (!ret)
-		omap2_clk_print_new_rates("sys_ck", "dpll_ck", "mpu_ck");
-
-	return ret;
-}
-
-omap_arch_initcall(omap2xxx_clk_arch_init);
-
-
diff --git a/arch/arm/mach-omap2/clock34xx.c b/arch/arm/mach-omap2/clock34xx.c
deleted file mode 100644
index 4596468..0000000
--- a/arch/arm/mach-omap2/clock34xx.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * OMAP3-specific clock framework functions
- *
- * Copyright (C) 2007-2008 Texas Instruments, Inc.
- * Copyright (C) 2007-2011 Nokia Corporation
- *
- * Paul Walmsley
- * Jouni Högander
- *
- * Parts of this code are based on code written by
- * Richard Woodruff, Tony Lindgren, Tuukka Tikkanen, Karthik Dasu,
- * Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-
-#include "clock.h"
-#include "clock34xx.h"
-#include "cm3xxx.h"
-#include "cm-regbits-34xx.h"
-
-/**
- * omap3430es2_clk_ssi_find_idlest - return CM_IDLEST info for SSI
- * @clk: struct clk * being enabled
- * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into
- * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into
- * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator
- *
- * The OMAP3430ES2 SSI target CM_IDLEST bit is at a different shift
- * from the CM_{I,F}CLKEN bit.  Pass back the correct info via
- * @idlest_reg and @idlest_bit.  No return value.
- */
-static void omap3430es2_clk_ssi_find_idlest(struct clk_hw_omap *clk,
-					    void __iomem **idlest_reg,
-					    u8 *idlest_bit,
-					    u8 *idlest_val)
-{
-	u32 r;
-
-	r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20);
-	*idlest_reg = (__force void __iomem *)r;
-	*idlest_bit = OMAP3430ES2_ST_SSI_IDLE_SHIFT;
-	*idlest_val = OMAP34XX_CM_IDLEST_VAL;
-}
-const struct clk_hw_omap_ops clkhwops_omap3430es2_ssi_wait = {
-	.find_idlest	= omap3430es2_clk_ssi_find_idlest,
-	.find_companion	= omap2_clk_dflt_find_companion,
-};
-
-const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_ssi_wait = {
-	.allow_idle	= omap2_clkt_iclk_allow_idle,
-	.deny_idle	= omap2_clkt_iclk_deny_idle,
-	.find_idlest	= omap3430es2_clk_ssi_find_idlest,
-	.find_companion = omap2_clk_dflt_find_companion,
-};
-
-/**
- * omap3430es2_clk_dss_usbhost_find_idlest - CM_IDLEST info for DSS, USBHOST
- * @clk: struct clk * being enabled
- * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into
- * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into
- * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator
- *
- * Some OMAP modules on OMAP3 ES2+ chips have both initiator and
- * target IDLEST bits.  For our purposes, we are concerned with the
- * target IDLEST bits, which exist at a different bit position than
- * the *CLKEN bit position for these modules (DSS and USBHOST) (The
- * default find_idlest code assumes that they are at the same
- * position.)  No return value.
- */
-static void omap3430es2_clk_dss_usbhost_find_idlest(struct clk_hw_omap *clk,
-						    void __iomem **idlest_reg,
-						    u8 *idlest_bit,
-						    u8 *idlest_val)
-{
-	u32 r;
-
-	r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20);
-	*idlest_reg = (__force void __iomem *)r;
-	/* USBHOST_IDLE has same shift */
-	*idlest_bit = OMAP3430ES2_ST_DSS_IDLE_SHIFT;
-	*idlest_val = OMAP34XX_CM_IDLEST_VAL;
-}
-
-const struct clk_hw_omap_ops clkhwops_omap3430es2_dss_usbhost_wait = {
-	.find_idlest	= omap3430es2_clk_dss_usbhost_find_idlest,
-	.find_companion	= omap2_clk_dflt_find_companion,
-};
-
-const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_dss_usbhost_wait = {
-	.allow_idle	= omap2_clkt_iclk_allow_idle,
-	.deny_idle	= omap2_clkt_iclk_deny_idle,
-	.find_idlest	= omap3430es2_clk_dss_usbhost_find_idlest,
-	.find_companion	= omap2_clk_dflt_find_companion,
-};
-
-/**
- * omap3430es2_clk_hsotgusb_find_idlest - return CM_IDLEST info for HSOTGUSB
- * @clk: struct clk * being enabled
- * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into
- * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into
- * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator
- *
- * The OMAP3430ES2 HSOTGUSB target CM_IDLEST bit is at a different
- * shift from the CM_{I,F}CLKEN bit.  Pass back the correct info via
- * @idlest_reg and @idlest_bit.  No return value.
- */
-static void omap3430es2_clk_hsotgusb_find_idlest(struct clk_hw_omap *clk,
-						 void __iomem **idlest_reg,
-						 u8 *idlest_bit,
-						 u8 *idlest_val)
-{
-	u32 r;
-
-	r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20);
-	*idlest_reg = (__force void __iomem *)r;
-	*idlest_bit = OMAP3430ES2_ST_HSOTGUSB_IDLE_SHIFT;
-	*idlest_val = OMAP34XX_CM_IDLEST_VAL;
-}
-
-const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_hsotgusb_wait = {
-	.allow_idle	= omap2_clkt_iclk_allow_idle,
-	.deny_idle	= omap2_clkt_iclk_deny_idle,
-	.find_idlest	= omap3430es2_clk_hsotgusb_find_idlest,
-	.find_companion	= omap2_clk_dflt_find_companion,
-};
-
-const struct clk_hw_omap_ops clkhwops_omap3430es2_hsotgusb_wait = {
-	.find_idlest	= omap3430es2_clk_hsotgusb_find_idlest,
-	.find_companion	= omap2_clk_dflt_find_companion,
-};
diff --git a/arch/arm/mach-omap2/clock34xx.h b/arch/arm/mach-omap2/clock34xx.h
deleted file mode 100644
index 084ba71..0000000
--- a/arch/arm/mach-omap2/clock34xx.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * OMAP34xx clock function prototypes and macros
- *
- * Copyright (C) 2007-2010 Texas Instruments, Inc.
- * Copyright (C) 2007-2011 Nokia Corporation
- */
-
-#ifndef __ARCH_ARM_MACH_OMAP2_CLOCK34XX_H
-#define __ARCH_ARM_MACH_OMAP2_CLOCK34XX_H
-
-extern const struct clkops clkops_omap3430es2_ssi_wait;
-extern const struct clkops clkops_omap3430es2_iclk_ssi_wait;
-extern const struct clkops clkops_omap3430es2_hsotgusb_wait;
-extern const struct clkops clkops_omap3430es2_iclk_hsotgusb_wait;
-extern const struct clkops clkops_omap3430es2_dss_usbhost_wait;
-extern const struct clkops clkops_omap3430es2_iclk_dss_usbhost_wait;
-
-#endif
diff --git a/arch/arm/mach-omap2/clock3517.c b/arch/arm/mach-omap2/clock3517.c
deleted file mode 100644
index 4d79ae2..0000000
--- a/arch/arm/mach-omap2/clock3517.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * OMAP3517/3505-specific clock framework functions
- *
- * Copyright (C) 2010 Texas Instruments, Inc.
- * Copyright (C) 2011 Nokia Corporation
- *
- * Ranjith Lohithakshan
- * Paul Walmsley
- *
- * Parts of this code are based on code written by
- * Richard Woodruff, Tony Lindgren, Tuukka Tikkanen, Karthik Dasu,
- * Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-
-#include "clock.h"
-#include "clock3517.h"
-#include "cm3xxx.h"
-#include "cm-regbits-34xx.h"
-
-/*
- * In AM35xx IPSS, the {ICK,FCK} enable bits for modules are exported
- * in the same register at a bit offset of 0x8. The EN_ACK for ICK is
- * at an offset of 4 from ICK enable bit.
- */
-#define AM35XX_IPSS_ICK_MASK			0xF
-#define AM35XX_IPSS_ICK_EN_ACK_OFFSET 		0x4
-#define AM35XX_IPSS_ICK_FCK_OFFSET		0x8
-#define AM35XX_IPSS_CLK_IDLEST_VAL		0
-
-/**
- * am35xx_clk_find_idlest - return clock ACK info for AM35XX IPSS
- * @clk: struct clk * being enabled
- * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into
- * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into
- * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator
- *
- * The interface clocks on AM35xx IPSS reflects the clock idle status
- * in the enable register itsel at a bit offset of 4 from the enable
- * bit. A value of 1 indicates that clock is enabled.
- */
-static void am35xx_clk_find_idlest(struct clk_hw_omap *clk,
-					    void __iomem **idlest_reg,
-					    u8 *idlest_bit,
-					    u8 *idlest_val)
-{
-	*idlest_reg = (__force void __iomem *)(clk->enable_reg);
-	*idlest_bit = clk->enable_bit + AM35XX_IPSS_ICK_EN_ACK_OFFSET;
-	*idlest_val = AM35XX_IPSS_CLK_IDLEST_VAL;
-}
-
-/**
- * am35xx_clk_find_companion - find companion clock to @clk
- * @clk: struct clk * to find the companion clock of
- * @other_reg: void __iomem ** to return the companion clock CM_*CLKEN va in
- * @other_bit: u8 ** to return the companion clock bit shift in
- *
- * Some clocks don't have companion clocks.  For example, modules with
- * only an interface clock (such as HECC) don't have a companion
- * clock.  Right now, this code relies on the hardware exporting a bit
- * in the correct companion register that indicates that the
- * nonexistent 'companion clock' is active.  Future patches will
- * associate this type of code with per-module data structures to
- * avoid this issue, and remove the casts.  No return value.
- */
-static void am35xx_clk_find_companion(struct clk_hw_omap *clk,
-				      void __iomem **other_reg,
-				      u8 *other_bit)
-{
-	*other_reg = (__force void __iomem *)(clk->enable_reg);
-	if (clk->enable_bit & AM35XX_IPSS_ICK_MASK)
-		*other_bit = clk->enable_bit + AM35XX_IPSS_ICK_FCK_OFFSET;
-	else
-		*other_bit = clk->enable_bit - AM35XX_IPSS_ICK_FCK_OFFSET;
-}
-const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait = {
-	.find_idlest	= am35xx_clk_find_idlest,
-	.find_companion	= am35xx_clk_find_companion,
-};
-
-/**
- * am35xx_clk_ipss_find_idlest - return CM_IDLEST info for IPSS
- * @clk: struct clk * being enabled
- * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into
- * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into
- * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator
- *
- * The IPSS target CM_IDLEST bit is at a different shift from the
- * CM_{I,F}CLKEN bit.  Pass back the correct info via @idlest_reg
- * and @idlest_bit.  No return value.
- */
-static void am35xx_clk_ipss_find_idlest(struct clk_hw_omap *clk,
-					    void __iomem **idlest_reg,
-					    u8 *idlest_bit,
-					    u8 *idlest_val)
-{
-	u32 r;
-
-	r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20);
-	*idlest_reg = (__force void __iomem *)r;
-	*idlest_bit = AM35XX_ST_IPSS_SHIFT;
-	*idlest_val = OMAP34XX_CM_IDLEST_VAL;
-}
-
-const struct clk_hw_omap_ops clkhwops_am35xx_ipss_wait = {
-	.allow_idle	= omap2_clkt_iclk_allow_idle,
-	.deny_idle	= omap2_clkt_iclk_deny_idle,
-	.find_idlest	= am35xx_clk_ipss_find_idlest,
-	.find_companion	= omap2_clk_dflt_find_companion,
-};
diff --git a/arch/arm/mach-omap2/clock3517.h b/arch/arm/mach-omap2/clock3517.h
deleted file mode 100644
index ca5e5a6..0000000
--- a/arch/arm/mach-omap2/clock3517.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * OMAP3517/3505 clock function prototypes and macros
- *
- * Copyright (C) 2010 Texas Instruments, Inc.
- * Copyright (C) 2010 Nokia Corporation
- */
-
-#ifndef __ARCH_ARM_MACH_OMAP2_CLOCK3517_H
-#define __ARCH_ARM_MACH_OMAP2_CLOCK3517_H
-
-extern const struct clkops clkops_am35xx_ipss_module_wait;
-extern const struct clkops clkops_am35xx_ipss_wait;
-
-#endif
diff --git a/arch/arm/mach-omap2/clock36xx.c b/arch/arm/mach-omap2/clock36xx.c
deleted file mode 100644
index 91ccb96..0000000
--- a/arch/arm/mach-omap2/clock36xx.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * OMAP36xx-specific clkops
- *
- * Copyright (C) 2010 Texas Instruments, Inc.
- * Copyright (C) 2010 Nokia Corporation
- *
- * Mike Turquette
- * Vijaykumar GN
- * Paul Walmsley
- *
- * Parts of this code are based on code written by
- * Richard Woodruff, Tony Lindgren, Tuukka Tikkanen, Karthik Dasu,
- * Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/clk.h>
-#include <linux/clk-provider.h>
-#include <linux/io.h>
-
-#include "clock.h"
-#include "clock36xx.h"
-#define to_clk_divider(_hw) container_of(_hw, struct clk_divider, hw)
-
-/**
- * omap36xx_pwrdn_clk_enable_with_hsdiv_restore - enable clocks suffering
- *         from HSDivider PWRDN problem Implements Errata ID: i556.
- * @clk: DPLL output struct clk
- *
- * 3630 only: dpll3_m3_ck, dpll4_m2_ck, dpll4_m3_ck, dpll4_m4_ck,
- * dpll4_m5_ck & dpll4_m6_ck dividers gets loaded with reset
- * valueafter their respective PWRDN bits are set.  Any dummy write
- * (Any other value different from the Read value) to the
- * corresponding CM_CLKSEL register will refresh the dividers.
- */
-int omap36xx_pwrdn_clk_enable_with_hsdiv_restore(struct clk_hw *clk)
-{
-	struct clk_divider *parent;
-	struct clk_hw *parent_hw;
-	u32 dummy_v, orig_v;
-	struct clk_hw_omap *omap_clk = to_clk_hw_omap(clk);
-	int ret;
-
-	/* Clear PWRDN bit of HSDIVIDER */
-	ret = omap2_dflt_clk_enable(clk);
-
-	parent_hw = __clk_get_hw(__clk_get_parent(clk->clk));
-	parent = to_clk_divider(parent_hw);
-
-	/* Restore the dividers */
-	if (!ret) {
-		orig_v = omap2_clk_readl(omap_clk, parent->reg);
-		dummy_v = orig_v;
-
-		/* Write any other value different from the Read value */
-		dummy_v ^= (1 << parent->shift);
-		omap2_clk_writel(dummy_v, omap_clk, parent->reg);
-
-		/* Write the original divider */
-		omap2_clk_writel(orig_v, omap_clk, parent->reg);
-	}
-
-	return ret;
-}
diff --git a/arch/arm/mach-omap2/clock36xx.h b/arch/arm/mach-omap2/clock36xx.h
deleted file mode 100644
index 945bb7f..0000000
--- a/arch/arm/mach-omap2/clock36xx.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * OMAP36xx clock function prototypes and macros
- *
- * Copyright (C) 2010 Texas Instruments, Inc.
- * Copyright (C) 2010 Nokia Corporation
- */
-
-#ifndef __ARCH_ARM_MACH_OMAP2_CLOCK36XX_H
-#define __ARCH_ARM_MACH_OMAP2_CLOCK36XX_H
-
-extern int omap36xx_pwrdn_clk_enable_with_hsdiv_restore(struct clk_hw *hw);
-
-#endif
diff --git a/arch/arm/mach-omap2/clock3xxx.c b/arch/arm/mach-omap2/clock3xxx.c
deleted file mode 100644
index a9e86db..0000000
--- a/arch/arm/mach-omap2/clock3xxx.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * OMAP3-specific clock framework functions
- *
- * Copyright (C) 2007-2008 Texas Instruments, Inc.
- * Copyright (C) 2007-2010 Nokia Corporation
- *
- * Paul Walmsley
- * Jouni Högander
- *
- * Parts of this code are based on code written by
- * Richard Woodruff, Tony Lindgren, Tuukka Tikkanen, Karthik Dasu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-
-#include "soc.h"
-#include "clock.h"
-#include "clock3xxx.h"
-#include "prm2xxx_3xxx.h"
-#include "prm-regbits-34xx.h"
-#include "cm2xxx_3xxx.h"
-#include "cm-regbits-34xx.h"
-
-/*
- * DPLL5_FREQ_FOR_USBHOST: USBHOST and USBTLL are the only clocks
- * that are sourced by DPLL5, and both of these require this clock
- * to be at 120 MHz for proper operation.
- */
-#define DPLL5_FREQ_FOR_USBHOST		120000000
-
-/* needed by omap3_core_dpll_m2_set_rate() */
-struct clk *sdrc_ick_p, *arm_fck_p;
-
-/**
- * omap3_dpll4_set_rate - set rate for omap3 per-dpll
- * @hw: clock to change
- * @rate: target rate for clock
- * @parent_rate: rate of the parent clock
- *
- * Check if the current SoC supports the per-dpll reprogram operation
- * or not, and then do the rate change if supported. Returns -EINVAL
- * if not supported, 0 for success, and potential error codes from the
- * clock rate change.
- */
-int omap3_dpll4_set_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long parent_rate)
-{
-	/*
-	 * According to the 12-5 CDP code from TI, "Limitation 2.5"
-	 * on 3430ES1 prevents us from changing DPLL multipliers or dividers
-	 * on DPLL4.
-	 */
-	if (ti_clk_features.flags & TI_CLK_DPLL4_DENY_REPROGRAM) {
-		pr_err("clock: DPLL4 cannot change rate due to silicon 'Limitation 2.5' on 3430ES1.\n");
-		return -EINVAL;
-	}
-
-	return omap3_noncore_dpll_set_rate(hw, rate, parent_rate);
-}
-
-/**
- * omap3_dpll4_set_rate_and_parent - set rate and parent for omap3 per-dpll
- * @hw: clock to change
- * @rate: target rate for clock
- * @parent_rate: rate of the parent clock
- * @index: parent index, 0 - reference clock, 1 - bypass clock
- *
- * Check if the current SoC support the per-dpll reprogram operation
- * or not, and then do the rate + parent change if supported. Returns
- * -EINVAL if not supported, 0 for success, and potential error codes
- * from the clock rate change.
- */
-int omap3_dpll4_set_rate_and_parent(struct clk_hw *hw, unsigned long rate,
-				    unsigned long parent_rate, u8 index)
-{
-	if (ti_clk_features.flags & TI_CLK_DPLL4_DENY_REPROGRAM) {
-		pr_err("clock: DPLL4 cannot change rate due to silicon 'Limitation 2.5' on 3430ES1.\n");
-		return -EINVAL;
-	}
-
-	return omap3_noncore_dpll_set_rate_and_parent(hw, rate, parent_rate,
-						      index);
-}
-
-void __init omap3_clk_lock_dpll5(void)
-{
-	struct clk *dpll5_clk;
-	struct clk *dpll5_m2_clk;
-
-	dpll5_clk = clk_get(NULL, "dpll5_ck");
-	clk_set_rate(dpll5_clk, DPLL5_FREQ_FOR_USBHOST);
-	clk_prepare_enable(dpll5_clk);
-
-	/* Program dpll5_m2_clk divider for no division */
-	dpll5_m2_clk = clk_get(NULL, "dpll5_m2_ck");
-	clk_prepare_enable(dpll5_m2_clk);
-	clk_set_rate(dpll5_m2_clk, DPLL5_FREQ_FOR_USBHOST);
-
-	clk_disable_unprepare(dpll5_m2_clk);
-	clk_disable_unprepare(dpll5_clk);
-	return;
-}
-
-/* Common clock code */
-
-/*
- * Switch the MPU rate if specified on cmdline.  We cannot do this
- * early until cmdline is parsed.  XXX This should be removed from the
- * clock code and handled by the OPP layer code in the near future.
- */
-static int __init omap3xxx_clk_arch_init(void)
-{
-	int ret;
-
-	if (!cpu_is_omap34xx())
-		return 0;
-
-	ret = omap2_clk_switch_mpurate_at_boot("dpll1_ck");
-	if (!ret)
-		omap2_clk_print_new_rates("osc_sys_ck", "core_ck", "arm_fck");
-
-	return ret;
-}
-
-omap_arch_initcall(omap3xxx_clk_arch_init);
-
-
diff --git a/arch/arm/mach-omap2/clock44xx.h b/arch/arm/mach-omap2/clock44xx.h
deleted file mode 100644
index 287a46f..0000000
--- a/arch/arm/mach-omap2/clock44xx.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * OMAP4 clock function prototypes and macros
- *
- * Copyright (C) 2009 Texas Instruments, Inc.
- * Copyright (C) 2010 Nokia Corporation
- */
-
-#ifndef __ARCH_ARM_MACH_OMAP2_CLOCK44XX_H
-#define __ARCH_ARM_MACH_OMAP2_CLOCK44XX_H
-
-/*
- * OMAP4430_REGM4XEN_MULT: If the CM_CLKMODE_DPLL_ABE.DPLL_REGM4XEN bit is
- *    set, then the DPLL's lock frequency is multiplied by 4 (OMAP4430 TRM
- *    vV Section 3.6.3.3.1 "DPLLs Output Clocks Parameters")
- */
-#define OMAP4430_REGM4XEN_MULT	4
-
-int omap4xxx_clk_init(void);
-
-#endif
diff --git a/arch/arm/mach-omap2/clock_common_data.c b/arch/arm/mach-omap2/clock_common_data.c
deleted file mode 100644
index 61b60df..0000000
--- a/arch/arm/mach-omap2/clock_common_data.c
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- *  linux/arch/arm/mach-omap2/clock_common_data.c
- *
- *  Copyright (C) 2005-2009 Texas Instruments, Inc.
- *  Copyright (C) 2004-2009 Nokia Corporation
- *
- *  Contacts:
- *  Richard Woodruff <r-woodruff2@ti.com>
- *  Paul Walmsley
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This file contains clock data that is common to both the OMAP2xxx and
- * OMAP3xxx clock definition files.
- */
-
-#include "clock.h"
-
-/* clksel_rate data common to 24xx/343x */
-const struct clksel_rate gpt_32k_rates[] = {
-	 { .div = 1, .val = 0, .flags = RATE_IN_24XX | RATE_IN_3XXX },
-	 { .div = 0 }
-};
-
-const struct clksel_rate gpt_sys_rates[] = {
-	 { .div = 1, .val = 1, .flags = RATE_IN_24XX | RATE_IN_3XXX },
-	 { .div = 0 }
-};
-
-const struct clksel_rate gfx_l3_rates[] = {
-	{ .div = 1, .val = 1, .flags = RATE_IN_24XX | RATE_IN_3XXX },
-	{ .div = 2, .val = 2, .flags = RATE_IN_24XX | RATE_IN_3XXX },
-	{ .div = 3, .val = 3, .flags = RATE_IN_243X | RATE_IN_3XXX },
-	{ .div = 4, .val = 4, .flags = RATE_IN_243X | RATE_IN_3XXX },
-	{ .div = 0 }
-};
-
-const struct clksel_rate dsp_ick_rates[] = {
-	{ .div = 1, .val = 1, .flags = RATE_IN_24XX },
-	{ .div = 2, .val = 2, .flags = RATE_IN_24XX },
-	{ .div = 3, .val = 3, .flags = RATE_IN_243X },
-	{ .div = 0 },
-};
-
-
-/* clksel_rate blocks shared between OMAP44xx and AM33xx */
-
-const struct clksel_rate div_1_0_rates[] = {
-	{ .div = 1, .val = 0, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 0 },
-};
-
-const struct clksel_rate div3_1to4_rates[] = {
-	{ .div = 1, .val = 0, .flags = RATE_IN_4430 },
-	{ .div = 2, .val = 1, .flags = RATE_IN_4430 },
-	{ .div = 4, .val = 2, .flags = RATE_IN_4430 },
-	{ .div = 0 },
-};
-
-const struct clksel_rate div_1_1_rates[] = {
-	{ .div = 1, .val = 1, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 0 },
-};
-
-const struct clksel_rate div_1_2_rates[] = {
-	{ .div = 1, .val = 2, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 0 },
-};
-
-const struct clksel_rate div_1_3_rates[] = {
-	{ .div = 1, .val = 3, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 0 },
-};
-
-const struct clksel_rate div_1_4_rates[] = {
-	{ .div = 1, .val = 4, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 0 },
-};
-
-const struct clksel_rate div31_1to31_rates[] = {
-	{ .div = 1, .val = 1, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 2, .val = 2, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 3, .val = 3, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 4, .val = 4, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 5, .val = 5, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 6, .val = 6, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 7, .val = 7, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 8, .val = 8, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 9, .val = 9, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 10, .val = 10, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 11, .val = 11, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 12, .val = 12, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 13, .val = 13, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 14, .val = 14, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 15, .val = 15, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 16, .val = 16, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 17, .val = 17, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 18, .val = 18, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 19, .val = 19, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 20, .val = 20, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 21, .val = 21, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 22, .val = 22, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 23, .val = 23, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 24, .val = 24, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 25, .val = 25, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 26, .val = 26, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 27, .val = 27, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 28, .val = 28, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 29, .val = 29, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 30, .val = 30, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 31, .val = 31, .flags = RATE_IN_4430 | RATE_IN_AM33XX },
-	{ .div = 0 },
-};
diff --git a/arch/arm/mach-omap2/dma.c b/arch/arm/mach-omap2/dma.c
index e1a56d8..1ed4be1 100644
--- a/arch/arm/mach-omap2/dma.c
+++ b/arch/arm/mach-omap2/dma.c
@@ -117,7 +117,6 @@ static void omap2_show_dma_caps(void)
 	u8 revision = dma_read(REVISION, 0) & 0xff;
 	printk(KERN_INFO "OMAP DMA hardware revision %d.%d\n",
 				revision >> 4, revision & 0xf);
-	return;
 }
 
 static unsigned configure_dma_errata(void)
diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c
deleted file mode 100644
index 44e57ec..0000000
--- a/arch/arm/mach-omap2/dpll3xxx.c
+++ /dev/null
@@ -1,818 +0,0 @@
-/*
- * OMAP3/4 - specific DPLL control functions
- *
- * Copyright (C) 2009-2010 Texas Instruments, Inc.
- * Copyright (C) 2009-2010 Nokia Corporation
- *
- * Written by Paul Walmsley
- * Testing and integration fixes by Jouni Högander
- *
- * 36xx support added by Vishwanath BS, Richard Woodruff, and Nishanth
- * Menon
- *
- * Parts of this code are based on code written by
- * Richard Woodruff, Tony Lindgren, Tuukka Tikkanen, Karthik Dasu
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/list.h>
-#include <linux/errno.h>
-#include <linux/delay.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-#include <linux/bitops.h>
-#include <linux/clkdev.h>
-
-#include "clockdomain.h"
-#include "clock.h"
-
-/* CM_AUTOIDLE_PLL*.AUTO_* bit values */
-#define DPLL_AUTOIDLE_DISABLE			0x0
-#define DPLL_AUTOIDLE_LOW_POWER_STOP		0x1
-
-#define MAX_DPLL_WAIT_TRIES		1000000
-
-/* Private functions */
-
-/* _omap3_dpll_write_clken - write clken_bits arg to a DPLL's enable bits */
-static void _omap3_dpll_write_clken(struct clk_hw_omap *clk, u8 clken_bits)
-{
-	const struct dpll_data *dd;
-	u32 v;
-
-	dd = clk->dpll_data;
-
-	v = omap2_clk_readl(clk, dd->control_reg);
-	v &= ~dd->enable_mask;
-	v |= clken_bits << __ffs(dd->enable_mask);
-	omap2_clk_writel(v, clk, dd->control_reg);
-}
-
-/* _omap3_wait_dpll_status: wait for a DPLL to enter a specific state */
-static int _omap3_wait_dpll_status(struct clk_hw_omap *clk, u8 state)
-{
-	const struct dpll_data *dd;
-	int i = 0;
-	int ret = -EINVAL;
-	const char *clk_name;
-
-	dd = clk->dpll_data;
-	clk_name = __clk_get_name(clk->hw.clk);
-
-	state <<= __ffs(dd->idlest_mask);
-
-	while (((omap2_clk_readl(clk, dd->idlest_reg) & dd->idlest_mask)
-		!= state) && i < MAX_DPLL_WAIT_TRIES) {
-		i++;
-		udelay(1);
-	}
-
-	if (i == MAX_DPLL_WAIT_TRIES) {
-		printk(KERN_ERR "clock: %s failed transition to '%s'\n",
-		       clk_name, (state) ? "locked" : "bypassed");
-	} else {
-		pr_debug("clock: %s transition to '%s' in %d loops\n",
-			 clk_name, (state) ? "locked" : "bypassed", i);
-
-		ret = 0;
-	}
-
-	return ret;
-}
-
-/* From 3430 TRM ES2 4.7.6.2 */
-static u16 _omap3_dpll_compute_freqsel(struct clk_hw_omap *clk, u8 n)
-{
-	unsigned long fint;
-	u16 f = 0;
-
-	fint = __clk_get_rate(clk->dpll_data->clk_ref) / n;
-
-	pr_debug("clock: fint is %lu\n", fint);
-
-	if (fint >= 750000 && fint <= 1000000)
-		f = 0x3;
-	else if (fint > 1000000 && fint <= 1250000)
-		f = 0x4;
-	else if (fint > 1250000 && fint <= 1500000)
-		f = 0x5;
-	else if (fint > 1500000 && fint <= 1750000)
-		f = 0x6;
-	else if (fint > 1750000 && fint <= 2100000)
-		f = 0x7;
-	else if (fint > 7500000 && fint <= 10000000)
-		f = 0xB;
-	else if (fint > 10000000 && fint <= 12500000)
-		f = 0xC;
-	else if (fint > 12500000 && fint <= 15000000)
-		f = 0xD;
-	else if (fint > 15000000 && fint <= 17500000)
-		f = 0xE;
-	else if (fint > 17500000 && fint <= 21000000)
-		f = 0xF;
-	else
-		pr_debug("clock: unknown freqsel setting for %d\n", n);
-
-	return f;
-}
-
-/*
- * _omap3_noncore_dpll_lock - instruct a DPLL to lock and wait for readiness
- * @clk: pointer to a DPLL struct clk
- *
- * Instructs a non-CORE DPLL to lock.  Waits for the DPLL to report
- * readiness before returning.  Will save and restore the DPLL's
- * autoidle state across the enable, per the CDP code.  If the DPLL
- * locked successfully, return 0; if the DPLL did not lock in the time
- * allotted, or DPLL3 was passed in, return -EINVAL.
- */
-static int _omap3_noncore_dpll_lock(struct clk_hw_omap *clk)
-{
-	const struct dpll_data *dd;
-	u8 ai;
-	u8 state = 1;
-	int r = 0;
-
-	pr_debug("clock: locking DPLL %s\n", __clk_get_name(clk->hw.clk));
-
-	dd = clk->dpll_data;
-	state <<= __ffs(dd->idlest_mask);
-
-	/* Check if already locked */
-	if ((omap2_clk_readl(clk, dd->idlest_reg) & dd->idlest_mask) == state)
-		goto done;
-
-	ai = omap3_dpll_autoidle_read(clk);
-
-	if (ai)
-		omap3_dpll_deny_idle(clk);
-
-	_omap3_dpll_write_clken(clk, DPLL_LOCKED);
-
-	r = _omap3_wait_dpll_status(clk, 1);
-
-	if (ai)
-		omap3_dpll_allow_idle(clk);
-
-done:
-	return r;
-}
-
-/*
- * _omap3_noncore_dpll_bypass - instruct a DPLL to bypass and wait for readiness
- * @clk: pointer to a DPLL struct clk
- *
- * Instructs a non-CORE DPLL to enter low-power bypass mode.  In
- * bypass mode, the DPLL's rate is set equal to its parent clock's
- * rate.  Waits for the DPLL to report readiness before returning.
- * Will save and restore the DPLL's autoidle state across the enable,
- * per the CDP code.  If the DPLL entered bypass mode successfully,
- * return 0; if the DPLL did not enter bypass in the time allotted, or
- * DPLL3 was passed in, or the DPLL does not support low-power bypass,
- * return -EINVAL.
- */
-static int _omap3_noncore_dpll_bypass(struct clk_hw_omap *clk)
-{
-	int r;
-	u8 ai;
-
-	if (!(clk->dpll_data->modes & (1 << DPLL_LOW_POWER_BYPASS)))
-		return -EINVAL;
-
-	pr_debug("clock: configuring DPLL %s for low-power bypass\n",
-		 __clk_get_name(clk->hw.clk));
-
-	ai = omap3_dpll_autoidle_read(clk);
-
-	_omap3_dpll_write_clken(clk, DPLL_LOW_POWER_BYPASS);
-
-	r = _omap3_wait_dpll_status(clk, 0);
-
-	if (ai)
-		omap3_dpll_allow_idle(clk);
-
-	return r;
-}
-
-/*
- * _omap3_noncore_dpll_stop - instruct a DPLL to stop
- * @clk: pointer to a DPLL struct clk
- *
- * Instructs a non-CORE DPLL to enter low-power stop. Will save and
- * restore the DPLL's autoidle state across the stop, per the CDP
- * code.  If DPLL3 was passed in, or the DPLL does not support
- * low-power stop, return -EINVAL; otherwise, return 0.
- */
-static int _omap3_noncore_dpll_stop(struct clk_hw_omap *clk)
-{
-	u8 ai;
-
-	if (!(clk->dpll_data->modes & (1 << DPLL_LOW_POWER_STOP)))
-		return -EINVAL;
-
-	pr_debug("clock: stopping DPLL %s\n", __clk_get_name(clk->hw.clk));
-
-	ai = omap3_dpll_autoidle_read(clk);
-
-	_omap3_dpll_write_clken(clk, DPLL_LOW_POWER_STOP);
-
-	if (ai)
-		omap3_dpll_allow_idle(clk);
-
-	return 0;
-}
-
-/**
- * _lookup_dco - Lookup DCO used by j-type DPLL
- * @clk: pointer to a DPLL struct clk
- * @dco: digital control oscillator selector
- * @m: DPLL multiplier to set
- * @n: DPLL divider to set
- *
- * See 36xx TRM section 3.5.3.3.3.2 "Type B DPLL (Low-Jitter)"
- *
- * XXX This code is not needed for 3430/AM35xx; can it be optimized
- * out in non-multi-OMAP builds for those chips?
- */
-static void _lookup_dco(struct clk_hw_omap *clk, u8 *dco, u16 m, u8 n)
-{
-	unsigned long fint, clkinp; /* watch out for overflow */
-
-	clkinp = __clk_get_rate(__clk_get_parent(clk->hw.clk));
-	fint = (clkinp / n) * m;
-
-	if (fint < 1000000000)
-		*dco = 2;
-	else
-		*dco = 4;
-}
-
-/**
- * _lookup_sddiv - Calculate sigma delta divider for j-type DPLL
- * @clk: pointer to a DPLL struct clk
- * @sd_div: target sigma-delta divider
- * @m: DPLL multiplier to set
- * @n: DPLL divider to set
- *
- * See 36xx TRM section 3.5.3.3.3.2 "Type B DPLL (Low-Jitter)"
- *
- * XXX This code is not needed for 3430/AM35xx; can it be optimized
- * out in non-multi-OMAP builds for those chips?
- */
-static void _lookup_sddiv(struct clk_hw_omap *clk, u8 *sd_div, u16 m, u8 n)
-{
-	unsigned long clkinp, sd; /* watch out for overflow */
-	int mod1, mod2;
-
-	clkinp = __clk_get_rate(__clk_get_parent(clk->hw.clk));
-
-	/*
-	 * target sigma-delta to near 250MHz
-	 * sd = ceil[(m/(n+1)) * (clkinp_MHz / 250)]
-	 */
-	clkinp /= 100000; /* shift from MHz to 10*Hz for 38.4 and 19.2 */
-	mod1 = (clkinp * m) % (250 * n);
-	sd = (clkinp * m) / (250 * n);
-	mod2 = sd % 10;
-	sd /= 10;
-
-	if (mod1 || mod2)
-		sd++;
-	*sd_div = sd;
-}
-
-/*
- * _omap3_noncore_dpll_program - set non-core DPLL M,N values directly
- * @clk:	struct clk * of DPLL to set
- * @freqsel:	FREQSEL value to set
- *
- * Program the DPLL with the last M, N values calculated, and wait for
- * the DPLL to lock. Returns -EINVAL upon error, or 0 upon success.
- */
-static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 freqsel)
-{
-	struct dpll_data *dd = clk->dpll_data;
-	u8 dco, sd_div;
-	u32 v;
-
-	/* 3430 ES2 TRM: 4.7.6.9 DPLL Programming Sequence */
-	_omap3_noncore_dpll_bypass(clk);
-
-	/*
-	 * Set jitter correction. Jitter correction applicable for OMAP343X
-	 * only since freqsel field is no longer present on other devices.
-	 */
-	if (ti_clk_features.flags & TI_CLK_DPLL_HAS_FREQSEL) {
-		v = omap2_clk_readl(clk, dd->control_reg);
-		v &= ~dd->freqsel_mask;
-		v |= freqsel << __ffs(dd->freqsel_mask);
-		omap2_clk_writel(v, clk, dd->control_reg);
-	}
-
-	/* Set DPLL multiplier, divider */
-	v = omap2_clk_readl(clk, dd->mult_div1_reg);
-
-	/* Handle Duty Cycle Correction */
-	if (dd->dcc_mask) {
-		if (dd->last_rounded_rate >= dd->dcc_rate)
-			v |= dd->dcc_mask; /* Enable DCC */
-		else
-			v &= ~dd->dcc_mask; /* Disable DCC */
-	}
-
-	v &= ~(dd->mult_mask | dd->div1_mask);
-	v |= dd->last_rounded_m << __ffs(dd->mult_mask);
-	v |= (dd->last_rounded_n - 1) << __ffs(dd->div1_mask);
-
-	/* Configure dco and sd_div for dplls that have these fields */
-	if (dd->dco_mask) {
-		_lookup_dco(clk, &dco, dd->last_rounded_m, dd->last_rounded_n);
-		v &= ~(dd->dco_mask);
-		v |= dco << __ffs(dd->dco_mask);
-	}
-	if (dd->sddiv_mask) {
-		_lookup_sddiv(clk, &sd_div, dd->last_rounded_m,
-			      dd->last_rounded_n);
-		v &= ~(dd->sddiv_mask);
-		v |= sd_div << __ffs(dd->sddiv_mask);
-	}
-
-	omap2_clk_writel(v, clk, dd->mult_div1_reg);
-
-	/* Set 4X multiplier and low-power mode */
-	if (dd->m4xen_mask || dd->lpmode_mask) {
-		v = omap2_clk_readl(clk, dd->control_reg);
-
-		if (dd->m4xen_mask) {
-			if (dd->last_rounded_m4xen)
-				v |= dd->m4xen_mask;
-			else
-				v &= ~dd->m4xen_mask;
-		}
-
-		if (dd->lpmode_mask) {
-			if (dd->last_rounded_lpmode)
-				v |= dd->lpmode_mask;
-			else
-				v &= ~dd->lpmode_mask;
-		}
-
-		omap2_clk_writel(v, clk, dd->control_reg);
-	}
-
-	/* We let the clock framework set the other output dividers later */
-
-	/* REVISIT: Set ramp-up delay? */
-
-	_omap3_noncore_dpll_lock(clk);
-
-	return 0;
-}
-
-/* Public functions */
-
-/**
- * omap3_dpll_recalc - recalculate DPLL rate
- * @clk: DPLL struct clk
- *
- * Recalculate and propagate the DPLL rate.
- */
-unsigned long omap3_dpll_recalc(struct clk_hw *hw, unsigned long parent_rate)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-
-	return omap2_get_dpll_rate(clk);
-}
-
-/* Non-CORE DPLL (e.g., DPLLs that do not control SDRC) clock functions */
-
-/**
- * omap3_noncore_dpll_enable - instruct a DPLL to enter bypass or lock mode
- * @clk: pointer to a DPLL struct clk
- *
- * Instructs a non-CORE DPLL to enable, e.g., to enter bypass or lock.
- * The choice of modes depends on the DPLL's programmed rate: if it is
- * the same as the DPLL's parent clock, it will enter bypass;
- * otherwise, it will enter lock.  This code will wait for the DPLL to
- * indicate readiness before returning, unless the DPLL takes too long
- * to enter the target state.  Intended to be used as the struct clk's
- * enable function.  If DPLL3 was passed in, or the DPLL does not
- * support low-power stop, or if the DPLL took too long to enter
- * bypass or lock, return -EINVAL; otherwise, return 0.
- */
-int omap3_noncore_dpll_enable(struct clk_hw *hw)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-	int r;
-	struct dpll_data *dd;
-	struct clk_hw *parent;
-
-	dd = clk->dpll_data;
-	if (!dd)
-		return -EINVAL;
-
-	if (clk->clkdm) {
-		r = clkdm_clk_enable(clk->clkdm, hw->clk);
-		if (r) {
-			WARN(1,
-			     "%s: could not enable %s's clockdomain %s: %d\n",
-			     __func__, __clk_get_name(hw->clk),
-			     clk->clkdm->name, r);
-			return r;
-		}
-	}
-
-	parent = __clk_get_hw(__clk_get_parent(hw->clk));
-
-	if (__clk_get_rate(hw->clk) == __clk_get_rate(dd->clk_bypass)) {
-		WARN_ON(parent != __clk_get_hw(dd->clk_bypass));
-		r = _omap3_noncore_dpll_bypass(clk);
-	} else {
-		WARN_ON(parent != __clk_get_hw(dd->clk_ref));
-		r = _omap3_noncore_dpll_lock(clk);
-	}
-
-	return r;
-}
-
-/**
- * omap3_noncore_dpll_disable - instruct a DPLL to enter low-power stop
- * @clk: pointer to a DPLL struct clk
- *
- * Instructs a non-CORE DPLL to enter low-power stop.  This function is
- * intended for use in struct clkops.  No return value.
- */
-void omap3_noncore_dpll_disable(struct clk_hw *hw)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-
-	_omap3_noncore_dpll_stop(clk);
-	if (clk->clkdm)
-		clkdm_clk_disable(clk->clkdm, hw->clk);
-}
-
-
-/* Non-CORE DPLL rate set code */
-
-/**
- * omap3_noncore_dpll_determine_rate - determine rate for a DPLL
- * @hw: pointer to the clock to determine rate for
- * @rate: target rate for the DPLL
- * @best_parent_rate: pointer for returning best parent rate
- * @best_parent_clk: pointer for returning best parent clock
- *
- * Determines which DPLL mode to use for reaching a desired target rate.
- * Checks whether the DPLL shall be in bypass or locked mode, and if
- * locked, calculates the M,N values for the DPLL via round-rate.
- * Returns a positive clock rate with success, negative error value
- * in failure.
- */
-long omap3_noncore_dpll_determine_rate(struct clk_hw *hw, unsigned long rate,
-				       unsigned long min_rate,
-				       unsigned long max_rate,
-				       unsigned long *best_parent_rate,
-				       struct clk_hw **best_parent_clk)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-	struct dpll_data *dd;
-
-	if (!hw || !rate)
-		return -EINVAL;
-
-	dd = clk->dpll_data;
-	if (!dd)
-		return -EINVAL;
-
-	if (__clk_get_rate(dd->clk_bypass) == rate &&
-	    (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) {
-		*best_parent_clk = __clk_get_hw(dd->clk_bypass);
-	} else {
-		rate = omap2_dpll_round_rate(hw, rate, best_parent_rate);
-		*best_parent_clk = __clk_get_hw(dd->clk_ref);
-	}
-
-	*best_parent_rate = rate;
-
-	return rate;
-}
-
-/**
- * omap3_noncore_dpll_set_parent - set parent for a DPLL clock
- * @hw: pointer to the clock to set parent for
- * @index: parent index to select
- *
- * Sets parent for a DPLL clock. This sets the DPLL into bypass or
- * locked mode. Returns 0 with success, negative error value otherwise.
- */
-int omap3_noncore_dpll_set_parent(struct clk_hw *hw, u8 index)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-	int ret;
-
-	if (!hw)
-		return -EINVAL;
-
-	if (index)
-		ret = _omap3_noncore_dpll_bypass(clk);
-	else
-		ret = _omap3_noncore_dpll_lock(clk);
-
-	return ret;
-}
-
-/**
- * omap3_noncore_dpll_set_rate - set rate for a DPLL clock
- * @hw: pointer to the clock to set parent for
- * @rate: target rate for the clock
- * @parent_rate: rate of the parent clock
- *
- * Sets rate for a DPLL clock. First checks if the clock parent is
- * reference clock (in bypass mode, the rate of the clock can't be
- * changed) and proceeds with the rate change operation. Returns 0
- * with success, negative error value otherwise.
- */
-int omap3_noncore_dpll_set_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long parent_rate)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-	struct dpll_data *dd;
-	u16 freqsel = 0;
-	int ret;
-
-	if (!hw || !rate)
-		return -EINVAL;
-
-	dd = clk->dpll_data;
-	if (!dd)
-		return -EINVAL;
-
-	if (__clk_get_hw(__clk_get_parent(hw->clk)) !=
-	    __clk_get_hw(dd->clk_ref))
-		return -EINVAL;
-
-	if (dd->last_rounded_rate == 0)
-		return -EINVAL;
-
-	/* Freqsel is available only on OMAP343X devices */
-	if (ti_clk_features.flags & TI_CLK_DPLL_HAS_FREQSEL) {
-		freqsel = _omap3_dpll_compute_freqsel(clk, dd->last_rounded_n);
-		WARN_ON(!freqsel);
-	}
-
-	pr_debug("%s: %s: set rate: locking rate to %lu.\n", __func__,
-		 __clk_get_name(hw->clk), rate);
-
-	ret = omap3_noncore_dpll_program(clk, freqsel);
-
-	return ret;
-}
-
-/**
- * omap3_noncore_dpll_set_rate_and_parent - set rate and parent for a DPLL clock
- * @hw: pointer to the clock to set rate and parent for
- * @rate: target rate for the DPLL
- * @parent_rate: clock rate of the DPLL parent
- * @index: new parent index for the DPLL, 0 - reference, 1 - bypass
- *
- * Sets rate and parent for a DPLL clock. If new parent is the bypass
- * clock, only selects the parent. Otherwise proceeds with a rate
- * change, as this will effectively also change the parent as the
- * DPLL is put into locked mode. Returns 0 with success, negative error
- * value otherwise.
- */
-int omap3_noncore_dpll_set_rate_and_parent(struct clk_hw *hw,
-					   unsigned long rate,
-					   unsigned long parent_rate,
-					   u8 index)
-{
-	int ret;
-
-	if (!hw || !rate)
-		return -EINVAL;
-
-	/*
-	 * clk-ref at index[0], in which case we only need to set rate,
-	 * the parent will be changed automatically with the lock sequence.
-	 * With clk-bypass case we only need to change parent.
-	 */
-	if (index)
-		ret = omap3_noncore_dpll_set_parent(hw, index);
-	else
-		ret = omap3_noncore_dpll_set_rate(hw, rate, parent_rate);
-
-	return ret;
-}
-
-/* DPLL autoidle read/set code */
-
-/**
- * omap3_dpll_autoidle_read - read a DPLL's autoidle bits
- * @clk: struct clk * of the DPLL to read
- *
- * Return the DPLL's autoidle bits, shifted down to bit 0.  Returns
- * -EINVAL if passed a null pointer or if the struct clk does not
- * appear to refer to a DPLL.
- */
-u32 omap3_dpll_autoidle_read(struct clk_hw_omap *clk)
-{
-	const struct dpll_data *dd;
-	u32 v;
-
-	if (!clk || !clk->dpll_data)
-		return -EINVAL;
-
-	dd = clk->dpll_data;
-
-	if (!dd->autoidle_reg)
-		return -EINVAL;
-
-	v = omap2_clk_readl(clk, dd->autoidle_reg);
-	v &= dd->autoidle_mask;
-	v >>= __ffs(dd->autoidle_mask);
-
-	return v;
-}
-
-/**
- * omap3_dpll_allow_idle - enable DPLL autoidle bits
- * @clk: struct clk * of the DPLL to operate on
- *
- * Enable DPLL automatic idle control.  This automatic idle mode
- * switching takes effect only when the DPLL is locked, at least on
- * OMAP3430.  The DPLL will enter low-power stop when its downstream
- * clocks are gated.  No return value.
- */
-void omap3_dpll_allow_idle(struct clk_hw_omap *clk)
-{
-	const struct dpll_data *dd;
-	u32 v;
-
-	if (!clk || !clk->dpll_data)
-		return;
-
-	dd = clk->dpll_data;
-
-	if (!dd->autoidle_reg)
-		return;
-
-	/*
-	 * REVISIT: CORE DPLL can optionally enter low-power bypass
-	 * by writing 0x5 instead of 0x1.  Add some mechanism to
-	 * optionally enter this mode.
-	 */
-	v = omap2_clk_readl(clk, dd->autoidle_reg);
-	v &= ~dd->autoidle_mask;
-	v |= DPLL_AUTOIDLE_LOW_POWER_STOP << __ffs(dd->autoidle_mask);
-	omap2_clk_writel(v, clk, dd->autoidle_reg);
-
-}
-
-/**
- * omap3_dpll_deny_idle - prevent DPLL from automatically idling
- * @clk: struct clk * of the DPLL to operate on
- *
- * Disable DPLL automatic idle control.  No return value.
- */
-void omap3_dpll_deny_idle(struct clk_hw_omap *clk)
-{
-	const struct dpll_data *dd;
-	u32 v;
-
-	if (!clk || !clk->dpll_data)
-		return;
-
-	dd = clk->dpll_data;
-
-	if (!dd->autoidle_reg)
-		return;
-
-	v = omap2_clk_readl(clk, dd->autoidle_reg);
-	v &= ~dd->autoidle_mask;
-	v |= DPLL_AUTOIDLE_DISABLE << __ffs(dd->autoidle_mask);
-	omap2_clk_writel(v, clk, dd->autoidle_reg);
-
-}
-
-/* Clock control for DPLL outputs */
-
-/* Find the parent DPLL for the given clkoutx2 clock */
-static struct clk_hw_omap *omap3_find_clkoutx2_dpll(struct clk_hw *hw)
-{
-	struct clk_hw_omap *pclk = NULL;
-	struct clk *parent;
-
-	/* Walk up the parents of clk, looking for a DPLL */
-	do {
-		do {
-			parent = __clk_get_parent(hw->clk);
-			hw = __clk_get_hw(parent);
-		} while (hw && (__clk_get_flags(hw->clk) & CLK_IS_BASIC));
-		if (!hw)
-			break;
-		pclk = to_clk_hw_omap(hw);
-	} while (pclk && !pclk->dpll_data);
-
-	/* clk does not have a DPLL as a parent?  error in the clock data */
-	if (!pclk) {
-		WARN_ON(1);
-		return NULL;
-	}
-
-	return pclk;
-}
-
-/**
- * omap3_clkoutx2_recalc - recalculate DPLL X2 output virtual clock rate
- * @clk: DPLL output struct clk
- *
- * Using parent clock DPLL data, look up DPLL state.  If locked, set our
- * rate to the dpll_clk * 2; otherwise, just use dpll_clk.
- */
-unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw,
-				    unsigned long parent_rate)
-{
-	const struct dpll_data *dd;
-	unsigned long rate;
-	u32 v;
-	struct clk_hw_omap *pclk = NULL;
-
-	if (!parent_rate)
-		return 0;
-
-	pclk = omap3_find_clkoutx2_dpll(hw);
-
-	if (!pclk)
-		return 0;
-
-	dd = pclk->dpll_data;
-
-	WARN_ON(!dd->enable_mask);
-
-	v = omap2_clk_readl(pclk, dd->control_reg) & dd->enable_mask;
-	v >>= __ffs(dd->enable_mask);
-	if ((v != OMAP3XXX_EN_DPLL_LOCKED) || (dd->flags & DPLL_J_TYPE))
-		rate = parent_rate;
-	else
-		rate = parent_rate * 2;
-	return rate;
-}
-
-int omap3_clkoutx2_set_rate(struct clk_hw *hw, unsigned long rate,
-					unsigned long parent_rate)
-{
-	return 0;
-}
-
-long omap3_clkoutx2_round_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long *prate)
-{
-	const struct dpll_data *dd;
-	u32 v;
-	struct clk_hw_omap *pclk = NULL;
-
-	if (!*prate)
-		return 0;
-
-	pclk = omap3_find_clkoutx2_dpll(hw);
-
-	if (!pclk)
-		return 0;
-
-	dd = pclk->dpll_data;
-
-	/* TYPE J does not have a clkoutx2 */
-	if (dd->flags & DPLL_J_TYPE) {
-		*prate = __clk_round_rate(__clk_get_parent(pclk->hw.clk), rate);
-		return *prate;
-	}
-
-	WARN_ON(!dd->enable_mask);
-
-	v = omap2_clk_readl(pclk, dd->control_reg) & dd->enable_mask;
-	v >>= __ffs(dd->enable_mask);
-
-	/* If in bypass, the rate is fixed to the bypass rate*/
-	if (v != OMAP3XXX_EN_DPLL_LOCKED)
-		return *prate;
-
-	if (__clk_get_flags(hw->clk) & CLK_SET_RATE_PARENT) {
-		unsigned long best_parent;
-
-		best_parent = (rate / 2);
-		*prate = __clk_round_rate(__clk_get_parent(hw->clk),
-				best_parent);
-	}
-
-	return *prate * 2;
-}
-
-/* OMAP3/4 non-CORE DPLL clkops */
-const struct clk_hw_omap_ops clkhwops_omap3_dpll = {
-	.allow_idle	= omap3_dpll_allow_idle,
-	.deny_idle	= omap3_dpll_deny_idle,
-};
diff --git a/arch/arm/mach-omap2/dpll44xx.c b/arch/arm/mach-omap2/dpll44xx.c
deleted file mode 100644
index f231be0..0000000
--- a/arch/arm/mach-omap2/dpll44xx.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * OMAP4-specific DPLL control functions
- *
- * Copyright (C) 2011 Texas Instruments, Inc.
- * Rajendra Nayak
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-#include <linux/bitops.h>
-
-#include "clock.h"
-
-/*
- * Maximum DPLL input frequency (FINT) and output frequency (FOUT) that
- * can supported when using the DPLL low-power mode. Frequencies are
- * defined in OMAP4430/60 Public TRM section 3.6.3.3.2 "Enable Control,
- * Status, and Low-Power Operation Mode".
- */
-#define OMAP4_DPLL_LP_FINT_MAX	1000000
-#define OMAP4_DPLL_LP_FOUT_MAX	100000000
-
-/*
- * Bitfield declarations
- */
-#define OMAP4430_DPLL_CLKOUT_GATE_CTRL_MASK		(1 << 8)
-#define OMAP4430_DPLL_CLKOUTX2_GATE_CTRL_MASK		(1 << 10)
-#define OMAP4430_DPLL_REGM4XEN_MASK			(1 << 11)
-
-/* Static rate multiplier for OMAP4 REGM4XEN clocks */
-#define OMAP4430_REGM4XEN_MULT				4
-
-void omap4_dpllmx_allow_gatectrl(struct clk_hw_omap *clk)
-{
-	u32 v;
-	u32 mask;
-
-	if (!clk || !clk->clksel_reg)
-		return;
-
-	mask = clk->flags & CLOCK_CLKOUTX2 ?
-			OMAP4430_DPLL_CLKOUTX2_GATE_CTRL_MASK :
-			OMAP4430_DPLL_CLKOUT_GATE_CTRL_MASK;
-
-	v = omap2_clk_readl(clk, clk->clksel_reg);
-	/* Clear the bit to allow gatectrl */
-	v &= ~mask;
-	omap2_clk_writel(v, clk, clk->clksel_reg);
-}
-
-void omap4_dpllmx_deny_gatectrl(struct clk_hw_omap *clk)
-{
-	u32 v;
-	u32 mask;
-
-	if (!clk || !clk->clksel_reg)
-		return;
-
-	mask = clk->flags & CLOCK_CLKOUTX2 ?
-			OMAP4430_DPLL_CLKOUTX2_GATE_CTRL_MASK :
-			OMAP4430_DPLL_CLKOUT_GATE_CTRL_MASK;
-
-	v = omap2_clk_readl(clk, clk->clksel_reg);
-	/* Set the bit to deny gatectrl */
-	v |= mask;
-	omap2_clk_writel(v, clk, clk->clksel_reg);
-}
-
-const struct clk_hw_omap_ops clkhwops_omap4_dpllmx = {
-	.allow_idle	= omap4_dpllmx_allow_gatectrl,
-	.deny_idle      = omap4_dpllmx_deny_gatectrl,
-};
-
-/**
- * omap4_dpll_lpmode_recalc - compute DPLL low-power setting
- * @dd: pointer to the dpll data structure
- *
- * Calculates if low-power mode can be enabled based upon the last
- * multiplier and divider values calculated. If low-power mode can be
- * enabled, then the bit to enable low-power mode is stored in the
- * last_rounded_lpmode variable. This implementation is based upon the
- * criteria for enabling low-power mode as described in the OMAP4430/60
- * Public TRM section 3.6.3.3.2 "Enable Control, Status, and Low-Power
- * Operation Mode".
- */
-static void omap4_dpll_lpmode_recalc(struct dpll_data *dd)
-{
-	long fint, fout;
-
-	fint = __clk_get_rate(dd->clk_ref) / (dd->last_rounded_n + 1);
-	fout = fint * dd->last_rounded_m;
-
-	if ((fint < OMAP4_DPLL_LP_FINT_MAX) && (fout < OMAP4_DPLL_LP_FOUT_MAX))
-		dd->last_rounded_lpmode = 1;
-	else
-		dd->last_rounded_lpmode = 0;
-}
-
-/**
- * omap4_dpll_regm4xen_recalc - compute DPLL rate, considering REGM4XEN bit
- * @clk: struct clk * of the DPLL to compute the rate for
- *
- * Compute the output rate for the OMAP4 DPLL represented by @clk.
- * Takes the REGM4XEN bit into consideration, which is needed for the
- * OMAP4 ABE DPLL.  Returns the DPLL's output rate (before M-dividers)
- * upon success, or 0 upon error.
- */
-unsigned long omap4_dpll_regm4xen_recalc(struct clk_hw *hw,
-			unsigned long parent_rate)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-	u32 v;
-	unsigned long rate;
-	struct dpll_data *dd;
-
-	if (!clk || !clk->dpll_data)
-		return 0;
-
-	dd = clk->dpll_data;
-
-	rate = omap2_get_dpll_rate(clk);
-
-	/* regm4xen adds a multiplier of 4 to DPLL calculations */
-	v = omap2_clk_readl(clk, dd->control_reg);
-	if (v & OMAP4430_DPLL_REGM4XEN_MASK)
-		rate *= OMAP4430_REGM4XEN_MULT;
-
-	return rate;
-}
-
-/**
- * omap4_dpll_regm4xen_round_rate - round DPLL rate, considering REGM4XEN bit
- * @clk: struct clk * of the DPLL to round a rate for
- * @target_rate: the desired rate of the DPLL
- *
- * Compute the rate that would be programmed into the DPLL hardware
- * for @clk if set_rate() were to be provided with the rate
- * @target_rate.  Takes the REGM4XEN bit into consideration, which is
- * needed for the OMAP4 ABE DPLL.  Returns the rounded rate (before
- * M-dividers) upon success, -EINVAL if @clk is null or not a DPLL, or
- * ~0 if an error occurred in omap2_dpll_round_rate().
- */
-long omap4_dpll_regm4xen_round_rate(struct clk_hw *hw,
-				    unsigned long target_rate,
-				    unsigned long *parent_rate)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-	struct dpll_data *dd;
-	long r;
-
-	if (!clk || !clk->dpll_data)
-		return -EINVAL;
-
-	dd = clk->dpll_data;
-
-	dd->last_rounded_m4xen = 0;
-
-	/*
-	 * First try to compute the DPLL configuration for
-	 * target rate without using the 4X multiplier.
-	 */
-	r = omap2_dpll_round_rate(hw, target_rate, NULL);
-	if (r != ~0)
-		goto out;
-
-	/*
-	 * If we did not find a valid DPLL configuration, try again, but
-	 * this time see if using the 4X multiplier can help. Enabling the
-	 * 4X multiplier is equivalent to dividing the target rate by 4.
-	 */
-	r = omap2_dpll_round_rate(hw, target_rate / OMAP4430_REGM4XEN_MULT,
-				  NULL);
-	if (r == ~0)
-		return r;
-
-	dd->last_rounded_rate *= OMAP4430_REGM4XEN_MULT;
-	dd->last_rounded_m4xen = 1;
-
-out:
-	omap4_dpll_lpmode_recalc(dd);
-
-	return dd->last_rounded_rate;
-}
-
-/**
- * omap4_dpll_regm4xen_determine_rate - determine rate for a DPLL
- * @hw: pointer to the clock to determine rate for
- * @rate: target rate for the DPLL
- * @best_parent_rate: pointer for returning best parent rate
- * @best_parent_clk: pointer for returning best parent clock
- *
- * Determines which DPLL mode to use for reaching a desired rate.
- * Checks whether the DPLL shall be in bypass or locked mode, and if
- * locked, calculates the M,N values for the DPLL via round-rate.
- * Returns a positive clock rate with success, negative error value
- * in failure.
- */
-long omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, unsigned long rate,
-					unsigned long min_rate,
-					unsigned long max_rate,
-					unsigned long *best_parent_rate,
-					struct clk_hw **best_parent_clk)
-{
-	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
-	struct dpll_data *dd;
-
-	if (!hw || !rate)
-		return -EINVAL;
-
-	dd = clk->dpll_data;
-	if (!dd)
-		return -EINVAL;
-
-	if (__clk_get_rate(dd->clk_bypass) == rate &&
-	    (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) {
-		*best_parent_clk = __clk_get_hw(dd->clk_bypass);
-	} else {
-		rate = omap4_dpll_regm4xen_round_rate(hw, rate,
-						      best_parent_rate);
-		*best_parent_clk = __clk_get_hw(dd->clk_ref);
-	}
-
-	*best_parent_rate = rate;
-
-	return rate;
-}
diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index 820dde8..a253aaf 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -37,7 +37,6 @@
 #include "clock.h"
 #include "clock2xxx.h"
 #include "clock3xxx.h"
-#include "clock44xx.h"
 #include "omap-pm.h"
 #include "sdrc.h"
 #include "control.h"
@@ -723,6 +722,8 @@ int __init omap_clk_init(void)
 
 	ti_clk_init_features();
 
+	omap2_clk_setup_ll_ops();
+
 	if (of_have_populated_dt()) {
 		ret = omap_control_init();
 		if (ret)
diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c
index 8e52621..e1d2e99 100644
--- a/arch/arm/mach-omap2/omap-wakeupgen.c
+++ b/arch/arm/mach-omap2/omap-wakeupgen.c
@@ -392,6 +392,7 @@ static struct irq_chip wakeupgen_chip = {
 	.irq_mask		= wakeupgen_mask,
 	.irq_unmask		= wakeupgen_unmask,
 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+	.irq_set_type		= irq_chip_set_type_parent,
 	.flags			= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND,
 #ifdef CONFIG_SMP
 	.irq_set_affinity	= irq_chip_set_affinity_parent,
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index d78c12e..6ef9e63 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -130,6 +130,7 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/io.h>
+#include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/delay.h>
 #include <linux/err.h>
@@ -2373,6 +2374,9 @@ static int of_dev_hwmod_lookup(struct device_node *np,
  * registers.  This address is needed early so the OCP registers that
  * are part of the device's address space can be ioremapped properly.
  *
+ * If SYSC access is not needed, the registers will not be remapped
+ * and non-availability of MPU access is not treated as an error.
+ *
  * Returns 0 on success, -EINVAL if an invalid hwmod is passed, and
  * -ENXIO on absent or invalid register target address space.
  */
@@ -2387,6 +2391,11 @@ static int __init _init_mpu_rt_base(struct omap_hwmod *oh, void *data,
 
 	_save_mpu_port_index(oh);
 
+	/* if we don't need sysc access we don't need to ioremap */
+	if (!oh->class->sysc)
+		return 0;
+
+	/* we can't continue without MPU PORT if we need sysc access */
 	if (oh->_int_flags & _HWMOD_NO_MPU_PORT)
 		return -ENXIO;
 
@@ -2396,8 +2405,10 @@ static int __init _init_mpu_rt_base(struct omap_hwmod *oh, void *data,
 			 oh->name);
 
 		/* Extract the IO space from device tree blob */
-		if (!np)
+		if (!np) {
+			pr_err("omap_hwmod: %s: no dt node\n", oh->name);
 			return -ENXIO;
+		}
 
 		va_start = of_iomap(np, index + oh->mpu_rt_idx);
 	} else {
@@ -2456,13 +2467,11 @@ static int __init _init(struct omap_hwmod *oh, void *data)
 				oh->name, np->name);
 	}
 
-	if (oh->class->sysc) {
-		r = _init_mpu_rt_base(oh, NULL, index, np);
-		if (r < 0) {
-			WARN(1, "omap_hwmod: %s: doesn't have mpu register target base\n",
-			     oh->name);
-			return 0;
-		}
+	r = _init_mpu_rt_base(oh, NULL, index, np);
+	if (r < 0) {
+		WARN(1, "omap_hwmod: %s: doesn't have mpu register target base\n",
+		     oh->name);
+		return 0;
 	}
 
 	r = _init_clocks(oh, NULL);
diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
index 2606c66..562247b 100644
--- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
@@ -827,8 +827,7 @@ static struct omap_hwmod_class_sysconfig dra7xx_gpmc_sysc = {
 	.syss_offs	= 0x0014,
 	.sysc_flags	= (SYSC_HAS_AUTOIDLE | SYSC_HAS_SIDLEMODE |
 			   SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS),
-	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
-			   SIDLE_SMART_WKUP),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
 	.sysc_fields	= &omap_hwmod_sysc_type1,
 };
 
@@ -844,7 +843,7 @@ static struct omap_hwmod dra7xx_gpmc_hwmod = {
 	.class		= &dra7xx_gpmc_hwmod_class,
 	.clkdm_name	= "l3main1_clkdm",
 	/* Skip reset for CONFIG_OMAP_GPMC_DEBUG for bootloader timings */
-	.flags		= HWMOD_SWSUP_SIDLE | DEBUG_OMAP_GPMC_HWMOD_FLAGS,
+	.flags		= DEBUG_OMAP_GPMC_HWMOD_FLAGS,
 	.main_clk	= "l3_iclk_div",
 	.prcm = {
 		.omap4 = {
diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c
index b1aad7e..2a1a418 100644
--- a/arch/arm/mach-omap2/pm24xx.c
+++ b/arch/arm/mach-omap2/pm24xx.c
@@ -25,6 +25,7 @@
 #include <linux/sysfs.h>
 #include <linux/module.h>
 #include <linux/delay.h>
+#include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/irq.h>
 #include <linux/time.h>
diff --git a/arch/arm/mach-orion5x/board-dt.c b/arch/arm/mach-orion5x/board-dt.c
index 79f033b..d087178 100644
--- a/arch/arm/mach-orion5x/board-dt.c
+++ b/arch/arm/mach-orion5x/board-dt.c
@@ -16,7 +16,6 @@
 #include <linux/of_platform.h>
 #include <linux/cpu.h>
 #include <linux/mbus.h>
-#include <linux/clk-provider.h>
 #include <linux/clocksource.h>
 #include <asm/system_misc.h>
 #include <asm/mach/arch.h>
diff --git a/arch/arm/mach-prima2/Kconfig b/arch/arm/mach-prima2/Kconfig
index e03d8b5..9ab8932 100644
--- a/arch/arm/mach-prima2/Kconfig
+++ b/arch/arm/mach-prima2/Kconfig
@@ -4,6 +4,7 @@ menuconfig ARCH_SIRF
 	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_IRQ_CHIP
 	select NO_IOPORT_MAP
+	select REGMAP
 	select PINCTRL
 	select PINCTRL_SIRF
 	help
diff --git a/arch/arm/mach-prima2/rtciobrg.c b/arch/arm/mach-prima2/rtciobrg.c
index 8f66d8f..d4852d2 100644
--- a/arch/arm/mach-prima2/rtciobrg.c
+++ b/arch/arm/mach-prima2/rtciobrg.c
@@ -1,5 +1,5 @@
 /*
- * RTC I/O Bridge interfaces for CSR SiRFprimaII
+ * RTC I/O Bridge interfaces for CSR SiRFprimaII/atlas7
  * ARM access the registers of SYSRTC, GPSRTC and PWRC through this module
  *
  * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company.
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/io.h>
+#include <linux/regmap.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
@@ -66,6 +67,7 @@ u32 sirfsoc_rtc_iobrg_readl(u32 addr)
 {
 	unsigned long flags, val;
 
+	/* TODO: add hwspinlock to sync with M3 */
 	spin_lock_irqsave(&rtciobrg_lock, flags);
 
 	val = __sirfsoc_rtc_iobrg_readl(addr);
@@ -90,6 +92,7 @@ void sirfsoc_rtc_iobrg_writel(u32 val, u32 addr)
 {
 	unsigned long flags;
 
+	 /* TODO: add hwspinlock to sync with M3 */
 	spin_lock_irqsave(&rtciobrg_lock, flags);
 
 	sirfsoc_rtc_iobrg_pre_writel(val, addr);
@@ -102,6 +105,45 @@ void sirfsoc_rtc_iobrg_writel(u32 val, u32 addr)
 }
 EXPORT_SYMBOL_GPL(sirfsoc_rtc_iobrg_writel);
 
+
+static int regmap_iobg_regwrite(void *context, unsigned int reg,
+				   unsigned int val)
+{
+	sirfsoc_rtc_iobrg_writel(val, reg);
+	return 0;
+}
+
+static int regmap_iobg_regread(void *context, unsigned int reg,
+				  unsigned int *val)
+{
+	*val = (u32)sirfsoc_rtc_iobrg_readl(reg);
+	return 0;
+}
+
+static struct regmap_bus regmap_iobg = {
+	.reg_write = regmap_iobg_regwrite,
+	.reg_read = regmap_iobg_regread,
+};
+
+/**
+ * devm_regmap_init_iobg(): Initialise managed register map
+ *
+ * @iobg: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer
+ * to a struct regmap.  The regmap will be automatically freed by the
+ * device management code.
+ */
+struct regmap *devm_regmap_init_iobg(struct device *dev,
+				    const struct regmap_config *config)
+{
+	const struct regmap_bus *bus = &regmap_iobg;
+
+	return devm_regmap_init(dev, bus, dev, config);
+}
+EXPORT_SYMBOL_GPL(devm_regmap_init_iobg);
+
 static const struct of_device_id rtciobrg_ids[] = {
 	{ .compatible = "sirf,prima2-rtciobg" },
 	{}
@@ -132,7 +174,7 @@ static int __init sirfsoc_rtciobrg_init(void)
 }
 postcore_initcall(sirfsoc_rtciobrg_init);
 
-MODULE_AUTHOR("Zhiwu Song <zhiwu.song@csr.com>, "
-		"Barry Song <baohua.song@csr.com>");
+MODULE_AUTHOR("Zhiwu Song <zhiwu.song@csr.com>");
+MODULE_AUTHOR("Barry Song <baohua.song@csr.com>");
 MODULE_DESCRIPTION("CSR SiRFprimaII rtc io bridge");
 MODULE_LICENSE("GPL v2");
diff --git a/arch/arm/mach-pxa/capc7117.c b/arch/arm/mach-pxa/capc7117.c
index c092730..bf366b3 100644
--- a/arch/arm/mach-pxa/capc7117.c
+++ b/arch/arm/mach-pxa/capc7117.c
@@ -24,6 +24,7 @@
 #include <linux/ata_platform.h>
 #include <linux/serial_8250.h>
 #include <linux/gpio.h>
+#include <linux/regulator/machine.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -144,6 +145,8 @@ static void __init capc7117_init(void)
 
 	capc7117_uarts_init();
 	capc7117_ide_init();
+
+	regulator_has_full_constraints();
 }
 
 MACHINE_START(CAPC7117,
diff --git a/arch/arm/mach-pxa/cm-x2xx.c b/arch/arm/mach-pxa/cm-x2xx.c
index bb99f59..a17a91e 100644
--- a/arch/arm/mach-pxa/cm-x2xx.c
+++ b/arch/arm/mach-pxa/cm-x2xx.c
@@ -13,6 +13,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/irq.h>
 #include <linux/gpio.h>
+#include <linux/regulator/machine.h>
 
 #include <linux/dm9000.h>
 #include <linux/leds.h>
@@ -466,6 +467,8 @@ static void __init cmx2xx_init(void)
 	cmx2xx_init_ac97();
 	cmx2xx_init_touchscreen();
 	cmx2xx_init_leds();
+
+	regulator_has_full_constraints();
 }
 
 static void __init cmx2xx_init_irq(void)
diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c
index 4d3588d..5851f4c 100644
--- a/arch/arm/mach-pxa/cm-x300.c
+++ b/arch/arm/mach-pxa/cm-x300.c
@@ -835,6 +835,8 @@ static void __init cm_x300_init(void)
 	cm_x300_init_ac97();
 	cm_x300_init_wi2wi();
 	cm_x300_init_bl();
+
+	regulator_has_full_constraints();
 }
 
 static void __init cm_x300_fixup(struct tag *tags, char **cmdline)
diff --git a/arch/arm/mach-pxa/colibri-pxa270.c b/arch/arm/mach-pxa/colibri-pxa270.c
index 5f9d930..3503826 100644
--- a/arch/arm/mach-pxa/colibri-pxa270.c
+++ b/arch/arm/mach-pxa/colibri-pxa270.c
@@ -18,6 +18,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
 #include <linux/platform_device.h>
+#include <linux/regulator/machine.h>
 #include <linux/ucb1400.h>
 
 #include <asm/mach/arch.h>
@@ -294,6 +295,8 @@ static void __init colibri_pxa270_init(void)
 		printk(KERN_ERR "Illegal colibri_pxa270_baseboard type %d\n",
 				colibri_pxa270_baseboard);
 	}
+
+	regulator_has_full_constraints();
 }
 
 /* The "Income s.r.o. SH-Dmaster PXA270 SBC" board can be booted either
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 51531ec..9d7072b 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -1306,6 +1306,8 @@ static void __init em_x270_init(void)
 	em_x270_init_i2c();
 	em_x270_init_camera();
 	em_x270_userspace_consumers_init();
+
+	regulator_has_full_constraints();
 }
 
 MACHINE_START(EM_X270, "Compulab EM-X270")
diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c
index c98511c..9b0eb02 100644
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -26,6 +26,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/pxa2xx_spi.h>
 #include <linux/can/platform/mcp251x.h>
+#include <linux/regulator/machine.h>
 
 #include "generic.h"
 
@@ -185,6 +186,8 @@ static void __init icontrol_init(void)
 	mxm_8x10_mmc_init();
 
 	icontrol_can_init();
+
+	regulator_has_full_constraints();
 }
 
 MACHINE_START(ICONTROL, "iControl/SafeTcam boards using Embedian MXM-8x10 CoM")
diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c
index 872dcb2..066e3a2 100644
--- a/arch/arm/mach-pxa/trizeps4.c
+++ b/arch/arm/mach-pxa/trizeps4.c
@@ -26,6 +26,7 @@
 #include <linux/dm9000.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/partitions.h>
+#include <linux/regulator/machine.h>
 #include <linux/i2c/pxa-i2c.h>
 
 #include <asm/types.h>
@@ -534,6 +535,8 @@ static void __init trizeps4_init(void)
 
 	BCR_writew(trizeps_conxs_bcr);
 	board_backlight_power(1);
+
+	regulator_has_full_constraints();
 }
 
 static void __init trizeps4_map_io(void)
diff --git a/arch/arm/mach-pxa/vpac270.c b/arch/arm/mach-pxa/vpac270.c
index aa89488..54122a9 100644
--- a/arch/arm/mach-pxa/vpac270.c
+++ b/arch/arm/mach-pxa/vpac270.c
@@ -24,6 +24,7 @@
 #include <linux/dm9000.h>
 #include <linux/ucb1400.h>
 #include <linux/ata_platform.h>
+#include <linux/regulator/machine.h>
 #include <linux/regulator/max1586.h>
 #include <linux/i2c/pxa-i2c.h>
 
@@ -711,6 +712,8 @@ static void __init vpac270_init(void)
 	vpac270_ts_init();
 	vpac270_rtc_init();
 	vpac270_ide_init();
+
+	regulator_has_full_constraints();
 }
 
 MACHINE_START(VPAC270, "Voipac PXA270")
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index ac2ae5c..6158566f 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -868,6 +868,8 @@ static void __init zeus_init(void)
 	i2c_register_board_info(0, ARRAY_AND_SIZE(zeus_i2c_devices));
 	pxa2xx_set_spi_info(3, &pxa2xx_spi_ssp3_master_info);
 	spi_register_board_info(zeus_spi_board_info, ARRAY_SIZE(zeus_spi_board_info));
+
+	regulator_has_full_constraints();
 }
 
 static struct map_desc zeus_io_desc[] __initdata = {
diff --git a/arch/arm/mach-s3c64xx/common.c b/arch/arm/mach-s3c64xx/common.c
index 16547f2..25d6676 100644
--- a/arch/arm/mach-s3c64xx/common.c
+++ b/arch/arm/mach-s3c64xx/common.c
@@ -21,7 +21,6 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/clk-provider.h>
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
 #include <linux/serial_core.h>
diff --git a/arch/arm/mach-spear/generic.h b/arch/arm/mach-spear/generic.h
index a99d90a..0664091 100644
--- a/arch/arm/mach-spear/generic.h
+++ b/arch/arm/mach-spear/generic.h
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2009-2012 ST Microelectronics
  * Rajeev Kumar <rajeev-dlh.kumar@st.com>
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/include/mach/irqs.h b/arch/arm/mach-spear/include/mach/irqs.h
index 92da0a8..7058720 100644
--- a/arch/arm/mach-spear/include/mach/irqs.h
+++ b/arch/arm/mach-spear/include/mach/irqs.h
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2009-2012 ST Microelectronics
  * Rajeev Kumar <rajeev-dlh.kumar@st.com>
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/include/mach/misc_regs.h b/arch/arm/mach-spear/include/mach/misc_regs.h
index 935639c..cfaf7c6 100644
--- a/arch/arm/mach-spear/include/mach/misc_regs.h
+++ b/arch/arm/mach-spear/include/mach/misc_regs.h
@@ -4,7 +4,7 @@
  * Miscellaneous registers definitions for SPEAr3xx machine family
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/include/mach/spear.h b/arch/arm/mach-spear/include/mach/spear.h
index f2d6a01..5ed841c 100644
--- a/arch/arm/mach-spear/include/mach/spear.h
+++ b/arch/arm/mach-spear/include/mach/spear.h
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2009,2012 ST Microelectronics
  * Rajeev Kumar<rajeev-dlh.kumar@st.com>
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/include/mach/uncompress.h b/arch/arm/mach-spear/include/mach/uncompress.h
index 51b2dc9..8439b9c 100644
--- a/arch/arm/mach-spear/include/mach/uncompress.h
+++ b/arch/arm/mach-spear/include/mach/uncompress.h
@@ -4,7 +4,7 @@
  * Serial port stubs for kernel decompress status messages
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/pl080.c b/arch/arm/mach-spear/pl080.c
index cfa1199..b4529f3 100644
--- a/arch/arm/mach-spear/pl080.c
+++ b/arch/arm/mach-spear/pl080.c
@@ -4,7 +4,7 @@
  * DMAC pl080 definitions for SPEAr platform
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/pl080.h b/arch/arm/mach-spear/pl080.h
index eb6590d..608dec6 100644
--- a/arch/arm/mach-spear/pl080.h
+++ b/arch/arm/mach-spear/pl080.h
@@ -4,7 +4,7 @@
  * DMAC pl080 definitions for SPEAr platform
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/restart.c b/arch/arm/mach-spear/restart.c
index ce5e098..b434215 100644
--- a/arch/arm/mach-spear/restart.c
+++ b/arch/arm/mach-spear/restart.c
@@ -4,7 +4,7 @@
  * SPEAr platform specific restart functions
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/spear1310.c b/arch/arm/mach-spear/spear1310.c
index d9ce4d8..cd5d375 100644
--- a/arch/arm/mach-spear/spear1310.c
+++ b/arch/arm/mach-spear/spear1310.c
@@ -4,7 +4,7 @@
  * SPEAr1310 machine source file
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/spear1340.c b/arch/arm/mach-spear/spear1340.c
index 3f3c0f1..94594d5 100644
--- a/arch/arm/mach-spear/spear1340.c
+++ b/arch/arm/mach-spear/spear1340.c
@@ -4,7 +4,7 @@
  * SPEAr1340 machine source file
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/spear13xx.c b/arch/arm/mach-spear/spear13xx.c
index 2e463a9..b7afce6 100644
--- a/arch/arm/mach-spear/spear13xx.c
+++ b/arch/arm/mach-spear/spear13xx.c
@@ -4,7 +4,7 @@
  * SPEAr13XX machines common source file
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/spear300.c b/arch/arm/mach-spear/spear300.c
index b52e48f..5b32edd 100644
--- a/arch/arm/mach-spear/spear300.c
+++ b/arch/arm/mach-spear/spear300.c
@@ -4,7 +4,7 @@
  * SPEAr300 machine source file
  *
  * Copyright (C) 2009-2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/spear310.c b/arch/arm/mach-spear/spear310.c
index ed2029d..86a44ac 100644
--- a/arch/arm/mach-spear/spear310.c
+++ b/arch/arm/mach-spear/spear310.c
@@ -4,7 +4,7 @@
  * SPEAr310 machine source file
  *
  * Copyright (C) 2009-2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/spear320.c b/arch/arm/mach-spear/spear320.c
index bf634b3..d45d751 100644
--- a/arch/arm/mach-spear/spear320.c
+++ b/arch/arm/mach-spear/spear320.c
@@ -4,7 +4,7 @@
  * SPEAr320 machine source file
  *
  * Copyright (C) 2009-2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear/spear3xx.c b/arch/arm/mach-spear/spear3xx.c
index bf3b1fd..23394ac 100644
--- a/arch/arm/mach-spear/spear3xx.c
+++ b/arch/arm/mach-spear/spear3xx.c
@@ -4,7 +4,7 @@
  * SPEAr3XX machines common source file
  *
  * Copyright (C) 2009-2012 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
+ * Viresh Kumar <vireshk@kernel.org>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig
index 81502b9..4efe2d4 100644
--- a/arch/arm/mach-sunxi/Kconfig
+++ b/arch/arm/mach-sunxi/Kconfig
@@ -35,7 +35,7 @@ config MACH_SUN7I
 	select SUN5I_HSTIMER
 
 config MACH_SUN8I
-	bool "Allwinner A23 (sun8i) SoCs support"
+	bool "Allwinner sun8i Family SoCs support"
 	default ARCH_SUNXI
 	select ARM_GIC
 	select MFD_SUN6I_PRCM
diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c
index 1bc811a..65bab28 100644
--- a/arch/arm/mach-sunxi/sunxi.c
+++ b/arch/arm/mach-sunxi/sunxi.c
@@ -67,10 +67,13 @@ MACHINE_END
 
 static const char * const sun8i_board_dt_compat[] = {
 	"allwinner,sun8i-a23",
+	"allwinner,sun8i-a33",
+	"allwinner,sun8i-h3",
 	NULL,
 };
 
-DT_MACHINE_START(SUN8I_DT, "Allwinner sun8i (A23) Family")
+DT_MACHINE_START(SUN8I_DT, "Allwinner sun8i Family")
+	.init_time	= sun6i_timer_init,
 	.dt_compat	= sun8i_board_dt_compat,
 	.init_late	= sunxi_dt_cpufreq_init,
 MACHINE_END
diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig
index 5d1a318..0fa4c5f 100644
--- a/arch/arm/mach-tegra/Kconfig
+++ b/arch/arm/mach-tegra/Kconfig
@@ -8,6 +8,7 @@ menuconfig ARCH_TEGRA
 	select HAVE_ARM_SCU if SMP
 	select HAVE_ARM_TWD if SMP
 	select PINCTRL
+	select PM_OPP
 	select ARCH_HAS_RESET_CONTROLLER
 	select RESET_CONTROLLER
 	select SOC_BUS
diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c
index e31d3d6..b316e18 100644
--- a/arch/arm/mach-ux500/cpu.c
+++ b/arch/arm/mach-ux500/cpu.c
@@ -72,21 +72,12 @@ void __init ux500_init_irq(void)
 	 * Init clocks here so that they are available for system timer
 	 * initialization.
 	 */
-	if (cpu_is_u8500_family()) {
-		u8500_of_clk_init(U8500_CLKRST1_BASE,
-				  U8500_CLKRST2_BASE,
-				  U8500_CLKRST3_BASE,
-				  U8500_CLKRST5_BASE,
-				  U8500_CLKRST6_BASE);
-	} else if (cpu_is_u9540()) {
-		u9540_clk_init(U8500_CLKRST1_BASE, U8500_CLKRST2_BASE,
-			       U8500_CLKRST3_BASE, U8500_CLKRST5_BASE,
-			       U8500_CLKRST6_BASE);
-	} else if (cpu_is_u8540()) {
-		u8540_clk_init(U8500_CLKRST1_BASE, U8500_CLKRST2_BASE,
-			       U8500_CLKRST3_BASE, U8500_CLKRST5_BASE,
-			       U8500_CLKRST6_BASE);
-	}
+	if (cpu_is_u8500_family())
+		u8500_clk_init();
+	else if (cpu_is_u9540())
+		u9540_clk_init();
+	else if (cpu_is_u8540())
+		u8540_clk_init();
 }
 
 static const char * __init ux500_get_machine(void)
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 1ced8a0..cba12f3 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1971,7 +1971,7 @@ static int extend_iommu_mapping(struct dma_iommu_mapping *mapping)
 {
 	int next_bitmap;
 
-	if (mapping->nr_bitmaps > mapping->extensions)
+	if (mapping->nr_bitmaps >= mapping->extensions)
 		return -EINVAL;
 
 	next_bitmap = mapping->nr_bitmaps;
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index d1e5ad7..0c81056 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -255,7 +255,7 @@ remap_area_supersections(unsigned long virt, unsigned long pfn,
 }
 #endif
 
-void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn,
+static void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn,
 	unsigned long offset, size_t size, unsigned int mtype, void *caller)
 {
 	const struct mem_type *type;
@@ -363,7 +363,7 @@ __arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size,
 		  unsigned int mtype)
 {
 	return __arm_ioremap_pfn_caller(pfn, offset, size, mtype,
-			__builtin_return_address(0));
+					__builtin_return_address(0));
 }
 EXPORT_SYMBOL(__arm_ioremap_pfn);
 
@@ -371,13 +371,26 @@ void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t,
 				      unsigned int, void *) =
 	__arm_ioremap_caller;
 
-void __iomem *
-__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype)
+void __iomem *ioremap(resource_size_t res_cookie, size_t size)
+{
+	return arch_ioremap_caller(res_cookie, size, MT_DEVICE,
+				   __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap);
+
+void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size)
+{
+	return arch_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED,
+				   __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_cache);
+
+void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size)
 {
-	return arch_ioremap_caller(phys_addr, size, mtype,
-		__builtin_return_address(0));
+	return arch_ioremap_caller(res_cookie, size, MT_DEVICE_WC,
+				   __builtin_return_address(0));
 }
-EXPORT_SYMBOL(__arm_ioremap);
+EXPORT_SYMBOL(ioremap_wc);
 
 /*
  * Remap an arbitrary physical address space into the kernel virtual
@@ -431,11 +444,11 @@ void __iounmap(volatile void __iomem *io_addr)
 
 void (*arch_iounmap)(volatile void __iomem *) = __iounmap;
 
-void __arm_iounmap(volatile void __iomem *io_addr)
+void iounmap(volatile void __iomem *cookie)
 {
-	arch_iounmap(io_addr);
+	arch_iounmap(cookie);
 }
-EXPORT_SYMBOL(__arm_iounmap);
+EXPORT_SYMBOL(iounmap);
 
 #ifdef CONFIG_PCI
 static int pci_ioremap_mem_type = MT_DEVICE;
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 6ca7d9a..870838a 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1072,6 +1072,7 @@ void __init sanity_check_meminfo(void)
 	int highmem = 0;
 	phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1;
 	struct memblock_region *reg;
+	bool should_use_highmem = false;
 
 	for_each_memblock(memory, reg) {
 		phys_addr_t block_start = reg->base;
@@ -1090,6 +1091,7 @@ void __init sanity_check_meminfo(void)
 				pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n",
 					  &block_start, &block_end);
 				memblock_remove(reg->base, reg->size);
+				should_use_highmem = true;
 				continue;
 			}
 
@@ -1100,6 +1102,7 @@ void __init sanity_check_meminfo(void)
 					  &block_start, &block_end, &vmalloc_limit);
 				memblock_remove(vmalloc_limit, overlap_size);
 				block_end = vmalloc_limit;
+				should_use_highmem = true;
 			}
 		}
 
@@ -1134,6 +1137,9 @@ void __init sanity_check_meminfo(void)
 		}
 	}
 
+	if (should_use_highmem)
+		pr_notice("Consider using a HIGHMEM enabled kernel.\n");
+
 	high_memory = __va(arm_lowmem_limit - 1) + 1;
 
 	/*
@@ -1494,6 +1500,7 @@ void __init paging_init(const struct machine_desc *mdesc)
 	build_mem_type_table();
 	prepare_page_table();
 	map_lowmem();
+	memblock_set_current_limit(arm_lowmem_limit);
 	dma_contiguous_remap();
 	devicemaps_init(mdesc);
 	kmap_init();
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index afd7e05..1dd1093 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -351,30 +351,43 @@ void __iomem *__arm_ioremap_pfn(unsigned long pfn, unsigned long offset,
 }
 EXPORT_SYMBOL(__arm_ioremap_pfn);
 
-void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset,
-			   size_t size, unsigned int mtype, void *caller)
+void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size,
+				   unsigned int mtype, void *caller)
 {
-	return __arm_ioremap_pfn(pfn, offset, size, mtype);
+	return (void __iomem *)phys_addr;
 }
 
-void __iomem *__arm_ioremap(phys_addr_t phys_addr, size_t size,
-			    unsigned int mtype)
+void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *);
+
+void __iomem *ioremap(resource_size_t res_cookie, size_t size)
 {
-	return (void __iomem *)phys_addr;
+	return __arm_ioremap_caller(res_cookie, size, MT_DEVICE,
+				    __builtin_return_address(0));
 }
-EXPORT_SYMBOL(__arm_ioremap);
+EXPORT_SYMBOL(ioremap);
 
-void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *);
+void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size)
+{
+	return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED,
+				    __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_cache);
 
-void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size,
-				   unsigned int mtype, void *caller)
+void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size)
+{
+	return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_WC,
+				    __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wc);
+
+void __iounmap(volatile void __iomem *addr)
 {
-	return __arm_ioremap(phys_addr, size, mtype);
 }
+EXPORT_SYMBOL(__iounmap);
 
 void (*arch_iounmap)(volatile void __iomem *);
 
-void __arm_iounmap(volatile void __iomem *addr)
+void iounmap(volatile void __iomem *addr)
 {
 }
-EXPORT_SYMBOL(__arm_iounmap);
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 0716bbe..de2b246 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -274,7 +274,10 @@ __v7_ca15mp_setup:
 __v7_b15mp_setup:
 __v7_ca17mp_setup:
 	mov	r10, #0
-1:
+1:	adr	r12, __v7_setup_stack		@ the local stack
+	stmia	r12, {r0-r5, lr}		@ v7_invalidate_l1 touches r0-r6
+	bl      v7_invalidate_l1
+	ldmia	r12, {r0-r5, lr}
 #ifdef CONFIG_SMP
 	ALT_SMP(mrc	p15, 0, r0, c1, c0, 1)
 	ALT_UP(mov	r0, #(1 << 6))		@ fake it for UP
@@ -283,7 +286,7 @@ __v7_ca17mp_setup:
 	orreq	r0, r0, r10			@ Enable CPU-specific SMP bits
 	mcreq	p15, 0, r0, c1, c0, 1
 #endif
-	b	__v7_setup
+	b	__v7_setup_cont
 
 /*
  * Errata:
@@ -413,10 +416,11 @@ __v7_pj4b_setup:
 
 __v7_setup:
 	adr	r12, __v7_setup_stack		@ the local stack
-	stmia	r12, {r0-r5, r7, r9, r11, lr}
+	stmia	r12, {r0-r5, lr}		@ v7_invalidate_l1 touches r0-r6
 	bl      v7_invalidate_l1
-	ldmia	r12, {r0-r5, r7, r9, r11, lr}
+	ldmia	r12, {r0-r5, lr}
 
+__v7_setup_cont:
 	and	r0, r9, #0xff000000		@ ARM?
 	teq	r0, #0x41000000
 	bne	__errata_finish
@@ -480,7 +484,7 @@ ENDPROC(__v7_setup)
 
 	.align	2
 __v7_setup_stack:
-	.space	4 * 11				@ 11 registers
+	.space	4 * 7				@ 12 registers
 
 	__INITDATA
 
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 4550d24..c011e22 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -74,32 +74,52 @@ struct jit_ctx {
 
 int bpf_jit_enable __read_mostly;
 
-static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset)
+static inline int call_neg_helper(struct sk_buff *skb, int offset, void *ret,
+		      unsigned int size)
+{
+	void *ptr = bpf_internal_load_pointer_neg_helper(skb, offset, size);
+
+	if (!ptr)
+		return -EFAULT;
+	memcpy(ret, ptr, size);
+	return 0;
+}
+
+static u64 jit_get_skb_b(struct sk_buff *skb, int offset)
 {
 	u8 ret;
 	int err;
 
-	err = skb_copy_bits(skb, offset, &ret, 1);
+	if (offset < 0)
+		err = call_neg_helper(skb, offset, &ret, 1);
+	else
+		err = skb_copy_bits(skb, offset, &ret, 1);
 
 	return (u64)err << 32 | ret;
 }
 
-static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset)
+static u64 jit_get_skb_h(struct sk_buff *skb, int offset)
 {
 	u16 ret;
 	int err;
 
-	err = skb_copy_bits(skb, offset, &ret, 2);
+	if (offset < 0)
+		err = call_neg_helper(skb, offset, &ret, 2);
+	else
+		err = skb_copy_bits(skb, offset, &ret, 2);
 
 	return (u64)err << 32 | ntohs(ret);
 }
 
-static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
+static u64 jit_get_skb_w(struct sk_buff *skb, int offset)
 {
 	u32 ret;
 	int err;
 
-	err = skb_copy_bits(skb, offset, &ret, 4);
+	if (offset < 0)
+		err = call_neg_helper(skb, offset, &ret, 4);
+	else
+		err = skb_copy_bits(skb, offset, &ret, 4);
 
 	return (u64)err << 32 | ntohl(ret);
 }
@@ -536,9 +556,6 @@ static int build_body(struct jit_ctx *ctx)
 		case BPF_LD | BPF_B | BPF_ABS:
 			load_order = 0;
 load:
-			/* the interpreter will deal with the negative K */
-			if ((int)k < 0)
-				return -ENOTSUPP;
 			emit_mov_i(r_off, k, ctx);
 load_common:
 			ctx->seen |= SEEN_DATA | SEEN_CALL;
@@ -547,12 +564,24 @@ load_common:
 				emit(ARM_SUB_I(r_scratch, r_skb_hl,
 					       1 << load_order), ctx);
 				emit(ARM_CMP_R(r_scratch, r_off), ctx);
-				condt = ARM_COND_HS;
+				condt = ARM_COND_GE;
 			} else {
 				emit(ARM_CMP_R(r_skb_hl, r_off), ctx);
 				condt = ARM_COND_HI;
 			}
 
+			/*
+			 * test for negative offset, only if we are
+			 * currently scheduled to take the fast
+			 * path. this will update the flags so that
+			 * the slowpath instruction are ignored if the
+			 * offset is negative.
+			 *
+			 * for loard_order == 0 the HI condition will
+			 * make loads at offset 0 take the slow path too.
+			 */
+			_emit(condt, ARM_CMP_I(r_off, 0), ctx);
+
 			_emit(condt, ARM_ADD_R(r_scratch, r_off, r_skb_data),
 			      ctx);
 
@@ -860,9 +889,11 @@ b_epilogue:
 			off = offsetof(struct sk_buff, vlan_tci);
 			emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
 			if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
-				OP_IMM3(ARM_AND, r_A, r_A, VLAN_VID_MASK, ctx);
-			else
-				OP_IMM3(ARM_AND, r_A, r_A, VLAN_TAG_PRESENT, ctx);
+				OP_IMM3(ARM_AND, r_A, r_A, ~VLAN_TAG_PRESENT, ctx);
+			else {
+				OP_IMM3(ARM_LSR, r_A, r_A, 12, ctx);
+				OP_IMM3(ARM_AND, r_A, r_A, 0x1, ctx);
+			}
 			break;
 		case BPF_ANC | SKF_AD_QUEUE:
 			ctx->seen |= SEEN_SKB;
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index 9d259d9..1160434 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -14,7 +14,7 @@ VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
 VDSO_LDFLAGS += -nostdlib -shared
 VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
 VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--build-id)
-VDSO_LDFLAGS += $(call cc-option, -fuse-ld=bfd)
+VDSO_LDFLAGS += $(call cc-ldoption, -fuse-ld=bfd)
 
 obj-$(CONFIG_VDSO) += vdso.o
 extra-$(CONFIG_VDSO) += vdso.lds
diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c
index 9005b07..aedec81 100644
--- a/arch/arm/vdso/vdsomunge.c
+++ b/arch/arm/vdso/vdsomunge.c
@@ -45,13 +45,11 @@
  * it does.
  */
 
-#define _GNU_SOURCE
-
 #include <byteswap.h>
 #include <elf.h>
 #include <errno.h>
-#include <error.h>
 #include <fcntl.h>
+#include <stdarg.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -82,11 +80,25 @@
 #define EF_ARM_ABI_FLOAT_HARD 0x400
 #endif
 
+static int failed;
+static const char *argv0;
 static const char *outfile;
 
+static void fail(const char *fmt, ...)
+{
+	va_list ap;
+
+	failed = 1;
+	fprintf(stderr, "%s: ", argv0);
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	exit(EXIT_FAILURE);
+}
+
 static void cleanup(void)
 {
-	if (error_message_count > 0 && outfile != NULL)
+	if (failed && outfile != NULL)
 		unlink(outfile);
 }
 
@@ -119,68 +131,66 @@ int main(int argc, char **argv)
 	int infd;
 
 	atexit(cleanup);
+	argv0 = argv[0];
 
 	if (argc != 3)
-		error(EXIT_FAILURE, 0, "Usage: %s [infile] [outfile]", argv[0]);
+		fail("Usage: %s [infile] [outfile]\n", argv[0]);
 
 	infile = argv[1];
 	outfile = argv[2];
 
 	infd = open(infile, O_RDONLY);
 	if (infd < 0)
-		error(EXIT_FAILURE, errno, "Cannot open %s", infile);
+		fail("Cannot open %s: %s\n", infile, strerror(errno));
 
 	if (fstat(infd, &stat) != 0)
-		error(EXIT_FAILURE, errno, "Failed stat for %s", infile);
+		fail("Failed stat for %s: %s\n", infile, strerror(errno));
 
 	inbuf = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, infd, 0);
 	if (inbuf == MAP_FAILED)
-		error(EXIT_FAILURE, errno, "Failed to map %s", infile);
+		fail("Failed to map %s: %s\n", infile, strerror(errno));
 
 	close(infd);
 
 	inhdr = inbuf;
 
 	if (memcmp(&inhdr->e_ident, ELFMAG, SELFMAG) != 0)
-		error(EXIT_FAILURE, 0, "Not an ELF file");
+		fail("Not an ELF file\n");
 
 	if (inhdr->e_ident[EI_CLASS] != ELFCLASS32)
-		error(EXIT_FAILURE, 0, "Unsupported ELF class");
+		fail("Unsupported ELF class\n");
 
 	swap = inhdr->e_ident[EI_DATA] != HOST_ORDER;
 
 	if (read_elf_half(inhdr->e_type, swap) != ET_DYN)
-		error(EXIT_FAILURE, 0, "Not a shared object");
+		fail("Not a shared object\n");
 
-	if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) {
-		error(EXIT_FAILURE, 0, "Unsupported architecture %#x",
-		      inhdr->e_machine);
-	}
+	if (read_elf_half(inhdr->e_machine, swap) != EM_ARM)
+		fail("Unsupported architecture %#x\n", inhdr->e_machine);
 
 	e_flags = read_elf_word(inhdr->e_flags, swap);
 
 	if (EF_ARM_EABI_VERSION(e_flags) != EF_ARM_EABI_VER5) {
-		error(EXIT_FAILURE, 0, "Unsupported EABI version %#x",
-		      EF_ARM_EABI_VERSION(e_flags));
+		fail("Unsupported EABI version %#x\n",
+		     EF_ARM_EABI_VERSION(e_flags));
 	}
 
 	if (e_flags & EF_ARM_ABI_FLOAT_HARD)
-		error(EXIT_FAILURE, 0,
-		      "Unexpected hard-float flag set in e_flags");
+		fail("Unexpected hard-float flag set in e_flags\n");
 
 	clear_soft_float = !!(e_flags & EF_ARM_ABI_FLOAT_SOFT);
 
 	outfd = open(outfile, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
 	if (outfd < 0)
-		error(EXIT_FAILURE, errno, "Cannot open %s", outfile);
+		fail("Cannot open %s: %s\n", outfile, strerror(errno));
 
 	if (ftruncate(outfd, stat.st_size) != 0)
-		error(EXIT_FAILURE, errno, "Cannot truncate %s", outfile);
+		fail("Cannot truncate %s: %s\n", outfile, strerror(errno));
 
 	outbuf = mmap(NULL, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED,
 		      outfd, 0);
 	if (outbuf == MAP_FAILED)
-		error(EXIT_FAILURE, errno, "Failed to map %s", outfile);
+		fail("Failed to map %s: %s\n", outfile, strerror(errno));
 
 	close(outfd);
 
@@ -195,7 +205,7 @@ int main(int argc, char **argv)
 	}
 
 	if (msync(outbuf, stat.st_size, MS_SYNC) != 0)
-		error(EXIT_FAILURE, errno, "Failed to sync %s", outfile);
+		fail("Failed to sync %s: %s\n", outfile, strerror(errno));
 
 	return EXIT_SUCCESS;
 }
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0f6edb1..318175f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -23,9 +23,9 @@ config ARM64
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
-	select EDAC_SUPPORT
 	select CPU_PM if (SUSPEND || CPU_IDLE)
 	select DCACHE_WORD_ACCESS
+	select EDAC_SUPPORT
 	select GENERIC_ALLOCATOR
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts b/arch/arm64/boot/dts/apm/apm-mustang.dts
index 83578e7..4c55833 100644
--- a/arch/arm64/boot/dts/apm/apm-mustang.dts
+++ b/arch/arm64/boot/dts/apm/apm-mustang.dts
@@ -23,6 +23,16 @@
 		device_type = "memory";
 		reg = < 0x1 0x00000000 0x0 0x80000000 >; /* Updated by bootloader */
 	};
+
+	gpio-keys {
+		compatible = "gpio-keys";
+		button@1 {
+			label = "POWER";
+			linux,code = <116>;
+			linux,input-type = <0x1>;
+			interrupts = <0x0 0x2d 0x1>;
+		};
+	};
 };
 
 &pcie0clk {
diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi
index 0689c3f..d831bc2 100644
--- a/arch/arm64/boot/dts/apm/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi
@@ -490,7 +490,8 @@
 				0xe0 0xd0000000 0x0 0x00040000>; /* PCI config space */
 			reg-names = "csr", "cfg";
 			ranges = <0x01000000 0x00 0x00000000 0xe0 0x10000000 0x00 0x00010000   /* io */
-				  0x02000000 0x00 0x80000000 0xe1 0x80000000 0x00 0x80000000>; /* mem */
+				  0x02000000 0x00 0x80000000 0xe1 0x80000000 0x00 0x80000000   /* mem */
+				  0x43000000 0xf0 0x00000000 0xf0 0x00000000 0x10 0x00000000>; /* mem */
 			dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000
 				      0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>;
 			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
@@ -513,8 +514,9 @@
 			reg = < 0x00 0x1f2c0000 0x0 0x00010000   /* Controller registers */
 				0xd0 0xd0000000 0x0 0x00040000>; /* PCI config space */
 			reg-names = "csr", "cfg";
-			ranges = <0x01000000 0x0 0x00000000 0xd0 0x10000000 0x00 0x00010000   /* io  */
-				  0x02000000 0x0 0x80000000 0xd1 0x80000000 0x00 0x80000000>; /* mem */
+			ranges = <0x01000000 0x00 0x00000000 0xd0 0x10000000 0x00 0x00010000   /* io  */
+				  0x02000000 0x00 0x80000000 0xd1 0x80000000 0x00 0x80000000   /* mem */
+				  0x43000000 0xd8 0x00000000 0xd8 0x00000000 0x08 0x00000000>; /* mem */
 			dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000
 				      0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>;
 			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
@@ -537,8 +539,9 @@
 			reg =  < 0x00 0x1f2d0000 0x0 0x00010000   /* Controller registers */
 				 0x90 0xd0000000 0x0 0x00040000>; /* PCI config space */
 			reg-names = "csr", "cfg";
-			ranges = <0x01000000 0x0 0x00000000 0x90 0x10000000 0x0 0x00010000   /* io  */
-				  0x02000000 0x0 0x80000000 0x91 0x80000000 0x0 0x80000000>; /* mem */
+			ranges = <0x01000000 0x00 0x00000000 0x90 0x10000000 0x00 0x00010000   /* io  */
+				  0x02000000 0x00 0x80000000 0x91 0x80000000 0x00 0x80000000   /* mem */
+				  0x43000000 0x94 0x00000000 0x94 0x00000000 0x04 0x00000000>; /* mem */
 			dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000
 				      0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>;
 			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
@@ -561,8 +564,9 @@
 			reg = < 0x00 0x1f500000 0x0 0x00010000   /* Controller registers */
 				0xa0 0xd0000000 0x0 0x00040000>; /* PCI config space */
 			reg-names = "csr", "cfg";
-			ranges = <0x01000000 0x0 0x00000000 0xa0 0x10000000 0x0 0x00010000   /* io   */
-				  0x02000000 0x0 0x80000000 0xa1 0x80000000 0x0 0x80000000>; /* mem  */
+			ranges = <0x01000000 0x00 0x00000000 0xa0 0x10000000 0x00 0x00010000   /* io  */
+				  0x02000000 0x00 0x80000000 0xa1 0x80000000 0x00 0x80000000   /* mem */
+				  0x43000000 0xb0 0x00000000 0xb0 0x00000000 0x10 0x00000000>; /* mem */
 			dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000
 				      0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>;
 			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
@@ -585,8 +589,9 @@
 			reg = < 0x00 0x1f510000 0x0 0x00010000   /* Controller registers */
 				0xc0 0xd0000000 0x0 0x00200000>; /* PCI config space */
 			reg-names = "csr", "cfg";
-			ranges = <0x01000000 0x0 0x00000000 0xc0 0x10000000 0x0 0x00010000   /* io  */
-				  0x02000000 0x0 0x80000000 0xc1 0x80000000 0x0 0x80000000>; /* mem */
+			ranges = <0x01000000 0x00 0x00000000 0xc0 0x10000000 0x00 0x00010000   /* io  */
+				  0x02000000 0x00 0x80000000 0xc1 0x80000000 0x00 0x80000000   /* mem */
+				  0x43000000 0xc8 0x00000000 0xc8 0x00000000 0x08 0x00000000>; /* mem */
 			dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000
 				      0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>;
 			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
@@ -823,7 +828,7 @@
 			device_type = "dma";
 			reg = <0x0 0x1f270000 0x0 0x10000>,
 			      <0x0 0x1f200000 0x0 0x10000>,
-			      <0x0 0x1b008000 0x0 0x2000>,
+			      <0x0 0x1b000000 0x0 0x400000>,
 			      <0x0 0x1054a000 0x0 0x100>;
 			interrupts = <0x0 0x82 0x4>,
 				     <0x0 0xb8 0x4>,
diff --git a/arch/arm64/boot/dts/arm/Makefile b/arch/arm64/boot/dts/arm/Makefile
index c5c98b9..bb3c072 100644
--- a/arch/arm64/boot/dts/arm/Makefile
+++ b/arch/arm64/boot/dts/arm/Makefile
@@ -1,6 +1,7 @@
 dtb-$(CONFIG_ARCH_VEXPRESS) += foundation-v8.dtb
 dtb-$(CONFIG_ARCH_VEXPRESS) += juno.dtb juno-r1.dtb
 dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb
+dtb-$(CONFIG_ARCH_VEXPRESS) += vexpress-v2f-1xv7-ca53x2.dtb
 
 always		:= $(dtb-y)
 subdir-y	:= $(dts-dirs)
diff --git a/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts b/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts
new file mode 100644
index 0000000..5b1d018
--- /dev/null
+++ b/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts
@@ -0,0 +1,191 @@
+/*
+ * ARM Ltd. Versatile Express
+ *
+ * LogicTile Express 20MG
+ * V2F-1XV7
+ *
+ * Cortex-A53 (2 cores) Soft Macrocell Model
+ *
+ * HBI-0247C
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+	model = "V2F-1XV7 Cortex-A53x2 SMM";
+	arm,hbi = <0x247>;
+	arm,vexpress,site = <0xf>;
+	compatible = "arm,vexpress,v2f-1xv7,ca53x2", "arm,vexpress,v2f-1xv7", "arm,vexpress";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	chosen {
+		stdout-path = "serial0:38400n8";
+	};
+
+	aliases {
+		serial0 = &v2m_serial0;
+		serial1 = &v2m_serial1;
+		serial2 = &v2m_serial2;
+		serial3 = &v2m_serial3;
+		i2c0 = &v2m_i2c_dvi;
+		i2c1 = &v2m_i2c_pcie;
+	};
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53", "arm,armv8";
+			reg = <0 0>;
+			next-level-cache = <&L2_0>;
+		};
+
+		cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53", "arm,armv8";
+			reg = <0 1>;
+			next-level-cache = <&L2_0>;
+		};
+
+		L2_0: l2-cache0 {
+			compatible = "cache";
+		};
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0 0x80000000 0 0x80000000>; /* 2GB @ 2GB */
+	};
+
+	gic: interrupt-controller@2c001000 {
+		compatible = "arm,gic-400";
+		#interrupt-cells = <3>;
+		#address-cells = <0>;
+		interrupt-controller;
+		reg = <0 0x2c001000 0 0x1000>,
+		      <0 0x2c002000 0 0x2000>,
+		      <0 0x2c004000 0 0x2000>,
+		      <0 0x2c006000 0 0x2000>;
+		interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_HIGH)>;
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>;
+	};
+
+	pmu {
+		compatible = "arm,armv8-pmuv3";
+		interrupts = <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
+	};
+
+	dcc {
+		compatible = "arm,vexpress,config-bus";
+		arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+		smbclk: osc@4 {
+			/* SMC clock */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 4>;
+			freq-range = <40000000 40000000>;
+			#clock-cells = <0>;
+			clock-output-names = "smclk";
+		};
+
+		volt@0 {
+			/* VIO to expansion board above */
+			compatible = "arm,vexpress-volt";
+			arm,vexpress-sysreg,func = <2 0>;
+			regulator-name = "VIO_UP";
+			regulator-min-microvolt = <800000>;
+			regulator-max-microvolt = <1800000>;
+			regulator-always-on;
+		};
+
+		volt@1 {
+			/* 12V from power connector J6 */
+			compatible = "arm,vexpress-volt";
+			arm,vexpress-sysreg,func = <2 1>;
+			regulator-name = "12";
+			regulator-always-on;
+		};
+
+		temp@0 {
+			/* FPGA temperature */
+			compatible = "arm,vexpress-temp";
+			arm,vexpress-sysreg,func = <4 0>;
+			label = "FPGA";
+		};
+	};
+
+	smb {
+		compatible = "simple-bus";
+
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0 0x08000000 0x04000000>,
+			 <1 0 0 0x14000000 0x04000000>,
+			 <2 0 0 0x18000000 0x04000000>,
+			 <3 0 0 0x1c000000 0x04000000>,
+			 <4 0 0 0x0c000000 0x04000000>,
+			 <5 0 0 0x10000000 0x04000000>;
+
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 63>;
+		interrupt-map = <0 0  0 &gic GIC_SPI  0 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  1 &gic GIC_SPI  1 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  2 &gic GIC_SPI  2 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  3 &gic GIC_SPI  3 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  4 &gic GIC_SPI  4 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  5 &gic GIC_SPI  5 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  6 &gic GIC_SPI  6 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  7 &gic GIC_SPI  7 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  8 &gic GIC_SPI  8 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  9 &gic GIC_SPI  9 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 10 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 11 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 12 &gic GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 13 &gic GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 14 &gic GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 15 &gic GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 16 &gic GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 17 &gic GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 18 &gic GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 19 &gic GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 20 &gic GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 21 &gic GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 22 &gic GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 23 &gic GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 24 &gic GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 25 &gic GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 26 &gic GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 27 &gic GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 28 &gic GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 29 &gic GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 30 &gic GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 31 &gic GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 32 &gic GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 33 &gic GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 34 &gic GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 35 &gic GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 36 &gic GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 37 &gic GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 38 &gic GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 39 &gic GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 40 &gic GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 41 &gic GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 42 &gic GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;
+
+		/include/ "../../../../arm/boot/dts/vexpress-v2m-rs1.dtsi"
+	};
+};
diff --git a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi
index d8c0bdc..9cb7cf9 100644
--- a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi
+++ b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi
@@ -376,10 +376,19 @@
 		gic0: interrupt-controller@8010,00000000 {
 			compatible = "arm,gic-v3";
 			#interrupt-cells = <3>;
+			#address-cells = <2>;
+			#size-cells = <2>;
+			ranges;
 			interrupt-controller;
 			reg = <0x8010 0x00000000 0x0 0x010000>, /* GICD */
 			      <0x8010 0x80000000 0x0 0x600000>; /* GICR */
 			interrupts = <1 9 0xf04>;
+
+			its: gic-its@8010,00020000 {
+				compatible = "arm,gic-v3-its";
+				msi-controller;
+				reg = <0x8010 0x20000 0x0 0x200000>;
+			};
 		};
 
 		uaa0: serial@87e0,24000000 {
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index f38c94f..4e17e7e 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -83,6 +83,7 @@ CONFIG_BLK_DEV_SD=y
 CONFIG_ATA=y
 CONFIG_SATA_AHCI=y
 CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_AHCI_CEVA=y
 CONFIG_AHCI_XGENE=y
 CONFIG_PATA_PLATFORM=y
 CONFIG_PATA_OF_PLATFORM=y
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 3303e8a..f4bf2f2 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -124,7 +124,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 
 	ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
 			     num_rounds(ctx));
-	scatterwalk_start(&walk, req->assoc);
+	scatterwalk_start(&walk, req->src);
 
 	do {
 		u32 n = scatterwalk_clamp(&walk, len);
@@ -151,6 +151,10 @@ static int ccm_encrypt(struct aead_request *req)
 	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
 	struct blkcipher_desc desc = { .info = req->iv };
 	struct blkcipher_walk walk;
+	struct scatterlist srcbuf[2];
+	struct scatterlist dstbuf[2];
+	struct scatterlist *src;
+	struct scatterlist *dst;
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen;
@@ -168,7 +172,12 @@ static int ccm_encrypt(struct aead_request *req)
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
-	blkcipher_walk_init(&walk, req->dst, req->src, len);
+	src = scatterwalk_ffwd(srcbuf, req->src, req->assoclen);
+	dst = src;
+	if (req->src != req->dst)
+		dst = scatterwalk_ffwd(dstbuf, req->dst, req->assoclen);
+
+	blkcipher_walk_init(&walk, dst, src, len);
 	err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
 					     AES_BLOCK_SIZE);
 
@@ -194,7 +203,7 @@ static int ccm_encrypt(struct aead_request *req)
 		return err;
 
 	/* copy authtag to end of dst */
-	scatterwalk_map_and_copy(mac, req->dst, req->cryptlen,
+	scatterwalk_map_and_copy(mac, dst, req->cryptlen,
 				 crypto_aead_authsize(aead), 1);
 
 	return 0;
@@ -207,6 +216,10 @@ static int ccm_decrypt(struct aead_request *req)
 	unsigned int authsize = crypto_aead_authsize(aead);
 	struct blkcipher_desc desc = { .info = req->iv };
 	struct blkcipher_walk walk;
+	struct scatterlist srcbuf[2];
+	struct scatterlist dstbuf[2];
+	struct scatterlist *src;
+	struct scatterlist *dst;
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen - authsize;
@@ -224,7 +237,12 @@ static int ccm_decrypt(struct aead_request *req)
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
-	blkcipher_walk_init(&walk, req->dst, req->src, len);
+	src = scatterwalk_ffwd(srcbuf, req->src, req->assoclen);
+	dst = src;
+	if (req->src != req->dst)
+		dst = scatterwalk_ffwd(dstbuf, req->dst, req->assoclen);
+
+	blkcipher_walk_init(&walk, dst, src, len);
 	err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
 					     AES_BLOCK_SIZE);
 
@@ -250,44 +268,42 @@ static int ccm_decrypt(struct aead_request *req)
 		return err;
 
 	/* compare calculated auth tag with the stored one */
-	scatterwalk_map_and_copy(buf, req->src, req->cryptlen - authsize,
+	scatterwalk_map_and_copy(buf, src, req->cryptlen - authsize,
 				 authsize, 0);
 
-	if (memcmp(mac, buf, authsize))
+	if (crypto_memneq(mac, buf, authsize))
 		return -EBADMSG;
 	return 0;
 }
 
-static struct crypto_alg ccm_aes_alg = {
-	.cra_name		= "ccm(aes)",
-	.cra_driver_name	= "ccm-aes-ce",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_AEAD,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_aead_type,
-	.cra_module		= THIS_MODULE,
-	.cra_aead = {
-		.ivsize		= AES_BLOCK_SIZE,
-		.maxauthsize	= AES_BLOCK_SIZE,
-		.setkey		= ccm_setkey,
-		.setauthsize	= ccm_setauthsize,
-		.encrypt	= ccm_encrypt,
-		.decrypt	= ccm_decrypt,
-	}
+static struct aead_alg ccm_aes_alg = {
+	.base = {
+		.cra_name		= "ccm(aes)",
+		.cra_driver_name	= "ccm-aes-ce",
+		.cra_priority		= 300,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
+	},
+	.ivsize		= AES_BLOCK_SIZE,
+	.maxauthsize	= AES_BLOCK_SIZE,
+	.setkey		= ccm_setkey,
+	.setauthsize	= ccm_setauthsize,
+	.encrypt	= ccm_encrypt,
+	.decrypt	= ccm_decrypt,
 };
 
 static int __init aes_mod_init(void)
 {
 	if (!(elf_hwcap & HWCAP_AES))
 		return -ENODEV;
-	return crypto_register_alg(&ccm_aes_alg);
+	return crypto_register_aead(&ccm_aes_alg);
 }
 
 static void __exit aes_mod_exit(void)
 {
-	crypto_unregister_alg(&ccm_aes_alg);
+	crypto_unregister_aead(&ccm_aes_alg);
 }
 
 module_init(aes_mod_init);
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index b112a39..70fd9ff 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -25,6 +25,7 @@ generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += msi.h
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 39248d3..406485e 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -19,6 +19,14 @@
 #include <asm/psci.h>
 #include <asm/smp_plat.h>
 
+/* Macros for consistency checks of the GICC subtable of MADT */
+#define ACPI_MADT_GICC_LENGTH	\
+	(acpi_gbl_FADT.header.revision < 6 ? 76 : 80)
+
+#define BAD_MADT_GICC_ENTRY(entry, end)						\
+	(!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) ||	\
+	 (entry)->header.length != ACPI_MADT_GICC_LENGTH)
+
 /* Basic configuration for ACPI */
 #ifdef	CONFIG_ACPI
 /* ACPI table mapping after acpi_gbl_permanent_mmap is set */
diff --git a/arch/arm64/include/asm/mm-arch-hooks.h b/arch/arm64/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 562b655..0000000
--- a/arch/arm64/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_ARM64_MM_ARCH_HOOKS_H
-#define _ASM_ARM64_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_ARM64_MM_ARCH_HOOKS_H */
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 9d4aa18..e8ca6ea 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -122,12 +122,12 @@ static int __init uefi_init(void)
 
 	/* Show what we know for posterity */
 	c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor),
-			     sizeof(vendor));
+			     sizeof(vendor) * sizeof(efi_char16_t));
 	if (c16) {
 		for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i)
 			vendor[i] = c16[i];
 		vendor[i] = '\0';
-		early_memunmap(c16, sizeof(vendor));
+		early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t));
 	}
 
 	pr_info("EFI v%u.%.02u by %s\n",
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index a7691a3..e163518 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -352,8 +352,8 @@ el1_inv:
 	// TODO: add support for undefined instructions in kernel mode
 	enable_dbg
 	mov	x0, sp
+	mov	x2, x1
 	mov	x1, #BAD_SYNC
-	mrs	x2, esr_el1
 	b	bad_mode
 ENDPROC(el1_sync)
 
@@ -553,7 +553,7 @@ el0_inv:
 	ct_user_exit
 	mov	x0, sp
 	mov	x1, #BAD_SYNC
-	mrs	x2, esr_el1
+	mov	x2, x25
 	bl	bad_mode
 	b	ret_to_user
 ENDPROC(el0_sync)
@@ -585,7 +585,8 @@ ENDPROC(el0_irq)
  *
  */
 ENTRY(cpu_switch_to)
-	add	x8, x0, #THREAD_CPU_CONTEXT
+	mov	x10, #THREAD_CPU_CONTEXT
+	add	x8, x0, x10
 	mov	x9, sp
 	stp	x19, x20, [x8], #16		// store callee-saved registers
 	stp	x21, x22, [x8], #16
@@ -594,7 +595,7 @@ ENTRY(cpu_switch_to)
 	stp	x27, x28, [x8], #16
 	stp	x29, x9, [x8], #16
 	str	lr, [x8]
-	add	x8, x1, #THREAD_CPU_CONTEXT
+	add	x8, x1, x10
 	ldp	x19, x20, [x8], #16		// restore callee-saved registers
 	ldp	x21, x22, [x8], #16
 	ldp	x23, x24, [x8], #16
diff --git a/arch/arm64/kernel/entry32.S b/arch/arm64/kernel/entry32.S
index bd9bfaa..f332d5d 100644
--- a/arch/arm64/kernel/entry32.S
+++ b/arch/arm64/kernel/entry32.S
@@ -32,13 +32,11 @@
 
 ENTRY(compat_sys_sigreturn_wrapper)
 	mov	x0, sp
-	mov	x27, #0		// prevent syscall restart handling (why)
 	b	compat_sys_sigreturn
 ENDPROC(compat_sys_sigreturn_wrapper)
 
 ENTRY(compat_sys_rt_sigreturn_wrapper)
 	mov	x0, sp
-	mov	x27, #0		// prevent syscall restart handling (why)
 	b	compat_sys_rt_sigreturn
 ENDPROC(compat_sys_rt_sigreturn_wrapper)
 
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 240b75c..463fa2e 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -61,7 +61,7 @@ void __init init_IRQ(void)
 static bool migrate_one_irq(struct irq_desc *desc)
 {
 	struct irq_data *d = irq_desc_get_irq_data(desc);
-	const struct cpumask *affinity = d->affinity;
+	const struct cpumask *affinity = irq_data_get_affinity_mask(d);
 	struct irq_chip *c;
 	bool ret = false;
 
@@ -81,7 +81,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
 	if (!c->irq_set_affinity)
 		pr_debug("IRQ%u: unable to set affinity\n", d->irq);
 	else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret)
-		cpumask_copy(d->affinity, affinity);
+		cpumask_copy(irq_data_get_affinity_mask(d), affinity);
 
 	return ret;
 }
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index f3067d4..926ae8d 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -34,7 +34,6 @@
 #include <linux/kexec.h>
 #include <linux/crash_dump.h>
 #include <linux/root_dev.h>
-#include <linux/clk-provider.h>
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
 #include <linux/smp.h>
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 1670f15..948f0ad 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -168,7 +168,8 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 		 * Other callers might not initialize the si_lsb field,
 		 * so check explicitely for the right codes here.
 		 */
-		if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)
+		if (from->si_signo == SIGBUS &&
+		    (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO))
 			err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
 #endif
 		break;
@@ -201,8 +202,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 
 int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 {
-	memset(to, 0, sizeof *to);
-
 	if (copy_from_user(to, from, __ARCH_SI_PREAMBLE_SIZE) ||
 	    copy_from_user(to->_sifields._pad,
 			   from->_sifields._pad, SI_PAD_SIZE))
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 695801a..50fb469 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -438,7 +438,7 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header,
 	struct acpi_madt_generic_interrupt *processor;
 
 	processor = (struct acpi_madt_generic_interrupt *)header;
-	if (BAD_MADT_ENTRY(processor, end))
+	if (BAD_MADT_GICC_ENTRY(processor, end))
 		return -EINVAL;
 
 	acpi_table_print_madt_entry(header);
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index ec37ab3..97bc68f 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -199,16 +199,15 @@ up_fail:
  */
 void update_vsyscall(struct timekeeper *tk)
 {
-	struct timespec xtime_coarse;
 	u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
 
 	++vdso_data->tb_seq_count;
 	smp_wmb();
 
-	xtime_coarse = __current_kernel_time();
 	vdso_data->use_syscall			= use_syscall;
-	vdso_data->xtime_coarse_sec		= xtime_coarse.tv_sec;
-	vdso_data->xtime_coarse_nsec		= xtime_coarse.tv_nsec;
+	vdso_data->xtime_coarse_sec		= tk->xtime_sec;
+	vdso_data->xtime_coarse_nsec		= tk->tkr_mono.xtime_nsec >>
+							tk->tkr_mono.shift;
 	vdso_data->wtm_clock_sec		= tk->wall_to_monotonic.tv_sec;
 	vdso_data->wtm_clock_nsec		= tk->wall_to_monotonic.tv_nsec;
 
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index f02530e..85c5715 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -168,8 +168,8 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
 {
 	if (!(vcpu->arch.hcr_el2 & HCR_RW))
 		inject_abt32(vcpu, false, addr);
-
-	inject_abt64(vcpu, false, addr);
+	else
+		inject_abt64(vcpu, false, addr);
 }
 
 /**
@@ -184,8 +184,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
 {
 	if (!(vcpu->arch.hcr_el2 & HCR_RW))
 		inject_abt32(vcpu, true, addr);
-
-	inject_abt64(vcpu, true, addr);
+	else
+		inject_abt64(vcpu, true, addr);
 }
 
 /**
@@ -198,6 +198,6 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
 {
 	if (!(vcpu->arch.hcr_el2 & HCR_RW))
 		inject_undef32(vcpu);
-
-	inject_undef64(vcpu);
+	else
+		inject_undef64(vcpu);
 }
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 9d84feb..773d37a 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -4,5 +4,3 @@ obj-y				:= dma-mapping.o extable.o fault.o init.o \
 				   context.o proc.o pageattr.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_ARM64_PTDUMP)	+= dump.o
-
-CFLAGS_mmu.o			:= -I$(srctree)/scripts/dtc/libfdt/
diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild
index 1d66afd..f61f2dd 100644
--- a/arch/avr32/include/asm/Kbuild
+++ b/arch/avr32/include/asm/Kbuild
@@ -12,6 +12,7 @@ generic-y += irq_work.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += param.h
 generic-y += percpu.h
 generic-y += preempt.h
diff --git a/arch/avr32/include/asm/mm-arch-hooks.h b/arch/avr32/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 145452f..0000000
--- a/arch/avr32/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_AVR32_MM_ARCH_HOOKS_H
-#define _ASM_AVR32_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_AVR32_MM_ARCH_HOOKS_H */
diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c
index d0f771b..a124c55 100644
--- a/arch/avr32/kernel/time.c
+++ b/arch/avr32/kernel/time.c
@@ -18,6 +18,7 @@
 
 #include <mach/pm.h>
 
+static bool disable_cpu_idle_poll;
 
 static cycle_t read_cycle_count(struct clocksource *cs)
 {
@@ -80,45 +81,45 @@ static int comparator_next_event(unsigned long delta,
 	return 0;
 }
 
-static void comparator_mode(enum clock_event_mode mode,
-		struct clock_event_device *evdev)
+static int comparator_shutdown(struct clock_event_device *evdev)
 {
-	switch (mode) {
-	case CLOCK_EVT_MODE_ONESHOT:
-		pr_debug("%s: start\n", evdev->name);
-		/* FALLTHROUGH */
-	case CLOCK_EVT_MODE_RESUME:
+	pr_debug("%s: %s\n", __func__, evdev->name);
+	sysreg_write(COMPARE, 0);
+
+	if (disable_cpu_idle_poll) {
+		disable_cpu_idle_poll = false;
 		/*
-		 * If we're using the COUNT and COMPARE registers we
-		 * need to force idle poll.
+		 * Only disable idle poll if we have forced that
+		 * in a previous call.
 		 */
-		cpu_idle_poll_ctrl(true);
-		break;
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_SHUTDOWN:
-		sysreg_write(COMPARE, 0);
-		pr_debug("%s: stop\n", evdev->name);
-		if (evdev->mode == CLOCK_EVT_MODE_ONESHOT ||
-		    evdev->mode == CLOCK_EVT_MODE_RESUME) {
-			/*
-			 * Only disable idle poll if we have forced that
-			 * in a previous call.
-			 */
-			cpu_idle_poll_ctrl(false);
-		}
-		break;
-	default:
-		BUG();
+		cpu_idle_poll_ctrl(false);
 	}
+	return 0;
+}
+
+static int comparator_set_oneshot(struct clock_event_device *evdev)
+{
+	pr_debug("%s: %s\n", __func__, evdev->name);
+
+	disable_cpu_idle_poll = true;
+	/*
+	 * If we're using the COUNT and COMPARE registers we
+	 * need to force idle poll.
+	 */
+	cpu_idle_poll_ctrl(true);
+
+	return 0;
 }
 
 static struct clock_event_device comparator = {
-	.name		= "avr32_comparator",
-	.features	= CLOCK_EVT_FEAT_ONESHOT,
-	.shift		= 16,
-	.rating		= 50,
-	.set_next_event	= comparator_next_event,
-	.set_mode	= comparator_mode,
+	.name			= "avr32_comparator",
+	.features		= CLOCK_EVT_FEAT_ONESHOT,
+	.shift			= 16,
+	.rating			= 50,
+	.set_next_event		= comparator_next_event,
+	.set_state_shutdown	= comparator_shutdown,
+	.set_state_oneshot	= comparator_set_oneshot,
+	.tick_resume		= comparator_set_oneshot,
 };
 
 void read_persistent_clock(struct timespec *ts)
diff --git a/arch/avr32/mach-at32ap/clock.c b/arch/avr32/mach-at32ap/clock.c
index 23b1a97..52c179b 100644
--- a/arch/avr32/mach-at32ap/clock.c
+++ b/arch/avr32/mach-at32ap/clock.c
@@ -80,6 +80,9 @@ int clk_enable(struct clk *clk)
 {
 	unsigned long flags;
 
+	if (!clk)
+		return 0;
+
 	spin_lock_irqsave(&clk_lock, flags);
 	__clk_enable(clk);
 	spin_unlock_irqrestore(&clk_lock, flags);
@@ -106,6 +109,9 @@ void clk_disable(struct clk *clk)
 {
 	unsigned long flags;
 
+	if (IS_ERR_OR_NULL(clk))
+		return;
+
 	spin_lock_irqsave(&clk_lock, flags);
 	__clk_disable(clk);
 	spin_unlock_irqrestore(&clk_lock, flags);
@@ -117,6 +123,9 @@ unsigned long clk_get_rate(struct clk *clk)
 	unsigned long flags;
 	unsigned long rate;
 
+	if (!clk)
+		return 0;
+
 	spin_lock_irqsave(&clk_lock, flags);
 	rate = clk->get_rate(clk);
 	spin_unlock_irqrestore(&clk_lock, flags);
@@ -129,6 +138,9 @@ long clk_round_rate(struct clk *clk, unsigned long rate)
 {
 	unsigned long flags, actual_rate;
 
+	if (!clk)
+		return 0;
+
 	if (!clk->set_rate)
 		return -ENOSYS;
 
@@ -145,6 +157,9 @@ int clk_set_rate(struct clk *clk, unsigned long rate)
 	unsigned long flags;
 	long ret;
 
+	if (!clk)
+		return 0;
+
 	if (!clk->set_rate)
 		return -ENOSYS;
 
@@ -161,6 +176,9 @@ int clk_set_parent(struct clk *clk, struct clk *parent)
 	unsigned long flags;
 	int ret;
 
+	if (!clk)
+		return 0;
+
 	if (!clk->set_parent)
 		return -ENOSYS;
 
@@ -174,7 +192,7 @@ EXPORT_SYMBOL(clk_set_parent);
 
 struct clk *clk_get_parent(struct clk *clk)
 {
-	return clk->parent;
+	return !clk ? NULL : clk->parent;
 }
 EXPORT_SYMBOL(clk_get_parent);
 
diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild
index 07051a6..61cd1e7 100644
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -21,6 +21,7 @@ generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += mutex.h
diff --git a/arch/blackfin/include/asm/mm-arch-hooks.h b/arch/blackfin/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 1c5211e..0000000
--- a/arch/blackfin/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_BLACKFIN_MM_ARCH_HOOKS_H
-#define _ASM_BLACKFIN_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_BLACKFIN_MM_ARCH_HOOKS_H */
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 7aeb322..f17c4dc 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -26,6 +26,7 @@ generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += mmu.h
 generic-y += mmu_context.h
diff --git a/arch/c6x/include/asm/mm-arch-hooks.h b/arch/c6x/include/asm/mm-arch-hooks.h
deleted file mode 100644
index bb3c4a6..0000000
--- a/arch/c6x/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_C6X_MM_ARCH_HOOKS_H
-#define _ASM_C6X_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_C6X_MM_ARCH_HOOKS_H */
diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c
index 4dda9bd..e989cee 100644
--- a/arch/cris/arch-v32/drivers/sync_serial.c
+++ b/arch/cris/arch-v32/drivers/sync_serial.c
@@ -1464,7 +1464,7 @@ static inline void handle_rx_packet(struct sync_port *port)
 		if (port->write_ts_idx == NBR_IN_DESCR)
 			port->write_ts_idx = 0;
 		idx = port->write_ts_idx++;
-		do_posix_clock_monotonic_gettime(&port->timestamp[idx]);
+		ktime_get_ts(&port->timestamp[idx]);
 		port->in_buffer_len += port->inbufchunk;
 	}
 	spin_unlock_irqrestore(&port->lock, flags);
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index d294f6a..ad2244f 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -18,6 +18,7 @@ generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += module.h
 generic-y += percpu.h
 generic-y += preempt.h
diff --git a/arch/cris/include/asm/mm-arch-hooks.h b/arch/cris/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 314f774..0000000
--- a/arch/cris/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_CRIS_MM_ARCH_HOOKS_H
-#define _ASM_CRIS_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_CRIS_MM_ARCH_HOOKS_H */
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 30edce3..8e47b83 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -4,5 +4,6 @@ generic-y += cputime.h
 generic-y += exec.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += trace_clock.h
diff --git a/arch/frv/include/asm/mm-arch-hooks.h b/arch/frv/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 51d13a8..0000000
--- a/arch/frv/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_FRV_MM_ARCH_HOOKS_H
-#define _ASM_FRV_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_FRV_MM_ARCH_HOOKS_H */
diff --git a/arch/frv/mb93090-mb00/pci-frv.c b/arch/frv/mb93090-mb00/pci-frv.c
index 0635bd6..34bb4b1 100644
--- a/arch/frv/mb93090-mb00/pci-frv.c
+++ b/arch/frv/mb93090-mb00/pci-frv.c
@@ -175,14 +175,6 @@ static void __init pcibios_assign_resources(void)
 			if (!r->start && r->end)
 				pci_assign_resource(dev, idx);
 		}
-
-		if (pci_probe & PCI_ASSIGN_ROMS) {
-			r = &dev->resource[PCI_ROM_RESOURCE];
-			r->end -= r->start;
-			r->start = 0;
-			if (r->end)
-				pci_assign_resource(dev, PCI_ROM_RESOURCE);
-		}
 	}
 }
 
diff --git a/arch/frv/mb93090-mb00/pci-frv.h b/arch/frv/mb93090-mb00/pci-frv.h
index a7e487fe..d51992f 100644
--- a/arch/frv/mb93090-mb00/pci-frv.h
+++ b/arch/frv/mb93090-mb00/pci-frv.h
@@ -14,14 +14,6 @@
 #define DBG(x...)
 #endif
 
-#define PCI_PROBE_BIOS		0x0001
-#define PCI_PROBE_CONF1		0x0002
-#define PCI_PROBE_CONF2		0x0004
-#define PCI_NO_CHECKS		0x0400
-#define PCI_ASSIGN_ROMS		0x1000
-#define PCI_BIOS_IRQ_SCAN	0x2000
-#define PCI_ASSIGN_ALL_BUSSES	0x4000
-
 extern unsigned int __nongpreldata pci_probe;
 
 /* pci-frv.c */
diff --git a/arch/frv/mb93090-mb00/pci-vdk.c b/arch/frv/mb93090-mb00/pci-vdk.c
index f211839..f9c86c4 100644
--- a/arch/frv/mb93090-mb00/pci-vdk.c
+++ b/arch/frv/mb93090-mb00/pci-vdk.c
@@ -294,8 +294,6 @@ void pcibios_fixup_bus(struct pci_bus *bus)
 	printk("### PCIBIOS_FIXUP_BUS(%d)\n",bus->number);
 #endif
 
-	pci_read_bridge_bases(bus);
-
 	if (bus->number == 0) {
 		struct pci_dev *dev;
 		list_for_each_entry(dev, &bus->devices, bus_list) {
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 00379d6..70e6ae1 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -33,6 +33,7 @@ generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += mmu.h
 generic-y += mmu_context.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 5ade4a1..daee37b 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -28,6 +28,7 @@ generic-y += kmap_types.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += pci.h
diff --git a/arch/hexagon/include/asm/mm-arch-hooks.h b/arch/hexagon/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 05e8b93..0000000
--- a/arch/hexagon/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_HEXAGON_MM_ARCH_HOOKS_H
-#define _ASM_HEXAGON_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_HEXAGON_MM_ARCH_HOOKS_H */
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index ccff13d..9de3ba1 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -4,6 +4,7 @@ generic-y += exec.h
 generic-y += irq_work.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += trace_clock.h
 generic-y += vtime.h
diff --git a/arch/ia64/include/asm/mm-arch-hooks.h b/arch/ia64/include/asm/mm-arch-hooks.h
deleted file mode 100644
index ab4b5c6..0000000
--- a/arch/ia64/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_IA64_MM_ARCH_HOOKS_H
-#define _ASM_IA64_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_IA64_MM_ARCH_HOOKS_H */
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 7cc3be9..d89b601 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -533,10 +533,9 @@ void pcibios_fixup_bus(struct pci_bus *b)
 {
 	struct pci_dev *dev;
 
-	if (b->self) {
-		pci_read_bridge_bases(b);
+	if (b->self)
 		pcibios_fixup_bridge_resources(b->self);
-	}
+
 	list_for_each_entry(dev, &b->devices, bus_list)
 		pcibios_fixup_device_resources(dev);
 	platform_pci_fixup_bus(b);
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index ba1cdc0..e0eb704 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -4,6 +4,7 @@ generic-y += cputime.h
 generic-y += exec.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += module.h
 generic-y += preempt.h
 generic-y += sections.h
diff --git a/arch/m32r/include/asm/io.h b/arch/m32r/include/asm/io.h
index 0c3f25e..f8de767 100644
--- a/arch/m32r/include/asm/io.h
+++ b/arch/m32r/include/asm/io.h
@@ -174,6 +174,11 @@ static inline void _writel(unsigned long l, unsigned long addr)
 #define iowrite16 writew
 #define iowrite32 writel
 
+#define ioread16be(addr)	be16_to_cpu(readw(addr))
+#define ioread32be(addr)	be32_to_cpu(readl(addr))
+#define iowrite16be(v, addr)	writew(cpu_to_be16(v), (addr))
+#define iowrite32be(v, addr)	writel(cpu_to_be32(v), (addr))
+
 #define mmiowb()
 
 #define flush_write_buffers() do { } while (0)  /* M32R_FIXME */
diff --git a/arch/m32r/include/asm/mm-arch-hooks.h b/arch/m32r/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 6d60b47..0000000
--- a/arch/m32r/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_M32R_MM_ARCH_HOOKS_H
-#define _ASM_M32R_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_M32R_MM_ARCH_HOOKS_H */
diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
index 33013df..c496d48 100644
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -125,6 +125,13 @@ endif # M68KCLASSIC
 
 if COLDFIRE
 
+choice
+	prompt "ColdFire SoC type"
+	default M520x
+	help
+	  Select the type of ColdFire System-on-Chip (SoC) that you want
+	  to build for.
+
 config M5206
 	bool "MCF5206"
 	depends on !MMU
@@ -174,9 +181,6 @@ config M525x
 	help
 	  Freescale (Motorola) Coldfire 5251/5253 processor support.
 
-config M527x
-	bool
-
 config M5271
 	bool "MCF5271"
 	depends on !MMU
@@ -223,9 +227,6 @@ config M5307
 	help
 	  Motorola ColdFire 5307 processor support.
 
-config M53xx
-	bool
-
 config M532x
 	bool "MCF532x"
 	depends on !MMU
@@ -251,9 +252,6 @@ config M5407
 	help
 	  Motorola ColdFire 5407 processor support.
 
-config M54xx
-	bool
-
 config M547x
 	bool "MCF547x"
 	select M54xx
@@ -280,6 +278,17 @@ config M5441x
 	help
 	  Freescale Coldfire 54410/54415/54416/54417/54418 processor support.
 
+endchoice
+
+config M527x
+	bool
+
+config M53xx
+	bool
+
+config M54xx
+	bool
+
 endif # COLDFIRE
 
 
@@ -416,22 +425,18 @@ config HAVE_MBAR
 config HAVE_IPSBAR
 	bool
 
-config CLOCK_SET
-	bool "Enable setting the CPU clock frequency"
-	depends on COLDFIRE
-	default n
-	help
-	  On some CPU's you do not need to know what the core CPU clock
-	  frequency is. On these you can disable clock setting. On some
-	  traditional 68K parts, and on all ColdFire parts you need to set
-	  the appropriate CPU clock frequency. On these devices many of the
-	  onboard peripherals derive their timing from the master CPU clock
-	  frequency.
-
 config CLOCK_FREQ
 	int "Set the core clock frequency"
+	default "25000000" if M5206
+	default "54000000" if M5206e
+	default "166666666" if M520x
+	default "140000000" if M5249
+	default "150000000" if M527x || M523x
+	default "90000000" if M5307
+	default "50000000" if M5407
+	default "266000000" if M54xx
 	default "66666666"
-	depends on CLOCK_SET
+	depends on COLDFIRE
 	help
 	  Define the CPU clock frequency in use. This is the core clock
 	  frequency, it may or may not be the same as the external clock
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 753a623..0b6b40d 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -57,7 +57,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -91,6 +91,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -287,7 +288,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -320,7 +320,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -345,6 +344,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
@@ -355,6 +355,7 @@ CONFIG_ARIADNE=y
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_CIRRUS is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_HP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
@@ -363,6 +364,7 @@ CONFIG_HYDRA=y
 CONFIG_APNE=y
 CONFIG_ZORRO8390=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -448,6 +450,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -536,6 +539,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -543,6 +547,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -571,14 +576,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 1f93dca..eeb3a89 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -55,7 +55,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -89,6 +89,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -279,7 +280,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -302,7 +302,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -327,17 +326,20 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -406,6 +408,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -494,6 +497,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -501,6 +505,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -529,14 +534,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 831b8b8..3a70066 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -55,7 +55,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -89,6 +89,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -283,7 +284,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -311,7 +311,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -336,6 +335,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
@@ -343,11 +343,13 @@ CONFIG_ATARILANCE=y
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 CONFIG_NE2000=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -428,6 +430,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -516,6 +519,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -523,6 +527,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -551,14 +556,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 91fd187..0586b32 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -53,7 +53,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -87,6 +87,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -277,7 +278,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -301,7 +301,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -326,17 +325,20 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 CONFIG_BVME6000_NET=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -399,6 +401,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -487,6 +490,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -494,6 +498,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -522,14 +527,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 9d4934f..ad1dbce 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -55,7 +55,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -89,6 +89,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -279,7 +280,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -302,7 +302,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -327,6 +326,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
@@ -334,11 +334,13 @@ CONFIG_HPLANCE=y
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -408,6 +410,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -496,6 +499,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -503,6 +507,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -531,14 +536,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/m5208evb_defconfig b/arch/m68k/configs/m5208evb_defconfig
index e7292f4..4c7b793 100644
--- a/arch/m68k/configs/m5208evb_defconfig
+++ b/arch/m68k/configs/m5208evb_defconfig
@@ -1,10 +1,6 @@
-# CONFIG_MMU is not set
-CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
 # CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
@@ -16,17 +12,12 @@ CONFIG_EXPERT=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
-CONFIG_M520x=y
-CONFIG_CLOCK_SET=y
-CONFIG_CLOCK_FREQ=166666666
-CONFIG_CLOCK_DIV=2
-CONFIG_M5208EVB=y
+# CONFIG_MMU is not set
 # CONFIG_4KSTACKS is not set
 CONFIG_RAMBASE=0x40000000
 CONFIG_RAMSIZE=0x2000000
 CONFIG_VECTORBASE=0x40000000
 CONFIG_KERNELBASE=0x40020000
-CONFIG_RAM16BIT=y
 CONFIG_BINFMT_FLAT=y
 CONFIG_NET=y
 CONFIG_PACKET=y
@@ -40,24 +31,19 @@ CONFIG_INET=y
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_UCLINUX=y
 CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
 CONFIG_FEC=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
+# CONFIG_UNIX98_PTYS is not set
 CONFIG_SERIAL_MCF=y
 CONFIG_SERIAL_MCF_BAUDRATE=115200
 CONFIG_SERIAL_MCF_CONSOLE=y
-# CONFIG_UNIX98_PTYS is not set
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
@@ -68,8 +54,6 @@ CONFIG_EXT2_FS=y
 CONFIG_ROMFS_FS=y
 CONFIG_ROMFS_BACKED_BY_MTD=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_FULLDEBUG=y
 CONFIG_BOOTPARAM=y
 CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0"
+CONFIG_FULLDEBUG=y
diff --git a/arch/m68k/configs/m5249evb_defconfig b/arch/m68k/configs/m5249evb_defconfig
index 0cd4b39..a782f36 100644
--- a/arch/m68k/configs/m5249evb_defconfig
+++ b/arch/m68k/configs/m5249evb_defconfig
@@ -1,10 +1,6 @@
-# CONFIG_MMU is not set
-CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
 # CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
@@ -16,10 +12,8 @@ CONFIG_EXPERT=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
+# CONFIG_MMU is not set
 CONFIG_M5249=y
-CONFIG_CLOCK_SET=y
-CONFIG_CLOCK_FREQ=140000000
-CONFIG_CLOCK_DIV=2
 CONFIG_M5249C3=y
 CONFIG_RAMBASE=0x00000000
 CONFIG_RAMSIZE=0x00800000
@@ -38,23 +32,18 @@ CONFIG_INET=y
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_UCLINUX=y
 CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 CONFIG_PPP=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
+# CONFIG_UNIX98_PTYS is not set
 CONFIG_SERIAL_MCF=y
 CONFIG_SERIAL_MCF_CONSOLE=y
-# CONFIG_UNIX98_PTYS is not set
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_EXT2_FS=y
@@ -62,7 +51,5 @@ CONFIG_EXT2_FS=y
 CONFIG_ROMFS_FS=y
 CONFIG_ROMFS_BACKED_BY_MTD=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_BOOTPARAM=y
 CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0"
-# CONFIG_CRC32 is not set
diff --git a/arch/m68k/configs/m5272c3_defconfig b/arch/m68k/configs/m5272c3_defconfig
index a60cb35..6f5fb92 100644
--- a/arch/m68k/configs/m5272c3_defconfig
+++ b/arch/m68k/configs/m5272c3_defconfig
@@ -1,10 +1,6 @@
-# CONFIG_MMU is not set
-CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
 # CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
@@ -16,8 +12,8 @@ CONFIG_EXPERT=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
+# CONFIG_MMU is not set
 CONFIG_M5272=y
-CONFIG_CLOCK_SET=y
 CONFIG_M5272C3=y
 CONFIG_RAMBASE=0x00000000
 CONFIG_RAMSIZE=0x00800000
@@ -36,23 +32,18 @@ CONFIG_INET=y
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_UCLINUX=y
 CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
 CONFIG_FEC=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
+# CONFIG_UNIX98_PTYS is not set
 CONFIG_SERIAL_MCF=y
 CONFIG_SERIAL_MCF_CONSOLE=y
-# CONFIG_UNIX98_PTYS is not set
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_EXT2_FS=y
@@ -61,6 +52,5 @@ CONFIG_EXT2_FS=y
 CONFIG_ROMFS_FS=y
 CONFIG_ROMFS_BACKED_BY_MTD=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_BOOTPARAM=y
 CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0"
diff --git a/arch/m68k/configs/m5275evb_defconfig b/arch/m68k/configs/m5275evb_defconfig
index e6502ab..b5d7cd1 100644
--- a/arch/m68k/configs/m5275evb_defconfig
+++ b/arch/m68k/configs/m5275evb_defconfig
@@ -1,10 +1,6 @@
-# CONFIG_MMU is not set
-CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
 # CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
@@ -16,11 +12,8 @@ CONFIG_EXPERT=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
+# CONFIG_MMU is not set
 CONFIG_M5275=y
-CONFIG_CLOCK_SET=y
-CONFIG_CLOCK_FREQ=150000000
-CONFIG_CLOCK_DIV=2
-CONFIG_M5275EVB=y
 # CONFIG_4KSTACKS is not set
 CONFIG_RAMBASE=0x00000000
 CONFIG_RAMSIZE=0x00000000
@@ -39,24 +32,19 @@ CONFIG_INET=y
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_UCLINUX=y
 CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
 CONFIG_FEC=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 CONFIG_PPP=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
+# CONFIG_UNIX98_PTYS is not set
 CONFIG_SERIAL_MCF=y
 CONFIG_SERIAL_MCF_CONSOLE=y
-# CONFIG_UNIX98_PTYS is not set
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_EXT2_FS=y
@@ -65,8 +53,5 @@ CONFIG_EXT2_FS=y
 CONFIG_ROMFS_FS=y
 CONFIG_ROMFS_BACKED_BY_MTD=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_BOOTPARAM=y
 CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0"
-# CONFIG_CRC32 is not set
diff --git a/arch/m68k/configs/m5307c3_defconfig b/arch/m68k/configs/m5307c3_defconfig
index 023812a..1b4c094 100644
--- a/arch/m68k/configs/m5307c3_defconfig
+++ b/arch/m68k/configs/m5307c3_defconfig
@@ -1,10 +1,6 @@
-# CONFIG_MMU is not set
-CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
 # CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
@@ -16,10 +12,8 @@ CONFIG_EXPERT=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
+# CONFIG_MMU is not set
 CONFIG_M5307=y
-CONFIG_CLOCK_SET=y
-CONFIG_CLOCK_FREQ=90000000
-CONFIG_CLOCK_DIV=2
 CONFIG_M5307C3=y
 CONFIG_RAMBASE=0x00000000
 CONFIG_RAMSIZE=0x00800000
@@ -38,16 +32,11 @@ CONFIG_INET=y
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_UCLINUX=y
 CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 CONFIG_PPP=y
 CONFIG_SLIP=y
 CONFIG_SLIP_COMPRESSED=y
@@ -56,21 +45,17 @@ CONFIG_SLIP_COMPRESSED=y
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
 CONFIG_SERIAL_MCF=y
 CONFIG_SERIAL_MCF_CONSOLE=y
-# CONFIG_LEGACY_PTYS is not set
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-# CONFIG_HID_SUPPORT is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_EXT2_FS=y
 # CONFIG_DNOTIFY is not set
 CONFIG_ROMFS_FS=y
 CONFIG_ROMFS_BACKED_BY_MTD=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_FULLDEBUG=y
 CONFIG_BOOTPARAM=y
 CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0"
-# CONFIG_CRC32 is not set
+CONFIG_FULLDEBUG=y
diff --git a/arch/m68k/configs/m5407c3_defconfig b/arch/m68k/configs/m5407c3_defconfig
index 557b39f..275ad543 100644
--- a/arch/m68k/configs/m5407c3_defconfig
+++ b/arch/m68k/configs/m5407c3_defconfig
@@ -1,10 +1,6 @@
-# CONFIG_MMU is not set
-CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
 # CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
@@ -17,9 +13,8 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
+# CONFIG_MMU is not set
 CONFIG_M5407=y
-CONFIG_CLOCK_SET=y
-CONFIG_CLOCK_FREQ=50000000
 CONFIG_M5407C3=y
 CONFIG_RAMBASE=0x00000000
 CONFIG_RAMSIZE=0x00000000
@@ -38,22 +33,17 @@ CONFIG_INET=y
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_UCLINUX=y
 CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 CONFIG_PPP=y
 # CONFIG_INPUT is not set
 # CONFIG_VT is not set
+# CONFIG_UNIX98_PTYS is not set
 CONFIG_SERIAL_MCF=y
 CONFIG_SERIAL_MCF_CONSOLE=y
-# CONFIG_UNIX98_PTYS is not set
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
@@ -63,8 +53,5 @@ CONFIG_EXT2_FS=y
 CONFIG_ROMFS_FS=y
 CONFIG_ROMFS_BACKED_BY_MTD=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_BOOTPARAM=y
 CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0"
-# CONFIG_CRC32 is not set
diff --git a/arch/m68k/configs/m5475evb_defconfig b/arch/m68k/configs/m5475evb_defconfig
index c5018a6..4f4ccd13 100644
--- a/arch/m68k/configs/m5475evb_defconfig
+++ b/arch/m68k/configs/m5475evb_defconfig
@@ -1,11 +1,7 @@
-CONFIG_EXPERIMENTAL=y
 # CONFIG_SWAP is not set
 CONFIG_LOG_BUF_SHIFT=14
-CONFIG_SYSFS_DEPRECATED=y
-CONFIG_SYSFS_DEPRECATED_V2=y
 CONFIG_SYSCTL_SYSCALL=y
 # CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
@@ -20,19 +16,16 @@ CONFIG_MODULES=y
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_COLDFIRE=y
-CONFIG_M547x=y
-CONFIG_CLOCK_SET=y
-CONFIG_CLOCK_FREQ=266000000
 # CONFIG_4KSTACKS is not set
 CONFIG_RAMBASE=0x0
 CONFIG_RAMSIZE=0x2000000
 CONFIG_VECTORBASE=0x0
 CONFIG_MBAR=0xff000000
 CONFIG_KERNELBASE=0x20000
+CONFIG_PCI=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_JEDECPROBE=y
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 72bc187..b44acac 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -54,7 +54,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -88,6 +88,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -282,7 +283,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -311,7 +311,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -343,6 +342,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
@@ -351,12 +351,14 @@ CONFIG_MACMACE=y
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 CONFIG_MAC89x0=y
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 CONFIG_MACSONIC=y
 CONFIG_MAC8390=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -430,6 +432,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -518,6 +521,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -525,6 +529,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -553,14 +558,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 8fb6553..8afca37 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -64,7 +64,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -98,6 +98,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -301,7 +302,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -344,7 +344,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -376,6 +375,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
@@ -391,6 +391,7 @@ CONFIG_MACMACE=y
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 CONFIG_MAC89x0=y
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_HP is not set
 CONFIG_BVME6000_NET=y
 CONFIG_MVME16x_NET=y
@@ -403,6 +404,7 @@ CONFIG_NE2000=y
 CONFIG_APNE=y
 CONFIG_ZORRO8390=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -510,6 +512,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -598,6 +601,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -605,6 +609,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -633,14 +638,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index f34491e..ef00875 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -52,7 +52,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -86,6 +86,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -276,7 +277,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -300,7 +300,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -325,6 +324,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
@@ -332,11 +332,13 @@ CONFIG_MVME147_NET=y
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -399,6 +401,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -487,6 +490,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -494,6 +498,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -522,14 +527,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 3d3614d..387c2bd 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -53,7 +53,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -87,6 +87,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -277,7 +278,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -301,7 +301,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -326,17 +325,20 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 CONFIG_MVME16x_NET=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -399,6 +401,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -487,6 +490,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -494,6 +498,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -522,14 +527,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 643e9c9..35355c1 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -53,7 +53,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -87,6 +87,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -280,7 +281,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -307,7 +307,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -332,6 +331,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
@@ -341,12 +341,14 @@ CONFIG_VETH=m
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_CIRRUS is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_HP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 CONFIG_NE2000=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -421,6 +423,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -509,6 +512,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -516,6 +520,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -544,14 +549,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 8fecc5a..8442d26 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -50,7 +50,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -84,6 +84,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -274,7 +275,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -298,7 +298,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -323,17 +322,20 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
 CONFIG_SUN3LANCE=y
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 CONFIG_SUN3_82586=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -400,6 +402,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -487,6 +490,7 @@ CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -494,6 +498,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -522,14 +527,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 9902c5b..0e1b542 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -50,7 +50,7 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE=m
+CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -84,6 +84,7 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=m
+CONFIG_NF_TABLES_NETDEV=m
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
@@ -274,7 +275,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_PMEM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
@@ -298,7 +298,6 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_CACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -323,6 +322,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
+CONFIG_GENEVE=m
 CONFIG_NETCONSOLE=m
 CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_VETH=m
@@ -330,11 +330,13 @@ CONFIG_SUN3LANCE=y
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
@@ -400,6 +402,7 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
 CONFIG_TMPFS=y
 CONFIG_AFFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -488,6 +491,7 @@ CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -495,6 +499,7 @@ CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -523,14 +528,15 @@ CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 1555bc1..eb85bd9 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -18,6 +18,7 @@ generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += mutex.h
 generic-y += percpu.h
diff --git a/arch/m68k/include/asm/coldfire.h b/arch/m68k/include/asm/coldfire.h
index c94557b..50aa4da 100644
--- a/arch/m68k/include/asm/coldfire.h
+++ b/arch/m68k/include/asm/coldfire.h
@@ -19,7 +19,7 @@
  *	in any case new boards come along from time to time that have yet
  *	another different clocking frequency.
  */
-#ifdef CONFIG_CLOCK_SET
+#ifdef CONFIG_CLOCK_FREQ
 #define	MCF_CLK		CONFIG_CLOCK_FREQ
 #else
 #error "Don't know what your ColdFire CPU clock frequency is??"
diff --git a/arch/m68k/include/asm/io_mm.h b/arch/m68k/include/asm/io_mm.h
index 618c85d3..f55cad5 100644
--- a/arch/m68k/include/asm/io_mm.h
+++ b/arch/m68k/include/asm/io_mm.h
@@ -413,7 +413,8 @@ static inline void isa_delay(void)
 #define writew(val, addr)	out_le16((addr), (val))
 #endif /* CONFIG_ATARI_ROM_ISA */
 
-#if !defined(CONFIG_ISA) && !defined(CONFIG_ATARI_ROM_ISA)
+#if !defined(CONFIG_ISA) && !defined(CONFIG_ATARI_ROM_ISA) && \
+    !(defined(CONFIG_PCI) && defined(CONFIG_COLDFIRE))
 /*
  * We need to define dummy functions for GENERIC_IOMAP support.
  */
diff --git a/arch/m68k/include/asm/mm-arch-hooks.h b/arch/m68k/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 7e8709b..0000000
--- a/arch/m68k/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_M68K_MM_ARCH_HOOKS_H
-#define _ASM_M68K_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_M68K_MM_ARCH_HOOKS_H */
diff --git a/arch/m68k/kernel/bootinfo_proc.c b/arch/m68k/kernel/bootinfo_proc.c
index 7ee853e..2a33a96 100644
--- a/arch/m68k/kernel/bootinfo_proc.c
+++ b/arch/m68k/kernel/bootinfo_proc.c
@@ -62,12 +62,10 @@ static int __init init_bootinfo_procfs(void)
 	if (!bootinfo_size)
 		return -EINVAL;
 
-	bootinfo_copy = kmalloc(bootinfo_size, GFP_KERNEL);
+	bootinfo_copy = kmemdup(bootinfo_tmp, bootinfo_size, GFP_KERNEL);
 	if (!bootinfo_copy)
 		return -ENOMEM;
 
-	memcpy(bootinfo_copy, bootinfo_tmp, bootinfo_size);
-
 	pde = proc_create_data("bootinfo", 0400, NULL, &bootinfo_fops, NULL);
 	if (!pde) {
 		kfree(bootinfo_copy);
diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild
index 199320f..df31353 100644
--- a/arch/metag/include/asm/Kbuild
+++ b/arch/metag/include/asm/Kbuild
@@ -25,6 +25,7 @@ generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += msgbuf.h
 generic-y += mutex.h
 generic-y += param.h
diff --git a/arch/metag/include/asm/mm-arch-hooks.h b/arch/metag/include/asm/mm-arch-hooks.h
deleted file mode 100644
index b0072b2..0000000
--- a/arch/metag/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_METAG_MM_ARCH_HOOKS_H
-#define _ASM_METAG_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_METAG_MM_ARCH_HOOKS_H */
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 9989ddb..2f222f3 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -6,6 +6,7 @@ generic-y += device.h
 generic-y += exec.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += syscalls.h
 generic-y += trace_clock.h
diff --git a/arch/microblaze/include/asm/mm-arch-hooks.h b/arch/microblaze/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 5c40659..0000000
--- a/arch/microblaze/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_MICROBLAZE_MM_ARCH_HOOKS_H
-#define _ASM_MICROBLAZE_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_MICROBLAZE_MM_ARCH_HOOKS_H */
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index ae838ed..6b8b752 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -863,14 +863,7 @@ void pcibios_setup_bus_devices(struct pci_bus *bus)
 
 void pcibios_fixup_bus(struct pci_bus *bus)
 {
-	/* When called from the generic PCI probe, read PCI<->PCI bridge
-	 * bases. This is -not- called when generating the PCI tree from
-	 * the OF device-tree.
-	 */
-	if (bus->self != NULL)
-		pci_read_bridge_bases(bus);
-
-	/* Now fixup the bus bus */
+	/* Fixup the bus */
 	pcibios_setup_bus_self(bus);
 
 	/* Now fixup devices on that bus */
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 2a14585..199a835 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -151,7 +151,6 @@ config BMIPS_GENERIC
 	select BCM7120_L2_IRQ
 	select BRCMSTB_L2_IRQ
 	select IRQ_MIPS_CPU
-	select RAW_IRQ_ACCESSORS
 	select DMA_NONCOHERENT
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_LITTLE_ENDIAN
@@ -1427,6 +1426,7 @@ config CPU_MIPS64_R6
 	select CPU_SUPPORTS_HIGHMEM
 	select CPU_SUPPORTS_MSA
 	select GENERIC_CSUM
+	select MIPS_O32_FP64_SUPPORT if MIPS32_O32
 	help
 	  Choose this option to build a kernel for release 6 or later of the
 	  MIPS64 architecture.  New MIPS processors, starting with the Warrior
@@ -2231,7 +2231,7 @@ config MIPS_CMP
 
 config MIPS_CPS
 	bool "MIPS Coherent Processing System support"
-	depends on SYS_SUPPORTS_MIPS_CPS && !64BIT
+	depends on SYS_SUPPORTS_MIPS_CPS
 	select MIPS_CM
 	select MIPS_CPC
 	select MIPS_CPS_PM if HOTPLUG_CPU
@@ -2262,11 +2262,6 @@ config MIPS_CM
 config MIPS_CPC
 	bool
 
-config SB1_PASS_1_WORKAROUNDS
-	bool
-	depends on CPU_SB1_PASS_1
-	default y
-
 config SB1_PASS_2_WORKAROUNDS
 	bool
 	depends on CPU_SB1 && (CPU_SB1_PASS_2_2 || CPU_SB1_PASS_2)
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index ae2dd59..252e347 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -181,13 +181,6 @@ cflags-$(CONFIG_CPU_R4000_WORKAROUNDS)	+= $(call cc-option,-mfix-r4000,)
 cflags-$(CONFIG_CPU_R4400_WORKAROUNDS)	+= $(call cc-option,-mfix-r4400,)
 cflags-$(CONFIG_CPU_DADDI_WORKAROUNDS)	+= $(call cc-option,-mno-daddi,)
 
-ifdef CONFIG_CPU_SB1
-ifdef CONFIG_SB1_PASS_1_WORKAROUNDS
-KBUILD_AFLAGS_MODULE += -msb1-pass1-workarounds
-KBUILD_CFLAGS_MODULE += -msb1-pass1-workarounds
-endif
-endif
-
 # For smartmips configurations, there are hundreds of warnings due to ISA overrides
 # in assembly and header files. smartmips is only supported for MIPS32r1 onwards
 # and there is no support for 64-bit. Various '.set mips2' or '.set mips3' or
diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c
index 6e46abe..bd34f40 100644
--- a/arch/mips/alchemy/common/clock.c
+++ b/arch/mips/alchemy/common/clock.c
@@ -35,6 +35,7 @@
 
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/clkdev.h>
 #include <linux/slab.h>
@@ -389,12 +390,11 @@ static long alchemy_calc_div(unsigned long rate, unsigned long prate,
 	return div1;
 }
 
-static long alchemy_clk_fgcs_detr(struct clk_hw *hw, unsigned long rate,
-					unsigned long *best_parent_rate,
-					struct clk_hw **best_parent_clk,
-					int scale, int maxdiv)
+static int alchemy_clk_fgcs_detr(struct clk_hw *hw,
+				 struct clk_rate_request *req,
+				 int scale, int maxdiv)
 {
-	struct clk *pc, *bpc, *free;
+	struct clk_hw *pc, *bpc, *free;
 	long tdv, tpr, pr, nr, br, bpr, diff, lastdiff;
 	int j;
 
@@ -408,7 +408,7 @@ static long alchemy_clk_fgcs_detr(struct clk_hw *hw, unsigned long rate,
 	 * the one that gets closest to but not over the requested rate.
 	 */
 	for (j = 0; j < 7; j++) {
-		pc = clk_get_parent_by_index(hw->clk, j);
+		pc = clk_hw_get_parent_by_index(hw, j);
 		if (!pc)
 			break;
 
@@ -416,20 +416,20 @@ static long alchemy_clk_fgcs_detr(struct clk_hw *hw, unsigned long rate,
 		 * XXX: we would actually want clk_has_active_children()
 		 * but this is a good-enough approximation for now.
 		 */
-		if (!__clk_is_prepared(pc)) {
+		if (!clk_hw_is_prepared(pc)) {
 			if (!free)
 				free = pc;
 		}
 
-		pr = clk_get_rate(pc);
-		if (pr < rate)
+		pr = clk_hw_get_rate(pc);
+		if (pr < req->rate)
 			continue;
 
 		/* what can hardware actually provide */
-		tdv = alchemy_calc_div(rate, pr, scale, maxdiv, NULL);
+		tdv = alchemy_calc_div(req->rate, pr, scale, maxdiv, NULL);
 		nr = pr / tdv;
-		diff = rate - nr;
-		if (nr > rate)
+		diff = req->rate - nr;
+		if (nr > req->rate)
 			continue;
 
 		if (diff < lastdiff) {
@@ -448,15 +448,16 @@ static long alchemy_clk_fgcs_detr(struct clk_hw *hw, unsigned long rate,
 	 */
 	if (lastdiff && free) {
 		for (j = (maxdiv == 4) ? 1 : scale; j <= maxdiv; j += scale) {
-			tpr = rate * j;
+			tpr = req->rate * j;
 			if (tpr < 0)
 				break;
-			pr = clk_round_rate(free, tpr);
+			pr = clk_hw_round_rate(free, tpr);
 
-			tdv = alchemy_calc_div(rate, pr, scale, maxdiv, NULL);
+			tdv = alchemy_calc_div(req->rate, pr, scale, maxdiv,
+					       NULL);
 			nr = pr / tdv;
-			diff = rate - nr;
-			if (nr > rate)
+			diff = req->rate - nr;
+			if (nr > req->rate)
 				continue;
 			if (diff < lastdiff) {
 				lastdiff = diff;
@@ -469,9 +470,14 @@ static long alchemy_clk_fgcs_detr(struct clk_hw *hw, unsigned long rate,
 		}
 	}
 
-	*best_parent_rate = bpr;
-	*best_parent_clk = __clk_get_hw(bpc);
-	return br;
+	if (br < 0)
+		return br;
+
+	req->best_parent_rate = bpr;
+	req->best_parent_hw = bpc;
+	req->rate = br;
+
+	return 0;
 }
 
 static int alchemy_clk_fgv1_en(struct clk_hw *hw)
@@ -562,14 +568,10 @@ static unsigned long alchemy_clk_fgv1_recalc(struct clk_hw *hw,
 	return parent_rate / v;
 }
 
-static long alchemy_clk_fgv1_detr(struct clk_hw *hw, unsigned long rate,
-					unsigned long min_rate,
-					unsigned long max_rate,
-					unsigned long *best_parent_rate,
-					struct clk_hw **best_parent_clk)
+static int alchemy_clk_fgv1_detr(struct clk_hw *hw,
+				 struct clk_rate_request *req)
 {
-	return alchemy_clk_fgcs_detr(hw, rate, best_parent_rate,
-				     best_parent_clk, 2, 512);
+	return alchemy_clk_fgcs_detr(hw, req, 2, 512);
 }
 
 /* Au1000, Au1100, Au15x0, Au12x0 */
@@ -696,11 +698,8 @@ static unsigned long alchemy_clk_fgv2_recalc(struct clk_hw *hw,
 	return t;
 }
 
-static long alchemy_clk_fgv2_detr(struct clk_hw *hw, unsigned long rate,
-					unsigned long min_rate,
-					unsigned long max_rate,
-					unsigned long *best_parent_rate,
-					struct clk_hw **best_parent_clk)
+static int alchemy_clk_fgv2_detr(struct clk_hw *hw,
+				 struct clk_rate_request *req)
 {
 	struct alchemy_fgcs_clk *c = to_fgcs_clk(hw);
 	int scale, maxdiv;
@@ -713,8 +712,7 @@ static long alchemy_clk_fgv2_detr(struct clk_hw *hw, unsigned long rate,
 		maxdiv = 512;
 	}
 
-	return alchemy_clk_fgcs_detr(hw, rate, best_parent_rate,
-				     best_parent_clk, scale, maxdiv);
+	return alchemy_clk_fgcs_detr(hw, req, scale, maxdiv);
 }
 
 /* Au1300 larger input mux, no separate disable bit, flexible divider */
@@ -917,17 +915,13 @@ static int alchemy_clk_csrc_setr(struct clk_hw *hw, unsigned long rate,
 	return 0;
 }
 
-static long alchemy_clk_csrc_detr(struct clk_hw *hw, unsigned long rate,
-					unsigned long min_rate,
-					unsigned long max_rate,
-					unsigned long *best_parent_rate,
-					struct clk_hw **best_parent_clk)
+static int alchemy_clk_csrc_detr(struct clk_hw *hw,
+				 struct clk_rate_request *req)
 {
 	struct alchemy_fgcs_clk *c = to_fgcs_clk(hw);
 	int scale = c->dt[2] == 3 ? 1 : 2; /* au1300 check */
 
-	return alchemy_clk_fgcs_detr(hw, rate, best_parent_rate,
-				     best_parent_clk, scale, 4);
+	return alchemy_clk_fgcs_detr(hw, req, scale, 4);
 }
 
 static struct clk_ops alchemy_clkops_csrc = {
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index 01a644f..1ba2120 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -190,6 +190,7 @@ int get_c0_perfcount_int(void)
 {
 	return ATH79_MISC_IRQ(5);
 }
+EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
 
 unsigned int get_c0_compare_int(void)
 {
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 56f5d08..b7fa9ae 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -42,7 +42,7 @@ static irqreturn_t mailbox_interrupt(int irq, void *dev_id)
 	cvmx_write_csr(CVMX_CIU_MBOX_CLRX(coreid), action);
 
 	if (action & SMP_CALL_FUNCTION)
-		smp_call_function_interrupt();
+		generic_smp_call_function_interrupt();
 	if (action & SMP_RESCHEDULE_YOURSELF)
 		scheduler_ipi();
 
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 7fe5c61..1f85460 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -7,6 +7,7 @@ generic-y += emergency-restart.h
 generic-y += irq_work.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mutex.h
 generic-y += parport.h
 generic-y += percpu.h
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index 084780b..1b06251 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -74,7 +74,7 @@ static inline int __enable_fpu(enum fpu_mode mode)
 		goto fr_common;
 
 	case FPU_64BIT:
-#if !(defined(CONFIG_CPU_MIPS32_R2) || defined(CONFIG_CPU_MIPS32_R6) \
+#if !(defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) \
       || defined(CONFIG_64BIT))
 		/* we only have a 32-bit FPU */
 		return SIGFPE;
diff --git a/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h b/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h
deleted file mode 100644
index 11d3b57..0000000
--- a/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef __ASM_MACH_BCM63XX_DMA_COHERENCE_H
-#define __ASM_MACH_BCM63XX_DMA_COHERENCE_H
-
-#include <asm/bmips.h>
-
-#define plat_post_dma_flush	bmips_post_dma_flush
-
-#include <asm/mach-generic/dma-coherence.h>
-
-#endif /* __ASM_MACH_BCM63XX_DMA_COHERENCE_H */
diff --git a/arch/mips/include/asm/mach-loongson64/mmzone.h b/arch/mips/include/asm/mach-loongson64/mmzone.h
index 37c08a2..c9f7e23 100644
--- a/arch/mips/include/asm/mach-loongson64/mmzone.h
+++ b/arch/mips/include/asm/mach-loongson64/mmzone.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2010 Loongson Inc. & Lemote Inc. &
- *                    Insititute of Computing Technology
+ *                    Institute of Computing Technology
  * Author:  Xiang Gao, gaoxiang@ict.ac.cn
  *          Huacai Chen, chenhc@lemote.com
  *          Xiaofu Meng, Shuangshuang Zhang
diff --git a/arch/mips/include/asm/mach-sibyte/war.h b/arch/mips/include/asm/mach-sibyte/war.h
index 0a227d4..520f8fc 100644
--- a/arch/mips/include/asm/mach-sibyte/war.h
+++ b/arch/mips/include/asm/mach-sibyte/war.h
@@ -13,8 +13,7 @@
 #define R4600_V2_HIT_CACHEOP_WAR	0
 #define R5432_CP0_INTERRUPT_WAR		0
 
-#if defined(CONFIG_SB1_PASS_1_WORKAROUNDS) || \
-    defined(CONFIG_SB1_PASS_2_WORKAROUNDS)
+#if defined(CONFIG_SB1_PASS_2_WORKAROUNDS)
 
 #ifndef __ASSEMBLY__
 extern int sb1250_m3_workaround_needed(void);
diff --git a/arch/mips/include/asm/mm-arch-hooks.h b/arch/mips/include/asm/mm-arch-hooks.h
deleted file mode 100644
index b5609fe..0000000
--- a/arch/mips/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_MIPS_MM_ARCH_HOOKS_H
-#define _ASM_MIPS_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_MIPS_MM_ARCH_HOOKS_H */
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 9d81067..ae85694 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -182,8 +182,39 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
 		 * Make sure the buddy is global too (if it's !none,
 		 * it better already be global)
 		 */
+#ifdef CONFIG_SMP
+		/*
+		 * For SMP, multiple CPUs can race, so we need to do
+		 * this atomically.
+		 */
+#ifdef CONFIG_64BIT
+#define LL_INSN "lld"
+#define SC_INSN "scd"
+#else /* CONFIG_32BIT */
+#define LL_INSN "ll"
+#define SC_INSN "sc"
+#endif
+		unsigned long page_global = _PAGE_GLOBAL;
+		unsigned long tmp;
+
+		__asm__ __volatile__ (
+			"	.set	push\n"
+			"	.set	noreorder\n"
+			"1:	" LL_INSN "	%[tmp], %[buddy]\n"
+			"	bnez	%[tmp], 2f\n"
+			"	 or	%[tmp], %[tmp], %[global]\n"
+			"	" SC_INSN "	%[tmp], %[buddy]\n"
+			"	beqz	%[tmp], 1b\n"
+			"	 nop\n"
+			"2:\n"
+			"	.set pop"
+			: [buddy] "+m" (buddy->pte),
+			  [tmp] "=&r" (tmp)
+			: [global] "r" (page_global));
+#else /* !CONFIG_SMP */
 		if (pte_none(*buddy))
 			pte_val(*buddy) = pte_val(*buddy) | _PAGE_GLOBAL;
+#endif /* CONFIG_SMP */
 	}
 #endif
 }
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index 2b25d1b..03722d4 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -23,6 +23,7 @@
 extern int smp_num_siblings;
 extern cpumask_t cpu_sibling_map[];
 extern cpumask_t cpu_core_map[];
+extern cpumask_t cpu_foreign_map;
 
 #define raw_smp_processor_id() (current_thread_info()->cpu)
 
@@ -82,8 +83,6 @@ static inline void __cpu_die(unsigned int cpu)
 extern void play_dead(void);
 #endif
 
-extern asmlinkage void smp_call_function_interrupt(void);
-
 static inline void arch_send_call_function_single_ipi(int cpu)
 {
 	extern struct plat_smp_ops *mp_ops;	/* private */
diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h
index 28d6d93..a71da57 100644
--- a/arch/mips/include/asm/stackframe.h
+++ b/arch/mips/include/asm/stackframe.h
@@ -152,6 +152,31 @@
 		.set	noreorder
 		bltz	k0, 8f
 		 move	k1, sp
+#ifdef CONFIG_EVA
+		/*
+		 * Flush interAptiv's Return Prediction Stack (RPS) by writing
+		 * EntryHi. Toggling Config7.RPS is slower and less portable.
+		 *
+		 * The RPS isn't automatically flushed when exceptions are
+		 * taken, which can result in kernel mode speculative accesses
+		 * to user addresses if the RPS mispredicts. That's harmless
+		 * when user and kernel share the same address space, but with
+		 * EVA the same user segments may be unmapped to kernel mode,
+		 * even containing sensitive MMIO regions or invalid memory.
+		 *
+		 * This can happen when the kernel sets the return address to
+		 * ret_from_* and jr's to the exception handler, which looks
+		 * more like a tail call than a function call. If nested calls
+		 * don't evict the last user address in the RPS, it will
+		 * mispredict the return and fetch from a user controlled
+		 * address into the icache.
+		 *
+		 * More recent EVA-capable cores with MAAR to restrict
+		 * speculative accesses aren't affected.
+		 */
+		MFC0	k0, CP0_ENTRYHI
+		MTC0	k0, CP0_ENTRYHI
+#endif
 		.set	reorder
 		/* Called from user mode, new stack. */
 		get_saved_sp
diff --git a/arch/mips/include/uapi/asm/sigcontext.h b/arch/mips/include/uapi/asm/sigcontext.h
index 6c9906f..9081d88 100644
--- a/arch/mips/include/uapi/asm/sigcontext.h
+++ b/arch/mips/include/uapi/asm/sigcontext.h
@@ -16,7 +16,7 @@
 
 /*
  * Keep this struct definition in sync with the sigcontext fragment
- * in arch/mips/tools/offset.c
+ * in arch/mips/kernel/asm-offsets.c
  */
 struct sigcontext {
 	unsigned int		sc_regmask;	/* Unused */
@@ -46,7 +46,7 @@ struct sigcontext {
 #include <linux/posix_types.h>
 /*
  * Keep this struct definition in sync with the sigcontext fragment
- * in arch/mips/tools/offset.c
+ * in arch/mips/kernel/asm-offsets.c
  *
  * Warning: this structure illdefined with sc_badvaddr being just an unsigned
  * int so it was changed to unsigned long in 2.6.0-test1.  This may break
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index beabe19..072fab1 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -1,5 +1,5 @@
 /*
- * offset.c: Calculate pt_regs and task_struct offsets.
+ * asm-offsets.c: Calculate pt_regs and task_struct offsets.
  *
  * Copyright (C) 1996 David S. Miller
  * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003 Ralf Baechle
diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index c0c5e59..d8f9b35 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -600,7 +600,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 		break;
 
 	case blezl_op: /* not really i_format */
-		if (NO_R6EMU)
+		if (!insn.i_format.rt && NO_R6EMU)
 			goto sigill_r6;
 	case blez_op:
 		/*
@@ -635,7 +635,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 		break;
 
 	case bgtzl_op:
-		if (NO_R6EMU)
+		if (!insn.i_format.rt && NO_R6EMU)
 			goto sigill_r6;
 	case bgtz_op:
 		/*
diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
index 55b759a..1b6ca63 100644
--- a/arch/mips/kernel/cps-vec.S
+++ b/arch/mips/kernel/cps-vec.S
@@ -60,7 +60,7 @@ LEAF(mips_cps_core_entry)
 	 nop
 
 	/* This is an NMI */
-	la	k0, nmi_handler
+	PTR_LA	k0, nmi_handler
 	jr	k0
 	 nop
 
@@ -107,10 +107,10 @@ not_nmi:
 	mul	t1, t1, t0
 	mul	t1, t1, t2
 
-	li	a0, KSEG0
-	add	a1, a0, t1
+	li	a0, CKSEG0
+	PTR_ADD	a1, a0, t1
 1:	cache	Index_Store_Tag_I, 0(a0)
-	add	a0, a0, t0
+	PTR_ADD	a0, a0, t0
 	bne	a0, a1, 1b
 	 nop
 icache_done:
@@ -134,12 +134,12 @@ icache_done:
 	mul	t1, t1, t0
 	mul	t1, t1, t2
 
-	li	a0, KSEG0
-	addu	a1, a0, t1
-	subu	a1, a1, t0
+	li	a0, CKSEG0
+	PTR_ADDU a1, a0, t1
+	PTR_SUBU a1, a1, t0
 1:	cache	Index_Store_Tag_D, 0(a0)
 	bne	a0, a1, 1b
-	 add	a0, a0, t0
+	 PTR_ADD a0, a0, t0
 dcache_done:
 
 	/* Set Kseg0 CCA to that in s0 */
@@ -152,11 +152,11 @@ dcache_done:
 
 	/* Enter the coherent domain */
 	li	t0, 0xff
-	sw	t0, GCR_CL_COHERENCE_OFS(v1)
+	PTR_S	t0, GCR_CL_COHERENCE_OFS(v1)
 	ehb
 
 	/* Jump to kseg0 */
-	la	t0, 1f
+	PTR_LA	t0, 1f
 	jr	t0
 	 nop
 
@@ -178,9 +178,9 @@ dcache_done:
 	 nop
 
 	/* Off we go! */
-	lw	t1, VPEBOOTCFG_PC(v0)
-	lw	gp, VPEBOOTCFG_GP(v0)
-	lw	sp, VPEBOOTCFG_SP(v0)
+	PTR_L	t1, VPEBOOTCFG_PC(v0)
+	PTR_L	gp, VPEBOOTCFG_GP(v0)
+	PTR_L	sp, VPEBOOTCFG_SP(v0)
 	jr	t1
 	 nop
 	END(mips_cps_core_entry)
@@ -217,7 +217,7 @@ LEAF(excep_intex)
 
 .org 0x480
 LEAF(excep_ejtag)
-	la	k0, ejtag_debug_handler
+	PTR_LA	k0, ejtag_debug_handler
 	jr	k0
 	 nop
 	END(excep_ejtag)
@@ -229,7 +229,7 @@ LEAF(mips_cps_core_init)
 	 nop
 
 	.set	push
-	.set	mips32r2
+	.set	mips64r2
 	.set	mt
 
 	/* Only allow 1 TC per VPE to execute... */
@@ -237,7 +237,7 @@ LEAF(mips_cps_core_init)
 
 	/* ...and for the moment only 1 VPE */
 	dvpe
-	la	t1, 1f
+	PTR_LA	t1, 1f
 	jr.hb	t1
 	 nop
 
@@ -250,25 +250,25 @@ LEAF(mips_cps_core_init)
 	mfc0	t0, CP0_MVPCONF0
 	srl	t0, t0, MVPCONF0_PVPE_SHIFT
 	andi	t0, t0, (MVPCONF0_PVPE >> MVPCONF0_PVPE_SHIFT)
-	addiu	t7, t0, 1
+	addiu	ta3, t0, 1
 
 	/* If there's only 1, we're done */
 	beqz	t0, 2f
 	 nop
 
 	/* Loop through each VPE within this core */
-	li	t5, 1
+	li	ta1, 1
 
 1:	/* Operate on the appropriate TC */
-	mtc0	t5, CP0_VPECONTROL
+	mtc0	ta1, CP0_VPECONTROL
 	ehb
 
 	/* Bind TC to VPE (1:1 TC:VPE mapping) */
-	mttc0	t5, CP0_TCBIND
+	mttc0	ta1, CP0_TCBIND
 
 	/* Set exclusive TC, non-active, master */
 	li	t0, VPECONF0_MVP
-	sll	t1, t5, VPECONF0_XTC_SHIFT
+	sll	t1, ta1, VPECONF0_XTC_SHIFT
 	or	t0, t0, t1
 	mttc0	t0, CP0_VPECONF0
 
@@ -280,8 +280,8 @@ LEAF(mips_cps_core_init)
 	mttc0	t0, CP0_TCHALT
 
 	/* Next VPE */
-	addiu	t5, t5, 1
-	slt	t0, t5, t7
+	addiu	ta1, ta1, 1
+	slt	t0, ta1, ta3
 	bnez	t0, 1b
 	 nop
 
@@ -298,19 +298,19 @@ LEAF(mips_cps_core_init)
 
 LEAF(mips_cps_boot_vpes)
 	/* Retrieve CM base address */
-	la	t0, mips_cm_base
-	lw	t0, 0(t0)
+	PTR_LA	t0, mips_cm_base
+	PTR_L	t0, 0(t0)
 
 	/* Calculate a pointer to this cores struct core_boot_config */
-	lw	t0, GCR_CL_ID_OFS(t0)
+	PTR_L	t0, GCR_CL_ID_OFS(t0)
 	li	t1, COREBOOTCFG_SIZE
 	mul	t0, t0, t1
-	la	t1, mips_cps_core_bootcfg
-	lw	t1, 0(t1)
-	addu	t0, t0, t1
+	PTR_LA	t1, mips_cps_core_bootcfg
+	PTR_L	t1, 0(t1)
+	PTR_ADDU t0, t0, t1
 
 	/* Calculate this VPEs ID. If the core doesn't support MT use 0 */
-	has_mt	t6, 1f
+	has_mt	ta2, 1f
 	 li	t9, 0
 
 	/* Find the number of VPEs present in the core */
@@ -334,24 +334,24 @@ LEAF(mips_cps_boot_vpes)
 1:	/* Calculate a pointer to this VPEs struct vpe_boot_config */
 	li	t1, VPEBOOTCFG_SIZE
 	mul	v0, t9, t1
-	lw	t7, COREBOOTCFG_VPECONFIG(t0)
-	addu	v0, v0, t7
+	PTR_L	ta3, COREBOOTCFG_VPECONFIG(t0)
+	PTR_ADDU v0, v0, ta3
 
 #ifdef CONFIG_MIPS_MT
 
 	/* If the core doesn't support MT then return */
-	bnez	t6, 1f
+	bnez	ta2, 1f
 	 nop
 	jr	ra
 	 nop
 
 	.set	push
-	.set	mips32r2
+	.set	mips64r2
 	.set	mt
 
 1:	/* Enter VPE configuration state */
 	dvpe
-	la	t1, 1f
+	PTR_LA	t1, 1f
 	jr.hb	t1
 	 nop
 1:	mfc0	t1, CP0_MVPCONTROL
@@ -360,12 +360,12 @@ LEAF(mips_cps_boot_vpes)
 	ehb
 
 	/* Loop through each VPE */
-	lw	t6, COREBOOTCFG_VPEMASK(t0)
-	move	t8, t6
-	li	t5, 0
+	PTR_L	ta2, COREBOOTCFG_VPEMASK(t0)
+	move	t8, ta2
+	li	ta1, 0
 
 	/* Check whether the VPE should be running. If not, skip it */
-1:	andi	t0, t6, 1
+1:	andi	t0, ta2, 1
 	beqz	t0, 2f
 	 nop
 
@@ -373,7 +373,7 @@ LEAF(mips_cps_boot_vpes)
 	mfc0	t0, CP0_VPECONTROL
 	ori	t0, t0, VPECONTROL_TARGTC
 	xori	t0, t0, VPECONTROL_TARGTC
-	or	t0, t0, t5
+	or	t0, t0, ta1
 	mtc0	t0, CP0_VPECONTROL
 	ehb
 
@@ -384,8 +384,8 @@ LEAF(mips_cps_boot_vpes)
 
 	/* Calculate a pointer to the VPEs struct vpe_boot_config */
 	li	t0, VPEBOOTCFG_SIZE
-	mul	t0, t0, t5
-	addu	t0, t0, t7
+	mul	t0, t0, ta1
+	addu	t0, t0, ta3
 
 	/* Set the TC restart PC */
 	lw	t1, VPEBOOTCFG_PC(t0)
@@ -423,9 +423,9 @@ LEAF(mips_cps_boot_vpes)
 	mttc0	t0, CP0_VPECONF0
 
 	/* Next VPE */
-2:	srl	t6, t6, 1
-	addiu	t5, t5, 1
-	bnez	t6, 1b
+2:	srl	ta2, ta2, 1
+	addiu	ta1, ta1, 1
+	bnez	ta2, 1b
 	 nop
 
 	/* Leave VPE configuration state */
@@ -445,7 +445,7 @@ LEAF(mips_cps_boot_vpes)
 	/* This VPE should be offline, halt the TC */
 	li	t0, TCHALT_H
 	mtc0	t0, CP0_TCHALT
-	la	t0, 1f
+	PTR_LA	t0, 1f
 1:	jr.hb	t0
 	 nop
 
@@ -466,10 +466,10 @@ LEAF(mips_cps_boot_vpes)
 	.set	noat
 	lw	$1, TI_CPU(gp)
 	sll	$1, $1, LONGLOG
-	la	\dest, __per_cpu_offset
+	PTR_LA	\dest, __per_cpu_offset
 	addu	$1, $1, \dest
 	lw	$1, 0($1)
-	la	\dest, cps_cpu_state
+	PTR_LA	\dest, cps_cpu_state
 	addu	\dest, \dest, $1
 	.set	pop
 	.endm
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index af42e70..baa7b6f 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -407,7 +407,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
 	.set	noat
 	SAVE_ALL
 	FEXPORT(handle_\exception\ext)
-	__BUILD_clear_\clear
+	__build_clear_\clear
 	.set	at
 	__BUILD_\verbose \exception
 	move	a0, sp
diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index 3e4491a..789d7bf 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c
@@ -154,7 +154,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
 				      unsigned long __user *user_mask_ptr)
 {
 	unsigned int real_len;
-	cpumask_t mask;
+	cpumask_t allowed, mask;
 	int retval;
 	struct task_struct *p;
 
@@ -173,7 +173,8 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
 	if (retval)
 		goto out_unlock;
 
-	cpumask_and(&mask, &p->thread.user_cpus_allowed, cpu_possible_mask);
+	cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed);
+	cpumask_and(&mask, &allowed, cpu_active_mask);
 
 out_unlock:
 	read_unlock(&tasklist_lock);
diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c
index b130033..5fcec30 100644
--- a/arch/mips/kernel/prom.c
+++ b/arch/mips/kernel/prom.c
@@ -38,7 +38,7 @@ char *mips_get_machine_name(void)
 	return mips_machine_name;
 }
 
-#ifdef CONFIG_OF
+#ifdef CONFIG_USE_OF
 void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 {
 	return add_memory_region(base, size, BOOT_MEM_RAM);
diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S
index 74bab9d..c6bbf21 100644
--- a/arch/mips/kernel/relocate_kernel.S
+++ b/arch/mips/kernel/relocate_kernel.S
@@ -24,7 +24,7 @@ LEAF(relocate_new_kernel)
 
 process_entry:
 	PTR_L		s2, (s0)
-	PTR_ADD		s0, s0, SZREG
+	PTR_ADDIU	s0, s0, SZREG
 
 	/*
 	 * In case of a kdump/crash kernel, the indirection page is not
@@ -61,9 +61,9 @@ copy_word:
 	/* copy page word by word */
 	REG_L		s5, (s2)
 	REG_S		s5, (s4)
-	PTR_ADD		s4, s4, SZREG
-	PTR_ADD		s2, s2, SZREG
-	LONG_SUB	s6, s6, 1
+	PTR_ADDIU	s4, s4, SZREG
+	PTR_ADDIU	s2, s2, SZREG
+	LONG_ADDIU	s6, s6, -1
 	beq		s6, zero, process_entry
 	b		copy_word
 	b		process_entry
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 6e8de80..4cc1350 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -73,10 +73,11 @@ NESTED(handle_sys, PT_SIZE, sp)
 	.set    noreorder
 	.set	nomacro
 
-1:	user_lw(t5, 16(t0))		# argument #5 from usp
-4:	user_lw(t6, 20(t0))		# argument #6 from usp
-3:	user_lw(t7, 24(t0))		# argument #7 from usp
-2:	user_lw(t8, 28(t0))		# argument #8 from usp
+load_a4: user_lw(t5, 16(t0))		# argument #5 from usp
+load_a5: user_lw(t6, 20(t0))		# argument #6 from usp
+load_a6: user_lw(t7, 24(t0))		# argument #7 from usp
+load_a7: user_lw(t8, 28(t0))		# argument #8 from usp
+loads_done:
 
 	sw	t5, 16(sp)		# argument #5 to ksp
 	sw	t6, 20(sp)		# argument #6 to ksp
@@ -85,10 +86,10 @@ NESTED(handle_sys, PT_SIZE, sp)
 	.set	pop
 
 	.section __ex_table,"a"
-	PTR	1b,bad_stack
-	PTR	2b,bad_stack
-	PTR	3b,bad_stack
-	PTR	4b,bad_stack
+	PTR	load_a4, bad_stack_a4
+	PTR	load_a5, bad_stack_a5
+	PTR	load_a6, bad_stack_a6
+	PTR	load_a7, bad_stack_a7
 	.previous
 
 	lw	t0, TI_FLAGS($28)	# syscall tracing enabled?
@@ -153,8 +154,8 @@ syscall_trace_entry:
 /* ------------------------------------------------------------------------ */
 
 	/*
-	 * The stackpointer for a call with more than 4 arguments is bad.
-	 * We probably should handle this case a bit more drastic.
+	 * Our open-coded access area sanity test for the stack pointer
+	 * failed. We probably should handle this case a bit more drastic.
 	 */
 bad_stack:
 	li	v0, EFAULT
@@ -163,6 +164,22 @@ bad_stack:
 	sw	t0, PT_R7(sp)
 	j	o32_syscall_exit
 
+bad_stack_a4:
+	li	t5, 0
+	b	load_a5
+
+bad_stack_a5:
+	li	t6, 0
+	b	load_a6
+
+bad_stack_a6:
+	li	t7, 0
+	b	load_a7
+
+bad_stack_a7:
+	li	t8, 0
+	b	loads_done
+
 	/*
 	 * The system call does not exist in this kernel
 	 */
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index ad4d4463..a6f6b76 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -80,7 +80,7 @@ syscall_trace_entry:
 	SAVE_STATIC
 	move	s0, t2
 	move	a0, sp
-	daddiu	a1, v0, __NR_64_Linux
+	move	a1, v0
 	jal	syscall_trace_enter
 
 	bltz	v0, 2f			# seccomp failed? Skip syscall
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 446cc65..4b20106 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -72,7 +72,7 @@ n32_syscall_trace_entry:
 	SAVE_STATIC
 	move	s0, t2
 	move	a0, sp
-	daddiu	a1, v0, __NR_N32_Linux
+	move	a1, v0
 	jal	syscall_trace_enter
 
 	bltz	v0, 2f			# seccomp failed? Skip syscall
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index d07b210..f543ff4 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -69,16 +69,17 @@ NESTED(handle_sys, PT_SIZE, sp)
 	daddu	t1, t0, 32
 	bltz	t1, bad_stack
 
-1:	lw	a4, 16(t0)		# argument #5 from usp
-2:	lw	a5, 20(t0)		# argument #6 from usp
-3:	lw	a6, 24(t0)		# argument #7 from usp
-4:	lw	a7, 28(t0)		# argument #8 from usp (for indirect syscalls)
+load_a4: lw	a4, 16(t0)		# argument #5 from usp
+load_a5: lw	a5, 20(t0)		# argument #6 from usp
+load_a6: lw	a6, 24(t0)		# argument #7 from usp
+load_a7: lw	a7, 28(t0)		# argument #8 from usp
+loads_done:
 
 	.section __ex_table,"a"
-	PTR	1b, bad_stack
-	PTR	2b, bad_stack
-	PTR	3b, bad_stack
-	PTR	4b, bad_stack
+	PTR	load_a4, bad_stack_a4
+	PTR	load_a5, bad_stack_a5
+	PTR	load_a6, bad_stack_a6
+	PTR	load_a7, bad_stack_a7
 	.previous
 
 	li	t1, _TIF_WORK_SYSCALL_ENTRY
@@ -167,6 +168,22 @@ bad_stack:
 	sd	t0, PT_R7(sp)
 	j	o32_syscall_exit
 
+bad_stack_a4:
+	li	a4, 0
+	b	load_a5
+
+bad_stack_a5:
+	li	a5, 0
+	b	load_a6
+
+bad_stack_a6:
+	li	a6, 0
+	b	load_a7
+
+bad_stack_a7:
+	li	a7, 0
+	b	loads_done
+
 not_o32_scall:
 	/*
 	 * This is not an o32 compatibility syscall, pass it on
@@ -383,7 +400,7 @@ EXPORT(sys32_call_table)
 	PTR	sys_connect			/* 4170 */
 	PTR	sys_getpeername
 	PTR	sys_getsockname
-	PTR	sys_getsockopt
+	PTR	compat_sys_getsockopt
 	PTR	sys_listen
 	PTR	compat_sys_recv			/* 4175 */
 	PTR	compat_sys_recvfrom
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index be73c49..008b337 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -337,6 +337,11 @@ static void __init bootmem_init(void)
 			min_low_pfn = start;
 		if (end <= reserved_end)
 			continue;
+#ifdef CONFIG_BLK_DEV_INITRD
+		/* mapstart should be after initrd_end */
+		if (initrd_end && end <= (unsigned long)PFN_UP(__pa(initrd_end)))
+			continue;
+#endif
 		if (start >= mapstart)
 			continue;
 		mapstart = max(reserved_end, start);
@@ -366,14 +371,6 @@ static void __init bootmem_init(void)
 		max_low_pfn = PFN_DOWN(HIGHMEM_START);
 	}
 
-#ifdef CONFIG_BLK_DEV_INITRD
-	/*
-	 * mapstart should be after initrd_end
-	 */
-	if (initrd_end)
-		mapstart = max(mapstart, (unsigned long)PFN_UP(__pa(initrd_end)));
-#endif
-
 	/*
 	 * Initialize the boot-time allocator with low memory only.
 	 */
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index 19a7705..5d7f263 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -409,8 +409,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 
 int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 {
-	memset(to, 0, sizeof *to);
-
 	if (copy_from_user(to, from, 3*sizeof(int)) ||
 	    copy_from_user(to->_sifields._pad,
 			   from->_sifields._pad, SI_PAD_SIZE32))
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 336708a..78cf8c2 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -284,7 +284,7 @@ static irqreturn_t bmips5000_ipi_interrupt(int irq, void *dev_id)
 	if (action == 0)
 		scheduler_ipi();
 	else
-		smp_call_function_interrupt();
+		generic_smp_call_function_interrupt();
 
 	return IRQ_HANDLED;
 }
@@ -336,7 +336,7 @@ static irqreturn_t bmips43xx_ipi_interrupt(int irq, void *dev_id)
 	if (action & SMP_RESCHEDULE_YOURSELF)
 		scheduler_ipi();
 	if (action & SMP_CALL_FUNCTION)
-		smp_call_function_interrupt();
+		generic_smp_call_function_interrupt();
 
 	return IRQ_HANDLED;
 }
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 4251d39..c889377 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -133,7 +133,7 @@ static void __init cps_prepare_cpus(unsigned int max_cpus)
 	/*
 	 * Patch the start of mips_cps_core_entry to provide:
 	 *
-	 * v0 = CM base address
+	 * v1 = CM base address
 	 * s0 = kseg0 CCA
 	 */
 	entry_code = (u32 *)&mips_cps_core_entry;
@@ -369,7 +369,7 @@ void play_dead(void)
 
 static void wait_for_sibling_halt(void *ptr_cpu)
 {
-	unsigned cpu = (unsigned)ptr_cpu;
+	unsigned cpu = (unsigned long)ptr_cpu;
 	unsigned vpe_id = cpu_vpe_id(&cpu_data[cpu]);
 	unsigned halted;
 	unsigned long flags;
@@ -430,7 +430,7 @@ static void cps_cpu_die(unsigned int cpu)
 		 */
 		err = smp_call_function_single(cpu_death_sibling,
 					       wait_for_sibling_halt,
-					       (void *)cpu, 1);
+					       (void *)(unsigned long)cpu, 1);
 		if (err)
 			panic("Failed to call remote sibling CPU\n");
 	}
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index faa46eb..a31896c 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -63,6 +63,13 @@ EXPORT_SYMBOL(cpu_sibling_map);
 cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_core_map);
 
+/*
+ * A logcal cpu mask containing only one VPE per core to
+ * reduce the number of IPIs on large MT systems.
+ */
+cpumask_t cpu_foreign_map __read_mostly;
+EXPORT_SYMBOL(cpu_foreign_map);
+
 /* representing cpus for which sibling maps can be computed */
 static cpumask_t cpu_sibling_setup_map;
 
@@ -103,6 +110,29 @@ static inline void set_cpu_core_map(int cpu)
 	}
 }
 
+/*
+ * Calculate a new cpu_foreign_map mask whenever a
+ * new cpu appears or disappears.
+ */
+static inline void calculate_cpu_foreign_map(void)
+{
+	int i, k, core_present;
+	cpumask_t temp_foreign_map;
+
+	/* Re-calculate the mask */
+	for_each_online_cpu(i) {
+		core_present = 0;
+		for_each_cpu(k, &temp_foreign_map)
+			if (cpu_data[i].package == cpu_data[k].package &&
+			    cpu_data[i].core == cpu_data[k].core)
+				core_present = 1;
+		if (!core_present)
+			cpumask_set_cpu(i, &temp_foreign_map);
+	}
+
+	cpumask_copy(&cpu_foreign_map, &temp_foreign_map);
+}
+
 struct plat_smp_ops *mp_ops;
 EXPORT_SYMBOL(mp_ops);
 
@@ -146,6 +176,8 @@ asmlinkage void start_secondary(void)
 	set_cpu_sibling_map(cpu);
 	set_cpu_core_map(cpu);
 
+	calculate_cpu_foreign_map();
+
 	cpumask_set_cpu(cpu, &cpu_callin_map);
 
 	synchronise_count_slave(cpu);
@@ -160,22 +192,21 @@ asmlinkage void start_secondary(void)
 	cpu_startup_entry(CPUHP_ONLINE);
 }
 
-/*
- * Call into both interrupt handlers, as we share the IPI for them
- */
-void __irq_entry smp_call_function_interrupt(void)
-{
-	irq_enter();
-	generic_smp_call_function_interrupt();
-	irq_exit();
-}
-
 static void stop_this_cpu(void *dummy)
 {
 	/*
-	 * Remove this CPU:
+	 * Remove this CPU. Be a bit slow here and
+	 * set the bits for every online CPU so we don't miss
+	 * any IPI whilst taking this VPE down.
 	 */
+
+	cpumask_copy(&cpu_foreign_map, cpu_online_mask);
+
+	/* Make it visible to every other CPU */
+	smp_mb();
+
 	set_cpu_online(smp_processor_id(), false);
+	calculate_cpu_foreign_map();
 	local_irq_disable();
 	while (1);
 }
@@ -197,6 +228,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	mp_ops->prepare_cpus(max_cpus);
 	set_cpu_sibling_map(0);
 	set_cpu_core_map(0);
+	calculate_cpu_foreign_map();
 #ifndef CONFIG_HOTPLUG_CPU
 	init_cpu_present(cpu_possible_mask);
 #endif
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 2a7b38e..8ea28e6 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -192,6 +192,7 @@ static void show_stacktrace(struct task_struct *task,
 void show_stack(struct task_struct *task, unsigned long *sp)
 {
 	struct pt_regs regs;
+	mm_segment_t old_fs = get_fs();
 	if (sp) {
 		regs.regs[29] = (unsigned long)sp;
 		regs.regs[31] = 0;
@@ -210,7 +211,13 @@ void show_stack(struct task_struct *task, unsigned long *sp)
 			prepare_frametrace(&regs);
 		}
 	}
+	/*
+	 * show_stack() deals exclusively with kernel mode, so be sure to access
+	 * the stack in the kernel (not user) address space.
+	 */
+	set_fs(KERNEL_DS);
 	show_stacktrace(task, &regs);
+	set_fs(old_fs);
 }
 
 static void show_code(unsigned int __user *pc)
@@ -1519,6 +1526,7 @@ asmlinkage void do_mcheck(struct pt_regs *regs)
 	const int field = 2 * sizeof(unsigned long);
 	int multi_match = regs->cp0_status & ST0_TS;
 	enum ctx_state prev_state;
+	mm_segment_t old_fs = get_fs();
 
 	prev_state = exception_enter();
 	show_regs(regs);
@@ -1540,8 +1548,13 @@ asmlinkage void do_mcheck(struct pt_regs *regs)
 		dump_tlb_all();
 	}
 
+	if (!user_mode(regs))
+		set_fs(KERNEL_DS);
+
 	show_code((unsigned int __user *) regs->cp0_epc);
 
+	set_fs(old_fs);
+
 	/*
 	 * Some chips may have other causes of machine check (e.g. SB1
 	 * graduation timer)
@@ -2130,10 +2143,10 @@ void per_cpu_trap_init(bool is_boot_cpu)
 	BUG_ON(current->mm);
 	enter_lazy_tlb(&init_mm, current);
 
-		/* Boot CPU's cache setup in setup_arch(). */
-		if (!is_boot_cpu)
-			cpu_cache_init();
-		tlb_init();
+	/* Boot CPU's cache setup in setup_arch(). */
+	if (!is_boot_cpu)
+		cpu_cache_init();
+	tlb_init();
 	TLBMISS_HANDLER_SETUP();
 }
 
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index af84bef..eb3efd1 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -438,7 +438,7 @@ do {                                                        \
 		: "memory");                                \
 } while(0)
 
-#define     StoreDW(addr, value, res) \
+#define     _StoreDW(addr, value, res) \
 do {                                                        \
 		__asm__ __volatile__ (                      \
 			".set\tpush\n\t"		    \
diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
index 6ab1057..2c218c3 100644
--- a/arch/mips/lantiq/irq.c
+++ b/arch/mips/lantiq/irq.c
@@ -293,7 +293,7 @@ static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
 
 static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
 {
-	smp_call_function_interrupt();
+	generic_smp_call_function_interrupt();
 	return IRQ_HANDLED;
 }
 
@@ -466,6 +466,7 @@ int get_c0_perfcount_int(void)
 {
 	return ltq_perfcount_irq;
 }
+EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
 
 unsigned int get_c0_compare_int(void)
 {
diff --git a/arch/mips/loongson64/common/bonito-irq.c b/arch/mips/loongson64/common/bonito-irq.c
index cc0e4fd..4e116d2 100644
--- a/arch/mips/loongson64/common/bonito-irq.c
+++ b/arch/mips/loongson64/common/bonito-irq.c
@@ -3,7 +3,7 @@
  * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net
  * Copyright (C) 2000, 2001 Ralf Baechle (ralf@gnu.org)
  *
- * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
  * Author: Fuxin Zhang, zhangfx@lemote.com
  *
  *  This program is free software; you can redistribute	 it and/or modify it
diff --git a/arch/mips/loongson64/common/cmdline.c b/arch/mips/loongson64/common/cmdline.c
index 72fed00..01fbed1 100644
--- a/arch/mips/loongson64/common/cmdline.c
+++ b/arch/mips/loongson64/common/cmdline.c
@@ -6,7 +6,7 @@
  * Copyright 2003 ICT CAS
  * Author: Michael Guo <guoyi@ict.ac.cn>
  *
- * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
  * Author: Fuxin Zhang, zhangfx@lemote.com
  *
  * Copyright (C) 2009 Lemote Inc.
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c b/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c
index 12c75db..8750370 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c
+++ b/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c
@@ -1,7 +1,7 @@
 /*
  * CS5536 General timer functions
  *
- * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
  * Author: Yanhua, yanh@lemote.com
  *
  * Copyright (C) 2009 Lemote Inc.
diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c
index 22f04ca..f6c44dd 100644
--- a/arch/mips/loongson64/common/env.c
+++ b/arch/mips/loongson64/common/env.c
@@ -6,7 +6,7 @@
  * Copyright 2003 ICT CAS
  * Author: Michael Guo <guoyi@ict.ac.cn>
  *
- * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
  * Author: Fuxin Zhang, zhangfx@lemote.com
  *
  * Copyright (C) 2009 Lemote Inc.
diff --git a/arch/mips/loongson64/common/irq.c b/arch/mips/loongson64/common/irq.c
index 687003b..d36d969 100644
--- a/arch/mips/loongson64/common/irq.c
+++ b/arch/mips/loongson64/common/irq.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
  * Author: Fuxin Zhang, zhangfx@lemote.com
  *
  *  This program is free software; you can redistribute	 it and/or modify it
diff --git a/arch/mips/loongson64/common/setup.c b/arch/mips/loongson64/common/setup.c
index d477dd6..2dc5122 100644
--- a/arch/mips/loongson64/common/setup.c
+++ b/arch/mips/loongson64/common/setup.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
  * Author: Fuxin Zhang, zhangfx@lemote.com
  *
  *  This program is free software; you can redistribute	 it and/or modify it
diff --git a/arch/mips/loongson64/fuloong-2e/irq.c b/arch/mips/loongson64/fuloong-2e/irq.c
index ef5ec8f..892963f8 100644
--- a/arch/mips/loongson64/fuloong-2e/irq.c
+++ b/arch/mips/loongson64/fuloong-2e/irq.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
  * Author: Fuxin Zhang, zhangfx@lemote.com
  *
  *  This program is free software; you can redistribute	 it and/or modify it
diff --git a/arch/mips/loongson64/lemote-2f/clock.c b/arch/mips/loongson64/lemote-2f/clock.c
index 462e34d..a78fb65 100644
--- a/arch/mips/loongson64/lemote-2f/clock.c
+++ b/arch/mips/loongson64/lemote-2f/clock.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006 - 2008 Lemote Inc. & Insititute of Computing Technology
+ * Copyright (C) 2006 - 2008 Lemote Inc. & Institute of Computing Technology
  * Author: Yanhua, yanh@lemote.com
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -15,7 +15,7 @@
 #include <linux/spinlock.h>
 
 #include <asm/clock.h>
-#include <asm/mach-loongson/loongson.h>
+#include <asm/mach-loongson64/loongson.h>
 
 static LIST_HEAD(clock_list);
 static DEFINE_SPINLOCK(clock_lock);
diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c
index 12d14ed..6f9e010 100644
--- a/arch/mips/loongson64/loongson-3/numa.c
+++ b/arch/mips/loongson64/loongson-3/numa.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2010 Loongson Inc. & Lemote Inc. &
- *                    Insititute of Computing Technology
+ *                    Institute of Computing Technology
  * Author:  Xiang Gao, gaoxiang@ict.ac.cn
  *          Huacai Chen, chenhc@lemote.com
  *          Xiaofu Meng, Shuangshuang Zhang
diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c
index 509877c..1a4738a 100644
--- a/arch/mips/loongson64/loongson-3/smp.c
+++ b/arch/mips/loongson64/loongson-3/smp.c
@@ -266,8 +266,11 @@ void loongson3_ipi_interrupt(struct pt_regs *regs)
 	if (action & SMP_RESCHEDULE_YOURSELF)
 		scheduler_ipi();
 
-	if (action & SMP_CALL_FUNCTION)
-		smp_call_function_interrupt();
+	if (action & SMP_CALL_FUNCTION) {
+		irq_enter();
+		generic_smp_call_function_interrupt();
+		irq_exit();
+	}
 
 	if (action & SMP_ASK_C0COUNT) {
 		BUG_ON(cpu != 0);
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 22b9b2c..712f17a 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -451,7 +451,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
 			/* Fall through */
 		case jr_op:
 			/* For R6, JR already emulated in jalr_op */
-			if (NO_R6EMU && insn.r_format.opcode == jr_op)
+			if (NO_R6EMU && insn.r_format.func == jr_op)
 				break;
 			*contpc = regs->regs[insn.r_format.rs];
 			return 1;
@@ -551,7 +551,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
 				dec_insn.next_pc_inc;
 		return 1;
 	case blezl_op:
-		if (NO_R6EMU)
+		if (!insn.i_format.rt && NO_R6EMU)
 			break;
 	case blez_op:
 
@@ -588,7 +588,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
 				dec_insn.next_pc_inc;
 		return 1;
 	case bgtzl_op:
-		if (NO_R6EMU)
+		if (!insn.i_format.rt && NO_R6EMU)
 			break;
 	case bgtz_op:
 		/*
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 7f660dc..fbea443 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -37,6 +37,7 @@
 #include <asm/cacheflush.h> /* for run_uncached() */
 #include <asm/traps.h>
 #include <asm/dma-coherence.h>
+#include <asm/mips-cm.h>
 
 /*
  * Special Variant of smp_call_function for use by cache functions:
@@ -51,9 +52,16 @@ static inline void r4k_on_each_cpu(void (*func) (void *info), void *info)
 {
 	preempt_disable();
 
-#ifndef CONFIG_MIPS_MT_SMP
-	smp_call_function(func, info, 1);
-#endif
+	/*
+	 * The Coherent Manager propagates address-based cache ops to other
+	 * cores but not index-based ops. However, r4k_on_each_cpu is used
+	 * in both cases so there is no easy way to tell what kind of op is
+	 * executed to the other cores. The best we can probably do is
+	 * to restrict that call when a CM is not present because both
+	 * CM-based SMP protocols (CMP & CPS) restrict index-based cache ops.
+	 */
+	if (!mips_cm_present())
+		smp_call_function_many(&cpu_foreign_map, func, info, 1);
 	func(info);
 	preempt_enable();
 }
@@ -937,7 +945,9 @@ static void b5k_instruction_hazard(void)
 }
 
 static char *way_string[] = { NULL, "direct mapped", "2-way",
-	"3-way", "4-way", "5-way", "6-way", "7-way", "8-way"
+	"3-way", "4-way", "5-way", "6-way", "7-way", "8-way",
+	"9-way", "10-way", "11-way", "12-way",
+	"13-way", "14-way", "15-way", "16-way",
 };
 
 static void probe_pcache(void)
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 77d96db..aab218c 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -160,18 +160,18 @@ static inline void setup_protection_map(void)
 		protection_map[1]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC);
 		protection_map[2]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
 		protection_map[3]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC);
-		protection_map[4]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_READ);
+		protection_map[4]  = __pgprot(_page_cachable_default | _PAGE_PRESENT);
 		protection_map[5]  = __pgprot(_page_cachable_default | _PAGE_PRESENT);
-		protection_map[6]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_READ);
+		protection_map[6]  = __pgprot(_page_cachable_default | _PAGE_PRESENT);
 		protection_map[7]  = __pgprot(_page_cachable_default | _PAGE_PRESENT);
 
 		protection_map[8]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
 		protection_map[9]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC);
 		protection_map[10] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | _PAGE_NO_READ);
 		protection_map[11] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
-		protection_map[12] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_READ);
+		protection_map[12] = __pgprot(_page_cachable_default | _PAGE_PRESENT);
 		protection_map[13] = __pgprot(_page_cachable_default | _PAGE_PRESENT);
-		protection_map[14] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_WRITE  | _PAGE_NO_READ);
+		protection_map[14] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_WRITE);
 		protection_map[15] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_WRITE);
 
 	} else {
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 36c0f26..852a41c 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -133,7 +133,8 @@ good_area:
 #endif
 				goto bad_area;
 			}
-			if (!(vma->vm_flags & VM_READ)) {
+			if (!(vma->vm_flags & VM_READ) &&
+			    exception_epc(regs) != address) {
 #if 0
 				pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] RI violation\n",
 					  raw_smp_processor_id(),
diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c
index d1392f8..fa8f591 100644
--- a/arch/mips/mti-malta/malta-int.c
+++ b/arch/mips/mti-malta/malta-int.c
@@ -222,7 +222,7 @@ static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
 
 static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
 {
-	smp_call_function_interrupt();
+	generic_smp_call_function_interrupt();
 
 	return IRQ_HANDLED;
 }
diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c
index 185e682..b7bf721 100644
--- a/arch/mips/mti-malta/malta-time.c
+++ b/arch/mips/mti-malta/malta-time.c
@@ -119,18 +119,24 @@ void read_persistent_clock(struct timespec *ts)
 
 int get_c0_fdc_int(void)
 {
-	int mips_cpu_fdc_irq;
+	/*
+	 * Some cores claim the FDC is routable through the GIC, but it doesn't
+	 * actually seem to be connected for those Malta bitstreams.
+	 */
+	switch (current_cpu_type()) {
+	case CPU_INTERAPTIV:
+	case CPU_PROAPTIV:
+		return -1;
+	};
 
 	if (cpu_has_veic)
-		mips_cpu_fdc_irq = -1;
+		return -1;
 	else if (gic_present)
-		mips_cpu_fdc_irq = gic_get_c0_fdc_int();
+		return gic_get_c0_fdc_int();
 	else if (cp0_fdc_irq >= 0)
-		mips_cpu_fdc_irq = MIPS_CPU_IRQ_BASE + cp0_fdc_irq;
+		return MIPS_CPU_IRQ_BASE + cp0_fdc_irq;
 	else
-		mips_cpu_fdc_irq = -1;
-
-	return mips_cpu_fdc_irq;
+		return -1;
 }
 
 int get_c0_perfcount_int(void)
@@ -148,6 +154,7 @@ int get_c0_perfcount_int(void)
 
 	return mips_cpu_perf_irq;
 }
+EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
 
 unsigned int get_c0_compare_int(void)
 {
@@ -165,14 +172,17 @@ unsigned int get_c0_compare_int(void)
 
 static void __init init_rtc(void)
 {
-	/* stop the clock whilst setting it up */
-	CMOS_WRITE(RTC_SET | RTC_24H, RTC_CONTROL);
+	unsigned char freq, ctrl;
 
-	/* 32KHz time base */
-	CMOS_WRITE(RTC_REF_CLCK_32KHZ, RTC_FREQ_SELECT);
+	/* Set 32KHz time base if not already set */
+	freq = CMOS_READ(RTC_FREQ_SELECT);
+	if ((freq & RTC_DIV_CTL) != RTC_REF_CLCK_32KHZ)
+		CMOS_WRITE(RTC_REF_CLCK_32KHZ, RTC_FREQ_SELECT);
 
-	/* start the clock */
-	CMOS_WRITE(RTC_24H, RTC_CONTROL);
+	/* Ensure SET bit is clear so RTC can run */
+	ctrl = CMOS_READ(RTC_CONTROL);
+	if (ctrl & RTC_SET)
+		CMOS_WRITE(ctrl & ~RTC_SET, RTC_CONTROL);
 }
 
 void __init plat_time_init(void)
diff --git a/arch/mips/mti-sead3/sead3-time.c b/arch/mips/mti-sead3/sead3-time.c
index e1d6989..a120b7a 100644
--- a/arch/mips/mti-sead3/sead3-time.c
+++ b/arch/mips/mti-sead3/sead3-time.c
@@ -77,6 +77,7 @@ int get_c0_perfcount_int(void)
 		return MIPS_CPU_IRQ_BASE + cp0_perfcount_irq;
 	return -1;
 }
+EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
 
 unsigned int get_c0_compare_int(void)
 {
diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c
index dc3e327..f5fff22 100644
--- a/arch/mips/netlogic/common/smp.c
+++ b/arch/mips/netlogic/common/smp.c
@@ -86,7 +86,7 @@ void nlm_smp_function_ipi_handler(unsigned int irq, struct irq_desc *desc)
 {
 	clear_c0_eimr(irq);
 	ack_c0_eirr(irq);
-	smp_call_function_interrupt();
+	generic_smp_call_function_interrupt();
 	set_c0_eimr(irq);
 }
 
diff --git a/arch/mips/paravirt/paravirt-smp.c b/arch/mips/paravirt/paravirt-smp.c
index 42181c7..f8d3e08 100644
--- a/arch/mips/paravirt/paravirt-smp.c
+++ b/arch/mips/paravirt/paravirt-smp.c
@@ -114,7 +114,7 @@ static irqreturn_t paravirt_reched_interrupt(int irq, void *dev_id)
 
 static irqreturn_t paravirt_function_interrupt(int irq, void *dev_id)
 {
-	smp_call_function_interrupt();
+	generic_smp_call_function_interrupt();
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index b8a0bf5..c6996cf 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -311,12 +311,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 
 void pcibios_fixup_bus(struct pci_bus *bus)
 {
-	struct pci_dev *dev = bus->self;
-
-	if (pci_has_flag(PCI_PROBE_ONLY) && dev &&
-	    (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
-		pci_read_bridge_bases(bus);
-	}
 }
 
 EXPORT_SYMBOL(PCIBIOS_MIN_IO);
diff --git a/arch/mips/pistachio/init.c b/arch/mips/pistachio/init.c
index d2dc836..8bd8ebb 100644
--- a/arch/mips/pistachio/init.c
+++ b/arch/mips/pistachio/init.c
@@ -63,13 +63,19 @@ void __init plat_mem_setup(void)
 	plat_setup_iocoherency();
 }
 
-#define DEFAULT_CPC_BASE_ADDR 0x1bde0000
+#define DEFAULT_CPC_BASE_ADDR	0x1bde0000
+#define DEFAULT_CDMM_BASE_ADDR	0x1bdd0000
 
 phys_addr_t mips_cpc_default_phys_base(void)
 {
 	return DEFAULT_CPC_BASE_ADDR;
 }
 
+phys_addr_t mips_cdmm_phys_base(void)
+{
+	return DEFAULT_CDMM_BASE_ADDR;
+}
+
 static void __init mips_nmi_setup(void)
 {
 	void *base;
diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c
index 67889fc..8a37734 100644
--- a/arch/mips/pistachio/time.c
+++ b/arch/mips/pistachio/time.c
@@ -26,6 +26,12 @@ int get_c0_perfcount_int(void)
 {
 	return gic_get_c0_perfcount_int();
 }
+EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
+
+int get_c0_fdc_int(void)
+{
+	return gic_get_c0_fdc_int();
+}
 
 void __init plat_time_init(void)
 {
diff --git a/arch/mips/pmcs-msp71xx/msp_smp.c b/arch/mips/pmcs-msp71xx/msp_smp.c
index 1017058..ffa0f71 100644
--- a/arch/mips/pmcs-msp71xx/msp_smp.c
+++ b/arch/mips/pmcs-msp71xx/msp_smp.c
@@ -44,7 +44,7 @@ static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
 
 static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
 {
-	smp_call_function_interrupt();
+	generic_smp_call_function_interrupt();
 
 	return IRQ_HANDLED;
 }
diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c
index 53707aa..8c624a8 100644
--- a/arch/mips/ralink/irq.c
+++ b/arch/mips/ralink/irq.c
@@ -89,6 +89,7 @@ int get_c0_perfcount_int(void)
 {
 	return rt_perfcount_irq;
 }
+EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
 
 unsigned int get_c0_compare_int(void)
 {
diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index 3fbaef9..16ec4e1 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c
@@ -107,10 +107,14 @@ static void ip27_do_irq_mask0(void)
 		scheduler_ipi();
 	} else if (pend0 & (1UL << CPU_CALL_A_IRQ)) {
 		LOCAL_HUB_CLR_INTR(CPU_CALL_A_IRQ);
-		smp_call_function_interrupt();
+		irq_enter();
+		generic_smp_call_function_interrupt();
+		irq_exit();
 	} else if (pend0 & (1UL << CPU_CALL_B_IRQ)) {
 		LOCAL_HUB_CLR_INTR(CPU_CALL_B_IRQ);
-		smp_call_function_interrupt();
+		irq_enter();
+		generic_smp_call_function_interrupt();
+		irq_exit();
 	} else
 #endif
 	{
diff --git a/arch/mips/sibyte/Kconfig b/arch/mips/sibyte/Kconfig
index a8bb972..cb9a095 100644
--- a/arch/mips/sibyte/Kconfig
+++ b/arch/mips/sibyte/Kconfig
@@ -81,11 +81,6 @@ choice
 	prompt "SiByte SOC Stepping"
 	depends on SIBYTE_SB1xxx_SOC
 
-config CPU_SB1_PASS_1
-	bool "1250 Pass1"
-	depends on SIBYTE_SB1250
-	select CPU_HAS_PREFETCH
-
 config CPU_SB1_PASS_2_1250
 	bool "1250 An"
 	depends on SIBYTE_SB1250
diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c
index af7d44e..4c71aea 100644
--- a/arch/mips/sibyte/bcm1480/smp.c
+++ b/arch/mips/sibyte/bcm1480/smp.c
@@ -29,8 +29,6 @@
 #include <asm/sibyte/bcm1480_regs.h>
 #include <asm/sibyte/bcm1480_int.h>
 
-extern void smp_call_function_interrupt(void);
-
 /*
  * These are routines for dealing with the bcm1480 smp capabilities
  * independent of board/firmware
@@ -184,6 +182,9 @@ void bcm1480_mailbox_interrupt(void)
 	if (action & SMP_RESCHEDULE_YOURSELF)
 		scheduler_ipi();
 
-	if (action & SMP_CALL_FUNCTION)
-		smp_call_function_interrupt();
+	if (action & SMP_CALL_FUNCTION) {
+		irq_enter();
+		generic_smp_call_function_interrupt();
+		irq_exit();
+	}
 }
diff --git a/arch/mips/sibyte/common/bus_watcher.c b/arch/mips/sibyte/common/bus_watcher.c
index 5581844..41a1d22 100644
--- a/arch/mips/sibyte/common/bus_watcher.c
+++ b/arch/mips/sibyte/common/bus_watcher.c
@@ -81,10 +81,7 @@ void check_bus_watcher(void)
 {
 	u32 status, l2_err, memio_err;
 
-#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS
-	/* Destructive read, clears register and interrupt */
-	status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS));
-#elif defined(CONFIG_SIBYTE_BCM112X) || defined(CONFIG_SIBYTE_SB1250)
+#if defined(CONFIG_SIBYTE_BCM112X) || defined(CONFIG_SIBYTE_SB1250)
 	/* Use non-destructive register */
 	status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS_DEBUG));
 #elif defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80)
diff --git a/arch/mips/sibyte/sb1250/setup.c b/arch/mips/sibyte/sb1250/setup.c
index 3c02b2a..9d3c24e 100644
--- a/arch/mips/sibyte/sb1250/setup.c
+++ b/arch/mips/sibyte/sb1250/setup.c
@@ -202,12 +202,10 @@ void __init sb1250_setup(void)
 
 	switch (war_pass) {
 	case K_SYS_REVISION_BCM1250_PASS1:
-#ifndef CONFIG_SB1_PASS_1_WORKAROUNDS
 		printk("@@@@ This is a BCM1250 A0-A2 (Pass 1) board, "
 			    "and the kernel doesn't have the proper "
 			    "workarounds compiled in. @@@@\n");
 		bad_config = 1;
-#endif
 		break;
 	case K_SYS_REVISION_BCM1250_PASS2:
 		/* Pass 2 - easiest as default for now - so many numbers */
diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c
index c0c4b3f..1cf66f5 100644
--- a/arch/mips/sibyte/sb1250/smp.c
+++ b/arch/mips/sibyte/sb1250/smp.c
@@ -172,6 +172,9 @@ void sb1250_mailbox_interrupt(void)
 	if (action & SMP_RESCHEDULE_YOURSELF)
 		scheduler_ipi();
 
-	if (action & SMP_CALL_FUNCTION)
-		smp_call_function_interrupt();
+	if (action & SMP_CALL_FUNCTION) {
+		irq_enter();
+		generic_smp_call_function_interrupt();
+		irq_exit();
+	}
 }
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index de30b0c..6edb9ee 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += cputime.h
 generic-y += exec.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += sections.h
 generic-y += trace_clock.h
diff --git a/arch/mn10300/include/asm/mm-arch-hooks.h b/arch/mn10300/include/asm/mm-arch-hooks.h
deleted file mode 100644
index e2029a6..0000000
--- a/arch/mn10300/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_MN10300_MM_ARCH_HOOKS_H
-#define _ASM_MN10300_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_MN10300_MM_ARCH_HOOKS_H */
diff --git a/arch/mn10300/unit-asb2305/pci-asb2305.c b/arch/mn10300/unit-asb2305/pci-asb2305.c
index b5b036f..b7ab837 100644
--- a/arch/mn10300/unit-asb2305/pci-asb2305.c
+++ b/arch/mn10300/unit-asb2305/pci-asb2305.c
@@ -183,18 +183,16 @@ static int __init pcibios_assign_resources(void)
 	struct pci_dev *dev = NULL;
 	struct resource *r;
 
-	if (!(pci_probe & PCI_ASSIGN_ROMS)) {
-		/* Try to use BIOS settings for ROMs, otherwise let
-		   pci_assign_unassigned_resources() allocate the new
-		   addresses. */
-		for_each_pci_dev(dev) {
-			r = &dev->resource[PCI_ROM_RESOURCE];
-			if (!r->flags || !r->start)
-				continue;
-			if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) {
-				r->end -= r->start;
-				r->start = 0;
-			}
+	/* Try to use BIOS settings for ROMs, otherwise let
+	   pci_assign_unassigned_resources() allocate the new
+	   addresses. */
+	for_each_pci_dev(dev) {
+		r = &dev->resource[PCI_ROM_RESOURCE];
+		if (!r->flags || !r->start)
+			continue;
+		if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) {
+			r->end -= r->start;
+			r->start = 0;
 		}
 	}
 
diff --git a/arch/mn10300/unit-asb2305/pci-asb2305.h b/arch/mn10300/unit-asb2305/pci-asb2305.h
index 9e17aca..96c484b 100644
--- a/arch/mn10300/unit-asb2305/pci-asb2305.h
+++ b/arch/mn10300/unit-asb2305/pci-asb2305.h
@@ -20,13 +20,6 @@
 #define DBG(x...)
 #endif
 
-#define PCI_PROBE_BIOS 1
-#define PCI_PROBE_CONF1 2
-#define PCI_PROBE_CONF2 4
-#define PCI_NO_CHECKS 0x400
-#define PCI_ASSIGN_ROMS 0x1000
-#define PCI_BIOS_IRQ_SCAN 0x2000
-
 extern unsigned int pci_probe;
 
 /* pci-asb2305.c */
diff --git a/arch/mn10300/unit-asb2305/pci.c b/arch/mn10300/unit-asb2305/pci.c
index 3dfe2d3..deaa893 100644
--- a/arch/mn10300/unit-asb2305/pci.c
+++ b/arch/mn10300/unit-asb2305/pci.c
@@ -324,7 +324,6 @@ void pcibios_fixup_bus(struct pci_bus *bus)
 	struct pci_dev *dev;
 
 	if (bus->self) {
-		pci_read_bridge_bases(bus);
 		pcibios_fixup_bridge_resources(bus->self);
 	}
 
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 434639d..914864e 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -30,6 +30,7 @@ generic-y += kmap_types.h
 generic-y += kvm_para.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += module.h
 generic-y += msgbuf.h
diff --git a/arch/nios2/include/asm/mm-arch-hooks.h b/arch/nios2/include/asm/mm-arch-hooks.h
deleted file mode 100644
index d7290dc..0000000
--- a/arch/nios2/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_NIOS2_MM_ARCH_HOOKS_H
-#define _ASM_NIOS2_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_NIOS2_MM_ARCH_HOOKS_H */
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index e5a693b..443f44d 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -17,6 +17,7 @@ config OPENRISC
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IOMAP
 	select GENERIC_CPU_DEVICES
+	select HAVE_UID16
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_STRNCPY_FROM_USER
@@ -31,9 +32,6 @@ config MMU
 config HAVE_DMA_ATTRS
 	def_bool y
 
-config UID16
-	def_bool y
-
 config RWSEM_GENERIC_SPINLOCK
 	def_bool y
 
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 2a2e39b..2832f03 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -36,6 +36,7 @@ generic-y += kmap_types.h
 generic-y += kvm_para.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += module.h
 generic-y += msgbuf.h
diff --git a/arch/openrisc/include/asm/mm-arch-hooks.h b/arch/openrisc/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 6d33cb5..0000000
--- a/arch/openrisc/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_OPENRISC_MM_ARCH_HOOKS_H
-#define _ASM_OPENRISC_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_OPENRISC_MM_ARCH_HOOKS_H */
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 12b341d..f9b3a81 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -15,6 +15,7 @@ generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mutex.h
 generic-y += param.h
 generic-y += percpu.h
diff --git a/arch/parisc/include/asm/mm-arch-hooks.h b/arch/parisc/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 654ec63..0000000
--- a/arch/parisc/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_PARISC_MM_ARCH_HOOKS_H
-#define _ASM_PARISC_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_PARISC_MM_ARCH_HOOKS_H */
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index 3a08eae..3edbb9f 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -72,7 +72,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
-	if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
+	if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) {
 		/*
 		 * This is the permanent pmd attached to the pgd;
 		 * cannot free it.
@@ -81,6 +81,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 		 */
 		mm_inc_nr_pmds(mm);
 		return;
+	}
 	free_pages((unsigned long)pmd, PMD_ORDER);
 }
 
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 0a18375..f93c4a4 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -16,7 +16,7 @@
 #include <asm/processor.h>
 #include <asm/cache.h>
 
-extern spinlock_t pa_dbit_lock;
+extern spinlock_t pa_tlb_lock;
 
 /*
  * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
@@ -33,6 +33,19 @@ extern spinlock_t pa_dbit_lock;
  */
 #define kern_addr_valid(addr)	(1)
 
+/* Purge data and instruction TLB entries.  Must be called holding
+ * the pa_tlb_lock.  The TLB purge instructions are slow on SMP
+ * machines since the purge must be broadcast to all CPUs.
+ */
+
+static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
+{
+	mtsp(mm->context, 1);
+	pdtlb(addr);
+	if (unlikely(split_tlb))
+		pitlb(addr);
+}
+
 /* Certain architectures need to do special things when PTEs
  * within a page table are directly modified.  Thus, the following
  * hook is made available.
@@ -42,15 +55,20 @@ extern spinlock_t pa_dbit_lock;
                 *(pteptr) = (pteval);                           \
         } while(0)
 
-extern void purge_tlb_entries(struct mm_struct *, unsigned long);
+#define pte_inserted(x)						\
+	((pte_val(x) & (_PAGE_PRESENT|_PAGE_ACCESSED))		\
+	 == (_PAGE_PRESENT|_PAGE_ACCESSED))
 
-#define set_pte_at(mm, addr, ptep, pteval)                      \
-	do {                                                    \
+#define set_pte_at(mm, addr, ptep, pteval)			\
+	do {							\
+		pte_t old_pte;					\
 		unsigned long flags;				\
-		spin_lock_irqsave(&pa_dbit_lock, flags);	\
-		set_pte(ptep, pteval);                          \
-		purge_tlb_entries(mm, addr);                    \
-		spin_unlock_irqrestore(&pa_dbit_lock, flags);	\
+		spin_lock_irqsave(&pa_tlb_lock, flags);		\
+		old_pte = *ptep;				\
+		set_pte(ptep, pteval);				\
+		if (pte_inserted(old_pte))			\
+			purge_tlb_entries(mm, addr);		\
+		spin_unlock_irqrestore(&pa_tlb_lock, flags);	\
 	} while (0)
 
 #endif /* !__ASSEMBLY__ */
@@ -268,7 +286,7 @@ extern unsigned long *empty_zero_page;
 
 #define pte_none(x)     (pte_val(x) == 0)
 #define pte_present(x)	(pte_val(x) & _PAGE_PRESENT)
-#define pte_clear(mm,addr,xp)	do { pte_val(*(xp)) = 0; } while (0)
+#define pte_clear(mm, addr, xp)  set_pte_at(mm, addr, xp, __pte(0))
 
 #define pmd_flag(x)	(pmd_val(x) & PxD_FLAG_MASK)
 #define pmd_address(x)	((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
@@ -435,15 +453,15 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned
 	if (!pte_young(*ptep))
 		return 0;
 
-	spin_lock_irqsave(&pa_dbit_lock, flags);
+	spin_lock_irqsave(&pa_tlb_lock, flags);
 	pte = *ptep;
 	if (!pte_young(pte)) {
-		spin_unlock_irqrestore(&pa_dbit_lock, flags);
+		spin_unlock_irqrestore(&pa_tlb_lock, flags);
 		return 0;
 	}
 	set_pte(ptep, pte_mkold(pte));
 	purge_tlb_entries(vma->vm_mm, addr);
-	spin_unlock_irqrestore(&pa_dbit_lock, flags);
+	spin_unlock_irqrestore(&pa_tlb_lock, flags);
 	return 1;
 }
 
@@ -453,11 +471,12 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 	pte_t old_pte;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pa_dbit_lock, flags);
+	spin_lock_irqsave(&pa_tlb_lock, flags);
 	old_pte = *ptep;
-	pte_clear(mm,addr,ptep);
-	purge_tlb_entries(mm, addr);
-	spin_unlock_irqrestore(&pa_dbit_lock, flags);
+	set_pte(ptep, __pte(0));
+	if (pte_inserted(old_pte))
+		purge_tlb_entries(mm, addr);
+	spin_unlock_irqrestore(&pa_tlb_lock, flags);
 
 	return old_pte;
 }
@@ -465,10 +484,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	unsigned long flags;
-	spin_lock_irqsave(&pa_dbit_lock, flags);
+	spin_lock_irqsave(&pa_tlb_lock, flags);
 	set_pte(ptep, pte_wrprotect(*ptep));
 	purge_tlb_entries(mm, addr);
-	spin_unlock_irqrestore(&pa_dbit_lock, flags);
+	spin_unlock_irqrestore(&pa_tlb_lock, flags);
 }
 
 #define pte_same(A,B)	(pte_val(A) == pte_val(B))
diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h
index 9d086a5..e84b964 100644
--- a/arch/parisc/include/asm/tlbflush.h
+++ b/arch/parisc/include/asm/tlbflush.h
@@ -13,6 +13,9 @@
  * active at any one time on the Merced bus.  This tlb purge
  * synchronisation is fairly lightweight and harmless so we activate
  * it on all systems not just the N class.
+
+ * It is also used to ensure PTE updates are atomic and consistent
+ * with the TLB.
  */
 extern spinlock_t pa_tlb_lock;
 
@@ -24,20 +27,24 @@ extern void flush_tlb_all_local(void *);
 
 #define smp_flush_tlb_all()	flush_tlb_all()
 
+int __flush_tlb_range(unsigned long sid,
+	unsigned long start, unsigned long end);
+
+#define flush_tlb_range(vma, start, end) \
+	__flush_tlb_range((vma)->vm_mm->context, start, end)
+
+#define flush_tlb_kernel_range(start, end) \
+	__flush_tlb_range(0, start, end)
+
 /*
  * flush_tlb_mm()
  *
- * XXX This code is NOT valid for HP-UX compatibility processes,
- * (although it will probably work 99% of the time). HP-UX
- * processes are free to play with the space id's and save them
- * over long periods of time, etc. so we have to preserve the
- * space and just flush the entire tlb. We need to check the
- * personality in order to do that, but the personality is not
- * currently being set correctly.
- *
- * Of course, Linux processes could do the same thing, but
- * we don't support that (and the compilers, dynamic linker,
- * etc. do not do that).
+ * The code to switch to a new context is NOT valid for processes
+ * which play with the space id's.  Thus, we have to preserve the
+ * space and just flush the entire tlb.  However, the compilers,
+ * dynamic linker, etc, do not manipulate space id's, so there
+ * could be a significant performance benefit in switching contexts
+ * and not flushing the whole tlb.
  */
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
@@ -45,10 +52,18 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 	BUG_ON(mm == &init_mm); /* Should never happen */
 
 #if 1 || defined(CONFIG_SMP)
+	/* Except for very small threads, flushing the whole TLB is
+	 * faster than using __flush_tlb_range.  The pdtlb and pitlb
+	 * instructions are very slow because of the TLB broadcast.
+	 * It might be faster to do local range flushes on all CPUs
+	 * on PA 2.0 systems.
+	 */
 	flush_tlb_all();
 #else
 	/* FIXME: currently broken, causing space id and protection ids
-	 *  to go out of sync, resulting in faults on userspace accesses.
+	 * to go out of sync, resulting in faults on userspace accesses.
+	 * This approach needs further investigation since running many
+	 * small applications (e.g., GCC testsuite) is faster on HP-UX.
 	 */
 	if (mm) {
 		if (mm->context != 0)
@@ -65,22 +80,12 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
 {
 	unsigned long flags, sid;
 
-	/* For one page, it's not worth testing the split_tlb variable */
-
-	mb();
 	sid = vma->vm_mm->context;
 	purge_tlb_start(flags);
 	mtsp(sid, 1);
 	pdtlb(addr);
-	pitlb(addr);
+	if (unlikely(split_tlb))
+		pitlb(addr);
 	purge_tlb_end(flags);
 }
-
-void __flush_tlb_range(unsigned long sid,
-	unsigned long start, unsigned long end);
-
-#define flush_tlb_range(vma,start,end) __flush_tlb_range((vma)->vm_mm->context,start,end)
-
-#define flush_tlb_kernel_range(start, end) __flush_tlb_range(0,start,end)
-
 #endif
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index f6448c7..cda6dbb 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -342,12 +342,15 @@ EXPORT_SYMBOL(flush_data_cache_local);
 EXPORT_SYMBOL(flush_kernel_icache_range_asm);
 
 #define FLUSH_THRESHOLD 0x80000 /* 0.5MB */
-int parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD;
+static unsigned long parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD;
+
+#define FLUSH_TLB_THRESHOLD (2*1024*1024) /* 2MB initial TLB threshold */
+static unsigned long parisc_tlb_flush_threshold __read_mostly = FLUSH_TLB_THRESHOLD;
 
 void __init parisc_setup_cache_timing(void)
 {
 	unsigned long rangetime, alltime;
-	unsigned long size;
+	unsigned long size, start;
 
 	alltime = mfctl(16);
 	flush_data_cache();
@@ -364,14 +367,43 @@ void __init parisc_setup_cache_timing(void)
 	/* Racy, but if we see an intermediate value, it's ok too... */
 	parisc_cache_flush_threshold = size * alltime / rangetime;
 
-	parisc_cache_flush_threshold = (parisc_cache_flush_threshold + L1_CACHE_BYTES - 1) &~ (L1_CACHE_BYTES - 1); 
+	parisc_cache_flush_threshold = L1_CACHE_ALIGN(parisc_cache_flush_threshold);
 	if (!parisc_cache_flush_threshold)
 		parisc_cache_flush_threshold = FLUSH_THRESHOLD;
 
 	if (parisc_cache_flush_threshold > cache_info.dc_size)
 		parisc_cache_flush_threshold = cache_info.dc_size;
 
-	printk(KERN_INFO "Setting cache flush threshold to %x (%d CPUs online)\n", parisc_cache_flush_threshold, num_online_cpus());
+	printk(KERN_INFO "Setting cache flush threshold to %lu kB\n",
+		parisc_cache_flush_threshold/1024);
+
+	/* calculate TLB flush threshold */
+
+	alltime = mfctl(16);
+	flush_tlb_all();
+	alltime = mfctl(16) - alltime;
+
+	size = PAGE_SIZE;
+	start = (unsigned long) _text;
+	rangetime = mfctl(16);
+	while (start < (unsigned long) _end) {
+		flush_tlb_kernel_range(start, start + PAGE_SIZE);
+		start += PAGE_SIZE;
+		size += PAGE_SIZE;
+	}
+	rangetime = mfctl(16) - rangetime;
+
+	printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n",
+		alltime, size, rangetime);
+
+	parisc_tlb_flush_threshold = size * alltime / rangetime;
+	parisc_tlb_flush_threshold *= num_online_cpus();
+	parisc_tlb_flush_threshold = PAGE_ALIGN(parisc_tlb_flush_threshold);
+	if (!parisc_tlb_flush_threshold)
+		parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD;
+
+	printk(KERN_INFO "Setting TLB flush threshold to %lu kB\n",
+		parisc_tlb_flush_threshold/1024);
 }
 
 extern void purge_kernel_dcache_page_asm(unsigned long);
@@ -403,48 +435,45 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
 }
 EXPORT_SYMBOL(copy_user_page);
 
-void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
-{
-	unsigned long flags;
-
-	/* Note: purge_tlb_entries can be called at startup with
-	   no context.  */
-
-	purge_tlb_start(flags);
-	mtsp(mm->context, 1);
-	pdtlb(addr);
-	pitlb(addr);
-	purge_tlb_end(flags);
-}
-EXPORT_SYMBOL(purge_tlb_entries);
-
-void __flush_tlb_range(unsigned long sid, unsigned long start,
-		       unsigned long end)
+/* __flush_tlb_range()
+ *
+ * returns 1 if all TLBs were flushed.
+ */
+int __flush_tlb_range(unsigned long sid, unsigned long start,
+		      unsigned long end)
 {
-	unsigned long npages;
+	unsigned long flags, size;
 
-	npages = ((end - (start & PAGE_MASK)) + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
-	if (npages >= 512)  /* 2MB of space: arbitrary, should be tuned */
+	size = (end - start);
+	if (size >= parisc_tlb_flush_threshold) {
 		flush_tlb_all();
-	else {
-		unsigned long flags;
+		return 1;
+	}
 
+	/* Purge TLB entries for small ranges using the pdtlb and
+	   pitlb instructions.  These instructions execute locally
+	   but cause a purge request to be broadcast to other TLBs.  */
+	if (likely(!split_tlb)) {
+		while (start < end) {
+			purge_tlb_start(flags);
+			mtsp(sid, 1);
+			pdtlb(start);
+			purge_tlb_end(flags);
+			start += PAGE_SIZE;
+		}
+		return 0;
+	}
+
+	/* split TLB case */
+	while (start < end) {
 		purge_tlb_start(flags);
 		mtsp(sid, 1);
-		if (split_tlb) {
-			while (npages--) {
-				pdtlb(start);
-				pitlb(start);
-				start += PAGE_SIZE;
-			}
-		} else {
-			while (npages--) {
-				pdtlb(start);
-				start += PAGE_SIZE;
-			}
-		}
+		pdtlb(start);
+		pitlb(start);
 		purge_tlb_end(flags);
+		start += PAGE_SIZE;
 	}
+	return 0;
 }
 
 static void cacheflush_h_tmp_function(void *dummy)
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 7581961..c5ef408 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -45,7 +45,7 @@
 	.level 2.0
 #endif
 
-	.import         pa_dbit_lock,data
+	.import		pa_tlb_lock,data
 
 	/* space_to_prot macro creates a prot id from a space id */
 
@@ -420,8 +420,8 @@
 	SHLREG		%r9,PxD_VALUE_SHIFT,\pmd
 	extru		\va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index
 	dep		%r0,31,PAGE_SHIFT,\pmd  /* clear offset */
-	shladd		\index,BITS_PER_PTE_ENTRY,\pmd,\pmd
-	LDREG		%r0(\pmd),\pte		/* pmd is now pte */
+	shladd		\index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */
+	LDREG		%r0(\pmd),\pte
 	bb,>=,n		\pte,_PAGE_PRESENT_BIT,\fault
 	.endm
 
@@ -453,57 +453,53 @@
 	L2_ptep		\pgd,\pte,\index,\va,\fault
 	.endm
 
-	/* Acquire pa_dbit_lock lock. */
-	.macro		dbit_lock	spc,tmp,tmp1
+	/* Acquire pa_tlb_lock lock and recheck page is still present. */
+	.macro		tlb_lock	spc,ptp,pte,tmp,tmp1,fault
 #ifdef CONFIG_SMP
 	cmpib,COND(=),n	0,\spc,2f
-	load32		PA(pa_dbit_lock),\tmp
+	load32		PA(pa_tlb_lock),\tmp
 1:	LDCW		0(\tmp),\tmp1
 	cmpib,COND(=)	0,\tmp1,1b
 	nop
+	LDREG		0(\ptp),\pte
+	bb,<,n		\pte,_PAGE_PRESENT_BIT,2f
+	b		\fault
+	stw		 \spc,0(\tmp)
 2:
 #endif
 	.endm
 
-	/* Release pa_dbit_lock lock without reloading lock address. */
-	.macro		dbit_unlock0	spc,tmp
+	/* Release pa_tlb_lock lock without reloading lock address. */
+	.macro		tlb_unlock0	spc,tmp
 #ifdef CONFIG_SMP
 	or,COND(=)	%r0,\spc,%r0
 	stw             \spc,0(\tmp)
 #endif
 	.endm
 
-	/* Release pa_dbit_lock lock. */
-	.macro		dbit_unlock1	spc,tmp
+	/* Release pa_tlb_lock lock. */
+	.macro		tlb_unlock1	spc,tmp
 #ifdef CONFIG_SMP
-	load32		PA(pa_dbit_lock),\tmp
-	dbit_unlock0	\spc,\tmp
+	load32		PA(pa_tlb_lock),\tmp
+	tlb_unlock0	\spc,\tmp
 #endif
 	.endm
 
 	/* Set the _PAGE_ACCESSED bit of the PTE.  Be clever and
 	 * don't needlessly dirty the cache line if it was already set */
-	.macro		update_ptep	spc,ptep,pte,tmp,tmp1
-#ifdef CONFIG_SMP
-	or,COND(=)	%r0,\spc,%r0
-	LDREG		0(\ptep),\pte
-#endif
+	.macro		update_accessed	ptp,pte,tmp,tmp1
 	ldi		_PAGE_ACCESSED,\tmp1
 	or		\tmp1,\pte,\tmp
 	and,COND(<>)	\tmp1,\pte,%r0
-	STREG		\tmp,0(\ptep)
+	STREG		\tmp,0(\ptp)
 	.endm
 
 	/* Set the dirty bit (and accessed bit).  No need to be
 	 * clever, this is only used from the dirty fault */
-	.macro		update_dirty	spc,ptep,pte,tmp
-#ifdef CONFIG_SMP
-	or,COND(=)	%r0,\spc,%r0
-	LDREG		0(\ptep),\pte
-#endif
+	.macro		update_dirty	ptp,pte,tmp
 	ldi		_PAGE_ACCESSED|_PAGE_DIRTY,\tmp
 	or		\tmp,\pte,\pte
-	STREG		\pte,0(\ptep)
+	STREG		\pte,0(\ptp)
 	.endm
 
 	/* bitshift difference between a PFN (based on kernel's PAGE_SIZE)
@@ -1148,14 +1144,14 @@ dtlb_miss_20w:
 
 	L3_ptep		ptp,pte,t0,va,dtlb_check_alias_20w
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dtlb_check_alias_20w
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 	
 	idtlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1174,14 +1170,14 @@ nadtlb_miss_20w:
 
 	L3_ptep		ptp,pte,t0,va,nadtlb_check_alias_20w
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,nadtlb_check_alias_20w
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
 	idtlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1202,20 +1198,20 @@ dtlb_miss_11:
 
 	L2_ptep		ptp,pte,t0,va,dtlb_check_alias_11
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dtlb_check_alias_11
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
-	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mfsp		%sr1,t1  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
 
 	idtlba		pte,(%sr1,va)
 	idtlbp		prot,(%sr1,va)
 
-	mtsp		t0, %sr1	/* Restore sr1 */
-	dbit_unlock1	spc,t0
+	mtsp		t1, %sr1	/* Restore sr1 */
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1235,21 +1231,20 @@ nadtlb_miss_11:
 
 	L2_ptep		ptp,pte,t0,va,nadtlb_check_alias_11
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,nadtlb_check_alias_11
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
-
-	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mfsp		%sr1,t1  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
 
 	idtlba		pte,(%sr1,va)
 	idtlbp		prot,(%sr1,va)
 
-	mtsp		t0, %sr1	/* Restore sr1 */
-	dbit_unlock1	spc,t0
+	mtsp		t1, %sr1	/* Restore sr1 */
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1269,16 +1264,16 @@ dtlb_miss_20:
 
 	L2_ptep		ptp,pte,t0,va,dtlb_check_alias_20
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dtlb_check_alias_20
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
-	f_extend	pte,t0
+	f_extend	pte,t1
 
 	idtlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1297,16 +1292,16 @@ nadtlb_miss_20:
 
 	L2_ptep		ptp,pte,t0,va,nadtlb_check_alias_20
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,nadtlb_check_alias_20
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
-	f_extend	pte,t0
+	f_extend	pte,t1
 	
-        idtlbt          pte,prot
-	dbit_unlock1	spc,t0
+	idtlbt		pte,prot
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1406,14 +1401,14 @@ itlb_miss_20w:
 
 	L3_ptep		ptp,pte,t0,va,itlb_fault
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,itlb_fault
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 	
 	iitlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1430,14 +1425,14 @@ naitlb_miss_20w:
 
 	L3_ptep		ptp,pte,t0,va,naitlb_check_alias_20w
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,naitlb_check_alias_20w
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
 	iitlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1458,20 +1453,20 @@ itlb_miss_11:
 
 	L2_ptep		ptp,pte,t0,va,itlb_fault
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,itlb_fault
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
-	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mfsp		%sr1,t1  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
 
 	iitlba		pte,(%sr1,va)
 	iitlbp		prot,(%sr1,va)
 
-	mtsp		t0, %sr1	/* Restore sr1 */
-	dbit_unlock1	spc,t0
+	mtsp		t1, %sr1	/* Restore sr1 */
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1482,20 +1477,20 @@ naitlb_miss_11:
 
 	L2_ptep		ptp,pte,t0,va,naitlb_check_alias_11
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,naitlb_check_alias_11
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
-	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mfsp		%sr1,t1  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
 
 	iitlba		pte,(%sr1,va)
 	iitlbp		prot,(%sr1,va)
 
-	mtsp		t0, %sr1	/* Restore sr1 */
-	dbit_unlock1	spc,t0
+	mtsp		t1, %sr1	/* Restore sr1 */
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1516,16 +1511,16 @@ itlb_miss_20:
 
 	L2_ptep		ptp,pte,t0,va,itlb_fault
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,itlb_fault
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
-	f_extend	pte,t0	
+	f_extend	pte,t1
 
 	iitlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1536,16 +1531,16 @@ naitlb_miss_20:
 
 	L2_ptep		ptp,pte,t0,va,naitlb_check_alias_20
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,naitlb_check_alias_20
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
-	f_extend	pte,t0
+	f_extend	pte,t1
 
 	iitlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1568,14 +1563,14 @@ dbit_trap_20w:
 
 	L3_ptep		ptp,pte,t0,va,dbit_fault
 
-	dbit_lock	spc,t0,t1
-	update_dirty	spc,ptp,pte,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dbit_fault
+	update_dirty	ptp,pte,t1
 
 	make_insert_tlb	spc,pte,prot
 		
 	idtlbt          pte,prot
-	dbit_unlock0	spc,t0
 
+	tlb_unlock0	spc,t0
 	rfir
 	nop
 #else
@@ -1588,8 +1583,8 @@ dbit_trap_11:
 
 	L2_ptep		ptp,pte,t0,va,dbit_fault
 
-	dbit_lock	spc,t0,t1
-	update_dirty	spc,ptp,pte,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dbit_fault
+	update_dirty	ptp,pte,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
@@ -1600,8 +1595,8 @@ dbit_trap_11:
 	idtlbp		prot,(%sr1,va)
 
 	mtsp            t1, %sr1     /* Restore sr1 */
-	dbit_unlock0	spc,t0
 
+	tlb_unlock0	spc,t0
 	rfir
 	nop
 
@@ -1612,16 +1607,16 @@ dbit_trap_20:
 
 	L2_ptep		ptp,pte,t0,va,dbit_fault
 
-	dbit_lock	spc,t0,t1
-	update_dirty	spc,ptp,pte,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dbit_fault
+	update_dirty	ptp,pte,t1
 
 	make_insert_tlb	spc,pte,prot
 
 	f_extend	pte,t1
 	
-        idtlbt          pte,prot
-	dbit_unlock0	spc,t0
+	idtlbt		pte,prot
 
+	tlb_unlock0	spc,t0
 	rfir
 	nop
 #endif
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 6548fd1..b99b39f 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -43,10 +43,6 @@
 
 #include "../math-emu/math-emu.h"	/* for handle_fpe() */
 
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
-DEFINE_SPINLOCK(pa_dbit_lock);
-#endif
-
 static void parisc_show_stack(struct task_struct *task, unsigned long *sp,
 	struct pt_regs *regs);
 
diff --git a/arch/powerpc/include/asm/mpc52xx_psc.h b/arch/powerpc/include/asm/mpc52xx_psc.h
index d0ece25..04c7e8f 100644
--- a/arch/powerpc/include/asm/mpc52xx_psc.h
+++ b/arch/powerpc/include/asm/mpc52xx_psc.h
@@ -150,7 +150,10 @@
 
 /* Structure of the hardware registers */
 struct mpc52xx_psc {
-	u8		mode;		/* PSC + 0x00 */
+	union {
+		u8	mode;		/* PSC + 0x00 */
+		u8	mr2;
+	};
 	u8		reserved0[3];
 	union {				/* PSC + 0x04 */
 		u16	status;
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 4dbe072..523673d 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -28,8 +28,6 @@
 #include <asm/synch.h>
 #include <asm/ppc-opcode.h>
 
-#define smp_mb__after_unlock_lock()	smp_mb()  /* Full ordering for lock. */
-
 #ifdef CONFIG_PPC64
 /* use 0x800000yy when locked, where yy == CPU number */
 #ifdef __BIG_ENDIAN__
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 58abeda..15cca17 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -29,6 +29,7 @@ static inline void save_early_sprs(struct thread_struct *prev) {}
 
 extern void enable_kernel_fp(void);
 extern void enable_kernel_altivec(void);
+extern void enable_kernel_vsx(void);
 extern int emulate_altivec(struct pt_regs *);
 extern void __giveup_vsx(struct task_struct *);
 extern void giveup_vsx(struct task_struct *);
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index ccde8f0..112ccf4 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -52,6 +52,22 @@
 	.text
 
 /*
+ * Used by threads when the lock bit of core_idle_state is set.
+ * Threads will spin in HMT_LOW until the lock bit is cleared.
+ * r14 - pointer to core_idle_state
+ * r15 - used to load contents of core_idle_state
+ */
+
+core_idle_lock_held:
+	HMT_LOW
+3:	lwz	r15,0(r14)
+	andi.   r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	bne	3b
+	HMT_MEDIUM
+	lwarx	r15,0,r14
+	blr
+
+/*
  * Pass requested state in r3:
  *	r3 - PNV_THREAD_NAP/SLEEP/WINKLE
  *
@@ -150,6 +166,10 @@ power7_enter_nap_mode:
 	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
 lwarx_loop1:
 	lwarx	r15,0,r14
+
+	andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	bnel	core_idle_lock_held
+
 	andc	r15,r15,r7			/* Clear thread bit */
 
 	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
@@ -294,7 +314,7 @@ lwarx_loop2:
 	 * workaround undo code or resyncing timebase or restoring context
 	 * In either case loop until the lock bit is cleared.
 	 */
-	bne	core_idle_lock_held
+	bnel	core_idle_lock_held
 
 	cmpwi	cr2,r15,0
 	lbz	r4,PACA_SUBCORE_SIBLING_MASK(r13)
@@ -319,15 +339,6 @@ lwarx_loop2:
 	isync
 	b	common_exit
 
-core_idle_lock_held:
-	HMT_LOW
-core_idle_lock_loop:
-	lwz	r15,0(14)
-	andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
-	bne	core_idle_lock_loop
-	HMT_MEDIUM
-	b	lwarx_loop2
-
 first_thread_in_subcore:
 	/* First thread in subcore to wakeup */
 	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index b9de34d..02c1d5d 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1044,13 +1044,7 @@ void pcibios_set_master(struct pci_dev *dev)
 
 void pcibios_fixup_bus(struct pci_bus *bus)
 {
-	/* When called from the generic PCI probe, read PCI<->PCI bridge
-	 * bases. This is -not- called when generating the PCI tree from
-	 * the OF device-tree.
-	 */
-	pci_read_bridge_bases(bus);
-
-	/* Now fixup the bus bus */
+	/* Fixup the bus */
 	pcibios_setup_bus_self(bus);
 
 	/* Now fixup devices on that bus */
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 42e02a2..c8c62c7 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -126,7 +126,6 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 {
 	struct pci_dev *dev;
 	const char *type;
-	struct pci_slot *slot;
 
 	dev = pci_alloc_dev(bus);
 	if (!dev)
@@ -145,10 +144,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 	dev->needs_freset = 0;		/* pcie fundamental reset required */
 	set_pcie_port_type(dev);
 
-	list_for_each_entry(slot, &dev->bus->slots, list)
-		if (PCI_SLOT(dev->devfn) == slot->number)
-			dev->slot = slot;
-
+	pci_dev_assign_slot(dev);
 	dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
 	dev->device = get_int_prop(node, "device-id", 0xffff);
 	dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0);
@@ -191,6 +187,9 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 
 	pci_device_add(dev, bus);
 
+	/* Setup MSI caps & disable MSI/MSI-X interrupts */
+	pci_msi_setup_pci_dev(dev);
+
 	return dev;
 }
 EXPORT_SYMBOL(of_create_pci_dev);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 8005e18..64e6e9d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -204,8 +204,6 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
 #endif /* CONFIG_ALTIVEC */
 
 #ifdef CONFIG_VSX
-#if 0
-/* not currently used, but some crazy RAID module might want to later */
 void enable_kernel_vsx(void)
 {
 	WARN_ON(preemptible());
@@ -220,7 +218,6 @@ void enable_kernel_vsx(void)
 #endif /* CONFIG_SMP */
 }
 EXPORT_SYMBOL(enable_kernel_vsx);
-#endif
 
 void giveup_vsx(struct task_struct *tsk)
 {
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index d3a831a..da50e0c 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -966,8 +966,6 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s)
 
 int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
 {
-	memset(to, 0, sizeof *to);
-
 	if (copy_from_user(to, from, 3*sizeof(int)) ||
 	    copy_from_user(to->_sifields._pad,
 			   from->_sifields._pad, SI_PAD_SIZE32))
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 6530f1b..37de90f 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -297,6 +297,8 @@ long machine_check_early(struct pt_regs *regs)
 
 	__this_cpu_inc(irq_stat.mce_exceptions);
 
+	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
 	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
 		handled = cur_cpu_spec->machine_check_early(regs);
 	return handled;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 6d53597..a67c6d7 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -529,6 +529,10 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 		printk(KERN_ALERT "Unable to handle kernel paging request for "
 			"instruction fetch\n");
 		break;
+	case 0x600:
+		printk(KERN_ALERT "Unable to handle kernel paging request for "
+			"unaligned access at address 0x%08lx\n", regs->dar);
+		break;
 	default:
 		printk(KERN_ALERT "Unable to handle kernel paging request for "
 			"unknown fault\n");
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index ec2eb20..df95629 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -320,6 +320,8 @@ static struct attribute *device_str_attr_create_(char *name, char *str)
 	if (!attr)
 		return NULL;
 
+	sysfs_attr_init(&attr->attr.attr);
+
 	attr->var = str;
 	attr->attr.attr.name = name;
 	attr->attr.attr.mode = 0444;
diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c
index f691bca..c50ea76 100644
--- a/arch/powerpc/platforms/512x/clock-commonclk.c
+++ b/arch/powerpc/platforms/512x/clock-commonclk.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/bitops.h>
+#include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/clkdev.h>
 #include <linux/device.h>
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 5cf5e6e..7cf0df8 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -1478,7 +1478,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
 	}
 
 	/* Unmask the event */
-	if (eeh_enabled())
+	if (ret == EEH_NEXT_ERR_NONE && eeh_enabled())
 		enable_irq(eeh_event_irq);
 
 	return ret;
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
index 4949ef0..37f959b 100644
--- a/arch/powerpc/platforms/powernv/opal-elog.c
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -237,7 +237,7 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type)
 	return elog;
 }
 
-static void elog_work_fn(struct work_struct *work)
+static irqreturn_t elog_event(int irq, void *data)
 {
 	__be64 size;
 	__be64 id;
@@ -251,7 +251,7 @@ static void elog_work_fn(struct work_struct *work)
 	rc = opal_get_elog_size(&id, &size, &type);
 	if (rc != OPAL_SUCCESS) {
 		pr_err("ELOG: OPAL log info read failed\n");
-		return;
+		return IRQ_HANDLED;
 	}
 
 	elog_size = be64_to_cpu(size);
@@ -270,16 +270,10 @@ static void elog_work_fn(struct work_struct *work)
 	 * entries.
 	 */
 	if (kset_find_obj(elog_kset, name))
-		return;
+		return IRQ_HANDLED;
 
 	create_elog_obj(log_id, elog_size, elog_type);
-}
-
-static DECLARE_WORK(elog_work, elog_work_fn);
 
-static irqreturn_t elog_event(int irq, void *data)
-{
-	schedule_work(&elog_work);
 	return IRQ_HANDLED;
 }
 
@@ -304,8 +298,8 @@ int __init opal_elog_init(void)
 		return irq;
 	}
 
-	rc = request_irq(irq, elog_event,
-			IRQ_TYPE_LEVEL_HIGH, "opal-elog", NULL);
+	rc = request_threaded_irq(irq, NULL, elog_event,
+			IRQF_TRIGGER_HIGH | IRQF_ONESHOT, "opal-elog", NULL);
 	if (rc) {
 		pr_err("%s: Can't request OPAL event irq (%d)\n",
 		       __func__, rc);
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index 46cb3fe..4ece8e4 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -112,6 +112,7 @@ static int opal_prd_open(struct inode *inode, struct file *file)
 static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	size_t addr, size;
+	pgprot_t page_prot;
 	int rc;
 
 	pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n",
@@ -125,13 +126,11 @@ static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma)
 	if (!opal_prd_range_is_valid(addr, size))
 		return -EINVAL;
 
-	vma->vm_page_prot = __pgprot(pgprot_val(phys_mem_access_prot(file,
-						vma->vm_pgoff,
-						 size, vma->vm_page_prot))
-					| _PAGE_SPECIAL);
+	page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
+					 size, vma->vm_page_prot);
 
 	rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size,
-			vma->vm_page_prot);
+				page_prot);
 
 	return rc;
 }
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 5738d31..85cbc96 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2220,7 +2220,7 @@ static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb)
 
 static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift,
 		unsigned levels, unsigned long limit,
-		unsigned long *current_offset)
+		unsigned long *current_offset, unsigned long *total_allocated)
 {
 	struct page *tce_mem = NULL;
 	__be64 *addr, *tmp;
@@ -2236,6 +2236,7 @@ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift,
 	}
 	addr = page_address(tce_mem);
 	memset(addr, 0, allocated);
+	*total_allocated += allocated;
 
 	--levels;
 	if (!levels) {
@@ -2245,7 +2246,7 @@ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift,
 
 	for (i = 0; i < entries; ++i) {
 		tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
-				levels, limit, current_offset);
+				levels, limit, current_offset, total_allocated);
 		if (!tmp)
 			break;
 
@@ -2267,7 +2268,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
 		struct iommu_table *tbl)
 {
 	void *addr;
-	unsigned long offset = 0, level_shift;
+	unsigned long offset = 0, level_shift, total_allocated = 0;
 	const unsigned window_shift = ilog2(window_size);
 	unsigned entries_shift = window_shift - page_shift;
 	unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT);
@@ -2286,7 +2287,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
 
 	/* Allocate TCE table */
 	addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
-			levels, tce_table_size, &offset);
+			levels, tce_table_size, &offset, &total_allocated);
 
 	/* addr==NULL means that the first level allocation failed */
 	if (!addr)
@@ -2308,7 +2309,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
 			page_shift);
 	tbl->it_level_size = 1ULL << (level_shift - 3);
 	tbl->it_indirect_levels = levels - 1;
-	tbl->it_allocated_size = offset;
+	tbl->it_allocated_size = total_allocated;
 
 	pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n",
 			window_size, tce_table_size, bus_offset);
diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c
index 2bc3367..87f9623 100644
--- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c
+++ b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c
@@ -18,6 +18,7 @@
 #include <linux/pci.h>
 #include <linux/semaphore.h>
 #include <asm/msi_bitmap.h>
+#include <asm/ppc-pci.h>
 
 struct ppc4xx_hsta_msi {
 	struct device *dev;
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index 2938934..e256592 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -6,3 +6,4 @@ obj-$(CONFIG_S390_HYPFS_FS)	+= hypfs/
 obj-$(CONFIG_APPLDATA_BASE)	+= appldata/
 obj-y				+= net/
 obj-$(CONFIG_PCI)		+= pci/
+obj-$(CONFIG_NUMA)		+= numa/
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b06dc38..4827870 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -99,18 +99,22 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
 	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
 	select ARCH_SUPPORTS_ATOMIC_RMW
+	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_USE_CMPXCHG_LOCKREF
+	select ARCH_WANTS_PROT_NUMA_PROT_NONE
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS2
 	select DYNAMIC_FTRACE if FUNCTION_TRACER
 	select GENERIC_CLOCKEVENTS
+	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CPU_DEVICES if !SMP
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_EARLY_PFN_TO_NID
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
@@ -153,6 +157,7 @@ config S390
 	select VIRT_CPU_ACCOUNTING
 	select VIRT_TO_BUS
 
+
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y
 
@@ -385,6 +390,76 @@ config HOTPLUG_CPU
 config SCHED_SMT
 	def_bool n
 
+# Some NUMA nodes have memory ranges that span
+# other nodes.	Even though a pfn is valid and
+# between a node's start and end pfns, it may not
+# reside on that node.	See memmap_init_zone()
+# for details. <- They meant memory holes!
+config NODES_SPAN_OTHER_NODES
+	def_bool NUMA
+
+config NUMA
+	bool "NUMA support"
+	depends on SMP && 64BIT && SCHED_TOPOLOGY
+	default n
+	help
+	  Enable NUMA support
+
+	  This option adds NUMA support to the kernel.
+
+	  An operation mode can be selected by appending
+	  numa=<method> to the kernel command line.
+
+	  The default behaviour is identical to appending numa=plain to
+	  the command line. This will create just one node with all
+	  available memory and all CPUs in it.
+
+config NODES_SHIFT
+	int "Maximum NUMA nodes (as a power of 2)"
+	range 1 10
+	depends on NUMA
+	default "4"
+	help
+	  Specify the maximum number of NUMA nodes available on the target
+	  system. Increases memory reserved to accommodate various tables.
+
+menu "Select NUMA modes"
+	depends on NUMA
+
+config NUMA_EMU
+	bool "NUMA emulation"
+	default y
+	help
+	  Numa emulation mode will split the available system memory into
+	  equal chunks which then are distributed over the configured number
+	  of nodes in a round-robin manner.
+
+	  The number of fake nodes is limited by the number of available memory
+	  chunks (i.e. memory size / fake size) and the number of supported
+	  nodes in the kernel.
+
+	  The CPUs are assigned to the nodes in a way that partially respects
+	  the original machine topology (if supported by the machine).
+	  Fair distribution of the CPUs is not guaranteed.
+
+config EMU_SIZE
+	hex "NUMA emulation memory chunk size"
+	default 0x10000000
+	range 0x400000 0x100000000
+	depends on NUMA_EMU
+	help
+	  Select the default size by which the memory is chopped and then
+	  assigned to emulated NUMA nodes.
+
+	  This can be overridden by specifying
+
+	  emu_size=<n>
+
+	  on the kernel command line where also suffixes K, M, G, and T are
+	  supported.
+
+endmenu
+
 config SCHED_MC
 	def_bool n
 
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 667b1bc..e8d4423 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -33,6 +33,8 @@ mflags-$(CONFIG_MARCH_Z196)   := -march=z196
 mflags-$(CONFIG_MARCH_ZEC12)  := -march=zEC12
 mflags-$(CONFIG_MARCH_Z13)   := -march=z13
 
+export CC_FLAGS_MARCH := $(mflags-y)
+
 aflags-y += $(mflags-y)
 cflags-y += $(mflags-y)
 
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 940cbdd..0c98f15 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -13,6 +13,7 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_RCU_FAST_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
@@ -50,6 +51,7 @@ CONFIG_LIVEPATCH=y
 CONFIG_MARCH_Z196=y
 CONFIG_TUNE_ZEC12=y
 CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
 CONFIG_PREEMPT=y
 CONFIG_HZ_100=y
 CONFIG_MEMORY_HOTPLUG=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index d793fec..82083e1 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -13,6 +13,7 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_RCU_FAST_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
@@ -49,6 +50,7 @@ CONFIG_DEFAULT_DEADLINE=y
 CONFIG_MARCH_Z196=y
 CONFIG_TUNE_ZEC12=y
 CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
 CONFIG_HZ_100=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 38a77e9..c05c9e0 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -13,6 +13,8 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_RCU_FAST_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
+# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
@@ -48,6 +50,7 @@ CONFIG_LIVEPATCH=y
 CONFIG_MARCH_Z196=y
 CONFIG_TUNE_ZEC12=y
 CONFIG_NR_CPUS=512
+CONFIG_NUMA=y
 CONFIG_HZ_100=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 5566ce8..0b9b95f 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -24,6 +24,7 @@
 #include <crypto/algapi.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include "crypt_s390.h"
@@ -976,7 +977,7 @@ static void __exit aes_s390_fini(void)
 	crypto_unregister_alg(&aes_alg);
 }
 
-module_init(aes_s390_init);
+module_cpu_feature_match(MSA, aes_s390_init);
 module_exit(aes_s390_fini);
 
 MODULE_ALIAS_CRYPTO("aes-all");
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 9e05cc4..fba1c10 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -16,6 +16,7 @@
 
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <crypto/algapi.h>
 #include <crypto/des.h>
@@ -616,7 +617,7 @@ static void __exit des_s390_exit(void)
 	crypto_unregister_alg(&des_alg);
 }
 
-module_init(des_s390_init);
+module_cpu_feature_match(MSA, des_s390_init);
 module_exit(des_s390_exit);
 
 MODULE_ALIAS_CRYPTO("des");
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index b258110..26e14ef 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -9,6 +9,7 @@
 
 #include <crypto/internal/hash.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 
 #include "crypt_s390.h"
 
@@ -158,7 +159,7 @@ static void __exit ghash_mod_exit(void)
 	crypto_unregister_shash(&ghash_alg);
 }
 
-module_init(ghash_mod_init);
+module_cpu_feature_match(MSA, ghash_mod_init);
 module_exit(ghash_mod_exit);
 
 MODULE_ALIAS_CRYPTO("ghash");
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 9d5192c..b8045b9 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/mutex.h>
+#include <linux/cpufeature.h>
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <asm/debug.h>
@@ -914,6 +915,5 @@ static void __exit prng_exit(void)
 	}
 }
 
-
-module_init(prng_init);
+module_cpu_feature_match(MSA, prng_init);
 module_exit(prng_exit);
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 5b2bee3..9208ead 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -26,6 +26,7 @@
 #include <crypto/internal/hash.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 #include <crypto/sha.h>
 
 #include "crypt_s390.h"
@@ -100,7 +101,7 @@ static void __exit sha1_s390_fini(void)
 	crypto_unregister_shash(&alg);
 }
 
-module_init(sha1_s390_init);
+module_cpu_feature_match(MSA, sha1_s390_init);
 module_exit(sha1_s390_fini);
 
 MODULE_ALIAS_CRYPTO("sha1");
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index b74ff15..667888f 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -16,6 +16,7 @@
 #include <crypto/internal/hash.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 #include <crypto/sha.h>
 
 #include "crypt_s390.h"
@@ -140,7 +141,7 @@ static void __exit sha256_s390_fini(void)
 	crypto_unregister_shash(&sha256_alg);
 }
 
-module_init(sha256_s390_init);
+module_cpu_feature_match(MSA, sha256_s390_init);
 module_exit(sha256_s390_fini);
 
 MODULE_ALIAS_CRYPTO("sha256");
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 0c36989..2ba66b1 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 
 #include "sha.h"
 #include "crypt_s390.h"
@@ -148,7 +149,7 @@ static void __exit fini(void)
 	crypto_unregister_shash(&sha384_alg);
 }
 
-module_init(init);
+module_cpu_feature_match(MSA, init);
 module_exit(fini);
 
 MODULE_LICENSE("GPL");
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index dc5385e..5ad26dd 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -3,5 +3,6 @@
 generic-y += clkdev.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += trace_clock.h
diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h
new file mode 100644
index 0000000..fa7e69b
--- /dev/null
+++ b/arch/s390/include/asm/cpufeature.h
@@ -0,0 +1,29 @@
+/*
+ * Module interface for CPU features
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_S390_CPUFEATURE_H
+#define __ASM_S390_CPUFEATURE_H
+
+#include <asm/elf.h>
+
+/* Hardware features on Linux on z Systems are indicated by facility bits that
+ * are mapped to the so-called machine flags.  Particular machine flags are
+ * then used to define ELF hardware capabilities; most notably hardware flags
+ * that are essential for user space / glibc.
+ *
+ * Restrict the set of exposed CPU features to ELF hardware capabilities for
+ * now.  Additional machine flags can be indicated by values larger than
+ * MAX_ELF_HWCAP_FEATURES.
+ */
+#define MAX_ELF_HWCAP_FEATURES	(8 * sizeof(elf_hwcap))
+#define MAX_CPU_FEATURES	MAX_ELF_HWCAP_FEATURES
+
+#define cpu_feature(feat)	ilog2(HWCAP_S390_ ## feat)
+
+int cpu_have_feature(unsigned int nr);
+
+#endif /* __ASM_S390_CPUFEATURE_H */
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index cfad7fca..17a3735 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -46,6 +46,8 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
 	__ctl_load(reg, cr, cr);
 }
 
+void __ctl_set_vx(void);
+
 void smp_ctl_set_bit(int cr, int bit);
 void smp_ctl_clear_bit(int cr, int bit);
 
@@ -57,7 +59,10 @@ union ctlreg0 {
 		unsigned long lap  : 1; /* Low-address-protection control */
 		unsigned long	   : 4;
 		unsigned long edat : 1; /* Enhanced-DAT-enablement control */
-		unsigned long	   : 23;
+		unsigned long	   : 4;
+		unsigned long afp  : 1; /* AFP-register control */
+		unsigned long vx   : 1; /* Vector enablement control */
+		unsigned long	   : 17;
 	};
 };
 
diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h
index 629b79a..f7e5c36 100644
--- a/arch/s390/include/asm/etr.h
+++ b/arch/s390/include/asm/etr.h
@@ -214,6 +214,9 @@ static inline int etr_ptff(void *ptff_block, unsigned int func)
 void etr_switch_to_local(void);
 void etr_sync_check(void);
 
+/* notifier for syncs */
+extern struct atomic_notifier_head s390_epoch_delta_notifier;
+
 /* STP interruption parameter */
 struct stp_irq_parm {
 	unsigned int _pad0	: 14;
diff --git a/arch/s390/include/asm/fpu-internal.h b/arch/s390/include/asm/fpu-internal.h
new file mode 100644
index 0000000..55dc2c0
--- /dev/null
+++ b/arch/s390/include/asm/fpu-internal.h
@@ -0,0 +1,110 @@
+/*
+ * General floating pointer and vector register helpers
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_INTERNAL_H
+#define _ASM_S390_FPU_INTERNAL_H
+
+#define FPU_USE_VX		1	/* Vector extension is active */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <asm/linkage.h>
+#include <asm/ctl_reg.h>
+#include <asm/sigcontext.h>
+
+struct fpu {
+	__u32 fpc;			/* Floating-point control */
+	__u32 flags;
+	union {
+		void *regs;
+		freg_t *fprs;		/* Floating-point register save area */
+		__vector128 *vxrs;	/* Vector register save area */
+	};
+};
+
+void save_fpu_regs(void);
+
+#define is_vx_fpu(fpu) (!!((fpu)->flags & FPU_USE_VX))
+#define is_vx_task(tsk) (!!((tsk)->thread.fpu.flags & FPU_USE_VX))
+
+/* VX array structure for address operand constraints in inline assemblies */
+struct vx_array { __vector128 _[__NUM_VXRS]; };
+
+static inline int test_fp_ctl(u32 fpc)
+{
+	u32 orig_fpc;
+	int rc;
+
+	asm volatile(
+		"	efpc    %1\n"
+		"	sfpc	%2\n"
+		"0:	sfpc	%1\n"
+		"	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (rc), "=d" (orig_fpc)
+		: "d" (fpc), "0" (-EINVAL));
+	return rc;
+}
+
+static inline void save_vx_regs_safe(__vector128 *vxrs)
+{
+	unsigned long cr0, flags;
+
+	flags = arch_local_irq_save();
+	__ctl_store(cr0, 0, 0);
+	__ctl_set_bit(0, 17);
+	__ctl_set_bit(0, 18);
+	asm volatile(
+		"	la	1,%0\n"
+		"	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
+		"	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
+		: "=Q" (*(struct vx_array *) vxrs) : : "1");
+	__ctl_load(cr0, 0, 0);
+	arch_local_irq_restore(flags);
+}
+
+static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
+{
+	int i;
+
+	for (i = 0; i < __NUM_FPRS; i++)
+		fprs[i] = *(freg_t *)(vxrs + i);
+}
+
+static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
+{
+	int i;
+
+	for (i = 0; i < __NUM_FPRS; i++)
+		*(freg_t *)(vxrs + i) = fprs[i];
+}
+
+static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+	fpregs->pad = 0;
+	if (is_vx_fpu(fpu))
+		convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
+	else
+		memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
+		       sizeof(fpregs->fprs));
+}
+
+static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+	if (is_vx_fpu(fpu))
+		convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
+	else
+		memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
+		       sizeof(fpregs->fprs));
+}
+
+#endif
+
+#endif /* _ASM_S390_FPU_INTERNAL_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 0130d03..d9be7c0 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -14,6 +14,7 @@
 
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range			free_pgd_range
+#define hugepages_supported()			(MACHINE_HAS_HPAGE)
 
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t pte);
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 3024acb..3d012e0 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -22,6 +22,7 @@
 #include <linux/kvm.h>
 #include <asm/debug.h>
 #include <asm/cpu.h>
+#include <asm/fpu-internal.h>
 #include <asm/isc.h>
 
 #define KVM_MAX_VCPUS 64
@@ -258,6 +259,9 @@ struct kvm_vcpu_stat {
 	u32 diagnose_10;
 	u32 diagnose_44;
 	u32 diagnose_9c;
+	u32 diagnose_258;
+	u32 diagnose_308;
+	u32 diagnose_500;
 };
 
 #define PGM_OPERATION			0x01
@@ -498,10 +502,9 @@ struct kvm_guestdbg_info_arch {
 
 struct kvm_vcpu_arch {
 	struct kvm_s390_sie_block *sie_block;
-	s390_fp_regs      host_fpregs;
 	unsigned int      host_acrs[NUM_ACRS];
-	s390_fp_regs      guest_fpregs;
-	struct kvm_s390_vregs	*host_vregs;
+	struct fpu	  host_fpregs;
+	struct fpu	  guest_fpregs;
 	struct kvm_s390_local_interrupt local_int;
 	struct hrtimer    ckc_timer;
 	struct kvm_s390_pgm_info pgm;
@@ -630,7 +633,6 @@ extern char sie_exit;
 
 static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_check_processor_compat(void *rtn) {}
-static inline void kvm_arch_exit(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
index fc8a828..27da78c 100644
--- a/arch/s390/include/asm/linkage.h
+++ b/arch/s390/include/asm/linkage.h
@@ -6,4 +6,26 @@
 #define __ALIGN .align 4, 0x07
 #define __ALIGN_STR __stringify(__ALIGN)
 
+#ifndef __ASSEMBLY__
+
+/*
+ * Helper macro for exception table entries
+ */
+#define EX_TABLE(_fault, _target)	\
+	".section __ex_table,\"a\"\n"	\
+	".align	4\n"			\
+	".long	(" #_fault ") - .\n"	\
+	".long	(" #_target ") - .\n"	\
+	".previous\n"
+
+#else /* __ASSEMBLY__ */
+
+#define EX_TABLE(_fault, _target)	\
+	.section __ex_table,"a"	;	\
+	.align	4 ;			\
+	.long	(_fault) - . ;		\
+	.long	(_target) - . ;		\
+	.previous
+
+#endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/s390/include/asm/mm-arch-hooks.h b/arch/s390/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 07680b2..0000000
--- a/arch/s390/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_S390_MM_ARCH_HOOKS_H
-#define _ASM_S390_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_S390_MM_ARCH_HOOKS_H */
diff --git a/arch/s390/include/asm/mmzone.h b/arch/s390/include/asm/mmzone.h
new file mode 100644
index 0000000..a9e834e
--- /dev/null
+++ b/arch/s390/include/asm/mmzone.h
@@ -0,0 +1,16 @@
+/*
+ * NUMA support for s390
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#ifndef _ASM_S390_MMZONE_H
+#define _ASM_S390_MMZONE_H
+
+#ifdef CONFIG_NUMA
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid) (node_data[nid])
+
+#endif /* CONFIG_NUMA */
+#endif /* _ASM_S390_MMZONE_H */
diff --git a/arch/s390/include/asm/numa.h b/arch/s390/include/asm/numa.h
new file mode 100644
index 0000000..2a0efc6
--- /dev/null
+++ b/arch/s390/include/asm/numa.h
@@ -0,0 +1,35 @@
+/*
+ * NUMA support for s390
+ *
+ * Declare the NUMA core code structures and functions.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#ifndef _ASM_S390_NUMA_H
+#define _ASM_S390_NUMA_H
+
+#ifdef CONFIG_NUMA
+
+#include <linux/numa.h>
+#include <linux/cpumask.h>
+
+void numa_setup(void);
+int numa_pfn_to_nid(unsigned long pfn);
+int __node_distance(int a, int b);
+void numa_update_cpu_topology(void);
+
+extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+extern int numa_debug_enabled;
+
+#else
+
+static inline void numa_setup(void) { }
+static inline void numa_update_cpu_topology(void) { }
+static inline int numa_pfn_to_nid(unsigned long pfn)
+{
+	return 0;
+}
+
+#endif /* CONFIG_NUMA */
+#endif /* _ASM_S390_NUMA_H */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index dd34523..53eacbd 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -17,10 +17,7 @@
 #define PAGE_DEFAULT_ACC	0
 #define PAGE_DEFAULT_KEY	(PAGE_DEFAULT_ACC << 4)
 
-#include <asm/setup.h>
-#ifndef __ASSEMBLY__
-
-extern int HPAGE_SHIFT;
+#define HPAGE_SHIFT	20
 #define HPAGE_SIZE	(1UL << HPAGE_SHIFT)
 #define HPAGE_MASK	(~(HPAGE_SIZE - 1))
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
@@ -30,6 +27,9 @@ extern int HPAGE_SHIFT;
 #define ARCH_HAS_PREPARE_HUGEPAGE
 #define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH
 
+#include <asm/setup.h>
+#ifndef __ASSEMBLY__
+
 static inline void storage_key_init_range(unsigned long start, unsigned long end)
 {
 #if PAGE_DEFAULT_KEY
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index a648338..34d9603 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -170,7 +170,11 @@ static inline void zpci_exit_slot(struct zpci_dev *zdev) {}
 #endif /* CONFIG_HOTPLUG_PCI_S390 */
 
 /* Helpers */
-struct zpci_dev *get_zdev(struct pci_dev *);
+static inline struct zpci_dev *to_zpci(struct pci_dev *pdev)
+{
+	return pdev->sysdata;
+}
+
 struct zpci_dev *get_zdev_by_fid(u32);
 
 /* DMA */
@@ -188,4 +192,20 @@ void zpci_debug_init_device(struct zpci_dev *);
 void zpci_debug_exit_device(struct zpci_dev *);
 void zpci_debug_info(struct zpci_dev *, struct seq_file *);
 
+#ifdef CONFIG_NUMA
+
+/* Returns the node based on PCI bus */
+static inline int __pcibus_to_node(const struct pci_bus *bus)
+{
+	return NUMA_NO_NODE;
+}
+
+static inline const struct cpumask *
+cpumask_of_pcibus(const struct pci_bus *bus)
+{
+	return cpu_online_mask;
+}
+
+#endif /* CONFIG_NUMA */
+
 #endif
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 4cb19fe..f897ec7 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -87,7 +87,15 @@ struct sf_raw_sample {
 } __packed;
 
 /* Perf hardware reserve and release functions */
+#ifdef CONFIG_PERF_EVENTS
 int perf_reserve_sampling(void);
 void perf_release_sampling(void);
+#else /* CONFIG_PERF_EVENTS */
+static inline int perf_reserve_sampling(void)
+{
+	return 0;
+}
+static inline void perf_release_sampling(void) {}
+#endif /* CONFIG_PERF_EVENTS */
 
 #endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index f66d827..bdb2f51 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -576,6 +576,19 @@ static inline int pte_same(pte_t a, pte_t b)
 	return pte_val(a) == pte_val(b);
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+static inline int pte_protnone(pte_t pte)
+{
+	return pte_present(pte) && !(pte_val(pte) & _PAGE_READ);
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+	/* pmd_large(pmd) implies pmd_present(pmd) */
+	return pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ);
+}
+#endif
+
 static inline pgste_t pgste_get_lock(pte_t *ptep)
 {
 	unsigned long new = 0;
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index dedb621..085fb0d 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -14,10 +14,12 @@
 #define CIF_MCCK_PENDING	0	/* machine check handling is pending */
 #define CIF_ASCE		1	/* user asce needs fixup / uaccess */
 #define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
+#define CIF_FPU			3	/* restore vector registers */
 
 #define _CIF_MCCK_PENDING	(1<<CIF_MCCK_PENDING)
 #define _CIF_ASCE		(1<<CIF_ASCE)
 #define _CIF_NOHZ_DELAY		(1<<CIF_NOHZ_DELAY)
+#define _CIF_FPU		(1<<CIF_FPU)
 
 #ifndef __ASSEMBLY__
 
@@ -28,6 +30,7 @@
 #include <asm/ptrace.h>
 #include <asm/setup.h>
 #include <asm/runtime_instr.h>
+#include <asm/fpu-internal.h>
 
 static inline void set_cpu_flag(int flag)
 {
@@ -85,7 +88,7 @@ typedef struct {
  * Thread structure
  */
 struct thread_struct {
-	s390_fp_regs fp_regs;
+	struct fpu fpu;			/* FP and VX register save area */
 	unsigned int  acrs[NUM_ACRS];
         unsigned long ksp;              /* kernel stack pointer             */
 	mm_segment_t mm_segment;
@@ -101,7 +104,6 @@ struct thread_struct {
 	struct runtime_instr_cb *ri_cb;
 	int ri_signum;
 	unsigned char trap_tdb[256];	/* Transaction abort diagnose block */
-	__vector128 *vxrs;		/* Vector register save area */
 };
 
 /* Flag to disable transactions. */
@@ -231,6 +233,17 @@ static inline void __load_psw_mask (unsigned long mask)
 }
 
 /*
+ * Extract current PSW mask
+ */
+static inline unsigned long __extract_psw(void)
+{
+	unsigned int reg1, reg2;
+
+	asm volatile("epsw %0,%1" : "=d" (reg1), "=a" (reg2));
+	return (((unsigned long) reg1) << 32) | ((unsigned long) reg2);
+}
+
+/*
  * Rewind PSW instruction address by specified number of bytes.
  */
 static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
@@ -336,25 +349,6 @@ extern void memcpy_absolute(void *, void *, size_t);
 	memcpy_absolute(&(dest), &__tmp, sizeof(__tmp));	\
 }
 
-/*
- * Helper macro for exception table entries
- */
-#define EX_TABLE(_fault, _target)	\
-	".section __ex_table,\"a\"\n"	\
-	".align	4\n"			\
-	".long	(" #_fault ") - .\n"	\
-	".long	(" #_target ") - .\n"	\
-	".previous\n"
-
-#else /* __ASSEMBLY__ */
-
-#define EX_TABLE(_fault, _target)	\
-	.section __ex_table,"a"	;	\
-	.align	4 ;			\
-	.long	(_fault) - . ;		\
-	.long	(_target) - . ;		\
-	.previous
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* __ASM_S390_PROCESSOR_H */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index f6ff060..821dde5 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -79,6 +79,6 @@ int sclp_pci_configure(u32 fid);
 int sclp_pci_deconfigure(u32 fid);
 int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
 void sclp_early_detect(void);
-long _sclp_print_early(const char *);
+int _sclp_print_early(const char *);
 
 #endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index d62e7a6..dcadfde 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -8,139 +8,12 @@
 #define __ASM_SWITCH_TO_H
 
 #include <linux/thread_info.h>
+#include <asm/fpu-internal.h>
 #include <asm/ptrace.h>
 
 extern struct task_struct *__switch_to(void *, void *);
 extern void update_cr_regs(struct task_struct *task);
 
-static inline int test_fp_ctl(u32 fpc)
-{
-	u32 orig_fpc;
-	int rc;
-
-	asm volatile(
-		"	efpc    %1\n"
-		"	sfpc	%2\n"
-		"0:	sfpc	%1\n"
-		"	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc), "=d" (orig_fpc)
-		: "d" (fpc), "0" (-EINVAL));
-	return rc;
-}
-
-static inline void save_fp_ctl(u32 *fpc)
-{
-	asm volatile(
-		"       stfpc   %0\n"
-		: "+Q" (*fpc));
-}
-
-static inline int restore_fp_ctl(u32 *fpc)
-{
-	int rc;
-
-	asm volatile(
-		"	lfpc    %1\n"
-		"0:	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc) : "Q" (*fpc), "0" (-EINVAL));
-	return rc;
-}
-
-static inline void save_fp_regs(freg_t *fprs)
-{
-	asm volatile("std 0,%0" : "=Q" (fprs[0]));
-	asm volatile("std 2,%0" : "=Q" (fprs[2]));
-	asm volatile("std 4,%0" : "=Q" (fprs[4]));
-	asm volatile("std 6,%0" : "=Q" (fprs[6]));
-	asm volatile("std 1,%0" : "=Q" (fprs[1]));
-	asm volatile("std 3,%0" : "=Q" (fprs[3]));
-	asm volatile("std 5,%0" : "=Q" (fprs[5]));
-	asm volatile("std 7,%0" : "=Q" (fprs[7]));
-	asm volatile("std 8,%0" : "=Q" (fprs[8]));
-	asm volatile("std 9,%0" : "=Q" (fprs[9]));
-	asm volatile("std 10,%0" : "=Q" (fprs[10]));
-	asm volatile("std 11,%0" : "=Q" (fprs[11]));
-	asm volatile("std 12,%0" : "=Q" (fprs[12]));
-	asm volatile("std 13,%0" : "=Q" (fprs[13]));
-	asm volatile("std 14,%0" : "=Q" (fprs[14]));
-	asm volatile("std 15,%0" : "=Q" (fprs[15]));
-}
-
-static inline void restore_fp_regs(freg_t *fprs)
-{
-	asm volatile("ld 0,%0" : : "Q" (fprs[0]));
-	asm volatile("ld 2,%0" : : "Q" (fprs[2]));
-	asm volatile("ld 4,%0" : : "Q" (fprs[4]));
-	asm volatile("ld 6,%0" : : "Q" (fprs[6]));
-	asm volatile("ld 1,%0" : : "Q" (fprs[1]));
-	asm volatile("ld 3,%0" : : "Q" (fprs[3]));
-	asm volatile("ld 5,%0" : : "Q" (fprs[5]));
-	asm volatile("ld 7,%0" : : "Q" (fprs[7]));
-	asm volatile("ld 8,%0" : : "Q" (fprs[8]));
-	asm volatile("ld 9,%0" : : "Q" (fprs[9]));
-	asm volatile("ld 10,%0" : : "Q" (fprs[10]));
-	asm volatile("ld 11,%0" : : "Q" (fprs[11]));
-	asm volatile("ld 12,%0" : : "Q" (fprs[12]));
-	asm volatile("ld 13,%0" : : "Q" (fprs[13]));
-	asm volatile("ld 14,%0" : : "Q" (fprs[14]));
-	asm volatile("ld 15,%0" : : "Q" (fprs[15]));
-}
-
-static inline void save_vx_regs(__vector128 *vxrs)
-{
-	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
-
-	asm volatile(
-		"	la	1,%0\n"
-		"	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
-		: "=Q" (*(addrtype *) vxrs) : : "1");
-}
-
-static inline void save_vx_regs_safe(__vector128 *vxrs)
-{
-	unsigned long cr0, flags;
-
-	flags = arch_local_irq_save();
-	__ctl_store(cr0, 0, 0);
-	__ctl_set_bit(0, 17);
-	__ctl_set_bit(0, 18);
-	save_vx_regs(vxrs);
-	__ctl_load(cr0, 0, 0);
-	arch_local_irq_restore(flags);
-}
-
-static inline void restore_vx_regs(__vector128 *vxrs)
-{
-	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
-
-	asm volatile(
-		"	la	1,%0\n"
-		"	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
-		"	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
-		: : "Q" (*(addrtype *) vxrs) : "1");
-}
-
-static inline void save_fp_vx_regs(struct task_struct *task)
-{
-	if (task->thread.vxrs)
-		save_vx_regs(task->thread.vxrs);
-	else
-		save_fp_regs(task->thread.fp_regs.fprs);
-}
-
-static inline void restore_fp_vx_regs(struct task_struct *task)
-{
-	if (task->thread.vxrs)
-		restore_vx_regs(task->thread.vxrs);
-	else
-		restore_fp_regs(task->thread.fp_regs.fprs);
-}
-
 static inline void save_access_regs(unsigned int *acrs)
 {
 	typedef struct { int _[NUM_ACRS]; } acrstype;
@@ -157,15 +30,13 @@ static inline void restore_access_regs(unsigned int *acrs)
 
 #define switch_to(prev,next,last) do {					\
 	if (prev->mm) {							\
-		save_fp_ctl(&prev->thread.fp_regs.fpc);			\
-		save_fp_vx_regs(prev);					\
+		save_fpu_regs();					\
 		save_access_regs(&prev->thread.acrs[0]);		\
 		save_ri_cb(prev->thread.ri_cb);				\
 	}								\
 	if (next->mm) {							\
 		update_cr_regs(next);					\
-		restore_fp_ctl(&next->thread.fp_regs.fpc);		\
-		restore_fp_vx_regs(next);				\
+		set_cpu_flag(CIF_FPU);					\
 		restore_access_regs(&next->thread.acrs[0]);		\
 		restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);	\
 	}								\
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 4990f6c..27ebde6 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -2,6 +2,7 @@
 #define _ASM_S390_TOPOLOGY_H
 
 #include <linux/cpumask.h>
+#include <asm/numa.h>
 
 struct sysinfo_15_1_x;
 struct cpu;
@@ -13,6 +14,7 @@ struct cpu_topology_s390 {
 	unsigned short core_id;
 	unsigned short socket_id;
 	unsigned short book_id;
+	unsigned short node_id;
 	cpumask_t thread_mask;
 	cpumask_t core_mask;
 	cpumask_t book_mask;
@@ -52,6 +54,43 @@ static inline void topology_expect_change(void) { }
 #define POLARIZATION_VM		(2)
 #define POLARIZATION_VH		(3)
 
+#define SD_BOOK_INIT	SD_CPU_INIT
+
+#ifdef CONFIG_NUMA
+
+#define cpu_to_node cpu_to_node
+static inline int cpu_to_node(int cpu)
+{
+	return per_cpu(cpu_topology, cpu).node_id;
+}
+
+/* Returns a pointer to the cpumask of CPUs on node 'node'. */
+#define cpumask_of_node cpumask_of_node
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+	return node_to_cpumask_map[node];
+}
+
+/*
+ * Returns the number of the node containing node 'node'. This
+ * architecture is flat, so it is a pretty simple function!
+ */
+#define parent_node(node) (node)
+
+#define pcibus_to_node(bus) __pcibus_to_node(bus)
+
+#define node_distance(a, b) __node_distance(a, b)
+
+#else /* !CONFIG_NUMA */
+
+#define numa_node_id numa_node_id
+static inline int numa_node_id(void)
+{
+	return 0;
+}
+
+#endif /* CONFIG_NUMA */
+
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_S390_TOPOLOGY_H */
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 91f56b1..525cef7 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -11,16 +11,24 @@
 
 #define __IGNORE_time
 
-/* Ignore NUMA system calls. Not wired up on s390. */
-#define __IGNORE_mbind
-#define __IGNORE_get_mempolicy
-#define __IGNORE_set_mempolicy
-#define __IGNORE_migrate_pages
-#define __IGNORE_move_pages
-
-/* Ignore system calls that are also reachable via sys_socket */
+/* Ignore system calls that are also reachable via sys_socketcall */
 #define __IGNORE_recvmmsg
 #define __IGNORE_sendmmsg
+#define __IGNORE_socket
+#define __IGNORE_socketpair
+#define __IGNORE_bind
+#define __IGNORE_connect
+#define __IGNORE_listen
+#define __IGNORE_accept4
+#define __IGNORE_getsockopt
+#define __IGNORE_setsockopt
+#define __IGNORE_getsockname
+#define __IGNORE_getpeername
+#define __IGNORE_sendto
+#define __IGNORE_sendmsg
+#define __IGNORE_recvfrom
+#define __IGNORE_recvmsg
+#define __IGNORE_shutdown
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_ALARM
diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h
new file mode 100644
index 0000000..4a31356
--- /dev/null
+++ b/arch/s390/include/asm/vx-insn.h
@@ -0,0 +1,480 @@
+/*
+ * Support for Vector Instructions
+ *
+ * Assembler macros to generate .byte/.word code for particular
+ * vector instructions that are supported by recent binutils (>= 2.26) only.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_S390_VX_INSN_H
+#define __ASM_S390_VX_INSN_H
+
+#ifdef __ASSEMBLY__
+
+
+/* Macros to generate vector instruction byte code */
+
+#define REG_NUM_INVALID	       255
+
+/* GR_NUM - Retrieve general-purpose register number
+ *
+ * @opd:	Operand to store register number
+ * @r64:	String designation register in the format "%rN"
+ */
+.macro	GR_NUM	opd gr
+	\opd = REG_NUM_INVALID
+	.ifc \gr,%r0
+		\opd = 0
+	.endif
+	.ifc \gr,%r1
+		\opd = 1
+	.endif
+	.ifc \gr,%r2
+		\opd = 2
+	.endif
+	.ifc \gr,%r3
+		\opd = 3
+	.endif
+	.ifc \gr,%r4
+		\opd = 4
+	.endif
+	.ifc \gr,%r5
+		\opd = 5
+	.endif
+	.ifc \gr,%r6
+		\opd = 6
+	.endif
+	.ifc \gr,%r7
+		\opd = 7
+	.endif
+	.ifc \gr,%r8
+		\opd = 8
+	.endif
+	.ifc \gr,%r9
+		\opd = 9
+	.endif
+	.ifc \gr,%r10
+		\opd = 10
+	.endif
+	.ifc \gr,%r11
+		\opd = 11
+	.endif
+	.ifc \gr,%r12
+		\opd = 12
+	.endif
+	.ifc \gr,%r13
+		\opd = 13
+	.endif
+	.ifc \gr,%r14
+		\opd = 14
+	.endif
+	.ifc \gr,%r15
+		\opd = 15
+	.endif
+	.if \opd == REG_NUM_INVALID
+		.error "Invalid general-purpose register designation: \gr"
+	.endif
+.endm
+
+/* VX_R() - Macro to encode the VX_NUM into the instruction */
+#define VX_R(v)		(v & 0x0F)
+
+/* VX_NUM - Retrieve vector register number
+ *
+ * @opd:	Operand to store register number
+ * @vxr:	String designation register in the format "%vN"
+ *
+ * The vector register number is used for as input number to the
+ * instruction and, as well as, to compute the RXB field of the
+ * instruction.  To encode the particular vector register number,
+ * use the VX_R(v) macro to extract the instruction opcode.
+ */
+.macro	VX_NUM	opd vxr
+	\opd = REG_NUM_INVALID
+	.ifc \vxr,%v0
+		\opd = 0
+	.endif
+	.ifc \vxr,%v1
+		\opd = 1
+	.endif
+	.ifc \vxr,%v2
+		\opd = 2
+	.endif
+	.ifc \vxr,%v3
+		\opd = 3
+	.endif
+	.ifc \vxr,%v4
+		\opd = 4
+	.endif
+	.ifc \vxr,%v5
+		\opd = 5
+	.endif
+	.ifc \vxr,%v6
+		\opd = 6
+	.endif
+	.ifc \vxr,%v7
+		\opd = 7
+	.endif
+	.ifc \vxr,%v8
+		\opd = 8
+	.endif
+	.ifc \vxr,%v9
+		\opd = 9
+	.endif
+	.ifc \vxr,%v10
+		\opd = 10
+	.endif
+	.ifc \vxr,%v11
+		\opd = 11
+	.endif
+	.ifc \vxr,%v12
+		\opd = 12
+	.endif
+	.ifc \vxr,%v13
+		\opd = 13
+	.endif
+	.ifc \vxr,%v14
+		\opd = 14
+	.endif
+	.ifc \vxr,%v15
+		\opd = 15
+	.endif
+	.ifc \vxr,%v16
+		\opd = 16
+	.endif
+	.ifc \vxr,%v17
+		\opd = 17
+	.endif
+	.ifc \vxr,%v18
+		\opd = 18
+	.endif
+	.ifc \vxr,%v19
+		\opd = 19
+	.endif
+	.ifc \vxr,%v20
+		\opd = 20
+	.endif
+	.ifc \vxr,%v21
+		\opd = 21
+	.endif
+	.ifc \vxr,%v22
+		\opd = 22
+	.endif
+	.ifc \vxr,%v23
+		\opd = 23
+	.endif
+	.ifc \vxr,%v24
+		\opd = 24
+	.endif
+	.ifc \vxr,%v25
+		\opd = 25
+	.endif
+	.ifc \vxr,%v26
+		\opd = 26
+	.endif
+	.ifc \vxr,%v27
+		\opd = 27
+	.endif
+	.ifc \vxr,%v28
+		\opd = 28
+	.endif
+	.ifc \vxr,%v29
+		\opd = 29
+	.endif
+	.ifc \vxr,%v30
+		\opd = 30
+	.endif
+	.ifc \vxr,%v31
+		\opd = 31
+	.endif
+	.if \opd == REG_NUM_INVALID
+		.error "Invalid vector register designation: \vxr"
+	.endif
+.endm
+
+/* RXB - Compute most significant bit used vector registers
+ *
+ * @rxb:	Operand to store computed RXB value
+ * @v1:		First vector register designated operand
+ * @v2:		Second vector register designated operand
+ * @v3:		Third vector register designated operand
+ * @v4:		Fourth vector register designated operand
+ */
+.macro	RXB	rxb v1 v2=0 v3=0 v4=0
+	\rxb = 0
+	.if \v1 & 0x10
+		\rxb = \rxb | 0x08
+	.endif
+	.if \v2 & 0x10
+		\rxb = \rxb | 0x04
+	.endif
+	.if \v3 & 0x10
+		\rxb = \rxb | 0x02
+	.endif
+	.if \v4 & 0x10
+		\rxb = \rxb | 0x01
+	.endif
+.endm
+
+/* MRXB - Generate Element Size Control and RXB value
+ *
+ * @m:		Element size control
+ * @v1:		First vector register designated operand (for RXB)
+ * @v2:		Second vector register designated operand (for RXB)
+ * @v3:		Third vector register designated operand (for RXB)
+ * @v4:		Fourth vector register designated operand (for RXB)
+ */
+.macro	MRXB	m v1 v2=0 v3=0 v4=0
+	rxb = 0
+	RXB	rxb, \v1, \v2, \v3, \v4
+	.byte	(\m << 4) | rxb
+.endm
+
+/* MRXBOPC - Generate Element Size Control, RXB, and final Opcode fields
+ *
+ * @m:		Element size control
+ * @opc:	Opcode
+ * @v1:		First vector register designated operand (for RXB)
+ * @v2:		Second vector register designated operand (for RXB)
+ * @v3:		Third vector register designated operand (for RXB)
+ * @v4:		Fourth vector register designated operand (for RXB)
+ */
+.macro	MRXBOPC	m opc v1 v2=0 v3=0 v4=0
+	MRXB	\m, \v1, \v2, \v3, \v4
+	.byte	\opc
+.endm
+
+/* Vector support instructions */
+
+/* VECTOR GENERATE BYTE MASK */
+.macro	VGBM	vr imm2
+	VX_NUM	v1, \vr
+	.word	(0xE700 | (VX_R(v1) << 4))
+	.word	\imm2
+	MRXBOPC	0, 0x44, v1
+.endm
+.macro	VZERO	vxr
+	VGBM	\vxr, 0
+.endm
+.macro	VONE	vxr
+	VGBM	\vxr, 0xFFFF
+.endm
+
+/* VECTOR LOAD VR ELEMENT FROM GR */
+.macro	VLVG	v, gr, disp, m
+	VX_NUM	v1, \v
+	GR_NUM	b2, "%r0"
+	GR_NUM	r3, \gr
+	.word	0xE700 | (VX_R(v1) << 4) | r3
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m, 0x22, v1
+.endm
+.macro	VLVGB	v, gr, index, base
+	VLVG	\v, \gr, \index, \base, 0
+.endm
+.macro	VLVGH	v, gr, index
+	VLVG	\v, \gr, \index, 1
+.endm
+.macro	VLVGF	v, gr, index
+	VLVG	\v, \gr, \index, 2
+.endm
+.macro	VLVGG	v, gr, index
+	VLVG	\v, \gr, \index, 3
+.endm
+
+/* VECTOR LOAD */
+.macro	VL	v, disp, index="%r0", base
+	VX_NUM	v1, \v
+	GR_NUM	x2, \index
+	GR_NUM	b2, \base
+	.word	0xE700 | (VX_R(v1) << 4) | x2
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC 0, 0x06, v1
+.endm
+
+/* VECTOR LOAD ELEMENT */
+.macro	VLEx	vr1, disp, index="%r0", base, m3, opc
+	VX_NUM	v1, \vr1
+	GR_NUM	x2, \index
+	GR_NUM	b2, \base
+	.word	0xE700 | (VX_R(v1) << 4) | x2
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m3, \opc, v1
+.endm
+.macro	VLEB	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x00
+.endm
+.macro	VLEH	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x01
+.endm
+.macro	VLEF	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x03
+.endm
+.macro	VLEG	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x02
+.endm
+
+/* VECTOR LOAD ELEMENT IMMEDIATE */
+.macro	VLEIx	vr1, imm2, m3, opc
+	VX_NUM	v1, \vr1
+	.word	0xE700 | (VX_R(v1) << 4)
+	.word	\imm2
+	MRXBOPC	\m3, \opc, v1
+.endm
+.macro	VLEIB	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x40
+.endm
+.macro	VLEIH	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x41
+.endm
+.macro	VLEIF	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x43
+.endm
+.macro	VLEIG	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x42
+.endm
+
+/* VECTOR LOAD GR FROM VR ELEMENT */
+.macro	VLGV	gr, vr, disp, base="%r0", m
+	GR_NUM	r1, \gr
+	GR_NUM	b2, \base
+	VX_NUM	v3, \vr
+	.word	0xE700 | (r1 << 4) | VX_R(v3)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m, 0x21, v3
+.endm
+.macro	VLGVB	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 0
+.endm
+.macro	VLGVH	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 1
+.endm
+.macro	VLGVF	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 2
+.endm
+.macro	VLGVG	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 3
+.endm
+
+/* VECTOR LOAD MULTIPLE */
+.macro	VLM	vfrom, vto, disp, base
+	VX_NUM	v1, \vfrom
+	VX_NUM	v3, \vto
+	GR_NUM	b2, \base	    /* Base register */
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	0, 0x36, v1, v3
+.endm
+
+/* VECTOR STORE MULTIPLE */
+.macro	VSTM	vfrom, vto, disp, base
+	VX_NUM	v1, \vfrom
+	VX_NUM	v3, \vto
+	GR_NUM	b2, \base	    /* Base register */
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	0, 0x3E, v1, v3
+.endm
+
+/* VECTOR PERMUTE */
+.macro	VPERM	vr1, vr2, vr3, vr4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	VX_NUM	v4, \vr4
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12)
+	MRXBOPC	VX_R(v4), 0x8C, v1, v2, v3, v4
+.endm
+
+/* VECTOR UNPACK LOGICAL LOW */
+.macro	VUPLL	vr1, vr2, m3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	0x0000
+	MRXBOPC	\m3, 0xD4, v1, v2
+.endm
+.macro	VUPLLB	vr1, vr2
+	VUPLL	\vr1, \vr2, 0
+.endm
+.macro	VUPLLH	vr1, vr2
+	VUPLL	\vr1, \vr2, 1
+.endm
+.macro	VUPLLF	vr1, vr2
+	VUPLL	\vr1, \vr2, 2
+.endm
+
+
+/* Vector integer instructions */
+
+/* VECTOR EXCLUSIVE OR */
+.macro	VX	vr1, vr2, vr3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12)
+	MRXBOPC	0, 0x6D, v1, v2, v3
+.endm
+
+/* VECTOR GALOIS FIELD MULTIPLY SUM */
+.macro	VGFM	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12)
+	MRXBOPC	\m4, 0xB4, v1, v2, v3
+.endm
+.macro	VGFMB	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 0
+.endm
+.macro	VGFMH	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 1
+.endm
+.macro	VGFMF	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 2
+.endm
+.macro	VGFMG	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 3
+.endm
+
+/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */
+.macro	VGFMA	vr1, vr2, vr3, vr4, m5
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	VX_NUM	v4, \vr4
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12) | (\m5 << 8)
+	MRXBOPC	VX_R(v4), 0xBC, v1, v2, v3, v4
+.endm
+.macro	VGFMAB	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 0
+.endm
+.macro	VGFMAH	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 1
+.endm
+.macro	VGFMAF	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 2
+.endm
+.macro	VGFMAG	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 3
+.endm
+
+/* VECTOR SHIFT RIGHT LOGICAL BY BYTE */
+.macro	VSRLB	vr1, vr2, vr3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12)
+	MRXBOPC	0, 0x7D, v1, v2, v3
+.endm
+
+
+#endif	/* __ASSEMBLY__ */
+#endif	/* __ASM_S390_VX_INSN_H */
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 67878af..59d2bb4 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -204,9 +204,9 @@
 #define __NR_statfs64		265
 #define __NR_fstatfs64		266
 #define __NR_remap_file_pages	267
-/* Number 268 is reserved for new sys_mbind */
-/* Number 269 is reserved for new sys_get_mempolicy */
-/* Number 270 is reserved for new sys_set_mempolicy */
+#define __NR_mbind		268
+#define __NR_get_mempolicy	269
+#define __NR_set_mempolicy	270
 #define __NR_mq_open		271
 #define __NR_mq_unlink		272
 #define __NR_mq_timedsend	273
@@ -223,7 +223,7 @@
 #define __NR_inotify_init	284
 #define __NR_inotify_add_watch	285
 #define __NR_inotify_rm_watch	286
-/* Number 287 is reserved for new sys_migrate_pages */
+#define __NR_migrate_pages	287
 #define __NR_openat		288
 #define __NR_mkdirat		289
 #define __NR_mknodat		290
@@ -245,7 +245,7 @@
 #define __NR_sync_file_range	307
 #define __NR_tee		308
 #define __NR_vmsplice		309
-/* Number 310 is reserved for new sys_move_pages */
+#define __NR_move_pages		310
 #define __NR_getcpu		311
 #define __NR_epoll_pwait	312
 #define __NR_utimes		313
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index ffb8761..b756c63 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -28,6 +28,17 @@ CFLAGS_ptrace.o		+= -DUTS_MACHINE='"$(UTS_MACHINE)"'
 
 CFLAGS_sysinfo.o += -w
 
+#
+# Use -march=z900 for sclp.c to be able to print an error message if
+# the kernel is started on a machine which is too old
+#
+CFLAGS_REMOVE_sclp.o = $(CC_FLAGS_FTRACE)
+ifneq ($(CC_FLAGS_MARCH),-march=z900)
+CFLAGS_REMOVE_sclp.o += $(CC_FLAGS_MARCH)
+CFLAGS_sclp.o	+= -march=z900
+endif
+GCOV_PROFILE_sclp.o := n
+
 obj-y	:= traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
 obj-y	+= processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
 obj-y	+= debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index c7d1b9d..48c9af7 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -23,15 +23,18 @@
 
 int main(void)
 {
-	DEFINE(__THREAD_info, offsetof(struct task_struct, stack));
-	DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp));
-	DEFINE(__THREAD_mm_segment, offsetof(struct task_struct, thread.mm_segment));
-	BLANK();
+	DEFINE(__TASK_thread_info, offsetof(struct task_struct, stack));
+	DEFINE(__TASK_thread, offsetof(struct task_struct, thread));
 	DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
 	BLANK();
-	DEFINE(__THREAD_per_cause, offsetof(struct task_struct, thread.per_event.cause));
-	DEFINE(__THREAD_per_address, offsetof(struct task_struct, thread.per_event.address));
-	DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid));
+	DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp));
+	DEFINE(__THREAD_FPU_fpc, offsetof(struct thread_struct, fpu.fpc));
+	DEFINE(__THREAD_FPU_flags, offsetof(struct thread_struct, fpu.flags));
+	DEFINE(__THREAD_FPU_regs, offsetof(struct thread_struct, fpu.regs));
+	DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause));
+	DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address));
+	DEFINE(__THREAD_per_paid, offsetof(struct thread_struct, per_event.paid));
+	DEFINE(__THREAD_trap_tdb, offsetof(struct thread_struct, trap_tdb));
 	BLANK();
 	DEFINE(__TI_task, offsetof(struct thread_info, task));
 	DEFINE(__TI_flags, offsetof(struct thread_info, flags));
@@ -176,7 +179,6 @@ int main(void)
 	DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data));
 	DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap));
 	DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb));
-	DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb));
 	DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce));
 	DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c));
 	DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20));
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index bff5e3b..8ba3243 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -138,6 +138,8 @@ int init_cache_level(unsigned int cpu)
 	union cache_topology ct;
 	enum cache_type ctype;
 
+	if (!test_facility(34))
+		return -EOPNOTSUPP;
 	if (!this_cpu_ci)
 		return -EINVAL;
 	ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index fe8d692..eb46642 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -153,33 +153,14 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 /* Store registers needed to create the signal frame */
 static void store_sigregs(void)
 {
-	int i;
-
 	save_access_regs(current->thread.acrs);
-	save_fp_ctl(&current->thread.fp_regs.fpc);
-	if (current->thread.vxrs) {
-		save_vx_regs(current->thread.vxrs);
-		for (i = 0; i < __NUM_FPRS; i++)
-			current->thread.fp_regs.fprs[i] =
-				*(freg_t *)(current->thread.vxrs + i);
-	} else
-		save_fp_regs(current->thread.fp_regs.fprs);
+	save_fpu_regs();
 }
 
 /* Load registers after signal return */
 static void load_sigregs(void)
 {
-	int i;
-
 	restore_access_regs(current->thread.acrs);
-	/* restore_fp_ctl is done in restore_sigregs */
-	if (current->thread.vxrs) {
-		for (i = 0; i < __NUM_FPRS; i++)
-			*(freg_t *)(current->thread.vxrs + i) =
-				current->thread.fp_regs.fprs[i];
-		restore_vx_regs(current->thread.vxrs);
-	} else
-		restore_fp_regs(current->thread.fp_regs.fprs);
 }
 
 static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
@@ -196,8 +177,7 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
 		user_sregs.regs.gprs[i] = (__u32) regs->gprs[i];
 	memcpy(&user_sregs.regs.acrs, current->thread.acrs,
 	       sizeof(user_sregs.regs.acrs));
-	memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
-	       sizeof(user_sregs.fpregs));
+	fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
 	if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32)))
 		return -EFAULT;
 	return 0;
@@ -217,8 +197,8 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
 	if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI))
 		return -EINVAL;
 
-	/* Loading the floating-point-control word can fail. Do that first. */
-	if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+	/* Test the floating-point-control word. */
+	if (test_fp_ctl(user_sregs.fpregs.fpc))
 		return -EINVAL;
 
 	/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
@@ -235,9 +215,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
 		regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
 	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
 	       sizeof(current->thread.acrs));
-
-	memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
-	       sizeof(current->thread.fp_regs));
+	fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
 
 	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
 	return 0;
@@ -258,13 +236,13 @@ static int save_sigregs_ext32(struct pt_regs *regs,
 		return -EFAULT;
 
 	/* Save vector registers to signal stack */
-	if (current->thread.vxrs) {
+	if (is_vx_task(current)) {
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+			vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
 		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
 				   sizeof(sregs_ext->vxrs_low)) ||
 		    __copy_to_user(&sregs_ext->vxrs_high,
-				   current->thread.vxrs + __NUM_VXRS_LOW,
+				   current->thread.fpu.vxrs + __NUM_VXRS_LOW,
 				   sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 	}
@@ -286,15 +264,15 @@ static int restore_sigregs_ext32(struct pt_regs *regs,
 		*(__u32 *)&regs->gprs[i] = gprs_high[i];
 
 	/* Restore vector registers from signal stack */
-	if (current->thread.vxrs) {
+	if (is_vx_task(current)) {
 		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
 				     sizeof(sregs_ext->vxrs_low)) ||
-		    __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
 				     &sregs_ext->vxrs_high,
 				     sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			*((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+			*((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
 	}
 	return 0;
 }
@@ -308,6 +286,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
 		goto badframe;
 	set_current_blocked(&set);
+	save_fpu_regs();
 	if (restore_sigregs32(regs, &frame->sregs))
 		goto badframe;
 	if (restore_sigregs_ext32(regs, &frame->sregs_ext))
@@ -330,6 +309,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
 	set_current_blocked(&set);
 	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
+	save_fpu_regs();
 	if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 	if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
@@ -472,7 +452,7 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
 	 */
 	uc_flags = UC_GPRS_HIGH;
 	if (MACHINE_HAS_VX) {
-		if (current->thread.vxrs)
+		if (is_vx_task(current))
 			uc_flags |= UC_VXRS;
 	} else
 		frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 3238893..247b7aa 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -20,6 +20,8 @@
 #include <asm/page.h>
 #include <asm/sigp.h>
 #include <asm/irq.h>
+#include <asm/fpu-internal.h>
+#include <asm/vx-insn.h>
 
 __PT_R0      =	__PT_GPRS
 __PT_R1      =	__PT_GPRS + 8
@@ -46,10 +48,10 @@ _TIF_WORK	= (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		   _TIF_UPROBE)
 _TIF_TRACE	= (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 		   _TIF_SYSCALL_TRACEPOINT)
-_CIF_WORK	= (_CIF_MCCK_PENDING | _CIF_ASCE)
+_CIF_WORK	= (_CIF_MCCK_PENDING | _CIF_ASCE | _CIF_FPU)
 _PIF_WORK	= (_PIF_PER_TRAP)
 
-#define BASED(name) name-system_call(%r13)
+#define BASED(name) name-cleanup_critical(%r13)
 
 	.macro	TRACE_IRQS_ON
 #ifdef CONFIG_TRACE_IRQFLAGS
@@ -73,38 +75,6 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 #endif
 	.endm
 
-	.macro LPP newpp
-#if IS_ENABLED(CONFIG_KVM)
-	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
-	jz	.+8
-	.insn	s,0xb2800000,\newpp
-#endif
-	.endm
-
-	.macro	HANDLE_SIE_INTERCEPT scratch,reason
-#if IS_ENABLED(CONFIG_KVM)
-	tmhh	%r8,0x0001		# interrupting from user ?
-	jnz	.+62
-	lgr	\scratch,%r9
-	slg	\scratch,BASED(.Lsie_critical)
-	clg	\scratch,BASED(.Lsie_critical_length)
-	.if	\reason==1
-	# Some program interrupts are suppressing (e.g. protection).
-	# We must also check the instruction after SIE in that case.
-	# do_protection_exception will rewind to .Lrewind_pad
-	jh	.+42
-	.else
-	jhe	.+42
-	.endif
-	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
-	LPP	__SF_EMPTY+16(%r15)		# set host id
-	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
-	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
-	larl	%r9,sie_exit			# skip forward to sie_exit
-	mvi	__SF_EMPTY+31(%r15),\reason	# set exit reason
-#endif
-	.endm
-
 	.macro	CHECK_STACK stacksize,savearea
 #ifdef CONFIG_CHECK_STACK
 	tml	%r15,\stacksize - CONFIG_STACK_GUARD
@@ -113,7 +83,7 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 #endif
 	.endm
 
-	.macro	SWITCH_ASYNC savearea,stack,shift
+	.macro	SWITCH_ASYNC savearea,timer
 	tmhh	%r8,0x0001		# interrupting from user ?
 	jnz	1f
 	lgr	%r14,%r9
@@ -124,26 +94,28 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 	brasl	%r14,cleanup_critical
 	tmhh	%r8,0x0001		# retest problem state after cleanup
 	jnz	1f
-0:	lg	%r14,\stack		# are we already on the target stack?
+0:	lg	%r14,__LC_ASYNC_STACK	# are we already on the async stack?
 	slgr	%r14,%r15
-	srag	%r14,%r14,\shift
-	jnz	1f
-	CHECK_STACK 1<<\shift,\savearea
+	srag	%r14,%r14,STACK_SHIFT
+	jnz	2f
+	CHECK_STACK 1<<STACK_SHIFT,\savearea
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	2f
-1:	lg	%r15,\stack		# load target stack
-2:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
+	j	3f
+1:	LAST_BREAK %r14
+	UPDATE_VTIME %r14,%r15,\timer
+2:	lg	%r15,__LC_ASYNC_STACK	# load async stack
+3:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	.endm
 
-	.macro UPDATE_VTIME scratch,enter_timer
-	lg	\scratch,__LC_EXIT_TIMER
-	slg	\scratch,\enter_timer
-	alg	\scratch,__LC_USER_TIMER
-	stg	\scratch,__LC_USER_TIMER
-	lg	\scratch,__LC_LAST_UPDATE_TIMER
-	slg	\scratch,__LC_EXIT_TIMER
-	alg	\scratch,__LC_SYSTEM_TIMER
-	stg	\scratch,__LC_SYSTEM_TIMER
+	.macro UPDATE_VTIME w1,w2,enter_timer
+	lg	\w1,__LC_EXIT_TIMER
+	lg	\w2,__LC_LAST_UPDATE_TIMER
+	slg	\w1,\enter_timer
+	slg	\w2,__LC_EXIT_TIMER
+	alg	\w1,__LC_USER_TIMER
+	alg	\w2,__LC_SYSTEM_TIMER
+	stg	\w1,__LC_USER_TIMER
+	stg	\w2,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),\enter_timer
 	.endm
 
@@ -178,21 +150,88 @@ _PIF_WORK	= (_PIF_PER_TRAP)
  */
 ENTRY(__switch_to)
 	stmg	%r6,%r15,__SF_GPRS(%r15)	# store gprs of prev task
-	stg	%r15,__THREAD_ksp(%r2)		# store kernel stack of prev
-	lg	%r4,__THREAD_info(%r2)		# get thread_info of prev
-	lg	%r5,__THREAD_info(%r3)		# get thread_info of next
+	lgr	%r1,%r2
+	aghi	%r1,__TASK_thread		# thread_struct of prev task
+	lg	%r4,__TASK_thread_info(%r2)	# get thread_info of prev
+	lg	%r5,__TASK_thread_info(%r3)	# get thread_info of next
+	stg	%r15,__THREAD_ksp(%r1)		# store kernel stack of prev
+	lgr	%r1,%r3
+	aghi	%r1,__TASK_thread		# thread_struct of next task
 	lgr	%r15,%r5
 	aghi	%r15,STACK_INIT			# end of kernel stack of next
 	stg	%r3,__LC_CURRENT		# store task struct of next
 	stg	%r5,__LC_THREAD_INFO		# store thread info of next
 	stg	%r15,__LC_KERNEL_STACK		# store end of kernel stack
+	lg	%r15,__THREAD_ksp(%r1)		# load kernel stack of next
 	lctl	%c4,%c4,__TASK_pid(%r3)		# load pid to control reg. 4
 	mvc	__LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next
-	lg	%r15,__THREAD_ksp(%r3)		# load kernel stack of next
 	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
 	br	%r14
 
 .L__critical_start:
+
+#if IS_ENABLED(CONFIG_KVM)
+/*
+ * sie64a calling convention:
+ * %r2 pointer to sie control block
+ * %r3 guest register save area
+ */
+ENTRY(sie64a)
+	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
+	stg	%r2,__SF_EMPTY(%r15)		# save control block pointer
+	stg	%r3,__SF_EMPTY+8(%r15)		# save guest register save area
+	xc	__SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU	# load guest fp/vx registers ?
+	jno	.Lsie_load_guest_gprs
+	brasl	%r14,load_fpu_regs		# load guest fp/vx regs
+.Lsie_load_guest_gprs:
+	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
+	lg	%r14,__LC_GMAP			# get gmap pointer
+	ltgr	%r14,%r14
+	jz	.Lsie_gmap
+	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
+.Lsie_gmap:
+	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
+	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
+	tm	__SIE_PROG20+3(%r14),3		# last exit...
+	jnz	.Lsie_skip
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	jo	.Lsie_skip			# exit if fp/vx regs changed
+	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
+	jz	.Lsie_enter
+	.insn	s,0xb2800000,__LC_CURRENT_PID	# set guest id to pid
+.Lsie_enter:
+	sie	0(%r14)
+	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
+	jz	.Lsie_skip
+	.insn	s,0xb2800000,__SF_EMPTY+16(%r15)# set host id
+.Lsie_skip:
+	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
+.Lsie_done:
+# some program checks are suppressing. C code (e.g. do_protection_exception)
+# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
+# instructions between sie64a and .Lsie_done should not cause program
+# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
+# See also .Lcleanup_sie
+.Lrewind_pad:
+	nop	0
+	.globl sie_exit
+sie_exit:
+	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
+	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
+	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
+	lg	%r2,__SF_EMPTY+24(%r15)		# return exit reason code
+	br	%r14
+.Lsie_fault:
+	lghi	%r14,-EFAULT
+	stg	%r14,__SF_EMPTY+24(%r15)	# set exit reason code
+	j	sie_exit
+
+	EX_TABLE(.Lrewind_pad,.Lsie_fault)
+	EX_TABLE(sie_exit,.Lsie_fault)
+#endif
+
 /*
  * SVC interrupt handler routine. System calls are synchronous events and
  * are executed with interrupts enabled.
@@ -208,9 +247,9 @@ ENTRY(system_call)
 .Lsysc_per:
 	lg	%r15,__LC_KERNEL_STACK
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
-.Lsysc_vtime:
-	UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER
 	LAST_BREAK %r13
+.Lsysc_vtime:
+	UPDATE_VTIME %r10,%r13,__LC_SYNC_ENTER_TIMER
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
 	mvc	__PT_PSW(16,%r11),__LC_SVC_OLD_PSW
@@ -240,8 +279,6 @@ ENTRY(system_call)
 .Lsysc_return:
 	LOCKDEP_SYS_EXIT
 .Lsysc_tif:
-	tm	__PT_PSW+1(%r11),0x01		# returning to user ?
-	jno	.Lsysc_restore
 	tm	__PT_FLAGS+7(%r11),_PIF_WORK
 	jnz	.Lsysc_work
 	tm	__TI_flags+7(%r12),_TIF_WORK
@@ -276,6 +313,8 @@ ENTRY(system_call)
 	jo	.Lsysc_sigpending
 	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
 	jo	.Lsysc_notify_resume
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	jo	.Lsysc_vxrs
 	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
 	jo	.Lsysc_uaccess
 	j	.Lsysc_return		# beware of critical section cleanup
@@ -303,6 +342,13 @@ ENTRY(system_call)
 	j	.Lsysc_return
 
 #
+# CIF_FPU is set, restore floating-point controls and floating-point registers.
+#
+.Lsysc_vxrs:
+	larl	%r14,.Lsysc_return
+	jg	load_fpu_regs
+
+#
 # _TIF_SIGPENDING is set, call do_signal
 #
 .Lsysc_sigpending:
@@ -401,27 +447,35 @@ ENTRY(pgm_check_handler)
 	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
+	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_PGM_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,1
 	tmhh	%r8,0x0001		# test problem state bit
-	jnz	1f			# -> fault in user space
-	tmhh	%r8,0x4000		# PER bit set in old PSW ?
-	jnz	0f			# -> enabled, can't be a double fault
+	jnz	2f			# -> fault in user space
+#if IS_ENABLED(CONFIG_KVM)
+	# cleanup critical section for sie64a
+	lgr	%r14,%r9
+	slg	%r14,BASED(.Lsie_critical_start)
+	clg	%r14,BASED(.Lsie_critical_length)
+	jhe	0f
+	brasl	%r14,.Lcleanup_sie
+#endif
+0:	tmhh	%r8,0x4000		# PER bit set in old PSW ?
+	jnz	1f			# -> enabled, can't be a double fault
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
 	jnz	.Lpgm_svcper		# -> single stepped svc
-0:	CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
+1:	CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	2f
-1:	UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER
-	LAST_BREAK %r14
+	j	3f
+2:	LAST_BREAK %r14
+	UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
 	lg	%r15,__LC_KERNEL_STACK
 	lg	%r14,__TI_task(%r12)
+	aghi	%r14,__TASK_thread	# pointer to thread_struct
 	lghi	%r13,__LC_PGM_TDB
 	tm	__LC_PGM_ILC+2,0x02	# check for transaction abort
-	jz	2f
+	jz	3f
 	mvc	__THREAD_trap_tdb(256,%r14),0(%r13)
-2:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
+3:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -430,24 +484,28 @@ ENTRY(pgm_check_handler)
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	stg	%r10,__PT_ARGS(%r11)
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
-	jz	0f
+	jz	4f
 	tmhh	%r8,0x0001		# kernel per event ?
 	jz	.Lpgm_kprobe
 	oi	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
 	mvc	__THREAD_per_address(8,%r14),__LC_PER_ADDRESS
 	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CODE
 	mvc	__THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-0:	REENABLE_IRQS
+4:	REENABLE_IRQS
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	larl	%r1,pgm_check_table
 	llgh	%r10,__PT_INT_CODE+2(%r11)
 	nill	%r10,0x007f
 	sll	%r10,2
-	je	.Lsysc_return
+	je	.Lpgm_return
 	lgf	%r1,0(%r10,%r1)		# load address of handler routine
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	basr	%r14,%r1		# branch to interrupt-handler
-	j	.Lsysc_return
+.Lpgm_return:
+	LOCKDEP_SYS_EXIT
+	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
+	jno	.Lsysc_restore
+	j	.Lsysc_tif
 
 #
 # PER event in supervisor state, must be kprobes
@@ -457,7 +515,7 @@ ENTRY(pgm_check_handler)
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,do_per_trap
-	j	.Lsysc_return
+	j	.Lpgm_return
 
 #
 # single stepped system call
@@ -478,15 +536,9 @@ ENTRY(io_int_handler)
 	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
+	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_IO_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,2
-	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
-	tmhh	%r8,0x0001		# interrupting from user?
-	jz	.Lio_skip
-	UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
-	LAST_BREAK %r14
-.Lio_skip:
+	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -582,6 +634,8 @@ ENTRY(io_int_handler)
 	jo	.Lio_sigpending
 	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
 	jo	.Lio_notify_resume
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	jo	.Lio_vxrs
 	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
 	jo	.Lio_uaccess
 	j	.Lio_return		# beware of critical section cleanup
@@ -604,6 +658,13 @@ ENTRY(io_int_handler)
 	j	.Lio_return
 
 #
+# CIF_FPU is set, restore floating-point controls and floating-point registers.
+#
+.Lio_vxrs:
+	larl	%r14,.Lio_return
+	jg	load_fpu_regs
+
+#
 # _TIF_NEED_RESCHED is set, call schedule
 #
 .Lio_reschedule:
@@ -647,15 +708,9 @@ ENTRY(ext_int_handler)
 	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
+	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_EXT_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,3
-	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
-	tmhh	%r8,0x0001		# interrupting from user ?
-	jz	.Lext_skip
-	UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
-	LAST_BREAK %r14
-.Lext_skip:
+	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -685,6 +740,122 @@ ENTRY(psw_idle)
 	br	%r14
 .Lpsw_idle_end:
 
+/* Store floating-point controls and floating-point or vector extension
+ * registers instead.  A critical section cleanup assures that the registers
+ * are stored even if interrupted for some other work.	The register %r2
+ * designates a struct fpu to store register contents.	If the specified
+ * structure does not contain a register save area, the register store is
+ * omitted (see also comments in arch_dup_task_struct()).
+ *
+ * The CIF_FPU flag is set in any case.  The CIF_FPU triggers a lazy restore
+ * of the register contents at system call or io return.
+ */
+ENTRY(save_fpu_regs)
+	lg	%r2,__LC_CURRENT
+	aghi	%r2,__TASK_thread
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	bor	%r14
+	stfpc	__THREAD_FPU_fpc(%r2)
+.Lsave_fpu_regs_fpc_end:
+	lg	%r3,__THREAD_FPU_regs(%r2)
+	ltgr	%r3,%r3
+	jz	.Lsave_fpu_regs_done	  # no save area -> set CIF_FPU
+	tm	__THREAD_FPU_flags+3(%r2),FPU_USE_VX
+	jz	.Lsave_fpu_regs_fp	  # no -> store FP regs
+.Lsave_fpu_regs_vx_low:
+	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
+.Lsave_fpu_regs_vx_high:
+	VSTM	%v16,%v31,256,%r3	  # vstm 16,31,256(3)
+	j	.Lsave_fpu_regs_done	  # -> set CIF_FPU flag
+.Lsave_fpu_regs_fp:
+	std	0,0(%r3)
+	std	1,8(%r3)
+	std	2,16(%r3)
+	std	3,24(%r3)
+	std	4,32(%r3)
+	std	5,40(%r3)
+	std	6,48(%r3)
+	std	7,56(%r3)
+	std	8,64(%r3)
+	std	9,72(%r3)
+	std	10,80(%r3)
+	std	11,88(%r3)
+	std	12,96(%r3)
+	std	13,104(%r3)
+	std	14,112(%r3)
+	std	15,120(%r3)
+.Lsave_fpu_regs_done:
+	oi	__LC_CPU_FLAGS+7,_CIF_FPU
+	br	%r14
+.Lsave_fpu_regs_end:
+
+/* Load floating-point controls and floating-point or vector extension
+ * registers.  A critical section cleanup assures that the register contents
+ * are loaded even if interrupted for some other work.	Depending on the saved
+ * FP/VX state, the vector-enablement control, CR0.46, is either set or cleared.
+ *
+ * There are special calling conventions to fit into sysc and io return work:
+ *	%r15:	<kernel stack>
+ * The function requires:
+ *	%r4 and __SF_EMPTY+32(%r15)
+ */
+load_fpu_regs:
+	lg	%r4,__LC_CURRENT
+	aghi	%r4,__TASK_thread
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	bnor	%r14
+	lfpc	__THREAD_FPU_fpc(%r4)
+	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
+	tm	__THREAD_FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
+	lg	%r4,__THREAD_FPU_regs(%r4)	# %r4 <- reg save area
+	jz	.Lload_fpu_regs_fp_ctl		# -> no VX, load FP regs
+.Lload_fpu_regs_vx_ctl:
+	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
+	jo	.Lload_fpu_regs_vx
+	oi	__SF_EMPTY+32+5(%r15),2		# set VX control
+	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+.Lload_fpu_regs_vx:
+	VLM	%v0,%v15,0,%r4
+.Lload_fpu_regs_vx_high:
+	VLM	%v16,%v31,256,%r4
+	j	.Lload_fpu_regs_done
+.Lload_fpu_regs_fp_ctl:
+	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
+	jz	.Lload_fpu_regs_fp
+	ni	__SF_EMPTY+32+5(%r15),253	# clear VX control
+	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+.Lload_fpu_regs_fp:
+	ld	0,0(%r4)
+	ld	1,8(%r4)
+	ld	2,16(%r4)
+	ld	3,24(%r4)
+	ld	4,32(%r4)
+	ld	5,40(%r4)
+	ld	6,48(%r4)
+	ld	7,56(%r4)
+	ld	8,64(%r4)
+	ld	9,72(%r4)
+	ld	10,80(%r4)
+	ld	11,88(%r4)
+	ld	12,96(%r4)
+	ld	13,104(%r4)
+	ld	14,112(%r4)
+	ld	15,120(%r4)
+.Lload_fpu_regs_done:
+	ni	__LC_CPU_FLAGS+7,255-_CIF_FPU
+	br	%r14
+.Lload_fpu_regs_end:
+
+/* Test and set the vector enablement control in CR0.46 */
+ENTRY(__ctl_set_vx)
+	stctg	%c0,%c0,__SF_EMPTY(%r15)
+	tm	__SF_EMPTY+5(%r15),2
+	bor	%r14
+	oi	__SF_EMPTY+5(%r15),2
+	lctlg	%c0,%c0,__SF_EMPTY(%r15)
+	br	%r14
+.L__ctl_set_vx_end:
+
 .L__critical_end:
 
 /*
@@ -697,9 +868,8 @@ ENTRY(mcck_int_handler)
 	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
+	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_MCK_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,4
 	tm	__LC_MCCK_CODE,0x80	# system damage?
 	jo	.Lmcck_panic		# yes -> rest of mcck code invalid
 	lghi	%r14,__LC_CPU_TIMER_SAVE_AREA
@@ -720,11 +890,7 @@ ENTRY(mcck_int_handler)
 	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
 3:	tm	__LC_MCCK_CODE+2,0x09	# mwp + ia of old psw valid?
 	jno	.Lmcck_panic		# no -> skip cleanup critical
-	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT
-	tm	%r8,0x0001		# interrupting from user ?
-	jz	.Lmcck_skip
-	UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER
-	LAST_BREAK %r14
+	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
 .Lmcck_skip:
 	lghi	%r14,__LC_GPREGS_SAVE_AREA+64
 	stmg	%r0,%r7,__PT_R0(%r11)
@@ -759,12 +925,8 @@ ENTRY(mcck_int_handler)
 	lpswe	__LC_RETURN_MCCK_PSW
 
 .Lmcck_panic:
-	lg	%r14,__LC_PANIC_STACK
-	slgr	%r14,%r15
-	srag	%r14,%r14,PAGE_SHIFT
-	jz	0f
 	lg	%r15,__LC_PANIC_STACK
-0:	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
 	j	.Lmcck_skip
 
 #
@@ -814,20 +976,13 @@ stack_overflow:
 	jg	kernel_stack_overflow
 #endif
 
-	.align	8
-.Lcleanup_table:
-	.quad	system_call
-	.quad	.Lsysc_do_svc
-	.quad	.Lsysc_tif
-	.quad	.Lsysc_restore
-	.quad	.Lsysc_done
-	.quad	.Lio_tif
-	.quad	.Lio_restore
-	.quad	.Lio_done
-	.quad	psw_idle
-	.quad	.Lpsw_idle_end
-
 cleanup_critical:
+#if IS_ENABLED(CONFIG_KVM)
+	clg	%r9,BASED(.Lcleanup_table_sie)	# .Lsie_gmap
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table_sie+8)# .Lsie_done
+	jl	.Lcleanup_sie
+#endif
 	clg	%r9,BASED(.Lcleanup_table)	# system_call
 	jl	0f
 	clg	%r9,BASED(.Lcleanup_table+8)	# .Lsysc_do_svc
@@ -848,8 +1003,54 @@ cleanup_critical:
 	jl	0f
 	clg	%r9,BASED(.Lcleanup_table+72)	# .Lpsw_idle_end
 	jl	.Lcleanup_idle
+	clg	%r9,BASED(.Lcleanup_table+80)	# save_fpu_regs
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table+88)	# .Lsave_fpu_regs_end
+	jl	.Lcleanup_save_fpu_regs
+	clg	%r9,BASED(.Lcleanup_table+96)	# load_fpu_regs
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table+104)	# .Lload_fpu_regs_end
+	jl	.Lcleanup_load_fpu_regs
+	clg	%r9,BASED(.Lcleanup_table+112)	# __ctl_set_vx
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table+120)	# .L__ctl_set_vx_end
+	jl	.Lcleanup___ctl_set_vx
 0:	br	%r14
 
+	.align	8
+.Lcleanup_table:
+	.quad	system_call
+	.quad	.Lsysc_do_svc
+	.quad	.Lsysc_tif
+	.quad	.Lsysc_restore
+	.quad	.Lsysc_done
+	.quad	.Lio_tif
+	.quad	.Lio_restore
+	.quad	.Lio_done
+	.quad	psw_idle
+	.quad	.Lpsw_idle_end
+	.quad	save_fpu_regs
+	.quad	.Lsave_fpu_regs_end
+	.quad	load_fpu_regs
+	.quad	.Lload_fpu_regs_end
+	.quad	__ctl_set_vx
+	.quad	.L__ctl_set_vx_end
+
+#if IS_ENABLED(CONFIG_KVM)
+.Lcleanup_table_sie:
+	.quad	.Lsie_gmap
+	.quad	.Lsie_done
+
+.Lcleanup_sie:
+	lg	%r9,__SF_EMPTY(%r15)		# get control block pointer
+	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
+	jz	0f
+	.insn	s,0xb2800000,__SF_EMPTY+16(%r15)# set host id
+0:	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
+	larl	%r9,sie_exit			# skip forward to sie_exit
+	br	%r14
+#endif
 
 .Lcleanup_system_call:
 	# check if stpt has been executed
@@ -910,7 +1111,7 @@ cleanup_critical:
 	.quad	system_call
 	.quad	.Lsysc_stmg
 	.quad	.Lsysc_per
-	.quad	.Lsysc_vtime+18
+	.quad	.Lsysc_vtime+36
 	.quad	.Lsysc_vtime+42
 
 .Lcleanup_sysc_tif:
@@ -976,6 +1177,145 @@ cleanup_critical:
 .Lcleanup_idle_insn:
 	.quad	.Lpsw_idle_lpsw
 
+.Lcleanup_save_fpu_regs:
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	bor	%r14
+	clg	%r9,BASED(.Lcleanup_save_fpu_regs_done)
+	jhe	5f
+	clg	%r9,BASED(.Lcleanup_save_fpu_regs_fp)
+	jhe	4f
+	clg	%r9,BASED(.Lcleanup_save_fpu_regs_vx_high)
+	jhe	3f
+	clg	%r9,BASED(.Lcleanup_save_fpu_regs_vx_low)
+	jhe	2f
+	clg	%r9,BASED(.Lcleanup_save_fpu_fpc_end)
+	jhe	1f
+	lg	%r2,__LC_CURRENT
+0:	# Store floating-point controls
+	stfpc	__THREAD_FPU_fpc(%r2)
+1:	# Load register save area and check if VX is active
+	lg	%r3,__THREAD_FPU_regs(%r2)
+	ltgr	%r3,%r3
+	jz	5f			  # no save area -> set CIF_FPU
+	tm	__THREAD_FPU_flags+3(%r2),FPU_USE_VX
+	jz	4f			  # no VX -> store FP regs
+2:	# Store vector registers (V0-V15)
+	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
+3:	# Store vector registers (V16-V31)
+	VSTM	%v16,%v31,256,%r3	  # vstm 16,31,256(3)
+	j	5f			  # -> done, set CIF_FPU flag
+4:	# Store floating-point registers
+	std	0,0(%r3)
+	std	1,8(%r3)
+	std	2,16(%r3)
+	std	3,24(%r3)
+	std	4,32(%r3)
+	std	5,40(%r3)
+	std	6,48(%r3)
+	std	7,56(%r3)
+	std	8,64(%r3)
+	std	9,72(%r3)
+	std	10,80(%r3)
+	std	11,88(%r3)
+	std	12,96(%r3)
+	std	13,104(%r3)
+	std	14,112(%r3)
+	std	15,120(%r3)
+5:	# Set CIF_FPU flag
+	oi	__LC_CPU_FLAGS+7,_CIF_FPU
+	lg	%r9,48(%r11)		# return from save_fpu_regs
+	br	%r14
+.Lcleanup_save_fpu_fpc_end:
+	.quad	.Lsave_fpu_regs_fpc_end
+.Lcleanup_save_fpu_regs_vx_low:
+	.quad	.Lsave_fpu_regs_vx_low
+.Lcleanup_save_fpu_regs_vx_high:
+	.quad	.Lsave_fpu_regs_vx_high
+.Lcleanup_save_fpu_regs_fp:
+	.quad	.Lsave_fpu_regs_fp
+.Lcleanup_save_fpu_regs_done:
+	.quad	.Lsave_fpu_regs_done
+
+.Lcleanup_load_fpu_regs:
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	bnor	%r14
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_done)
+	jhe	1f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_fp)
+	jhe	2f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_fp_ctl)
+	jhe	3f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx_high)
+	jhe	4f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx)
+	jhe	5f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx_ctl)
+	jhe	6f
+	lg	%r4,__LC_CURRENT
+	lfpc	__THREAD_FPU_fpc(%r4)
+	tm	__THREAD_FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
+	lg	%r4,__THREAD_FPU_regs(%r4)	# %r4 <- reg save area
+	jz	3f				# -> no VX, load FP regs
+6:	# Set VX-enablement control
+	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
+	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
+	jo	5f
+	oi	__SF_EMPTY+32+5(%r15),2		# set VX control
+	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+5:	# Load V0 ..V15 registers
+	VLM	%v0,%v15,0,%r4
+4:	# Load V16..V31 registers
+	VLM	%v16,%v31,256,%r4
+	j	1f
+3:	# Clear VX-enablement control for FP
+	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
+	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
+	jz	2f
+	ni	__SF_EMPTY+32+5(%r15),253	# clear VX control
+	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+2:	# Load floating-point registers
+	ld	0,0(%r4)
+	ld	1,8(%r4)
+	ld	2,16(%r4)
+	ld	3,24(%r4)
+	ld	4,32(%r4)
+	ld	5,40(%r4)
+	ld	6,48(%r4)
+	ld	7,56(%r4)
+	ld	8,64(%r4)
+	ld	9,72(%r4)
+	ld	10,80(%r4)
+	ld	11,88(%r4)
+	ld	12,96(%r4)
+	ld	13,104(%r4)
+	ld	14,112(%r4)
+	ld	15,120(%r4)
+1:	# Clear CIF_FPU bit
+	ni	__LC_CPU_FLAGS+7,255-_CIF_FPU
+	lg	%r9,48(%r11)		# return from load_fpu_regs
+	br	%r14
+.Lcleanup_load_fpu_regs_vx_ctl:
+	.quad	.Lload_fpu_regs_vx_ctl
+.Lcleanup_load_fpu_regs_vx:
+	.quad	.Lload_fpu_regs_vx
+.Lcleanup_load_fpu_regs_vx_high:
+	.quad	.Lload_fpu_regs_vx_high
+.Lcleanup_load_fpu_regs_fp_ctl:
+	.quad	.Lload_fpu_regs_fp_ctl
+.Lcleanup_load_fpu_regs_fp:
+	.quad	.Lload_fpu_regs_fp
+.Lcleanup_load_fpu_regs_done:
+	.quad	.Lload_fpu_regs_done
+
+.Lcleanup___ctl_set_vx:
+	stctg	%c0,%c0,__SF_EMPTY(%r15)
+	tm	__SF_EMPTY+5(%r15),2
+	bor	%r14
+	oi	__SF_EMPTY+5(%r15),2
+	lctlg	%c0,%c0,__SF_EMPTY(%r15)
+	lg	%r9,48(%r11)		# return from __ctl_set_vx
+	br	%r14
+
 /*
  * Integer constants
  */
@@ -984,62 +1324,11 @@ cleanup_critical:
 	.quad	.L__critical_start
 .Lcritical_length:
 	.quad	.L__critical_end - .L__critical_start
-
-
 #if IS_ENABLED(CONFIG_KVM)
-/*
- * sie64a calling convention:
- * %r2 pointer to sie control block
- * %r3 guest register save area
- */
-ENTRY(sie64a)
-	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
-	stg	%r2,__SF_EMPTY(%r15)		# save control block pointer
-	stg	%r3,__SF_EMPTY+8(%r15)		# save guest register save area
-	xc	__SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
-	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
-	lg	%r14,__LC_GMAP			# get gmap pointer
-	ltgr	%r14,%r14
-	jz	.Lsie_gmap
-	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
-.Lsie_gmap:
-	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
-	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
-	tm	__SIE_PROG20+3(%r14),3		# last exit...
-	jnz	.Lsie_done
-	LPP	__SF_EMPTY(%r15)		# set guest id
-	sie	0(%r14)
-.Lsie_done:
-	LPP	__SF_EMPTY+16(%r15)		# set host id
-	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
-	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
-# some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions between sie64a and .Lsie_done should not cause program
-# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
-# See also HANDLE_SIE_INTERCEPT
-.Lrewind_pad:
-	nop	0
-	.globl sie_exit
-sie_exit:
-	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
-	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
-	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
-	lg	%r2,__SF_EMPTY+24(%r15)		# return exit reason code
-	br	%r14
-.Lsie_fault:
-	lghi	%r14,-EFAULT
-	stg	%r14,__SF_EMPTY+24(%r15)	# set exit reason code
-	j	sie_exit
-
-	.align	8
-.Lsie_critical:
+.Lsie_critical_start:
 	.quad	.Lsie_gmap
 .Lsie_critical_length:
 	.quad	.Lsie_done - .Lsie_gmap
-
-	EX_TABLE(.Lrewind_pad,.Lsie_fault)
-	EX_TABLE(sie_exit,.Lsie_fault)
 #endif
 
 	.section .rodata, "a"
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 59b7c64..1255c6c 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -370,6 +370,7 @@ ENTRY(startup_kdump)
 	xc	0x200(256),0x200	# partially clear lowcore
 	xc	0x300(256),0x300
 	xc	0xe00(256),0xe00
+	lctlg	%c0,%c15,0x200(%r0)	# initialize control registers
 	stck	__LC_LAST_UPDATE_CLOCK
 	spt	6f-.LPG0(%r13)
 	mvc	__LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
@@ -413,9 +414,9 @@ ENTRY(startup_kdump)
 # followed by the facility words.
 
 #if defined(CONFIG_MARCH_Z13)
-	.long 3, 0xc100eff2, 0xf46ce800, 0x00400000
+	.long 2, 0xc100eff2, 0xf46cc800
 #elif defined(CONFIG_MARCH_ZEC12)
-	.long 3, 0xc100eff2, 0xf46ce800, 0x00400000
+	.long 2, 0xc100eff2, 0xf46cc800
 #elif defined(CONFIG_MARCH_Z196)
 	.long 2, 0xc100eff2, 0xf46c0000
 #elif defined(CONFIG_MARCH_Z10)
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index a902996..c9dac21 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -44,12 +44,9 @@ static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
 	unsigned char *ipn = (unsigned char *)new;
 
 	pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc);
-	pr_emerg("Found:    %02x %02x %02x %02x %02x %02x\n",
-		 ipc[0], ipc[1], ipc[2], ipc[3], ipc[4], ipc[5]);
-	pr_emerg("Expected: %02x %02x %02x %02x %02x %02x\n",
-		 ipe[0], ipe[1], ipe[2], ipe[3], ipe[4], ipe[5]);
-	pr_emerg("New:      %02x %02x %02x %02x %02x %02x\n",
-		 ipn[0], ipn[1], ipn[2], ipn[3], ipn[4], ipn[5]);
+	pr_emerg("Found:    %6ph\n", ipc);
+	pr_emerg("Expected: %6ph\n", ipe);
+	pr_emerg("New:      %6ph\n", ipn);
 	panic("Corrupted kernel text");
 }
 
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 505c17c..0ae6f8e 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -21,6 +21,8 @@
 #include <asm/nmi.h>
 #include <asm/crw.h>
 #include <asm/switch_to.h>
+#include <asm/fpu-internal.h>
+#include <asm/ctl_reg.h>
 
 struct mcck_struct {
 	int kill_task;
@@ -129,26 +131,30 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 	} else
 		asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
 
-	asm volatile(
-		"	ld	0,0(%0)\n"
-		"	ld	1,8(%0)\n"
-		"	ld	2,16(%0)\n"
-		"	ld	3,24(%0)\n"
-		"	ld	4,32(%0)\n"
-		"	ld	5,40(%0)\n"
-		"	ld	6,48(%0)\n"
-		"	ld	7,56(%0)\n"
-		"	ld	8,64(%0)\n"
-		"	ld	9,72(%0)\n"
-		"	ld	10,80(%0)\n"
-		"	ld	11,88(%0)\n"
-		"	ld	12,96(%0)\n"
-		"	ld	13,104(%0)\n"
-		"	ld	14,112(%0)\n"
-		"	ld	15,120(%0)\n"
-		: : "a" (fpt_save_area));
-	/* Revalidate vector registers */
-	if (MACHINE_HAS_VX && current->thread.vxrs) {
+	if (!MACHINE_HAS_VX) {
+		/* Revalidate floating point registers */
+		asm volatile(
+			"	ld	0,0(%0)\n"
+			"	ld	1,8(%0)\n"
+			"	ld	2,16(%0)\n"
+			"	ld	3,24(%0)\n"
+			"	ld	4,32(%0)\n"
+			"	ld	5,40(%0)\n"
+			"	ld	6,48(%0)\n"
+			"	ld	7,56(%0)\n"
+			"	ld	8,64(%0)\n"
+			"	ld	9,72(%0)\n"
+			"	ld	10,80(%0)\n"
+			"	ld	11,88(%0)\n"
+			"	ld	12,96(%0)\n"
+			"	ld	13,104(%0)\n"
+			"	ld	14,112(%0)\n"
+			"	ld	15,120(%0)\n"
+			: : "a" (fpt_save_area));
+	} else {
+		/* Revalidate vector registers */
+		union ctlreg0 cr0;
+
 		if (!mci->vr) {
 			/*
 			 * Vector registers can't be restored and therefore
@@ -156,8 +162,16 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 			 */
 			kill_task = 1;
 		}
-		restore_vx_regs((__vector128 *)
-				S390_lowcore.vector_save_area_addr);
+		cr0.val = S390_lowcore.cregs_save_area[0];
+		cr0.afp = cr0.vx = 1;
+		__ctl_load(cr0.val, 0, 0);
+		asm volatile(
+			"	la	1,%0\n"
+			"	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
+			"	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
+			: : "Q" (*(struct vx_array *)
+				 &S390_lowcore.vector_save_area) : "1");
+		__ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
 	}
 	/* Revalidate access registers */
 	asm volatile(
@@ -349,4 +363,4 @@ static int __init machine_check_init(void)
 	ctl_set_bit(14, 24);	/* enable warning MCH */
 	return 0;
 }
-arch_initcall(machine_check_init);
+early_initcall(machine_check_init);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index afe05bf..b973972 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1019,12 +1019,9 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
 		break;
 	}
 
-	/* The host-program-parameter (hpp) contains the sie control
-	 * block that is set by sie64a() in entry64.S.	Check if hpp
-	 * refers to a valid control block and set sde_regs flags
-	 * accordingly.  This would allow to use hpp values for other
-	 * purposes too.
-	 * For now, simply use a non-zero value as guest indicator.
+	/* The host-program-parameter (hpp) contains the pid of
+	 * the CPU thread as set by sie64a() in entry.S.
+	 * If non-zero assume a guest sample.
 	 */
 	if (sfr->basic.hpp)
 		sde_regs->in_guest = 1;
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index dc5edc2..f2dac9f 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -81,8 +81,38 @@ void release_thread(struct task_struct *dead_task)
 
 void arch_release_task_struct(struct task_struct *tsk)
 {
-	if (tsk->thread.vxrs)
-		kfree(tsk->thread.vxrs);
+	/* Free either the floating-point or the vector register save area */
+	kfree(tsk->thread.fpu.regs);
+}
+
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+	*dst = *src;
+
+	/* Set up a new floating-point register save area */
+	dst->thread.fpu.fpc = 0;
+	dst->thread.fpu.flags = 0;	/* Always start with VX disabled */
+	dst->thread.fpu.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
+				       GFP_KERNEL|__GFP_REPEAT);
+	if (!dst->thread.fpu.fprs)
+		return -ENOMEM;
+
+	/*
+	 * Save the floating-point or vector register state of the current
+	 * task.  The state is not saved for early kernel threads, for example,
+	 * the init_task, which do not have an allocated save area.
+	 * The CIF_FPU flag is set in any case to lazy clear or restore a saved
+	 * state when switching to a different task or returning to user space.
+	 */
+	save_fpu_regs();
+	dst->thread.fpu.fpc = current->thread.fpu.fpc;
+	if (is_vx_task(current))
+		convert_vx_to_fp(dst->thread.fpu.fprs,
+				 current->thread.fpu.vxrs);
+	else
+		memcpy(dst->thread.fpu.fprs, current->thread.fpu.fprs,
+		       sizeof(freg_t) * __NUM_FPRS);
+	return 0;
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
@@ -142,11 +172,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 	p->thread.ri_signum = 0;
 	frame->childregs.psw.mask &= ~PSW_MASK_RI;
 
-	/* Save the fpu registers to new thread structure. */
-	save_fp_ctl(&p->thread.fp_regs.fpc);
-	save_fp_regs(p->thread.fp_regs.fprs);
-	p->thread.fp_regs.pad = 0;
-	p->thread.vxrs = NULL;
 	/* Set a new TLS ?  */
 	if (clone_flags & CLONE_SETTLS) {
 		unsigned long tls = frame->childregs.gprs[6];
@@ -162,8 +187,8 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 
 asmlinkage void execve_tail(void)
 {
-	current->thread.fp_regs.fpc = 0;
-	asm volatile("sfpc %0,%0" : : "d" (0));
+	current->thread.fpu.fpc = 0;
+	asm volatile("sfpc %0" : : "d" (0));
 }
 
 /*
@@ -171,8 +196,15 @@ asmlinkage void execve_tail(void)
  */
 int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
 {
-	save_fp_ctl(&fpregs->fpc);
-	save_fp_regs(fpregs->fprs);
+	save_fpu_regs();
+	fpregs->fpc = current->thread.fpu.fpc;
+	fpregs->pad = 0;
+	if (is_vx_task(current))
+		convert_vx_to_fp((freg_t *)&fpregs->fprs,
+				 current->thread.fpu.vxrs);
+	else
+		memcpy(&fpregs->fprs, current->thread.fpu.fprs,
+		       sizeof(fpregs->fprs));
 	return 1;
 }
 EXPORT_SYMBOL(dump_fpu);
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index dc488e1..e6e077a 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -41,6 +41,15 @@ void cpu_init(void)
 }
 
 /*
+ * cpu_have_feature - Test CPU features on module initialization
+ */
+int cpu_have_feature(unsigned int num)
+{
+	return elf_hwcap & (1UL << num);
+}
+EXPORT_SYMBOL(cpu_have_feature);
+
+/*
  * show_cpuinfo - Get information on one CPU for use by procfs.
  */
 static int show_cpuinfo(struct seq_file *m, void *v)
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index d363c9c..8b1c8e3 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -45,39 +45,27 @@ void update_cr_regs(struct task_struct *task)
 	struct per_regs old, new;
 
 	/* Take care of the enable/disable of transactional execution. */
-	if (MACHINE_HAS_TE || MACHINE_HAS_VX) {
+	if (MACHINE_HAS_TE) {
 		unsigned long cr, cr_new;
 
 		__ctl_store(cr, 0, 0);
-		cr_new = cr;
-		if (MACHINE_HAS_TE) {
-			/* Set or clear transaction execution TXC bit 8. */
-			cr_new |= (1UL << 55);
-			if (task->thread.per_flags & PER_FLAG_NO_TE)
-				cr_new &= ~(1UL << 55);
-		}
-		if (MACHINE_HAS_VX) {
-			/* Enable/disable of vector extension */
-			cr_new &= ~(1UL << 17);
-			if (task->thread.vxrs)
-				cr_new |= (1UL << 17);
-		}
+		/* Set or clear transaction execution TXC bit 8. */
+		cr_new = cr | (1UL << 55);
+		if (task->thread.per_flags & PER_FLAG_NO_TE)
+			cr_new &= ~(1UL << 55);
 		if (cr_new != cr)
 			__ctl_load(cr_new, 0, 0);
-		if (MACHINE_HAS_TE) {
-			/* Set/clear transaction execution TDC bits 62/63. */
-			__ctl_store(cr, 2, 2);
-			cr_new = cr & ~3UL;
-			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
-				if (task->thread.per_flags &
-				    PER_FLAG_TE_ABORT_RAND_TEND)
-					cr_new |= 1UL;
-				else
-					cr_new |= 2UL;
-			}
-			if (cr_new != cr)
-				__ctl_load(cr_new, 2, 2);
+		/* Set or clear transaction execution TDC bits 62 and 63. */
+		__ctl_store(cr, 2, 2);
+		cr_new = cr & ~3UL;
+		if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
+				cr_new |= 1UL;
+			else
+				cr_new |= 2UL;
 		}
+		if (cr_new != cr)
+			__ctl_load(cr_new, 2, 2);
 	}
 	/* Copy user specified PER registers */
 	new.control = thread->per_user.control;
@@ -242,21 +230,21 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
 		/*
 		 * floating point control reg. is in the thread structure
 		 */
-		tmp = child->thread.fp_regs.fpc;
+		tmp = child->thread.fpu.fpc;
 		tmp <<= BITS_PER_LONG - 32;
 
 	} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are either in child->thread.fp_regs
-		 * or the child->thread.vxrs array
+		 * floating point regs. are either in child->thread.fpu
+		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
-		if (child->thread.vxrs)
+		if (is_vx_task(child))
 			tmp = *(addr_t *)
-			       ((addr_t) child->thread.vxrs + 2*offset);
+			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
 		else
 			tmp = *(addr_t *)
-			       ((addr_t) &child->thread.fp_regs.fprs + offset);
+			       ((addr_t) &child->thread.fpu.fprs + offset);
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
@@ -387,20 +375,20 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
 		if ((unsigned int) data != 0 ||
 		    test_fp_ctl(data >> (BITS_PER_LONG - 32)))
 			return -EINVAL;
-		child->thread.fp_regs.fpc = data >> (BITS_PER_LONG - 32);
+		child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32);
 
 	} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are either in child->thread.fp_regs
-		 * or the child->thread.vxrs array
+		 * floating point regs. are either in child->thread.fpu
+		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
-		if (child->thread.vxrs)
+		if (is_vx_task(child))
 			*(addr_t *)((addr_t)
-				child->thread.vxrs + 2*offset) = data;
+				child->thread.fpu.vxrs + 2*offset) = data;
 		else
 			*(addr_t *)((addr_t)
-				&child->thread.fp_regs.fprs + offset) = data;
+				&child->thread.fpu.fprs + offset) = data;
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
@@ -621,20 +609,20 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
 		/*
 		 * floating point control reg. is in the thread structure
 		 */
-		tmp = child->thread.fp_regs.fpc;
+		tmp = child->thread.fpu.fpc;
 
 	} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are either in child->thread.fp_regs
-		 * or the child->thread.vxrs array
+		 * floating point regs. are either in child->thread.fpu
+		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
-		if (child->thread.vxrs)
+		if (is_vx_task(child))
 			tmp = *(__u32 *)
-			       ((addr_t) child->thread.vxrs + 2*offset);
+			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
 		else
 			tmp = *(__u32 *)
-			       ((addr_t) &child->thread.fp_regs.fprs + offset);
+			       ((addr_t) &child->thread.fpu.fprs + offset);
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*
@@ -746,20 +734,20 @@ static int __poke_user_compat(struct task_struct *child,
 		 */
 		if (test_fp_ctl(tmp))
 			return -EINVAL;
-		child->thread.fp_regs.fpc = data;
+		child->thread.fpu.fpc = data;
 
 	} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are either in child->thread.fp_regs
-		 * or the child->thread.vxrs array
+		 * floating point regs. are either in child->thread.fpu
+		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
-		if (child->thread.vxrs)
+		if (is_vx_task(child))
 			*(__u32 *)((addr_t)
-				child->thread.vxrs + 2*offset) = tmp;
+				child->thread.fpu.vxrs + 2*offset) = tmp;
 		else
 			*(__u32 *)((addr_t)
-				&child->thread.fp_regs.fprs + offset) = tmp;
+				&child->thread.fpu.fprs + offset) = tmp;
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*
@@ -952,18 +940,16 @@ static int s390_fpregs_get(struct task_struct *target,
 			   const struct user_regset *regset, unsigned int pos,
 			   unsigned int count, void *kbuf, void __user *ubuf)
 {
-	if (target == current) {
-		save_fp_ctl(&target->thread.fp_regs.fpc);
-		save_fp_regs(target->thread.fp_regs.fprs);
-	} else if (target->thread.vxrs) {
-		int i;
+	_s390_fp_regs fp_regs;
+
+	if (target == current)
+		save_fpu_regs();
+
+	fp_regs.fpc = target->thread.fpu.fpc;
+	fpregs_store(&fp_regs, &target->thread.fpu);
 
-		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			target->thread.fp_regs.fprs[i] =
-				*(freg_t *)(target->thread.vxrs + i);
-	}
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   &target->thread.fp_regs, 0, -1);
+				   &fp_regs, 0, -1);
 }
 
 static int s390_fpregs_set(struct task_struct *target,
@@ -972,41 +958,33 @@ static int s390_fpregs_set(struct task_struct *target,
 			   const void __user *ubuf)
 {
 	int rc = 0;
+	freg_t fprs[__NUM_FPRS];
 
-	if (target == current) {
-		save_fp_ctl(&target->thread.fp_regs.fpc);
-		save_fp_regs(target->thread.fp_regs.fprs);
-	}
+	if (target == current)
+		save_fpu_regs();
 
 	/* If setting FPC, must validate it first. */
 	if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
-		u32 ufpc[2] = { target->thread.fp_regs.fpc, 0 };
+		u32 ufpc[2] = { target->thread.fpu.fpc, 0 };
 		rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
 					0, offsetof(s390_fp_regs, fprs));
 		if (rc)
 			return rc;
 		if (ufpc[1] != 0 || test_fp_ctl(ufpc[0]))
 			return -EINVAL;
-		target->thread.fp_regs.fpc = ufpc[0];
+		target->thread.fpu.fpc = ufpc[0];
 	}
 
 	if (rc == 0 && count > 0)
 		rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-					target->thread.fp_regs.fprs,
-					offsetof(s390_fp_regs, fprs), -1);
-
-	if (rc == 0) {
-		if (target == current) {
-			restore_fp_ctl(&target->thread.fp_regs.fpc);
-			restore_fp_regs(target->thread.fp_regs.fprs);
-		} else if (target->thread.vxrs) {
-			int i;
-
-			for (i = 0; i < __NUM_VXRS_LOW; i++)
-				*(freg_t *)(target->thread.vxrs + i) =
-					target->thread.fp_regs.fprs[i];
-		}
-	}
+					fprs, offsetof(s390_fp_regs, fprs), -1);
+	if (rc)
+		return rc;
+
+	if (is_vx_task(target))
+		convert_fp_to_vx(target->thread.fpu.vxrs, fprs);
+	else
+		memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
 
 	return rc;
 }
@@ -1069,11 +1047,11 @@ static int s390_vxrs_low_get(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (target->thread.vxrs) {
+	if (is_vx_task(target)) {
 		if (target == current)
-			save_vx_regs(target->thread.vxrs);
+			save_fpu_regs();
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			vxrs[i] = *((__u64 *)(target->thread.vxrs + i) + 1);
+			vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
 	} else
 		memset(vxrs, 0, sizeof(vxrs));
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
@@ -1089,20 +1067,17 @@ static int s390_vxrs_low_set(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (!target->thread.vxrs) {
+	if (!is_vx_task(target)) {
 		rc = alloc_vector_registers(target);
 		if (rc)
 			return rc;
 	} else if (target == current)
-		save_vx_regs(target->thread.vxrs);
+		save_fpu_regs();
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
-	if (rc == 0) {
+	if (rc == 0)
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			*((__u64 *)(target->thread.vxrs + i) + 1) = vxrs[i];
-		if (target == current)
-			restore_vx_regs(target->thread.vxrs);
-	}
+			*((__u64 *)(target->thread.fpu.vxrs + i) + 1) = vxrs[i];
 
 	return rc;
 }
@@ -1116,10 +1091,10 @@ static int s390_vxrs_high_get(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (target->thread.vxrs) {
+	if (is_vx_task(target)) {
 		if (target == current)
-			save_vx_regs(target->thread.vxrs);
-		memcpy(vxrs, target->thread.vxrs + __NUM_VXRS_LOW,
+			save_fpu_regs();
+		memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
 		       sizeof(vxrs));
 	} else
 		memset(vxrs, 0, sizeof(vxrs));
@@ -1135,18 +1110,15 @@ static int s390_vxrs_high_set(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (!target->thread.vxrs) {
+	if (!is_vx_task(target)) {
 		rc = alloc_vector_registers(target);
 		if (rc)
 			return rc;
 	} else if (target == current)
-		save_vx_regs(target->thread.vxrs);
+		save_fpu_regs();
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				target->thread.vxrs + __NUM_VXRS_LOW, 0, -1);
-	if (rc == 0 && target == current)
-		restore_vx_regs(target->thread.vxrs);
-
+				target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1);
 	return rc;
 }
 
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 9f60467..5090d3d 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -1,5 +1,6 @@
 #include <linux/module.h>
 #include <linux/kvm_host.h>
+#include <asm/fpu-internal.h>
 #include <asm/ftrace.h>
 
 #ifdef CONFIG_FUNCTION_TRACER
@@ -8,6 +9,8 @@ EXPORT_SYMBOL(_mcount);
 #if IS_ENABLED(CONFIG_KVM)
 EXPORT_SYMBOL(sie64a);
 EXPORT_SYMBOL(sie_exit);
+EXPORT_SYMBOL(save_fpu_regs);
+EXPORT_SYMBOL(__ctl_set_vx);
 #endif
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S
deleted file mode 100644
index 43c3169..0000000
--- a/arch/s390/kernel/sclp.S
+++ /dev/null
@@ -1,351 +0,0 @@
-/*
- * Mini SCLP driver.
- *
- * Copyright IBM Corp. 2004, 2009
- *
- *   Author(s):	Peter Oberparleiter <Peter.Oberparleiter@de.ibm.com>,
- *		Heiko Carstens <heiko.carstens@de.ibm.com>,
- *
- */
-
-#include <linux/linkage.h>
-#include <asm/irq.h>
-
-LC_EXT_NEW_PSW		= 0x58			# addr of ext int handler
-LC_EXT_NEW_PSW_64	= 0x1b0			# addr of ext int handler 64 bit
-LC_EXT_INT_PARAM	= 0x80			# addr of ext int parameter
-LC_EXT_INT_CODE		= 0x86			# addr of ext int code
-LC_AR_MODE_ID		= 0xa3
-
-#
-# Subroutine which waits synchronously until either an external interruption
-# or a timeout occurs.
-#
-# Parameters:
-#   R2	= 0 for no timeout, non-zero for timeout in (approximated) seconds
-#
-# Returns:
-#   R2	= 0 on interrupt, 2 on timeout
-#   R3	= external interruption parameter if R2=0
-#
-
-_sclp_wait_int:
-	stm	%r6,%r15,24(%r15)		# save registers
-	basr	%r13,0				# get base register
-.LbaseS1:
-	ahi	%r15,-96			# create stack frame
-	la	%r8,LC_EXT_NEW_PSW		# register int handler
-	la	%r9,.LextpswS1-.LbaseS1(%r13)
-	tm	LC_AR_MODE_ID,1
-	jno	.Lesa1
-	la	%r8,LC_EXT_NEW_PSW_64		# register int handler 64 bit
-	la	%r9,.LextpswS1_64-.LbaseS1(%r13)
-.Lesa1:
-	mvc	.LoldpswS1-.LbaseS1(16,%r13),0(%r8)
-	mvc	0(16,%r8),0(%r9)
-	epsw	%r6,%r7				# set current addressing mode
-	nill	%r6,0x1				# in new psw (31 or 64 bit mode)
-	nilh	%r7,0x8000
-	stm	%r6,%r7,0(%r8)
-	lhi	%r6,0x0200			# cr mask for ext int (cr0.54)
-	ltr	%r2,%r2
-	jz	.LsetctS1
-	ahi	%r6,0x0800			# cr mask for clock int (cr0.52)
-	stck	.LtimeS1-.LbaseS1(%r13)		# initiate timeout
-	al	%r2,.LtimeS1-.LbaseS1(%r13)
-	st	%r2,.LtimeS1-.LbaseS1(%r13)
-	sckc	.LtimeS1-.LbaseS1(%r13)
-
-.LsetctS1:
-	stctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)	# enable required interrupts
-	l	%r0,.LctlS1-.LbaseS1(%r13)
-	lhi	%r1,~(0x200 | 0x800)		# clear old values
-	nr	%r1,%r0
-	or	%r1,%r6				# set new value
-	st	%r1,.LctlS1-.LbaseS1(%r13)
-	lctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)
-	st	%r0,.LctlS1-.LbaseS1(%r13)
-	lhi	%r2,2				# return code for timeout
-.LloopS1:
-	lpsw	.LwaitpswS1-.LbaseS1(%r13)	# wait until interrupt
-.LwaitS1:
-	lh	%r7,LC_EXT_INT_CODE
-	chi	%r7,EXT_IRQ_CLK_COMP		# timeout?
-	je	.LtimeoutS1
-	chi	%r7,EXT_IRQ_SERVICE_SIG		# service int?
-	jne	.LloopS1
-	sr	%r2,%r2
-	l	%r3,LC_EXT_INT_PARAM
-.LtimeoutS1:
-	lctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)	# restore interrupt setting
-	# restore old handler
-	mvc	0(16,%r8),.LoldpswS1-.LbaseS1(%r13)
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14				# return to caller
-
-	.align	8
-.LoldpswS1:
-	.long	0, 0, 0, 0			# old ext int PSW
-.LextpswS1:
-	.long	0x00080000, 0x80000000+.LwaitS1	# PSW to handle ext int
-.LextpswS1_64:
-	.quad	0, .LwaitS1			# PSW to handle ext int, 64 bit
-.LwaitpswS1:
-	.long	0x010a0000, 0x00000000+.LloopS1	# PSW to wait for ext int
-.LtimeS1:
-	.quad	0				# current time
-.LctlS1:
-	.long	0				# CT0 contents
-
-#
-# Subroutine to synchronously issue a service call.
-#
-# Parameters:
-#   R2	= command word
-#   R3	= sccb address
-#
-# Returns:
-#   R2	= 0 on success, 1 on failure
-#   R3	= sccb response code if R2 = 0
-#
-
-_sclp_servc:
-	stm	%r6,%r15,24(%r15)		# save registers
-	ahi	%r15,-96			# create stack frame
-	lr	%r6,%r2				# save command word
-	lr	%r7,%r3				# save sccb address
-.LretryS2:
-	lhi	%r2,1				# error return code
-	.insn	rre,0xb2200000,%r6,%r7		# servc
-	brc	1,.LendS2			# exit if not operational
-	brc	8,.LnotbusyS2			# go on if not busy
-	sr	%r2,%r2				# wait until no longer busy
-	bras	%r14,_sclp_wait_int
-	j	.LretryS2			# retry
-.LnotbusyS2:
-	sr	%r2,%r2				# wait until result
-	bras	%r14,_sclp_wait_int
-	sr	%r2,%r2
-	lh	%r3,6(%r7)
-.LendS2:
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14
-
-#
-# Subroutine to set up the SCLP interface.
-#
-# Parameters:
-#   R2	= 0 to activate, non-zero to deactivate
-#
-# Returns:
-#   R2	= 0 on success, non-zero on failure
-#
-
-_sclp_setup:
-	stm	%r6,%r15,24(%r15)		# save registers
-	ahi	%r15,-96			# create stack frame
-	basr	%r13,0				# get base register
-.LbaseS3:
-	l	%r6,.LsccbS0-.LbaseS3(%r13)	# prepare init mask sccb
-	mvc	0(.LinitendS3-.LinitsccbS3,%r6),.LinitsccbS3-.LbaseS3(%r13)
-	ltr	%r2,%r2				# initialization?
-	jz	.LdoinitS3			# go ahead
-	# clear masks
-	xc	.LinitmaskS3-.LinitsccbS3(8,%r6),.LinitmaskS3-.LinitsccbS3(%r6)
-.LdoinitS3:
-	l	%r2,.LwritemaskS3-.LbaseS3(%r13)# get command word
-	lr	%r3,%r6				# get sccb address
-	bras	%r14,_sclp_servc		# issue service call
-	ltr	%r2,%r2				# servc successful?
-	jnz	.LerrorS3
-	chi	%r3,0x20			# write mask successful?
-	jne	.LerrorS3
-	# check masks
-	la	%r2,.LinitmaskS3-.LinitsccbS3(%r6)
-	l	%r1,0(%r2)			# receive mask ok?
-	n	%r1,12(%r2)
-	cl	%r1,0(%r2)
-	jne	.LerrorS3
-	l	%r1,4(%r2)			# send mask ok?
-	n	%r1,8(%r2)
-	cl	%r1,4(%r2)
-	sr	%r2,%r2
-	je	.LendS3
-.LerrorS3:
-	lhi	%r2,1				# error return code
-.LendS3:
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14
-.LwritemaskS3:
-	.long	0x00780005			# SCLP command for write mask
-.LinitsccbS3:
-	.word	.LinitendS3-.LinitsccbS3
-	.byte	0,0,0,0
-	.word	0
-	.word	0
-	.word	4
-.LinitmaskS3:
-	.long	0x80000000
-	.long	0x40000000
-	.long	0
-	.long	0
-.LinitendS3:
-
-#
-# Subroutine which prints a given text to the SCLP console.
-#
-# Parameters:
-#   R2	= address of nil-terminated ASCII text
-#
-# Returns:
-#   R2	= 0 on success, 1 on failure
-#
-
-_sclp_print:
-	stm	%r6,%r15,24(%r15)		# save registers
-	ahi	%r15,-96			# create stack frame
-	basr	%r13,0				# get base register
-.LbaseS4:
-	l	%r8,.LsccbS0-.LbaseS4(%r13)	# prepare write data sccb
-	mvc	0(.LmtoS4-.LwritesccbS4,%r8),.LwritesccbS4-.LbaseS4(%r13)
-	la	%r7,.LmtoS4-.LwritesccbS4(%r8)	# current mto addr
-	sr	%r0,%r0
-	l	%r10,.Lascebc-.LbaseS4(%r13)	# address of translation table
-.LinitmtoS4:
-	# initialize mto
-	mvc	0(.LmtoendS4-.LmtoS4,%r7),.LmtoS4-.LbaseS4(%r13)
-	lhi	%r6,.LmtoendS4-.LmtoS4		# current mto length
-.LloopS4:
-	ic	%r0,0(%r2)			# get character
-	ahi	%r2,1
-	ltr	%r0,%r0				# end of string?
-	jz	.LfinalizemtoS4
-	chi	%r0,0x0a			# end of line (NL)?
-	jz	.LfinalizemtoS4
-	stc	%r0,0(%r6,%r7)			# copy to mto
-	la	%r11,0(%r6,%r7)
-	tr	0(1,%r11),0(%r10)		# translate to EBCDIC
-	ahi	%r6,1
-	j	.LloopS4
-.LfinalizemtoS4:
-	sth	%r6,0(%r7)			# update mto length
-	lh	%r9,.LmdbS4-.LwritesccbS4(%r8)	# update mdb length
-	ar	%r9,%r6
-	sth	%r9,.LmdbS4-.LwritesccbS4(%r8)
-	lh	%r9,.LevbufS4-.LwritesccbS4(%r8)# update evbuf length
-	ar	%r9,%r6
-	sth	%r9,.LevbufS4-.LwritesccbS4(%r8)
-	lh	%r9,0(%r8)			# update sccb length
-	ar	%r9,%r6
-	sth	%r9,0(%r8)
-	ar	%r7,%r6				# update current mto address
-	ltr	%r0,%r0				# more characters?
-	jnz	.LinitmtoS4
-	l	%r2,.LwritedataS4-.LbaseS4(%r13)# write data
-	lr	%r3,%r8
-	bras	%r14,_sclp_servc
-	ltr	%r2,%r2				# servc successful?
-	jnz	.LendS4
-	chi	%r3,0x20			# write data successful?
-	je	.LendS4
-	lhi	%r2,1				# error return code
-.LendS4:
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14
-
-#
-# Function which prints a given text to the SCLP console.
-#
-# Parameters:
-#   R2	= address of nil-terminated ASCII text
-#
-# Returns:
-#   R2	= 0 on success, 1 on failure
-#
-
-ENTRY(_sclp_print_early)
-	stm	%r6,%r15,24(%r15)		# save registers
-	ahi	%r15,-96			# create stack frame
-	tm	LC_AR_MODE_ID,1
-	jno	.Lesa2
-	ahi	%r15,-80
-	stmh	%r6,%r15,96(%r15)		# store upper register halves
-.Lesa2:
-	lr	%r10,%r2			# save string pointer
-	lhi	%r2,0
-	bras	%r14,_sclp_setup		# enable console
-	ltr	%r2,%r2
-	jnz	.LendS5
-	lr	%r2,%r10
-	bras	%r14,_sclp_print		# print string
-	ltr	%r2,%r2
-	jnz	.LendS5
-	lhi	%r2,1
-	bras	%r14,_sclp_setup		# disable console
-.LendS5:
-	tm	LC_AR_MODE_ID,1
-	jno	.Lesa3
-	lgfr	%r2,%r2				# sign extend return value
-	lmh	%r6,%r15,96(%r15)		# restore upper register halves
-	ahi	%r15,80
-.Lesa3:
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14
-
-.LwritedataS4:
-	.long	0x00760005			# SCLP command for write data
-.LwritesccbS4:
-	# sccb
-	.word	.LmtoS4-.LwritesccbS4
-	.byte	0
-	.byte	0,0,0
-	.word	0
-
-	# evbuf
-.LevbufS4:
-	.word	.LmtoS4-.LevbufS4
-	.byte	0x02
-	.byte	0
-	.word	0
-
-.LmdbS4:
-	# mdb
-	.word	.LmtoS4-.LmdbS4
-	.word	1
-	.long	0xd4c4c240
-	.long	1
-
-	# go
-.LgoS4:
-	.word	.LmtoS4-.LgoS4
-	.word	1
-	.long	0
-	.byte	0,0,0,0,0,0,0,0
-	.byte	0,0,0
-	.byte	0
-	.byte	0,0,0,0,0,0,0
-	.byte	0
-	.word	0
-	.byte	0,0,0,0,0,0,0,0,0,0
-	.byte	0,0,0,0,0,0,0,0
-	.byte	0,0,0,0,0,0,0,0
-
-.LmtoS4:
-	.word	.LmtoendS4-.LmtoS4
-	.word	4
-	.word	0x1000
-	.byte	0
-	.byte	0,0,0
-.LmtoendS4:
-
-	# Global constants
-.LsccbS0:
-	.long	_sclp_work_area
-.Lascebc:
-	.long	_ascebc
-
-.section .data,"aw",@progbits
-	.balign 4096
-_sclp_work_area:
-	.fill	4096
-.previous
diff --git a/arch/s390/kernel/sclp.c b/arch/s390/kernel/sclp.c
new file mode 100644
index 0000000..fa0bdff
--- /dev/null
+++ b/arch/s390/kernel/sclp.c
@@ -0,0 +1,160 @@
+/*
+ *    Copyright IBM Corp. 2015
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <asm/ebcdic.h>
+#include <asm/irq.h>
+#include <asm/lowcore.h>
+#include <asm/processor.h>
+#include <asm/sclp.h>
+
+static char _sclp_work_area[4096] __aligned(PAGE_SIZE);
+
+static void _sclp_wait_int(void)
+{
+	unsigned long cr0, cr0_new, psw_mask, addr;
+	psw_t psw_ext_save, psw_wait;
+
+	__ctl_store(cr0, 0, 0);
+	cr0_new = cr0 | 0x200;
+	__ctl_load(cr0_new, 0, 0);
+
+	psw_ext_save = S390_lowcore.external_new_psw;
+	psw_mask = __extract_psw() & (PSW_MASK_EA | PSW_MASK_BA);
+	S390_lowcore.external_new_psw.mask = psw_mask;
+	psw_wait.mask = psw_mask | PSW_MASK_EXT | PSW_MASK_WAIT;
+	S390_lowcore.ext_int_code = 0;
+
+	do {
+		asm volatile(
+			"	larl	%[addr],0f\n"
+			"	stg	%[addr],%[psw_wait_addr]\n"
+			"	stg	%[addr],%[psw_ext_addr]\n"
+			"	lpswe	%[psw_wait]\n"
+			"0:\n"
+			: [addr] "=&d" (addr),
+			  [psw_wait_addr] "=Q" (psw_wait.addr),
+			  [psw_ext_addr] "=Q" (S390_lowcore.external_new_psw.addr)
+			: [psw_wait] "Q" (psw_wait)
+			: "cc", "memory");
+	} while (S390_lowcore.ext_int_code != EXT_IRQ_SERVICE_SIG);
+
+	__ctl_load(cr0, 0, 0);
+	S390_lowcore.external_new_psw = psw_ext_save;
+}
+
+static int _sclp_servc(unsigned int cmd, char *sccb)
+{
+	unsigned int cc;
+
+	do {
+		asm volatile(
+			"	.insn	rre,0xb2200000,%1,%2\n"
+			"	ipm	%0\n"
+			: "=d" (cc) : "d" (cmd), "a" (sccb)
+			: "cc", "memory");
+		cc >>= 28;
+		if (cc == 3)
+			return -EINVAL;
+		_sclp_wait_int();
+	} while (cc != 0);
+	return (*(unsigned short *)(sccb + 6) == 0x20) ? 0 : -EIO;
+}
+
+static int _sclp_setup(int disable)
+{
+	static unsigned char init_sccb[] = {
+		0x00, 0x1c,
+		0x00, 0x00, 0x00, 0x00,	0x00, 0x00, 0x00, 0x00,
+		0x00, 0x04,
+		0x80, 0x00, 0x00, 0x00,	0x40, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00,	0x00, 0x00, 0x00, 0x00
+	};
+	unsigned int *masks;
+	int rc;
+
+	memcpy(_sclp_work_area, init_sccb, 28);
+	masks = (unsigned int *)(_sclp_work_area + 12);
+	if (disable)
+		memset(masks, 0, 16);
+	/* SCLP write mask */
+	rc = _sclp_servc(0x00780005, _sclp_work_area);
+	if (rc)
+		return rc;
+	if ((masks[0] & masks[3]) != masks[0] ||
+	    (masks[1] & masks[2]) != masks[1])
+		return -EIO;
+	return 0;
+}
+
+static int _sclp_print(const char *str)
+{
+	static unsigned char write_head[] = {
+		/* sccb header */
+		0x00, 0x52,					/* 0 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 2 */
+		/* evbuf */
+		0x00, 0x4a,					/* 8 */
+		0x02, 0x00, 0x00, 0x00,				/* 10 */
+		/* mdb */
+		0x00, 0x44,					/* 14 */
+		0x00, 0x01,					/* 16 */
+		0xd4, 0xc4, 0xc2, 0x40,				/* 18 */
+		0x00, 0x00, 0x00, 0x01,				/* 22 */
+		/* go */
+		0x00, 0x38,					/* 26 */
+		0x00, 0x01,					/* 28 */
+		0x00, 0x00, 0x00, 0x00,				/* 30 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 34 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 42 */
+		0x00, 0x00, 0x00, 0x00,				/* 50 */
+		0x00, 0x00,					/* 54 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 56 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 64 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 72 */
+		0x00, 0x00,					/* 80 */
+	};
+	static unsigned char write_mto[] = {
+		/* mto	*/
+		0x00, 0x0a,					/* 0 */
+		0x00, 0x04,					/* 2 */
+		0x10, 0x00,					/* 4 */
+		0x00, 0x00, 0x00, 0x00				/* 6 */
+	};
+	unsigned char *ptr, ch;
+	unsigned int count;
+
+	memcpy(_sclp_work_area, write_head, sizeof(write_head));
+	ptr = _sclp_work_area + sizeof(write_head);
+	do {
+		memcpy(ptr, write_mto, sizeof(write_mto));
+		for (count = sizeof(write_mto); (ch = *str++) != 0; count++) {
+			if (ch == 0x0a)
+				break;
+			ptr[count] = _ascebc[ch];
+		}
+		/* Update length fields in mto, mdb, evbuf and sccb */
+		*(unsigned short *) ptr = count;
+		*(unsigned short *)(_sclp_work_area + 14) += count;
+		*(unsigned short *)(_sclp_work_area + 8) += count;
+		*(unsigned short *)(_sclp_work_area + 0) += count;
+		ptr += count;
+	} while (ch != 0);
+
+	/* SCLP write data */
+	return _sclp_servc(0x00760005, _sclp_work_area);
+}
+
+int _sclp_print_early(const char *str)
+{
+	int rc;
+
+	rc = _sclp_setup(0);
+	if (rc)
+		return rc;
+	rc = _sclp_print(str);
+	if (rc)
+		return rc;
+	return _sclp_setup(1);
+}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f7f027c..ce0cbd6 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -62,6 +62,7 @@
 #include <asm/os_info.h>
 #include <asm/sclp.h>
 #include <asm/sysinfo.h>
+#include <asm/numa.h>
 #include "entry.h"
 
 /*
@@ -76,7 +77,7 @@ EXPORT_SYMBOL(console_devno);
 unsigned int console_irq = -1;
 EXPORT_SYMBOL(console_irq);
 
-unsigned long elf_hwcap = 0;
+unsigned long elf_hwcap __read_mostly = 0;
 char elf_platform[ELF_PLATFORM_SIZE];
 
 int __initdata memory_end_set;
@@ -688,7 +689,7 @@ static void __init setup_memory(void)
 /*
  * Setup hardware capabilities.
  */
-static void __init setup_hwcaps(void)
+static int __init setup_hwcaps(void)
 {
 	static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
 	struct cpuid cpu_id;
@@ -754,9 +755,11 @@ static void __init setup_hwcaps(void)
 		elf_hwcap |= HWCAP_S390_TE;
 
 	/*
-	 * Vector extension HWCAP_S390_VXRS is bit 11.
+	 * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension
+	 * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX
+	 * instead of facility bit 129.
 	 */
-	if (test_facility(129))
+	if (MACHINE_HAS_VX)
 		elf_hwcap |= HWCAP_S390_VXRS;
 	get_cpu_id(&cpu_id);
 	add_device_randomness(&cpu_id, sizeof(cpu_id));
@@ -793,7 +796,9 @@ static void __init setup_hwcaps(void)
 		strcpy(elf_platform, "z13");
 		break;
 	}
+	return 0;
 }
+arch_initcall(setup_hwcaps);
 
 /*
  * Add system information as device randomness
@@ -879,13 +884,7 @@ void __init setup_arch(char **cmdline_p)
 	setup_lowcore();
 	smp_fill_possible_mask();
         cpu_init();
-
-	/*
-	 * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
-	 */
-	setup_hwcaps();
-
-	HPAGE_SHIFT = MACHINE_HAS_HPAGE ? 20 : 0;
+	numa_setup();
 
 	/*
 	 * Create kernel page tables and switch to virtual addressing.
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index c551f22..9549af1 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -105,32 +105,13 @@ struct rt_sigframe
 static void store_sigregs(void)
 {
 	save_access_regs(current->thread.acrs);
-	save_fp_ctl(&current->thread.fp_regs.fpc);
-	if (current->thread.vxrs) {
-		int i;
-
-		save_vx_regs(current->thread.vxrs);
-		for (i = 0; i < __NUM_FPRS; i++)
-			current->thread.fp_regs.fprs[i] =
-				*(freg_t *)(current->thread.vxrs + i);
-	} else
-		save_fp_regs(current->thread.fp_regs.fprs);
+	save_fpu_regs();
 }
 
 /* Load registers after signal return */
 static void load_sigregs(void)
 {
 	restore_access_regs(current->thread.acrs);
-	/* restore_fp_ctl is done in restore_sigregs */
-	if (current->thread.vxrs) {
-		int i;
-
-		for (i = 0; i < __NUM_FPRS; i++)
-			*(freg_t *)(current->thread.vxrs + i) =
-				current->thread.fp_regs.fprs[i];
-		restore_vx_regs(current->thread.vxrs);
-	} else
-		restore_fp_regs(current->thread.fp_regs.fprs);
 }
 
 /* Returns non-zero on fault. */
@@ -146,8 +127,7 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
 	memcpy(&user_sregs.regs.acrs, current->thread.acrs,
 	       sizeof(user_sregs.regs.acrs));
-	memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
-	       sizeof(user_sregs.fpregs));
+	fpregs_store(&user_sregs.fpregs, &current->thread.fpu);
 	if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs)))
 		return -EFAULT;
 	return 0;
@@ -166,8 +146,8 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI))
 		return -EINVAL;
 
-	/* Loading the floating-point-control word can fail. Do that first. */
-	if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+	/* Test the floating-point-control word. */
+	if (test_fp_ctl(user_sregs.fpregs.fpc))
 		return -EINVAL;
 
 	/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
@@ -185,8 +165,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
 	       sizeof(current->thread.acrs));
 
-	memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
-	       sizeof(current->thread.fp_regs));
+	fpregs_load(&user_sregs.fpregs, &current->thread.fpu);
 
 	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
 	return 0;
@@ -200,13 +179,13 @@ static int save_sigregs_ext(struct pt_regs *regs,
 	int i;
 
 	/* Save vector registers to signal stack */
-	if (current->thread.vxrs) {
+	if (is_vx_task(current)) {
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+			vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
 		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
 				   sizeof(sregs_ext->vxrs_low)) ||
 		    __copy_to_user(&sregs_ext->vxrs_high,
-				   current->thread.vxrs + __NUM_VXRS_LOW,
+				   current->thread.fpu.vxrs + __NUM_VXRS_LOW,
 				   sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 	}
@@ -220,15 +199,15 @@ static int restore_sigregs_ext(struct pt_regs *regs,
 	int i;
 
 	/* Restore vector registers from signal stack */
-	if (current->thread.vxrs) {
+	if (is_vx_task(current)) {
 		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
 				     sizeof(sregs_ext->vxrs_low)) ||
-		    __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
 				     &sregs_ext->vxrs_high,
 				     sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			*((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+			*((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
 	}
 	return 0;
 }
@@ -243,6 +222,7 @@ SYSCALL_DEFINE0(sigreturn)
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
 		goto badframe;
 	set_current_blocked(&set);
+	save_fpu_regs();
 	if (restore_sigregs(regs, &frame->sregs))
 		goto badframe;
 	if (restore_sigregs_ext(regs, &frame->sregs_ext))
@@ -266,6 +246,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	set_current_blocked(&set);
 	if (restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
+	save_fpu_regs();
 	if (restore_sigregs(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 	if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
@@ -400,7 +381,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	uc_flags = 0;
 	if (MACHINE_HAS_VX) {
 		frame_size += sizeof(_sigregs_ext);
-		if (current->thread.vxrs)
+		if (is_vx_task(current))
 			uc_flags |= UC_VXRS;
 	}
 	frame = get_sigframe(&ksig->ka, regs, frame_size);
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 6f54c17..c6355e6 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -532,8 +532,8 @@ EXPORT_SYMBOL(smp_ctl_clear_bit);
 
 #ifdef CONFIG_CRASH_DUMP
 
-static void __smp_store_cpu_state(struct save_area_ext *sa_ext, u16 address,
-				  int is_boot_cpu)
+static void __init __smp_store_cpu_state(struct save_area_ext *sa_ext,
+					 u16 address, int is_boot_cpu)
 {
 	void *lc = (void *)(unsigned long) store_prefix();
 	unsigned long vx_sa;
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 1acad02..f3f4a13 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -276,9 +276,9 @@ SYSCALL(sys_ni_syscall,compat_sys_s390_fadvise64_64)
 SYSCALL(sys_statfs64,compat_sys_statfs64)
 SYSCALL(sys_fstatfs64,compat_sys_fstatfs64)
 SYSCALL(sys_remap_file_pages,compat_sys_remap_file_pages)
-NI_SYSCALL						/* 268 sys_mbind */
-NI_SYSCALL						/* 269 sys_get_mempolicy */
-NI_SYSCALL						/* 270 sys_set_mempolicy */
+SYSCALL(sys_mbind,compat_sys_mbind)
+SYSCALL(sys_get_mempolicy,compat_sys_get_mempolicy)
+SYSCALL(sys_set_mempolicy,compat_sys_set_mempolicy)
 SYSCALL(sys_mq_open,compat_sys_mq_open)
 SYSCALL(sys_mq_unlink,compat_sys_mq_unlink)
 SYSCALL(sys_mq_timedsend,compat_sys_mq_timedsend)
@@ -295,7 +295,7 @@ SYSCALL(sys_ioprio_get,compat_sys_ioprio_get)
 SYSCALL(sys_inotify_init,sys_inotify_init)
 SYSCALL(sys_inotify_add_watch,compat_sys_inotify_add_watch)	/* 285 */
 SYSCALL(sys_inotify_rm_watch,compat_sys_inotify_rm_watch)
-NI_SYSCALL						/* 287 sys_migrate_pages */
+SYSCALL(sys_migrate_pages,compat_sys_migrate_pages)
 SYSCALL(sys_openat,compat_sys_openat)
 SYSCALL(sys_mkdirat,compat_sys_mkdirat)
 SYSCALL(sys_mknodat,compat_sys_mknodat)			/* 290 */
@@ -318,7 +318,7 @@ SYSCALL(sys_splice,compat_sys_splice)
 SYSCALL(sys_sync_file_range,compat_sys_s390_sync_file_range)
 SYSCALL(sys_tee,compat_sys_tee)
 SYSCALL(sys_vmsplice,compat_sys_vmsplice)
-NI_SYSCALL						/* 310 sys_move_pages */
+SYSCALL(sys_move_pages,compat_sys_move_pages)
 SYSCALL(sys_getcpu,compat_sys_getcpu)
 SYSCALL(sys_epoll_pwait,compat_sys_epoll_pwait)
 SYSCALL(sys_utimes,compat_sys_utimes)
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 9e733d9..627887b 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -58,6 +58,9 @@ EXPORT_SYMBOL_GPL(sched_clock_base_cc);
 
 static DEFINE_PER_CPU(struct clock_event_device, comparators);
 
+ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier);
+EXPORT_SYMBOL(s390_epoch_delta_notifier);
+
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
@@ -752,7 +755,7 @@ static void clock_sync_cpu(struct clock_sync_data *sync)
 static int etr_sync_clock(void *data)
 {
 	static int first;
-	unsigned long long clock, old_clock, delay, delta;
+	unsigned long long clock, old_clock, clock_delta, delay, delta;
 	struct clock_sync_data *etr_sync;
 	struct etr_aib *sync_port, *aib;
 	int port;
@@ -789,6 +792,9 @@ static int etr_sync_clock(void *data)
 		delay = (unsigned long long)
 			(aib->edf2.etv - sync_port->edf2.etv) << 32;
 		delta = adjust_time(old_clock, clock, delay);
+		clock_delta = clock - old_clock;
+		atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0,
+					   &clock_delta);
 		etr_sync->fixup_cc = delta;
 		fixup_clock_comparator(delta);
 		/* Verify that the clock is properly set. */
@@ -1526,7 +1532,7 @@ void stp_island_check(void)
 static int stp_sync_clock(void *data)
 {
 	static int first;
-	unsigned long long old_clock, delta;
+	unsigned long long old_clock, delta, new_clock, clock_delta;
 	struct clock_sync_data *stp_sync;
 	int rc;
 
@@ -1551,7 +1557,11 @@ static int stp_sync_clock(void *data)
 		old_clock = get_tod_clock();
 		rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0);
 		if (rc == 0) {
-			delta = adjust_time(old_clock, get_tod_clock(), 0);
+			new_clock = get_tod_clock();
+			delta = adjust_time(old_clock, new_clock, 0);
+			clock_delta = new_clock - old_clock;
+			atomic_notifier_call_chain(&s390_epoch_delta_notifier,
+						   0, &clock_delta);
 			fixup_clock_comparator(delta);
 			rc = chsc_sstpi(stp_page, &stp_info,
 					sizeof(struct stp_sstpi));
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 5728c5b..bf05e7f 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -18,7 +18,10 @@
 #include <linux/cpu.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/nodemask.h>
+#include <linux/node.h>
 #include <asm/sysinfo.h>
+#include <asm/numa.h>
 
 #define PTF_HORIZONTAL	(0UL)
 #define PTF_VERTICAL	(1UL)
@@ -37,8 +40,10 @@ static struct sysinfo_15_1_x *tl_info;
 static int topology_enabled = 1;
 static DECLARE_WORK(topology_work, topology_work_fn);
 
-/* topology_lock protects the socket and book linked lists */
-static DEFINE_SPINLOCK(topology_lock);
+/*
+ * Socket/Book linked lists and per_cpu(cpu_topology) updates are
+ * protected by "sched_domains_mutex".
+ */
 static struct mask_info socket_info;
 static struct mask_info book_info;
 
@@ -188,7 +193,6 @@ static void tl_to_masks(struct sysinfo_15_1_x *info)
 {
 	struct cpuid cpu_id;
 
-	spin_lock_irq(&topology_lock);
 	get_cpu_id(&cpu_id);
 	clear_masks();
 	switch (cpu_id.machine) {
@@ -199,7 +203,6 @@ static void tl_to_masks(struct sysinfo_15_1_x *info)
 	default:
 		__tl_to_masks_generic(info);
 	}
-	spin_unlock_irq(&topology_lock);
 }
 
 static void topology_update_polarization_simple(void)
@@ -244,10 +247,8 @@ int topology_set_cpu_management(int fc)
 
 static void update_cpu_masks(void)
 {
-	unsigned long flags;
 	int cpu;
 
-	spin_lock_irqsave(&topology_lock, flags);
 	for_each_possible_cpu(cpu) {
 		per_cpu(cpu_topology, cpu).thread_mask = cpu_thread_map(cpu);
 		per_cpu(cpu_topology, cpu).core_mask = cpu_group_map(&socket_info, cpu);
@@ -259,7 +260,7 @@ static void update_cpu_masks(void)
 			per_cpu(cpu_topology, cpu).book_id = cpu;
 		}
 	}
-	spin_unlock_irqrestore(&topology_lock, flags);
+	numa_update_cpu_topology();
 }
 
 void store_topology(struct sysinfo_15_1_x *info)
@@ -274,21 +275,21 @@ int arch_update_cpu_topology(void)
 {
 	struct sysinfo_15_1_x *info = tl_info;
 	struct device *dev;
-	int cpu;
+	int cpu, rc = 0;
 
-	if (!MACHINE_HAS_TOPOLOGY) {
-		update_cpu_masks();
-		topology_update_polarization_simple();
-		return 0;
+	if (MACHINE_HAS_TOPOLOGY) {
+		rc = 1;
+		store_topology(info);
+		tl_to_masks(info);
 	}
-	store_topology(info);
-	tl_to_masks(info);
 	update_cpu_masks();
+	if (!MACHINE_HAS_TOPOLOGY)
+		topology_update_polarization_simple();
 	for_each_online_cpu(cpu) {
 		dev = get_cpu_device(cpu);
 		kobject_uevent(&dev->kobj, KOBJ_CHANGE);
 	}
-	return 1;
+	return rc;
 }
 
 static void topology_work_fn(struct work_struct *work)
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 4d96c9f..9861613 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -19,7 +19,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
-#include <asm/switch_to.h>
+#include <asm/fpu-internal.h>
 #include "entry.h"
 
 int show_unhandled_signals = 1;
@@ -151,7 +151,7 @@ DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
 DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
 	      "transaction constraint exception")
 
-static inline void do_fp_trap(struct pt_regs *regs, int fpc)
+static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc)
 {
 	int si_code = 0;
 	/* FPC[2] is Data Exception Code */
@@ -227,7 +227,7 @@ DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
 int alloc_vector_registers(struct task_struct *tsk)
 {
 	__vector128 *vxrs;
-	int i;
+	freg_t *fprs;
 
 	/* Allocate vector register save area. */
 	vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS,
@@ -236,15 +236,13 @@ int alloc_vector_registers(struct task_struct *tsk)
 		return -ENOMEM;
 	preempt_disable();
 	if (tsk == current)
-		save_fp_regs(tsk->thread.fp_regs.fprs);
+		save_fpu_regs();
 	/* Copy the 16 floating point registers */
-	for (i = 0; i < 16; i++)
-		*(freg_t *) &vxrs[i] = tsk->thread.fp_regs.fprs[i];
-	tsk->thread.vxrs = vxrs;
-	if (tsk == current) {
-		__ctl_set_bit(0, 17);
-		restore_vx_regs(vxrs);
-	}
+	convert_fp_to_vx(vxrs, tsk->thread.fpu.fprs);
+	fprs = tsk->thread.fpu.fprs;
+	tsk->thread.fpu.vxrs = vxrs;
+	tsk->thread.fpu.flags |= FPU_USE_VX;
+	kfree(fprs);
 	preempt_enable();
 	return 0;
 }
@@ -259,8 +257,8 @@ void vector_exception(struct pt_regs *regs)
 	}
 
 	/* get vector interrupt code from fpc */
-	asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
-	vic = (current->thread.fp_regs.fpc & 0xf00) >> 8;
+	save_fpu_regs();
+	vic = (current->thread.fpu.fpc & 0xf00) >> 8;
 	switch (vic) {
 	case 1: /* invalid vector operation */
 		si_code = FPE_FLTINV;
@@ -297,22 +295,22 @@ void data_exception(struct pt_regs *regs)
 
 	location = get_trap_ip(regs);
 
-	asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
+	save_fpu_regs();
 	/* Check for vector register enablement */
-	if (MACHINE_HAS_VX && !current->thread.vxrs &&
-	    (current->thread.fp_regs.fpc & FPC_DXC_MASK) == 0xfe00) {
+	if (MACHINE_HAS_VX && !is_vx_task(current) &&
+	    (current->thread.fpu.fpc & FPC_DXC_MASK) == 0xfe00) {
 		alloc_vector_registers(current);
 		/* Vector data exception is suppressing, rewind psw. */
 		regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
 		clear_pt_regs_flag(regs, PIF_PER_TRAP);
 		return;
 	}
-	if (current->thread.fp_regs.fpc & FPC_DXC_MASK)
+	if (current->thread.fpu.fpc & FPC_DXC_MASK)
 		signal = SIGFPE;
 	else
 		signal = SIGILL;
 	if (signal == SIGFPE)
-		do_fp_trap(regs, current->thread.fp_regs.fpc);
+		do_fp_trap(regs, current->thread.fpu.fpc);
 	else if (signal)
 		do_trap(regs, signal, ILL_ILLOPN, "data exception");
 }
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index 8ad2b34..ee8a18e 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -13,7 +13,7 @@ KBUILD_AFLAGS_31 += -m31 -s
 KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
 KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
-			$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+			$(call cc-ldoption, -Wl$(comma)--hash-style=both)
 
 $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
 $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 2a8ddfd..c4b03f9 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -13,7 +13,7 @@ KBUILD_AFLAGS_64 += -m64 -s
 KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
 KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
-			$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+			$(call cc-ldoption, -Wl$(comma)--hash-style=both)
 
 $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
 $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index e53d359..b9ce650 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -28,6 +28,7 @@ static atomic64_t virt_timer_elapsed;
 static DEFINE_PER_CPU(u64, mt_cycles[32]);
 static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
 static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
+static DEFINE_PER_CPU(u64, mt_scaling_jiffies);
 
 static inline u64 get_vtimer(void)
 {
@@ -85,7 +86,8 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 	S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
 
 	/* Do MT utilization calculation */
-	if (smp_cpu_mtid) {
+	if (smp_cpu_mtid &&
+	    time_after64(jiffies_64, __this_cpu_read(mt_scaling_jiffies))) {
 		u64 cycles_new[32], *cycles_old;
 		u64 delta, mult, div;
 
@@ -105,6 +107,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 				       sizeof(u64) * (smp_cpu_mtid + 1));
 			}
 		}
+		__this_cpu_write(mt_scaling_jiffies, jiffies_64);
 	}
 
 	user = S390_lowcore.user_timer - ti->user_timer;
@@ -376,4 +379,11 @@ void vtime_init(void)
 {
 	/* set initial cpu timer */
 	set_vtimer(VTIMER_MAX_SLICE);
+	/* Setup initial MT scaling values */
+	if (smp_cpu_mtid) {
+		__this_cpu_write(mt_scaling_jiffies, jiffies);
+		__this_cpu_write(mt_scaling_mult, 1);
+		__this_cpu_write(mt_scaling_div, 1);
+		stcctm5(smp_cpu_mtid + 1, this_cpu_ptr(mt_cycles));
+	}
 }
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index fc7ec95..5fbfb88 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -27,13 +27,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 
 	start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
 	end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
+	vcpu->stat.diagnose_10++;
 
 	if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
 	    || start < 2 * PAGE_SIZE)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
-	vcpu->stat.diagnose_10++;
 
 	/*
 	 * We checked for start >= end above, so lets check for the
@@ -75,6 +75,9 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 	u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
 	u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
 
+	VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx",
+		   vcpu->run->s.regs.gprs[rx]);
+	vcpu->stat.diagnose_258++;
 	if (vcpu->run->s.regs.gprs[rx] & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
@@ -85,6 +88,9 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 
 	switch (parm.subcode) {
 	case 0: /* TOKEN */
+		VCPU_EVENT(vcpu, 3, "pageref token addr 0x%llx "
+			   "select mask 0x%llx compare mask 0x%llx",
+			   parm.token_addr, parm.select_mask, parm.compare_mask);
 		if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
 			/*
 			 * If the pagefault handshake is already activated,
@@ -114,6 +120,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 		 * the cancel, therefore to reduce code complexity, we assume
 		 * all outstanding tokens are already pending.
 		 */
+		VCPU_EVENT(vcpu, 3, "pageref cancel addr 0x%llx", parm.token_addr);
 		if (parm.token_addr || parm.select_mask ||
 		    parm.compare_mask || parm.zarch)
 			return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -174,7 +181,8 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
 	unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
 	unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff;
 
-	VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
+	VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode);
+	vcpu->stat.diagnose_308++;
 	switch (subcode) {
 	case 3:
 		vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
@@ -202,6 +210,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 {
 	int ret;
 
+	vcpu->stat.diagnose_500++;
 	/* No virtio-ccw notification? Get out quickly. */
 	if (!vcpu->kvm->arch.css_support ||
 	    (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index e97b345..47518a3 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -473,10 +473,45 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu)
 		vcpu->arch.sie_block->iprcc &= ~PGM_PER;
 }
 
+#define pssec(vcpu) (vcpu->arch.sie_block->gcr[1] & _ASCE_SPACE_SWITCH)
+#define hssec(vcpu) (vcpu->arch.sie_block->gcr[13] & _ASCE_SPACE_SWITCH)
+#define old_ssec(vcpu) ((vcpu->arch.sie_block->tecmc >> 31) & 0x1)
+#define old_as_is_home(vcpu) !(vcpu->arch.sie_block->tecmc & 0xffff)
+
 void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
 {
+	int new_as;
+
 	if (debug_exit_required(vcpu))
 		vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
 
 	filter_guest_per_event(vcpu);
+
+	/*
+	 * Only RP, SAC, SACF, PT, PTI, PR, PC instructions can trigger
+	 * a space-switch event. PER events enforce space-switch events
+	 * for these instructions. So if no PER event for the guest is left,
+	 * we might have to filter the space-switch element out, too.
+	 */
+	if (vcpu->arch.sie_block->iprcc == PGM_SPACE_SWITCH) {
+		vcpu->arch.sie_block->iprcc = 0;
+		new_as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+
+		/*
+		 * If the AS changed from / to home, we had RP, SAC or SACF
+		 * instruction. Check primary and home space-switch-event
+		 * controls. (theoretically home -> home produced no event)
+		 */
+		if (((new_as == PSW_AS_HOME) ^ old_as_is_home(vcpu)) &&
+		     (pssec(vcpu) || hssec(vcpu)))
+			vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
+
+		/*
+		 * PT, PTI, PR, PC instruction operate on primary AS only. Check
+		 * if the primary-space-switch-event control was or got set.
+		 */
+		if (new_as == PSW_AS_PRIMARY && !old_as_is_home(vcpu) &&
+		    (pssec(vcpu) || old_ssec(vcpu)))
+			vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
+	}
 }
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index c98d897..b277d50 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -30,7 +30,6 @@
 #define IOINT_SCHID_MASK 0x0000ffff
 #define IOINT_SSID_MASK 0x00030000
 #define IOINT_CSSID_MASK 0x03fc0000
-#define IOINT_AI_MASK 0x04000000
 #define PFAULT_INIT 0x0600
 #define PFAULT_DONE 0x0680
 #define VIRTIO_PARAM 0x0d00
@@ -72,9 +71,13 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
 
 static int ckc_irq_pending(struct kvm_vcpu *vcpu)
 {
+	preempt_disable();
 	if (!(vcpu->arch.sie_block->ckc <
-	      get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
+	      get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) {
+		preempt_enable();
 		return 0;
+	}
+	preempt_enable();
 	return ckc_interrupts_enabled(vcpu);
 }
 
@@ -311,8 +314,8 @@ static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
 	li->irq.ext.ext_params2 = 0;
 	spin_unlock(&li->lock);
 
-	VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx",
-		   0, ext.ext_params2);
+	VCPU_EVENT(vcpu, 4, "deliver: pfault init token 0x%llx",
+		   ext.ext_params2);
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 					 KVM_S390_INT_PFAULT_INIT,
 					 0, ext.ext_params2);
@@ -368,7 +371,7 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
 	spin_unlock(&fi->lock);
 
 	if (deliver) {
-		VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+		VCPU_EVENT(vcpu, 3, "deliver: machine check mcic 0x%llx",
 			   mchk.mcic);
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 						 KVM_S390_MCHK,
@@ -403,7 +406,7 @@ static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	int rc;
 
-	VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
+	VCPU_EVENT(vcpu, 3, "%s", "deliver: cpu restart");
 	vcpu->stat.deliver_restart_signal++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
 
@@ -427,7 +430,6 @@ static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu)
 	clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
 	spin_unlock(&li->lock);
 
-	VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", prefix.address);
 	vcpu->stat.deliver_prefix_signal++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 					 KVM_S390_SIGP_SET_PREFIX,
@@ -450,7 +452,7 @@ static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu)
 		clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
 	spin_unlock(&li->lock);
 
-	VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
+	VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp emerg");
 	vcpu->stat.deliver_emergency_signal++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
 					 cpu_addr, 0);
@@ -477,7 +479,7 @@ static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
 	clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
 	spin_unlock(&li->lock);
 
-	VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
+	VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp ext call");
 	vcpu->stat.deliver_external_call++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 					 KVM_S390_INT_EXTERNAL_CALL,
@@ -506,7 +508,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 	memset(&li->irq.pgm, 0, sizeof(pgm_info));
 	spin_unlock(&li->lock);
 
-	VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
+	VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d",
 		   pgm_info.code, ilc);
 	vcpu->stat.deliver_program_int++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
@@ -622,7 +624,7 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
 	clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
 	spin_unlock(&fi->lock);
 
-	VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
+	VCPU_EVENT(vcpu, 4, "deliver: sclp parameter 0x%x",
 		   ext.ext_params);
 	vcpu->stat.deliver_service_signal++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
@@ -651,9 +653,6 @@ static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
 					struct kvm_s390_interrupt_info,
 					list);
 	if (inti) {
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
-				KVM_S390_INT_PFAULT_DONE, 0,
-				inti->ext.ext_params2);
 		list_del(&inti->list);
 		fi->counters[FIRQ_CNTR_PFAULT] -= 1;
 	}
@@ -662,6 +661,12 @@ static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
 	spin_unlock(&fi->lock);
 
 	if (inti) {
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+						 KVM_S390_INT_PFAULT_DONE, 0,
+						 inti->ext.ext_params2);
+		VCPU_EVENT(vcpu, 4, "deliver: pfault done token 0x%llx",
+			   inti->ext.ext_params2);
+
 		rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
 				(u16 *)__LC_EXT_INT_CODE);
 		rc |= put_guest_lc(vcpu, PFAULT_DONE,
@@ -691,7 +696,7 @@ static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
 					list);
 	if (inti) {
 		VCPU_EVENT(vcpu, 4,
-			   "interrupt: virtio parm:%x,parm64:%llx",
+			   "deliver: virtio parm: 0x%x,parm64: 0x%llx",
 			   inti->ext.ext_params, inti->ext.ext_params2);
 		vcpu->stat.deliver_virtio_interrupt++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
@@ -741,7 +746,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
 					struct kvm_s390_interrupt_info,
 					list);
 	if (inti) {
-		VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
+		VCPU_EVENT(vcpu, 4, "deliver: I/O 0x%llx", inti->type);
 		vcpu->stat.deliver_io_int++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 				inti->type,
@@ -855,7 +860,9 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 		goto no_timer;
 	}
 
+	preempt_disable();
 	now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
+	preempt_enable();
 	sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
 
 	/* underflow */
@@ -864,7 +871,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 
 	__set_cpu_idle(vcpu);
 	hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
-	VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
+	VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime);
 no_timer:
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 	kvm_vcpu_block(vcpu);
@@ -894,7 +901,9 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
 	u64 now, sltime;
 
 	vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
+	preempt_disable();
 	now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
+	preempt_enable();
 	sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
 
 	/*
@@ -968,6 +977,10 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
+	VCPU_EVENT(vcpu, 3, "inject: program irq code 0x%x", irq->u.pgm.code);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+				   irq->u.pgm.code, 0);
+
 	li->irq.pgm = irq->u.pgm;
 	set_bit(IRQ_PEND_PROG, &li->pending_irqs);
 	return 0;
@@ -978,9 +991,6 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_irq irq;
 
-	VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code,
-				   0, 1);
 	spin_lock(&li->lock);
 	irq.u.pgm.code = code;
 	__inject_prog(vcpu, &irq);
@@ -996,10 +1006,6 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
 	struct kvm_s390_irq irq;
 	int rc;
 
-	VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
-		   pgm_info->code);
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
-				   pgm_info->code, 0, 1);
 	spin_lock(&li->lock);
 	irq.u.pgm = *pgm_info;
 	rc = __inject_prog(vcpu, &irq);
@@ -1012,11 +1018,11 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
-	VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx",
-		   irq->u.ext.ext_params, irq->u.ext.ext_params2);
+	VCPU_EVENT(vcpu, 4, "inject: pfault init parameter block at 0x%llx",
+		   irq->u.ext.ext_params2);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT,
 				   irq->u.ext.ext_params,
-				   irq->u.ext.ext_params2, 2);
+				   irq->u.ext.ext_params2);
 
 	li->irq.ext = irq->u.ext;
 	set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
@@ -1045,10 +1051,10 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
 	uint16_t src_id = irq->u.extcall.code;
 
-	VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
+	VCPU_EVENT(vcpu, 4, "inject: external call source-cpu:%u",
 		   src_id);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
-				   src_id, 0, 2);
+				   src_id, 0);
 
 	/* sending vcpu invalid */
 	if (src_id >= KVM_MAX_VCPUS ||
@@ -1070,10 +1076,10 @@ static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_prefix_info *prefix = &li->irq.prefix;
 
-	VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
+	VCPU_EVENT(vcpu, 3, "inject: set prefix to %x",
 		   irq->u.prefix.address);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
-				   irq->u.prefix.address, 0, 2);
+				   irq->u.prefix.address, 0);
 
 	if (!is_vcpu_stopped(vcpu))
 		return -EBUSY;
@@ -1090,7 +1096,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_stop_info *stop = &li->irq.stop;
 	int rc = 0;
 
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0);
 
 	if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS)
 		return -EINVAL;
@@ -1114,8 +1120,8 @@ static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
-	VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type);
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2);
+	VCPU_EVENT(vcpu, 3, "%s", "inject: restart int");
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
 
 	set_bit(IRQ_PEND_RESTART, &li->pending_irqs);
 	return 0;
@@ -1126,10 +1132,10 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
-	VCPU_EVENT(vcpu, 3, "inject: emergency %u\n",
+	VCPU_EVENT(vcpu, 4, "inject: emergency from cpu %u",
 		   irq->u.emerg.code);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
-				   irq->u.emerg.code, 0, 2);
+				   irq->u.emerg.code, 0);
 
 	set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
 	set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
@@ -1142,10 +1148,10 @@ static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
 
-	VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
+	VCPU_EVENT(vcpu, 3, "inject: machine check mcic 0x%llx",
 		   irq->u.mchk.mcic);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
-				   irq->u.mchk.mcic, 2);
+				   irq->u.mchk.mcic);
 
 	/*
 	 * Because repressible machine checks can be indicated along with
@@ -1172,9 +1178,9 @@ static int __inject_ckc(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
-	VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP);
+	VCPU_EVENT(vcpu, 3, "%s", "inject: clock comparator external");
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
-				   0, 0, 2);
+				   0, 0);
 
 	set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
 	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
@@ -1185,9 +1191,9 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
-	VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER);
+	VCPU_EVENT(vcpu, 3, "%s", "inject: cpu timer external");
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
-				   0, 0, 2);
+				   0, 0);
 
 	set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
 	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
@@ -1435,20 +1441,20 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 		inti->ext.ext_params2 = s390int->parm64;
 		break;
 	case KVM_S390_INT_SERVICE:
-		VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
+		VM_EVENT(kvm, 4, "inject: sclp parm:%x", s390int->parm);
 		inti->ext.ext_params = s390int->parm;
 		break;
 	case KVM_S390_INT_PFAULT_DONE:
 		inti->ext.ext_params2 = s390int->parm64;
 		break;
 	case KVM_S390_MCHK:
-		VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
+		VM_EVENT(kvm, 3, "inject: machine check mcic 0x%llx",
 			 s390int->parm64);
 		inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
 		inti->mchk.mcic = s390int->parm64;
 		break;
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-		if (inti->type & IOINT_AI_MASK)
+		if (inti->type & KVM_S390_INT_IO_AI_MASK)
 			VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
 		else
 			VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
@@ -1535,8 +1541,6 @@ static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 
 	switch (irq->type) {
 	case KVM_S390_PROGRAM_INT:
-		VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
-			   irq->u.pgm.code);
 		rc = __inject_prog(vcpu, irq);
 		break;
 	case KVM_S390_SIGP_SET_PREFIX:
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 2078f92..98df53c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -28,6 +28,7 @@
 #include <linux/vmalloc.h>
 #include <asm/asm-offsets.h>
 #include <asm/lowcore.h>
+#include <asm/etr.h>
 #include <asm/pgtable.h>
 #include <asm/nmi.h>
 #include <asm/switch_to.h>
@@ -108,6 +109,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
+	{ "diagnose_258", VCPU_STAT(diagnose_258) },
+	{ "diagnose_308", VCPU_STAT(diagnose_308) },
+	{ "diagnose_500", VCPU_STAT(diagnose_500) },
 	{ NULL }
 };
 
@@ -124,6 +128,7 @@ unsigned long kvm_s390_fac_list_mask_size(void)
 }
 
 static struct gmap_notifier gmap_notifier;
+debug_info_t *kvm_s390_dbf;
 
 /* Section: not file related */
 int kvm_arch_hardware_enable(void)
@@ -134,24 +139,69 @@ int kvm_arch_hardware_enable(void)
 
 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
 
+/*
+ * This callback is executed during stop_machine(). All CPUs are therefore
+ * temporarily stopped. In order not to change guest behavior, we have to
+ * disable preemption whenever we touch the epoch of kvm and the VCPUs,
+ * so a CPU won't be stopped while calculating with the epoch.
+ */
+static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
+			  void *v)
+{
+	struct kvm *kvm;
+	struct kvm_vcpu *vcpu;
+	int i;
+	unsigned long long *delta = v;
+
+	list_for_each_entry(kvm, &vm_list, vm_list) {
+		kvm->arch.epoch -= *delta;
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			vcpu->arch.sie_block->epoch -= *delta;
+		}
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block kvm_clock_notifier = {
+	.notifier_call = kvm_clock_sync,
+};
+
 int kvm_arch_hardware_setup(void)
 {
 	gmap_notifier.notifier_call = kvm_gmap_notifier;
 	gmap_register_ipte_notifier(&gmap_notifier);
+	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
+				       &kvm_clock_notifier);
 	return 0;
 }
 
 void kvm_arch_hardware_unsetup(void)
 {
 	gmap_unregister_ipte_notifier(&gmap_notifier);
+	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
+					 &kvm_clock_notifier);
 }
 
 int kvm_arch_init(void *opaque)
 {
+	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
+	if (!kvm_s390_dbf)
+		return -ENOMEM;
+
+	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
+		debug_unregister(kvm_s390_dbf);
+		return -ENOMEM;
+	}
+
 	/* Register floating interrupt controller interface. */
 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 }
 
+void kvm_arch_exit(void)
+{
+	debug_unregister(kvm_s390_dbf);
+}
+
 /* Section: device related */
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg)
@@ -281,10 +331,12 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 
 	switch (cap->cap) {
 	case KVM_CAP_S390_IRQCHIP:
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 		kvm->arch.use_irqchip = 1;
 		r = 0;
 		break;
 	case KVM_CAP_S390_USER_SIGP:
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 		kvm->arch.user_sigp = 1;
 		r = 0;
 		break;
@@ -295,8 +347,11 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 			r = 0;
 		} else
 			r = -EINVAL;
+		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
+			 r ? "(not available)" : "(success)");
 		break;
 	case KVM_CAP_S390_USER_STSI:
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 		kvm->arch.user_stsi = 1;
 		r = 0;
 		break;
@@ -314,6 +369,8 @@ static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 	switch (attr->attr) {
 	case KVM_S390_VM_MEM_LIMIT_SIZE:
 		ret = 0;
+		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
+			 kvm->arch.gmap->asce_end);
 		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
 			ret = -EFAULT;
 		break;
@@ -330,7 +387,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 	unsigned int idx;
 	switch (attr->attr) {
 	case KVM_S390_VM_MEM_ENABLE_CMMA:
+		/* enable CMMA only for z10 and later (EDAT_1) */
+		ret = -EINVAL;
+		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
+			break;
+
 		ret = -EBUSY;
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 		mutex_lock(&kvm->lock);
 		if (atomic_read(&kvm->online_vcpus) == 0) {
 			kvm->arch.use_cmma = 1;
@@ -339,6 +402,11 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 		mutex_unlock(&kvm->lock);
 		break;
 	case KVM_S390_VM_MEM_CLR_CMMA:
+		ret = -EINVAL;
+		if (!kvm->arch.use_cmma)
+			break;
+
+		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 		mutex_lock(&kvm->lock);
 		idx = srcu_read_lock(&kvm->srcu);
 		s390_reset_cmma(kvm->arch.gmap->mm);
@@ -374,6 +442,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 			}
 		}
 		mutex_unlock(&kvm->lock);
+		VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
 		break;
 	}
 	default:
@@ -400,22 +469,26 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 		kvm->arch.crypto.aes_kw = 1;
+		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 		break;
 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 		get_random_bytes(
 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 		kvm->arch.crypto.dea_kw = 1;
+		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 		break;
 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 		kvm->arch.crypto.aes_kw = 0;
 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 		break;
 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 		kvm->arch.crypto.dea_kw = 0;
 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
+		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 		break;
 	default:
 		mutex_unlock(&kvm->lock);
@@ -440,6 +513,7 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 
 	if (gtod_high != 0)
 		return -EINVAL;
+	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high);
 
 	return 0;
 }
@@ -459,12 +533,15 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 		return r;
 
 	mutex_lock(&kvm->lock);
+	preempt_disable();
 	kvm->arch.epoch = gtod - host_tod;
 	kvm_s390_vcpu_block_all(kvm);
 	kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
 		cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
 	kvm_s390_vcpu_unblock_all(kvm);
+	preempt_enable();
 	mutex_unlock(&kvm->lock);
+	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod);
 	return 0;
 }
 
@@ -496,6 +573,7 @@ static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
 					 sizeof(gtod_high)))
 		return -EFAULT;
+	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high);
 
 	return 0;
 }
@@ -509,9 +587,12 @@ static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 	if (r)
 		return r;
 
+	preempt_disable();
 	gtod = host_tod + kvm->arch.epoch;
+	preempt_enable();
 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 		return -EFAULT;
+	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod);
 
 	return 0;
 }
@@ -821,7 +902,9 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 	}
 
 	/* Enable storage key handling for the guest */
-	s390_enable_skey();
+	r = s390_enable_skey();
+	if (r)
+		goto out;
 
 	for (i = 0; i < args->count; i++) {
 		hva = gfn_to_hva(kvm, args->start_gfn + i);
@@ -879,8 +962,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (kvm->arch.use_irqchip) {
 			/* Set up dummy routing. */
 			memset(&routing, 0, sizeof(routing));
-			kvm_set_irq_routing(kvm, &routing, 0, 0);
-			r = 0;
+			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
 		}
 		break;
 	}
@@ -1043,7 +1125,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	sprintf(debug_name, "kvm-%u", current->pid);
 
-	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
+	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
 	if (!kvm->arch.dbf)
 		goto out_err;
 
@@ -1086,7 +1168,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	mutex_init(&kvm->arch.ipte_mutex);
 
 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
-	VM_EVENT(kvm, 3, "%s", "vm created");
+	VM_EVENT(kvm, 3, "vm created with type %lu", type);
 
 	if (type & KVM_VM_S390_UCONTROL) {
 		kvm->arch.gmap = NULL;
@@ -1103,6 +1185,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.epoch = 0;
 
 	spin_lock_init(&kvm->arch.start_stop_lock);
+	KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
 
 	return 0;
 out_err:
@@ -1110,6 +1193,7 @@ out_err:
 	free_page((unsigned long)kvm->arch.model.fac);
 	debug_unregister(kvm->arch.dbf);
 	free_page((unsigned long)(kvm->arch.sca));
+	KVM_EVENT(3, "creation of vm failed: %d", rc);
 	return rc;
 }
 
@@ -1131,7 +1215,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	if (kvm_is_ucontrol(vcpu->kvm))
 		gmap_free(vcpu->arch.gmap);
 
-	if (kvm_s390_cmma_enabled(vcpu->kvm))
+	if (vcpu->kvm->arch.use_cmma)
 		kvm_s390_vcpu_unsetup_cmma(vcpu);
 	free_page((unsigned long)(vcpu->arch.sie_block));
 
@@ -1166,6 +1250,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 		gmap_free(kvm->arch.gmap);
 	kvm_s390_destroy_adapters(kvm);
 	kvm_s390_clear_float_irqs(kvm);
+	KVM_EVENT(3, "vm 0x%p destroyed", kvm);
 }
 
 /* Section: vcpu related */
@@ -1198,21 +1283,54 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+/*
+ * Backs up the current FP/VX register save area on a particular
+ * destination.  Used to switch between different register save
+ * areas.
+ */
+static inline void save_fpu_to(struct fpu *dst)
+{
+	dst->fpc = current->thread.fpu.fpc;
+	dst->flags = current->thread.fpu.flags;
+	dst->regs = current->thread.fpu.regs;
+}
+
+/*
+ * Switches the FP/VX register save area from which to lazy
+ * restore register contents.
+ */
+static inline void load_fpu_from(struct fpu *from)
+{
+	current->thread.fpu.fpc = from->fpc;
+	current->thread.fpu.flags = from->flags;
+	current->thread.fpu.regs = from->regs;
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
-	if (test_kvm_facility(vcpu->kvm, 129))
-		save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
-	else
-		save_fp_regs(vcpu->arch.host_fpregs.fprs);
-	save_access_regs(vcpu->arch.host_acrs);
+	/* Save host register state */
+	save_fpu_regs();
+	save_fpu_to(&vcpu->arch.host_fpregs);
+
 	if (test_kvm_facility(vcpu->kvm, 129)) {
-		restore_fp_ctl(&vcpu->run->s.regs.fpc);
-		restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
-	} else {
-		restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-		restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
-	}
+		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
+		current->thread.fpu.flags = FPU_USE_VX;
+		/*
+		 * Use the register save area in the SIE-control block
+		 * for register restore and save in kvm_arch_vcpu_put()
+		 */
+		current->thread.fpu.vxrs =
+			(__vector128 *)&vcpu->run->s.regs.vrs;
+		/* Always enable the vector extension for KVM */
+		__ctl_set_vx();
+	} else
+		load_fpu_from(&vcpu->arch.guest_fpregs);
+
+	if (test_fp_ctl(current->thread.fpu.fpc))
+		/* User space provided an invalid FPC, let's clear it */
+		current->thread.fpu.fpc = 0;
+
+	save_access_regs(vcpu->arch.host_acrs);
 	restore_access_regs(vcpu->run->s.regs.acrs);
 	gmap_enable(vcpu->arch.gmap);
 	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@ -1222,19 +1340,22 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 	gmap_disable(vcpu->arch.gmap);
-	if (test_kvm_facility(vcpu->kvm, 129)) {
-		save_fp_ctl(&vcpu->run->s.regs.fpc);
-		save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
-	} else {
-		save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-		save_fp_regs(vcpu->arch.guest_fpregs.fprs);
-	}
-	save_access_regs(vcpu->run->s.regs.acrs);
-	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
+
+	save_fpu_regs();
+
 	if (test_kvm_facility(vcpu->kvm, 129))
-		restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+		/*
+		 * kvm_arch_vcpu_load() set up the register save area to
+		 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
+		 * are already saved.  Only the floating-point control must be
+		 * copied.
+		 */
+		vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
 	else
-		restore_fp_regs(vcpu->arch.host_fpregs.fprs);
+		save_fpu_to(&vcpu->arch.guest_fpregs);
+	load_fpu_from(&vcpu->arch.host_fpregs);
+
+	save_access_regs(vcpu->run->s.regs.acrs);
 	restore_access_regs(vcpu->arch.host_acrs);
 }
 
@@ -1264,7 +1385,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 {
 	mutex_lock(&vcpu->kvm->lock);
+	preempt_disable();
 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
+	preempt_enable();
 	mutex_unlock(&vcpu->kvm->lock);
 	if (!kvm_is_ucontrol(vcpu->kvm))
 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
@@ -1342,7 +1465,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	}
 	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
 
-	if (kvm_s390_cmma_enabled(vcpu->kvm)) {
+	if (vcpu->kvm->arch.use_cmma) {
 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
 		if (rc)
 			return rc;
@@ -1377,7 +1500,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
 	vcpu->arch.sie_block = &sie_page->sie_block;
 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
-	vcpu->arch.host_vregs = &sie_page->vregs;
 
 	vcpu->arch.sie_block->icpua = id;
 	if (!kvm_is_ucontrol(kvm)) {
@@ -1399,6 +1521,19 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	vcpu->arch.local_int.wq = &vcpu->wq;
 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
 
+	/*
+	 * Allocate a save area for floating-point registers.  If the vector
+	 * extension is available, register contents are saved in the SIE
+	 * control block.  The allocated save area is still required in
+	 * particular places, for example, in kvm_s390_vcpu_store_status().
+	 */
+	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
+					       GFP_KERNEL);
+	if (!vcpu->arch.guest_fpregs.fprs) {
+		rc = -ENOMEM;
+		goto out_free_sie_block;
+	}
+
 	rc = kvm_vcpu_init(vcpu, kvm, id);
 	if (rc)
 		goto out_free_sie_block;
@@ -1621,16 +1756,16 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	if (test_fp_ctl(fpu->fpc))
 		return -EINVAL;
-	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
+	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
 	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
-	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	save_fpu_regs();
+	load_fpu_from(&vcpu->arch.guest_fpregs);
 	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
-	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
+	memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
 	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
 	return 0;
 }
@@ -1723,18 +1858,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 	return rc;
 }
 
-bool kvm_s390_cmma_enabled(struct kvm *kvm)
-{
-	if (!MACHINE_IS_LPAR)
-		return false;
-	/* only enable for z10 and later */
-	if (!MACHINE_HAS_EDAT1)
-		return false;
-	if (!kvm->arch.use_cmma)
-		return false;
-	return true;
-}
-
 static bool ibs_enabled(struct kvm_vcpu *vcpu)
 {
 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
@@ -1742,10 +1865,10 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu)
 
 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->requests)
-		return 0;
 retry:
 	kvm_s390_vcpu_request_handled(vcpu);
+	if (!vcpu->requests)
+		return 0;
 	/*
 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
 	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
@@ -2193,8 +2316,21 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	 * copying in vcpu load/put. Lets update our copies before we save
 	 * it into the save area
 	 */
-	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	save_fpu_regs();
+	if (test_kvm_facility(vcpu->kvm, 129)) {
+		/*
+		 * If the vector extension is available, the vector registers
+		 * which overlaps with floating-point registers are saved in
+		 * the SIE-control block.  Hence, extract the floating-point
+		 * registers and the FPC value and store them in the
+		 * guest_fpregs structure.
+		 */
+		WARN_ON(!is_vx_task(current));	  /* XXX remove later */
+		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
+		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
+				 current->thread.fpu.vxrs);
+	} else
+		save_fpu_to(&vcpu->arch.guest_fpregs);
 	save_access_regs(vcpu->run->s.regs.acrs);
 
 	return kvm_s390_store_status_unloaded(vcpu, addr);
@@ -2221,10 +2357,13 @@ int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
 
 	/*
 	 * The guest VXRS are in the host VXRs due to the lazy
-	 * copying in vcpu load/put. Let's update our copies before we save
-	 * it into the save area.
+	 * copying in vcpu load/put. We can simply call save_fpu_regs()
+	 * to save the current register state because we are in the
+	 * middle of a load/put cycle.
+	 *
+	 * Let's update our copies before we save it into the save area.
 	 */
-	save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+	save_fpu_regs();
 
 	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
 }
@@ -2340,6 +2479,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 	case KVM_CAP_S390_CSS_SUPPORT:
 		if (!vcpu->kvm->arch.css_support) {
 			vcpu->kvm->arch.css_support = 1;
+			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
 			trace_kvm_s390_enable_css(vcpu->kvm);
 		}
 		r = 0;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index c570478..c446aab 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -27,6 +27,13 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 #define TDB_FORMAT1		1
 #define IS_ITDB_VALID(vcpu)	((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
 
+extern debug_info_t *kvm_s390_dbf;
+#define KVM_EVENT(d_loglevel, d_string, d_args...)\
+do { \
+	debug_sprintf_event(kvm_s390_dbf, d_loglevel, d_string "\n", \
+	  d_args); \
+} while (0)
+
 #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
 do { \
 	debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
@@ -65,6 +72,8 @@ static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu)
 
 static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
 {
+	VCPU_EVENT(vcpu, 3, "set prefix of cpu %03u to 0x%x", vcpu->vcpu_id,
+		   prefix);
 	vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
@@ -217,8 +226,6 @@ void exit_sie(struct kvm_vcpu *vcpu);
 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu);
 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
-/* is cmma enabled */
-bool kvm_s390_cmma_enabled(struct kvm *kvm);
 unsigned long kvm_s390_fac_list_mask_size(void);
 extern unsigned long kvm_s390_fac_list_mask[];
 
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index ad42422..4d21dc4 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -53,11 +53,14 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
 		kvm_s390_set_psw_cc(vcpu, 3);
 		return 0;
 	}
+	VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val);
 	val = (val - hostclk) & ~0x3fUL;
 
 	mutex_lock(&vcpu->kvm->lock);
+	preempt_disable();
 	kvm_for_each_vcpu(i, cpup, vcpu->kvm)
 		cpup->arch.sie_block->epoch = val;
+	preempt_enable();
 	mutex_unlock(&vcpu->kvm->lock);
 
 	kvm_s390_set_psw_cc(vcpu, 0);
@@ -98,8 +101,6 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 	kvm_s390_set_prefix(vcpu, address);
-
-	VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
 	trace_kvm_s390_handle_prefix(vcpu, 1, address);
 	return 0;
 }
@@ -129,7 +130,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
-	VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
+	VCPU_EVENT(vcpu, 3, "STPX: storing prefix 0x%x into 0x%llx", address, operand2);
 	trace_kvm_s390_handle_prefix(vcpu, 0, address);
 	return 0;
 }
@@ -155,7 +156,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
-	VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", ga);
+	VCPU_EVENT(vcpu, 3, "STAP: storing cpu address (%u) to 0x%llx", vcpu_id, ga);
 	trace_kvm_s390_handle_stap(vcpu, ga);
 	return 0;
 }
@@ -167,6 +168,7 @@ static int __skey_check_enable(struct kvm_vcpu *vcpu)
 		return rc;
 
 	rc = s390_enable_skey();
+	VCPU_EVENT(vcpu, 3, "%s", "enabling storage keys for guest");
 	trace_kvm_s390_skey_related_inst(vcpu);
 	vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
 	return rc;
@@ -370,7 +372,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
 			    &fac, sizeof(fac));
 	if (rc)
 		return rc;
-	VCPU_EVENT(vcpu, 5, "store facility list value %x", fac);
+	VCPU_EVENT(vcpu, 3, "STFL: store facility list 0x%x", fac);
 	trace_kvm_s390_handle_stfl(vcpu, fac);
 	return 0;
 }
@@ -468,7 +470,7 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
-	VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
+	VCPU_EVENT(vcpu, 3, "STIDP: store cpu id 0x%llx", stidp_data);
 	return 0;
 }
 
@@ -521,7 +523,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 	ar_t ar;
 
 	vcpu->stat.instruction_stsi++;
-	VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
+	VCPU_EVENT(vcpu, 3, "STSI: fc: %u sel1: %u sel2: %u", fc, sel1, sel2);
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -758,10 +760,10 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 	struct gmap *gmap;
 	int i;
 
-	VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries);
+	VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries);
 	gmap = vcpu->arch.gmap;
 	vcpu->stat.instruction_essa++;
-	if (!kvm_s390_cmma_enabled(vcpu->kvm))
+	if (!vcpu->kvm->arch.use_cmma)
 		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
@@ -829,7 +831,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 	if (ga & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	VCPU_EVENT(vcpu, 4, "LCTL: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
 
 	nr_regs = ((reg3 - reg1) & 0xf) + 1;
@@ -868,7 +870,7 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
 	if (ga & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	VCPU_EVENT(vcpu, 5, "stctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	VCPU_EVENT(vcpu, 4, "STCTL r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
 
 	reg = reg1;
@@ -902,7 +904,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	VCPU_EVENT(vcpu, 4, "LCTLG: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
 
 	nr_regs = ((reg3 - reg1) & 0xf) + 1;
@@ -940,7 +942,7 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	VCPU_EVENT(vcpu, 4, "STCTG r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
 
 	reg = reg1;
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 72e58bd..da690b6 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -205,9 +205,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
 		*reg &= 0xffffffff00000000UL;
 		*reg |= SIGP_STATUS_INCORRECT_STATE;
 		return SIGP_CC_STATUS_STORED;
-	} else if (rc == 0) {
-		VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x",
-			   dst_vcpu->vcpu_id, irq.u.prefix.address);
 	}
 
 	return rc;
@@ -371,7 +368,8 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
 	return rc;
 }
 
-static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code)
+static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code,
+					   u16 cpu_addr)
 {
 	if (!vcpu->kvm->arch.user_sigp)
 		return 0;
@@ -414,9 +412,8 @@ static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code)
 	default:
 		vcpu->stat.instruction_sigp_unknown++;
 	}
-
-	VCPU_EVENT(vcpu, 4, "sigp order %u: completely handled in user space",
-		   order_code);
+	VCPU_EVENT(vcpu, 3, "SIGP: order %u for CPU %d handled in userspace",
+		   order_code, cpu_addr);
 
 	return 1;
 }
@@ -435,7 +432,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
 	order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
-	if (handle_sigp_order_in_user_space(vcpu, order_code))
+	if (handle_sigp_order_in_user_space(vcpu, order_code, cpu_addr))
 		return -EOPNOTSUPP;
 
 	if (r1 % 2)
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 3208d33..cc1d6c6 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -105,11 +105,22 @@ TRACE_EVENT(kvm_s390_vcpu_start_stop,
 	{KVM_S390_PROGRAM_INT, "program interrupt"},			\
 	{KVM_S390_SIGP_SET_PREFIX, "sigp set prefix"},			\
 	{KVM_S390_RESTART, "sigp restart"},				\
+	{KVM_S390_INT_PFAULT_INIT, "pfault init"},			\
+	{KVM_S390_INT_PFAULT_DONE, "pfault done"},			\
+	{KVM_S390_MCHK, "machine check"},				\
+	{KVM_S390_INT_CLOCK_COMP, "clock comparator"},			\
+	{KVM_S390_INT_CPU_TIMER, "cpu timer"},				\
 	{KVM_S390_INT_VIRTIO, "virtio interrupt"},			\
 	{KVM_S390_INT_SERVICE, "sclp interrupt"},			\
 	{KVM_S390_INT_EMERGENCY, "sigp emergency"},			\
 	{KVM_S390_INT_EXTERNAL_CALL, "sigp ext call"}
 
+#define get_irq_name(__type) \
+	(__type > KVM_S390_INT_IO_MAX ? \
+	__print_symbolic(__type, kvm_s390_int_type) : \
+		(__type & KVM_S390_INT_IO_AI_MASK ? \
+		 "adapter I/O interrupt" : "subchannel I/O interrupt"))
+
 TRACE_EVENT(kvm_s390_inject_vm,
 	    TP_PROTO(__u64 type, __u32 parm, __u64 parm64, int who),
 	    TP_ARGS(type, parm, parm64, who),
@@ -131,22 +142,19 @@ TRACE_EVENT(kvm_s390_inject_vm,
 	    TP_printk("inject%s: type:%x (%s) parm:%x parm64:%llx",
 		      (__entry->who == 1) ? " (from kernel)" :
 		      (__entry->who == 2) ? " (from user)" : "",
-		      __entry->inttype,
-		      __print_symbolic(__entry->inttype, kvm_s390_int_type),
+		      __entry->inttype, get_irq_name(__entry->inttype),
 		      __entry->parm, __entry->parm64)
 	);
 
 TRACE_EVENT(kvm_s390_inject_vcpu,
-	    TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64, \
-		     int who),
-	    TP_ARGS(id, type, parm, parm64, who),
+	    TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64),
+	    TP_ARGS(id, type, parm, parm64),
 
 	    TP_STRUCT__entry(
 		    __field(int, id)
 		    __field(__u32, inttype)
 		    __field(__u32, parm)
 		    __field(__u64, parm64)
-		    __field(int, who)
 		    ),
 
 	    TP_fast_assign(
@@ -154,15 +162,12 @@ TRACE_EVENT(kvm_s390_inject_vcpu,
 		    __entry->inttype = type & 0x00000000ffffffff;
 		    __entry->parm = parm;
 		    __entry->parm64 = parm64;
-		    __entry->who = who;
 		    ),
 
-	    TP_printk("inject%s (vcpu %d): type:%x (%s) parm:%x parm64:%llx",
-		      (__entry->who == 1) ? " (from kernel)" :
-		      (__entry->who == 2) ? " (from user)" : "",
+	    TP_printk("inject (vcpu %d): type:%x (%s) parm:%x parm64:%llx",
 		      __entry->id, __entry->inttype,
-		      __print_symbolic(__entry->inttype, kvm_s390_int_type),
-		      __entry->parm, __entry->parm64)
+		      get_irq_name(__entry->inttype), __entry->parm,
+		      __entry->parm64)
 	);
 
 /*
@@ -189,8 +194,8 @@ TRACE_EVENT(kvm_s390_deliver_interrupt,
 	    TP_printk("deliver interrupt (vcpu %d): type:%x (%s) "	\
 		      "data:%08llx %016llx",
 		      __entry->id, __entry->inttype,
-		      __print_symbolic(__entry->inttype, kvm_s390_int_type),
-		      __entry->data0, __entry->data1)
+		      get_irq_name(__entry->inttype), __entry->data0,
+		      __entry->data1)
 	);
 
 /*
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 16dc42d..246a7eb 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -26,6 +26,7 @@ void __delay(unsigned long loops)
          */
 	asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
 }
+EXPORT_SYMBOL(__delay);
 
 static void __udelay_disabled(unsigned long long usecs)
 {
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 4614d41..0d002a7 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -370,22 +370,9 @@ long __strncpy_from_user(char *dst, const char __user *src, long size)
 }
 EXPORT_SYMBOL(__strncpy_from_user);
 
-/*
- * The "old" uaccess variant without mvcos can be enforced with the
- * uaccess_primary kernel parameter. This is mainly for debugging purposes.
- */
-static int uaccess_primary __initdata;
-
-static int __init parse_uaccess_pt(char *__unused)
-{
-	uaccess_primary = 1;
-	return 0;
-}
-early_param("uaccess_primary", parse_uaccess_pt);
-
 static int __init uaccess_init(void)
 {
-	if (!uaccess_primary && test_facility(27))
+	if (test_facility(27))
 		static_key_slow_inc(&have_mvcos);
 	return 0;
 }
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 4c8f5d7..f985856 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -646,7 +646,7 @@ static void pfault_interrupt(struct ext_code ext_code,
 		return;
 	inc_irq_stat(IRQEXT_PFL);
 	/* Get the token (= pid of the affected task). */
-	pid = sizeof(void *) == 4 ? param32 : param64;
+	pid = param64;
 	rcu_read_lock();
 	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
 	if (tsk)
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 1eb41bb..12bbf0e 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -30,6 +30,9 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 	do {
 		pte = *ptep;
 		barrier();
+		/* Similar to the PMD case, NUMA hinting must take slow path */
+		if (pte_protnone(pte))
+			return 0;
 		if ((pte_val(pte) & mask) != 0)
 			return 0;
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
@@ -125,6 +128,13 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
 		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
 			return 0;
 		if (unlikely(pmd_large(pmd))) {
+			/*
+			 * NUMA hinting faults need to be handled in the GUP
+			 * slowpath for accounting purposes and so that they
+			 * can be serialised against THP migration.
+			 */
+			if (pmd_protnone(pmd))
+				return 0;
 			if (!gup_huge_pmd(pmdp, pmd, addr, next,
 					  write, pages, nr))
 				return 0;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 76e8737..2963b56 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -27,6 +27,7 @@
 #include <linux/initrd.h>
 #include <linux/export.h>
 #include <linux/gfp.h>
+#include <linux/memblock.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -138,7 +139,7 @@ void __init mem_init(void)
 	cpumask_set_cpu(0, mm_cpumask(&init_mm));
 	atomic_set(&init_mm.context.attach_count, 1);
 
-        max_mapnr = max_low_pfn;
+	set_max_mapnr(max_low_pfn);
         high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
 
 	/* Setup guest page hinting */
@@ -170,37 +171,36 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size)
 {
-	unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
+	unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM());
+	unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS);
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long size_pages = PFN_DOWN(size);
-	struct zone *zone;
-	int rc;
+	unsigned long nr_pages;
+	int rc, zone_enum;
 
 	rc = vmem_add_mapping(start, size);
 	if (rc)
 		return rc;
-	for_each_zone(zone) {
-		if (zone_idx(zone) != ZONE_MOVABLE) {
-			/* Add range within existing zone limits */
-			zone_start_pfn = zone->zone_start_pfn;
-			zone_end_pfn = zone->zone_start_pfn +
-				       zone->spanned_pages;
+
+	while (size_pages > 0) {
+		if (start_pfn < dma_end_pfn) {
+			nr_pages = (start_pfn + size_pages > dma_end_pfn) ?
+				   dma_end_pfn - start_pfn : size_pages;
+			zone_enum = ZONE_DMA;
+		} else if (start_pfn < normal_end_pfn) {
+			nr_pages = (start_pfn + size_pages > normal_end_pfn) ?
+				   normal_end_pfn - start_pfn : size_pages;
+			zone_enum = ZONE_NORMAL;
 		} else {
-			/* Add remaining range to ZONE_MOVABLE */
-			zone_start_pfn = start_pfn;
-			zone_end_pfn = start_pfn + size_pages;
+			nr_pages = size_pages;
+			zone_enum = ZONE_MOVABLE;
 		}
-		if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
-			continue;
-		nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
-			   zone_end_pfn - start_pfn : size_pages;
-		rc = __add_pages(nid, zone, start_pfn, nr_pages);
+		rc = __add_pages(nid, NODE_DATA(nid)->node_zones + zone_enum,
+				 start_pfn, size_pages);
 		if (rc)
 			break;
 		start_pfn += nr_pages;
 		size_pages -= nr_pages;
-		if (!size_pages)
-			break;
 	}
 	if (rc)
 		vmem_remove_mapping(start, size);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 33082d0..54ef3bc 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -10,11 +10,7 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/smp.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
 #include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/quicklist.h>
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
 #include <linux/swapops.h>
@@ -28,14 +24,9 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 
-#define ALLOC_ORDER	2
-#define FRAG_MASK	0x03
-
-int HPAGE_SHIFT;
-
 unsigned long *crst_table_alloc(struct mm_struct *mm)
 {
-	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	struct page *page = alloc_pages(GFP_KERNEL, 2);
 
 	if (!page)
 		return NULL;
@@ -44,7 +35,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
 
 void crst_table_free(struct mm_struct *mm, unsigned long *table)
 {
-	free_pages((unsigned long) table, ALLOC_ORDER);
+	free_pages((unsigned long) table, 2);
 }
 
 static void __crst_table_upgrade(void *arg)
@@ -178,7 +169,7 @@ struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
 	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
 	spin_lock_init(&gmap->guest_table_lock);
 	gmap->mm = mm;
-	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	page = alloc_pages(GFP_KERNEL, 2);
 	if (!page)
 		goto out_free;
 	page->index = 0;
@@ -249,7 +240,7 @@ void gmap_free(struct gmap *gmap)
 
 	/* Free all segment & region tables. */
 	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
-		__free_pages(page, ALLOC_ORDER);
+		__free_pages(page, 2);
 	gmap_radix_tree_free(&gmap->guest_to_host);
 	gmap_radix_tree_free(&gmap->host_to_guest);
 	down_write(&gmap->mm->mmap_sem);
@@ -289,7 +280,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 	unsigned long *new;
 
 	/* since we dont free the gmap table until gmap_free we can unlock */
-	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	page = alloc_pages(GFP_KERNEL, 2);
 	if (!page)
 		return -ENOMEM;
 	new = (unsigned long *) page_to_phys(page);
@@ -304,7 +295,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 	}
 	spin_unlock(&gmap->mm->page_table_lock);
 	if (page)
-		__free_pages(page, ALLOC_ORDER);
+		__free_pages(page, 2);
 	return 0;
 }
 
@@ -797,40 +788,6 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
 }
 EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
 
-static inline int page_table_with_pgste(struct page *page)
-{
-	return atomic_read(&page->_mapcount) == 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
-	struct page *page;
-	unsigned long *table;
-
-	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
-	if (!page)
-		return NULL;
-	if (!pgtable_page_ctor(page)) {
-		__free_page(page);
-		return NULL;
-	}
-	atomic_set(&page->_mapcount, 0);
-	table = (unsigned long *) page_to_phys(page);
-	clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
-	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
-	return table;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
-	struct page *page;
-
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	pgtable_page_dtor(page);
-	atomic_set(&page->_mapcount, -1);
-	__free_page(page);
-}
-
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned long key, bool nq)
 {
@@ -959,20 +916,6 @@ __initcall(page_table_register_sysctl);
 
 #else /* CONFIG_PGSTE */
 
-static inline int page_table_with_pgste(struct page *page)
-{
-	return 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
-	return NULL;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
-}
-
 static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
 			unsigned long vmaddr)
 {
@@ -996,44 +939,55 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
  */
 unsigned long *page_table_alloc(struct mm_struct *mm)
 {
-	unsigned long *uninitialized_var(table);
-	struct page *uninitialized_var(page);
+	unsigned long *table;
+	struct page *page;
 	unsigned int mask, bit;
 
-	if (mm_alloc_pgste(mm))
-		return page_table_alloc_pgste(mm);
-	/* Allocate fragments of a 4K page as 1K/2K page table */
-	spin_lock_bh(&mm->context.list_lock);
-	mask = FRAG_MASK;
-	if (!list_empty(&mm->context.pgtable_list)) {
-		page = list_first_entry(&mm->context.pgtable_list,
-					struct page, lru);
-		table = (unsigned long *) page_to_phys(page);
-		mask = atomic_read(&page->_mapcount);
-		mask = mask | (mask >> 4);
-	}
-	if ((mask & FRAG_MASK) == FRAG_MASK) {
-		spin_unlock_bh(&mm->context.list_lock);
-		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
-		if (!page)
-			return NULL;
-		if (!pgtable_page_ctor(page)) {
-			__free_page(page);
-			return NULL;
+	/* Try to get a fragment of a 4K page as a 2K page table */
+	if (!mm_alloc_pgste(mm)) {
+		table = NULL;
+		spin_lock_bh(&mm->context.list_lock);
+		if (!list_empty(&mm->context.pgtable_list)) {
+			page = list_first_entry(&mm->context.pgtable_list,
+						struct page, lru);
+			mask = atomic_read(&page->_mapcount);
+			mask = (mask | (mask >> 4)) & 3;
+			if (mask != 3) {
+				table = (unsigned long *) page_to_phys(page);
+				bit = mask & 1;		/* =1 -> second 2K */
+				if (bit)
+					table += PTRS_PER_PTE;
+				atomic_xor_bits(&page->_mapcount, 1U << bit);
+				list_del(&page->lru);
+			}
 		}
+		spin_unlock_bh(&mm->context.list_lock);
+		if (table)
+			return table;
+	}
+	/* Allocate a fresh page */
+	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+	if (!page)
+		return NULL;
+	if (!pgtable_page_ctor(page)) {
+		__free_page(page);
+		return NULL;
+	}
+	/* Initialize page table */
+	table = (unsigned long *) page_to_phys(page);
+	if (mm_alloc_pgste(mm)) {
+		/* Return 4K page table with PGSTEs */
+		atomic_set(&page->_mapcount, 3);
+		clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+		clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+	} else {
+		/* Return the first 2K fragment of the page */
 		atomic_set(&page->_mapcount, 1);
-		table = (unsigned long *) page_to_phys(page);
 		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
 		spin_lock_bh(&mm->context.list_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
-	} else {
-		for (bit = 1; mask & bit; bit <<= 1)
-			table += PTRS_PER_PTE;
-		mask = atomic_xor_bits(&page->_mapcount, bit);
-		if ((mask & FRAG_MASK) == FRAG_MASK)
-			list_del(&page->lru);
+		spin_unlock_bh(&mm->context.list_lock);
 	}
-	spin_unlock_bh(&mm->context.list_lock);
 	return table;
 }
 
@@ -1043,37 +997,23 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	unsigned int bit, mask;
 
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	if (page_table_with_pgste(page))
-		return page_table_free_pgste(table);
-	/* Free 1K/2K page table fragment of a 4K page */
-	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
-	spin_lock_bh(&mm->context.list_lock);
-	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
-		list_del(&page->lru);
-	mask = atomic_xor_bits(&page->_mapcount, bit);
-	if (mask & FRAG_MASK)
-		list_add(&page->lru, &mm->context.pgtable_list);
-	spin_unlock_bh(&mm->context.list_lock);
-	if (mask == 0) {
-		pgtable_page_dtor(page);
-		atomic_set(&page->_mapcount, -1);
-		__free_page(page);
+	if (!mm_alloc_pgste(mm)) {
+		/* Free 2K page table fragment of a 4K page */
+		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+		spin_lock_bh(&mm->context.list_lock);
+		mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
+		if (mask & 3)
+			list_add(&page->lru, &mm->context.pgtable_list);
+		else
+			list_del(&page->lru);
+		spin_unlock_bh(&mm->context.list_lock);
+		if (mask != 0)
+			return;
 	}
-}
-
-static void __page_table_free_rcu(void *table, unsigned bit)
-{
-	struct page *page;
 
-	if (bit == FRAG_MASK)
-		return page_table_free_pgste(table);
-	/* Free 1K/2K page table fragment of a 4K page */
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
-		pgtable_page_dtor(page);
-		atomic_set(&page->_mapcount, -1);
-		__free_page(page);
-	}
+	pgtable_page_dtor(page);
+	atomic_set(&page->_mapcount, -1);
+	__free_page(page);
 }
 
 void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
@@ -1085,34 +1025,45 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
 
 	mm = tlb->mm;
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	if (page_table_with_pgste(page)) {
+	if (mm_alloc_pgste(mm)) {
 		gmap_unlink(mm, table, vmaddr);
-		table = (unsigned long *) (__pa(table) | FRAG_MASK);
+		table = (unsigned long *) (__pa(table) | 3);
 		tlb_remove_table(tlb, table);
 		return;
 	}
-	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
+	bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
 	spin_lock_bh(&mm->context.list_lock);
-	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
-		list_del(&page->lru);
-	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
-	if (mask & FRAG_MASK)
+	mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
+	if (mask & 3)
 		list_add_tail(&page->lru, &mm->context.pgtable_list);
+	else
+		list_del(&page->lru);
 	spin_unlock_bh(&mm->context.list_lock);
-	table = (unsigned long *) (__pa(table) | (bit << 4));
+	table = (unsigned long *) (__pa(table) | (1U << bit));
 	tlb_remove_table(tlb, table);
 }
 
 static void __tlb_remove_table(void *_table)
 {
-	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
-	void *table = (void *)((unsigned long) _table & ~mask);
-	unsigned type = (unsigned long) _table & mask;
-
-	if (type)
-		__page_table_free_rcu(table, type);
-	else
-		free_pages((unsigned long) table, ALLOC_ORDER);
+	unsigned int mask = (unsigned long) _table & 3;
+	void *table = (void *)((unsigned long) _table ^ mask);
+	struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+
+	switch (mask) {
+	case 0:		/* pmd or pud */
+		free_pages((unsigned long) table, 2);
+		break;
+	case 1:		/* lower 2K of a 4K page table */
+	case 2:		/* higher 2K of a 4K page table */
+		if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
+			break;
+		/* fallthrough */
+	case 3:		/* 4K page table with pgstes */
+		pgtable_page_dtor(page);
+		atomic_set(&page->_mapcount, -1);
+		__free_page(page);
+		break;
+	}
 }
 
 static void tlb_remove_table_smp_sync(void *arg)
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index fee782a..8d2e516 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -448,13 +448,13 @@ static void bpf_jit_prologue(struct bpf_jit *jit)
 		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
 			      BPF_REG_1, offsetof(struct sk_buff, data));
 	}
-	/* BPF compatibility: clear A (%b7) and X (%b8) registers */
-	if (REG_SEEN(BPF_REG_7))
-		/* lghi %b7,0 */
-		EMIT4_IMM(0xa7090000, BPF_REG_7, 0);
-	if (REG_SEEN(BPF_REG_8))
-		/* lghi %b8,0 */
-		EMIT4_IMM(0xa7090000, BPF_REG_8, 0);
+	/* BPF compatibility: clear A (%b0) and X (%b7) registers */
+	if (REG_SEEN(BPF_REG_A))
+		/* lghi %ba,0 */
+		EMIT4_IMM(0xa7090000, BPF_REG_A, 0);
+	if (REG_SEEN(BPF_REG_X))
+		/* lghi %bx,0 */
+		EMIT4_IMM(0xa7090000, BPF_REG_X, 0);
 }
 
 /*
diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile
new file mode 100644
index 0000000..f94ecaf
--- /dev/null
+++ b/arch/s390/numa/Makefile
@@ -0,0 +1,3 @@
+obj-y			+= numa.o
+obj-y			+= toptree.o
+obj-$(CONFIG_NUMA_EMU)	+= mode_emu.o
diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c
new file mode 100644
index 0000000..7de4e2f
--- /dev/null
+++ b/arch/s390/numa/mode_emu.c
@@ -0,0 +1,530 @@
+/*
+ * NUMA support for s390
+ *
+ * NUMA emulation (aka fake NUMA) distributes the available memory to nodes
+ * without using real topology information about the physical memory of the
+ * machine.
+ *
+ * It distributes the available CPUs to nodes while respecting the original
+ * machine topology information. This is done by trying to avoid to separate
+ * CPUs which reside on the same book or even on the same MC.
+ *
+ * Because the current Linux scheduler code requires a stable cpu to node
+ * mapping, cores are pinned to nodes when the first CPU thread is set online.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#define KMSG_COMPONENT "numa_emu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+#include <linux/memblock.h>
+#include <linux/node.h>
+#include <linux/memory.h>
+#include <linux/slab.h>
+#include <asm/smp.h>
+#include <asm/topology.h>
+#include "numa_mode.h"
+#include "toptree.h"
+
+/* Distances between the different system components */
+#define DIST_EMPTY	0
+#define DIST_CORE	1
+#define DIST_MC		2
+#define DIST_BOOK	3
+#define DIST_MAX	4
+
+/* Node distance reported to common code */
+#define EMU_NODE_DIST	10
+
+/* Node ID for free (not yet pinned) cores */
+#define NODE_ID_FREE	-1
+
+/* Different levels of toptree */
+enum toptree_level {CORE, MC, BOOK, NODE, TOPOLOGY};
+
+/* The two toptree IDs */
+enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};
+
+/* Number of NUMA nodes */
+static int emu_nodes = 1;
+/* NUMA stripe size */
+static unsigned long emu_size;
+
+/*
+ * Node to core pinning information updates are protected by
+ * "sched_domains_mutex".
+ */
+static struct {
+	s32 to_node_id[CONFIG_NR_CPUS];	/* Pinned core to node mapping */
+	int total;			/* Total number of pinned cores */
+	int per_node_target;		/* Cores per node without extra cores */
+	int per_node[MAX_NUMNODES];	/* Number of cores pinned to node */
+} *emu_cores;
+
+/*
+ * Pin a core to a node
+ */
+static void pin_core_to_node(int core_id, int node_id)
+{
+	if (emu_cores->to_node_id[core_id] == NODE_ID_FREE) {
+		emu_cores->per_node[node_id]++;
+		emu_cores->to_node_id[core_id] = node_id;
+		emu_cores->total++;
+	} else {
+		WARN_ON(emu_cores->to_node_id[core_id] != node_id);
+	}
+}
+
+/*
+ * Number of pinned cores of a node
+ */
+static int cores_pinned(struct toptree *node)
+{
+	return emu_cores->per_node[node->id];
+}
+
+/*
+ * ID of the node where the core is pinned (or NODE_ID_FREE)
+ */
+static int core_pinned_to_node_id(struct toptree *core)
+{
+	return emu_cores->to_node_id[core->id];
+}
+
+/*
+ * Number of cores in the tree that are not yet pinned
+ */
+static int cores_free(struct toptree *tree)
+{
+	struct toptree *core;
+	int count = 0;
+
+	toptree_for_each(core, tree, CORE) {
+		if (core_pinned_to_node_id(core) == NODE_ID_FREE)
+			count++;
+	}
+	return count;
+}
+
+/*
+ * Return node of core
+ */
+static struct toptree *core_node(struct toptree *core)
+{
+	return core->parent->parent->parent;
+}
+
+/*
+ * Return book of core
+ */
+static struct toptree *core_book(struct toptree *core)
+{
+	return core->parent->parent;
+}
+
+/*
+ * Return mc of core
+ */
+static struct toptree *core_mc(struct toptree *core)
+{
+	return core->parent;
+}
+
+/*
+ * Distance between two cores
+ */
+static int dist_core_to_core(struct toptree *core1, struct toptree *core2)
+{
+	if (core_book(core1)->id != core_book(core2)->id)
+		return DIST_BOOK;
+	if (core_mc(core1)->id != core_mc(core2)->id)
+		return DIST_MC;
+	/* Same core or sibling on same MC */
+	return DIST_CORE;
+}
+
+/*
+ * Distance of a node to a core
+ */
+static int dist_node_to_core(struct toptree *node, struct toptree *core)
+{
+	struct toptree *core_node;
+	int dist_min = DIST_MAX;
+
+	toptree_for_each(core_node, node, CORE)
+		dist_min = min(dist_min, dist_core_to_core(core_node, core));
+	return dist_min == DIST_MAX ? DIST_EMPTY : dist_min;
+}
+
+/*
+ * Unify will delete empty nodes, therefore recreate nodes.
+ */
+static void toptree_unify_tree(struct toptree *tree)
+{
+	int nid;
+
+	toptree_unify(tree);
+	for (nid = 0; nid < emu_nodes; nid++)
+		toptree_get_child(tree, nid);
+}
+
+/*
+ * Find the best/nearest node for a given core and ensure that no node
+ * gets more than "emu_cores->per_node_target + extra" cores.
+ */
+static struct toptree *node_for_core(struct toptree *numa, struct toptree *core,
+				     int extra)
+{
+	struct toptree *node, *node_best = NULL;
+	int dist_cur, dist_best, cores_target;
+
+	cores_target = emu_cores->per_node_target + extra;
+	dist_best = DIST_MAX;
+	node_best = NULL;
+	toptree_for_each(node, numa, NODE) {
+		/* Already pinned cores must use their nodes */
+		if (core_pinned_to_node_id(core) == node->id) {
+			node_best = node;
+			break;
+		}
+		/* Skip nodes that already have enough cores */
+		if (cores_pinned(node) >= cores_target)
+			continue;
+		dist_cur = dist_node_to_core(node, core);
+		if (dist_cur < dist_best) {
+			dist_best = dist_cur;
+			node_best = node;
+		}
+	}
+	return node_best;
+}
+
+/*
+ * Find the best node for each core with respect to "extra" core count
+ */
+static void toptree_to_numa_single(struct toptree *numa, struct toptree *phys,
+				   int extra)
+{
+	struct toptree *node, *core, *tmp;
+
+	toptree_for_each_safe(core, tmp, phys, CORE) {
+		node = node_for_core(numa, core, extra);
+		if (!node)
+			return;
+		toptree_move(core, node);
+		pin_core_to_node(core->id, node->id);
+	}
+}
+
+/*
+ * Move structures of given level to specified NUMA node
+ */
+static void move_level_to_numa_node(struct toptree *node, struct toptree *phys,
+				    enum toptree_level level, bool perfect)
+{
+	int cores_free, cores_target = emu_cores->per_node_target;
+	struct toptree *cur, *tmp;
+
+	toptree_for_each_safe(cur, tmp, phys, level) {
+		cores_free = cores_target - toptree_count(node, CORE);
+		if (perfect) {
+			if (cores_free == toptree_count(cur, CORE))
+				toptree_move(cur, node);
+		} else {
+			if (cores_free >= toptree_count(cur, CORE))
+				toptree_move(cur, node);
+		}
+	}
+}
+
+/*
+ * Move structures of a given level to NUMA nodes. If "perfect" is specified
+ * move only perfectly fitting structures. Otherwise move also smaller
+ * than needed structures.
+ */
+static void move_level_to_numa(struct toptree *numa, struct toptree *phys,
+			       enum toptree_level level, bool perfect)
+{
+	struct toptree *node;
+
+	toptree_for_each(node, numa, NODE)
+		move_level_to_numa_node(node, phys, level, perfect);
+}
+
+/*
+ * For the first run try to move the big structures
+ */
+static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys)
+{
+	struct toptree *core;
+
+	/* Always try to move perfectly fitting structures first */
+	move_level_to_numa(numa, phys, BOOK, true);
+	move_level_to_numa(numa, phys, BOOK, false);
+	move_level_to_numa(numa, phys, MC, true);
+	move_level_to_numa(numa, phys, MC, false);
+	/* Now pin all the moved cores */
+	toptree_for_each(core, numa, CORE)
+		pin_core_to_node(core->id, core_node(core)->id);
+}
+
+/*
+ * Allocate new topology and create required nodes
+ */
+static struct toptree *toptree_new(int id, int nodes)
+{
+	struct toptree *tree;
+	int nid;
+
+	tree = toptree_alloc(TOPOLOGY, id);
+	if (!tree)
+		goto fail;
+	for (nid = 0; nid < nodes; nid++) {
+		if (!toptree_get_child(tree, nid))
+			goto fail;
+	}
+	return tree;
+fail:
+	panic("NUMA emulation could not allocate topology");
+}
+
+/*
+ * Allocate and initialize core to node mapping
+ */
+static void create_core_to_node_map(void)
+{
+	int i;
+
+	emu_cores = kzalloc(sizeof(*emu_cores), GFP_KERNEL);
+	if (emu_cores == NULL)
+		panic("Could not allocate cores to node memory");
+	for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++)
+		emu_cores->to_node_id[i] = NODE_ID_FREE;
+}
+
+/*
+ * Move cores from physical topology into NUMA target topology
+ * and try to keep as much of the physical topology as possible.
+ */
+static struct toptree *toptree_to_numa(struct toptree *phys)
+{
+	static int first = 1;
+	struct toptree *numa;
+	int cores_total;
+
+	cores_total = emu_cores->total + cores_free(phys);
+	emu_cores->per_node_target = cores_total / emu_nodes;
+	numa = toptree_new(TOPTREE_ID_NUMA, emu_nodes);
+	if (first) {
+		toptree_to_numa_first(numa, phys);
+		first = 0;
+	}
+	toptree_to_numa_single(numa, phys, 0);
+	toptree_to_numa_single(numa, phys, 1);
+	toptree_unify_tree(numa);
+
+	WARN_ON(cpumask_weight(&phys->mask));
+	return numa;
+}
+
+/*
+ * Create a toptree out of the physical topology that we got from the hypervisor
+ */
+static struct toptree *toptree_from_topology(void)
+{
+	struct toptree *phys, *node, *book, *mc, *core;
+	struct cpu_topology_s390 *top;
+	int cpu;
+
+	phys = toptree_new(TOPTREE_ID_PHYS, 1);
+
+	for_each_online_cpu(cpu) {
+		top = &per_cpu(cpu_topology, cpu);
+		node = toptree_get_child(phys, 0);
+		book = toptree_get_child(node, top->book_id);
+		mc = toptree_get_child(book, top->socket_id);
+		core = toptree_get_child(mc, top->core_id);
+		if (!book || !mc || !core)
+			panic("NUMA emulation could not allocate memory");
+		cpumask_set_cpu(cpu, &core->mask);
+		toptree_update_mask(mc);
+	}
+	return phys;
+}
+
+/*
+ * Add toptree core to topology and create correct CPU masks
+ */
+static void topology_add_core(struct toptree *core)
+{
+	struct cpu_topology_s390 *top;
+	int cpu;
+
+	for_each_cpu(cpu, &core->mask) {
+		top = &per_cpu(cpu_topology, cpu);
+		cpumask_copy(&top->thread_mask, &core->mask);
+		cpumask_copy(&top->core_mask, &core_mc(core)->mask);
+		cpumask_copy(&top->book_mask, &core_book(core)->mask);
+		cpumask_set_cpu(cpu, node_to_cpumask_map[core_node(core)->id]);
+		top->node_id = core_node(core)->id;
+	}
+}
+
+/*
+ * Apply toptree to topology and create CPU masks
+ */
+static void toptree_to_topology(struct toptree *numa)
+{
+	struct toptree *core;
+	int i;
+
+	/* Clear all node masks */
+	for (i = 0; i < MAX_NUMNODES; i++)
+		cpumask_clear(node_to_cpumask_map[i]);
+
+	/* Rebuild all masks */
+	toptree_for_each(core, numa, CORE)
+		topology_add_core(core);
+}
+
+/*
+ * Show the node to core mapping
+ */
+static void print_node_to_core_map(void)
+{
+	int nid, cid;
+
+	if (!numa_debug_enabled)
+		return;
+	printk(KERN_DEBUG "NUMA node to core mapping\n");
+	for (nid = 0; nid < emu_nodes; nid++) {
+		printk(KERN_DEBUG "  node %3d: ", nid);
+		for (cid = 0; cid < ARRAY_SIZE(emu_cores->to_node_id); cid++) {
+			if (emu_cores->to_node_id[cid] == nid)
+				printk(KERN_CONT "%d ", cid);
+		}
+		printk(KERN_CONT "\n");
+	}
+}
+
+/*
+ * Transfer physical topology into a NUMA topology and modify CPU masks
+ * according to the NUMA topology.
+ *
+ * Must be called with "sched_domains_mutex" lock held.
+ */
+static void emu_update_cpu_topology(void)
+{
+	struct toptree *phys, *numa;
+
+	if (emu_cores == NULL)
+		create_core_to_node_map();
+	phys = toptree_from_topology();
+	numa = toptree_to_numa(phys);
+	toptree_free(phys);
+	toptree_to_topology(numa);
+	toptree_free(numa);
+	print_node_to_core_map();
+}
+
+/*
+ * If emu_size is not set, use CONFIG_EMU_SIZE. Then round to minimum
+ * alignment (needed for memory hotplug).
+ */
+static unsigned long emu_setup_size_adjust(unsigned long size)
+{
+	size = size ? : CONFIG_EMU_SIZE;
+	size = roundup(size, memory_block_size_bytes());
+	return size;
+}
+
+/*
+ * If we have not enough memory for the specified nodes, reduce the node count.
+ */
+static int emu_setup_nodes_adjust(int nodes)
+{
+	int nodes_max;
+
+	nodes_max = memblock.memory.total_size / emu_size;
+	nodes_max = max(nodes_max, 1);
+	if (nodes_max >= nodes)
+		return nodes;
+	pr_warn("Not enough memory for %d nodes, reducing node count\n", nodes);
+	return nodes_max;
+}
+
+/*
+ * Early emu setup
+ */
+static void emu_setup(void)
+{
+	emu_size = emu_setup_size_adjust(emu_size);
+	emu_nodes = emu_setup_nodes_adjust(emu_nodes);
+	pr_info("Creating %d nodes with memory stripe size %ld MB\n",
+		emu_nodes, emu_size >> 20);
+}
+
+/*
+ * Return node id for given page number
+ */
+static int emu_pfn_to_nid(unsigned long pfn)
+{
+	return (pfn / (emu_size >> PAGE_SHIFT)) % emu_nodes;
+}
+
+/*
+ * Return stripe size
+ */
+static unsigned long emu_align(void)
+{
+	return emu_size;
+}
+
+/*
+ * Return distance between two nodes
+ */
+static int emu_distance(int node1, int node2)
+{
+	return (node1 != node2) * EMU_NODE_DIST;
+}
+
+/*
+ * Define callbacks for generic s390 NUMA infrastructure
+ */
+const struct numa_mode numa_mode_emu = {
+	.name = "emu",
+	.setup = emu_setup,
+	.update_cpu_topology = emu_update_cpu_topology,
+	.__pfn_to_nid = emu_pfn_to_nid,
+	.align = emu_align,
+	.distance = emu_distance,
+};
+
+/*
+ * Kernel parameter: emu_nodes=<n>
+ */
+static int __init early_parse_emu_nodes(char *p)
+{
+	int count;
+
+	if (kstrtoint(p, 0, &count) != 0 || count <= 0)
+		return 0;
+	if (count <= 0)
+		return 0;
+	emu_nodes = min(count, MAX_NUMNODES);
+	return 0;
+}
+early_param("emu_nodes", early_parse_emu_nodes);
+
+/*
+ * Kernel parameter: emu_size=[<n>[k|M|G|T]]
+ */
+static int __init early_parse_emu_size(char *p)
+{
+	emu_size = memparse(p, NULL);
+	return 0;
+}
+early_param("emu_size", early_parse_emu_size);
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
new file mode 100644
index 0000000..09b1d23
--- /dev/null
+++ b/arch/s390/numa/numa.c
@@ -0,0 +1,184 @@
+/*
+ * NUMA support for s390
+ *
+ * Implement NUMA core code.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#define KMSG_COMPONENT "numa"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/cpumask.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+#include <linux/node.h>
+
+#include <asm/numa.h>
+#include "numa_mode.h"
+
+pg_data_t *node_data[MAX_NUMNODES];
+EXPORT_SYMBOL(node_data);
+
+cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+EXPORT_SYMBOL(node_to_cpumask_map);
+
+const struct numa_mode numa_mode_plain = {
+	.name = "plain",
+};
+
+static const struct numa_mode *mode = &numa_mode_plain;
+
+int numa_pfn_to_nid(unsigned long pfn)
+{
+	return mode->__pfn_to_nid ? mode->__pfn_to_nid(pfn) : 0;
+}
+
+void numa_update_cpu_topology(void)
+{
+	if (mode->update_cpu_topology)
+		mode->update_cpu_topology();
+}
+
+int __node_distance(int a, int b)
+{
+	return mode->distance ? mode->distance(a, b) : 0;
+}
+
+int numa_debug_enabled;
+
+/*
+ * alloc_node_data() - Allocate node data
+ */
+static __init pg_data_t *alloc_node_data(void)
+{
+	pg_data_t *res;
+
+	res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 1);
+	if (!res)
+		panic("Could not allocate memory for node data!\n");
+	memset(res, 0, sizeof(pg_data_t));
+	return res;
+}
+
+/*
+ * numa_setup_memory() - Assign bootmem to nodes
+ *
+ * The memory is first added to memblock without any respect to nodes.
+ * This is fixed before remaining memblock memory is handed over to the
+ * buddy allocator.
+ * An important side effect is that large bootmem allocations might easily
+ * cross node boundaries, which can be needed for large allocations with
+ * smaller memory stripes in each node (i.e. when using NUMA emulation).
+ *
+ * Memory defines nodes:
+ * Therefore this routine also sets the nodes online with memory.
+ */
+static void __init numa_setup_memory(void)
+{
+	unsigned long cur_base, align, end_of_dram;
+	int nid = 0;
+
+	end_of_dram = memblock_end_of_DRAM();
+	align = mode->align ? mode->align() : ULONG_MAX;
+
+	/*
+	 * Step through all available memory and assign it to the nodes
+	 * indicated by the mode implementation.
+	 * All nodes which are seen here will be set online.
+	 */
+	cur_base = 0;
+	do {
+		nid = numa_pfn_to_nid(PFN_DOWN(cur_base));
+		node_set_online(nid);
+		memblock_set_node(cur_base, align, &memblock.memory, nid);
+		cur_base += align;
+	} while (cur_base < end_of_dram);
+
+	/* Allocate and fill out node_data */
+	for (nid = 0; nid < MAX_NUMNODES; nid++)
+		NODE_DATA(nid) = alloc_node_data();
+
+	for_each_online_node(nid) {
+		unsigned long start_pfn, end_pfn;
+		unsigned long t_start, t_end;
+		int i;
+
+		start_pfn = ULONG_MAX;
+		end_pfn = 0;
+		for_each_mem_pfn_range(i, nid, &t_start, &t_end, NULL) {
+			if (t_start < start_pfn)
+				start_pfn = t_start;
+			if (t_end > end_pfn)
+				end_pfn = t_end;
+		}
+		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+		NODE_DATA(nid)->node_id = nid;
+	}
+}
+
+/*
+ * numa_setup() - Earliest initialization
+ *
+ * Assign the mode and call the mode's setup routine.
+ */
+void __init numa_setup(void)
+{
+	pr_info("NUMA mode: %s\n", mode->name);
+	if (mode->setup)
+		mode->setup();
+	numa_setup_memory();
+	memblock_dump_all();
+}
+
+
+/*
+ * numa_init_early() - Initialization initcall
+ *
+ * This runs when only one CPU is online and before the first
+ * topology update is called for by the scheduler.
+ */
+static int __init numa_init_early(void)
+{
+	/* Attach all possible CPUs to node 0 for now. */
+	cpumask_copy(node_to_cpumask_map[0], cpu_possible_mask);
+	return 0;
+}
+early_initcall(numa_init_early);
+
+/*
+ * numa_init_late() - Initialization initcall
+ *
+ * Register NUMA nodes.
+ */
+static int __init numa_init_late(void)
+{
+	int nid;
+
+	for_each_online_node(nid)
+		register_one_node(nid);
+	return 0;
+}
+device_initcall(numa_init_late);
+
+static int __init parse_debug(char *parm)
+{
+	numa_debug_enabled = 1;
+	return 0;
+}
+early_param("numa_debug", parse_debug);
+
+static int __init parse_numa(char *parm)
+{
+	if (strcmp(parm, numa_mode_plain.name) == 0)
+		mode = &numa_mode_plain;
+#ifdef CONFIG_NUMA_EMU
+	if (strcmp(parm, numa_mode_emu.name) == 0)
+		mode = &numa_mode_emu;
+#endif
+	return 0;
+}
+early_param("numa", parse_numa);
diff --git a/arch/s390/numa/numa_mode.h b/arch/s390/numa/numa_mode.h
new file mode 100644
index 0000000..08953b0b
--- /dev/null
+++ b/arch/s390/numa/numa_mode.h
@@ -0,0 +1,24 @@
+/*
+ * NUMA support for s390
+ *
+ * Define declarations used for communication between NUMA mode
+ * implementations and NUMA core functionality.
+ *
+ * Copyright IBM Corp. 2015
+ */
+#ifndef __S390_NUMA_MODE_H
+#define __S390_NUMA_MODE_H
+
+struct numa_mode {
+	char *name;				/* Name of mode */
+	void (*setup)(void);			/* Initizalize mode */
+	void (*update_cpu_topology)(void);	/* Called by topology code */
+	int (*__pfn_to_nid)(unsigned long pfn);	/* PFN to node ID */
+	unsigned long (*align)(void);		/* Minimum node alignment */
+	int (*distance)(int a, int b);		/* Distance between two nodes */
+};
+
+extern const struct numa_mode numa_mode_plain;
+extern const struct numa_mode numa_mode_emu;
+
+#endif /* __S390_NUMA_MODE_H */
diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c
new file mode 100644
index 0000000..902d350
--- /dev/null
+++ b/arch/s390/numa/toptree.c
@@ -0,0 +1,342 @@
+/*
+ * NUMA support for s390
+ *
+ * A tree structure used for machine topology mangling
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+#include <linux/list.h>
+#include <linux/list_sort.h>
+#include <linux/slab.h>
+#include <asm/numa.h>
+
+#include "toptree.h"
+
+/**
+ * toptree_alloc - Allocate and initialize a new tree node.
+ * @level: The node's vertical level; level 0 contains the leaves.
+ * @id: ID number, explicitly not unique beyond scope of node's siblings
+ *
+ * Allocate a new tree node and initialize it.
+ *
+ * RETURNS:
+ * Pointer to the new tree node or NULL on error
+ */
+struct toptree *toptree_alloc(int level, int id)
+{
+	struct toptree *res = kzalloc(sizeof(struct toptree), GFP_KERNEL);
+
+	if (!res)
+		return res;
+
+	INIT_LIST_HEAD(&res->children);
+	INIT_LIST_HEAD(&res->sibling);
+	cpumask_clear(&res->mask);
+	res->level = level;
+	res->id = id;
+	return res;
+}
+
+/**
+ * toptree_remove - Remove a tree node from a tree
+ * @cand: Pointer to the node to remove
+ *
+ * The node is detached from its parent node. The parent node's
+ * masks will be updated to reflect the loss of the child.
+ */
+static void toptree_remove(struct toptree *cand)
+{
+	struct toptree *oldparent;
+
+	list_del_init(&cand->sibling);
+	oldparent = cand->parent;
+	cand->parent = NULL;
+	toptree_update_mask(oldparent);
+}
+
+/**
+ * toptree_free - discard a tree node
+ * @cand: Pointer to the tree node to discard
+ *
+ * Checks if @cand is attached to a parent node. Detaches it
+ * cleanly using toptree_remove. Possible children are freed
+ * recursively. In the end @cand itself is freed.
+ */
+void toptree_free(struct toptree *cand)
+{
+	struct toptree *child, *tmp;
+
+	if (cand->parent)
+		toptree_remove(cand);
+	toptree_for_each_child_safe(child, tmp, cand)
+		toptree_free(child);
+	kfree(cand);
+}
+
+/**
+ * toptree_update_mask - Update node bitmasks
+ * @cand: Pointer to a tree node
+ *
+ * The node's cpumask will be updated by combining all children's
+ * masks. Then toptree_update_mask is called recursively for the
+ * parent if applicable.
+ *
+ * NOTE:
+ * This must not be called on leaves. If called on a leaf, its
+ * CPU mask is cleared and lost.
+ */
+void toptree_update_mask(struct toptree *cand)
+{
+	struct toptree *child;
+
+	cpumask_clear(&cand->mask);
+	list_for_each_entry(child, &cand->children, sibling)
+		cpumask_or(&cand->mask, &cand->mask, &child->mask);
+	if (cand->parent)
+		toptree_update_mask(cand->parent);
+}
+
+/**
+ * toptree_insert - Insert a tree node into tree
+ * @cand: Pointer to the node to insert
+ * @target: Pointer to the node to which @cand will added as a child
+ *
+ * Insert a tree node into a tree. Masks will be updated automatically.
+ *
+ * RETURNS:
+ * 0 on success, -1 if NULL is passed as argument or the node levels
+ * don't fit.
+ */
+static int toptree_insert(struct toptree *cand, struct toptree *target)
+{
+	if (!cand || !target)
+		return -1;
+	if (target->level != (cand->level + 1))
+		return -1;
+	list_add_tail(&cand->sibling, &target->children);
+	cand->parent = target;
+	toptree_update_mask(target);
+	return 0;
+}
+
+/**
+ * toptree_move_children - Move all child nodes of a node to a new place
+ * @cand: Pointer to the node whose children are to be moved
+ * @target: Pointer to the node to which @cand's children will be attached
+ *
+ * Take all child nodes of @cand and move them using toptree_move.
+ */
+static void toptree_move_children(struct toptree *cand, struct toptree *target)
+{
+	struct toptree *child, *tmp;
+
+	toptree_for_each_child_safe(child, tmp, cand)
+		toptree_move(child, target);
+}
+
+/**
+ * toptree_unify - Merge children with same ID
+ * @cand: Pointer to node whose direct children should be made unique
+ *
+ * When mangling the tree it is possible that a node has two or more children
+ * which have the same ID. This routine merges these children into one and
+ * moves all children of the merged nodes into the unified node.
+ */
+void toptree_unify(struct toptree *cand)
+{
+	struct toptree *child, *tmp, *cand_copy;
+
+	/* Threads cannot be split, cores are not split */
+	if (cand->level < 2)
+		return;
+
+	cand_copy = toptree_alloc(cand->level, 0);
+	toptree_for_each_child_safe(child, tmp, cand) {
+		struct toptree *tmpchild;
+
+		if (!cpumask_empty(&child->mask)) {
+			tmpchild = toptree_get_child(cand_copy, child->id);
+			toptree_move_children(child, tmpchild);
+		}
+		toptree_free(child);
+	}
+	toptree_move_children(cand_copy, cand);
+	toptree_free(cand_copy);
+
+	toptree_for_each_child(child, cand)
+		toptree_unify(child);
+}
+
+/**
+ * toptree_move - Move a node to another context
+ * @cand: Pointer to the node to move
+ * @target: Pointer to the node where @cand should go
+ *
+ * In the easiest case @cand is exactly on the level below @target
+ * and will be immediately moved to the target.
+ *
+ * If @target's level is not the direct parent level of @cand,
+ * nodes for the missing levels are created and put between
+ * @cand and @target. The "stacking" nodes' IDs are taken from
+ * @cand's parents.
+ *
+ * After this it is likely to have redundant nodes in the tree
+ * which are addressed by means of toptree_unify.
+ */
+void toptree_move(struct toptree *cand, struct toptree *target)
+{
+	struct toptree *stack_target, *real_insert_point, *ptr, *tmp;
+
+	if (cand->level + 1 == target->level) {
+		toptree_remove(cand);
+		toptree_insert(cand, target);
+		return;
+	}
+
+	real_insert_point = NULL;
+	ptr = cand;
+	stack_target = NULL;
+
+	do {
+		tmp = stack_target;
+		stack_target = toptree_alloc(ptr->level + 1,
+					     ptr->parent->id);
+		toptree_insert(tmp, stack_target);
+		if (!real_insert_point)
+			real_insert_point = stack_target;
+		ptr = ptr->parent;
+	} while (stack_target->level < (target->level - 1));
+
+	toptree_remove(cand);
+	toptree_insert(cand, real_insert_point);
+	toptree_insert(stack_target, target);
+}
+
+/**
+ * toptree_get_child - Access a tree node's child by its ID
+ * @cand: Pointer to tree node whose child is to access
+ * @id: The desired child's ID
+ *
+ * @cand's children are searched for a child with matching ID.
+ * If no match can be found, a new child with the desired ID
+ * is created and returned.
+ */
+struct toptree *toptree_get_child(struct toptree *cand, int id)
+{
+	struct toptree *child;
+
+	toptree_for_each_child(child, cand)
+		if (child->id == id)
+			return child;
+	child = toptree_alloc(cand->level-1, id);
+	toptree_insert(child, cand);
+	return child;
+}
+
+/**
+ * toptree_first - Find the first descendant on specified level
+ * @context: Pointer to tree node whose descendants are to be used
+ * @level: The level of interest
+ *
+ * RETURNS:
+ * @context's first descendant on the specified level, or NULL
+ * if there is no matching descendant
+ */
+struct toptree *toptree_first(struct toptree *context, int level)
+{
+	struct toptree *child, *tmp;
+
+	if (context->level == level)
+		return context;
+
+	if (!list_empty(&context->children)) {
+		list_for_each_entry(child, &context->children, sibling) {
+			tmp = toptree_first(child, level);
+			if (tmp)
+				return tmp;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * toptree_next_sibling - Return next sibling
+ * @cur: Pointer to a tree node
+ *
+ * RETURNS:
+ * If @cur has a parent and is not the last in the parent's children list,
+ * the next sibling is returned. Or NULL when there are no siblings left.
+ */
+static struct toptree *toptree_next_sibling(struct toptree *cur)
+{
+	if (cur->parent == NULL)
+		return NULL;
+
+	if (cur == list_last_entry(&cur->parent->children,
+				   struct toptree, sibling))
+		return NULL;
+	return (struct toptree *) list_next_entry(cur, sibling);
+}
+
+/**
+ * toptree_next - Tree traversal function
+ * @cur: Pointer to current element
+ * @context: Pointer to the root node of the tree or subtree to
+ * be traversed.
+ * @level: The level of interest.
+ *
+ * RETURNS:
+ * Pointer to the next node on level @level
+ * or NULL when there is no next node.
+ */
+struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
+			     int level)
+{
+	struct toptree *cur_context, *tmp;
+
+	if (!cur)
+		return NULL;
+
+	if (context->level == level)
+		return NULL;
+
+	tmp = toptree_next_sibling(cur);
+	if (tmp != NULL)
+		return tmp;
+
+	cur_context = cur;
+	while (cur_context->level < context->level - 1) {
+		/* Step up */
+		cur_context = cur_context->parent;
+		/* Step aside */
+		tmp = toptree_next_sibling(cur_context);
+		if (tmp != NULL) {
+			/* Step down */
+			tmp = toptree_first(tmp, level);
+			if (tmp != NULL)
+				return tmp;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * toptree_count - Count descendants on specified level
+ * @context: Pointer to node whose descendants are to be considered
+ * @level: Only descendants on the specified level will be counted
+ *
+ * RETURNS:
+ * Number of descendants on the specified level
+ */
+int toptree_count(struct toptree *context, int level)
+{
+	struct toptree *cur;
+	int cnt = 0;
+
+	toptree_for_each(cur, context, level)
+		cnt++;
+	return cnt;
+}
diff --git a/arch/s390/numa/toptree.h b/arch/s390/numa/toptree.h
new file mode 100644
index 0000000..bdf5020
--- /dev/null
+++ b/arch/s390/numa/toptree.h
@@ -0,0 +1,60 @@
+/*
+ * NUMA support for s390
+ *
+ * A tree structure used for machine topology mangling
+ *
+ * Copyright IBM Corp. 2015
+ */
+#ifndef S390_TOPTREE_H
+#define S390_TOPTREE_H
+
+#include <linux/cpumask.h>
+#include <linux/list.h>
+
+struct toptree {
+	int level;
+	int id;
+	cpumask_t mask;
+	struct toptree *parent;
+	struct list_head sibling;
+	struct list_head children;
+};
+
+struct toptree *toptree_alloc(int level, int id);
+void toptree_free(struct toptree *cand);
+void toptree_update_mask(struct toptree *cand);
+void toptree_unify(struct toptree *cand);
+struct toptree *toptree_get_child(struct toptree *cand, int id);
+void toptree_move(struct toptree *cand, struct toptree *target);
+int toptree_count(struct toptree *context, int level);
+
+struct toptree *toptree_first(struct toptree *context, int level);
+struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
+			     int level);
+
+#define toptree_for_each_child(child, ptree)				\
+	list_for_each_entry(child,  &ptree->children, sibling)
+
+#define toptree_for_each_child_safe(child, ptmp, ptree)			\
+	list_for_each_entry_safe(child, ptmp, &ptree->children, sibling)
+
+#define toptree_is_last(ptree)					\
+	((ptree->parent == NULL) ||				\
+	 (ptree->parent->children.prev == &ptree->sibling))
+
+#define toptree_for_each(ptree, cont, ttype)		\
+	for (ptree = toptree_first(cont, ttype);	\
+	     ptree != NULL;				\
+	     ptree = toptree_next(ptree, cont, ttype))
+
+#define toptree_for_each_safe(ptree, tmp, cont, ttype)		\
+	for (ptree = toptree_first(cont, ttype),		\
+		     tmp = toptree_next(ptree, cont, ttype);	\
+	     ptree != NULL;					\
+	     ptree = tmp,					\
+		     tmp = toptree_next(ptree, cont, ttype))
+
+#define toptree_for_each_sibling(ptree, start)			\
+	toptree_for_each(ptree, start->parent, start->level)
+
+#endif /* S390_TOPTREE_H */
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index bc927a0..9cfa2ff 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -16,6 +16,7 @@
 #include <linux/fs.h>
 #include <linux/module.h>
 #include <asm/processor.h>
+#include <asm/perf_event.h>
 
 #include "../../../drivers/oprofile/oprof.h"
 
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 598f023..17c04c7 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -76,11 +76,6 @@ EXPORT_SYMBOL_GPL(zpci_iomap_start);
 
 static struct kmem_cache *zdev_fmb_cache;
 
-struct zpci_dev *get_zdev(struct pci_dev *pdev)
-{
-	return (struct zpci_dev *) pdev->sysdata;
-}
-
 struct zpci_dev *get_zdev_by_fid(u32 fid)
 {
 	struct zpci_dev *tmp, *zdev = NULL;
@@ -269,7 +264,7 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
 			      unsigned long offset,
 			      unsigned long max)
 {
-	struct zpci_dev *zdev =	get_zdev(pdev);
+	struct zpci_dev *zdev =	to_zpci(pdev);
 	u64 addr;
 	int idx;
 
@@ -385,7 +380,7 @@ static void zpci_irq_handler(struct airq_struct *airq)
 
 int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	unsigned int hwirq, msi_vecs;
 	unsigned long aisb;
 	struct msi_desc *msi;
@@ -460,7 +455,7 @@ out:
 
 void arch_teardown_msi_irqs(struct pci_dev *pdev)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	struct msi_desc *msi;
 	int rc;
 
@@ -637,7 +632,7 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
 	int i;
 
 	for (i = 0; i < PCI_BAR_COUNT; i++) {
-		if (!zdev->bars[i].size)
+		if (!zdev->bars[i].size || !zdev->bars[i].res)
 			continue;
 
 		zpci_free_iomap(zdev, zdev->bars[i].map_idx);
@@ -648,7 +643,7 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
 
 int pcibios_add_device(struct pci_dev *pdev)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	struct resource *res;
 	int i;
 
@@ -673,7 +668,7 @@ void pcibios_release_device(struct pci_dev *pdev)
 
 int pcibios_enable_device(struct pci_dev *pdev, int mask)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 
 	zdev->pdev = pdev;
 	zpci_debug_init_device(zdev);
@@ -684,7 +679,7 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask)
 
 void pcibios_disable_device(struct pci_dev *pdev)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 
 	zpci_fmb_disable_device(zdev);
 	zpci_debug_exit_device(zdev);
@@ -695,7 +690,7 @@ void pcibios_disable_device(struct pci_dev *pdev)
 static int zpci_restore(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	int ret = 0;
 
 	if (zdev->state != ZPCI_FN_STATE_ONLINE)
@@ -717,7 +712,7 @@ out:
 static int zpci_freeze(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 
 	if (zdev->state != ZPCI_FN_STATE_ONLINE)
 		return 0;
@@ -777,17 +772,22 @@ static int zpci_scan_bus(struct zpci_dev *zdev)
 
 	ret = zpci_setup_bus_resources(zdev, &resources);
 	if (ret)
-		return ret;
+		goto error;
 
 	zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
 				      zdev, &resources);
 	if (!zdev->bus) {
-		zpci_cleanup_bus_resources(zdev);
-		return -EIO;
+		ret = -EIO;
+		goto error;
 	}
 	zdev->bus->max_bus_speed = zdev->max_bus_speed;
 	pci_bus_add_devices(zdev->bus);
 	return 0;
+
+error:
+	zpci_cleanup_bus_resources(zdev);
+	pci_free_resource_list(&resources);
+	return ret;
 }
 
 int zpci_enable_device(struct zpci_dev *zdev)
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 6fd8d58..42b7658 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -277,7 +277,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 				     enum dma_data_direction direction,
 				     struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	unsigned long nr_pages, iommu_page_index;
 	unsigned long pa = page_to_phys(page) + offset;
 	int flags = ZPCI_PTE_VALID;
@@ -316,7 +316,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
 				 size_t size, enum dma_data_direction direction,
 				 struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	unsigned long iommu_page_index;
 	int npages;
 
@@ -337,7 +337,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
 			    dma_addr_t *dma_handle, gfp_t flag,
 			    struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	struct page *page;
 	unsigned long pa;
 	dma_addr_t map;
@@ -367,7 +367,7 @@ static void s390_dma_free(struct device *dev, size_t size,
 			  void *pa, dma_addr_t dma_handle,
 			  struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 
 	size = PAGE_ALIGN(size);
 	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index ed2394d..369a3e0 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -46,15 +46,13 @@ struct zpci_ccdf_avail {
 static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 {
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
+	struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
 
 	zpci_err("error CCDF:\n");
 	zpci_err_hex(ccdf, sizeof(*ccdf));
 
-	if (!zdev)
-		return;
-
 	pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
-	       pci_name(zdev->pdev), ccdf->pec, ccdf->fid);
+	       pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
 }
 
 void zpci_event_error(void *data)
@@ -89,7 +87,9 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 		ret = zpci_enable_device(zdev);
 		if (ret)
 			break;
+		pci_lock_rescan_remove();
 		pci_rescan_bus(zdev->bus);
+		pci_unlock_rescan_remove();
 		break;
 	case 0x0302: /* Reserved -> Standby */
 		if (!zdev)
@@ -97,7 +97,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 		break;
 	case 0x0303: /* Deconfiguration requested */
 		if (pdev)
-			pci_stop_and_remove_bus_device(pdev);
+			pci_stop_and_remove_bus_device_locked(pdev);
 
 		ret = zpci_disable_device(zdev);
 		if (ret)
@@ -114,7 +114,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 			/* Give the driver a hint that the function is
 			 * already unusable. */
 			pdev->error_state = pci_channel_io_perm_failure;
-			pci_stop_and_remove_bus_device(pdev);
+			pci_stop_and_remove_bus_device_locked(pdev);
 		}
 
 		zdev->fh = ccdf->fh;
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 85267c0..dcc2634 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -8,10 +8,23 @@
 #include <linux/errno.h>
 #include <linux/delay.h>
 #include <asm/pci_insn.h>
+#include <asm/pci_debug.h>
 #include <asm/processor.h>
 
 #define ZPCI_INSN_BUSY_DELAY	1	/* 1 microsecond */
 
+static inline void zpci_err_insn(u8 cc, u8 status, u64 req, u64 offset)
+{
+	struct {
+		u8 cc;
+		u8 status;
+		u64 req;
+		u64 offset;
+	} data = {cc, status, req, offset};
+
+	zpci_err_hex(&data, sizeof(data));
+}
+
 /* Modify PCI Function Controls */
 static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
 {
@@ -38,8 +51,8 @@ int zpci_mod_fc(u64 req, struct zpci_fib *fib)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d\n",
-			     __func__, cc, status);
+		zpci_err_insn(cc, status, req, 0);
+
 	return (cc) ? -EIO : 0;
 }
 
@@ -72,8 +85,8 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d  dma_addr: %Lx  size: %Lx\n",
-			    __func__, cc, status, addr, range);
+		zpci_err_insn(cc, status, addr, range);
+
 	return (cc) ? -EIO : 0;
 }
 
@@ -121,8 +134,8 @@ int zpci_load(u64 *data, u64 req, u64 offset)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d  req: %Lx  offset: %Lx\n",
-			    __func__, cc, status, req, offset);
+		zpci_err_insn(cc, status, req, offset);
+
 	return (cc > 0) ? -EIO : cc;
 }
 EXPORT_SYMBOL_GPL(zpci_load);
@@ -159,8 +172,8 @@ int zpci_store(u64 data, u64 req, u64 offset)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d  req: %Lx  offset: %Lx\n",
-			__func__, cc, status, req, offset);
+		zpci_err_insn(cc, status, req, offset);
+
 	return (cc > 0) ? -EIO : cc;
 }
 EXPORT_SYMBOL_GPL(zpci_store);
@@ -195,8 +208,8 @@ int zpci_store_block(const u64 *data, u64 req, u64 offset)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d  req: %Lx  offset: %Lx\n",
-			    __func__, cc, status, req, offset);
+		zpci_err_insn(cc, status, req, offset);
+
 	return (cc > 0) ? -EIO : cc;
 }
 EXPORT_SYMBOL_GPL(zpci_store_block);
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index fa3ce89..f37a580 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -16,7 +16,7 @@
 static ssize_t name##_show(struct device *dev,				\
 			   struct device_attribute *attr, char *buf)	\
 {									\
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));		\
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));		\
 									\
 	return sprintf(buf, fmt, zdev->member);				\
 }									\
@@ -38,23 +38,30 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
 			     const char *buf, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	int ret;
 
 	if (!device_remove_file_self(dev, attr))
 		return count;
 
+	pci_lock_rescan_remove();
 	pci_stop_and_remove_bus_device(pdev);
 	ret = zpci_disable_device(zdev);
 	if (ret)
-		return ret;
+		goto error;
 
 	ret = zpci_enable_device(zdev);
 	if (ret)
-		return ret;
+		goto error;
 
 	pci_rescan_bus(zdev->bus);
+	pci_unlock_rescan_remove();
+
 	return count;
+
+error:
+	pci_unlock_rescan_remove();
+	return ret;
 }
 static DEVICE_ATTR_WO(recover);
 
@@ -64,7 +71,7 @@ static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
 {
 	struct device *dev = kobj_to_dev(kobj);
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 
 	return memory_read_from_buffer(buf, count, &off, zdev->util_str,
 				       sizeof(zdev->util_str));
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index 138fb3d..92ffe39 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -7,6 +7,7 @@ generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += sections.h
 generic-y += trace_clock.h
diff --git a/arch/score/include/asm/mm-arch-hooks.h b/arch/score/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 5e38689..0000000
--- a/arch/score/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_SCORE_MM_ARCH_HOOKS_H
-#define _ASM_SCORE_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_SCORE_MM_ARCH_HOOKS_H */
diff --git a/arch/sh/drivers/pci/pci-sh4.h b/arch/sh/drivers/pci/pci-sh4.h
index cbf763b..0288efc 100644
--- a/arch/sh/drivers/pci/pci-sh4.h
+++ b/arch/sh/drivers/pci/pci-sh4.h
@@ -11,14 +11,6 @@
 
 #include <asm/io.h>
 
-/* startup values */
-#define PCI_PROBE_BIOS		1
-#define PCI_PROBE_CONF1		2
-#define PCI_PROBE_CONF2		4
-#define PCI_NO_CHECKS		0x400
-#define PCI_ASSIGN_ROMS		0x1000
-#define PCI_BIOS_IRQ_SCAN	0x2000
-
 #define SH4_PCICR		0x100		/* PCI Control Register */
   #define SH4_PCICR_PREFIX	  0xA5000000	/* CR prefix for write */
   #define SH4_PCICR_FTO		  0x00000400	/* TRDY/IRDY Enable */
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 9ac4626..aac452b 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -16,6 +16,7 @@ generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += param.h
diff --git a/arch/sh/include/asm/mm-arch-hooks.h b/arch/sh/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 1808729..0000000
--- a/arch/sh/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_SH_MM_ARCH_HOOKS_H
-#define _ASM_SH_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_SH_MM_ARCH_HOOKS_H */
diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c
index 0a47bd3..4ca78ed 100644
--- a/arch/sh/kernel/cpu/sh4/sq.c
+++ b/arch/sh/kernel/cpu/sh4/sq.c
@@ -355,13 +355,12 @@ static int sq_dev_add(struct device *dev, struct subsys_interface *sif)
 	return error;
 }
 
-static int sq_dev_remove(struct device *dev, struct subsys_interface *sif)
+static void sq_dev_remove(struct device *dev, struct subsys_interface *sif)
 {
 	unsigned int cpu = dev->id;
 	struct kobject *kobj = sq_kobject[cpu];
 
 	kobject_put(kobj);
-	return 0;
 }
 
 static struct subsys_interface sq_interface = {
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 2b2a69d..e9286188 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -12,6 +12,7 @@ generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += module.h
 generic-y += mutex.h
 generic-y += preempt.h
diff --git a/arch/sparc/include/asm/mm-arch-hooks.h b/arch/sparc/include/asm/mm-arch-hooks.h
deleted file mode 100644
index b89ba44..0000000
--- a/arch/sparc/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_SPARC_MM_ARCH_HOOKS_H
-#define _ASM_SPARC_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_SPARC_MM_ARCH_HOOKS_H */
diff --git a/arch/sparc/include/asm/visasm.h b/arch/sparc/include/asm/visasm.h
index 1f0aa20..6424249 100644
--- a/arch/sparc/include/asm/visasm.h
+++ b/arch/sparc/include/asm/visasm.h
@@ -28,16 +28,10 @@
  * Must preserve %o5 between VISEntryHalf and VISExitHalf */
 
 #define VISEntryHalf					\
-	rd		%fprs, %o5;			\
-	andcc		%o5, FPRS_FEF, %g0;		\
-	be,pt		%icc, 297f;			\
-	 sethi		%hi(298f), %g7;			\
-	sethi		%hi(VISenterhalf), %g1;		\
-	jmpl		%g1 + %lo(VISenterhalf), %g0;	\
-	 or		%g7, %lo(298f), %g7;		\
-	clr		%o5;				\
-297:	wr		%o5, FPRS_FEF, %fprs;		\
-298:
+	VISEntry
+
+#define VISExitHalf					\
+	VISExit
 
 #define VISEntryHalfFast(fail_label)			\
 	rd		%fprs, %o5;			\
@@ -47,7 +41,7 @@
 	ba,a,pt		%xcc, fail_label;		\
 297:	wr		%o5, FPRS_FEF, %fprs;
 
-#define VISExitHalf					\
+#define VISExitHalfFast					\
 	wr		%o5, 0, %fprs;
 
 #ifndef __ASSEMBLY__
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index c928bc6..3a0e1a9 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -249,7 +249,6 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
 					 struct pci_bus *bus, int devfn)
 {
 	struct dev_archdata *sd;
-	struct pci_slot *slot;
 	struct platform_device *op;
 	struct pci_dev *dev;
 	const char *type;
@@ -290,10 +289,7 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
 	dev->multifunction = 0;		/* maybe a lie? */
 	set_pcie_port_type(dev);
 
-	list_for_each_entry(slot, &dev->bus->slots, list)
-		if (PCI_SLOT(dev->devfn) == slot->number)
-			dev->slot = slot;
-
+	pci_dev_assign_slot(dev);
 	dev->vendor = of_getintprop_default(node, "vendor-id", 0xffff);
 	dev->device = of_getintprop_default(node, "device-id", 0xffff);
 	dev->subsystem_vendor =
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
index 140527a..83aeeb1 100644
--- a/arch/sparc/lib/NG4memcpy.S
+++ b/arch/sparc/lib/NG4memcpy.S
@@ -240,8 +240,11 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	add		%o0, 0x40, %o0
 	bne,pt		%icc, 1b
 	 LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
+#ifdef NON_USER_COPY
+	VISExitHalfFast
+#else
 	VISExitHalf
-
+#endif
 	brz,pn		%o2, .Lexit
 	 cmp		%o2, 19
 	ble,pn		%icc, .Lsmall_unaligned
diff --git a/arch/sparc/lib/VISsave.S b/arch/sparc/lib/VISsave.S
index b320ae9..a063d84 100644
--- a/arch/sparc/lib/VISsave.S
+++ b/arch/sparc/lib/VISsave.S
@@ -44,9 +44,8 @@ vis1:	ldub		[%g6 + TI_FPSAVED], %g3
 
 	 stx		%g3, [%g6 + TI_GSR]
 2:	add		%g6, %g1, %g3
-	cmp		%o5, FPRS_DU
-	be,pn		%icc, 6f
-	 sll		%g1, 3, %g1
+	mov		FPRS_DU | FPRS_DL | FPRS_FEF, %o5
+	sll		%g1, 3, %g1
 	stb		%o5, [%g3 + TI_FPSAVED]
 	rd		%gsr, %g2
 	add		%g6, %g1, %g3
@@ -80,65 +79,3 @@ vis1:	ldub		[%g6 + TI_FPSAVED], %g3
 	.align		32
 80:	jmpl		%g7 + %g0, %g0
 	 nop
-
-6:	ldub		[%g3 + TI_FPSAVED], %o5
-	or		%o5, FPRS_DU, %o5
-	add		%g6, TI_FPREGS+0x80, %g2
-	stb		%o5, [%g3 + TI_FPSAVED]
-
-	sll		%g1, 5, %g1
-	add		%g6, TI_FPREGS+0xc0, %g3
-	wr		%g0, FPRS_FEF, %fprs
-	membar		#Sync
-	stda		%f32, [%g2 + %g1] ASI_BLK_P
-	stda		%f48, [%g3 + %g1] ASI_BLK_P
-	membar		#Sync
-	ba,pt		%xcc, 80f
-	 nop
-
-	.align		32
-80:	jmpl		%g7 + %g0, %g0
-	 nop
-
-	.align		32
-VISenterhalf:
-	ldub		[%g6 + TI_FPDEPTH], %g1
-	brnz,a,pn	%g1, 1f
-	 cmp		%g1, 1
-	stb		%g0, [%g6 + TI_FPSAVED]
-	stx		%fsr, [%g6 + TI_XFSR]
-	clr		%o5
-	jmpl		%g7 + %g0, %g0
-	 wr		%g0, FPRS_FEF, %fprs
-
-1:	bne,pn		%icc, 2f
-	 srl		%g1, 1, %g1
-	ba,pt		%xcc, vis1
-	 sub		%g7, 8, %g7
-2:	addcc		%g6, %g1, %g3
-	sll		%g1, 3, %g1
-	andn		%o5, FPRS_DU, %g2
-	stb		%g2, [%g3 + TI_FPSAVED]
-
-	rd		%gsr, %g2
-	add		%g6, %g1, %g3
-	stx		%g2, [%g3 + TI_GSR]
-	add		%g6, %g1, %g2
-	stx		%fsr, [%g2 + TI_XFSR]
-	sll		%g1, 5, %g1
-3:	andcc		%o5, FPRS_DL, %g0
-	be,pn		%icc, 4f
-	 add		%g6, TI_FPREGS, %g2
-
-	add		%g6, TI_FPREGS+0x40, %g3
-	membar		#Sync
-	stda		%f0, [%g2 + %g1] ASI_BLK_P
-	stda		%f16, [%g3 + %g1] ASI_BLK_P
-	membar		#Sync
-	ba,pt		%xcc, 4f
-	 nop
-
-	.align		32
-4:	and		%o5, FPRS_DU, %o5
-	jmpl		%g7 + %g0, %g0
-	 wr		%o5, FPRS_FEF, %fprs
diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c
index 1d649a9..8069ce1 100644
--- a/arch/sparc/lib/ksyms.c
+++ b/arch/sparc/lib/ksyms.c
@@ -135,10 +135,6 @@ EXPORT_SYMBOL(copy_user_page);
 void VISenter(void);
 EXPORT_SYMBOL(VISenter);
 
-/* CRYPTO code needs this */
-void VISenterhalf(void);
-EXPORT_SYMBOL(VISenterhalf);
-
 extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
 extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *,
 		unsigned long *);
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index d536544..d8a8431 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -19,6 +19,7 @@ generic-y += irq_regs.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += msgbuf.h
 generic-y += mutex.h
 generic-y += param.h
diff --git a/arch/tile/include/asm/mm-arch-hooks.h b/arch/tile/include/asm/mm-arch-hooks.h
deleted file mode 100644
index d1709ea..0000000
--- a/arch/tile/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_TILE_MM_ARCH_HOOKS_H
-#define _ASM_TILE_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_TILE_MM_ARCH_HOOKS_H */
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
index e8c2c04..c667e10 100644
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -113,8 +113,6 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
 	if (!access_ok(VERIFY_READ, from, sizeof(struct compat_siginfo)))
 		return -EFAULT;
 
-	memset(to, 0, sizeof(*to));
-
 	err = __get_user(to->si_signo, &from->si_signo);
 	err |= __get_user(to->si_errno, &from->si_errno);
 	err |= __get_user(to->si_code, &from->si_code);
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 99c9ff8..6b755d1 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -1139,7 +1139,7 @@ static void __init load_hv_initrd(void)
 
 void __init free_initrd_mem(unsigned long begin, unsigned long end)
 {
-	free_bootmem(__pa(begin), end - begin);
+	free_bootmem_late(__pa(begin), end - begin);
 }
 
 static int __init setup_initrd(char *str)
diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c
index a3ed12f..825867c 100644
--- a/arch/tile/kernel/sysfs.c
+++ b/arch/tile/kernel/sysfs.c
@@ -198,16 +198,13 @@ static int hv_stats_device_add(struct device *dev, struct subsys_interface *sif)
 	return err;
 }
 
-static int hv_stats_device_remove(struct device *dev,
-				  struct subsys_interface *sif)
+static void hv_stats_device_remove(struct device *dev,
+				   struct subsys_interface *sif)
 {
 	int cpu = dev->id;
 
-	if (!cpu_online(cpu))
-		return 0;
-
-	sysfs_remove_file(&dev->kobj, &dev_attr_hv_stats.attr);
-	return 0;
+	if (cpu_online(cpu))
+		sysfs_remove_file(&dev->kobj, &dev_attr_hv_stats.attr);
 }
 
 
diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c
index 88c7016..97bbb60 100644
--- a/arch/tile/lib/memcpy_user_64.c
+++ b/arch/tile/lib/memcpy_user_64.c
@@ -28,7 +28,7 @@
 #define _ST(p, inst, v)						\
 	({							\
 		asm("1: " #inst " %0, %1;"			\
-		    ".pushsection .coldtext.memcpy,\"ax\";"	\
+		    ".pushsection .coldtext,\"ax\";"	\
 		    "2: { move r0, %2; jrp lr };"		\
 		    ".section __ex_table,\"a\";"		\
 		    ".align 8;"					\
@@ -41,7 +41,7 @@
 	({							\
 		unsigned long __v;				\
 		asm("1: " #inst " %0, %1;"			\
-		    ".pushsection .coldtext.memcpy,\"ax\";"	\
+		    ".pushsection .coldtext,\"ax\";"	\
 		    "2: { move r0, %2; jrp lr };"		\
 		    ".section __ex_table,\"a\";"		\
 		    ".align 8;"					\
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 3d63ff6..149ec55 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -16,6 +16,7 @@ generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mutex.h
 generic-y += param.h
 generic-y += pci.h
diff --git a/arch/um/include/asm/mm-arch-hooks.h b/arch/um/include/asm/mm-arch-hooks.h
deleted file mode 100644
index a7c8b0d..0000000
--- a/arch/um/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_UM_MM_ARCH_HOOKS_H
-#define _ASM_UM_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_UM_MM_ARCH_HOOKS_H */
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index d12b377..1fc7a28 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -26,6 +26,7 @@ generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += module.h
 generic-y += msgbuf.h
diff --git a/arch/unicore32/include/asm/mm-arch-hooks.h b/arch/unicore32/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 4d79a85..0000000
--- a/arch/unicore32/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_UNICORE32_MM_ARCH_HOOKS_H
-#define _ASM_UNICORE32_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_UNICORE32_MM_ARCH_HOOKS_H */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 55bced1..06dbb5d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -41,6 +41,7 @@ config X86
 	select ARCH_USE_CMPXCHG_LOCKREF		if X86_64
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
+	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_IPC_PARSE_VERSION	if X86_32
 	select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -254,6 +255,11 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	def_bool y
 
+config KASAN_SHADOW_OFFSET
+	hex
+	depends on KASAN
+	default 0xdffffc0000000000
+
 config HAVE_INTEL_TXT
 	def_bool y
 	depends on INTEL_IOMMU && ACPI
@@ -949,6 +955,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
 
 config X86_MCE
 	bool "Machine Check / overheating reporting"
+	select GENERIC_ALLOCATOR
 	default y
 	---help---
 	  Machine Check support allows the processor to notify the
@@ -2015,7 +2022,7 @@ config CMDLINE_BOOL
 
 	  To compile command line arguments into the kernel,
 	  set this option to 'Y', then fill in the
-	  the boot arguments in CONFIG_CMDLINE.
+	  boot arguments in CONFIG_CMDLINE.
 
 	  Systems with fully functional boot loaders (i.e. non-embedded)
 	  should leave this option set to 'N'.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index a15893d..d8c0d32 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -297,6 +297,18 @@ config OPTIMIZE_INLINING
 
 	  If unsure, say N.
 
+config DEBUG_ENTRY
+	bool "Debug low-level entry code"
+	depends on DEBUG_KERNEL
+	---help---
+	  This option enables sanity checks in x86's low-level entry code.
+	  Some of these sanity checks may slow down kernel entries and
+	  exits or otherwise impact performance.
+
+	  This is currently used to help test NMI code.
+
+	  If unsure, say N.
+
 config DEBUG_NMI_SELFTEST
 	bool "NMI Selftest"
 	depends on DEBUG_KERNEL && X86_LOCAL_APIC
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 118e6de..0f38418 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -212,6 +212,8 @@ drivers-$(CONFIG_PM) += arch/x86/power/
 
 drivers-$(CONFIG_FB) += arch/x86/video/
 
+drivers-$(CONFIG_RAS) += arch/x86/ras/
+
 ####
 # boot loader support. Several targets are kept for legacy purposes
 
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 2c82bd1..7d69afd 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -1193,6 +1193,10 @@ static efi_status_t setup_e820(struct boot_params *params,
 		unsigned int e820_type = 0;
 		unsigned long m = efi->efi_memmap;
 
+#ifdef CONFIG_X86_64
+		m |= (u64)efi->efi_memmap_hi << 32;
+#endif
+
 		d = (efi_memory_desc_t *)(m + (i * efi->efi_memdesc_size));
 		switch (d->type) {
 		case EFI_RESERVED_TYPE:
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 5a4a089..9a2838c 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
 obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
+obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
 obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
 obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
@@ -30,6 +31,7 @@ obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
 obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
 obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
 obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
+obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o
 
 # These modules require assembler to support AVX.
 ifeq ($(avx_supported),yes)
@@ -60,6 +62,7 @@ blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
+chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
 serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
 
 ifeq ($(avx_supported),yes)
@@ -75,6 +78,7 @@ endif
 
 ifeq ($(avx2_supported),yes)
 	camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
+	chacha20-x86_64-y += chacha20-avx2-x86_64.o
 	serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
 endif
 
@@ -82,8 +86,10 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
 aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
+poly1305-x86_64-y := poly1305-sse2-x86_64.o poly1305_glue.o
 ifeq ($(avx2_supported),yes)
 sha1-ssse3-y += sha1_avx2_x86_64_asm.o
+poly1305-x86_64-y += poly1305-avx2-x86_64.o
 endif
 crc32c-intel-y := crc32c-intel_glue.o
 crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index dccad38..3633ad6 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -803,10 +803,7 @@ static int rfc4106_init(struct crypto_aead *aead)
 		return PTR_ERR(cryptd_tfm);
 
 	*ctx = cryptd_tfm;
-	crypto_aead_set_reqsize(
-		aead,
-		sizeof(struct aead_request) +
-		crypto_aead_reqsize(&cryptd_tfm->base));
+	crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
 	return 0;
 }
 
@@ -955,8 +952,8 @@ static int helper_rfc4106_encrypt(struct aead_request *req)
 
 	/* Assuming we are supporting rfc4106 64-bit extended */
 	/* sequence numbers We need to have the AAD length equal */
-	/* to 8 or 12 bytes */
-	if (unlikely(req->assoclen != 8 && req->assoclen != 12))
+	/* to 16 or 20 bytes */
+	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
 		return -EINVAL;
 
 	/* IV below built */
@@ -992,9 +989,9 @@ static int helper_rfc4106_encrypt(struct aead_request *req)
 	}
 
 	kernel_fpu_begin();
-	aesni_gcm_enc_tfm(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv,
-		ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst
-		+ ((unsigned long)req->cryptlen), auth_tag_len);
+	aesni_gcm_enc_tfm(aes_ctx, dst, src, req->cryptlen, iv,
+			  ctx->hash_subkey, assoc, req->assoclen - 8,
+			  dst + req->cryptlen, auth_tag_len);
 	kernel_fpu_end();
 
 	/* The authTag (aka the Integrity Check Value) needs to be written
@@ -1033,12 +1030,12 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
 	struct scatter_walk dst_sg_walk;
 	unsigned int i;
 
-	if (unlikely(req->assoclen != 8 && req->assoclen != 12))
+	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
 		return -EINVAL;
 
 	/* Assuming we are supporting rfc4106 64-bit extended */
 	/* sequence numbers We need to have the AAD length */
-	/* equal to 8 or 12 bytes */
+	/* equal to 16 or 20 bytes */
 
 	tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len);
 	/* IV below built */
@@ -1075,8 +1072,8 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
 
 	kernel_fpu_begin();
 	aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv,
-		ctx->hash_subkey, assoc, (unsigned long)req->assoclen,
-		authTag, auth_tag_len);
+			  ctx->hash_subkey, assoc, req->assoclen - 8,
+			  authTag, auth_tag_len);
 	kernel_fpu_end();
 
 	/* Compare generated tag with passed in tag. */
@@ -1105,19 +1102,12 @@ static int rfc4106_encrypt(struct aead_request *req)
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
 	struct cryptd_aead *cryptd_tfm = *ctx;
-	struct aead_request *subreq = aead_request_ctx(req);
 
-	aead_request_set_tfm(subreq, irq_fpu_usable() ?
-				     cryptd_aead_child(cryptd_tfm) :
-				     &cryptd_tfm->base);
+	aead_request_set_tfm(req, irq_fpu_usable() ?
+				  cryptd_aead_child(cryptd_tfm) :
+				  &cryptd_tfm->base);
 
-	aead_request_set_callback(subreq, req->base.flags,
-				  req->base.complete, req->base.data);
-	aead_request_set_crypt(subreq, req->src, req->dst,
-			       req->cryptlen, req->iv);
-	aead_request_set_ad(subreq, req->assoclen);
-
-	return crypto_aead_encrypt(subreq);
+	return crypto_aead_encrypt(req);
 }
 
 static int rfc4106_decrypt(struct aead_request *req)
@@ -1125,19 +1115,12 @@ static int rfc4106_decrypt(struct aead_request *req)
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
 	struct cryptd_aead *cryptd_tfm = *ctx;
-	struct aead_request *subreq = aead_request_ctx(req);
-
-	aead_request_set_tfm(subreq, irq_fpu_usable() ?
-				     cryptd_aead_child(cryptd_tfm) :
-				     &cryptd_tfm->base);
 
-	aead_request_set_callback(subreq, req->base.flags,
-				  req->base.complete, req->base.data);
-	aead_request_set_crypt(subreq, req->src, req->dst,
-			       req->cryptlen, req->iv);
-	aead_request_set_ad(subreq, req->assoclen);
+	aead_request_set_tfm(req, irq_fpu_usable() ?
+				  cryptd_aead_child(cryptd_tfm) :
+				  &cryptd_tfm->base);
 
-	return crypto_aead_decrypt(subreq);
+	return crypto_aead_decrypt(req);
 }
 #endif
 
diff --git a/arch/x86/crypto/chacha20-avx2-x86_64.S b/arch/x86/crypto/chacha20-avx2-x86_64.S
new file mode 100644
index 0000000..16694e6
--- /dev/null
+++ b/arch/x86/crypto/chacha20-avx2-x86_64.S
@@ -0,0 +1,443 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539, x64 AVX2 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+
+.data
+.align 32
+
+ROT8:	.octa 0x0e0d0c0f0a09080b0605040702010003
+	.octa 0x0e0d0c0f0a09080b0605040702010003
+ROT16:	.octa 0x0d0c0f0e09080b0a0504070601000302
+	.octa 0x0d0c0f0e09080b0a0504070601000302
+CTRINC:	.octa 0x00000003000000020000000100000000
+	.octa 0x00000007000000060000000500000004
+
+.text
+
+ENTRY(chacha20_8block_xor_avx2)
+	# %rdi: Input state matrix, s
+	# %rsi: 8 data blocks output, o
+	# %rdx: 8 data blocks input, i
+
+	# This function encrypts eight consecutive ChaCha20 blocks by loading
+	# the state matrix in AVX registers eight times. As we need some
+	# scratch registers, we save the first four registers on the stack. The
+	# algorithm performs each operation on the corresponding word of each
+	# state matrix, hence requires no word shuffling. For final XORing step
+	# we transpose the matrix by interleaving 32-, 64- and then 128-bit
+	# words, which allows us to do XOR in AVX registers. 8/16-bit word
+	# rotation is done with the slightly better performing byte shuffling,
+	# 7/12-bit word rotation uses traditional shift+OR.
+
+	vzeroupper
+	# 4 * 32 byte stack, 32-byte aligned
+	mov		%rsp, %r8
+	and		$~31, %rsp
+	sub		$0x80, %rsp
+
+	# x0..15[0-7] = s[0..15]
+	vpbroadcastd	0x00(%rdi),%ymm0
+	vpbroadcastd	0x04(%rdi),%ymm1
+	vpbroadcastd	0x08(%rdi),%ymm2
+	vpbroadcastd	0x0c(%rdi),%ymm3
+	vpbroadcastd	0x10(%rdi),%ymm4
+	vpbroadcastd	0x14(%rdi),%ymm5
+	vpbroadcastd	0x18(%rdi),%ymm6
+	vpbroadcastd	0x1c(%rdi),%ymm7
+	vpbroadcastd	0x20(%rdi),%ymm8
+	vpbroadcastd	0x24(%rdi),%ymm9
+	vpbroadcastd	0x28(%rdi),%ymm10
+	vpbroadcastd	0x2c(%rdi),%ymm11
+	vpbroadcastd	0x30(%rdi),%ymm12
+	vpbroadcastd	0x34(%rdi),%ymm13
+	vpbroadcastd	0x38(%rdi),%ymm14
+	vpbroadcastd	0x3c(%rdi),%ymm15
+	# x0..3 on stack
+	vmovdqa		%ymm0,0x00(%rsp)
+	vmovdqa		%ymm1,0x20(%rsp)
+	vmovdqa		%ymm2,0x40(%rsp)
+	vmovdqa		%ymm3,0x60(%rsp)
+
+	vmovdqa		CTRINC(%rip),%ymm1
+	vmovdqa		ROT8(%rip),%ymm2
+	vmovdqa		ROT16(%rip),%ymm3
+
+	# x12 += counter values 0-3
+	vpaddd		%ymm1,%ymm12,%ymm12
+
+	mov		$10,%ecx
+
+.Ldoubleround8:
+	# x0 += x4, x12 = rotl32(x12 ^ x0, 16)
+	vpaddd		0x00(%rsp),%ymm4,%ymm0
+	vmovdqa		%ymm0,0x00(%rsp)
+	vpxor		%ymm0,%ymm12,%ymm12
+	vpshufb		%ymm3,%ymm12,%ymm12
+	# x1 += x5, x13 = rotl32(x13 ^ x1, 16)
+	vpaddd		0x20(%rsp),%ymm5,%ymm0
+	vmovdqa		%ymm0,0x20(%rsp)
+	vpxor		%ymm0,%ymm13,%ymm13
+	vpshufb		%ymm3,%ymm13,%ymm13
+	# x2 += x6, x14 = rotl32(x14 ^ x2, 16)
+	vpaddd		0x40(%rsp),%ymm6,%ymm0
+	vmovdqa		%ymm0,0x40(%rsp)
+	vpxor		%ymm0,%ymm14,%ymm14
+	vpshufb		%ymm3,%ymm14,%ymm14
+	# x3 += x7, x15 = rotl32(x15 ^ x3, 16)
+	vpaddd		0x60(%rsp),%ymm7,%ymm0
+	vmovdqa		%ymm0,0x60(%rsp)
+	vpxor		%ymm0,%ymm15,%ymm15
+	vpshufb		%ymm3,%ymm15,%ymm15
+
+	# x8 += x12, x4 = rotl32(x4 ^ x8, 12)
+	vpaddd		%ymm12,%ymm8,%ymm8
+	vpxor		%ymm8,%ymm4,%ymm4
+	vpslld		$12,%ymm4,%ymm0
+	vpsrld		$20,%ymm4,%ymm4
+	vpor		%ymm0,%ymm4,%ymm4
+	# x9 += x13, x5 = rotl32(x5 ^ x9, 12)
+	vpaddd		%ymm13,%ymm9,%ymm9
+	vpxor		%ymm9,%ymm5,%ymm5
+	vpslld		$12,%ymm5,%ymm0
+	vpsrld		$20,%ymm5,%ymm5
+	vpor		%ymm0,%ymm5,%ymm5
+	# x10 += x14, x6 = rotl32(x6 ^ x10, 12)
+	vpaddd		%ymm14,%ymm10,%ymm10
+	vpxor		%ymm10,%ymm6,%ymm6
+	vpslld		$12,%ymm6,%ymm0
+	vpsrld		$20,%ymm6,%ymm6
+	vpor		%ymm0,%ymm6,%ymm6
+	# x11 += x15, x7 = rotl32(x7 ^ x11, 12)
+	vpaddd		%ymm15,%ymm11,%ymm11
+	vpxor		%ymm11,%ymm7,%ymm7
+	vpslld		$12,%ymm7,%ymm0
+	vpsrld		$20,%ymm7,%ymm7
+	vpor		%ymm0,%ymm7,%ymm7
+
+	# x0 += x4, x12 = rotl32(x12 ^ x0, 8)
+	vpaddd		0x00(%rsp),%ymm4,%ymm0
+	vmovdqa		%ymm0,0x00(%rsp)
+	vpxor		%ymm0,%ymm12,%ymm12
+	vpshufb		%ymm2,%ymm12,%ymm12
+	# x1 += x5, x13 = rotl32(x13 ^ x1, 8)
+	vpaddd		0x20(%rsp),%ymm5,%ymm0
+	vmovdqa		%ymm0,0x20(%rsp)
+	vpxor		%ymm0,%ymm13,%ymm13
+	vpshufb		%ymm2,%ymm13,%ymm13
+	# x2 += x6, x14 = rotl32(x14 ^ x2, 8)
+	vpaddd		0x40(%rsp),%ymm6,%ymm0
+	vmovdqa		%ymm0,0x40(%rsp)
+	vpxor		%ymm0,%ymm14,%ymm14
+	vpshufb		%ymm2,%ymm14,%ymm14
+	# x3 += x7, x15 = rotl32(x15 ^ x3, 8)
+	vpaddd		0x60(%rsp),%ymm7,%ymm0
+	vmovdqa		%ymm0,0x60(%rsp)
+	vpxor		%ymm0,%ymm15,%ymm15
+	vpshufb		%ymm2,%ymm15,%ymm15
+
+	# x8 += x12, x4 = rotl32(x4 ^ x8, 7)
+	vpaddd		%ymm12,%ymm8,%ymm8
+	vpxor		%ymm8,%ymm4,%ymm4
+	vpslld		$7,%ymm4,%ymm0
+	vpsrld		$25,%ymm4,%ymm4
+	vpor		%ymm0,%ymm4,%ymm4
+	# x9 += x13, x5 = rotl32(x5 ^ x9, 7)
+	vpaddd		%ymm13,%ymm9,%ymm9
+	vpxor		%ymm9,%ymm5,%ymm5
+	vpslld		$7,%ymm5,%ymm0
+	vpsrld		$25,%ymm5,%ymm5
+	vpor		%ymm0,%ymm5,%ymm5
+	# x10 += x14, x6 = rotl32(x6 ^ x10, 7)
+	vpaddd		%ymm14,%ymm10,%ymm10
+	vpxor		%ymm10,%ymm6,%ymm6
+	vpslld		$7,%ymm6,%ymm0
+	vpsrld		$25,%ymm6,%ymm6
+	vpor		%ymm0,%ymm6,%ymm6
+	# x11 += x15, x7 = rotl32(x7 ^ x11, 7)
+	vpaddd		%ymm15,%ymm11,%ymm11
+	vpxor		%ymm11,%ymm7,%ymm7
+	vpslld		$7,%ymm7,%ymm0
+	vpsrld		$25,%ymm7,%ymm7
+	vpor		%ymm0,%ymm7,%ymm7
+
+	# x0 += x5, x15 = rotl32(x15 ^ x0, 16)
+	vpaddd		0x00(%rsp),%ymm5,%ymm0
+	vmovdqa		%ymm0,0x00(%rsp)
+	vpxor		%ymm0,%ymm15,%ymm15
+	vpshufb		%ymm3,%ymm15,%ymm15
+	# x1 += x6, x12 = rotl32(x12 ^ x1, 16)%ymm0
+	vpaddd		0x20(%rsp),%ymm6,%ymm0
+	vmovdqa		%ymm0,0x20(%rsp)
+	vpxor		%ymm0,%ymm12,%ymm12
+	vpshufb		%ymm3,%ymm12,%ymm12
+	# x2 += x7, x13 = rotl32(x13 ^ x2, 16)
+	vpaddd		0x40(%rsp),%ymm7,%ymm0
+	vmovdqa		%ymm0,0x40(%rsp)
+	vpxor		%ymm0,%ymm13,%ymm13
+	vpshufb		%ymm3,%ymm13,%ymm13
+	# x3 += x4, x14 = rotl32(x14 ^ x3, 16)
+	vpaddd		0x60(%rsp),%ymm4,%ymm0
+	vmovdqa		%ymm0,0x60(%rsp)
+	vpxor		%ymm0,%ymm14,%ymm14
+	vpshufb		%ymm3,%ymm14,%ymm14
+
+	# x10 += x15, x5 = rotl32(x5 ^ x10, 12)
+	vpaddd		%ymm15,%ymm10,%ymm10
+	vpxor		%ymm10,%ymm5,%ymm5
+	vpslld		$12,%ymm5,%ymm0
+	vpsrld		$20,%ymm5,%ymm5
+	vpor		%ymm0,%ymm5,%ymm5
+	# x11 += x12, x6 = rotl32(x6 ^ x11, 12)
+	vpaddd		%ymm12,%ymm11,%ymm11
+	vpxor		%ymm11,%ymm6,%ymm6
+	vpslld		$12,%ymm6,%ymm0
+	vpsrld		$20,%ymm6,%ymm6
+	vpor		%ymm0,%ymm6,%ymm6
+	# x8 += x13, x7 = rotl32(x7 ^ x8, 12)
+	vpaddd		%ymm13,%ymm8,%ymm8
+	vpxor		%ymm8,%ymm7,%ymm7
+	vpslld		$12,%ymm7,%ymm0
+	vpsrld		$20,%ymm7,%ymm7
+	vpor		%ymm0,%ymm7,%ymm7
+	# x9 += x14, x4 = rotl32(x4 ^ x9, 12)
+	vpaddd		%ymm14,%ymm9,%ymm9
+	vpxor		%ymm9,%ymm4,%ymm4
+	vpslld		$12,%ymm4,%ymm0
+	vpsrld		$20,%ymm4,%ymm4
+	vpor		%ymm0,%ymm4,%ymm4
+
+	# x0 += x5, x15 = rotl32(x15 ^ x0, 8)
+	vpaddd		0x00(%rsp),%ymm5,%ymm0
+	vmovdqa		%ymm0,0x00(%rsp)
+	vpxor		%ymm0,%ymm15,%ymm15
+	vpshufb		%ymm2,%ymm15,%ymm15
+	# x1 += x6, x12 = rotl32(x12 ^ x1, 8)
+	vpaddd		0x20(%rsp),%ymm6,%ymm0
+	vmovdqa		%ymm0,0x20(%rsp)
+	vpxor		%ymm0,%ymm12,%ymm12
+	vpshufb		%ymm2,%ymm12,%ymm12
+	# x2 += x7, x13 = rotl32(x13 ^ x2, 8)
+	vpaddd		0x40(%rsp),%ymm7,%ymm0
+	vmovdqa		%ymm0,0x40(%rsp)
+	vpxor		%ymm0,%ymm13,%ymm13
+	vpshufb		%ymm2,%ymm13,%ymm13
+	# x3 += x4, x14 = rotl32(x14 ^ x3, 8)
+	vpaddd		0x60(%rsp),%ymm4,%ymm0
+	vmovdqa		%ymm0,0x60(%rsp)
+	vpxor		%ymm0,%ymm14,%ymm14
+	vpshufb		%ymm2,%ymm14,%ymm14
+
+	# x10 += x15, x5 = rotl32(x5 ^ x10, 7)
+	vpaddd		%ymm15,%ymm10,%ymm10
+	vpxor		%ymm10,%ymm5,%ymm5
+	vpslld		$7,%ymm5,%ymm0
+	vpsrld		$25,%ymm5,%ymm5
+	vpor		%ymm0,%ymm5,%ymm5
+	# x11 += x12, x6 = rotl32(x6 ^ x11, 7)
+	vpaddd		%ymm12,%ymm11,%ymm11
+	vpxor		%ymm11,%ymm6,%ymm6
+	vpslld		$7,%ymm6,%ymm0
+	vpsrld		$25,%ymm6,%ymm6
+	vpor		%ymm0,%ymm6,%ymm6
+	# x8 += x13, x7 = rotl32(x7 ^ x8, 7)
+	vpaddd		%ymm13,%ymm8,%ymm8
+	vpxor		%ymm8,%ymm7,%ymm7
+	vpslld		$7,%ymm7,%ymm0
+	vpsrld		$25,%ymm7,%ymm7
+	vpor		%ymm0,%ymm7,%ymm7
+	# x9 += x14, x4 = rotl32(x4 ^ x9, 7)
+	vpaddd		%ymm14,%ymm9,%ymm9
+	vpxor		%ymm9,%ymm4,%ymm4
+	vpslld		$7,%ymm4,%ymm0
+	vpsrld		$25,%ymm4,%ymm4
+	vpor		%ymm0,%ymm4,%ymm4
+
+	dec		%ecx
+	jnz		.Ldoubleround8
+
+	# x0..15[0-3] += s[0..15]
+	vpbroadcastd	0x00(%rdi),%ymm0
+	vpaddd		0x00(%rsp),%ymm0,%ymm0
+	vmovdqa		%ymm0,0x00(%rsp)
+	vpbroadcastd	0x04(%rdi),%ymm0
+	vpaddd		0x20(%rsp),%ymm0,%ymm0
+	vmovdqa		%ymm0,0x20(%rsp)
+	vpbroadcastd	0x08(%rdi),%ymm0
+	vpaddd		0x40(%rsp),%ymm0,%ymm0
+	vmovdqa		%ymm0,0x40(%rsp)
+	vpbroadcastd	0x0c(%rdi),%ymm0
+	vpaddd		0x60(%rsp),%ymm0,%ymm0
+	vmovdqa		%ymm0,0x60(%rsp)
+	vpbroadcastd	0x10(%rdi),%ymm0
+	vpaddd		%ymm0,%ymm4,%ymm4
+	vpbroadcastd	0x14(%rdi),%ymm0
+	vpaddd		%ymm0,%ymm5,%ymm5
+	vpbroadcastd	0x18(%rdi),%ymm0
+	vpaddd		%ymm0,%ymm6,%ymm6
+	vpbroadcastd	0x1c(%rdi),%ymm0
+	vpaddd		%ymm0,%ymm7,%ymm7
+	vpbroadcastd	0x20(%rdi),%ymm0
+	vpaddd		%ymm0,%ymm8,%ymm8
+	vpbroadcastd	0x24(%rdi),%ymm0
+	vpaddd		%ymm0,%ymm9,%ymm9
+	vpbroadcastd	0x28(%rdi),%ymm0
+	vpaddd		%ymm0,%ymm10,%ymm10
+	vpbroadcastd	0x2c(%rdi),%ymm0
+	vpaddd		%ymm0,%ymm11,%ymm11
+	vpbroadcastd	0x30(%rdi),%ymm0
+	vpaddd		%ymm0,%ymm12,%ymm12
+	vpbroadcastd	0x34(%rdi),%ymm0
+	vpaddd		%ymm0,%ymm13,%ymm13
+	vpbroadcastd	0x38(%rdi),%ymm0
+	vpaddd		%ymm0,%ymm14,%ymm14
+	vpbroadcastd	0x3c(%rdi),%ymm0
+	vpaddd		%ymm0,%ymm15,%ymm15
+
+	# x12 += counter values 0-3
+	vpaddd		%ymm1,%ymm12,%ymm12
+
+	# interleave 32-bit words in state n, n+1
+	vmovdqa		0x00(%rsp),%ymm0
+	vmovdqa		0x20(%rsp),%ymm1
+	vpunpckldq	%ymm1,%ymm0,%ymm2
+	vpunpckhdq	%ymm1,%ymm0,%ymm1
+	vmovdqa		%ymm2,0x00(%rsp)
+	vmovdqa		%ymm1,0x20(%rsp)
+	vmovdqa		0x40(%rsp),%ymm0
+	vmovdqa		0x60(%rsp),%ymm1
+	vpunpckldq	%ymm1,%ymm0,%ymm2
+	vpunpckhdq	%ymm1,%ymm0,%ymm1
+	vmovdqa		%ymm2,0x40(%rsp)
+	vmovdqa		%ymm1,0x60(%rsp)
+	vmovdqa		%ymm4,%ymm0
+	vpunpckldq	%ymm5,%ymm0,%ymm4
+	vpunpckhdq	%ymm5,%ymm0,%ymm5
+	vmovdqa		%ymm6,%ymm0
+	vpunpckldq	%ymm7,%ymm0,%ymm6
+	vpunpckhdq	%ymm7,%ymm0,%ymm7
+	vmovdqa		%ymm8,%ymm0
+	vpunpckldq	%ymm9,%ymm0,%ymm8
+	vpunpckhdq	%ymm9,%ymm0,%ymm9
+	vmovdqa		%ymm10,%ymm0
+	vpunpckldq	%ymm11,%ymm0,%ymm10
+	vpunpckhdq	%ymm11,%ymm0,%ymm11
+	vmovdqa		%ymm12,%ymm0
+	vpunpckldq	%ymm13,%ymm0,%ymm12
+	vpunpckhdq	%ymm13,%ymm0,%ymm13
+	vmovdqa		%ymm14,%ymm0
+	vpunpckldq	%ymm15,%ymm0,%ymm14
+	vpunpckhdq	%ymm15,%ymm0,%ymm15
+
+	# interleave 64-bit words in state n, n+2
+	vmovdqa		0x00(%rsp),%ymm0
+	vmovdqa		0x40(%rsp),%ymm2
+	vpunpcklqdq	%ymm2,%ymm0,%ymm1
+	vpunpckhqdq	%ymm2,%ymm0,%ymm2
+	vmovdqa		%ymm1,0x00(%rsp)
+	vmovdqa		%ymm2,0x40(%rsp)
+	vmovdqa		0x20(%rsp),%ymm0
+	vmovdqa		0x60(%rsp),%ymm2
+	vpunpcklqdq	%ymm2,%ymm0,%ymm1
+	vpunpckhqdq	%ymm2,%ymm0,%ymm2
+	vmovdqa		%ymm1,0x20(%rsp)
+	vmovdqa		%ymm2,0x60(%rsp)
+	vmovdqa		%ymm4,%ymm0
+	vpunpcklqdq	%ymm6,%ymm0,%ymm4
+	vpunpckhqdq	%ymm6,%ymm0,%ymm6
+	vmovdqa		%ymm5,%ymm0
+	vpunpcklqdq	%ymm7,%ymm0,%ymm5
+	vpunpckhqdq	%ymm7,%ymm0,%ymm7
+	vmovdqa		%ymm8,%ymm0
+	vpunpcklqdq	%ymm10,%ymm0,%ymm8
+	vpunpckhqdq	%ymm10,%ymm0,%ymm10
+	vmovdqa		%ymm9,%ymm0
+	vpunpcklqdq	%ymm11,%ymm0,%ymm9
+	vpunpckhqdq	%ymm11,%ymm0,%ymm11
+	vmovdqa		%ymm12,%ymm0
+	vpunpcklqdq	%ymm14,%ymm0,%ymm12
+	vpunpckhqdq	%ymm14,%ymm0,%ymm14
+	vmovdqa		%ymm13,%ymm0
+	vpunpcklqdq	%ymm15,%ymm0,%ymm13
+	vpunpckhqdq	%ymm15,%ymm0,%ymm15
+
+	# interleave 128-bit words in state n, n+4
+	vmovdqa		0x00(%rsp),%ymm0
+	vperm2i128	$0x20,%ymm4,%ymm0,%ymm1
+	vperm2i128	$0x31,%ymm4,%ymm0,%ymm4
+	vmovdqa		%ymm1,0x00(%rsp)
+	vmovdqa		0x20(%rsp),%ymm0
+	vperm2i128	$0x20,%ymm5,%ymm0,%ymm1
+	vperm2i128	$0x31,%ymm5,%ymm0,%ymm5
+	vmovdqa		%ymm1,0x20(%rsp)
+	vmovdqa		0x40(%rsp),%ymm0
+	vperm2i128	$0x20,%ymm6,%ymm0,%ymm1
+	vperm2i128	$0x31,%ymm6,%ymm0,%ymm6
+	vmovdqa		%ymm1,0x40(%rsp)
+	vmovdqa		0x60(%rsp),%ymm0
+	vperm2i128	$0x20,%ymm7,%ymm0,%ymm1
+	vperm2i128	$0x31,%ymm7,%ymm0,%ymm7
+	vmovdqa		%ymm1,0x60(%rsp)
+	vperm2i128	$0x20,%ymm12,%ymm8,%ymm0
+	vperm2i128	$0x31,%ymm12,%ymm8,%ymm12
+	vmovdqa		%ymm0,%ymm8
+	vperm2i128	$0x20,%ymm13,%ymm9,%ymm0
+	vperm2i128	$0x31,%ymm13,%ymm9,%ymm13
+	vmovdqa		%ymm0,%ymm9
+	vperm2i128	$0x20,%ymm14,%ymm10,%ymm0
+	vperm2i128	$0x31,%ymm14,%ymm10,%ymm14
+	vmovdqa		%ymm0,%ymm10
+	vperm2i128	$0x20,%ymm15,%ymm11,%ymm0
+	vperm2i128	$0x31,%ymm15,%ymm11,%ymm15
+	vmovdqa		%ymm0,%ymm11
+
+	# xor with corresponding input, write to output
+	vmovdqa		0x00(%rsp),%ymm0
+	vpxor		0x0000(%rdx),%ymm0,%ymm0
+	vmovdqu		%ymm0,0x0000(%rsi)
+	vmovdqa		0x20(%rsp),%ymm0
+	vpxor		0x0080(%rdx),%ymm0,%ymm0
+	vmovdqu		%ymm0,0x0080(%rsi)
+	vmovdqa		0x40(%rsp),%ymm0
+	vpxor		0x0040(%rdx),%ymm0,%ymm0
+	vmovdqu		%ymm0,0x0040(%rsi)
+	vmovdqa		0x60(%rsp),%ymm0
+	vpxor		0x00c0(%rdx),%ymm0,%ymm0
+	vmovdqu		%ymm0,0x00c0(%rsi)
+	vpxor		0x0100(%rdx),%ymm4,%ymm4
+	vmovdqu		%ymm4,0x0100(%rsi)
+	vpxor		0x0180(%rdx),%ymm5,%ymm5
+	vmovdqu		%ymm5,0x00180(%rsi)
+	vpxor		0x0140(%rdx),%ymm6,%ymm6
+	vmovdqu		%ymm6,0x0140(%rsi)
+	vpxor		0x01c0(%rdx),%ymm7,%ymm7
+	vmovdqu		%ymm7,0x01c0(%rsi)
+	vpxor		0x0020(%rdx),%ymm8,%ymm8
+	vmovdqu		%ymm8,0x0020(%rsi)
+	vpxor		0x00a0(%rdx),%ymm9,%ymm9
+	vmovdqu		%ymm9,0x00a0(%rsi)
+	vpxor		0x0060(%rdx),%ymm10,%ymm10
+	vmovdqu		%ymm10,0x0060(%rsi)
+	vpxor		0x00e0(%rdx),%ymm11,%ymm11
+	vmovdqu		%ymm11,0x00e0(%rsi)
+	vpxor		0x0120(%rdx),%ymm12,%ymm12
+	vmovdqu		%ymm12,0x0120(%rsi)
+	vpxor		0x01a0(%rdx),%ymm13,%ymm13
+	vmovdqu		%ymm13,0x01a0(%rsi)
+	vpxor		0x0160(%rdx),%ymm14,%ymm14
+	vmovdqu		%ymm14,0x0160(%rsi)
+	vpxor		0x01e0(%rdx),%ymm15,%ymm15
+	vmovdqu		%ymm15,0x01e0(%rsi)
+
+	vzeroupper
+	mov		%r8,%rsp
+	ret
+ENDPROC(chacha20_8block_xor_avx2)
diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha20-ssse3-x86_64.S
new file mode 100644
index 0000000..712b130
--- /dev/null
+++ b/arch/x86/crypto/chacha20-ssse3-x86_64.S
@@ -0,0 +1,625 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+
+.data
+.align 16
+
+ROT8:	.octa 0x0e0d0c0f0a09080b0605040702010003
+ROT16:	.octa 0x0d0c0f0e09080b0a0504070601000302
+CTRINC:	.octa 0x00000003000000020000000100000000
+
+.text
+
+ENTRY(chacha20_block_xor_ssse3)
+	# %rdi: Input state matrix, s
+	# %rsi: 1 data block output, o
+	# %rdx: 1 data block input, i
+
+	# This function encrypts one ChaCha20 block by loading the state matrix
+	# in four SSE registers. It performs matrix operation on four words in
+	# parallel, but requireds shuffling to rearrange the words after each
+	# round. 8/16-bit word rotation is done with the slightly better
+	# performing SSSE3 byte shuffling, 7/12-bit word rotation uses
+	# traditional shift+OR.
+
+	# x0..3 = s0..3
+	movdqa		0x00(%rdi),%xmm0
+	movdqa		0x10(%rdi),%xmm1
+	movdqa		0x20(%rdi),%xmm2
+	movdqa		0x30(%rdi),%xmm3
+	movdqa		%xmm0,%xmm8
+	movdqa		%xmm1,%xmm9
+	movdqa		%xmm2,%xmm10
+	movdqa		%xmm3,%xmm11
+
+	movdqa		ROT8(%rip),%xmm4
+	movdqa		ROT16(%rip),%xmm5
+
+	mov	$10,%ecx
+
+.Ldoubleround:
+
+	# x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+	paddd		%xmm1,%xmm0
+	pxor		%xmm0,%xmm3
+	pshufb		%xmm5,%xmm3
+
+	# x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+	paddd		%xmm3,%xmm2
+	pxor		%xmm2,%xmm1
+	movdqa		%xmm1,%xmm6
+	pslld		$12,%xmm6
+	psrld		$20,%xmm1
+	por		%xmm6,%xmm1
+
+	# x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+	paddd		%xmm1,%xmm0
+	pxor		%xmm0,%xmm3
+	pshufb		%xmm4,%xmm3
+
+	# x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+	paddd		%xmm3,%xmm2
+	pxor		%xmm2,%xmm1
+	movdqa		%xmm1,%xmm7
+	pslld		$7,%xmm7
+	psrld		$25,%xmm1
+	por		%xmm7,%xmm1
+
+	# x1 = shuffle32(x1, MASK(0, 3, 2, 1))
+	pshufd		$0x39,%xmm1,%xmm1
+	# x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+	pshufd		$0x4e,%xmm2,%xmm2
+	# x3 = shuffle32(x3, MASK(2, 1, 0, 3))
+	pshufd		$0x93,%xmm3,%xmm3
+
+	# x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+	paddd		%xmm1,%xmm0
+	pxor		%xmm0,%xmm3
+	pshufb		%xmm5,%xmm3
+
+	# x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+	paddd		%xmm3,%xmm2
+	pxor		%xmm2,%xmm1
+	movdqa		%xmm1,%xmm6
+	pslld		$12,%xmm6
+	psrld		$20,%xmm1
+	por		%xmm6,%xmm1
+
+	# x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+	paddd		%xmm1,%xmm0
+	pxor		%xmm0,%xmm3
+	pshufb		%xmm4,%xmm3
+
+	# x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+	paddd		%xmm3,%xmm2
+	pxor		%xmm2,%xmm1
+	movdqa		%xmm1,%xmm7
+	pslld		$7,%xmm7
+	psrld		$25,%xmm1
+	por		%xmm7,%xmm1
+
+	# x1 = shuffle32(x1, MASK(2, 1, 0, 3))
+	pshufd		$0x93,%xmm1,%xmm1
+	# x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+	pshufd		$0x4e,%xmm2,%xmm2
+	# x3 = shuffle32(x3, MASK(0, 3, 2, 1))
+	pshufd		$0x39,%xmm3,%xmm3
+
+	dec		%ecx
+	jnz		.Ldoubleround
+
+	# o0 = i0 ^ (x0 + s0)
+	movdqu		0x00(%rdx),%xmm4
+	paddd		%xmm8,%xmm0
+	pxor		%xmm4,%xmm0
+	movdqu		%xmm0,0x00(%rsi)
+	# o1 = i1 ^ (x1 + s1)
+	movdqu		0x10(%rdx),%xmm5
+	paddd		%xmm9,%xmm1
+	pxor		%xmm5,%xmm1
+	movdqu		%xmm1,0x10(%rsi)
+	# o2 = i2 ^ (x2 + s2)
+	movdqu		0x20(%rdx),%xmm6
+	paddd		%xmm10,%xmm2
+	pxor		%xmm6,%xmm2
+	movdqu		%xmm2,0x20(%rsi)
+	# o3 = i3 ^ (x3 + s3)
+	movdqu		0x30(%rdx),%xmm7
+	paddd		%xmm11,%xmm3
+	pxor		%xmm7,%xmm3
+	movdqu		%xmm3,0x30(%rsi)
+
+	ret
+ENDPROC(chacha20_block_xor_ssse3)
+
+ENTRY(chacha20_4block_xor_ssse3)
+	# %rdi: Input state matrix, s
+	# %rsi: 4 data blocks output, o
+	# %rdx: 4 data blocks input, i
+
+	# This function encrypts four consecutive ChaCha20 blocks by loading the
+	# the state matrix in SSE registers four times. As we need some scratch
+	# registers, we save the first four registers on the stack. The
+	# algorithm performs each operation on the corresponding word of each
+	# state matrix, hence requires no word shuffling. For final XORing step
+	# we transpose the matrix by interleaving 32- and then 64-bit words,
+	# which allows us to do XOR in SSE registers. 8/16-bit word rotation is
+	# done with the slightly better performing SSSE3 byte shuffling,
+	# 7/12-bit word rotation uses traditional shift+OR.
+
+	sub		$0x40,%rsp
+
+	# x0..15[0-3] = s0..3[0..3]
+	movq		0x00(%rdi),%xmm1
+	pshufd		$0x00,%xmm1,%xmm0
+	pshufd		$0x55,%xmm1,%xmm1
+	movq		0x08(%rdi),%xmm3
+	pshufd		$0x00,%xmm3,%xmm2
+	pshufd		$0x55,%xmm3,%xmm3
+	movq		0x10(%rdi),%xmm5
+	pshufd		$0x00,%xmm5,%xmm4
+	pshufd		$0x55,%xmm5,%xmm5
+	movq		0x18(%rdi),%xmm7
+	pshufd		$0x00,%xmm7,%xmm6
+	pshufd		$0x55,%xmm7,%xmm7
+	movq		0x20(%rdi),%xmm9
+	pshufd		$0x00,%xmm9,%xmm8
+	pshufd		$0x55,%xmm9,%xmm9
+	movq		0x28(%rdi),%xmm11
+	pshufd		$0x00,%xmm11,%xmm10
+	pshufd		$0x55,%xmm11,%xmm11
+	movq		0x30(%rdi),%xmm13
+	pshufd		$0x00,%xmm13,%xmm12
+	pshufd		$0x55,%xmm13,%xmm13
+	movq		0x38(%rdi),%xmm15
+	pshufd		$0x00,%xmm15,%xmm14
+	pshufd		$0x55,%xmm15,%xmm15
+	# x0..3 on stack
+	movdqa		%xmm0,0x00(%rsp)
+	movdqa		%xmm1,0x10(%rsp)
+	movdqa		%xmm2,0x20(%rsp)
+	movdqa		%xmm3,0x30(%rsp)
+
+	movdqa		CTRINC(%rip),%xmm1
+	movdqa		ROT8(%rip),%xmm2
+	movdqa		ROT16(%rip),%xmm3
+
+	# x12 += counter values 0-3
+	paddd		%xmm1,%xmm12
+
+	mov		$10,%ecx
+
+.Ldoubleround4:
+	# x0 += x4, x12 = rotl32(x12 ^ x0, 16)
+	movdqa		0x00(%rsp),%xmm0
+	paddd		%xmm4,%xmm0
+	movdqa		%xmm0,0x00(%rsp)
+	pxor		%xmm0,%xmm12
+	pshufb		%xmm3,%xmm12
+	# x1 += x5, x13 = rotl32(x13 ^ x1, 16)
+	movdqa		0x10(%rsp),%xmm0
+	paddd		%xmm5,%xmm0
+	movdqa		%xmm0,0x10(%rsp)
+	pxor		%xmm0,%xmm13
+	pshufb		%xmm3,%xmm13
+	# x2 += x6, x14 = rotl32(x14 ^ x2, 16)
+	movdqa		0x20(%rsp),%xmm0
+	paddd		%xmm6,%xmm0
+	movdqa		%xmm0,0x20(%rsp)
+	pxor		%xmm0,%xmm14
+	pshufb		%xmm3,%xmm14
+	# x3 += x7, x15 = rotl32(x15 ^ x3, 16)
+	movdqa		0x30(%rsp),%xmm0
+	paddd		%xmm7,%xmm0
+	movdqa		%xmm0,0x30(%rsp)
+	pxor		%xmm0,%xmm15
+	pshufb		%xmm3,%xmm15
+
+	# x8 += x12, x4 = rotl32(x4 ^ x8, 12)
+	paddd		%xmm12,%xmm8
+	pxor		%xmm8,%xmm4
+	movdqa		%xmm4,%xmm0
+	pslld		$12,%xmm0
+	psrld		$20,%xmm4
+	por		%xmm0,%xmm4
+	# x9 += x13, x5 = rotl32(x5 ^ x9, 12)
+	paddd		%xmm13,%xmm9
+	pxor		%xmm9,%xmm5
+	movdqa		%xmm5,%xmm0
+	pslld		$12,%xmm0
+	psrld		$20,%xmm5
+	por		%xmm0,%xmm5
+	# x10 += x14, x6 = rotl32(x6 ^ x10, 12)
+	paddd		%xmm14,%xmm10
+	pxor		%xmm10,%xmm6
+	movdqa		%xmm6,%xmm0
+	pslld		$12,%xmm0
+	psrld		$20,%xmm6
+	por		%xmm0,%xmm6
+	# x11 += x15, x7 = rotl32(x7 ^ x11, 12)
+	paddd		%xmm15,%xmm11
+	pxor		%xmm11,%xmm7
+	movdqa		%xmm7,%xmm0
+	pslld		$12,%xmm0
+	psrld		$20,%xmm7
+	por		%xmm0,%xmm7
+
+	# x0 += x4, x12 = rotl32(x12 ^ x0, 8)
+	movdqa		0x00(%rsp),%xmm0
+	paddd		%xmm4,%xmm0
+	movdqa		%xmm0,0x00(%rsp)
+	pxor		%xmm0,%xmm12
+	pshufb		%xmm2,%xmm12
+	# x1 += x5, x13 = rotl32(x13 ^ x1, 8)
+	movdqa		0x10(%rsp),%xmm0
+	paddd		%xmm5,%xmm0
+	movdqa		%xmm0,0x10(%rsp)
+	pxor		%xmm0,%xmm13
+	pshufb		%xmm2,%xmm13
+	# x2 += x6, x14 = rotl32(x14 ^ x2, 8)
+	movdqa		0x20(%rsp),%xmm0
+	paddd		%xmm6,%xmm0
+	movdqa		%xmm0,0x20(%rsp)
+	pxor		%xmm0,%xmm14
+	pshufb		%xmm2,%xmm14
+	# x3 += x7, x15 = rotl32(x15 ^ x3, 8)
+	movdqa		0x30(%rsp),%xmm0
+	paddd		%xmm7,%xmm0
+	movdqa		%xmm0,0x30(%rsp)
+	pxor		%xmm0,%xmm15
+	pshufb		%xmm2,%xmm15
+
+	# x8 += x12, x4 = rotl32(x4 ^ x8, 7)
+	paddd		%xmm12,%xmm8
+	pxor		%xmm8,%xmm4
+	movdqa		%xmm4,%xmm0
+	pslld		$7,%xmm0
+	psrld		$25,%xmm4
+	por		%xmm0,%xmm4
+	# x9 += x13, x5 = rotl32(x5 ^ x9, 7)
+	paddd		%xmm13,%xmm9
+	pxor		%xmm9,%xmm5
+	movdqa		%xmm5,%xmm0
+	pslld		$7,%xmm0
+	psrld		$25,%xmm5
+	por		%xmm0,%xmm5
+	# x10 += x14, x6 = rotl32(x6 ^ x10, 7)
+	paddd		%xmm14,%xmm10
+	pxor		%xmm10,%xmm6
+	movdqa		%xmm6,%xmm0
+	pslld		$7,%xmm0
+	psrld		$25,%xmm6
+	por		%xmm0,%xmm6
+	# x11 += x15, x7 = rotl32(x7 ^ x11, 7)
+	paddd		%xmm15,%xmm11
+	pxor		%xmm11,%xmm7
+	movdqa		%xmm7,%xmm0
+	pslld		$7,%xmm0
+	psrld		$25,%xmm7
+	por		%xmm0,%xmm7
+
+	# x0 += x5, x15 = rotl32(x15 ^ x0, 16)
+	movdqa		0x00(%rsp),%xmm0
+	paddd		%xmm5,%xmm0
+	movdqa		%xmm0,0x00(%rsp)
+	pxor		%xmm0,%xmm15
+	pshufb		%xmm3,%xmm15
+	# x1 += x6, x12 = rotl32(x12 ^ x1, 16)
+	movdqa		0x10(%rsp),%xmm0
+	paddd		%xmm6,%xmm0
+	movdqa		%xmm0,0x10(%rsp)
+	pxor		%xmm0,%xmm12
+	pshufb		%xmm3,%xmm12
+	# x2 += x7, x13 = rotl32(x13 ^ x2, 16)
+	movdqa		0x20(%rsp),%xmm0
+	paddd		%xmm7,%xmm0
+	movdqa		%xmm0,0x20(%rsp)
+	pxor		%xmm0,%xmm13
+	pshufb		%xmm3,%xmm13
+	# x3 += x4, x14 = rotl32(x14 ^ x3, 16)
+	movdqa		0x30(%rsp),%xmm0
+	paddd		%xmm4,%xmm0
+	movdqa		%xmm0,0x30(%rsp)
+	pxor		%xmm0,%xmm14
+	pshufb		%xmm3,%xmm14
+
+	# x10 += x15, x5 = rotl32(x5 ^ x10, 12)
+	paddd		%xmm15,%xmm10
+	pxor		%xmm10,%xmm5
+	movdqa		%xmm5,%xmm0
+	pslld		$12,%xmm0
+	psrld		$20,%xmm5
+	por		%xmm0,%xmm5
+	# x11 += x12, x6 = rotl32(x6 ^ x11, 12)
+	paddd		%xmm12,%xmm11
+	pxor		%xmm11,%xmm6
+	movdqa		%xmm6,%xmm0
+	pslld		$12,%xmm0
+	psrld		$20,%xmm6
+	por		%xmm0,%xmm6
+	# x8 += x13, x7 = rotl32(x7 ^ x8, 12)
+	paddd		%xmm13,%xmm8
+	pxor		%xmm8,%xmm7
+	movdqa		%xmm7,%xmm0
+	pslld		$12,%xmm0
+	psrld		$20,%xmm7
+	por		%xmm0,%xmm7
+	# x9 += x14, x4 = rotl32(x4 ^ x9, 12)
+	paddd		%xmm14,%xmm9
+	pxor		%xmm9,%xmm4
+	movdqa		%xmm4,%xmm0
+	pslld		$12,%xmm0
+	psrld		$20,%xmm4
+	por		%xmm0,%xmm4
+
+	# x0 += x5, x15 = rotl32(x15 ^ x0, 8)
+	movdqa		0x00(%rsp),%xmm0
+	paddd		%xmm5,%xmm0
+	movdqa		%xmm0,0x00(%rsp)
+	pxor		%xmm0,%xmm15
+	pshufb		%xmm2,%xmm15
+	# x1 += x6, x12 = rotl32(x12 ^ x1, 8)
+	movdqa		0x10(%rsp),%xmm0
+	paddd		%xmm6,%xmm0
+	movdqa		%xmm0,0x10(%rsp)
+	pxor		%xmm0,%xmm12
+	pshufb		%xmm2,%xmm12
+	# x2 += x7, x13 = rotl32(x13 ^ x2, 8)
+	movdqa		0x20(%rsp),%xmm0
+	paddd		%xmm7,%xmm0
+	movdqa		%xmm0,0x20(%rsp)
+	pxor		%xmm0,%xmm13
+	pshufb		%xmm2,%xmm13
+	# x3 += x4, x14 = rotl32(x14 ^ x3, 8)
+	movdqa		0x30(%rsp),%xmm0
+	paddd		%xmm4,%xmm0
+	movdqa		%xmm0,0x30(%rsp)
+	pxor		%xmm0,%xmm14
+	pshufb		%xmm2,%xmm14
+
+	# x10 += x15, x5 = rotl32(x5 ^ x10, 7)
+	paddd		%xmm15,%xmm10
+	pxor		%xmm10,%xmm5
+	movdqa		%xmm5,%xmm0
+	pslld		$7,%xmm0
+	psrld		$25,%xmm5
+	por		%xmm0,%xmm5
+	# x11 += x12, x6 = rotl32(x6 ^ x11, 7)
+	paddd		%xmm12,%xmm11
+	pxor		%xmm11,%xmm6
+	movdqa		%xmm6,%xmm0
+	pslld		$7,%xmm0
+	psrld		$25,%xmm6
+	por		%xmm0,%xmm6
+	# x8 += x13, x7 = rotl32(x7 ^ x8, 7)
+	paddd		%xmm13,%xmm8
+	pxor		%xmm8,%xmm7
+	movdqa		%xmm7,%xmm0
+	pslld		$7,%xmm0
+	psrld		$25,%xmm7
+	por		%xmm0,%xmm7
+	# x9 += x14, x4 = rotl32(x4 ^ x9, 7)
+	paddd		%xmm14,%xmm9
+	pxor		%xmm9,%xmm4
+	movdqa		%xmm4,%xmm0
+	pslld		$7,%xmm0
+	psrld		$25,%xmm4
+	por		%xmm0,%xmm4
+
+	dec		%ecx
+	jnz		.Ldoubleround4
+
+	# x0[0-3] += s0[0]
+	# x1[0-3] += s0[1]
+	movq		0x00(%rdi),%xmm3
+	pshufd		$0x00,%xmm3,%xmm2
+	pshufd		$0x55,%xmm3,%xmm3
+	paddd		0x00(%rsp),%xmm2
+	movdqa		%xmm2,0x00(%rsp)
+	paddd		0x10(%rsp),%xmm3
+	movdqa		%xmm3,0x10(%rsp)
+	# x2[0-3] += s0[2]
+	# x3[0-3] += s0[3]
+	movq		0x08(%rdi),%xmm3
+	pshufd		$0x00,%xmm3,%xmm2
+	pshufd		$0x55,%xmm3,%xmm3
+	paddd		0x20(%rsp),%xmm2
+	movdqa		%xmm2,0x20(%rsp)
+	paddd		0x30(%rsp),%xmm3
+	movdqa		%xmm3,0x30(%rsp)
+
+	# x4[0-3] += s1[0]
+	# x5[0-3] += s1[1]
+	movq		0x10(%rdi),%xmm3
+	pshufd		$0x00,%xmm3,%xmm2
+	pshufd		$0x55,%xmm3,%xmm3
+	paddd		%xmm2,%xmm4
+	paddd		%xmm3,%xmm5
+	# x6[0-3] += s1[2]
+	# x7[0-3] += s1[3]
+	movq		0x18(%rdi),%xmm3
+	pshufd		$0x00,%xmm3,%xmm2
+	pshufd		$0x55,%xmm3,%xmm3
+	paddd		%xmm2,%xmm6
+	paddd		%xmm3,%xmm7
+
+	# x8[0-3] += s2[0]
+	# x9[0-3] += s2[1]
+	movq		0x20(%rdi),%xmm3
+	pshufd		$0x00,%xmm3,%xmm2
+	pshufd		$0x55,%xmm3,%xmm3
+	paddd		%xmm2,%xmm8
+	paddd		%xmm3,%xmm9
+	# x10[0-3] += s2[2]
+	# x11[0-3] += s2[3]
+	movq		0x28(%rdi),%xmm3
+	pshufd		$0x00,%xmm3,%xmm2
+	pshufd		$0x55,%xmm3,%xmm3
+	paddd		%xmm2,%xmm10
+	paddd		%xmm3,%xmm11
+
+	# x12[0-3] += s3[0]
+	# x13[0-3] += s3[1]
+	movq		0x30(%rdi),%xmm3
+	pshufd		$0x00,%xmm3,%xmm2
+	pshufd		$0x55,%xmm3,%xmm3
+	paddd		%xmm2,%xmm12
+	paddd		%xmm3,%xmm13
+	# x14[0-3] += s3[2]
+	# x15[0-3] += s3[3]
+	movq		0x38(%rdi),%xmm3
+	pshufd		$0x00,%xmm3,%xmm2
+	pshufd		$0x55,%xmm3,%xmm3
+	paddd		%xmm2,%xmm14
+	paddd		%xmm3,%xmm15
+
+	# x12 += counter values 0-3
+	paddd		%xmm1,%xmm12
+
+	# interleave 32-bit words in state n, n+1
+	movdqa		0x00(%rsp),%xmm0
+	movdqa		0x10(%rsp),%xmm1
+	movdqa		%xmm0,%xmm2
+	punpckldq	%xmm1,%xmm2
+	punpckhdq	%xmm1,%xmm0
+	movdqa		%xmm2,0x00(%rsp)
+	movdqa		%xmm0,0x10(%rsp)
+	movdqa		0x20(%rsp),%xmm0
+	movdqa		0x30(%rsp),%xmm1
+	movdqa		%xmm0,%xmm2
+	punpckldq	%xmm1,%xmm2
+	punpckhdq	%xmm1,%xmm0
+	movdqa		%xmm2,0x20(%rsp)
+	movdqa		%xmm0,0x30(%rsp)
+	movdqa		%xmm4,%xmm0
+	punpckldq	%xmm5,%xmm4
+	punpckhdq	%xmm5,%xmm0
+	movdqa		%xmm0,%xmm5
+	movdqa		%xmm6,%xmm0
+	punpckldq	%xmm7,%xmm6
+	punpckhdq	%xmm7,%xmm0
+	movdqa		%xmm0,%xmm7
+	movdqa		%xmm8,%xmm0
+	punpckldq	%xmm9,%xmm8
+	punpckhdq	%xmm9,%xmm0
+	movdqa		%xmm0,%xmm9
+	movdqa		%xmm10,%xmm0
+	punpckldq	%xmm11,%xmm10
+	punpckhdq	%xmm11,%xmm0
+	movdqa		%xmm0,%xmm11
+	movdqa		%xmm12,%xmm0
+	punpckldq	%xmm13,%xmm12
+	punpckhdq	%xmm13,%xmm0
+	movdqa		%xmm0,%xmm13
+	movdqa		%xmm14,%xmm0
+	punpckldq	%xmm15,%xmm14
+	punpckhdq	%xmm15,%xmm0
+	movdqa		%xmm0,%xmm15
+
+	# interleave 64-bit words in state n, n+2
+	movdqa		0x00(%rsp),%xmm0
+	movdqa		0x20(%rsp),%xmm1
+	movdqa		%xmm0,%xmm2
+	punpcklqdq	%xmm1,%xmm2
+	punpckhqdq	%xmm1,%xmm0
+	movdqa		%xmm2,0x00(%rsp)
+	movdqa		%xmm0,0x20(%rsp)
+	movdqa		0x10(%rsp),%xmm0
+	movdqa		0x30(%rsp),%xmm1
+	movdqa		%xmm0,%xmm2
+	punpcklqdq	%xmm1,%xmm2
+	punpckhqdq	%xmm1,%xmm0
+	movdqa		%xmm2,0x10(%rsp)
+	movdqa		%xmm0,0x30(%rsp)
+	movdqa		%xmm4,%xmm0
+	punpcklqdq	%xmm6,%xmm4
+	punpckhqdq	%xmm6,%xmm0
+	movdqa		%xmm0,%xmm6
+	movdqa		%xmm5,%xmm0
+	punpcklqdq	%xmm7,%xmm5
+	punpckhqdq	%xmm7,%xmm0
+	movdqa		%xmm0,%xmm7
+	movdqa		%xmm8,%xmm0
+	punpcklqdq	%xmm10,%xmm8
+	punpckhqdq	%xmm10,%xmm0
+	movdqa		%xmm0,%xmm10
+	movdqa		%xmm9,%xmm0
+	punpcklqdq	%xmm11,%xmm9
+	punpckhqdq	%xmm11,%xmm0
+	movdqa		%xmm0,%xmm11
+	movdqa		%xmm12,%xmm0
+	punpcklqdq	%xmm14,%xmm12
+	punpckhqdq	%xmm14,%xmm0
+	movdqa		%xmm0,%xmm14
+	movdqa		%xmm13,%xmm0
+	punpcklqdq	%xmm15,%xmm13
+	punpckhqdq	%xmm15,%xmm0
+	movdqa		%xmm0,%xmm15
+
+	# xor with corresponding input, write to output
+	movdqa		0x00(%rsp),%xmm0
+	movdqu		0x00(%rdx),%xmm1
+	pxor		%xmm1,%xmm0
+	movdqu		%xmm0,0x00(%rsi)
+	movdqa		0x10(%rsp),%xmm0
+	movdqu		0x80(%rdx),%xmm1
+	pxor		%xmm1,%xmm0
+	movdqu		%xmm0,0x80(%rsi)
+	movdqa		0x20(%rsp),%xmm0
+	movdqu		0x40(%rdx),%xmm1
+	pxor		%xmm1,%xmm0
+	movdqu		%xmm0,0x40(%rsi)
+	movdqa		0x30(%rsp),%xmm0
+	movdqu		0xc0(%rdx),%xmm1
+	pxor		%xmm1,%xmm0
+	movdqu		%xmm0,0xc0(%rsi)
+	movdqu		0x10(%rdx),%xmm1
+	pxor		%xmm1,%xmm4
+	movdqu		%xmm4,0x10(%rsi)
+	movdqu		0x90(%rdx),%xmm1
+	pxor		%xmm1,%xmm5
+	movdqu		%xmm5,0x90(%rsi)
+	movdqu		0x50(%rdx),%xmm1
+	pxor		%xmm1,%xmm6
+	movdqu		%xmm6,0x50(%rsi)
+	movdqu		0xd0(%rdx),%xmm1
+	pxor		%xmm1,%xmm7
+	movdqu		%xmm7,0xd0(%rsi)
+	movdqu		0x20(%rdx),%xmm1
+	pxor		%xmm1,%xmm8
+	movdqu		%xmm8,0x20(%rsi)
+	movdqu		0xa0(%rdx),%xmm1
+	pxor		%xmm1,%xmm9
+	movdqu		%xmm9,0xa0(%rsi)
+	movdqu		0x60(%rdx),%xmm1
+	pxor		%xmm1,%xmm10
+	movdqu		%xmm10,0x60(%rsi)
+	movdqu		0xe0(%rdx),%xmm1
+	pxor		%xmm1,%xmm11
+	movdqu		%xmm11,0xe0(%rsi)
+	movdqu		0x30(%rdx),%xmm1
+	pxor		%xmm1,%xmm12
+	movdqu		%xmm12,0x30(%rsi)
+	movdqu		0xb0(%rdx),%xmm1
+	pxor		%xmm1,%xmm13
+	movdqu		%xmm13,0xb0(%rsi)
+	movdqu		0x70(%rdx),%xmm1
+	pxor		%xmm1,%xmm14
+	movdqu		%xmm14,0x70(%rsi)
+	movdqu		0xf0(%rdx),%xmm1
+	pxor		%xmm1,%xmm15
+	movdqu		%xmm15,0xf0(%rsi)
+
+	add		$0x40,%rsp
+	ret
+ENDPROC(chacha20_4block_xor_ssse3)
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
new file mode 100644
index 0000000..effe216
--- /dev/null
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -0,0 +1,150 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/chacha20.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+#include <asm/simd.h>
+
+#define CHACHA20_STATE_ALIGN 16
+
+asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
+asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
+#ifdef CONFIG_AS_AVX2
+asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src);
+static bool chacha20_use_avx2;
+#endif
+
+static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
+			    unsigned int bytes)
+{
+	u8 buf[CHACHA20_BLOCK_SIZE];
+
+#ifdef CONFIG_AS_AVX2
+	if (chacha20_use_avx2) {
+		while (bytes >= CHACHA20_BLOCK_SIZE * 8) {
+			chacha20_8block_xor_avx2(state, dst, src);
+			bytes -= CHACHA20_BLOCK_SIZE * 8;
+			src += CHACHA20_BLOCK_SIZE * 8;
+			dst += CHACHA20_BLOCK_SIZE * 8;
+			state[12] += 8;
+		}
+	}
+#endif
+	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
+		chacha20_4block_xor_ssse3(state, dst, src);
+		bytes -= CHACHA20_BLOCK_SIZE * 4;
+		src += CHACHA20_BLOCK_SIZE * 4;
+		dst += CHACHA20_BLOCK_SIZE * 4;
+		state[12] += 4;
+	}
+	while (bytes >= CHACHA20_BLOCK_SIZE) {
+		chacha20_block_xor_ssse3(state, dst, src);
+		bytes -= CHACHA20_BLOCK_SIZE;
+		src += CHACHA20_BLOCK_SIZE;
+		dst += CHACHA20_BLOCK_SIZE;
+		state[12]++;
+	}
+	if (bytes) {
+		memcpy(buf, src, bytes);
+		chacha20_block_xor_ssse3(state, buf, buf);
+		memcpy(dst, buf, bytes);
+	}
+}
+
+static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
+			 struct scatterlist *src, unsigned int nbytes)
+{
+	u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1];
+	struct blkcipher_walk walk;
+	int err;
+
+	if (!may_use_simd())
+		return crypto_chacha20_crypt(desc, dst, src, nbytes);
+
+	state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN);
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);
+
+	crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
+
+	kernel_fpu_begin();
+
+	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
+		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
+				rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % CHACHA20_BLOCK_SIZE);
+	}
+
+	if (walk.nbytes) {
+		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
+				walk.nbytes);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+
+	kernel_fpu_end();
+
+	return err;
+}
+
+static struct crypto_alg alg = {
+	.cra_name		= "chacha20",
+	.cra_driver_name	= "chacha20-simd",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_ctxsize		= sizeof(struct chacha20_ctx),
+	.cra_alignmask		= sizeof(u32) - 1,
+	.cra_module		= THIS_MODULE,
+	.cra_u			= {
+		.blkcipher = {
+			.min_keysize	= CHACHA20_KEY_SIZE,
+			.max_keysize	= CHACHA20_KEY_SIZE,
+			.ivsize		= CHACHA20_IV_SIZE,
+			.geniv		= "seqiv",
+			.setkey		= crypto_chacha20_setkey,
+			.encrypt	= chacha20_simd,
+			.decrypt	= chacha20_simd,
+		},
+	},
+};
+
+static int __init chacha20_simd_mod_init(void)
+{
+	if (!cpu_has_ssse3)
+		return -ENODEV;
+
+#ifdef CONFIG_AS_AVX2
+	chacha20_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
+			    cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL);
+#endif
+	return crypto_register_alg(&alg);
+}
+
+static void __exit chacha20_simd_mod_fini(void)
+{
+	crypto_unregister_alg(&alg);
+}
+
+module_init(chacha20_simd_mod_init);
+module_exit(chacha20_simd_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
+MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-simd");
diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S
new file mode 100644
index 0000000..eff2f41
--- /dev/null
+++ b/arch/x86/crypto/poly1305-avx2-x86_64.S
@@ -0,0 +1,386 @@
+/*
+ * Poly1305 authenticator algorithm, RFC7539, x64 AVX2 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+
+.data
+.align 32
+
+ANMASK:	.octa 0x0000000003ffffff0000000003ffffff
+	.octa 0x0000000003ffffff0000000003ffffff
+ORMASK:	.octa 0x00000000010000000000000001000000
+	.octa 0x00000000010000000000000001000000
+
+.text
+
+#define h0 0x00(%rdi)
+#define h1 0x04(%rdi)
+#define h2 0x08(%rdi)
+#define h3 0x0c(%rdi)
+#define h4 0x10(%rdi)
+#define r0 0x00(%rdx)
+#define r1 0x04(%rdx)
+#define r2 0x08(%rdx)
+#define r3 0x0c(%rdx)
+#define r4 0x10(%rdx)
+#define u0 0x00(%r8)
+#define u1 0x04(%r8)
+#define u2 0x08(%r8)
+#define u3 0x0c(%r8)
+#define u4 0x10(%r8)
+#define w0 0x14(%r8)
+#define w1 0x18(%r8)
+#define w2 0x1c(%r8)
+#define w3 0x20(%r8)
+#define w4 0x24(%r8)
+#define y0 0x28(%r8)
+#define y1 0x2c(%r8)
+#define y2 0x30(%r8)
+#define y3 0x34(%r8)
+#define y4 0x38(%r8)
+#define m %rsi
+#define hc0 %ymm0
+#define hc1 %ymm1
+#define hc2 %ymm2
+#define hc3 %ymm3
+#define hc4 %ymm4
+#define hc0x %xmm0
+#define hc1x %xmm1
+#define hc2x %xmm2
+#define hc3x %xmm3
+#define hc4x %xmm4
+#define t1 %ymm5
+#define t2 %ymm6
+#define t1x %xmm5
+#define t2x %xmm6
+#define ruwy0 %ymm7
+#define ruwy1 %ymm8
+#define ruwy2 %ymm9
+#define ruwy3 %ymm10
+#define ruwy4 %ymm11
+#define ruwy0x %xmm7
+#define ruwy1x %xmm8
+#define ruwy2x %xmm9
+#define ruwy3x %xmm10
+#define ruwy4x %xmm11
+#define svxz1 %ymm12
+#define svxz2 %ymm13
+#define svxz3 %ymm14
+#define svxz4 %ymm15
+#define d0 %r9
+#define d1 %r10
+#define d2 %r11
+#define d3 %r12
+#define d4 %r13
+
+ENTRY(poly1305_4block_avx2)
+	# %rdi: Accumulator h[5]
+	# %rsi: 64 byte input block m
+	# %rdx: Poly1305 key r[5]
+	# %rcx: Quadblock count
+	# %r8:  Poly1305 derived key r^2 u[5], r^3 w[5], r^4 y[5],
+
+	# This four-block variant uses loop unrolled block processing. It
+	# requires 4 Poly1305 keys: r, r^2, r^3 and r^4:
+	# h = (h + m) * r  =>  h = (h + m1) * r^4 + m2 * r^3 + m3 * r^2 + m4 * r
+
+	vzeroupper
+	push		%rbx
+	push		%r12
+	push		%r13
+
+	# combine r0,u0,w0,y0
+	vmovd		y0,ruwy0x
+	vmovd		w0,t1x
+	vpunpcklqdq	t1,ruwy0,ruwy0
+	vmovd		u0,t1x
+	vmovd		r0,t2x
+	vpunpcklqdq	t2,t1,t1
+	vperm2i128	$0x20,t1,ruwy0,ruwy0
+
+	# combine r1,u1,w1,y1 and s1=r1*5,v1=u1*5,x1=w1*5,z1=y1*5
+	vmovd		y1,ruwy1x
+	vmovd		w1,t1x
+	vpunpcklqdq	t1,ruwy1,ruwy1
+	vmovd		u1,t1x
+	vmovd		r1,t2x
+	vpunpcklqdq	t2,t1,t1
+	vperm2i128	$0x20,t1,ruwy1,ruwy1
+	vpslld		$2,ruwy1,svxz1
+	vpaddd		ruwy1,svxz1,svxz1
+
+	# combine r2,u2,w2,y2 and s2=r2*5,v2=u2*5,x2=w2*5,z2=y2*5
+	vmovd		y2,ruwy2x
+	vmovd		w2,t1x
+	vpunpcklqdq	t1,ruwy2,ruwy2
+	vmovd		u2,t1x
+	vmovd		r2,t2x
+	vpunpcklqdq	t2,t1,t1
+	vperm2i128	$0x20,t1,ruwy2,ruwy2
+	vpslld		$2,ruwy2,svxz2
+	vpaddd		ruwy2,svxz2,svxz2
+
+	# combine r3,u3,w3,y3 and s3=r3*5,v3=u3*5,x3=w3*5,z3=y3*5
+	vmovd		y3,ruwy3x
+	vmovd		w3,t1x
+	vpunpcklqdq	t1,ruwy3,ruwy3
+	vmovd		u3,t1x
+	vmovd		r3,t2x
+	vpunpcklqdq	t2,t1,t1
+	vperm2i128	$0x20,t1,ruwy3,ruwy3
+	vpslld		$2,ruwy3,svxz3
+	vpaddd		ruwy3,svxz3,svxz3
+
+	# combine r4,u4,w4,y4 and s4=r4*5,v4=u4*5,x4=w4*5,z4=y4*5
+	vmovd		y4,ruwy4x
+	vmovd		w4,t1x
+	vpunpcklqdq	t1,ruwy4,ruwy4
+	vmovd		u4,t1x
+	vmovd		r4,t2x
+	vpunpcklqdq	t2,t1,t1
+	vperm2i128	$0x20,t1,ruwy4,ruwy4
+	vpslld		$2,ruwy4,svxz4
+	vpaddd		ruwy4,svxz4,svxz4
+
+.Ldoblock4:
+	# hc0 = [m[48-51] & 0x3ffffff, m[32-35] & 0x3ffffff,
+	#	 m[16-19] & 0x3ffffff, m[ 0- 3] & 0x3ffffff + h0]
+	vmovd		0x00(m),hc0x
+	vmovd		0x10(m),t1x
+	vpunpcklqdq	t1,hc0,hc0
+	vmovd		0x20(m),t1x
+	vmovd		0x30(m),t2x
+	vpunpcklqdq	t2,t1,t1
+	vperm2i128	$0x20,t1,hc0,hc0
+	vpand		ANMASK(%rip),hc0,hc0
+	vmovd		h0,t1x
+	vpaddd		t1,hc0,hc0
+	# hc1 = [(m[51-54] >> 2) & 0x3ffffff, (m[35-38] >> 2) & 0x3ffffff,
+	#	 (m[19-22] >> 2) & 0x3ffffff, (m[ 3- 6] >> 2) & 0x3ffffff + h1]
+	vmovd		0x03(m),hc1x
+	vmovd		0x13(m),t1x
+	vpunpcklqdq	t1,hc1,hc1
+	vmovd		0x23(m),t1x
+	vmovd		0x33(m),t2x
+	vpunpcklqdq	t2,t1,t1
+	vperm2i128	$0x20,t1,hc1,hc1
+	vpsrld		$2,hc1,hc1
+	vpand		ANMASK(%rip),hc1,hc1
+	vmovd		h1,t1x
+	vpaddd		t1,hc1,hc1
+	# hc2 = [(m[54-57] >> 4) & 0x3ffffff, (m[38-41] >> 4) & 0x3ffffff,
+	#	 (m[22-25] >> 4) & 0x3ffffff, (m[ 6- 9] >> 4) & 0x3ffffff + h2]
+	vmovd		0x06(m),hc2x
+	vmovd		0x16(m),t1x
+	vpunpcklqdq	t1,hc2,hc2
+	vmovd		0x26(m),t1x
+	vmovd		0x36(m),t2x
+	vpunpcklqdq	t2,t1,t1
+	vperm2i128	$0x20,t1,hc2,hc2
+	vpsrld		$4,hc2,hc2
+	vpand		ANMASK(%rip),hc2,hc2
+	vmovd		h2,t1x
+	vpaddd		t1,hc2,hc2
+	# hc3 = [(m[57-60] >> 6) & 0x3ffffff, (m[41-44] >> 6) & 0x3ffffff,
+	#	 (m[25-28] >> 6) & 0x3ffffff, (m[ 9-12] >> 6) & 0x3ffffff + h3]
+	vmovd		0x09(m),hc3x
+	vmovd		0x19(m),t1x
+	vpunpcklqdq	t1,hc3,hc3
+	vmovd		0x29(m),t1x
+	vmovd		0x39(m),t2x
+	vpunpcklqdq	t2,t1,t1
+	vperm2i128	$0x20,t1,hc3,hc3
+	vpsrld		$6,hc3,hc3
+	vpand		ANMASK(%rip),hc3,hc3
+	vmovd		h3,t1x
+	vpaddd		t1,hc3,hc3
+	# hc4 = [(m[60-63] >> 8) | (1<<24), (m[44-47] >> 8) | (1<<24),
+	#	 (m[28-31] >> 8) | (1<<24), (m[12-15] >> 8) | (1<<24) + h4]
+	vmovd		0x0c(m),hc4x
+	vmovd		0x1c(m),t1x
+	vpunpcklqdq	t1,hc4,hc4
+	vmovd		0x2c(m),t1x
+	vmovd		0x3c(m),t2x
+	vpunpcklqdq	t2,t1,t1
+	vperm2i128	$0x20,t1,hc4,hc4
+	vpsrld		$8,hc4,hc4
+	vpor		ORMASK(%rip),hc4,hc4
+	vmovd		h4,t1x
+	vpaddd		t1,hc4,hc4
+
+	# t1 = [ hc0[3] * r0, hc0[2] * u0, hc0[1] * w0, hc0[0] * y0 ]
+	vpmuludq	hc0,ruwy0,t1
+	# t1 += [ hc1[3] * s4, hc1[2] * v4, hc1[1] * x4, hc1[0] * z4 ]
+	vpmuludq	hc1,svxz4,t2
+	vpaddq		t2,t1,t1
+	# t1 += [ hc2[3] * s3, hc2[2] * v3, hc2[1] * x3, hc2[0] * z3 ]
+	vpmuludq	hc2,svxz3,t2
+	vpaddq		t2,t1,t1
+	# t1 += [ hc3[3] * s2, hc3[2] * v2, hc3[1] * x2, hc3[0] * z2 ]
+	vpmuludq	hc3,svxz2,t2
+	vpaddq		t2,t1,t1
+	# t1 += [ hc4[3] * s1, hc4[2] * v1, hc4[1] * x1, hc4[0] * z1 ]
+	vpmuludq	hc4,svxz1,t2
+	vpaddq		t2,t1,t1
+	# d0 = t1[0] + t1[1] + t[2] + t[3]
+	vpermq		$0xee,t1,t2
+	vpaddq		t2,t1,t1
+	vpsrldq		$8,t1,t2
+	vpaddq		t2,t1,t1
+	vmovq		t1x,d0
+
+	# t1 = [ hc0[3] * r1, hc0[2] * u1,hc0[1] * w1, hc0[0] * y1 ]
+	vpmuludq	hc0,ruwy1,t1
+	# t1 += [ hc1[3] * r0, hc1[2] * u0, hc1[1] * w0, hc1[0] * y0 ]
+	vpmuludq	hc1,ruwy0,t2
+	vpaddq		t2,t1,t1
+	# t1 += [ hc2[3] * s4, hc2[2] * v4, hc2[1] * x4, hc2[0] * z4 ]
+	vpmuludq	hc2,svxz4,t2
+	vpaddq		t2,t1,t1
+	# t1 += [ hc3[3] * s3, hc3[2] * v3, hc3[1] * x3, hc3[0] * z3 ]
+	vpmuludq	hc3,svxz3,t2
+	vpaddq		t2,t1,t1
+	# t1 += [ hc4[3] * s2, hc4[2] * v2, hc4[1] * x2, hc4[0] * z2 ]
+	vpmuludq	hc4,svxz2,t2
+	vpaddq		t2,t1,t1
+	# d1 = t1[0] + t1[1] + t1[3] + t1[4]
+	vpermq		$0xee,t1,t2
+	vpaddq		t2,t1,t1
+	vpsrldq		$8,t1,t2
+	vpaddq		t2,t1,t1
+	vmovq		t1x,d1
+
+	# t1 = [ hc0[3] * r2, hc0[2] * u2, hc0[1] * w2, hc0[0] * y2 ]
+	vpmuludq	hc0,ruwy2,t1
+	# t1 += [ hc1[3] * r1, hc1[2] * u1, hc1[1] * w1, hc1[0] * y1 ]
+	vpmuludq	hc1,ruwy1,t2
+	vpaddq		t2,t1,t1
+	# t1 += [ hc2[3] * r0, hc2[2] * u0, hc2[1] * w0, hc2[0] * y0 ]
+	vpmuludq	hc2,ruwy0,t2
+	vpaddq		t2,t1,t1
+	# t1 += [ hc3[3] * s4, hc3[2] * v4, hc3[1] * x4, hc3[0] * z4 ]
+	vpmuludq	hc3,svxz4,t2
+	vpaddq		t2,t1,t1
+	# t1 += [ hc4[3] * s3, hc4[2] * v3, hc4[1] * x3, hc4[0] * z3 ]
+	vpmuludq	hc4,svxz3,t2
+	vpaddq		t2,t1,t1
+	# d2 = t1[0] + t1[1] + t1[2] + t1[3]
+	vpermq		$0xee,t1,t2
+	vpaddq		t2,t1,t1
+	vpsrldq		$8,t1,t2
+	vpaddq		t2,t1,t1
+	vmovq		t1x,d2
+
+	# t1 = [ hc0[3] * r3, hc0[2] * u3, hc0[1] * w3, hc0[0] * y3 ]
+	vpmuludq	hc0,ruwy3,t1
+	# t1 += [ hc1[3] * r2, hc1[2] * u2, hc1[1] * w2, hc1[0] * y2 ]
+	vpmuludq	hc1,ruwy2,t2
+	vpaddq		t2,t1,t1
+	# t1 += [ hc2[3] * r1, hc2[2] * u1, hc2[1] * w1, hc2[0] * y1 ]
+	vpmuludq	hc2,ruwy1,t2
+	vpaddq		t2,t1,t1
+	# t1 += [ hc3[3] * r0, hc3[2] * u0, hc3[1] * w0, hc3[0] * y0 ]
+	vpmuludq	hc3,ruwy0,t2
+	vpaddq		t2,t1,t1
+	# t1 += [ hc4[3] * s4, hc4[2] * v4, hc4[1] * x4, hc4[0] * z4 ]
+	vpmuludq	hc4,svxz4,t2
+	vpaddq		t2,t1,t1
+	# d3 = t1[0] + t1[1] + t1[2] + t1[3]
+	vpermq		$0xee,t1,t2
+	vpaddq		t2,t1,t1
+	vpsrldq		$8,t1,t2
+	vpaddq		t2,t1,t1
+	vmovq		t1x,d3
+
+	# t1 = [ hc0[3] * r4, hc0[2] * u4, hc0[1] * w4, hc0[0] * y4 ]
+	vpmuludq	hc0,ruwy4,t1
+	# t1 += [ hc1[3] * r3, hc1[2] * u3, hc1[1] * w3, hc1[0] * y3 ]
+	vpmuludq	hc1,ruwy3,t2
+	vpaddq		t2,t1,t1
+	# t1 += [ hc2[3] * r2, hc2[2] * u2, hc2[1] * w2, hc2[0] * y2 ]
+	vpmuludq	hc2,ruwy2,t2
+	vpaddq		t2,t1,t1
+	# t1 += [ hc3[3] * r1, hc3[2] * u1, hc3[1] * w1, hc3[0] * y1 ]
+	vpmuludq	hc3,ruwy1,t2
+	vpaddq		t2,t1,t1
+	# t1 += [ hc4[3] * r0, hc4[2] * u0, hc4[1] * w0, hc4[0] * y0 ]
+	vpmuludq	hc4,ruwy0,t2
+	vpaddq		t2,t1,t1
+	# d4 = t1[0] + t1[1] + t1[2] + t1[3]
+	vpermq		$0xee,t1,t2
+	vpaddq		t2,t1,t1
+	vpsrldq		$8,t1,t2
+	vpaddq		t2,t1,t1
+	vmovq		t1x,d4
+
+	# d1 += d0 >> 26
+	mov		d0,%rax
+	shr		$26,%rax
+	add		%rax,d1
+	# h0 = d0 & 0x3ffffff
+	mov		d0,%rbx
+	and		$0x3ffffff,%ebx
+
+	# d2 += d1 >> 26
+	mov		d1,%rax
+	shr		$26,%rax
+	add		%rax,d2
+	# h1 = d1 & 0x3ffffff
+	mov		d1,%rax
+	and		$0x3ffffff,%eax
+	mov		%eax,h1
+
+	# d3 += d2 >> 26
+	mov		d2,%rax
+	shr		$26,%rax
+	add		%rax,d3
+	# h2 = d2 & 0x3ffffff
+	mov		d2,%rax
+	and		$0x3ffffff,%eax
+	mov		%eax,h2
+
+	# d4 += d3 >> 26
+	mov		d3,%rax
+	shr		$26,%rax
+	add		%rax,d4
+	# h3 = d3 & 0x3ffffff
+	mov		d3,%rax
+	and		$0x3ffffff,%eax
+	mov		%eax,h3
+
+	# h0 += (d4 >> 26) * 5
+	mov		d4,%rax
+	shr		$26,%rax
+	lea		(%eax,%eax,4),%eax
+	add		%eax,%ebx
+	# h4 = d4 & 0x3ffffff
+	mov		d4,%rax
+	and		$0x3ffffff,%eax
+	mov		%eax,h4
+
+	# h1 += h0 >> 26
+	mov		%ebx,%eax
+	shr		$26,%eax
+	add		%eax,h1
+	# h0 = h0 & 0x3ffffff
+	andl		$0x3ffffff,%ebx
+	mov		%ebx,h0
+
+	add		$0x40,m
+	dec		%rcx
+	jnz		.Ldoblock4
+
+	vzeroupper
+	pop		%r13
+	pop		%r12
+	pop		%rbx
+	ret
+ENDPROC(poly1305_4block_avx2)
diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S
new file mode 100644
index 0000000..338c748
--- /dev/null
+++ b/arch/x86/crypto/poly1305-sse2-x86_64.S
@@ -0,0 +1,582 @@
+/*
+ * Poly1305 authenticator algorithm, RFC7539, x64 SSE2 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+
+.data
+.align 16
+
+ANMASK:	.octa 0x0000000003ffffff0000000003ffffff
+ORMASK:	.octa 0x00000000010000000000000001000000
+
+.text
+
+#define h0 0x00(%rdi)
+#define h1 0x04(%rdi)
+#define h2 0x08(%rdi)
+#define h3 0x0c(%rdi)
+#define h4 0x10(%rdi)
+#define r0 0x00(%rdx)
+#define r1 0x04(%rdx)
+#define r2 0x08(%rdx)
+#define r3 0x0c(%rdx)
+#define r4 0x10(%rdx)
+#define s1 0x00(%rsp)
+#define s2 0x04(%rsp)
+#define s3 0x08(%rsp)
+#define s4 0x0c(%rsp)
+#define m %rsi
+#define h01 %xmm0
+#define h23 %xmm1
+#define h44 %xmm2
+#define t1 %xmm3
+#define t2 %xmm4
+#define t3 %xmm5
+#define t4 %xmm6
+#define mask %xmm7
+#define d0 %r8
+#define d1 %r9
+#define d2 %r10
+#define d3 %r11
+#define d4 %r12
+
+ENTRY(poly1305_block_sse2)
+	# %rdi: Accumulator h[5]
+	# %rsi: 16 byte input block m
+	# %rdx: Poly1305 key r[5]
+	# %rcx: Block count
+
+	# This single block variant tries to improve performance by doing two
+	# multiplications in parallel using SSE instructions. There is quite
+	# some quardword packing involved, hence the speedup is marginal.
+
+	push		%rbx
+	push		%r12
+	sub		$0x10,%rsp
+
+	# s1..s4 = r1..r4 * 5
+	mov		r1,%eax
+	lea		(%eax,%eax,4),%eax
+	mov		%eax,s1
+	mov		r2,%eax
+	lea		(%eax,%eax,4),%eax
+	mov		%eax,s2
+	mov		r3,%eax
+	lea		(%eax,%eax,4),%eax
+	mov		%eax,s3
+	mov		r4,%eax
+	lea		(%eax,%eax,4),%eax
+	mov		%eax,s4
+
+	movdqa		ANMASK(%rip),mask
+
+.Ldoblock:
+	# h01 = [0, h1, 0, h0]
+	# h23 = [0, h3, 0, h2]
+	# h44 = [0, h4, 0, h4]
+	movd		h0,h01
+	movd		h1,t1
+	movd		h2,h23
+	movd		h3,t2
+	movd		h4,h44
+	punpcklqdq	t1,h01
+	punpcklqdq	t2,h23
+	punpcklqdq	h44,h44
+
+	# h01 += [ (m[3-6] >> 2) & 0x3ffffff, m[0-3] & 0x3ffffff ]
+	movd		0x00(m),t1
+	movd		0x03(m),t2
+	psrld		$2,t2
+	punpcklqdq	t2,t1
+	pand		mask,t1
+	paddd		t1,h01
+	# h23 += [ (m[9-12] >> 6) & 0x3ffffff, (m[6-9] >> 4) & 0x3ffffff ]
+	movd		0x06(m),t1
+	movd		0x09(m),t2
+	psrld		$4,t1
+	psrld		$6,t2
+	punpcklqdq	t2,t1
+	pand		mask,t1
+	paddd		t1,h23
+	# h44 += [ (m[12-15] >> 8) | (1 << 24), (m[12-15] >> 8) | (1 << 24) ]
+	mov		0x0c(m),%eax
+	shr		$8,%eax
+	or		$0x01000000,%eax
+	movd		%eax,t1
+	pshufd		$0xc4,t1,t1
+	paddd		t1,h44
+
+	# t1[0] = h0 * r0 + h2 * s3
+	# t1[1] = h1 * s4 + h3 * s2
+	movd		r0,t1
+	movd		s4,t2
+	punpcklqdq	t2,t1
+	pmuludq		h01,t1
+	movd		s3,t2
+	movd		s2,t3
+	punpcklqdq	t3,t2
+	pmuludq		h23,t2
+	paddq		t2,t1
+	# t2[0] = h0 * r1 + h2 * s4
+	# t2[1] = h1 * r0 + h3 * s3
+	movd		r1,t2
+	movd		r0,t3
+	punpcklqdq	t3,t2
+	pmuludq		h01,t2
+	movd		s4,t3
+	movd		s3,t4
+	punpcklqdq	t4,t3
+	pmuludq		h23,t3
+	paddq		t3,t2
+	# t3[0] = h4 * s1
+	# t3[1] = h4 * s2
+	movd		s1,t3
+	movd		s2,t4
+	punpcklqdq	t4,t3
+	pmuludq		h44,t3
+	# d0 = t1[0] + t1[1] + t3[0]
+	# d1 = t2[0] + t2[1] + t3[1]
+	movdqa		t1,t4
+	punpcklqdq	t2,t4
+	punpckhqdq	t2,t1
+	paddq		t4,t1
+	paddq		t3,t1
+	movq		t1,d0
+	psrldq		$8,t1
+	movq		t1,d1
+
+	# t1[0] = h0 * r2 + h2 * r0
+	# t1[1] = h1 * r1 + h3 * s4
+	movd		r2,t1
+	movd		r1,t2
+	punpcklqdq 	t2,t1
+	pmuludq		h01,t1
+	movd		r0,t2
+	movd		s4,t3
+	punpcklqdq	t3,t2
+	pmuludq		h23,t2
+	paddq		t2,t1
+	# t2[0] = h0 * r3 + h2 * r1
+	# t2[1] = h1 * r2 + h3 * r0
+	movd		r3,t2
+	movd		r2,t3
+	punpcklqdq	t3,t2
+	pmuludq		h01,t2
+	movd		r1,t3
+	movd		r0,t4
+	punpcklqdq	t4,t3
+	pmuludq		h23,t3
+	paddq		t3,t2
+	# t3[0] = h4 * s3
+	# t3[1] = h4 * s4
+	movd		s3,t3
+	movd		s4,t4
+	punpcklqdq	t4,t3
+	pmuludq		h44,t3
+	# d2 = t1[0] + t1[1] + t3[0]
+	# d3 = t2[0] + t2[1] + t3[1]
+	movdqa		t1,t4
+	punpcklqdq	t2,t4
+	punpckhqdq	t2,t1
+	paddq		t4,t1
+	paddq		t3,t1
+	movq		t1,d2
+	psrldq		$8,t1
+	movq		t1,d3
+
+	# t1[0] = h0 * r4 + h2 * r2
+	# t1[1] = h1 * r3 + h3 * r1
+	movd		r4,t1
+	movd		r3,t2
+	punpcklqdq	t2,t1
+	pmuludq		h01,t1
+	movd		r2,t2
+	movd		r1,t3
+	punpcklqdq	t3,t2
+	pmuludq		h23,t2
+	paddq		t2,t1
+	# t3[0] = h4 * r0
+	movd		r0,t3
+	pmuludq		h44,t3
+	# d4 = t1[0] + t1[1] + t3[0]
+	movdqa		t1,t4
+	psrldq		$8,t4
+	paddq		t4,t1
+	paddq		t3,t1
+	movq		t1,d4
+
+	# d1 += d0 >> 26
+	mov		d0,%rax
+	shr		$26,%rax
+	add		%rax,d1
+	# h0 = d0 & 0x3ffffff
+	mov		d0,%rbx
+	and		$0x3ffffff,%ebx
+
+	# d2 += d1 >> 26
+	mov		d1,%rax
+	shr		$26,%rax
+	add		%rax,d2
+	# h1 = d1 & 0x3ffffff
+	mov		d1,%rax
+	and		$0x3ffffff,%eax
+	mov		%eax,h1
+
+	# d3 += d2 >> 26
+	mov		d2,%rax
+	shr		$26,%rax
+	add		%rax,d3
+	# h2 = d2 & 0x3ffffff
+	mov		d2,%rax
+	and		$0x3ffffff,%eax
+	mov		%eax,h2
+
+	# d4 += d3 >> 26
+	mov		d3,%rax
+	shr		$26,%rax
+	add		%rax,d4
+	# h3 = d3 & 0x3ffffff
+	mov		d3,%rax
+	and		$0x3ffffff,%eax
+	mov		%eax,h3
+
+	# h0 += (d4 >> 26) * 5
+	mov		d4,%rax
+	shr		$26,%rax
+	lea		(%eax,%eax,4),%eax
+	add		%eax,%ebx
+	# h4 = d4 & 0x3ffffff
+	mov		d4,%rax
+	and		$0x3ffffff,%eax
+	mov		%eax,h4
+
+	# h1 += h0 >> 26
+	mov		%ebx,%eax
+	shr		$26,%eax
+	add		%eax,h1
+	# h0 = h0 & 0x3ffffff
+	andl		$0x3ffffff,%ebx
+	mov		%ebx,h0
+
+	add		$0x10,m
+	dec		%rcx
+	jnz		.Ldoblock
+
+	add		$0x10,%rsp
+	pop		%r12
+	pop		%rbx
+	ret
+ENDPROC(poly1305_block_sse2)
+
+
+#define u0 0x00(%r8)
+#define u1 0x04(%r8)
+#define u2 0x08(%r8)
+#define u3 0x0c(%r8)
+#define u4 0x10(%r8)
+#define hc0 %xmm0
+#define hc1 %xmm1
+#define hc2 %xmm2
+#define hc3 %xmm5
+#define hc4 %xmm6
+#define ru0 %xmm7
+#define ru1 %xmm8
+#define ru2 %xmm9
+#define ru3 %xmm10
+#define ru4 %xmm11
+#define sv1 %xmm12
+#define sv2 %xmm13
+#define sv3 %xmm14
+#define sv4 %xmm15
+#undef d0
+#define d0 %r13
+
+ENTRY(poly1305_2block_sse2)
+	# %rdi: Accumulator h[5]
+	# %rsi: 16 byte input block m
+	# %rdx: Poly1305 key r[5]
+	# %rcx: Doubleblock count
+	# %r8:  Poly1305 derived key r^2 u[5]
+
+	# This two-block variant further improves performance by using loop
+	# unrolled block processing. This is more straight forward and does
+	# less byte shuffling, but requires a second Poly1305 key r^2:
+	# h = (h + m) * r    =>    h = (h + m1) * r^2 + m2 * r
+
+	push		%rbx
+	push		%r12
+	push		%r13
+
+	# combine r0,u0
+	movd		u0,ru0
+	movd		r0,t1
+	punpcklqdq	t1,ru0
+
+	# combine r1,u1 and s1=r1*5,v1=u1*5
+	movd		u1,ru1
+	movd		r1,t1
+	punpcklqdq	t1,ru1
+	movdqa		ru1,sv1
+	pslld		$2,sv1
+	paddd		ru1,sv1
+
+	# combine r2,u2 and s2=r2*5,v2=u2*5
+	movd		u2,ru2
+	movd		r2,t1
+	punpcklqdq	t1,ru2
+	movdqa		ru2,sv2
+	pslld		$2,sv2
+	paddd		ru2,sv2
+
+	# combine r3,u3 and s3=r3*5,v3=u3*5
+	movd		u3,ru3
+	movd		r3,t1
+	punpcklqdq	t1,ru3
+	movdqa		ru3,sv3
+	pslld		$2,sv3
+	paddd		ru3,sv3
+
+	# combine r4,u4 and s4=r4*5,v4=u4*5
+	movd		u4,ru4
+	movd		r4,t1
+	punpcklqdq	t1,ru4
+	movdqa		ru4,sv4
+	pslld		$2,sv4
+	paddd		ru4,sv4
+
+.Ldoblock2:
+	# hc0 = [ m[16-19] & 0x3ffffff, h0 + m[0-3] & 0x3ffffff ]
+	movd		0x00(m),hc0
+	movd		0x10(m),t1
+	punpcklqdq	t1,hc0
+	pand		ANMASK(%rip),hc0
+	movd		h0,t1
+	paddd		t1,hc0
+	# hc1 = [ (m[19-22] >> 2) & 0x3ffffff, h1 + (m[3-6] >> 2) & 0x3ffffff ]
+	movd		0x03(m),hc1
+	movd		0x13(m),t1
+	punpcklqdq	t1,hc1
+	psrld		$2,hc1
+	pand		ANMASK(%rip),hc1
+	movd		h1,t1
+	paddd		t1,hc1
+	# hc2 = [ (m[22-25] >> 4) & 0x3ffffff, h2 + (m[6-9] >> 4) & 0x3ffffff ]
+	movd		0x06(m),hc2
+	movd		0x16(m),t1
+	punpcklqdq	t1,hc2
+	psrld		$4,hc2
+	pand		ANMASK(%rip),hc2
+	movd		h2,t1
+	paddd		t1,hc2
+	# hc3 = [ (m[25-28] >> 6) & 0x3ffffff, h3 + (m[9-12] >> 6) & 0x3ffffff ]
+	movd		0x09(m),hc3
+	movd		0x19(m),t1
+	punpcklqdq	t1,hc3
+	psrld		$6,hc3
+	pand		ANMASK(%rip),hc3
+	movd		h3,t1
+	paddd		t1,hc3
+	# hc4 = [ (m[28-31] >> 8) | (1<<24), h4 + (m[12-15] >> 8) | (1<<24) ]
+	movd		0x0c(m),hc4
+	movd		0x1c(m),t1
+	punpcklqdq	t1,hc4
+	psrld		$8,hc4
+	por		ORMASK(%rip),hc4
+	movd		h4,t1
+	paddd		t1,hc4
+
+	# t1 = [ hc0[1] * r0, hc0[0] * u0 ]
+	movdqa		ru0,t1
+	pmuludq		hc0,t1
+	# t1 += [ hc1[1] * s4, hc1[0] * v4 ]
+	movdqa		sv4,t2
+	pmuludq		hc1,t2
+	paddq		t2,t1
+	# t1 += [ hc2[1] * s3, hc2[0] * v3 ]
+	movdqa		sv3,t2
+	pmuludq		hc2,t2
+	paddq		t2,t1
+	# t1 += [ hc3[1] * s2, hc3[0] * v2 ]
+	movdqa		sv2,t2
+	pmuludq		hc3,t2
+	paddq		t2,t1
+	# t1 += [ hc4[1] * s1, hc4[0] * v1 ]
+	movdqa		sv1,t2
+	pmuludq		hc4,t2
+	paddq		t2,t1
+	# d0 = t1[0] + t1[1]
+	movdqa		t1,t2
+	psrldq		$8,t2
+	paddq		t2,t1
+	movq		t1,d0
+
+	# t1 = [ hc0[1] * r1, hc0[0] * u1 ]
+	movdqa		ru1,t1
+	pmuludq		hc0,t1
+	# t1 += [ hc1[1] * r0, hc1[0] * u0 ]
+	movdqa		ru0,t2
+	pmuludq		hc1,t2
+	paddq		t2,t1
+	# t1 += [ hc2[1] * s4, hc2[0] * v4 ]
+	movdqa		sv4,t2
+	pmuludq		hc2,t2
+	paddq		t2,t1
+	# t1 += [ hc3[1] * s3, hc3[0] * v3 ]
+	movdqa		sv3,t2
+	pmuludq		hc3,t2
+	paddq		t2,t1
+	# t1 += [ hc4[1] * s2, hc4[0] * v2 ]
+	movdqa		sv2,t2
+	pmuludq		hc4,t2
+	paddq		t2,t1
+	# d1 = t1[0] + t1[1]
+	movdqa		t1,t2
+	psrldq		$8,t2
+	paddq		t2,t1
+	movq		t1,d1
+
+	# t1 = [ hc0[1] * r2, hc0[0] * u2 ]
+	movdqa		ru2,t1
+	pmuludq		hc0,t1
+	# t1 += [ hc1[1] * r1, hc1[0] * u1 ]
+	movdqa		ru1,t2
+	pmuludq		hc1,t2
+	paddq		t2,t1
+	# t1 += [ hc2[1] * r0, hc2[0] * u0 ]
+	movdqa		ru0,t2
+	pmuludq		hc2,t2
+	paddq		t2,t1
+	# t1 += [ hc3[1] * s4, hc3[0] * v4 ]
+	movdqa		sv4,t2
+	pmuludq		hc3,t2
+	paddq		t2,t1
+	# t1 += [ hc4[1] * s3, hc4[0] * v3 ]
+	movdqa		sv3,t2
+	pmuludq		hc4,t2
+	paddq		t2,t1
+	# d2 = t1[0] + t1[1]
+	movdqa		t1,t2
+	psrldq		$8,t2
+	paddq		t2,t1
+	movq		t1,d2
+
+	# t1 = [ hc0[1] * r3, hc0[0] * u3 ]
+	movdqa		ru3,t1
+	pmuludq		hc0,t1
+	# t1 += [ hc1[1] * r2, hc1[0] * u2 ]
+	movdqa		ru2,t2
+	pmuludq		hc1,t2
+	paddq		t2,t1
+	# t1 += [ hc2[1] * r1, hc2[0] * u1 ]
+	movdqa		ru1,t2
+	pmuludq		hc2,t2
+	paddq		t2,t1
+	# t1 += [ hc3[1] * r0, hc3[0] * u0 ]
+	movdqa		ru0,t2
+	pmuludq		hc3,t2
+	paddq		t2,t1
+	# t1 += [ hc4[1] * s4, hc4[0] * v4 ]
+	movdqa		sv4,t2
+	pmuludq		hc4,t2
+	paddq		t2,t1
+	# d3 = t1[0] + t1[1]
+	movdqa		t1,t2
+	psrldq		$8,t2
+	paddq		t2,t1
+	movq		t1,d3
+
+	# t1 = [ hc0[1] * r4, hc0[0] * u4 ]
+	movdqa		ru4,t1
+	pmuludq		hc0,t1
+	# t1 += [ hc1[1] * r3, hc1[0] * u3 ]
+	movdqa		ru3,t2
+	pmuludq		hc1,t2
+	paddq		t2,t1
+	# t1 += [ hc2[1] * r2, hc2[0] * u2 ]
+	movdqa		ru2,t2
+	pmuludq		hc2,t2
+	paddq		t2,t1
+	# t1 += [ hc3[1] * r1, hc3[0] * u1 ]
+	movdqa		ru1,t2
+	pmuludq		hc3,t2
+	paddq		t2,t1
+	# t1 += [ hc4[1] * r0, hc4[0] * u0 ]
+	movdqa		ru0,t2
+	pmuludq		hc4,t2
+	paddq		t2,t1
+	# d4 = t1[0] + t1[1]
+	movdqa		t1,t2
+	psrldq		$8,t2
+	paddq		t2,t1
+	movq		t1,d4
+
+	# d1 += d0 >> 26
+	mov		d0,%rax
+	shr		$26,%rax
+	add		%rax,d1
+	# h0 = d0 & 0x3ffffff
+	mov		d0,%rbx
+	and		$0x3ffffff,%ebx
+
+	# d2 += d1 >> 26
+	mov		d1,%rax
+	shr		$26,%rax
+	add		%rax,d2
+	# h1 = d1 & 0x3ffffff
+	mov		d1,%rax
+	and		$0x3ffffff,%eax
+	mov		%eax,h1
+
+	# d3 += d2 >> 26
+	mov		d2,%rax
+	shr		$26,%rax
+	add		%rax,d3
+	# h2 = d2 & 0x3ffffff
+	mov		d2,%rax
+	and		$0x3ffffff,%eax
+	mov		%eax,h2
+
+	# d4 += d3 >> 26
+	mov		d3,%rax
+	shr		$26,%rax
+	add		%rax,d4
+	# h3 = d3 & 0x3ffffff
+	mov		d3,%rax
+	and		$0x3ffffff,%eax
+	mov		%eax,h3
+
+	# h0 += (d4 >> 26) * 5
+	mov		d4,%rax
+	shr		$26,%rax
+	lea		(%eax,%eax,4),%eax
+	add		%eax,%ebx
+	# h4 = d4 & 0x3ffffff
+	mov		d4,%rax
+	and		$0x3ffffff,%eax
+	mov		%eax,h4
+
+	# h1 += h0 >> 26
+	mov		%ebx,%eax
+	shr		$26,%eax
+	add		%eax,h1
+	# h0 = h0 & 0x3ffffff
+	andl		$0x3ffffff,%ebx
+	mov		%ebx,h0
+
+	add		$0x20,m
+	dec		%rcx
+	jnz		.Ldoblock2
+
+	pop		%r13
+	pop		%r12
+	pop		%rbx
+	ret
+ENDPROC(poly1305_2block_sse2)
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
new file mode 100644
index 0000000..f7170d7
--- /dev/null
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -0,0 +1,207 @@
+/*
+ * Poly1305 authenticator algorithm, RFC7539, SIMD glue code
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/poly1305.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+#include <asm/simd.h>
+
+struct poly1305_simd_desc_ctx {
+	struct poly1305_desc_ctx base;
+	/* derived key u set? */
+	bool uset;
+#ifdef CONFIG_AS_AVX2
+	/* derived keys r^3, r^4 set? */
+	bool wset;
+#endif
+	/* derived Poly1305 key r^2 */
+	u32 u[5];
+	/* ... silently appended r^3 and r^4 when using AVX2 */
+};
+
+asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
+				    const u32 *r, unsigned int blocks);
+asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
+				     unsigned int blocks, const u32 *u);
+#ifdef CONFIG_AS_AVX2
+asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
+				     unsigned int blocks, const u32 *u);
+static bool poly1305_use_avx2;
+#endif
+
+static int poly1305_simd_init(struct shash_desc *desc)
+{
+	struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc);
+
+	sctx->uset = false;
+#ifdef CONFIG_AS_AVX2
+	sctx->wset = false;
+#endif
+
+	return crypto_poly1305_init(desc);
+}
+
+static void poly1305_simd_mult(u32 *a, const u32 *b)
+{
+	u8 m[POLY1305_BLOCK_SIZE];
+
+	memset(m, 0, sizeof(m));
+	/* The poly1305 block function adds a hi-bit to the accumulator which
+	 * we don't need for key multiplication; compensate for it. */
+	a[4] -= 1 << 24;
+	poly1305_block_sse2(a, m, b, 1);
+}
+
+static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
+					 const u8 *src, unsigned int srclen)
+{
+	struct poly1305_simd_desc_ctx *sctx;
+	unsigned int blocks, datalen;
+
+	BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base));
+	sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base);
+
+	if (unlikely(!dctx->sset)) {
+		datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
+		src += srclen - datalen;
+		srclen = datalen;
+	}
+
+#ifdef CONFIG_AS_AVX2
+	if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
+		if (unlikely(!sctx->wset)) {
+			if (!sctx->uset) {
+				memcpy(sctx->u, dctx->r, sizeof(sctx->u));
+				poly1305_simd_mult(sctx->u, dctx->r);
+				sctx->uset = true;
+			}
+			memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
+			poly1305_simd_mult(sctx->u + 5, dctx->r);
+			memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
+			poly1305_simd_mult(sctx->u + 10, dctx->r);
+			sctx->wset = true;
+		}
+		blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
+		poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u);
+		src += POLY1305_BLOCK_SIZE * 4 * blocks;
+		srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
+	}
+#endif
+	if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
+		if (unlikely(!sctx->uset)) {
+			memcpy(sctx->u, dctx->r, sizeof(sctx->u));
+			poly1305_simd_mult(sctx->u, dctx->r);
+			sctx->uset = true;
+		}
+		blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
+		poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u);
+		src += POLY1305_BLOCK_SIZE * 2 * blocks;
+		srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
+	}
+	if (srclen >= POLY1305_BLOCK_SIZE) {
+		poly1305_block_sse2(dctx->h, src, dctx->r, 1);
+		srclen -= POLY1305_BLOCK_SIZE;
+	}
+	return srclen;
+}
+
+static int poly1305_simd_update(struct shash_desc *desc,
+				const u8 *src, unsigned int srclen)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+	unsigned int bytes;
+
+	/* kernel_fpu_begin/end is costly, use fallback for small updates */
+	if (srclen <= 288 || !may_use_simd())
+		return crypto_poly1305_update(desc, src, srclen);
+
+	kernel_fpu_begin();
+
+	if (unlikely(dctx->buflen)) {
+		bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
+		memcpy(dctx->buf + dctx->buflen, src, bytes);
+		src += bytes;
+		srclen -= bytes;
+		dctx->buflen += bytes;
+
+		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+			poly1305_simd_blocks(dctx, dctx->buf,
+					     POLY1305_BLOCK_SIZE);
+			dctx->buflen = 0;
+		}
+	}
+
+	if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
+		bytes = poly1305_simd_blocks(dctx, src, srclen);
+		src += srclen - bytes;
+		srclen = bytes;
+	}
+
+	kernel_fpu_end();
+
+	if (unlikely(srclen)) {
+		dctx->buflen = srclen;
+		memcpy(dctx->buf, src, srclen);
+	}
+
+	return 0;
+}
+
+static struct shash_alg alg = {
+	.digestsize	= POLY1305_DIGEST_SIZE,
+	.init		= poly1305_simd_init,
+	.update		= poly1305_simd_update,
+	.final		= crypto_poly1305_final,
+	.setkey		= crypto_poly1305_setkey,
+	.descsize	= sizeof(struct poly1305_simd_desc_ctx),
+	.base		= {
+		.cra_name		= "poly1305",
+		.cra_driver_name	= "poly1305-simd",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_alignmask		= sizeof(u32) - 1,
+		.cra_blocksize		= POLY1305_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	},
+};
+
+static int __init poly1305_simd_mod_init(void)
+{
+	if (!cpu_has_xmm2)
+		return -ENODEV;
+
+#ifdef CONFIG_AS_AVX2
+	poly1305_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
+			    cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL);
+	alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
+	if (poly1305_use_avx2)
+		alg.descsize += 10 * sizeof(u32);
+#endif
+	return crypto_register_shash(&alg);
+}
+
+static void __exit poly1305_simd_mod_exit(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_init(poly1305_simd_mod_init);
+module_exit(poly1305_simd_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
+MODULE_DESCRIPTION("Poly1305 authenticator");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-simd");
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3bb2c43..8cb3e43 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1237,11 +1237,12 @@ ENTRY(nmi)
 	 *  If the variable is not set and the stack is not the NMI
 	 *  stack then:
 	 *    o Set the special variable on the stack
-	 *    o Copy the interrupt frame into a "saved" location on the stack
-	 *    o Copy the interrupt frame into a "copy" location on the stack
+	 *    o Copy the interrupt frame into an "outermost" location on the
+	 *      stack
+	 *    o Copy the interrupt frame into an "iret" location on the stack
 	 *    o Continue processing the NMI
 	 *  If the variable is set or the previous stack is the NMI stack:
-	 *    o Modify the "copy" location to jump to the repeate_nmi
+	 *    o Modify the "iret" location to jump to the repeat_nmi
 	 *    o return back to the first NMI
 	 *
 	 * Now on exit of the first NMI, we first clear the stack variable
@@ -1250,31 +1251,151 @@ ENTRY(nmi)
 	 * a nested NMI that updated the copy interrupt stack frame, a
 	 * jump will be made to the repeat_nmi code that will handle the second
 	 * NMI.
+	 *
+	 * However, espfix prevents us from directly returning to userspace
+	 * with a single IRET instruction.  Similarly, IRET to user mode
+	 * can fault.  We therefore handle NMIs from user space like
+	 * other IST entries.
 	 */
 
 	/* Use %rdx as our temp variable throughout */
 	pushq	%rdx
 
+	testb	$3, CS-RIP+8(%rsp)
+	jz	.Lnmi_from_kernel
+
+	/*
+	 * NMI from user mode.  We need to run on the thread stack, but we
+	 * can't go through the normal entry paths: NMIs are masked, and
+	 * we don't want to enable interrupts, because then we'll end
+	 * up in an awkward situation in which IRQs are on but NMIs
+	 * are off.
+	 */
+
+	SWAPGS
+	cld
+	movq	%rsp, %rdx
+	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	pushq	5*8(%rdx)	/* pt_regs->ss */
+	pushq	4*8(%rdx)	/* pt_regs->rsp */
+	pushq	3*8(%rdx)	/* pt_regs->flags */
+	pushq	2*8(%rdx)	/* pt_regs->cs */
+	pushq	1*8(%rdx)	/* pt_regs->rip */
+	pushq   $-1		/* pt_regs->orig_ax */
+	pushq   %rdi		/* pt_regs->di */
+	pushq   %rsi		/* pt_regs->si */
+	pushq   (%rdx)		/* pt_regs->dx */
+	pushq   %rcx		/* pt_regs->cx */
+	pushq   %rax		/* pt_regs->ax */
+	pushq   %r8		/* pt_regs->r8 */
+	pushq   %r9		/* pt_regs->r9 */
+	pushq   %r10		/* pt_regs->r10 */
+	pushq   %r11		/* pt_regs->r11 */
+	pushq	%rbx		/* pt_regs->rbx */
+	pushq	%rbp		/* pt_regs->rbp */
+	pushq	%r12		/* pt_regs->r12 */
+	pushq	%r13		/* pt_regs->r13 */
+	pushq	%r14		/* pt_regs->r14 */
+	pushq	%r15		/* pt_regs->r15 */
+
+	/*
+	 * At this point we no longer need to worry about stack damage
+	 * due to nesting -- we're on the normal thread stack and we're
+	 * done with the NMI stack.
+	 */
+
+	movq	%rsp, %rdi
+	movq	$-1, %rsi
+	call	do_nmi
+
+	/*
+	 * Return back to user mode.  We must *not* do the normal exit
+	 * work, because we don't want to enable interrupts.  Fortunately,
+	 * do_nmi doesn't modify pt_regs.
+	 */
+	SWAPGS
+	jmp	restore_c_regs_and_iret
+
+.Lnmi_from_kernel:
+	/*
+	 * Here's what our stack frame will look like:
+	 * +---------------------------------------------------------+
+	 * | original SS                                             |
+	 * | original Return RSP                                     |
+	 * | original RFLAGS                                         |
+	 * | original CS                                             |
+	 * | original RIP                                            |
+	 * +---------------------------------------------------------+
+	 * | temp storage for rdx                                    |
+	 * +---------------------------------------------------------+
+	 * | "NMI executing" variable                                |
+	 * +---------------------------------------------------------+
+	 * | iret SS          } Copied from "outermost" frame        |
+	 * | iret Return RSP  } on each loop iteration; overwritten  |
+	 * | iret RFLAGS      } by a nested NMI to force another     |
+	 * | iret CS          } iteration if needed.                 |
+	 * | iret RIP         }                                      |
+	 * +---------------------------------------------------------+
+	 * | outermost SS          } initialized in first_nmi;       |
+	 * | outermost Return RSP  } will not be changed before      |
+	 * | outermost RFLAGS      } NMI processing is done.         |
+	 * | outermost CS          } Copied to "iret" frame on each  |
+	 * | outermost RIP         } iteration.                      |
+	 * +---------------------------------------------------------+
+	 * | pt_regs                                                 |
+	 * +---------------------------------------------------------+
+	 *
+	 * The "original" frame is used by hardware.  Before re-enabling
+	 * NMIs, we need to be done with it, and we need to leave enough
+	 * space for the asm code here.
+	 *
+	 * We return by executing IRET while RSP points to the "iret" frame.
+	 * That will either return for real or it will loop back into NMI
+	 * processing.
+	 *
+	 * The "outermost" frame is copied to the "iret" frame on each
+	 * iteration of the loop, so each iteration starts with the "iret"
+	 * frame pointing to the final return target.
+	 */
+
 	/*
-	 * If %cs was not the kernel segment, then the NMI triggered in user
-	 * space, which means it is definitely not nested.
+	 * Determine whether we're a nested NMI.
+	 *
+	 * If we interrupted kernel code between repeat_nmi and
+	 * end_repeat_nmi, then we are a nested NMI.  We must not
+	 * modify the "iret" frame because it's being written by
+	 * the outer NMI.  That's okay; the outer NMI handler is
+	 * about to about to call do_nmi anyway, so we can just
+	 * resume the outer NMI.
 	 */
-	cmpl	$__KERNEL_CS, 16(%rsp)
-	jne	first_nmi
+
+	movq	$repeat_nmi, %rdx
+	cmpq	8(%rsp), %rdx
+	ja	1f
+	movq	$end_repeat_nmi, %rdx
+	cmpq	8(%rsp), %rdx
+	ja	nested_nmi_out
+1:
 
 	/*
-	 * Check the special variable on the stack to see if NMIs are
-	 * executing.
+	 * Now check "NMI executing".  If it's set, then we're nested.
+	 * This will not detect if we interrupted an outer NMI just
+	 * before IRET.
 	 */
 	cmpl	$1, -8(%rsp)
 	je	nested_nmi
 
 	/*
-	 * Now test if the previous stack was an NMI stack.
-	 * We need the double check. We check the NMI stack to satisfy the
-	 * race when the first NMI clears the variable before returning.
-	 * We check the variable because the first NMI could be in a
-	 * breakpoint routine using a breakpoint stack.
+	 * Now test if the previous stack was an NMI stack.  This covers
+	 * the case where we interrupt an outer NMI after it clears
+	 * "NMI executing" but before IRET.  We need to be careful, though:
+	 * there is one case in which RSP could point to the NMI stack
+	 * despite there being no NMI active: naughty userspace controls
+	 * RSP at the very beginning of the SYSCALL targets.  We can
+	 * pull a fast one on naughty userspace, though: we program
+	 * SYSCALL to mask DF, so userspace cannot cause DF to be set
+	 * if it controls the kernel's RSP.  We set DF before we clear
+	 * "NMI executing".
 	 */
 	lea	6*8(%rsp), %rdx
 	/* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */
@@ -1286,25 +1407,20 @@ ENTRY(nmi)
 	cmpq	%rdx, 4*8(%rsp)
 	/* If it is below the NMI stack, it is a normal NMI */
 	jb	first_nmi
-	/* Ah, it is within the NMI stack, treat it as nested */
+
+	/* Ah, it is within the NMI stack. */
+
+	testb	$(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp)
+	jz	first_nmi	/* RSP was user controlled. */
+
+	/* This is a nested NMI. */
 
 nested_nmi:
 	/*
-	 * Do nothing if we interrupted the fixup in repeat_nmi.
-	 * It's about to repeat the NMI handler, so we are fine
-	 * with ignoring this one.
+	 * Modify the "iret" frame to point to repeat_nmi, forcing another
+	 * iteration of NMI handling.
 	 */
-	movq	$repeat_nmi, %rdx
-	cmpq	8(%rsp), %rdx
-	ja	1f
-	movq	$end_repeat_nmi, %rdx
-	cmpq	8(%rsp), %rdx
-	ja	nested_nmi_out
-
-1:
-	/* Set up the interrupted NMIs stack to jump to repeat_nmi */
-	leaq	-1*8(%rsp), %rdx
-	movq	%rdx, %rsp
+	subq	$8, %rsp
 	leaq	-10*8(%rsp), %rdx
 	pushq	$__KERNEL_DS
 	pushq	%rdx
@@ -1318,61 +1434,42 @@ nested_nmi:
 nested_nmi_out:
 	popq	%rdx
 
-	/* No need to check faults here */
+	/* We are returning to kernel mode, so this cannot result in a fault. */
 	INTERRUPT_RETURN
 
 first_nmi:
-	/*
-	 * Because nested NMIs will use the pushed location that we
-	 * stored in rdx, we must keep that space available.
-	 * Here's what our stack frame will look like:
-	 * +-------------------------+
-	 * | original SS             |
-	 * | original Return RSP     |
-	 * | original RFLAGS         |
-	 * | original CS             |
-	 * | original RIP            |
-	 * +-------------------------+
-	 * | temp storage for rdx    |
-	 * +-------------------------+
-	 * | NMI executing variable  |
-	 * +-------------------------+
-	 * | copied SS               |
-	 * | copied Return RSP       |
-	 * | copied RFLAGS           |
-	 * | copied CS               |
-	 * | copied RIP              |
-	 * +-------------------------+
-	 * | Saved SS                |
-	 * | Saved Return RSP        |
-	 * | Saved RFLAGS            |
-	 * | Saved CS                |
-	 * | Saved RIP               |
-	 * +-------------------------+
-	 * | pt_regs                 |
-	 * +-------------------------+
-	 *
-	 * The saved stack frame is used to fix up the copied stack frame
-	 * that a nested NMI may change to make the interrupted NMI iret jump
-	 * to the repeat_nmi. The original stack frame and the temp storage
-	 * is also used by nested NMIs and can not be trusted on exit.
-	 */
-	/* Do not pop rdx, nested NMIs will corrupt that part of the stack */
+	/* Restore rdx. */
 	movq	(%rsp), %rdx
 
-	/* Set the NMI executing variable on the stack. */
-	pushq	$1
+	/* Make room for "NMI executing". */
+	pushq	$0
 
-	/* Leave room for the "copied" frame */
+	/* Leave room for the "iret" frame */
 	subq	$(5*8), %rsp
 
-	/* Copy the stack frame to the Saved frame */
+	/* Copy the "original" frame to the "outermost" frame */
 	.rept 5
 	pushq	11*8(%rsp)
 	.endr
 
 	/* Everything up to here is safe from nested NMIs */
 
+#ifdef CONFIG_DEBUG_ENTRY
+	/*
+	 * For ease of testing, unmask NMIs right away.  Disabled by
+	 * default because IRET is very expensive.
+	 */
+	pushq	$0		/* SS */
+	pushq	%rsp		/* RSP (minus 8 because of the previous push) */
+	addq	$8, (%rsp)	/* Fix up RSP */
+	pushfq			/* RFLAGS */
+	pushq	$__KERNEL_CS	/* CS */
+	pushq	$1f		/* RIP */
+	INTERRUPT_RETURN	/* continues at repeat_nmi below */
+1:
+#endif
+
+repeat_nmi:
 	/*
 	 * If there was a nested NMI, the first NMI's iret will return
 	 * here. But NMIs are still enabled and we can take another
@@ -1381,16 +1478,20 @@ first_nmi:
 	 * it will just return, as we are about to repeat an NMI anyway.
 	 * This makes it safe to copy to the stack frame that a nested
 	 * NMI will update.
+	 *
+	 * RSP is pointing to "outermost RIP".  gsbase is unknown, but, if
+	 * we're repeating an NMI, gsbase has the same value that it had on
+	 * the first iteration.  paranoid_entry will load the kernel
+	 * gsbase if needed before we call do_nmi.  "NMI executing"
+	 * is zero.
 	 */
-repeat_nmi:
+	movq	$1, 10*8(%rsp)		/* Set "NMI executing". */
+
 	/*
-	 * Update the stack variable to say we are still in NMI (the update
-	 * is benign for the non-repeat case, where 1 was pushed just above
-	 * to this very stack slot).
+	 * Copy the "outermost" frame to the "iret" frame.  NMIs that nest
+	 * here must not modify the "iret" frame while we're writing to
+	 * it or it will end up containing garbage.
 	 */
-	movq	$1, 10*8(%rsp)
-
-	/* Make another copy, this one may be modified by nested NMIs */
 	addq	$(10*8), %rsp
 	.rept 5
 	pushq	-6*8(%rsp)
@@ -1399,9 +1500,9 @@ repeat_nmi:
 end_repeat_nmi:
 
 	/*
-	 * Everything below this point can be preempted by a nested
-	 * NMI if the first NMI took an exception and reset our iret stack
-	 * so that we repeat another NMI.
+	 * Everything below this point can be preempted by a nested NMI.
+	 * If this happens, then the inner NMI will change the "iret"
+	 * frame to point back to repeat_nmi.
 	 */
 	pushq	$-1				/* ORIG_RAX: no syscall to restart */
 	ALLOC_PT_GPREGS_ON_STACK
@@ -1415,28 +1516,11 @@ end_repeat_nmi:
 	 */
 	call	paranoid_entry
 
-	/*
-	 * Save off the CR2 register. If we take a page fault in the NMI then
-	 * it could corrupt the CR2 value. If the NMI preempts a page fault
-	 * handler before it was able to read the CR2 register, and then the
-	 * NMI itself takes a page fault, the page fault that was preempted
-	 * will read the information from the NMI page fault and not the
-	 * origin fault. Save it off and restore it if it changes.
-	 * Use the r12 callee-saved register.
-	 */
-	movq	%cr2, %r12
-
 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	movq	%rsp, %rdi
 	movq	$-1, %rsi
 	call	do_nmi
 
-	/* Did the NMI take a page fault? Restore cr2 if it did */
-	movq	%cr2, %rcx
-	cmpq	%rcx, %r12
-	je	1f
-	movq	%r12, %cr2
-1:
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	nmi_restore
 nmi_swapgs:
@@ -1444,11 +1528,26 @@ nmi_swapgs:
 nmi_restore:
 	RESTORE_EXTRA_REGS
 	RESTORE_C_REGS
-	/* Pop the extra iret frame at once */
+
+	/* Point RSP at the "iret" frame. */
 	REMOVE_PT_GPREGS_FROM_STACK 6*8
 
-	/* Clear the NMI executing stack variable */
-	movq	$0, 5*8(%rsp)
+	/*
+	 * Clear "NMI executing".  Set DF first so that we can easily
+	 * distinguish the remaining code between here and IRET from
+	 * the SYSCALL entry and exit paths.  On a native kernel, we
+	 * could just inspect RIP, but, on paravirt kernels,
+	 * INTERRUPT_RETURN can translate into a jump into a
+	 * hypercall page.
+	 */
+	std
+	movq	$0, 5*8(%rsp)		/* clear "NMI executing" */
+
+	/*
+	 * INTERRUPT_RETURN reads the "iret" frame and exits the NMI
+	 * stack in a single instruction.  We are returning to kernel
+	 * mode, so this cannot result in a fault.
+	 */
 	INTERRUPT_RETURN
 END(nmi)
 
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index bb187a6..a7e257d 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -140,6 +140,7 @@ sysexit_from_sys_call:
 	 */
 	andl	$~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
 	movl	RIP(%rsp), %ecx		/* User %eip */
+	movq    RAX(%rsp), %rax
 	RESTORE_RSI_RDI
 	xorl	%edx, %edx		/* Do not leak kernel information */
 	xorq	%r8, %r8
@@ -205,7 +206,6 @@ sysexit_from_sys_call:
 	movl	RDX(%rsp), %edx		/* arg3 */
 	movl	RSI(%rsp), %ecx		/* arg4 */
 	movl	RDI(%rsp), %r8d		/* arg5 */
-	movl	%ebp, %r9d		/* arg6 */
 	.endm
 
 	.macro auditsys_exit exit
@@ -220,7 +220,6 @@ sysexit_from_sys_call:
 1:	setbe	%al			/* 1 if error, 0 if not */
 	movzbl	%al, %edi		/* zero-extend that into %edi */
 	call	__audit_syscall_exit
-	movq	RAX(%rsp), %rax		/* reload syscall return value */
 	movl	$(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %edi
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
@@ -236,6 +235,7 @@ sysexit_from_sys_call:
 
 sysenter_auditsys:
 	auditsys_entry_common
+	movl	%ebp, %r9d		/* reload 6th syscall arg */
 	jmp	sysenter_dispatch
 
 sysexit_audit:
@@ -336,7 +336,7 @@ ENTRY(entry_SYSCALL_compat)
 	 * 32-bit zero extended:
 	 */
 	ASM_STAC
-1:	movl	(%r8), %ebp
+1:	movl	(%r8), %r9d
 	_ASM_EXTABLE(1b, ia32_badarg)
 	ASM_CLAC
 	orl	$TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
@@ -346,7 +346,7 @@ ENTRY(entry_SYSCALL_compat)
 cstar_do_call:
 	/* 32-bit syscall -> 64-bit C ABI argument conversion */
 	movl	%edi, %r8d		/* arg5 */
-	movl	%ebp, %r9d		/* arg6 */
+	/* r9 already loaded */		/* arg6 */
 	xchg	%ecx, %esi		/* rsi:arg2, rcx:arg4 */
 	movl	%ebx, %edi		/* arg1 */
 	movl	%edx, %edx		/* arg3 (zero extension) */
@@ -358,7 +358,6 @@ cstar_dispatch:
 	call	*ia32_sys_call_table(, %rax, 8)
 	movq	%rax, RAX(%rsp)
 1:
-	movl	RCX(%rsp), %ebp
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
@@ -369,6 +368,7 @@ sysretl_from_sys_call:
 	RESTORE_RSI_RDI_RDX
 	movl	RIP(%rsp), %ecx
 	movl	EFLAGS(%rsp), %r11d
+	movq    RAX(%rsp), %rax
 	xorq	%r10, %r10
 	xorq	%r9, %r9
 	xorq	%r8, %r8
@@ -392,7 +392,9 @@ sysretl_from_sys_call:
 
 #ifdef CONFIG_AUDITSYSCALL
 cstar_auditsys:
+	movl	%r9d, R9(%rsp)		/* register to be clobbered by call */
 	auditsys_entry_common
+	movl	R9(%rsp), %r9d		/* reload 6th syscall arg */
 	jmp	cstar_dispatch
 
 sysretl_audit:
@@ -404,14 +406,16 @@ cstar_tracesys:
 	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jz	cstar_auditsys
 #endif
+	xchgl	%r9d, %ebp
 	SAVE_EXTRA_REGS
 	xorl	%eax, %eax		/* Do not leak kernel information */
 	movq	%rax, R11(%rsp)
 	movq	%rax, R10(%rsp)
-	movq	%rax, R9(%rsp)
+	movq	%r9, R9(%rsp)
 	movq	%rax, R8(%rsp)
 	movq	%rsp, %rdi		/* &pt_regs -> arg1 */
 	call	syscall_trace_enter
+	movl	R9(%rsp), %r9d
 
 	/* Reload arg registers from stack. (see sysenter_tracesys) */
 	movl	RCX(%rsp), %ecx
@@ -421,6 +425,7 @@ cstar_tracesys:
 	movl	%eax, %eax		/* zero extension */
 
 	RESTORE_EXTRA_REGS
+	xchgl	%ebp, %r9d
 	jmp	cstar_do_call
 END(entry_SYSCALL_compat)
 
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4dd1f2d..aeac434 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -9,3 +9,4 @@ generic-y += cputime.h
 generic-y += dma-contiguous.h
 generic-y += early_ioremap.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 9686c3d..259a7c1 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -21,7 +21,7 @@
  * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective
  * compiler switches.
  */
-static inline unsigned int __arch_hweight32(unsigned int w)
+static __always_inline unsigned int __arch_hweight32(unsigned int w)
 {
 	unsigned int res = 0;
 
@@ -42,20 +42,23 @@ static inline unsigned int __arch_hweight8(unsigned int w)
 	return __arch_hweight32(w & 0xff);
 }
 
+#ifdef CONFIG_X86_32
 static inline unsigned long __arch_hweight64(__u64 w)
 {
-	unsigned long res = 0;
-
-#ifdef CONFIG_X86_32
 	return  __arch_hweight32((u32)w) +
 		__arch_hweight32((u32)(w >> 32));
+}
 #else
+static __always_inline unsigned long __arch_hweight64(__u64 w)
+{
+	unsigned long res = 0;
+
 	asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT)
 		     : "="REG_OUT (res)
 		     : REG_IN (w));
-#endif /* CONFIG_X86_32 */
 
 	return res;
 }
+#endif /* CONFIG_X86_32 */
 
 #endif
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index a0bf89f..4e10d73 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -280,21 +280,6 @@ static inline void clear_LDT(void)
 	set_ldt(NULL, 0);
 }
 
-/*
- * load one particular LDT into the current CPU
- */
-static inline void load_LDT_nolock(mm_context_t *pc)
-{
-	set_ldt(pc->ldt, pc->size);
-}
-
-static inline void load_LDT(mm_context_t *pc)
-{
-	preempt_disable();
-	load_LDT_nolock(pc);
-	preempt_enable();
-}
-
 static inline unsigned long get_desc_base(const struct desc_struct *desc)
 {
 	return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h
index 99efebb..ca3ce9a 100644
--- a/arch/x86/include/asm/espfix.h
+++ b/arch/x86/include/asm/espfix.h
@@ -9,7 +9,7 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
 DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
 
 extern void init_espfix_bsp(void);
-extern void init_espfix_ap(void);
+extern void init_espfix_ap(int cpu);
 
 #endif /* CONFIG_X86_64 */
 
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 0637826..c49c517 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -189,6 +189,7 @@ union fpregs_state {
 	struct fxregs_state		fxsave;
 	struct swregs_state		soft;
 	struct xregs_state		xsave;
+	u8 __padding[PAGE_SIZE];
 };
 
 /*
@@ -198,40 +199,6 @@ union fpregs_state {
  */
 struct fpu {
 	/*
-	 * @state:
-	 *
-	 * In-memory copy of all FPU registers that we save/restore
-	 * over context switches. If the task is using the FPU then
-	 * the registers in the FPU are more recent than this state
-	 * copy. If the task context-switches away then they get
-	 * saved here and represent the FPU state.
-	 *
-	 * After context switches there may be a (short) time period
-	 * during which the in-FPU hardware registers are unchanged
-	 * and still perfectly match this state, if the tasks
-	 * scheduled afterwards are not using the FPU.
-	 *
-	 * This is the 'lazy restore' window of optimization, which
-	 * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
-	 *
-	 * We detect whether a subsequent task uses the FPU via setting
-	 * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
-	 *
-	 * During this window, if the task gets scheduled again, we
-	 * might be able to skip having to do a restore from this
-	 * memory buffer to the hardware registers - at the cost of
-	 * incurring the overhead of #NM fault traps.
-	 *
-	 * Note that on modern CPUs that support the XSAVEOPT (or other
-	 * optimized XSAVE instructions), we don't use #NM traps anymore,
-	 * as the hardware can track whether FPU registers need saving
-	 * or not. On such CPUs we activate the non-lazy ('eagerfpu')
-	 * logic, which unconditionally saves/restores all FPU state
-	 * across context switches. (if FPU state exists.)
-	 */
-	union fpregs_state		state;
-
-	/*
 	 * @last_cpu:
 	 *
 	 * Records the last CPU on which this context was loaded into
@@ -288,6 +255,43 @@ struct fpu {
 	 * deal with bursty apps that only use the FPU for a short time:
 	 */
 	unsigned char			counter;
+	/*
+	 * @state:
+	 *
+	 * In-memory copy of all FPU registers that we save/restore
+	 * over context switches. If the task is using the FPU then
+	 * the registers in the FPU are more recent than this state
+	 * copy. If the task context-switches away then they get
+	 * saved here and represent the FPU state.
+	 *
+	 * After context switches there may be a (short) time period
+	 * during which the in-FPU hardware registers are unchanged
+	 * and still perfectly match this state, if the tasks
+	 * scheduled afterwards are not using the FPU.
+	 *
+	 * This is the 'lazy restore' window of optimization, which
+	 * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
+	 *
+	 * We detect whether a subsequent task uses the FPU via setting
+	 * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
+	 *
+	 * During this window, if the task gets scheduled again, we
+	 * might be able to skip having to do a restore from this
+	 * memory buffer to the hardware registers - at the cost of
+	 * incurring the overhead of #NM fault traps.
+	 *
+	 * Note that on modern CPUs that support the XSAVEOPT (or other
+	 * optimized XSAVE instructions), we don't use #NM traps anymore,
+	 * as the hardware can track whether FPU registers need saving
+	 * or not. On such CPUs we activate the non-lazy ('eagerfpu')
+	 * logic, which unconditionally saves/restores all FPU state
+	 * across context switches. (if FPU state exists.)
+	 */
+	union fpregs_state		state;
+	/*
+	 * WARNING: 'state' is dynamically-sized.  Do not put
+	 * anything after it here.
+	 */
 };
 
 #endif /* _ASM_X86_FPU_H */
diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h
index 200ec2e..cd0310e 100644
--- a/arch/x86/include/asm/intel_pmc_ipc.h
+++ b/arch/x86/include/asm/intel_pmc_ipc.h
@@ -25,36 +25,9 @@
 
 #if IS_ENABLED(CONFIG_INTEL_PMC_IPC)
 
-/*
- * intel_pmc_ipc_simple_command
- * @cmd: command
- * @sub: sub type
- */
 int intel_pmc_ipc_simple_command(int cmd, int sub);
-
-/*
- * intel_pmc_ipc_raw_cmd
- * @cmd: command
- * @sub: sub type
- * @in: input data
- * @inlen: input length in bytes
- * @out: output data
- * @outlen: output length in dwords
- * @sptr: data writing to SPTR register
- * @dptr: data writing to DPTR register
- */
 int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen,
 		u32 *out, u32 outlen, u32 dptr, u32 sptr);
-
-/*
- * intel_pmc_ipc_command
- * @cmd: command
- * @sub: sub type
- * @in: input data
- * @inlen: input length in bytes
- * @out: output data
- * @outlen: output length in dwords
- */
 int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen,
 		u32 *out, u32 outlen);
 
diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
index 8b22422..1410b56 100644
--- a/arch/x86/include/asm/kasan.h
+++ b/arch/x86/include/asm/kasan.h
@@ -1,6 +1,9 @@
 #ifndef _ASM_X86_KASAN_H
 #define _ASM_X86_KASAN_H
 
+#include <linux/const.h>
+#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+
 /*
  * Compiler uses shadow offset assuming that addresses start
  * from 0. Kernel addresses don't start from 0, so shadow
@@ -14,15 +17,11 @@
 
 #ifndef __ASSEMBLY__
 
-extern pte_t kasan_zero_pte[];
-extern pte_t kasan_zero_pmd[];
-extern pte_t kasan_zero_pud[];
-
 #ifdef CONFIG_KASAN
-void __init kasan_map_early_shadow(pgd_t *pgd);
+void __init kasan_early_init(void);
 void __init kasan_init(void);
 #else
-static inline void kasan_map_early_shadow(pgd_t *pgd) { }
+static inline void kasan_early_init(void) { }
 static inline void kasan_init(void) { }
 #endif
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2a7f5d7..c12e845 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -252,6 +252,11 @@ struct kvm_pio_request {
 	int size;
 };
 
+struct rsvd_bits_validate {
+	u64 rsvd_bits_mask[2][4];
+	u64 bad_mt_xwr;
+};
+
 /*
  * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
  * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
@@ -289,8 +294,15 @@ struct kvm_mmu {
 
 	u64 *pae_root;
 	u64 *lm_root;
-	u64 rsvd_bits_mask[2][4];
-	u64 bad_mt_xwr;
+
+	/*
+	 * check zero bits on shadow page table entries, these
+	 * bits include not only hardware reserved bits but also
+	 * the bits spte never used.
+	 */
+	struct rsvd_bits_validate shadow_zero_check;
+
+	struct rsvd_bits_validate guest_rsvd_check;
 
 	/*
 	 * Bitmap: bit set = last pte in walk
@@ -358,6 +370,11 @@ struct kvm_mtrr {
 	struct list_head head;
 };
 
+/* Hyper-V per vcpu emulation context */
+struct kvm_vcpu_hv {
+	u64 hv_vapic;
+};
+
 struct kvm_vcpu_arch {
 	/*
 	 * rip and regs accesses must go through
@@ -514,8 +531,7 @@ struct kvm_vcpu_arch {
 	/* used for guest single stepping over the given code position */
 	unsigned long singlestep_rip;
 
-	/* fields used by HYPER-V emulation */
-	u64 hv_vapic;
+	struct kvm_vcpu_hv hyperv;
 
 	cpumask_var_t wbinvd_dirty_mask;
 
@@ -586,6 +602,17 @@ struct kvm_apic_map {
 	struct kvm_lapic *logical_map[16][16];
 };
 
+/* Hyper-V emulation context */
+struct kvm_hv {
+	u64 hv_guest_os_id;
+	u64 hv_hypercall;
+	u64 hv_tsc_page;
+
+	/* Hyper-v based guest crash (NT kernel bugcheck) parameters */
+	u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
+	u64 hv_crash_ctl;
+};
+
 struct kvm_arch {
 	unsigned int n_used_mmu_pages;
 	unsigned int n_requested_mmu_pages;
@@ -604,6 +631,8 @@ struct kvm_arch {
 	bool iommu_noncoherent;
 #define __KVM_HAVE_ARCH_NONCOHERENT_DMA
 	atomic_t noncoherent_dma_count;
+#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
+	atomic_t assigned_device_count;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -643,16 +672,14 @@ struct kvm_arch {
 	/* reads protected by irq_srcu, writes by irq_lock */
 	struct hlist_head mask_notifier_list;
 
-	/* fields used by HYPER-V emulation */
-	u64 hv_guest_os_id;
-	u64 hv_hypercall;
-	u64 hv_tsc_page;
+	struct kvm_hv hyperv;
 
 	#ifdef CONFIG_KVM_MMU_AUDIT
 	int audit_point;
 	#endif
 
 	bool boot_vcpu_runs_old_kvmclock;
+	u32 bsp_vcpu_id;
 
 	u64 disabled_quirks;
 };
@@ -1201,5 +1228,7 @@ int __x86_set_memory_region(struct kvm *kvm,
 			    const struct kvm_userspace_memory_region *mem);
 int x86_set_memory_region(struct kvm *kvm,
 			  const struct kvm_userspace_memory_region *mem);
+bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
+bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 982dfc3..2dbc0bf 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -151,10 +151,12 @@ extern int mce_p5_enabled;
 #ifdef CONFIG_X86_MCE
 int mcheck_init(void);
 void mcheck_cpu_init(struct cpuinfo_x86 *c);
+void mcheck_cpu_clear(struct cpuinfo_x86 *c);
 void mcheck_vendor_init_severity(void);
 #else
 static inline int mcheck_init(void) { return 0; }
 static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
+static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {}
 static inline void mcheck_vendor_init_severity(void) {}
 #endif
 
@@ -181,20 +183,18 @@ DECLARE_PER_CPU(struct device *, mce_device);
 
 #ifdef CONFIG_X86_MCE_INTEL
 void mce_intel_feature_init(struct cpuinfo_x86 *c);
+void mce_intel_feature_clear(struct cpuinfo_x86 *c);
 void cmci_clear(void);
 void cmci_reenable(void);
 void cmci_rediscover(void);
 void cmci_recheck(void);
-void lmce_clear(void);
-void lmce_enable(void);
 #else
 static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
+static inline void mce_intel_feature_clear(struct cpuinfo_x86 *c) { }
 static inline void cmci_clear(void) {}
 static inline void cmci_reenable(void) {}
 static inline void cmci_rediscover(void) {}
 static inline void cmci_recheck(void) {}
-static inline void lmce_clear(void) {}
-static inline void lmce_enable(void) {}
 #endif
 
 #ifdef CONFIG_X86_MCE_AMD
diff --git a/arch/x86/include/asm/mm-arch-hooks.h b/arch/x86/include/asm/mm-arch-hooks.h
deleted file mode 100644
index 4e881a3..0000000
--- a/arch/x86/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_X86_MM_ARCH_HOOKS_H
-#define _ASM_X86_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_X86_MM_ARCH_HOOKS_H */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 09b9620..364d274 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -9,8 +9,7 @@
  * we put the segment information here.
  */
 typedef struct {
-	void *ldt;
-	int size;
+	struct ldt_struct *ldt;
 
 #ifdef CONFIG_X86_64
 	/* True if mm supports a task running in 32 bit compatibility mode. */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 5e8daee..984abfe 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -23,7 +23,7 @@ extern struct static_key rdpmc_always_available;
 
 static inline void load_mm_cr4(struct mm_struct *mm)
 {
-	if (static_key_true(&rdpmc_always_available) ||
+	if (static_key_false(&rdpmc_always_available) ||
 	    atomic_read(&mm->context.perf_rdpmc_allowed))
 		cr4_set_bits(X86_CR4_PCE);
 	else
@@ -34,6 +34,50 @@ static inline void load_mm_cr4(struct mm_struct *mm) {}
 #endif
 
 /*
+ * ldt_structs can be allocated, used, and freed, but they are never
+ * modified while live.
+ */
+struct ldt_struct {
+	/*
+	 * Xen requires page-aligned LDTs with special permissions.  This is
+	 * needed to prevent us from installing evil descriptors such as
+	 * call gates.  On native, we could merge the ldt_struct and LDT
+	 * allocations, but it's not worth trying to optimize.
+	 */
+	struct desc_struct *entries;
+	int size;
+};
+
+static inline void load_mm_ldt(struct mm_struct *mm)
+{
+	struct ldt_struct *ldt;
+
+	/* lockless_dereference synchronizes with smp_store_release */
+	ldt = lockless_dereference(mm->context.ldt);
+
+	/*
+	 * Any change to mm->context.ldt is followed by an IPI to all
+	 * CPUs with the mm active.  The LDT will not be freed until
+	 * after the IPI is handled by all such CPUs.  This means that,
+	 * if the ldt_struct changes before we return, the values we see
+	 * will be safe, and the new values will be loaded before we run
+	 * any user code.
+	 *
+	 * NB: don't try to convert this to use RCU without extreme care.
+	 * We would still need IRQs off, because we don't want to change
+	 * the local LDT after an IPI loaded a newer value than the one
+	 * that we can see.
+	 */
+
+	if (unlikely(ldt))
+		set_ldt(ldt->entries, ldt->size);
+	else
+		clear_LDT();
+
+	DEBUG_LOCKS_WARN_ON(preemptible());
+}
+
+/*
  * Used for LDT copy/destruction.
  */
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
@@ -78,12 +122,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		 * was called and then modify_ldt changed
 		 * prev->context.ldt but suppressed an IPI to this CPU.
 		 * In this case, prev->context.ldt != NULL, because we
-		 * never free an LDT while the mm still exists.  That
-		 * means that next->context.ldt != prev->context.ldt,
-		 * because mms never share an LDT.
+		 * never set context.ldt to NULL while the mm still
+		 * exists.  That means that next->context.ldt !=
+		 * prev->context.ldt, because mms never share an LDT.
 		 */
 		if (unlikely(prev->context.ldt != next->context.ldt))
-			load_LDT_nolock(&next->context);
+			load_mm_ldt(next);
 	}
 #ifdef CONFIG_SMP
 	  else {
@@ -106,7 +150,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			load_cr3(next->pgd);
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 			load_mm_cr4(next);
-			load_LDT_nolock(&next->context);
+			load_mm_ldt(next);
 		}
 	}
 #endif
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index c163215..aaf59b7 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -7,6 +7,7 @@
 
 struct ms_hyperv_info {
 	u32 features;
+	u32 misc_features;
 	u32 hints;
 };
 
@@ -20,4 +21,8 @@ void hyperv_vector_handler(struct pt_regs *regs);
 void hv_setup_vmbus_irq(void (*handler)(void));
 void hv_remove_vmbus_irq(void);
 
+void hv_setup_kexec_handler(void (*handler)(void));
+void hv_remove_kexec_handler(void);
+void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
+void hv_remove_crash_handler(void);
 #endif
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 9ebc3d0..fcd17c1 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -73,6 +73,12 @@
 #define MSR_LBR_CORE_FROM		0x00000040
 #define MSR_LBR_CORE_TO			0x00000060
 
+#define MSR_LBR_INFO_0			0x00000dc0 /* ... 0xddf for _31 */
+#define LBR_INFO_MISPRED		BIT_ULL(63)
+#define LBR_INFO_IN_TX			BIT_ULL(62)
+#define LBR_INFO_ABORT			BIT_ULL(61)
+#define LBR_INFO_CYCLES			0xffff
+
 #define MSR_IA32_PEBS_ENABLE		0x000003f1
 #define MSR_IA32_DS_AREA		0x00000600
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
@@ -80,13 +86,21 @@
 
 #define MSR_IA32_RTIT_CTL		0x00000570
 #define RTIT_CTL_TRACEEN		BIT(0)
+#define RTIT_CTL_CYCLEACC		BIT(1)
 #define RTIT_CTL_OS			BIT(2)
 #define RTIT_CTL_USR			BIT(3)
 #define RTIT_CTL_CR3EN			BIT(7)
 #define RTIT_CTL_TOPA			BIT(8)
+#define RTIT_CTL_MTC_EN			BIT(9)
 #define RTIT_CTL_TSC_EN			BIT(10)
 #define RTIT_CTL_DISRETC		BIT(11)
 #define RTIT_CTL_BRANCH_EN		BIT(13)
+#define RTIT_CTL_MTC_RANGE_OFFSET	14
+#define RTIT_CTL_MTC_RANGE		(0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
+#define RTIT_CTL_CYC_THRESH_OFFSET	19
+#define RTIT_CTL_CYC_THRESH		(0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
+#define RTIT_CTL_PSB_FREQ_OFFSET	24
+#define RTIT_CTL_PSB_FREQ      		(0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
 #define MSR_IA32_RTIT_STATUS		0x00000571
 #define RTIT_STATUS_CONTEXTEN		BIT(1)
 #define RTIT_STATUS_TRIGGEREN		BIT(2)
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 164e3f8..fa1195d 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -93,8 +93,6 @@ extern raw_spinlock_t pci_config_lock;
 extern int (*pcibios_enable_irq)(struct pci_dev *dev);
 extern void (*pcibios_disable_irq)(struct pci_dev *dev);
 
-extern bool mp_should_keep_irq(struct device *dev);
-
 struct pci_raw_ops {
 	int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
 						int reg, int len, u32 *val);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index dc0f6ed..7bcb861 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -159,6 +159,13 @@ struct x86_pmu_capability {
  */
 #define INTEL_PMC_IDX_FIXED_BTS				(INTEL_PMC_IDX_FIXED + 16)
 
+#define GLOBAL_STATUS_COND_CHG				BIT_ULL(63)
+#define GLOBAL_STATUS_BUFFER_OVF			BIT_ULL(62)
+#define GLOBAL_STATUS_UNC_OVF				BIT_ULL(61)
+#define GLOBAL_STATUS_ASIF				BIT_ULL(60)
+#define GLOBAL_STATUS_COUNTERS_FROZEN			BIT_ULL(59)
+#define GLOBAL_STATUS_LBRS_FROZEN			BIT_ULL(58)
+
 /*
  * IBS cpuid feature detection
  */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 43e6519..944f178 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -390,9 +390,6 @@ struct thread_struct {
 #endif
 	unsigned long		gs;
 
-	/* Floating point and extended processor state */
-	struct fpu		fpu;
-
 	/* Save middle states of ptrace breakpoints */
 	struct perf_event	*ptrace_bps[HBP_NUM];
 	/* Debug status used for traps, single steps, etc... */
@@ -418,6 +415,13 @@ struct thread_struct {
 	unsigned long		iopl;
 	/* Max allowed port in the bitmap, in bytes: */
 	unsigned		io_bitmap_max;
+
+	/* Floating point and extended processor state */
+	struct fpu		fpu;
+	/*
+	 * WARNING: 'fpu' is dynamically-sized.  It *MUST* be at
+	 * the end.
+	 */
 };
 
 /*
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 6fe6b18..9dfce4e 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -57,9 +57,9 @@ struct sigcontext {
 	unsigned long ip;
 	unsigned long flags;
 	unsigned short cs;
-	unsigned short __pad2;	/* Was called gs, but was always zero. */
-	unsigned short __pad1;	/* Was called fs, but was always zero. */
-	unsigned short ss;
+	unsigned short gs;
+	unsigned short fs;
+	unsigned short __pad0;
 	unsigned long err;
 	unsigned long trapno;
 	unsigned long oldmask;
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 751bf4b..d7f3b3b 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -79,12 +79,12 @@ do {									\
 #else /* CONFIG_X86_32 */
 
 /* frame pointer must be last for get_wchan */
-#define SAVE_CONTEXT    "pushq %%rbp ; movq %%rsi,%%rbp\n\t"
-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t"
+#define SAVE_CONTEXT    "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
+#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
 
 #define __EXTRA_CLOBBER  \
 	, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
-	  "r12", "r13", "r14", "r15", "flags"
+	  "r12", "r13", "r14", "r15"
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #define __switch_canary							  \
@@ -100,11 +100,7 @@ do {									\
 #define __switch_canary_iparam
 #endif	/* CC_STACKPROTECTOR */
 
-/*
- * There is no need to save or restore flags, because flags are always
- * clean in kernel mode, with the possible exception of IOPL.  Kernel IOPL
- * has no effect.
- */
+/* Save restore flags to clear handle leaking NT */
 #define switch_to(prev, next, last) \
 	asm volatile(SAVE_CONTEXT					  \
 	     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	  \
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 94605c0..aad56eb 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -51,6 +51,7 @@ extern int unsynchronized_tsc(void);
 extern int check_tsc_unstable(void);
 extern int check_tsc_disabled(void);
 extern unsigned long native_calibrate_tsc(void);
+extern unsigned long long native_sched_clock_from_tsc(u64 tsc);
 
 extern int tsc_clocksource_reliable;
 
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index da772ed..448b7ca 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -47,6 +47,7 @@
 #define CPU_BASED_MOV_DR_EXITING                0x00800000
 #define CPU_BASED_UNCOND_IO_EXITING             0x01000000
 #define CPU_BASED_USE_IO_BITMAPS                0x02000000
+#define CPU_BASED_MONITOR_TRAP_FLAG             0x08000000
 #define CPU_BASED_USE_MSR_BITMAPS               0x10000000
 #define CPU_BASED_MONITOR_EXITING               0x20000000
 #define CPU_BASED_PAUSE_EXITING                 0x40000000
@@ -367,29 +368,29 @@ enum vmcs_field {
 #define TYPE_PHYSICAL_APIC_EVENT        (10 << 12)
 #define TYPE_PHYSICAL_APIC_INST         (15 << 12)
 
-/* segment AR */
-#define SEGMENT_AR_L_MASK (1 << 13)
-
-#define AR_TYPE_ACCESSES_MASK 1
-#define AR_TYPE_READABLE_MASK (1 << 1)
-#define AR_TYPE_WRITEABLE_MASK (1 << 2)
-#define AR_TYPE_CODE_MASK (1 << 3)
-#define AR_TYPE_MASK 0x0f
-#define AR_TYPE_BUSY_64_TSS 11
-#define AR_TYPE_BUSY_32_TSS 11
-#define AR_TYPE_BUSY_16_TSS 3
-#define AR_TYPE_LDT 2
-
-#define AR_UNUSABLE_MASK (1 << 16)
-#define AR_S_MASK (1 << 4)
-#define AR_P_MASK (1 << 7)
-#define AR_L_MASK (1 << 13)
-#define AR_DB_MASK (1 << 14)
-#define AR_G_MASK (1 << 15)
-#define AR_DPL_SHIFT 5
-#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3)
-
-#define AR_RESERVD_MASK 0xfffe0f00
+/* segment AR in VMCS -- these are different from what LAR reports */
+#define VMX_SEGMENT_AR_L_MASK (1 << 13)
+
+#define VMX_AR_TYPE_ACCESSES_MASK 1
+#define VMX_AR_TYPE_READABLE_MASK (1 << 1)
+#define VMX_AR_TYPE_WRITEABLE_MASK (1 << 2)
+#define VMX_AR_TYPE_CODE_MASK (1 << 3)
+#define VMX_AR_TYPE_MASK 0x0f
+#define VMX_AR_TYPE_BUSY_64_TSS 11
+#define VMX_AR_TYPE_BUSY_32_TSS 11
+#define VMX_AR_TYPE_BUSY_16_TSS 3
+#define VMX_AR_TYPE_LDT 2
+
+#define VMX_AR_UNUSABLE_MASK (1 << 16)
+#define VMX_AR_S_MASK (1 << 4)
+#define VMX_AR_P_MASK (1 << 7)
+#define VMX_AR_L_MASK (1 << 13)
+#define VMX_AR_DB_MASK (1 << 14)
+#define VMX_AR_G_MASK (1 << 15)
+#define VMX_AR_DPL_SHIFT 5
+#define VMX_AR_DPL(ar) (((ar) >> VMX_AR_DPL_SHIFT) & 3)
+
+#define VMX_AR_RESERVD_MASK 0xfffe0f00
 
 #define TSS_PRIVATE_MEMSLOT			(KVM_USER_MEM_SLOTS + 0)
 #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	(KVM_USER_MEM_SLOTS + 1)
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 8fba544..f0412c5 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -27,6 +27,8 @@
 #define HV_X64_MSR_VP_RUNTIME_AVAILABLE		(1 << 0)
 /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
 #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
+/* Partition reference TSC MSR is available */
+#define HV_X64_MSR_REFERENCE_TSC_AVAILABLE              (1 << 9)
 
 /* A partition's reference time stamp counter (TSC) page */
 #define HV_X64_MSR_REFERENCE_TSC		0x40000021
@@ -108,6 +110,8 @@
 #define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE		(1 << 4)
 /* Support for a virtual guest idle state is available */
 #define HV_X64_GUEST_IDLE_STATE_AVAILABLE		(1 << 5)
+/* Guest crash data handler available */
+#define HV_X64_GUEST_CRASH_MSR_AVAILABLE		(1 << 10)
 
 /*
  * Implementation recommendations. Indicates which behaviors the hypervisor
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index a4ae82e..cd54147 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -354,7 +354,7 @@ struct kvm_xcrs {
 struct kvm_sync_regs {
 };
 
-#define KVM_QUIRK_LINT0_REENABLED	(1 << 0)
-#define KVM_QUIRK_CD_NW_CLEARED		(1 << 1)
+#define KVM_X86_QUIRK_LINT0_REENABLED	(1 << 0)
+#define KVM_X86_QUIRK_CD_NW_CLEARED	(1 << 1)
 
 #endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index a0eab85..76880ed 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -15,7 +15,8 @@ struct mce {
 	__u64 time;	/* wall time_t when error was detected */
 	__u8  cpuvendor;	/* cpu vendor as encoded in system.h */
 	__u8  inject_flags;	/* software inject flags */
-	__u16  pad;
+	__u8  severity;
+	__u8  usable_addr;
 	__u32 cpuid;	/* CPUID 1 EAX */
 	__u8  cs;		/* code segment */
 	__u8  bank;	/* machine check bank */
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index 0e8a973..40836a9 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -177,24 +177,9 @@ struct sigcontext {
 	__u64 rip;
 	__u64 eflags;		/* RFLAGS */
 	__u16 cs;
-
-	/*
-	 * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
-	 * Linux saved and restored fs and gs in these slots.  This
-	 * was counterproductive, as fsbase and gsbase were never
-	 * saved, so arch_prctl was presumably unreliable.
-	 *
-	 * If these slots are ever needed for any other purpose, there
-	 * is some risk that very old 64-bit binaries could get
-	 * confused.  I doubt that many such binaries still work,
-	 * though, since the same patch in 2.5.64 also removed the
-	 * 64-bit set_thread_area syscall, so it appears that there is
-	 * no TLS API that works in both pre- and post-2.5.64 kernels.
-	 */
-	__u16 __pad2;		/* Was gs. */
-	__u16 __pad1;		/* Was fs. */
-
-	__u16 ss;
+	__u16 gs;
+	__u16 fs;
+	__u16 __pad0;
 	__u64 err;
 	__u64 trapno;
 	__u64 oldmask;
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 1fe9218..37fee27 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -58,6 +58,7 @@
 #define EXIT_REASON_INVALID_STATE       33
 #define EXIT_REASON_MSR_LOAD_FAIL       34
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_MONITOR_TRAP_FLAG   37
 #define EXIT_REASON_MONITOR_INSTRUCTION 39
 #define EXIT_REASON_PAUSE_INSTRUCTION   40
 #define EXIT_REASON_MCE_DURING_VMENTRY  41
@@ -106,6 +107,7 @@
 	{ EXIT_REASON_MSR_READ,              "MSR_READ" }, \
 	{ EXIT_REASON_MSR_WRITE,             "MSR_WRITE" }, \
 	{ EXIT_REASON_MWAIT_INSTRUCTION,     "MWAIT_INSTRUCTION" }, \
+	{ EXIT_REASON_MONITOR_TRAP_FLAG,     "MONITOR_TRAP_FLAG" }, \
 	{ EXIT_REASON_MONITOR_INSTRUCTION,   "MONITOR_INSTRUCTION" }, \
 	{ EXIT_REASON_PAUSE_INSTRUCTION,     "PAUSE_INSTRUCTION" }, \
 	{ EXIT_REASON_MCE_DURING_VMENTRY,    "MCE_DURING_VMENTRY" }, \
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index dcb5285..cde732c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1424,7 +1424,7 @@ static inline void __x2apic_disable(void)
 {
 	u64 msr;
 
-	if (cpu_has_apic)
+	if (!cpu_has_apic)
 		return;
 
 	rdmsrl(MSR_IA32_APICBASE, msr);
@@ -1483,10 +1483,13 @@ void x2apic_setup(void)
 
 static __init void x2apic_disable(void)
 {
-	u32 x2apic_id;
+	u32 x2apic_id, state = x2apic_state;
 
-	if (x2apic_state != X2APIC_ON)
-		goto out;
+	x2apic_mode = 0;
+	x2apic_state = X2APIC_DISABLED;
+
+	if (state != X2APIC_ON)
+		return;
 
 	x2apic_id = read_apic_id();
 	if (x2apic_id >= 255)
@@ -1494,9 +1497,6 @@ static __init void x2apic_disable(void)
 
 	__x2apic_disable();
 	register_lapic_address(mp_lapic_addr);
-out:
-	x2apic_state = X2APIC_DISABLED;
-	x2apic_mode = 0;
 }
 
 static __init void x2apic_enable(void)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 845dc0d..206052e 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -943,7 +943,7 @@ static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info)
 	 */
 	if (irq < nr_legacy_irqs() && data->count == 1) {
 		if (info->ioapic_trigger != data->trigger)
-			mp_register_handler(irq, data->trigger);
+			mp_register_handler(irq, info->ioapic_trigger);
 		data->entry.trigger = data->trigger = info->ioapic_trigger;
 		data->entry.polarity = data->polarity = info->ioapic_polarity;
 	}
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 28eba2d..2683f36 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -322,7 +322,7 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
 		irq_data->chip = &lapic_controller;
 		irq_data->chip_data = data;
 		irq_data->hwirq = virq + i;
-		err = assign_irq_vector_policy(virq, irq_data->node, data,
+		err = assign_irq_vector_policy(virq + i, irq_data->node, data,
 					       info);
 		if (err)
 			goto error;
@@ -409,12 +409,6 @@ static void __setup_vector_irq(int cpu)
 	int irq, vector;
 	struct apic_chip_data *data;
 
-	/*
-	 * vector_lock will make sure that we don't run into irq vector
-	 * assignments that might be happening on another cpu in parallel,
-	 * while we setup our initial vector to irq mappings.
-	 */
-	raw_spin_lock(&vector_lock);
 	/* Mark the inuse vectors */
 	for_each_active_irq(irq) {
 		data = apic_chip_data(irq_get_irq_data(irq));
@@ -436,16 +430,16 @@ static void __setup_vector_irq(int cpu)
 		if (!cpumask_test_cpu(cpu, data->domain))
 			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
 	}
-	raw_spin_unlock(&vector_lock);
 }
 
 /*
- * Setup the vector to irq mappings.
+ * Setup the vector to irq mappings. Must be called with vector_lock held.
  */
 void setup_vector_irq(int cpu)
 {
 	int irq;
 
+	lockdep_assert_held(&vector_lock);
 	/*
 	 * On most of the platforms, legacy PIC delivers the interrupts on the
 	 * boot cpu. But there are certain platforms where PIC interrupts are
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 9bff687..4eb065c 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -46,6 +46,8 @@ obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= perf_event_intel_uncore.o \
 					   perf_event_intel_uncore_snb.o \
 					   perf_event_intel_uncore_snbep.o \
 					   perf_event_intel_uncore_nhmex.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_msr.o
+obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_msr.o
 endif
 
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 922c5e0..cb9e5df 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1410,7 +1410,7 @@ void cpu_init(void)
 	load_sp0(t, &current->thread);
 	set_tss_desc(cpu, t);
 	load_TR_desc();
-	load_LDT(&init_mm.context);
+	load_mm_ldt(&init_mm);
 
 	clear_all_debug_regs();
 	dbg_restore_debug_regs();
@@ -1459,7 +1459,7 @@ void cpu_init(void)
 	load_sp0(t, thread);
 	set_tss_desc(cpu, t);
 	load_TR_desc();
-	load_LDT(&init_mm.context);
+	load_mm_ldt(&init_mm);
 
 	t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
 
diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/kernel/cpu/intel_pt.h
index 1c338b0..336878a 100644
--- a/arch/x86/kernel/cpu/intel_pt.h
+++ b/arch/x86/kernel/cpu/intel_pt.h
@@ -25,32 +25,11 @@
  */
 #define TOPA_PMI_MARGIN 512
 
-/*
- * Table of Physical Addresses bits
- */
-enum topa_sz {
-	TOPA_4K	= 0,
-	TOPA_8K,
-	TOPA_16K,
-	TOPA_32K,
-	TOPA_64K,
-	TOPA_128K,
-	TOPA_256K,
-	TOPA_512K,
-	TOPA_1MB,
-	TOPA_2MB,
-	TOPA_4MB,
-	TOPA_8MB,
-	TOPA_16MB,
-	TOPA_32MB,
-	TOPA_64MB,
-	TOPA_128MB,
-	TOPA_SZ_END,
-};
+#define TOPA_SHIFT 12
 
-static inline unsigned int sizes(enum topa_sz tsz)
+static inline unsigned int sizes(unsigned int tsz)
 {
-	return 1 << (tsz + 12);
+	return 1 << (tsz + TOPA_SHIFT);
 };
 
 struct topa_entry {
@@ -66,20 +45,26 @@ struct topa_entry {
 	u64	rsvd4	: 16;
 };
 
-#define TOPA_SHIFT 12
-#define PT_CPUID_LEAVES 2
+#define PT_CPUID_LEAVES		2
+#define PT_CPUID_REGS_NUM	4 /* number of regsters (eax, ebx, ecx, edx) */
 
 enum pt_capabilities {
 	PT_CAP_max_subleaf = 0,
 	PT_CAP_cr3_filtering,
+	PT_CAP_psb_cyc,
+	PT_CAP_mtc,
 	PT_CAP_topa_output,
 	PT_CAP_topa_multiple_entries,
+	PT_CAP_single_range_output,
 	PT_CAP_payloads_lip,
+	PT_CAP_mtc_periods,
+	PT_CAP_cycle_thresholds,
+	PT_CAP_psb_periods,
 };
 
 struct pt_pmu {
 	struct pmu		pmu;
-	u32			caps[4 * PT_CPUID_LEAVES];
+	u32			caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
 };
 
 /**
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index bb34b03..a3311c8 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,4 +1,4 @@
-obj-y				=  mce.o mce-severity.o
+obj-y				=  mce.o mce-severity.o mce-genpool.o
 
 obj-$(CONFIG_X86_ANCIENT_MCE)	+= winchip.o p5.o
 obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index a1aef95..34c89a3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -57,7 +57,6 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
 
 	m.addr = mem_err->physical_addr;
 	mce_log(&m);
-	mce_notify_irq();
 }
 EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
new file mode 100644
index 0000000..0a85010
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -0,0 +1,99 @@
+/*
+ * MCE event pool management in MCE context
+ *
+ * Copyright (C) 2015 Intel Corp.
+ * Author: Chen, Gong <gong.chen@linux.intel.com>
+ *
+ * This file is licensed under GPLv2.
+ */
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <linux/genalloc.h>
+#include <linux/llist.h>
+#include "mce-internal.h"
+
+/*
+ * printk() is not safe in MCE context. This is a lock-less memory allocator
+ * used to save error information organized in a lock-less list.
+ *
+ * This memory pool is only to be used to save MCE records in MCE context.
+ * MCE events are rare, so a fixed size memory pool should be enough. Use
+ * 2 pages to save MCE events for now (~80 MCE records at most).
+ */
+#define MCE_POOLSZ	(2 * PAGE_SIZE)
+
+static struct gen_pool *mce_evt_pool;
+static LLIST_HEAD(mce_event_llist);
+static char gen_pool_buf[MCE_POOLSZ];
+
+void mce_gen_pool_process(void)
+{
+	struct llist_node *head;
+	struct mce_evt_llist *node;
+	struct mce *mce;
+
+	head = llist_del_all(&mce_event_llist);
+	if (!head)
+		return;
+
+	head = llist_reverse_order(head);
+	llist_for_each_entry(node, head, llnode) {
+		mce = &node->mce;
+		atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+		gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
+	}
+}
+
+bool mce_gen_pool_empty(void)
+{
+	return llist_empty(&mce_event_llist);
+}
+
+int mce_gen_pool_add(struct mce *mce)
+{
+	struct mce_evt_llist *node;
+
+	if (!mce_evt_pool)
+		return -EINVAL;
+
+	node = (void *)gen_pool_alloc(mce_evt_pool, sizeof(*node));
+	if (!node) {
+		pr_warn_ratelimited("MCE records pool full!\n");
+		return -ENOMEM;
+	}
+
+	memcpy(&node->mce, mce, sizeof(*mce));
+	llist_add(&node->llnode, &mce_event_llist);
+
+	return 0;
+}
+
+static int mce_gen_pool_create(void)
+{
+	struct gen_pool *tmpp;
+	int ret = -ENOMEM;
+
+	tmpp = gen_pool_create(ilog2(sizeof(struct mce_evt_llist)), -1);
+	if (!tmpp)
+		goto out;
+
+	ret = gen_pool_add(tmpp, (unsigned long)gen_pool_buf, MCE_POOLSZ, -1);
+	if (ret) {
+		gen_pool_destroy(tmpp);
+		goto out;
+	}
+
+	mce_evt_pool = tmpp;
+
+out:
+	return ret;
+}
+
+int mce_gen_pool_init(void)
+{
+	/* Just init mce_gen_pool once. */
+	if (mce_evt_pool)
+		return 0;
+
+	return mce_gen_pool_create();
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index fe32074..547720e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -13,6 +13,8 @@ enum severity_level {
 	MCE_PANIC_SEVERITY,
 };
 
+extern struct atomic_notifier_head x86_mce_decoder_chain;
+
 #define ATTR_LEN		16
 #define INITIAL_CHECK_INTERVAL	5 * 60 /* 5 minutes */
 
@@ -24,6 +26,16 @@ struct mce_bank {
 	char			attrname[ATTR_LEN];	/* attribute name */
 };
 
+struct mce_evt_llist {
+	struct llist_node llnode;
+	struct mce mce;
+};
+
+void mce_gen_pool_process(void);
+bool mce_gen_pool_empty(void);
+int mce_gen_pool_add(struct mce *mce);
+int mce_gen_pool_init(void);
+
 extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
@@ -67,3 +79,5 @@ static inline int apei_clear_mce(u64 record_id)
 	return -EINVAL;
 }
 #endif
+
+void mce_inject_log(struct mce *m);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index df919ff..0f8f21c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -52,11 +52,11 @@
 
 static DEFINE_MUTEX(mce_chrdev_read_mutex);
 
-#define rcu_dereference_check_mce(p) \
+#define mce_log_get_idx_check(p) \
 ({ \
-	rcu_lockdep_assert(rcu_read_lock_sched_held() || \
-			   lockdep_is_held(&mce_chrdev_read_mutex), \
-			   "suspicious rcu_dereference_check_mce() usage"); \
+	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
+			 !lockdep_is_held(&mce_chrdev_read_mutex), \
+			 "suspicious mce_log_get_idx_check() usage"); \
 	smp_load_acquire(&(p)); \
 })
 
@@ -110,15 +110,17 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
  */
 mce_banks_t mce_banks_ce_disabled;
 
-static DEFINE_PER_CPU(struct work_struct, mce_work);
+static struct work_struct mce_work;
+static struct irq_work mce_irq_work;
 
 static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
+static int mce_usable_address(struct mce *m);
 
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
  */
-static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
 
 /* Do initial initialization of a struct mce */
 void mce_setup(struct mce *m)
@@ -157,12 +159,13 @@ void mce_log(struct mce *mce)
 	/* Emit the trace record: */
 	trace_mce_record(mce);
 
-	atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+	if (!mce_gen_pool_add(mce))
+		irq_work_queue(&mce_irq_work);
 
 	mce->finished = 0;
 	wmb();
 	for (;;) {
-		entry = rcu_dereference_check_mce(mcelog.next);
+		entry = mce_log_get_idx_check(mcelog.next);
 		for (;;) {
 
 			/*
@@ -196,48 +199,23 @@ void mce_log(struct mce *mce)
 	set_bit(0, &mce_need_notify);
 }
 
-static void drain_mcelog_buffer(void)
+void mce_inject_log(struct mce *m)
 {
-	unsigned int next, i, prev = 0;
-
-	next = ACCESS_ONCE(mcelog.next);
-
-	do {
-		struct mce *m;
-
-		/* drain what was logged during boot */
-		for (i = prev; i < next; i++) {
-			unsigned long start = jiffies;
-			unsigned retries = 1;
-
-			m = &mcelog.entry[i];
-
-			while (!m->finished) {
-				if (time_after_eq(jiffies, start + 2*retries))
-					retries++;
-
-				cpu_relax();
-
-				if (!m->finished && retries >= 4) {
-					pr_err("skipping error being logged currently!\n");
-					break;
-				}
-			}
-			smp_rmb();
-			atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
-		}
-
-		memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m));
-		prev = next;
-		next = cmpxchg(&mcelog.next, prev, 0);
-	} while (next != prev);
+	mutex_lock(&mce_chrdev_read_mutex);
+	mce_log(m);
+	mutex_unlock(&mce_chrdev_read_mutex);
 }
+EXPORT_SYMBOL_GPL(mce_inject_log);
 
+static struct notifier_block mce_srao_nb;
 
 void mce_register_decode_chain(struct notifier_block *nb)
 {
+	/* Ensure SRAO notifier has the highest priority in the decode chain. */
+	if (nb != &mce_srao_nb && nb->priority == INT_MAX)
+		nb->priority -= 1;
+
 	atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
-	drain_mcelog_buffer();
 }
 EXPORT_SYMBOL_GPL(mce_register_decode_chain);
 
@@ -461,61 +439,6 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
 	}
 }
 
-/*
- * Simple lockless ring to communicate PFNs from the exception handler with the
- * process context work function. This is vastly simplified because there's
- * only a single reader and a single writer.
- */
-#define MCE_RING_SIZE 16	/* we use one entry less */
-
-struct mce_ring {
-	unsigned short start;
-	unsigned short end;
-	unsigned long ring[MCE_RING_SIZE];
-};
-static DEFINE_PER_CPU(struct mce_ring, mce_ring);
-
-/* Runs with CPU affinity in workqueue */
-static int mce_ring_empty(void)
-{
-	struct mce_ring *r = this_cpu_ptr(&mce_ring);
-
-	return r->start == r->end;
-}
-
-static int mce_ring_get(unsigned long *pfn)
-{
-	struct mce_ring *r;
-	int ret = 0;
-
-	*pfn = 0;
-	get_cpu();
-	r = this_cpu_ptr(&mce_ring);
-	if (r->start == r->end)
-		goto out;
-	*pfn = r->ring[r->start];
-	r->start = (r->start + 1) % MCE_RING_SIZE;
-	ret = 1;
-out:
-	put_cpu();
-	return ret;
-}
-
-/* Always runs in MCE context with preempt off */
-static int mce_ring_add(unsigned long pfn)
-{
-	struct mce_ring *r = this_cpu_ptr(&mce_ring);
-	unsigned next;
-
-	next = (r->end + 1) % MCE_RING_SIZE;
-	if (next == r->start)
-		return -1;
-	r->ring[r->end] = pfn;
-	wmb();
-	r->end = next;
-	return 0;
-}
-
 int mce_available(struct cpuinfo_x86 *c)
 {
 	if (mca_cfg.disabled)
@@ -525,12 +448,10 @@ int mce_available(struct cpuinfo_x86 *c)
 
 static void mce_schedule_work(void)
 {
-	if (!mce_ring_empty())
-		schedule_work(this_cpu_ptr(&mce_work));
+	if (!mce_gen_pool_empty() && keventd_up())
+		schedule_work(&mce_work);
 }
 
-static DEFINE_PER_CPU(struct irq_work, mce_irq_work);
-
 static void mce_irq_work_cb(struct irq_work *entry)
 {
 	mce_notify_irq();
@@ -551,8 +472,29 @@ static void mce_report_event(struct pt_regs *regs)
 		return;
 	}
 
-	irq_work_queue(this_cpu_ptr(&mce_irq_work));
+	irq_work_queue(&mce_irq_work);
+}
+
+static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
+				void *data)
+{
+	struct mce *mce = (struct mce *)data;
+	unsigned long pfn;
+
+	if (!mce)
+		return NOTIFY_DONE;
+
+	if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) {
+		pfn = mce->addr >> PAGE_SHIFT;
+		memory_failure(pfn, MCE_VECTOR, 0);
+	}
+
+	return NOTIFY_OK;
 }
+static struct notifier_block mce_srao_nb = {
+	.notifier_call	= srao_decode_notifier,
+	.priority = INT_MAX,
+};
 
 /*
  * Read ADDR and MISC registers.
@@ -672,8 +614,11 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		 */
 		if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
 			if (m.status & MCI_STATUS_ADDRV) {
-				mce_ring_add(m.addr >> PAGE_SHIFT);
-				mce_schedule_work();
+				m.severity = severity;
+				m.usable_addr = mce_usable_address(&m);
+
+				if (!mce_gen_pool_add(&m))
+					mce_schedule_work();
 			}
 		}
 
@@ -1143,15 +1088,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
 		mce_read_aux(&m, i);
 
-		/*
-		 * Action optional error. Queue address for later processing.
-		 * When the ring overflows we just ignore the AO error.
-		 * RED-PEN add some logging mechanism when
-		 * usable_address or mce_add_ring fails.
-		 * RED-PEN don't ignore overflow for mca_cfg.tolerant == 0
-		 */
-		if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
-			mce_ring_add(m.addr >> PAGE_SHIFT);
+		/* assuming valid severity level != 0 */
+		m.severity = severity;
+		m.usable_addr = mce_usable_address(&m);
 
 		mce_log(&m);
 
@@ -1247,14 +1186,11 @@ int memory_failure(unsigned long pfn, int vector, int flags)
 /*
  * Action optional processing happens here (picking up
  * from the list of faulting pages that do_machine_check()
- * placed into the "ring").
+ * placed into the genpool).
  */
 static void mce_process_work(struct work_struct *dummy)
 {
-	unsigned long pfn;
-
-	while (mce_ring_get(&pfn))
-		memory_failure(pfn, MCE_VECTOR, 0);
+	mce_gen_pool_process();
 }
 
 #ifdef CONFIG_X86_MCE_INTEL
@@ -1678,6 +1614,17 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	}
 }
 
+static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
+{
+	switch (c->x86_vendor) {
+	case X86_VENDOR_INTEL:
+		mce_intel_feature_clear(c);
+		break;
+	default:
+		break;
+	}
+}
+
 static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 {
 	unsigned long iv = check_interval * HZ;
@@ -1731,13 +1678,36 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
 		return;
 	}
 
+	if (mce_gen_pool_init()) {
+		mca_cfg.disabled = true;
+		pr_emerg("Couldn't allocate MCE records pool!\n");
+		return;
+	}
+
 	machine_check_vector = do_machine_check;
 
 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(c);
 	__mcheck_cpu_init_timer();
-	INIT_WORK(this_cpu_ptr(&mce_work), mce_process_work);
-	init_irq_work(this_cpu_ptr(&mce_irq_work), &mce_irq_work_cb);
+}
+
+/*
+ * Called for each booted CPU to clear some machine checks opt-ins
+ */
+void mcheck_cpu_clear(struct cpuinfo_x86 *c)
+{
+	if (mca_cfg.disabled)
+		return;
+
+	if (!mce_available(c))
+		return;
+
+	/*
+	 * Possibly to clear general settings generic to x86
+	 * __mcheck_cpu_clear_generic(c);
+	 */
+	__mcheck_cpu_clear_vendor(c);
+
 }
 
 /*
@@ -1850,7 +1820,7 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
 			goto out;
 	}
 
-	next = rcu_dereference_check_mce(mcelog.next);
+	next = mce_log_get_idx_check(mcelog.next);
 
 	/* Only supports full reads right now */
 	err = -EINVAL;
@@ -2056,8 +2026,12 @@ __setup("mce", mcheck_enable);
 int __init mcheck_init(void)
 {
 	mcheck_intel_therm_init();
+	mce_register_decode_chain(&mce_srao_nb);
 	mcheck_vendor_init_severity();
 
+	INIT_WORK(&mce_work, mce_process_work);
+	init_irq_work(&mce_irq_work, mce_irq_work_cb);
+
 	return 0;
 }
 
@@ -2591,5 +2565,20 @@ static int __init mcheck_debugfs_init(void)
 
 	return 0;
 }
-late_initcall(mcheck_debugfs_init);
+#else
+static int __init mcheck_debugfs_init(void) { return -EINVAL; }
 #endif
+
+static int __init mcheck_late_init(void)
+{
+	mcheck_debugfs_init();
+
+	/*
+	 * Flush out everything that has been logged during early boot, now that
+	 * everything has been initialized (workqueues, decoders, ...).
+	 */
+	mce_schedule_work();
+
+	return 0;
+}
+late_initcall(mcheck_late_init);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 844f56c..1e8bb6c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -146,6 +146,27 @@ void mce_intel_hcpu_update(unsigned long cpu)
 	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
 }
 
+static void cmci_toggle_interrupt_mode(bool on)
+{
+	unsigned long flags, *owned;
+	int bank;
+	u64 val;
+
+	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+	owned = this_cpu_ptr(mce_banks_owned);
+	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
+		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+
+		if (on)
+			val |= MCI_CTL2_CMCI_EN;
+		else
+			val &= ~MCI_CTL2_CMCI_EN;
+
+		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
+	}
+	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+}
+
 unsigned long cmci_intel_adjust_timer(unsigned long interval)
 {
 	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
@@ -175,7 +196,7 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval)
 		 */
 		if (!atomic_read(&cmci_storm_on_cpus)) {
 			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
-			cmci_reenable();
+			cmci_toggle_interrupt_mode(true);
 			cmci_recheck();
 		}
 		return CMCI_POLL_INTERVAL;
@@ -186,22 +207,6 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval)
 	}
 }
 
-static void cmci_storm_disable_banks(void)
-{
-	unsigned long flags, *owned;
-	int bank;
-	u64 val;
-
-	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
-	owned = this_cpu_ptr(mce_banks_owned);
-	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
-		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
-		val &= ~MCI_CTL2_CMCI_EN;
-		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
-	}
-	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
-}
-
 static bool cmci_storm_detect(void)
 {
 	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
@@ -223,7 +228,7 @@ static bool cmci_storm_detect(void)
 	if (cnt <= CMCI_STORM_THRESHOLD)
 		return false;
 
-	cmci_storm_disable_banks();
+	cmci_toggle_interrupt_mode(false);
 	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
 	r = atomic_add_return(1, &cmci_storm_on_cpus);
 	mce_timer_kick(CMCI_STORM_INTERVAL);
@@ -246,7 +251,6 @@ static void intel_threshold_interrupt(void)
 		return;
 
 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
-	mce_notify_irq();
 }
 
 /*
@@ -435,7 +439,7 @@ static void intel_init_cmci(void)
 	cmci_recheck();
 }
 
-void intel_init_lmce(void)
+static void intel_init_lmce(void)
 {
 	u64 val;
 
@@ -448,9 +452,26 @@ void intel_init_lmce(void)
 		wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
 }
 
+static void intel_clear_lmce(void)
+{
+	u64 val;
+
+	if (!lmce_supported())
+		return;
+
+	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
+	val &= ~MCG_EXT_CTL_LMCE_EN;
+	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
+}
+
 void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
 	intel_init_thermal(c);
 	intel_init_cmci();
 	intel_init_lmce();
 }
+
+void mce_intel_feature_clear(struct cpuinfo_x86 *c)
+{
+	intel_clear_lmce();
+}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 6236a54..3c98639 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -377,17 +377,16 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif)
 	return err;
 }
 
-static int mc_device_remove(struct device *dev, struct subsys_interface *sif)
+static void mc_device_remove(struct device *dev, struct subsys_interface *sif)
 {
 	int cpu = dev->id;
 
 	if (!cpu_online(cpu))
-		return 0;
+		return;
 
 	pr_debug("CPU%d removed\n", cpu);
 	microcode_fini_cpu(cpu);
 	sysfs_remove_group(&dev->kobj, &mc_attr_group);
-	return 0;
 }
 
 static struct subsys_interface mc_cpu_interface = {
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index aad4bd8..f794bfa 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -18,6 +18,7 @@
 #include <linux/efi.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/kexec.h>
 #include <asm/processor.h>
 #include <asm/hypervisor.h>
 #include <asm/hyperv.h>
@@ -28,10 +29,14 @@
 #include <asm/i8259.h>
 #include <asm/apic.h>
 #include <asm/timer.h>
+#include <asm/reboot.h>
 
 struct ms_hyperv_info ms_hyperv;
 EXPORT_SYMBOL_GPL(ms_hyperv);
 
+static void (*hv_kexec_handler)(void);
+static void (*hv_crash_handler)(struct pt_regs *regs);
+
 #if IS_ENABLED(CONFIG_HYPERV)
 static void (*vmbus_handler)(void);
 
@@ -67,8 +72,47 @@ void hv_remove_vmbus_irq(void)
 }
 EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq);
 EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq);
+
+void hv_setup_kexec_handler(void (*handler)(void))
+{
+	hv_kexec_handler = handler;
+}
+EXPORT_SYMBOL_GPL(hv_setup_kexec_handler);
+
+void hv_remove_kexec_handler(void)
+{
+	hv_kexec_handler = NULL;
+}
+EXPORT_SYMBOL_GPL(hv_remove_kexec_handler);
+
+void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs))
+{
+	hv_crash_handler = handler;
+}
+EXPORT_SYMBOL_GPL(hv_setup_crash_handler);
+
+void hv_remove_crash_handler(void)
+{
+	hv_crash_handler = NULL;
+}
+EXPORT_SYMBOL_GPL(hv_remove_crash_handler);
 #endif
 
+static void hv_machine_shutdown(void)
+{
+	if (kexec_in_progress && hv_kexec_handler)
+		hv_kexec_handler();
+	native_machine_shutdown();
+}
+
+static void hv_machine_crash_shutdown(struct pt_regs *regs)
+{
+	if (hv_crash_handler)
+		hv_crash_handler(regs);
+	native_machine_crash_shutdown(regs);
+}
+
+
 static uint32_t  __init ms_hyperv_platform(void)
 {
 	u32 eax;
@@ -114,6 +158,7 @@ static void __init ms_hyperv_init_platform(void)
 	 * Extract the features and hints
 	 */
 	ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
+	ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
 	ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
 
 	printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
@@ -141,6 +186,8 @@ static void __init ms_hyperv_init_platform(void)
 	no_timer_check = 1;
 #endif
 
+	machine_ops.shutdown = hv_machine_shutdown;
+	machine_ops.crash_shutdown = hv_machine_crash_shutdown;
 }
 
 const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3658de4..f56cf07 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1551,7 +1551,7 @@ static void __init filter_events(struct attribute **attrs)
 }
 
 /* Merge two pointer arrays */
-static __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
+__init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
 {
 	struct attribute **new;
 	int j, i;
@@ -2179,21 +2179,25 @@ static unsigned long get_segment_base(unsigned int segment)
 	int idx = segment >> 3;
 
 	if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
+		struct ldt_struct *ldt;
+
 		if (idx > LDT_ENTRIES)
 			return 0;
 
-		if (idx > current->active_mm->context.size)
+		/* IRQs are off, so this synchronizes with smp_store_release */
+		ldt = lockless_dereference(current->active_mm->context.ldt);
+		if (!ldt || idx > ldt->size)
 			return 0;
 
-		desc = current->active_mm->context.ldt;
+		desc = &ldt->entries[idx];
 	} else {
 		if (idx > GDT_ENTRIES)
 			return 0;
 
-		desc = raw_cpu_ptr(gdt_page.gdt);
+		desc = raw_cpu_ptr(gdt_page.gdt) + idx;
 	}
 
-	return get_desc_base(desc + idx);
+	return get_desc_base(desc);
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3e7fd27..5edf6d8 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -165,7 +165,7 @@ struct intel_excl_cntrs {
 	unsigned	core_id;	/* per-core: core id */
 };
 
-#define MAX_LBR_ENTRIES		16
+#define MAX_LBR_ENTRIES		32
 
 enum {
 	X86_PERF_KFREE_SHARED = 0,
@@ -594,6 +594,7 @@ struct x86_pmu {
 	struct event_constraint *pebs_constraints;
 	void		(*pebs_aliases)(struct perf_event *event);
 	int 		max_pebs_events;
+	unsigned long	free_running_flags;
 
 	/*
 	 * Intel LBR
@@ -624,6 +625,7 @@ struct x86_pmu {
 struct x86_perf_task_context {
 	u64 lbr_from[MAX_LBR_ENTRIES];
 	u64 lbr_to[MAX_LBR_ENTRIES];
+	u64 lbr_info[MAX_LBR_ENTRIES];
 	int lbr_callstack_users;
 	int lbr_stack_state;
 };
@@ -793,6 +795,8 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
 ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
 ssize_t intel_event_sysfs_show(char *page, u64 config);
 
+struct attribute **merge_attr(struct attribute **a, struct attribute **b);
+
 #ifdef CONFIG_CPU_SUP_AMD
 
 int amd_pmu_init(void);
@@ -808,20 +812,6 @@ static inline int amd_pmu_init(void)
 
 #ifdef CONFIG_CPU_SUP_INTEL
 
-static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
-{
-	/* user explicitly requested branch sampling */
-	if (has_branch_stack(event))
-		return true;
-
-	/* implicit branch sampling to correct PEBS skid */
-	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
-	    x86_pmu.intel_cap.pebs_format < 2)
-		return true;
-
-	return false;
-}
-
 static inline bool intel_pmu_has_bts(struct perf_event *event)
 {
 	if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
@@ -873,6 +863,8 @@ extern struct event_constraint intel_ivb_pebs_event_constraints[];
 
 extern struct event_constraint intel_hsw_pebs_event_constraints[];
 
+extern struct event_constraint intel_skl_pebs_event_constraints[];
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
 void intel_pmu_pebs_enable(struct perf_event *event);
@@ -911,6 +903,8 @@ void intel_pmu_lbr_init_snb(void);
 
 void intel_pmu_lbr_init_hsw(void);
 
+void intel_pmu_lbr_init_skl(void);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);
@@ -934,6 +928,7 @@ static inline int is_ht_workaround_enabled(void)
 {
 	return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
 }
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index b9826a9..3f124d5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -177,6 +177,14 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
 	EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_skl_event_constraints[] = {
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
+	INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2),	/* INST_RETIRED.PREC_DIST */
+	EVENT_CONSTRAINT_END
+};
+
 static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
 	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
 	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
@@ -193,6 +201,13 @@ static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
 	EVENT_EXTRA_END
 };
 
+static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
+	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
+	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+	EVENT_EXTRA_END
+};
+
 EVENT_ATTR_STR(mem-loads,	mem_ld_nhm,	"event=0x0b,umask=0x10,ldlat=3");
 EVENT_ATTR_STR(mem-loads,	mem_ld_snb,	"event=0xcd,umask=0x1,ldlat=3");
 EVENT_ATTR_STR(mem-stores,	mem_st_snb,	"event=0xcd,umask=0x2");
@@ -244,6 +259,200 @@ static u64 intel_pmu_event_map(int hw_event)
 	return intel_perfmon_event_map[hw_event];
 }
 
+/*
+ * Notes on the events:
+ * - data reads do not include code reads (comparable to earlier tables)
+ * - data counts include speculative execution (except L1 write, dtlb, bpu)
+ * - remote node access includes remote memory, remote cache, remote mmio.
+ * - prefetches are not included in the counts.
+ * - icache miss does not include decoded icache
+ */
+
+#define SKL_DEMAND_DATA_RD		BIT_ULL(0)
+#define SKL_DEMAND_RFO			BIT_ULL(1)
+#define SKL_ANY_RESPONSE		BIT_ULL(16)
+#define SKL_SUPPLIER_NONE		BIT_ULL(17)
+#define SKL_L3_MISS_LOCAL_DRAM		BIT_ULL(26)
+#define SKL_L3_MISS_REMOTE_HOP0_DRAM	BIT_ULL(27)
+#define SKL_L3_MISS_REMOTE_HOP1_DRAM	BIT_ULL(28)
+#define SKL_L3_MISS_REMOTE_HOP2P_DRAM	BIT_ULL(29)
+#define SKL_L3_MISS			(SKL_L3_MISS_LOCAL_DRAM| \
+					 SKL_L3_MISS_REMOTE_HOP0_DRAM| \
+					 SKL_L3_MISS_REMOTE_HOP1_DRAM| \
+					 SKL_L3_MISS_REMOTE_HOP2P_DRAM)
+#define SKL_SPL_HIT			BIT_ULL(30)
+#define SKL_SNOOP_NONE			BIT_ULL(31)
+#define SKL_SNOOP_NOT_NEEDED		BIT_ULL(32)
+#define SKL_SNOOP_MISS			BIT_ULL(33)
+#define SKL_SNOOP_HIT_NO_FWD		BIT_ULL(34)
+#define SKL_SNOOP_HIT_WITH_FWD		BIT_ULL(35)
+#define SKL_SNOOP_HITM			BIT_ULL(36)
+#define SKL_SNOOP_NON_DRAM		BIT_ULL(37)
+#define SKL_ANY_SNOOP			(SKL_SPL_HIT|SKL_SNOOP_NONE| \
+					 SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
+					 SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
+					 SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM)
+#define SKL_DEMAND_READ			SKL_DEMAND_DATA_RD
+#define SKL_SNOOP_DRAM			(SKL_SNOOP_NONE| \
+					 SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
+					 SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
+					 SKL_SNOOP_HITM|SKL_SPL_HIT)
+#define SKL_DEMAND_WRITE		SKL_DEMAND_RFO
+#define SKL_LLC_ACCESS			SKL_ANY_RESPONSE
+#define SKL_L3_MISS_REMOTE		(SKL_L3_MISS_REMOTE_HOP0_DRAM| \
+					 SKL_L3_MISS_REMOTE_HOP1_DRAM| \
+					 SKL_L3_MISS_REMOTE_HOP2P_DRAM)
+
+static __initconst const u64 skl_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_INST_RETIRED.ALL_LOADS */
+		[ C(RESULT_MISS)   ] = 0x151,	/* L1D.REPLACEMENT */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_INST_RETIRED.ALL_STORES */
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x283,	/* ICACHE_64B.MISS */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
+		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
+		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_INST_RETIRED.ALL_LOADS */
+		[ C(RESULT_MISS)   ] = 0x608,	/* DTLB_LOAD_MISSES.WALK_COMPLETED */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_INST_RETIRED.ALL_STORES */
+		[ C(RESULT_MISS)   ] = 0x649,	/* DTLB_STORE_MISSES.WALK_COMPLETED */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2085,	/* ITLB_MISSES.STLB_HIT */
+		[ C(RESULT_MISS)   ] = 0xe85,	/* ITLB_MISSES.WALK_COMPLETED */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0xc4,	/* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0xc5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
+		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
+		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+};
+
+static __initconst const u64 skl_hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
+				       SKL_LLC_ACCESS|SKL_ANY_SNOOP,
+		[ C(RESULT_MISS)   ] = SKL_DEMAND_READ|
+				       SKL_L3_MISS|SKL_ANY_SNOOP|
+				       SKL_SUPPLIER_NONE,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
+				       SKL_LLC_ACCESS|SKL_ANY_SNOOP,
+		[ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE|
+				       SKL_L3_MISS|SKL_ANY_SNOOP|
+				       SKL_SUPPLIER_NONE,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
+				       SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
+		[ C(RESULT_MISS)   ] = SKL_DEMAND_READ|
+				       SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
+				       SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
+		[ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE|
+				       SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+};
+
 #define SNB_DMND_DATA_RD	(1ULL << 0)
 #define SNB_DMND_RFO		(1ULL << 1)
 #define SNB_DMND_IFETCH		(1ULL << 2)
@@ -1114,7 +1323,7 @@ static struct extra_reg intel_slm_extra_regs[] __read_mostly =
 {
 	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
 	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
-	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffffull, RSP_1),
+	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1),
 	EVENT_EXTRA_END
 };
 
@@ -1594,6 +1803,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 
 	loops = 0;
 again:
+	intel_pmu_lbr_read();
 	intel_pmu_ack_status(status);
 	if (++loops > 100) {
 		static bool warned = false;
@@ -1608,16 +1818,16 @@ again:
 
 	inc_irq_stat(apic_perf_irqs);
 
-	intel_pmu_lbr_read();
 
 	/*
-	 * CondChgd bit 63 doesn't mean any overflow status. Ignore
-	 * and clear the bit.
+	 * Ignore a range of extra bits in status that do not indicate
+	 * overflow by themselves.
 	 */
-	if (__test_and_clear_bit(63, (unsigned long *)&status)) {
-		if (!status)
-			goto done;
-	}
+	status &= ~(GLOBAL_STATUS_COND_CHG |
+		    GLOBAL_STATUS_ASIF |
+		    GLOBAL_STATUS_LBRS_FROZEN);
+	if (!status)
+		goto done;
 
 	/*
 	 * PEBS overflow sets bit 62 in the global status register
@@ -1699,18 +1909,22 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
-static int intel_alt_er(int idx)
+static int intel_alt_er(int idx, u64 config)
 {
+	int alt_idx;
 	if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
 		return idx;
 
 	if (idx == EXTRA_REG_RSP_0)
-		return EXTRA_REG_RSP_1;
+		alt_idx = EXTRA_REG_RSP_1;
 
 	if (idx == EXTRA_REG_RSP_1)
-		return EXTRA_REG_RSP_0;
+		alt_idx = EXTRA_REG_RSP_0;
+
+	if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask)
+		return idx;
 
-	return idx;
+	return alt_idx;
 }
 
 static void intel_fixup_er(struct perf_event *event, int idx)
@@ -1799,7 +2013,7 @@ again:
 		 */
 		c = NULL;
 	} else {
-		idx = intel_alt_er(idx);
+		idx = intel_alt_er(idx, reg->config);
 		if (idx != reg->idx) {
 			raw_spin_unlock_irqrestore(&era->lock, flags);
 			goto again;
@@ -2253,6 +2467,15 @@ static void intel_pebs_aliases_snb(struct perf_event *event)
 	}
 }
 
+static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
+{
+	unsigned long flags = x86_pmu.free_running_flags;
+
+	if (event->attr.use_clockid)
+		flags &= ~PERF_SAMPLE_TIME;
+	return flags;
+}
+
 static int intel_pmu_hw_config(struct perf_event *event)
 {
 	int ret = x86_pmu_hw_config(event);
@@ -2263,7 +2486,8 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	if (event->attr.precise_ip) {
 		if (!event->attr.freq) {
 			event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
-			if (!(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS))
+			if (!(event->attr.sample_type &
+			      ~intel_pmu_free_running_flags(event)))
 				event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
 		}
 		if (x86_pmu.pebs_aliases)
@@ -2534,7 +2758,7 @@ static int intel_pmu_cpu_prepare(int cpu)
 	if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
 		cpuc->shared_regs = allocate_shared_regs(cpu);
 		if (!cpuc->shared_regs)
-			return NOTIFY_BAD;
+			goto err;
 	}
 
 	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
@@ -2542,18 +2766,27 @@ static int intel_pmu_cpu_prepare(int cpu)
 
 		cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
 		if (!cpuc->constraint_list)
-			return NOTIFY_BAD;
+			goto err_shared_regs;
 
 		cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
-		if (!cpuc->excl_cntrs) {
-			kfree(cpuc->constraint_list);
-			kfree(cpuc->shared_regs);
-			return NOTIFY_BAD;
-		}
+		if (!cpuc->excl_cntrs)
+			goto err_constraint_list;
+
 		cpuc->excl_thread_id = 0;
 	}
 
 	return NOTIFY_OK;
+
+err_constraint_list:
+	kfree(cpuc->constraint_list);
+	cpuc->constraint_list = NULL;
+
+err_shared_regs:
+	kfree(cpuc->shared_regs);
+	cpuc->shared_regs = NULL;
+
+err:
+	return NOTIFY_BAD;
 }
 
 static void intel_pmu_cpu_starting(int cpu)
@@ -2685,6 +2918,8 @@ static __initconst const struct x86_pmu core_pmu = {
 	.event_map		= intel_pmu_event_map,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
+	.free_running_flags	= PEBS_FREERUNNING_FLAGS,
+
 	/*
 	 * Intel PMCs cannot be accessed sanely above 32-bit width,
 	 * so we install an artificial 1<<31 period regardless of
@@ -2723,6 +2958,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.event_map		= intel_pmu_event_map,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
+	.free_running_flags	= PEBS_FREERUNNING_FLAGS,
 	/*
 	 * Intel PMCs cannot be accessed sanely above 32 bit width,
 	 * so we install an artificial 1<<31 period regardless of
@@ -3260,6 +3496,29 @@ __init int intel_pmu_init(void)
 		pr_cont("Broadwell events, ");
 		break;
 
+	case 78: /* 14nm Skylake Mobile */
+	case 94: /* 14nm Skylake Desktop */
+		x86_pmu.late_ack = true;
+		memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+		intel_pmu_lbr_init_skl();
+
+		x86_pmu.event_constraints = intel_skl_event_constraints;
+		x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_skl_extra_regs;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+		/* all extra regs are per-cpu when HT is on */
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+		x86_pmu.hw_config = hsw_hw_config;
+		x86_pmu.get_event_constraints = hsw_get_event_constraints;
+		x86_pmu.cpu_events = hsw_events_attrs;
+		WARN_ON(!x86_pmu.format_attrs);
+		x86_pmu.cpu_events = hsw_events_attrs;
+		pr_cont("Skylake events, ");
+		break;
+
 	default:
 		switch (x86_pmu.version) {
 		case 1:
@@ -3329,7 +3588,7 @@ __init int intel_pmu_init(void)
 	 */
 	if (x86_pmu.extra_regs) {
 		for (er = x86_pmu.extra_regs; er->msr; er++) {
-			er->extra_msr_access = check_msr(er->msr, 0x1ffUL);
+			er->extra_msr_access = check_msr(er->msr, 0x11UL);
 			/* Disable LBR select mapping */
 			if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
 				x86_pmu.lbr_sel_map = NULL;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
index 43dd672..54690e8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c
@@ -62,9 +62,6 @@ struct bts_buffer {
 
 struct pmu bts_pmu;
 
-void intel_pmu_enable_bts(u64 config);
-void intel_pmu_disable_bts(void);
-
 static size_t buf_size(struct page *page)
 {
 	return 1 << (PAGE_SHIFT + page_private(page));
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index 1880761..377e8f8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -952,6 +952,14 @@ static u64 intel_cqm_event_count(struct perf_event *event)
 		return 0;
 
 	/*
+	 * Getting up-to-date values requires an SMP IPI which is not
+	 * possible if we're being called in interrupt context. Return
+	 * the cached values instead.
+	 */
+	if (unlikely(in_interrupt()))
+		goto out;
+
+	/*
 	 * Notice that we don't perform the reading of an RMID
 	 * atomically, because we can't hold a spin lock across the
 	 * IPIs.
@@ -1247,7 +1255,7 @@ static inline void cqm_pick_event_reader(int cpu)
 	cpumask_set_cpu(cpu, &cqm_cpumask);
 }
 
-static void intel_cqm_cpu_prepare(unsigned int cpu)
+static void intel_cqm_cpu_starting(unsigned int cpu)
 {
 	struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -1288,13 +1296,11 @@ static int intel_cqm_cpu_notifier(struct notifier_block *nb,
 	unsigned int cpu  = (unsigned long)hcpu;
 
 	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_UP_PREPARE:
-		intel_cqm_cpu_prepare(cpu);
-		break;
 	case CPU_DOWN_PREPARE:
 		intel_cqm_cpu_exit(cpu);
 		break;
 	case CPU_STARTING:
+		intel_cqm_cpu_starting(cpu);
 		cqm_pick_event_reader(cpu);
 		break;
 	}
@@ -1365,7 +1371,7 @@ static int __init intel_cqm_init(void)
 		goto out;
 
 	for_each_online_cpu(i) {
-		intel_cqm_cpu_prepare(i);
+		intel_cqm_cpu_starting(i);
 		cqm_pick_event_reader(i);
 	}
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 71fc402..84f236a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -224,6 +224,19 @@ union hsw_tsx_tuning {
 
 #define PEBS_HSW_TSX_FLAGS	0xff00000000ULL
 
+/* Same as HSW, plus TSC */
+
+struct pebs_record_skl {
+	u64 flags, ip;
+	u64 ax, bx, cx, dx;
+	u64 si, di, bp, sp;
+	u64 r8,  r9,  r10, r11;
+	u64 r12, r13, r14, r15;
+	u64 status, dla, dse, lat;
+	u64 real_ip, tsx_tuning;
+	u64 tsc;
+};
+
 void init_debug_store_on_cpu(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -675,6 +688,28 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_skl_pebs_event_constraints[] = {
+	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),	/* INST_RETIRED.PREC_DIST */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+	INTEL_PLD_CONSTRAINT(0x1cd, 0xf),		      /* MEM_TRANS_RETIRED.* */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
+	/* Allow all events as PEBS with no flags */
+	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+	EVENT_CONSTRAINT_END
+};
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 {
 	struct event_constraint *c;
@@ -754,6 +789,11 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	struct debug_store *ds = cpuc->ds;
+	bool large_pebs = ds->pebs_interrupt_threshold >
+		ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+
+	if (large_pebs)
+		intel_pmu_drain_pebs_buffer();
 
 	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
 
@@ -762,12 +802,8 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
 		cpuc->pebs_enabled &= ~(1ULL << 63);
 
-	if (ds->pebs_interrupt_threshold >
-	    ds->pebs_buffer_base + x86_pmu.pebs_record_size) {
-		intel_pmu_drain_pebs_buffer();
-		if (!pebs_is_enabled(cpuc))
-			perf_sched_cb_dec(event->ctx->pmu);
-	}
+	if (large_pebs && !pebs_is_enabled(cpuc))
+		perf_sched_cb_dec(event->ctx->pmu);
 
 	if (cpuc->enabled)
 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
@@ -885,7 +921,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	return 0;
 }
 
-static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
+static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
 {
 	if (pebs->tsx_tuning) {
 		union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
@@ -894,7 +930,7 @@ static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
 	return 0;
 }
 
-static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
+static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
 {
 	u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
 
@@ -918,7 +954,7 @@ static void setup_pebs_sample_data(struct perf_event *event,
 	 * unconditionally access the 'extra' entries.
 	 */
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	struct pebs_record_hsw *pebs = __pebs;
+	struct pebs_record_skl *pebs = __pebs;
 	u64 sample_type;
 	int fll, fst, dsrc;
 	int fl = event->hw.flags;
@@ -1016,6 +1052,16 @@ static void setup_pebs_sample_data(struct perf_event *event,
 			data->txn = intel_hsw_transaction(pebs);
 	}
 
+	/*
+	 * v3 supplies an accurate time stamp, so we use that
+	 * for the time stamp.
+	 *
+	 * We can only do this for the default trace clock.
+	 */
+	if (x86_pmu.intel_cap.pebs_format >= 3 &&
+		event->attr.use_clockid == 0)
+		data->time = native_sched_clock_from_tsc(pebs->tsc);
+
 	if (has_branch_stack(event))
 		data->br_stack = &cpuc->lbr_stack;
 }
@@ -1142,6 +1188,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 
 	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
 		struct pebs_record_nhm *p = at;
+		u64 pebs_status;
 
 		/* PEBS v3 has accurate status bits */
 		if (x86_pmu.intel_cap.pebs_format >= 3) {
@@ -1152,12 +1199,17 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			continue;
 		}
 
-		bit = find_first_bit((unsigned long *)&p->status,
+		pebs_status = p->status & cpuc->pebs_enabled;
+		pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
+
+		bit = find_first_bit((unsigned long *)&pebs_status,
 					x86_pmu.max_pebs_events);
-		if (bit >= x86_pmu.max_pebs_events)
-			continue;
-		if (!test_bit(bit, cpuc->active_mask))
+		if (WARN(bit >= x86_pmu.max_pebs_events,
+			 "PEBS record without PEBS event! status=%Lx pebs_enabled=%Lx active_mask=%Lx",
+			 (unsigned long long)p->status, (unsigned long long)cpuc->pebs_enabled,
+			 *(unsigned long long *)cpuc->active_mask))
 			continue;
+
 		/*
 		 * The PEBS hardware does not deal well with the situation
 		 * when events happen near to each other and multiple bits
@@ -1172,27 +1224,21 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 		 * one, and it's not possible to reconstruct all events
 		 * that caused the PEBS record. It's called collision.
 		 * If collision happened, the record will be dropped.
-		 *
 		 */
-		if (p->status != (1 << bit)) {
-			u64 pebs_status;
-
-			/* slow path */
-			pebs_status = p->status & cpuc->pebs_enabled;
-			pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
-			if (pebs_status != (1 << bit)) {
-				for_each_set_bit(i, (unsigned long *)&pebs_status,
-						 MAX_PEBS_EVENTS)
-					error[i]++;
-				continue;
-			}
+		if (p->status != (1ULL << bit)) {
+			for_each_set_bit(i, (unsigned long *)&pebs_status,
+					 x86_pmu.max_pebs_events)
+				error[i]++;
+			continue;
 		}
+
 		counts[bit]++;
 	}
 
 	for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
 		if ((counts[bit] == 0) && (error[bit] == 0))
 			continue;
+
 		event = cpuc->events[bit];
 		WARN_ON_ONCE(!event);
 		WARN_ON_ONCE(!event->attr.precise_ip);
@@ -1245,6 +1291,14 @@ void __init intel_ds_init(void)
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
 			break;
 
+		case 3:
+			pr_cont("PEBS fmt3%c, ", pebs_type);
+			x86_pmu.pebs_record_size =
+						sizeof(struct pebs_record_skl);
+			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
+			x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
+			break;
+
 		default:
 			printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
 			x86_pmu.pebs = 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 452a7bd..b2c9475 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -13,7 +13,8 @@ enum {
 	LBR_FORMAT_EIP		= 0x02,
 	LBR_FORMAT_EIP_FLAGS	= 0x03,
 	LBR_FORMAT_EIP_FLAGS2	= 0x04,
-	LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_EIP_FLAGS2,
+	LBR_FORMAT_INFO		= 0x05,
+	LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_INFO,
 };
 
 static enum {
@@ -140,6 +141,13 @@ static void __intel_pmu_lbr_enable(bool pmi)
 	u64 debugctl, lbr_select = 0, orig_debugctl;
 
 	/*
+	 * No need to unfreeze manually, as v4 can do that as part
+	 * of the GLOBAL_STATUS ack.
+	 */
+	if (pmi && x86_pmu.version >= 4)
+		return;
+
+	/*
 	 * No need to reprogram LBR_SELECT in a PMI, as it
 	 * did not change.
 	 */
@@ -186,6 +194,8 @@ static void intel_pmu_lbr_reset_64(void)
 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 		wrmsrl(x86_pmu.lbr_from + i, 0);
 		wrmsrl(x86_pmu.lbr_to   + i, 0);
+		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+			wrmsrl(MSR_LBR_INFO_0 + i, 0);
 	}
 }
 
@@ -230,10 +240,12 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
 
 	mask = x86_pmu.lbr_nr - 1;
 	tos = intel_pmu_lbr_tos();
-	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+	for (i = 0; i < tos; i++) {
 		lbr_idx = (tos - i) & mask;
 		wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
 		wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+			wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
 	}
 	task_ctx->lbr_stack_state = LBR_NONE;
 }
@@ -251,10 +263,12 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 
 	mask = x86_pmu.lbr_nr - 1;
 	tos = intel_pmu_lbr_tos();
-	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+	for (i = 0; i < tos; i++) {
 		lbr_idx = (tos - i) & mask;
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
 		rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
 	}
 	task_ctx->lbr_stack_state = LBR_VALID;
 }
@@ -411,16 +425,31 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 	u64 tos = intel_pmu_lbr_tos();
 	int i;
 	int out = 0;
+	int num = x86_pmu.lbr_nr;
 
-	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+	if (cpuc->lbr_sel->config & LBR_CALL_STACK)
+		num = tos;
+
+	for (i = 0; i < num; i++) {
 		unsigned long lbr_idx = (tos - i) & mask;
 		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
 		int skip = 0;
+		u16 cycles = 0;
 		int lbr_flags = lbr_desc[lbr_format];
 
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
 		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
 
+		if (lbr_format == LBR_FORMAT_INFO) {
+			u64 info;
+
+			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
+			mis = !!(info & LBR_INFO_MISPRED);
+			pred = !mis;
+			in_tx = !!(info & LBR_INFO_IN_TX);
+			abort = !!(info & LBR_INFO_ABORT);
+			cycles = (info & LBR_INFO_CYCLES);
+		}
 		if (lbr_flags & LBR_EIP_FLAGS) {
 			mis = !!(from & LBR_FROM_FLAG_MISPRED);
 			pred = !mis;
@@ -450,6 +479,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 		cpuc->lbr_entries[out].predicted = pred;
 		cpuc->lbr_entries[out].in_tx	 = in_tx;
 		cpuc->lbr_entries[out].abort	 = abort;
+		cpuc->lbr_entries[out].cycles	 = cycles;
 		cpuc->lbr_entries[out].reserved	 = 0;
 		out++;
 	}
@@ -947,6 +977,26 @@ void intel_pmu_lbr_init_hsw(void)
 	pr_cont("16-deep LBR, ");
 }
 
+/* skylake */
+__init void intel_pmu_lbr_init_skl(void)
+{
+	x86_pmu.lbr_nr	 = 32;
+	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
+	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+
+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
+
+	/*
+	 * SW branch filter usage:
+	 * - support syscall, sysret capture.
+	 *   That requires LBR_FAR but that means far
+	 *   jmp need to be filtered out
+	 */
+	pr_cont("32-deep LBR, ");
+}
+
 /* atom */
 void __init intel_pmu_lbr_init_atom(void)
 {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index 183de71..4216928 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -65,15 +65,21 @@ static struct pt_cap_desc {
 } pt_caps[] = {
 	PT_CAP(max_subleaf,		0, CR_EAX, 0xffffffff),
 	PT_CAP(cr3_filtering,		0, CR_EBX, BIT(0)),
+	PT_CAP(psb_cyc,			0, CR_EBX, BIT(1)),
+	PT_CAP(mtc,			0, CR_EBX, BIT(3)),
 	PT_CAP(topa_output,		0, CR_ECX, BIT(0)),
 	PT_CAP(topa_multiple_entries,	0, CR_ECX, BIT(1)),
+	PT_CAP(single_range_output,	0, CR_ECX, BIT(2)),
 	PT_CAP(payloads_lip,		0, CR_ECX, BIT(31)),
+	PT_CAP(mtc_periods,		1, CR_EAX, 0xffff0000),
+	PT_CAP(cycle_thresholds,	1, CR_EBX, 0xffff),
+	PT_CAP(psb_periods,		1, CR_EBX, 0xffff0000),
 };
 
 static u32 pt_cap_get(enum pt_capabilities cap)
 {
 	struct pt_cap_desc *cd = &pt_caps[cap];
-	u32 c = pt_pmu.caps[cd->leaf * 4 + cd->reg];
+	u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
 	unsigned int shift = __ffs(cd->mask);
 
 	return (c & cd->mask) >> shift;
@@ -94,12 +100,22 @@ static struct attribute_group pt_cap_group = {
 	.name	= "caps",
 };
 
+PMU_FORMAT_ATTR(cyc,		"config:1"	);
+PMU_FORMAT_ATTR(mtc,		"config:9"	);
 PMU_FORMAT_ATTR(tsc,		"config:10"	);
 PMU_FORMAT_ATTR(noretcomp,	"config:11"	);
+PMU_FORMAT_ATTR(mtc_period,	"config:14-17"	);
+PMU_FORMAT_ATTR(cyc_thresh,	"config:19-22"	);
+PMU_FORMAT_ATTR(psb_period,	"config:24-27"	);
 
 static struct attribute *pt_formats_attr[] = {
+	&format_attr_cyc.attr,
+	&format_attr_mtc.attr,
 	&format_attr_tsc.attr,
 	&format_attr_noretcomp.attr,
+	&format_attr_mtc_period.attr,
+	&format_attr_cyc_thresh.attr,
+	&format_attr_psb_period.attr,
 	NULL,
 };
 
@@ -129,10 +145,10 @@ static int __init pt_pmu_hw_init(void)
 
 	for (i = 0; i < PT_CPUID_LEAVES; i++) {
 		cpuid_count(20, i,
-			    &pt_pmu.caps[CR_EAX + i*4],
-			    &pt_pmu.caps[CR_EBX + i*4],
-			    &pt_pmu.caps[CR_ECX + i*4],
-			    &pt_pmu.caps[CR_EDX + i*4]);
+			    &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
+			    &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
+			    &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
+			    &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
 	}
 
 	ret = -ENOMEM;
@@ -170,15 +186,65 @@ fail:
 	return ret;
 }
 
-#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC)
+#define RTIT_CTL_CYC_PSB (RTIT_CTL_CYCLEACC	| \
+			  RTIT_CTL_CYC_THRESH	| \
+			  RTIT_CTL_PSB_FREQ)
+
+#define RTIT_CTL_MTC	(RTIT_CTL_MTC_EN	| \
+			 RTIT_CTL_MTC_RANGE)
+
+#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN		| \
+			RTIT_CTL_DISRETC	| \
+			RTIT_CTL_CYC_PSB	| \
+			RTIT_CTL_MTC)
 
 static bool pt_event_valid(struct perf_event *event)
 {
 	u64 config = event->attr.config;
+	u64 allowed, requested;
 
 	if ((config & PT_CONFIG_MASK) != config)
 		return false;
 
+	if (config & RTIT_CTL_CYC_PSB) {
+		if (!pt_cap_get(PT_CAP_psb_cyc))
+			return false;
+
+		allowed = pt_cap_get(PT_CAP_psb_periods);
+		requested = (config & RTIT_CTL_PSB_FREQ) >>
+			RTIT_CTL_PSB_FREQ_OFFSET;
+		if (requested && (!(allowed & BIT(requested))))
+			return false;
+
+		allowed = pt_cap_get(PT_CAP_cycle_thresholds);
+		requested = (config & RTIT_CTL_CYC_THRESH) >>
+			RTIT_CTL_CYC_THRESH_OFFSET;
+		if (requested && (!(allowed & BIT(requested))))
+			return false;
+	}
+
+	if (config & RTIT_CTL_MTC) {
+		/*
+		 * In the unlikely case that CPUID lists valid mtc periods,
+		 * but not the mtc capability, drop out here.
+		 *
+		 * Spec says that setting mtc period bits while mtc bit in
+		 * CPUID is 0 will #GP, so better safe than sorry.
+		 */
+		if (!pt_cap_get(PT_CAP_mtc))
+			return false;
+
+		allowed = pt_cap_get(PT_CAP_mtc_periods);
+		if (!allowed)
+			return false;
+
+		requested = (config & RTIT_CTL_MTC_RANGE) >>
+			RTIT_CTL_MTC_RANGE_OFFSET;
+
+		if (!(allowed & BIT(requested)))
+			return false;
+	}
+
 	return true;
 }
 
@@ -191,6 +257,11 @@ static void pt_config(struct perf_event *event)
 {
 	u64 reg;
 
+	if (!event->hw.itrace_started) {
+		event->hw.itrace_started = 1;
+		wrmsrl(MSR_IA32_RTIT_STATUS, 0);
+	}
+
 	reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
 
 	if (!event->attr.exclude_kernel)
@@ -910,7 +981,6 @@ void intel_pt_interrupt(void)
 
 		pt_config_buffer(buf->cur->table, buf->cur_idx,
 				 buf->output_off);
-		wrmsrl(MSR_IA32_RTIT_STATUS, 0);
 		pt_config(event);
 	}
 }
@@ -934,7 +1004,6 @@ static void pt_event_start(struct perf_event *event, int mode)
 
 	pt_config_buffer(buf->cur->table, buf->cur_idx,
 			 buf->output_off);
-	wrmsrl(MSR_IA32_RTIT_STATUS, 0);
 	pt_config(event);
 }
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 5cbd4e6..81431c0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -86,6 +86,10 @@ static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
 			 1<<RAPL_IDX_RAM_NRG_STAT|\
 			 1<<RAPL_IDX_PP1_NRG_STAT)
 
+/* Knights Landing has PKG, RAM */
+#define RAPL_IDX_KNL	(1<<RAPL_IDX_PKG_NRG_STAT|\
+			 1<<RAPL_IDX_RAM_NRG_STAT)
+
 /*
  * event code: LSB 8 bits, passed in attr->config
  * any other bit is reserved
@@ -486,6 +490,18 @@ static struct attribute *rapl_events_hsw_attr[] = {
 	NULL,
 };
 
+static struct attribute *rapl_events_knl_attr[] = {
+	EVENT_PTR(rapl_pkg),
+	EVENT_PTR(rapl_ram),
+
+	EVENT_PTR(rapl_pkg_unit),
+	EVENT_PTR(rapl_ram_unit),
+
+	EVENT_PTR(rapl_pkg_scale),
+	EVENT_PTR(rapl_ram_scale),
+	NULL,
+};
+
 static struct attribute_group rapl_pmu_events_group = {
 	.name = "events",
 	.attrs = NULL, /* patched at runtime */
@@ -730,6 +746,10 @@ static int __init rapl_pmu_init(void)
 		rapl_cntr_mask = RAPL_IDX_SRV;
 		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
 		break;
+	case 87: /* Knights Landing */
+		rapl_add_quirk(rapl_hsw_server_quirk);
+		rapl_cntr_mask = RAPL_IDX_KNL;
+		rapl_pmu_events_group.attrs = rapl_events_knl_attr;
 
 	default:
 		/* unsupported */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 21b5e38..560e525 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -911,6 +911,9 @@ static int __init uncore_pci_init(void)
 	case 63: /* Haswell-EP */
 		ret = hswep_uncore_pci_init();
 		break;
+	case 86: /* BDX-DE */
+		ret = bdx_uncore_pci_init();
+		break;
 	case 42: /* Sandy Bridge */
 		ret = snb_uncore_pci_init();
 		break;
@@ -1209,6 +1212,11 @@ static int __init uncore_cpu_init(void)
 		break;
 	case 42: /* Sandy Bridge */
 	case 58: /* Ivy Bridge */
+	case 60: /* Haswell */
+	case 69: /* Haswell */
+	case 70: /* Haswell */
+	case 61: /* Broadwell */
+	case 71: /* Broadwell */
 		snb_uncore_cpu_init();
 		break;
 	case 45: /* Sandy Bridge-EP */
@@ -1224,6 +1232,9 @@ static int __init uncore_cpu_init(void)
 	case 63: /* Haswell-EP */
 		hswep_uncore_cpu_init();
 		break;
+	case 86: /* BDX-DE */
+		bdx_uncore_cpu_init();
+		break;
 	default:
 		return 0;
 	}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 0f77f0a..72c54c2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -336,6 +336,8 @@ int ivbep_uncore_pci_init(void);
 void ivbep_uncore_cpu_init(void);
 int hswep_uncore_pci_init(void);
 void hswep_uncore_cpu_init(void);
+int bdx_uncore_pci_init(void);
+void bdx_uncore_cpu_init(void);
 
 /* perf_event_intel_uncore_nhmex.c */
 void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index b005a78..f78574b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -45,6 +45,11 @@
 #define SNB_UNC_CBO_0_PER_CTR0                  0x706
 #define SNB_UNC_CBO_MSR_OFFSET                  0x10
 
+/* SNB ARB register */
+#define SNB_UNC_ARB_PER_CTR0			0x3b0
+#define SNB_UNC_ARB_PERFEVTSEL0			0x3b2
+#define SNB_UNC_ARB_MSR_OFFSET			0x10
+
 /* NHM global control register */
 #define NHM_UNC_PERF_GLOBAL_CTL                 0x391
 #define NHM_UNC_FIXED_CTR                       0x394
@@ -115,7 +120,7 @@ static struct intel_uncore_ops snb_uncore_msr_ops = {
 	.read_counter	= uncore_msr_read_counter,
 };
 
-static struct event_constraint snb_uncore_cbox_constraints[] = {
+static struct event_constraint snb_uncore_arb_constraints[] = {
 	UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
 	UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
 	EVENT_CONSTRAINT_END
@@ -134,14 +139,28 @@ static struct intel_uncore_type snb_uncore_cbox = {
 	.single_fixed	= 1,
 	.event_mask	= SNB_UNC_RAW_EVENT_MASK,
 	.msr_offset	= SNB_UNC_CBO_MSR_OFFSET,
-	.constraints	= snb_uncore_cbox_constraints,
 	.ops		= &snb_uncore_msr_ops,
 	.format_group	= &snb_uncore_format_group,
 	.event_descs	= snb_uncore_events,
 };
 
+static struct intel_uncore_type snb_uncore_arb = {
+	.name		= "arb",
+	.num_counters   = 2,
+	.num_boxes	= 1,
+	.perf_ctr_bits	= 44,
+	.perf_ctr	= SNB_UNC_ARB_PER_CTR0,
+	.event_ctl	= SNB_UNC_ARB_PERFEVTSEL0,
+	.event_mask	= SNB_UNC_RAW_EVENT_MASK,
+	.msr_offset	= SNB_UNC_ARB_MSR_OFFSET,
+	.constraints	= snb_uncore_arb_constraints,
+	.ops		= &snb_uncore_msr_ops,
+	.format_group	= &snb_uncore_format_group,
+};
+
 static struct intel_uncore_type *snb_msr_uncores[] = {
 	&snb_uncore_cbox,
+	&snb_uncore_arb,
 	NULL,
 };
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index 6d6e85d..694510a8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -2215,7 +2215,7 @@ static struct intel_uncore_type *hswep_pci_uncores[] = {
 	NULL,
 };
 
-static DEFINE_PCI_DEVICE_TABLE(hswep_uncore_pci_ids) = {
+static const struct pci_device_id hswep_uncore_pci_ids[] = {
 	{ /* Home Agent 0 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f30),
 		.driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 0),
@@ -2321,3 +2321,167 @@ int hswep_uncore_pci_init(void)
 	return 0;
 }
 /* end of Haswell-EP uncore support */
+
+/* BDX-DE uncore support */
+
+static struct intel_uncore_type bdx_uncore_ubox = {
+	.name			= "ubox",
+	.num_counters		= 2,
+	.num_boxes		= 1,
+	.perf_ctr_bits		= 48,
+	.fixed_ctr_bits		= 48,
+	.perf_ctr		= HSWEP_U_MSR_PMON_CTR0,
+	.event_ctl		= HSWEP_U_MSR_PMON_CTL0,
+	.event_mask		= SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
+	.fixed_ctr		= HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
+	.fixed_ctl		= HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
+	.num_shared_regs	= 1,
+	.ops			= &ivbep_uncore_msr_ops,
+	.format_group		= &ivbep_uncore_ubox_format_group,
+};
+
+static struct event_constraint bdx_uncore_cbox_constraints[] = {
+	UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+	EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type bdx_uncore_cbox = {
+	.name			= "cbox",
+	.num_counters		= 4,
+	.num_boxes		= 8,
+	.perf_ctr_bits		= 48,
+	.event_ctl		= HSWEP_C0_MSR_PMON_CTL0,
+	.perf_ctr		= HSWEP_C0_MSR_PMON_CTR0,
+	.event_mask		= SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+	.box_ctl		= HSWEP_C0_MSR_PMON_BOX_CTL,
+	.msr_offset		= HSWEP_CBO_MSR_OFFSET,
+	.num_shared_regs	= 1,
+	.constraints		= bdx_uncore_cbox_constraints,
+	.ops			= &hswep_uncore_cbox_ops,
+	.format_group		= &hswep_uncore_cbox_format_group,
+};
+
+static struct intel_uncore_type *bdx_msr_uncores[] = {
+	&bdx_uncore_ubox,
+	&bdx_uncore_cbox,
+	&hswep_uncore_pcu,
+	NULL,
+};
+
+void bdx_uncore_cpu_init(void)
+{
+	if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+		bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+	uncore_msr_uncores = bdx_msr_uncores;
+}
+
+static struct intel_uncore_type bdx_uncore_ha = {
+	.name		= "ha",
+	.num_counters   = 4,
+	.num_boxes	= 1,
+	.perf_ctr_bits	= 48,
+	SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type bdx_uncore_imc = {
+	.name		= "imc",
+	.num_counters   = 5,
+	.num_boxes	= 2,
+	.perf_ctr_bits	= 48,
+	.fixed_ctr_bits	= 48,
+	.fixed_ctr	= SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+	.fixed_ctl	= SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+	.event_descs	= hswep_uncore_imc_events,
+	SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type bdx_uncore_irp = {
+	.name			= "irp",
+	.num_counters		= 4,
+	.num_boxes		= 1,
+	.perf_ctr_bits		= 48,
+	.event_mask		= SNBEP_PMON_RAW_EVENT_MASK,
+	.box_ctl		= SNBEP_PCI_PMON_BOX_CTL,
+	.ops			= &hswep_uncore_irp_ops,
+	.format_group		= &snbep_uncore_format_group,
+};
+
+
+static struct event_constraint bdx_uncore_r2pcie_constraints[] = {
+	UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+	EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type bdx_uncore_r2pcie = {
+	.name		= "r2pcie",
+	.num_counters   = 4,
+	.num_boxes	= 1,
+	.perf_ctr_bits	= 48,
+	.constraints	= bdx_uncore_r2pcie_constraints,
+	SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+enum {
+	BDX_PCI_UNCORE_HA,
+	BDX_PCI_UNCORE_IMC,
+	BDX_PCI_UNCORE_IRP,
+	BDX_PCI_UNCORE_R2PCIE,
+};
+
+static struct intel_uncore_type *bdx_pci_uncores[] = {
+	[BDX_PCI_UNCORE_HA]	= &bdx_uncore_ha,
+	[BDX_PCI_UNCORE_IMC]	= &bdx_uncore_imc,
+	[BDX_PCI_UNCORE_IRP]	= &bdx_uncore_irp,
+	[BDX_PCI_UNCORE_R2PCIE]	= &bdx_uncore_r2pcie,
+	NULL,
+};
+
+static DEFINE_PCI_DEVICE_TABLE(bdx_uncore_pci_ids) = {
+	{ /* Home Agent 0 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30),
+		.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0),
+	},
+	{ /* MC0 Channel 0 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb0),
+		.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 0),
+	},
+	{ /* MC0 Channel 1 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb1),
+		.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 1),
+	},
+	{ /* IRP */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f39),
+		.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IRP, 0),
+	},
+	{ /* R2PCIe */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f34),
+		.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R2PCIE, 0),
+	},
+	{ /* end: all zeroes */ }
+};
+
+static struct pci_driver bdx_uncore_pci_driver = {
+	.name		= "bdx_uncore",
+	.id_table	= bdx_uncore_pci_ids,
+};
+
+int bdx_uncore_pci_init(void)
+{
+	int ret = snbep_pci2phy_map_init(0x6f1e);
+
+	if (ret)
+		return ret;
+	uncore_pci_uncores = bdx_pci_uncores;
+	uncore_pci_driver = &bdx_uncore_pci_driver;
+	return 0;
+}
+
+/* end of BDX-DE uncore support */
diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c
new file mode 100644
index 0000000..086b12e
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_msr.c
@@ -0,0 +1,242 @@
+#include <linux/perf_event.h>
+
+enum perf_msr_id {
+	PERF_MSR_TSC			= 0,
+	PERF_MSR_APERF			= 1,
+	PERF_MSR_MPERF			= 2,
+	PERF_MSR_PPERF			= 3,
+	PERF_MSR_SMI			= 4,
+
+	PERF_MSR_EVENT_MAX,
+};
+
+bool test_aperfmperf(int idx)
+{
+	return boot_cpu_has(X86_FEATURE_APERFMPERF);
+}
+
+bool test_intel(int idx)
+{
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+	    boot_cpu_data.x86 != 6)
+		return false;
+
+	switch (boot_cpu_data.x86_model) {
+	case 30: /* 45nm Nehalem    */
+	case 26: /* 45nm Nehalem-EP */
+	case 46: /* 45nm Nehalem-EX */
+
+	case 37: /* 32nm Westmere    */
+	case 44: /* 32nm Westmere-EP */
+	case 47: /* 32nm Westmere-EX */
+
+	case 42: /* 32nm SandyBridge         */
+	case 45: /* 32nm SandyBridge-E/EN/EP */
+
+	case 58: /* 22nm IvyBridge       */
+	case 62: /* 22nm IvyBridge-EP/EX */
+
+	case 60: /* 22nm Haswell Core */
+	case 63: /* 22nm Haswell Server */
+	case 69: /* 22nm Haswell ULT */
+	case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+	case 61: /* 14nm Broadwell Core-M */
+	case 86: /* 14nm Broadwell Xeon D */
+	case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+	case 79: /* 14nm Broadwell Server */
+
+	case 55: /* 22nm Atom "Silvermont"                */
+	case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+	case 76: /* 14nm Atom "Airmont"                   */
+		if (idx == PERF_MSR_SMI)
+			return true;
+		break;
+
+	case 78: /* 14nm Skylake Mobile */
+	case 94: /* 14nm Skylake Desktop */
+		if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
+			return true;
+		break;
+	}
+
+	return false;
+}
+
+struct perf_msr {
+	u64	msr;
+	struct	perf_pmu_events_attr *attr;
+	bool	(*test)(int idx);
+};
+
+PMU_EVENT_ATTR_STRING(tsc,   evattr_tsc,   "event=0x00");
+PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
+PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
+PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
+PMU_EVENT_ATTR_STRING(smi,   evattr_smi,   "event=0x04");
+
+static struct perf_msr msr[] = {
+	[PERF_MSR_TSC]   = { 0,			&evattr_tsc,	NULL,		 },
+	[PERF_MSR_APERF] = { MSR_IA32_APERF,	&evattr_aperf,	test_aperfmperf, },
+	[PERF_MSR_MPERF] = { MSR_IA32_MPERF,	&evattr_mperf,	test_aperfmperf, },
+	[PERF_MSR_PPERF] = { MSR_PPERF,		&evattr_pperf,	test_intel,	 },
+	[PERF_MSR_SMI]   = { MSR_SMI_COUNT,	&evattr_smi,	test_intel,	 },
+};
+
+static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
+	NULL,
+};
+
+static struct attribute_group events_attr_group = {
+	.name = "events",
+	.attrs = events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+static struct attribute *format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+static struct attribute_group format_attr_group = {
+	.name = "format",
+	.attrs = format_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+	&events_attr_group,
+	&format_attr_group,
+	NULL,
+};
+
+static int msr_event_init(struct perf_event *event)
+{
+	u64 cfg = event->attr.config;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (cfg >= PERF_MSR_EVENT_MAX)
+		return -EINVAL;
+
+	/* unsupported modes and filters */
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest  ||
+	    event->attr.sample_period) /* no sampling */
+		return -EINVAL;
+
+	if (!msr[cfg].attr)
+		return -EINVAL;
+
+	event->hw.idx = -1;
+	event->hw.event_base = msr[cfg].msr;
+	event->hw.config = cfg;
+
+	return 0;
+}
+
+static inline u64 msr_read_counter(struct perf_event *event)
+{
+	u64 now;
+
+	if (event->hw.event_base)
+		rdmsrl(event->hw.event_base, now);
+	else
+		rdtscll(now);
+
+	return now;
+}
+static void msr_event_update(struct perf_event *event)
+{
+	u64 prev, now;
+	s64 delta;
+
+	/* Careful, an NMI might modify the previous event value. */
+again:
+	prev = local64_read(&event->hw.prev_count);
+	now = msr_read_counter(event);
+
+	if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
+		goto again;
+
+	delta = now - prev;
+	if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) {
+		delta <<= 32;
+		delta >>= 32; /* sign extend */
+	}
+	local64_add(now - prev, &event->count);
+}
+
+static void msr_event_start(struct perf_event *event, int flags)
+{
+	u64 now;
+
+	now = msr_read_counter(event);
+	local64_set(&event->hw.prev_count, now);
+}
+
+static void msr_event_stop(struct perf_event *event, int flags)
+{
+	msr_event_update(event);
+}
+
+static void msr_event_del(struct perf_event *event, int flags)
+{
+	msr_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int msr_event_add(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_START)
+		msr_event_start(event, flags);
+
+	return 0;
+}
+
+static struct pmu pmu_msr = {
+	.task_ctx_nr	= perf_sw_context,
+	.attr_groups	= attr_groups,
+	.event_init	= msr_event_init,
+	.add		= msr_event_add,
+	.del		= msr_event_del,
+	.start		= msr_event_start,
+	.stop		= msr_event_stop,
+	.read		= msr_event_update,
+	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static int __init msr_init(void)
+{
+	int i, j = 0;
+
+	if (!boot_cpu_has(X86_FEATURE_TSC)) {
+		pr_cont("no MSR PMU driver.\n");
+		return 0;
+	}
+
+	/* Probe the MSRs. */
+	for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) {
+		u64 val;
+
+		/*
+		 * Virt sucks arse; you cannot tell if a R/O MSR is present :/
+		 */
+		if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
+			msr[i].attr = NULL;
+	}
+
+	/* List remaining MSRs in the sysfs attrs. */
+	for (i = 0; i < PERF_MSR_EVENT_MAX; i++) {
+		if (msr[i].attr)
+			events_attrs[j++] = &msr[i].attr->attr.attr;
+	}
+	events_attrs[j] = NULL;
+
+	perf_pmu_register(&pmu_msr, "msr", -1);
+
+	return 0;
+}
+device_initcall(msr_init);
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 89427d8..eec40f5 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -175,7 +175,9 @@ static __init void early_serial_init(char *s)
 	}
 
 	if (*s) {
-		if (kstrtoul(s, 0, &baud) < 0 || baud == 0)
+		baud = simple_strtoull(s, &e, 0);
+
+		if (baud == 0 || s == e)
 			baud = DEFAULT_BAUD;
 	}
 
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index f5d0730..ce95676 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -131,25 +131,24 @@ void __init init_espfix_bsp(void)
 	init_espfix_random();
 
 	/* The rest is the same as for any other processor */
-	init_espfix_ap();
+	init_espfix_ap(0);
 }
 
-void init_espfix_ap(void)
+void init_espfix_ap(int cpu)
 {
-	unsigned int cpu, page;
+	unsigned int page;
 	unsigned long addr;
 	pud_t pud, *pud_p;
 	pmd_t pmd, *pmd_p;
 	pte_t pte, *pte_p;
-	int n;
+	int n, node;
 	void *stack_page;
 	pteval_t ptemask;
 
 	/* We only have to do this once... */
-	if (likely(this_cpu_read(espfix_stack)))
+	if (likely(per_cpu(espfix_stack, cpu)))
 		return;		/* Already initialized */
 
-	cpu = smp_processor_id();
 	addr = espfix_base_addr(cpu);
 	page = cpu/ESPFIX_STACKS_PER_PAGE;
 
@@ -165,12 +164,15 @@ void init_espfix_ap(void)
 	if (stack_page)
 		goto unlock_done;
 
+	node = cpu_to_node(cpu);
 	ptemask = __supported_pte_mask;
 
 	pud_p = &espfix_pud_page[pud_index(addr)];
 	pud = *pud_p;
 	if (!pud_present(pud)) {
-		pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
+		struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
+
+		pmd_p = (pmd_t *)page_address(page);
 		pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
 		paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PUD_CLONES; n++)
@@ -180,7 +182,9 @@ void init_espfix_ap(void)
 	pmd_p = pmd_offset(&pud, addr);
 	pmd = *pmd_p;
 	if (!pmd_present(pmd)) {
-		pte_p = (pte_t *)__get_free_page(PGALLOC_GFP);
+		struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
+
+		pte_p = (pte_t *)page_address(page);
 		pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
 		paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PMD_CLONES; n++)
@@ -188,7 +192,7 @@ void init_espfix_ap(void)
 	}
 
 	pte_p = pte_offset_kernel(&pmd, addr);
-	stack_page = (void *)__get_free_page(GFP_KERNEL);
+	stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0));
 	pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
 	for (n = 0; n < ESPFIX_PTE_CLONES; n++)
 		set_pte(&pte_p[n*PTE_STRIDE], pte);
@@ -199,7 +203,7 @@ void init_espfix_ap(void)
 unlock_done:
 	mutex_unlock(&espfix_init_mutex);
 done:
-	this_cpu_write(espfix_stack, addr);
-	this_cpu_write(espfix_waddr, (unsigned long)stack_page
-		       + (addr & ~PAGE_MASK));
+	per_cpu(espfix_stack, cpu) = addr;
+	per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page
+				      + (addr & ~PAGE_MASK);
 }
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 79de954..d25097c 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -270,7 +270,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	dst_fpu->fpregs_active = 0;
 	dst_fpu->last_cpu = -1;
 
-	if (src_fpu->fpstate_active)
+	if (src_fpu->fpstate_active && cpu_has_fpu)
 		fpu_copy(dst_fpu, src_fpu);
 
 	return 0;
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 3282679..d14e9ac 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -4,6 +4,8 @@
 #include <asm/fpu/internal.h>
 #include <asm/tlbflush.h>
 
+#include <linux/sched.h>
+
 /*
  * Initialize the TS bit in CR0 according to the style of context-switches
  * we are using:
@@ -38,7 +40,12 @@ static void fpu__init_cpu_generic(void)
 	write_cr0(cr0);
 
 	/* Flush out any pending x87 state: */
-	asm volatile ("fninit");
+#ifdef CONFIG_MATH_EMULATION
+	if (!cpu_has_fpu)
+		fpstate_init_soft(&current->thread.fpu.state.soft);
+	else
+#endif
+		asm volatile ("fninit");
 }
 
 /*
@@ -136,6 +143,43 @@ static void __init fpu__init_system_generic(void)
 unsigned int xstate_size;
 EXPORT_SYMBOL_GPL(xstate_size);
 
+/* Enforce that 'MEMBER' is the last field of 'TYPE': */
+#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \
+	BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER))
+
+/*
+ * We append the 'struct fpu' to the task_struct:
+ */
+static void __init fpu__init_task_struct_size(void)
+{
+	int task_size = sizeof(struct task_struct);
+
+	/*
+	 * Subtract off the static size of the register state.
+	 * It potentially has a bunch of padding.
+	 */
+	task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state);
+
+	/*
+	 * Add back the dynamically-calculated register state
+	 * size.
+	 */
+	task_size += xstate_size;
+
+	/*
+	 * We dynamically size 'struct fpu', so we require that
+	 * it be at the end of 'thread_struct' and that
+	 * 'thread_struct' be at the end of 'task_struct'.  If
+	 * you hit a compile error here, check the structure to
+	 * see if something got added to the end.
+	 */
+	CHECK_MEMBER_AT_END_OF(struct fpu, state);
+	CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
+	CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
+
+	arch_task_struct_size = task_size;
+}
+
 /*
  * Set up the xstate_size based on the legacy FPU context size.
  *
@@ -287,6 +331,7 @@ void __init fpu__init_system(struct cpuinfo_x86 *c)
 	fpu__init_system_generic();
 	fpu__init_system_xstate_size_legacy();
 	fpu__init_system_xstate();
+	fpu__init_task_struct_size();
 
 	fpu__init_system_ctx_switch();
 }
@@ -311,9 +356,15 @@ static int __init x86_noxsave_setup(char *s)
 
 	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
 	setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+	setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
 	setup_clear_cpu_cap(X86_FEATURE_XSAVES);
 	setup_clear_cpu_cap(X86_FEATURE_AVX);
 	setup_clear_cpu_cap(X86_FEATURE_AVX2);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512F);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
+	setup_clear_cpu_cap(X86_FEATURE_MPX);
 
 	return 1;
 }
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 5a46681..f129a9a 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -161,11 +161,12 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 	/* Kill off the identity-map trampoline */
 	reset_early_page_tables();
 
-	kasan_map_early_shadow(early_level4_pgt);
-
-	/* clear bss before set_intr_gate with early_idt_handler */
 	clear_bss();
 
+	clear_page(init_level4_pgt);
+
+	kasan_early_init();
+
 	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
 		set_intr_gate(i, early_idt_handler_array[i]);
 	load_idt((const struct desc_ptr *)&idt_descr);
@@ -177,12 +178,9 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 	 */
 	load_ucode_bsp();
 
-	clear_page(init_level4_pgt);
 	/* set init_level4_pgt kernel high mapping*/
 	init_level4_pgt[511] = early_level4_pgt[511];
 
-	kasan_map_early_shadow(init_level4_pgt);
-
 	x86_64_start_reservations(real_mode_data);
 }
 
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index e5c27f7..1d40ca8 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -516,38 +516,9 @@ ENTRY(phys_base)
 	/* This must match the first entry in level2_kernel_pgt */
 	.quad   0x0000000000000000
 
-#ifdef CONFIG_KASAN
-#define FILL(VAL, COUNT)				\
-	.rept (COUNT) ;					\
-	.quad	(VAL) ;					\
-	.endr
-
-NEXT_PAGE(kasan_zero_pte)
-	FILL(kasan_zero_page - __START_KERNEL_map + _KERNPG_TABLE, 512)
-NEXT_PAGE(kasan_zero_pmd)
-	FILL(kasan_zero_pte - __START_KERNEL_map + _KERNPG_TABLE, 512)
-NEXT_PAGE(kasan_zero_pud)
-	FILL(kasan_zero_pmd - __START_KERNEL_map + _KERNPG_TABLE, 512)
-
-#undef FILL
-#endif
-
-
 #include "../../x86/xen/xen-head.S"
 	
 	__PAGE_ALIGNED_BSS
 NEXT_PAGE(empty_zero_page)
 	.skip PAGE_SIZE
 
-#ifdef CONFIG_KASAN
-/*
- * This page used as early shadow. We don't use empty_zero_page
- * at early stages, stack instrumentation could write some garbage
- * to this page.
- * Latter we reuse it as zero shadow for large ranges of memory
- * that allowed to access, but not instrumented by kasan
- * (vmalloc/vmemmap ...).
- */
-NEXT_PAGE(kasan_zero_page)
-	.skip PAGE_SIZE
-#endif
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 7114ba2..50a3fad 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -32,6 +32,7 @@
 #include <linux/irqflags.h>
 #include <linux/notifier.h>
 #include <linux/kallsyms.h>
+#include <linux/kprobes.h>
 #include <linux/percpu.h>
 #include <linux/kdebug.h>
 #include <linux/kernel.h>
@@ -179,7 +180,11 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp)
 	va = info->address;
 	len = bp->attr.bp_len;
 
-	return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
+	/*
+	 * We don't need to worry about va + len - 1 overflowing:
+	 * we already require that va is aligned to a multiple of len.
+	 */
+	return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
 }
 
 int arch_bp_generic_fields(int x86_len, int x86_type,
@@ -243,6 +248,20 @@ static int arch_build_bp_info(struct perf_event *bp)
 		info->type = X86_BREAKPOINT_RW;
 		break;
 	case HW_BREAKPOINT_X:
+		/*
+		 * We don't allow kernel breakpoints in places that are not
+		 * acceptable for kprobes.  On non-kprobes kernels, we don't
+		 * allow kernel breakpoints at all.
+		 */
+		if (bp->attr.bp_addr >= TASK_SIZE_MAX) {
+#ifdef CONFIG_KPROBES
+			if (within_kprobe_blacklist(bp->attr.bp_addr))
+				return -EINVAL;
+#else
+			return -EINVAL;
+#endif
+		}
+
 		info->type = X86_BREAKPOINT_EXECUTE;
 		/*
 		 * x86 inst breakpoints need to have a specific undefined len.
@@ -276,8 +295,18 @@ static int arch_build_bp_info(struct perf_event *bp)
 		break;
 #endif
 	default:
+		/* AMD range breakpoint */
 		if (!is_power_of_2(bp->attr.bp_len))
 			return -EINVAL;
+		if (bp->attr.bp_addr & (bp->attr.bp_len - 1))
+			return -EINVAL;
+		/*
+		 * It's impossible to use a range breakpoint to fake out
+		 * user vs kernel detection because bp_len - 1 can't
+		 * have the high bit set.  If we ever allow range instruction
+		 * breakpoints, then we'll have to check for kprobe-blacklisted
+		 * addresses anywhere in the range.
+		 */
 		if (!cpu_has_bpext)
 			return -EOPNOTSUPP;
 		info->mask = bp->attr.bp_len - 1;
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 88b36648..c7dfe1b 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -347,14 +347,22 @@ int check_irq_vectors_for_cpu_disable(void)
 			if (!desc)
 				continue;
 
+			/*
+			 * Protect against concurrent action removal,
+			 * affinity changes etc.
+			 */
+			raw_spin_lock(&desc->lock);
 			data = irq_desc_get_irq_data(desc);
 			cpumask_copy(&affinity_new, data->affinity);
 			cpumask_clear_cpu(this_cpu, &affinity_new);
 
 			/* Do not count inactive or per-cpu irqs. */
-			if (!irq_has_action(irq) || irqd_is_per_cpu(data))
+			if (!irq_has_action(irq) || irqd_is_per_cpu(data)) {
+				raw_spin_unlock(&desc->lock);
 				continue;
+			}
 
+			raw_spin_unlock(&desc->lock);
 			/*
 			 * A single irq may be mapped to multiple
 			 * cpu's vector_irq[] (for example IOAPIC cluster
@@ -385,6 +393,9 @@ int check_irq_vectors_for_cpu_disable(void)
 		 * vector. If the vector is marked in the used vectors
 		 * bitmap or an irq is assigned to it, we don't count
 		 * it as available.
+		 *
+		 * As this is an inaccurate snapshot anyway, we can do
+		 * this w/o holding vector_lock.
 		 */
 		for (vector = FIRST_EXTERNAL_VECTOR;
 		     vector < first_system_vector; vector++) {
@@ -486,6 +497,11 @@ void fixup_irqs(void)
 	 */
 	mdelay(1);
 
+	/*
+	 * We can walk the vector array of this cpu without holding
+	 * vector_lock because the cpu is already marked !online, so
+	 * nothing else will touch it.
+	 */
 	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
 		unsigned int irr;
 
@@ -497,9 +513,9 @@ void fixup_irqs(void)
 			irq = __this_cpu_read(vector_irq[vector]);
 
 			desc = irq_to_desc(irq);
+			raw_spin_lock(&desc->lock);
 			data = irq_desc_get_irq_data(desc);
 			chip = irq_data_get_irq_chip(data);
-			raw_spin_lock(&desc->lock);
 			if (chip->irq_retrigger) {
 				chip->irq_retrigger(data);
 				__this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED);
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index c37886d..2bcc052 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,6 +12,7 @@
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
+#include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
 
@@ -20,82 +21,82 @@
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
 
-#ifdef CONFIG_SMP
+/* context.lock is held for us, so we don't need any locking. */
 static void flush_ldt(void *current_mm)
 {
-	if (current->active_mm == current_mm)
-		load_LDT(&current->active_mm->context);
+	mm_context_t *pc;
+
+	if (current->active_mm != current_mm)
+		return;
+
+	pc = &current->active_mm->context;
+	set_ldt(pc->ldt->entries, pc->ldt->size);
 }
-#endif
 
-static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
+/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
+static struct ldt_struct *alloc_ldt_struct(int size)
 {
-	void *oldldt, *newldt;
-	int oldsize;
-
-	if (mincount <= pc->size)
-		return 0;
-	oldsize = pc->size;
-	mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
-			(~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
-	if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
-		newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
+	struct ldt_struct *new_ldt;
+	int alloc_size;
+
+	if (size > LDT_ENTRIES)
+		return NULL;
+
+	new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
+	if (!new_ldt)
+		return NULL;
+
+	BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
+	alloc_size = size * LDT_ENTRY_SIZE;
+
+	/*
+	 * Xen is very picky: it requires a page-aligned LDT that has no
+	 * trailing nonzero bytes in any page that contains LDT descriptors.
+	 * Keep it simple: zero the whole allocation and never allocate less
+	 * than PAGE_SIZE.
+	 */
+	if (alloc_size > PAGE_SIZE)
+		new_ldt->entries = vzalloc(alloc_size);
 	else
-		newldt = (void *)__get_free_page(GFP_KERNEL);
-
-	if (!newldt)
-		return -ENOMEM;
+		new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL);
 
-	if (oldsize)
-		memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
-	oldldt = pc->ldt;
-	memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
-	       (mincount - oldsize) * LDT_ENTRY_SIZE);
+	if (!new_ldt->entries) {
+		kfree(new_ldt);
+		return NULL;
+	}
 
-	paravirt_alloc_ldt(newldt, mincount);
+	new_ldt->size = size;
+	return new_ldt;
+}
 
-#ifdef CONFIG_X86_64
-	/* CHECKME: Do we really need this ? */
-	wmb();
-#endif
-	pc->ldt = newldt;
-	wmb();
-	pc->size = mincount;
-	wmb();
-
-	if (reload) {
-#ifdef CONFIG_SMP
-		preempt_disable();
-		load_LDT(pc);
-		if (!cpumask_equal(mm_cpumask(current->mm),
-				   cpumask_of(smp_processor_id())))
-			smp_call_function(flush_ldt, current->mm, 1);
-		preempt_enable();
-#else
-		load_LDT(pc);
-#endif
-	}
-	if (oldsize) {
-		paravirt_free_ldt(oldldt, oldsize);
-		if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
-			vfree(oldldt);
-		else
-			put_page(virt_to_page(oldldt));
-	}
-	return 0;
+/* After calling this, the LDT is immutable. */
+static void finalize_ldt_struct(struct ldt_struct *ldt)
+{
+	paravirt_alloc_ldt(ldt->entries, ldt->size);
 }
 
-static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
+/* context.lock is held */
+static void install_ldt(struct mm_struct *current_mm,
+			struct ldt_struct *ldt)
 {
-	int err = alloc_ldt(new, old->size, 0);
-	int i;
+	/* Synchronizes with lockless_dereference in load_mm_ldt. */
+	smp_store_release(&current_mm->context.ldt, ldt);
+
+	/* Activate the LDT for all CPUs using current_mm. */
+	on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
+}
 
-	if (err < 0)
-		return err;
+static void free_ldt_struct(struct ldt_struct *ldt)
+{
+	if (likely(!ldt))
+		return;
 
-	for (i = 0; i < old->size; i++)
-		write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
-	return 0;
+	paravirt_free_ldt(ldt->entries, ldt->size);
+	if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
+		vfree(ldt->entries);
+	else
+		kfree(ldt->entries);
+	kfree(ldt);
 }
 
 /*
@@ -104,17 +105,37 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
  */
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
+	struct ldt_struct *new_ldt;
 	struct mm_struct *old_mm;
 	int retval = 0;
 
 	mutex_init(&mm->context.lock);
-	mm->context.size = 0;
 	old_mm = current->mm;
-	if (old_mm && old_mm->context.size > 0) {
-		mutex_lock(&old_mm->context.lock);
-		retval = copy_ldt(&mm->context, &old_mm->context);
-		mutex_unlock(&old_mm->context.lock);
+	if (!old_mm) {
+		mm->context.ldt = NULL;
+		return 0;
 	}
+
+	mutex_lock(&old_mm->context.lock);
+	if (!old_mm->context.ldt) {
+		mm->context.ldt = NULL;
+		goto out_unlock;
+	}
+
+	new_ldt = alloc_ldt_struct(old_mm->context.ldt->size);
+	if (!new_ldt) {
+		retval = -ENOMEM;
+		goto out_unlock;
+	}
+
+	memcpy(new_ldt->entries, old_mm->context.ldt->entries,
+	       new_ldt->size * LDT_ENTRY_SIZE);
+	finalize_ldt_struct(new_ldt);
+
+	mm->context.ldt = new_ldt;
+
+out_unlock:
+	mutex_unlock(&old_mm->context.lock);
 	return retval;
 }
 
@@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
  */
 void destroy_context(struct mm_struct *mm)
 {
-	if (mm->context.size) {
-#ifdef CONFIG_X86_32
-		/* CHECKME: Can this ever happen ? */
-		if (mm == current->active_mm)
-			clear_LDT();
-#endif
-		paravirt_free_ldt(mm->context.ldt, mm->context.size);
-		if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
-			vfree(mm->context.ldt);
-		else
-			put_page(virt_to_page(mm->context.ldt));
-		mm->context.size = 0;
-	}
+	free_ldt_struct(mm->context.ldt);
+	mm->context.ldt = NULL;
 }
 
 static int read_ldt(void __user *ptr, unsigned long bytecount)
 {
-	int err;
+	int retval;
 	unsigned long size;
 	struct mm_struct *mm = current->mm;
 
-	if (!mm->context.size)
-		return 0;
+	mutex_lock(&mm->context.lock);
+
+	if (!mm->context.ldt) {
+		retval = 0;
+		goto out_unlock;
+	}
+
 	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
 		bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
 
-	mutex_lock(&mm->context.lock);
-	size = mm->context.size * LDT_ENTRY_SIZE;
+	size = mm->context.ldt->size * LDT_ENTRY_SIZE;
 	if (size > bytecount)
 		size = bytecount;
 
-	err = 0;
-	if (copy_to_user(ptr, mm->context.ldt, size))
-		err = -EFAULT;
-	mutex_unlock(&mm->context.lock);
-	if (err < 0)
-		goto error_return;
+	if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
+		retval = -EFAULT;
+		goto out_unlock;
+	}
+
 	if (size != bytecount) {
-		/* zero-fill the rest */
-		if (clear_user(ptr + size, bytecount - size) != 0) {
-			err = -EFAULT;
-			goto error_return;
+		/* Zero-fill the rest and pretend we read bytecount bytes. */
+		if (clear_user(ptr + size, bytecount - size)) {
+			retval = -EFAULT;
+			goto out_unlock;
 		}
 	}
-	return bytecount;
-error_return:
-	return err;
+	retval = bytecount;
+
+out_unlock:
+	mutex_unlock(&mm->context.lock);
+	return retval;
 }
 
 static int read_default_ldt(void __user *ptr, unsigned long bytecount)
@@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 	struct desc_struct ldt;
 	int error;
 	struct user_desc ldt_info;
+	int oldsize, newsize;
+	struct ldt_struct *new_ldt, *old_ldt;
 
 	error = -EINVAL;
 	if (bytecount != sizeof(ldt_info))
@@ -213,34 +230,39 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 			goto out;
 	}
 
-	mutex_lock(&mm->context.lock);
-	if (ldt_info.entry_number >= mm->context.size) {
-		error = alloc_ldt(&current->mm->context,
-				  ldt_info.entry_number + 1, 1);
-		if (error < 0)
-			goto out_unlock;
-	}
-
-	/* Allow LDTs to be cleared by the user. */
-	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
-		if (oldmode || LDT_empty(&ldt_info)) {
-			memset(&ldt, 0, sizeof(ldt));
-			goto install;
+	if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
+	    LDT_empty(&ldt_info)) {
+		/* The user wants to clear the entry. */
+		memset(&ldt, 0, sizeof(ldt));
+	} else {
+		if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
+			error = -EINVAL;
+			goto out;
 		}
+
+		fill_ldt(&ldt, &ldt_info);
+		if (oldmode)
+			ldt.avl = 0;
 	}
 
-	if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
-		error = -EINVAL;
+	mutex_lock(&mm->context.lock);
+
+	old_ldt = mm->context.ldt;
+	oldsize = old_ldt ? old_ldt->size : 0;
+	newsize = max((int)(ldt_info.entry_number + 1), oldsize);
+
+	error = -ENOMEM;
+	new_ldt = alloc_ldt_struct(newsize);
+	if (!new_ldt)
 		goto out_unlock;
-	}
 
-	fill_ldt(&ldt, &ldt_info);
-	if (oldmode)
-		ldt.avl = 0;
+	if (old_ldt)
+		memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE);
+	new_ldt->entries[ldt_info.entry_number] = ldt;
+	finalize_ldt_struct(new_ldt);
 
-	/* Install the new entry ...  */
-install:
-	write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt);
+	install_ldt(mm, new_ldt);
+	free_ldt_struct(old_ldt);
 	error = 0;
 
 out_unlock:
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index c3e985d..d05bd2e 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -408,15 +408,15 @@ static void default_do_nmi(struct pt_regs *regs)
 NOKPROBE_SYMBOL(default_do_nmi);
 
 /*
- * NMIs can hit breakpoints which will cause it to lose its
- * NMI context with the CPU when the breakpoint does an iret.
- */
-#ifdef CONFIG_X86_32
-/*
- * For i386, NMIs use the same stack as the kernel, and we can
- * add a workaround to the iret problem in C (preventing nested
- * NMIs if an NMI takes a trap). Simply have 3 states the NMI
- * can be in:
+ * NMIs can page fault or hit breakpoints which will cause it to lose
+ * its NMI context with the CPU when the breakpoint or page fault does an IRET.
+ *
+ * As a result, NMIs can nest if NMIs get unmasked due an IRET during
+ * NMI processing.  On x86_64, the asm glue protects us from nested NMIs
+ * if the outer NMI came from kernel mode, but we can still nest if the
+ * outer NMI came from user mode.
+ *
+ * To handle these nested NMIs, we have three states:
  *
  *  1) not running
  *  2) executing
@@ -430,15 +430,14 @@ NOKPROBE_SYMBOL(default_do_nmi);
  * (Note, the latch is binary, thus multiple NMIs triggering,
  *  when one is running, are ignored. Only one NMI is restarted.)
  *
- * If an NMI hits a breakpoint that executes an iret, another
- * NMI can preempt it. We do not want to allow this new NMI
- * to run, but we want to execute it when the first one finishes.
- * We set the state to "latched", and the exit of the first NMI will
- * perform a dec_return, if the result is zero (NOT_RUNNING), then
- * it will simply exit the NMI handler. If not, the dec_return
- * would have set the state to NMI_EXECUTING (what we want it to
- * be when we are running). In this case, we simply jump back
- * to rerun the NMI handler again, and restart the 'latched' NMI.
+ * If an NMI executes an iret, another NMI can preempt it. We do not
+ * want to allow this new NMI to run, but we want to execute it when the
+ * first one finishes.  We set the state to "latched", and the exit of
+ * the first NMI will perform a dec_return, if the result is zero
+ * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the
+ * dec_return would have set the state to NMI_EXECUTING (what we want it
+ * to be when we are running). In this case, we simply jump back to
+ * rerun the NMI handler again, and restart the 'latched' NMI.
  *
  * No trap (breakpoint or page fault) should be hit before nmi_restart,
  * thus there is no race between the first check of state for NOT_RUNNING
@@ -461,49 +460,36 @@ enum nmi_states {
 static DEFINE_PER_CPU(enum nmi_states, nmi_state);
 static DEFINE_PER_CPU(unsigned long, nmi_cr2);
 
-#define nmi_nesting_preprocess(regs)					\
-	do {								\
-		if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {	\
-			this_cpu_write(nmi_state, NMI_LATCHED);		\
-			return;						\
-		}							\
-		this_cpu_write(nmi_state, NMI_EXECUTING);		\
-		this_cpu_write(nmi_cr2, read_cr2());			\
-	} while (0);							\
-	nmi_restart:
-
-#define nmi_nesting_postprocess()					\
-	do {								\
-		if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))	\
-			write_cr2(this_cpu_read(nmi_cr2));		\
-		if (this_cpu_dec_return(nmi_state))			\
-			goto nmi_restart;				\
-	} while (0)
-#else /* x86_64 */
+#ifdef CONFIG_X86_64
 /*
- * In x86_64 things are a bit more difficult. This has the same problem
- * where an NMI hitting a breakpoint that calls iret will remove the
- * NMI context, allowing a nested NMI to enter. What makes this more
- * difficult is that both NMIs and breakpoints have their own stack.
- * When a new NMI or breakpoint is executed, the stack is set to a fixed
- * point. If an NMI is nested, it will have its stack set at that same
- * fixed address that the first NMI had, and will start corrupting the
- * stack. This is handled in entry_64.S, but the same problem exists with
- * the breakpoint stack.
+ * In x86_64, we need to handle breakpoint -> NMI -> breakpoint.  Without
+ * some care, the inner breakpoint will clobber the outer breakpoint's
+ * stack.
  *
- * If a breakpoint is being processed, and the debug stack is being used,
- * if an NMI comes in and also hits a breakpoint, the stack pointer
- * will be set to the same fixed address as the breakpoint that was
- * interrupted, causing that stack to be corrupted. To handle this case,
- * check if the stack that was interrupted is the debug stack, and if
- * so, change the IDT so that new breakpoints will use the current stack
- * and not switch to the fixed address. On return of the NMI, switch back
- * to the original IDT.
+ * If a breakpoint is being processed, and the debug stack is being
+ * used, if an NMI comes in and also hits a breakpoint, the stack
+ * pointer will be set to the same fixed address as the breakpoint that
+ * was interrupted, causing that stack to be corrupted. To handle this
+ * case, check if the stack that was interrupted is the debug stack, and
+ * if so, change the IDT so that new breakpoints will use the current
+ * stack and not switch to the fixed address. On return of the NMI,
+ * switch back to the original IDT.
  */
 static DEFINE_PER_CPU(int, update_debug_stack);
+#endif
 
-static inline void nmi_nesting_preprocess(struct pt_regs *regs)
+dotraplinkage notrace void
+do_nmi(struct pt_regs *regs, long error_code)
 {
+	if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
+		this_cpu_write(nmi_state, NMI_LATCHED);
+		return;
+	}
+	this_cpu_write(nmi_state, NMI_EXECUTING);
+	this_cpu_write(nmi_cr2, read_cr2());
+nmi_restart:
+
+#ifdef CONFIG_X86_64
 	/*
 	 * If we interrupted a breakpoint, it is possible that
 	 * the nmi handler will have breakpoints too. We need to
@@ -514,22 +500,8 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs)
 		debug_stack_set_zero();
 		this_cpu_write(update_debug_stack, 1);
 	}
-}
-
-static inline void nmi_nesting_postprocess(void)
-{
-	if (unlikely(this_cpu_read(update_debug_stack))) {
-		debug_stack_reset();
-		this_cpu_write(update_debug_stack, 0);
-	}
-}
 #endif
 
-dotraplinkage notrace void
-do_nmi(struct pt_regs *regs, long error_code)
-{
-	nmi_nesting_preprocess(regs);
-
 	nmi_enter();
 
 	inc_irq_stat(__nmi_count);
@@ -539,8 +511,17 @@ do_nmi(struct pt_regs *regs, long error_code)
 
 	nmi_exit();
 
-	/* On i386, may loop back to preprocess */
-	nmi_nesting_postprocess();
+#ifdef CONFIG_X86_64
+	if (unlikely(this_cpu_read(update_debug_stack))) {
+		debug_stack_reset();
+		this_cpu_write(update_debug_stack, 0);
+	}
+#endif
+
+	if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
+		write_cr2(this_cpu_read(nmi_cr2));
+	if (this_cpu_dec_return(nmi_state))
+		goto nmi_restart;
 }
 NOKPROBE_SYMBOL(do_nmi);
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9cad694..d83740a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -29,6 +29,7 @@
 #include <asm/debugreg.h>
 #include <asm/nmi.h>
 #include <asm/tlbflush.h>
+#include <asm/mce.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -81,7 +82,7 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
  */
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
-	*dst = *src;
+	memcpy(dst, src, arch_task_struct_size);
 
 	return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
 }
@@ -319,6 +320,7 @@ void stop_this_cpu(void *dummy)
 	 */
 	set_cpu_online(smp_processor_id(), false);
 	disable_local_APIC();
+	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
 
 	for (;;)
 		halt();
@@ -408,6 +410,7 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
 static void mwait_idle(void)
 {
 	if (!current_set_polling_and_test()) {
+		trace_cpu_idle_rcuidle(1, smp_processor_id());
 		if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
 			smp_mb(); /* quirk */
 			clflush((void *)&current_thread_info()->flags);
@@ -419,6 +422,7 @@ static void mwait_idle(void)
 			__sti_mwait(0, 0);
 		else
 			local_irq_enable();
+		trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 	} else {
 		local_irq_enable();
 	}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 71d7849..f6b9163 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -121,11 +121,11 @@ void __show_regs(struct pt_regs *regs, int all)
 void release_thread(struct task_struct *dead_task)
 {
 	if (dead_task->mm) {
-		if (dead_task->mm->context.size) {
+		if (dead_task->mm->context.ldt) {
 			pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
 				dead_task->comm,
 				dead_task->mm->context.ldt,
-				dead_task->mm->context.size);
+				dead_task->mm->context.ldt->size);
 			BUG();
 		}
 	}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 206996c..71820c4 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -93,8 +93,15 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
 		COPY(r15);
 #endif /* CONFIG_X86_64 */
 
+#ifdef CONFIG_X86_32
 		COPY_SEG_CPL3(cs);
 		COPY_SEG_CPL3(ss);
+#else /* !CONFIG_X86_32 */
+		/* Kernel saves and restores only the CS segment register on signals,
+		 * which is the bare minimum needed to allow mixed 32/64-bit code.
+		 * App's signal handler can save/restore other segments if needed. */
+		COPY_SEG_CPL3(cs);
+#endif /* CONFIG_X86_32 */
 
 		get_user_ex(tmpflags, &sc->flags);
 		regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
@@ -154,9 +161,8 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 #else /* !CONFIG_X86_32 */
 		put_user_ex(regs->flags, &sc->flags);
 		put_user_ex(regs->cs, &sc->cs);
-		put_user_ex(0, &sc->__pad2);
-		put_user_ex(0, &sc->__pad1);
-		put_user_ex(regs->ss, &sc->ss);
+		put_user_ex(0, &sc->gs);
+		put_user_ex(0, &sc->fs);
 #endif /* CONFIG_X86_32 */
 
 		put_user_ex(fpstate, &sc->fpstate);
@@ -451,19 +457,9 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
 
 	regs->sp = (unsigned long)frame;
 
-	/*
-	 * Set up the CS and SS registers to run signal handlers in
-	 * 64-bit mode, even if the handler happens to be interrupting
-	 * 32-bit or 16-bit code.
-	 *
-	 * SS is subtle.  In 64-bit mode, we don't need any particular
-	 * SS descriptor, but we do need SS to be valid.  It's possible
-	 * that the old SS is entirely bogus -- this can happen if the
-	 * signal we're trying to deliver is #GP or #SS caused by a bad
-	 * SS value.
-	 */
+	/* Set up the CS register to run signal handlers in 64-bit mode,
+	   even if the handler happens to be interrupting 32-bit code. */
 	regs->cs = __USER_CS;
-	regs->ss = __USER_DS;
 
 	return 0;
 }
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 15aaa69..12c8286 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -30,6 +30,7 @@
 #include <asm/proto.h>
 #include <asm/apic.h>
 #include <asm/nmi.h>
+#include <asm/mce.h>
 #include <asm/trace/irq_vectors.h>
 /*
  *	Some notes on x86 processor bugs affecting SMP operation:
@@ -243,6 +244,7 @@ static void native_stop_other_cpus(int wait)
 finish:
 	local_irq_save(flags);
 	disable_local_APIC();
+	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
 	local_irq_restore(flags);
 }
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8add66b..b1f3ed9c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -171,11 +171,6 @@ static void smp_callin(void)
 	apic_ap_setup();
 
 	/*
-	 * Need to setup vector mappings before we enable interrupts.
-	 */
-	setup_vector_irq(smp_processor_id());
-
-	/*
 	 * Save our processor parameters. Note: this information
 	 * is needed for clock calibration.
 	 */
@@ -239,18 +234,13 @@ static void notrace start_secondary(void *unused)
 	check_tsc_sync_target();
 
 	/*
-	 * Enable the espfix hack for this CPU
-	 */
-#ifdef CONFIG_X86_ESPFIX64
-	init_espfix_ap();
-#endif
-
-	/*
-	 * We need to hold vector_lock so there the set of online cpus
-	 * does not change while we are assigning vectors to cpus.  Holding
-	 * this lock ensures we don't half assign or remove an irq from a cpu.
+	 * Lock vector_lock and initialize the vectors on this cpu
+	 * before setting the cpu online. We must set it online with
+	 * vector_lock held to prevent a concurrent setup/teardown
+	 * from seeing a half valid vector space.
 	 */
 	lock_vector_lock();
+	setup_vector_irq(smp_processor_id());
 	set_cpu_online(smp_processor_id(), true);
 	unlock_vector_lock();
 	cpu_set_state_online(smp_processor_id());
@@ -854,6 +844,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	initial_code = (unsigned long)start_secondary;
 	stack_start  = idle->thread.sp;
 
+	/*
+	 * Enable the espfix hack for this CPU
+	*/
+#ifdef CONFIG_X86_ESPFIX64
+	init_espfix_ap(cpu);
+#endif
+
 	/* So we see what's up */
 	announce_cpu(cpu, apicid);
 
@@ -995,8 +992,17 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 
 	common_cpu_up(cpu, tidle);
 
+	/*
+	 * We have to walk the irq descriptors to setup the vector
+	 * space for the cpu which comes online.  Prevent irq
+	 * alloc/free across the bringup.
+	 */
+	irq_lock_sparse();
+
 	err = do_boot_cpu(apicid, cpu, tidle);
+
 	if (err) {
+		irq_unlock_sparse();
 		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
 		return -EIO;
 	}
@@ -1014,6 +1020,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 		touch_nmi_watchdog();
 	}
 
+	irq_unlock_sparse();
+
 	return 0;
 }
 
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 9b4d51d..0ccb53a 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -5,6 +5,7 @@
 #include <linux/mm.h>
 #include <linux/ptrace.h>
 #include <asm/desc.h>
+#include <asm/mmu_context.h>
 
 unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
 {
@@ -27,13 +28,14 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
 		struct desc_struct *desc;
 		unsigned long base;
 
-		seg &= ~7UL;
+		seg >>= 3;
 
 		mutex_lock(&child->mm->context.lock);
-		if (unlikely((seg >> 3) >= child->mm->context.size))
+		if (unlikely(!child->mm->context.ldt ||
+			     seg >= child->mm->context.ldt->size))
 			addr = -1L; /* bogus selector, access would fault */
 		else {
-			desc = child->mm->context.ldt + seg;
+			desc = &child->mm->context.ldt->entries[seg];
 			base = get_desc_base(desc);
 
 			/* 16-bit code segment? */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index f579192..c5a5231 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -136,7 +136,7 @@ enum ctx_state ist_enter(struct pt_regs *regs)
 	preempt_count_add(HARDIRQ_OFFSET);
 
 	/* This code is a bit fragile.  Test it. */
-	rcu_lockdep_assert(rcu_is_watching(), "ist_enter didn't work");
+	RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work");
 
 	return prev_state;
 }
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 5054497..88e9a38 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -296,6 +296,14 @@ u64 native_sched_clock(void)
 	return cycles_2_ns(tsc_now);
 }
 
+/*
+ * Generate a sched_clock if you already have a TSC value.
+ */
+u64 native_sched_clock_from_tsc(u64 tsc)
+{
+	return cycles_2_ns(tsc);
+}
+
 /* We need to define a real function for sched_clock, to override the
    weak default version */
 #ifdef CONFIG_PARAVIRT
@@ -598,10 +606,19 @@ static unsigned long quick_pit_calibrate(void)
 			if (!pit_expect_msb(0xff-i, &delta, &d2))
 				break;
 
+			delta -= tsc;
+
+			/*
+			 * Extrapolate the error and fail fast if the error will
+			 * never be below 500 ppm.
+			 */
+			if (i == 1 &&
+			    d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11)
+				return 0;
+
 			/*
 			 * Iterate until the error is less than 500 ppm
 			 */
-			delta -= tsc;
 			if (d1+d2 >= delta >> 11)
 				continue;
 
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 6647624..bf4db6e 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -985,3 +985,12 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
 
 	return -1;
 }
+
+bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+				struct pt_regs *regs)
+{
+	if (ctx == RP_CHECK_CALL) /* sp was just decremented by "call" insn */
+		return regs->sp < ret->stack;
+	else
+		return regs->sp <= ret->stack;
+}
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 67d215c..a1ff508 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,9 @@ kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o
+			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
+			   hyperv.o
+
 kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= assigned-dev.o iommu.o
 kvm-intel-y		+= vmx.o pmu_intel.o
 kvm-amd-y		+= svm.o pmu_amd.o
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 64dd467..2fbea25 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -98,6 +98,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
 
 	vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu);
+	if (vcpu->arch.eager_fpu)
+		kvm_x86_ops->fpu_activate(vcpu);
 
 	/*
 	 * The existing code assumes virtual address is 48-bit in the canonical
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
new file mode 100644
index 0000000..a8160d2
--- /dev/null
+++ b/arch/x86/kvm/hyperv.c
@@ -0,0 +1,377 @@
+/*
+ * KVM Microsoft Hyper-V emulation
+ *
+ * derived from arch/x86/kvm/x86.c
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright (C) 2008 Qumranet, Inc.
+ * Copyright IBM Corporation, 2008
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * Authors:
+ *   Avi Kivity   <avi@qumranet.com>
+ *   Yaniv Kamay  <yaniv@qumranet.com>
+ *   Amit Shah    <amit.shah@qumranet.com>
+ *   Ben-Ami Yassour <benami@il.ibm.com>
+ *   Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "x86.h"
+#include "lapic.h"
+#include "hyperv.h"
+
+#include <linux/kvm_host.h>
+#include <trace/events/kvm.h>
+
+#include "trace.h"
+
+static bool kvm_hv_msr_partition_wide(u32 msr)
+{
+	bool r = false;
+
+	switch (msr) {
+	case HV_X64_MSR_GUEST_OS_ID:
+	case HV_X64_MSR_HYPERCALL:
+	case HV_X64_MSR_REFERENCE_TSC:
+	case HV_X64_MSR_TIME_REF_COUNT:
+	case HV_X64_MSR_CRASH_CTL:
+	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+		r = true;
+		break;
+	}
+
+	return r;
+}
+
+static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
+				     u32 index, u64 *pdata)
+{
+	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+
+	if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
+		return -EINVAL;
+
+	*pdata = hv->hv_crash_param[index];
+	return 0;
+}
+
+static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata)
+{
+	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+
+	*pdata = hv->hv_crash_ctl;
+	return 0;
+}
+
+static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host)
+{
+	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+
+	if (host)
+		hv->hv_crash_ctl = data & HV_X64_MSR_CRASH_CTL_NOTIFY;
+
+	if (!host && (data & HV_X64_MSR_CRASH_CTL_NOTIFY)) {
+
+		vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
+			  hv->hv_crash_param[0],
+			  hv->hv_crash_param[1],
+			  hv->hv_crash_param[2],
+			  hv->hv_crash_param[3],
+			  hv->hv_crash_param[4]);
+
+		/* Send notification about crash to user space */
+		kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
+	}
+
+	return 0;
+}
+
+static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
+				     u32 index, u64 data)
+{
+	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+
+	if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
+		return -EINVAL;
+
+	hv->hv_crash_param[index] = data;
+	return 0;
+}
+
+static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
+			     bool host)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+
+	switch (msr) {
+	case HV_X64_MSR_GUEST_OS_ID:
+		hv->hv_guest_os_id = data;
+		/* setting guest os id to zero disables hypercall page */
+		if (!hv->hv_guest_os_id)
+			hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
+		break;
+	case HV_X64_MSR_HYPERCALL: {
+		u64 gfn;
+		unsigned long addr;
+		u8 instructions[4];
+
+		/* if guest os id is not set hypercall should remain disabled */
+		if (!hv->hv_guest_os_id)
+			break;
+		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
+			hv->hv_hypercall = data;
+			break;
+		}
+		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
+		addr = gfn_to_hva(kvm, gfn);
+		if (kvm_is_error_hva(addr))
+			return 1;
+		kvm_x86_ops->patch_hypercall(vcpu, instructions);
+		((unsigned char *)instructions)[3] = 0xc3; /* ret */
+		if (__copy_to_user((void __user *)addr, instructions, 4))
+			return 1;
+		hv->hv_hypercall = data;
+		mark_page_dirty(kvm, gfn);
+		break;
+	}
+	case HV_X64_MSR_REFERENCE_TSC: {
+		u64 gfn;
+		HV_REFERENCE_TSC_PAGE tsc_ref;
+
+		memset(&tsc_ref, 0, sizeof(tsc_ref));
+		hv->hv_tsc_page = data;
+		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+			break;
+		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+		if (kvm_write_guest(
+				kvm,
+				gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
+				&tsc_ref, sizeof(tsc_ref)))
+			return 1;
+		mark_page_dirty(kvm, gfn);
+		break;
+	}
+	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+		return kvm_hv_msr_set_crash_data(vcpu,
+						 msr - HV_X64_MSR_CRASH_P0,
+						 data);
+	case HV_X64_MSR_CRASH_CTL:
+		return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
+	default:
+		vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
+			    msr, data);
+		return 1;
+	}
+	return 0;
+}
+
+static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+
+	switch (msr) {
+	case HV_X64_MSR_APIC_ASSIST_PAGE: {
+		u64 gfn;
+		unsigned long addr;
+
+		if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
+			hv->hv_vapic = data;
+			if (kvm_lapic_enable_pv_eoi(vcpu, 0))
+				return 1;
+			break;
+		}
+		gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
+		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
+		if (kvm_is_error_hva(addr))
+			return 1;
+		if (__clear_user((void __user *)addr, PAGE_SIZE))
+			return 1;
+		hv->hv_vapic = data;
+		kvm_vcpu_mark_page_dirty(vcpu, gfn);
+		if (kvm_lapic_enable_pv_eoi(vcpu,
+					    gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
+			return 1;
+		break;
+	}
+	case HV_X64_MSR_EOI:
+		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
+	case HV_X64_MSR_ICR:
+		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
+	case HV_X64_MSR_TPR:
+		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
+	default:
+		vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
+			    msr, data);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	u64 data = 0;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+
+	switch (msr) {
+	case HV_X64_MSR_GUEST_OS_ID:
+		data = hv->hv_guest_os_id;
+		break;
+	case HV_X64_MSR_HYPERCALL:
+		data = hv->hv_hypercall;
+		break;
+	case HV_X64_MSR_TIME_REF_COUNT: {
+		data =
+		     div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
+		break;
+	}
+	case HV_X64_MSR_REFERENCE_TSC:
+		data = hv->hv_tsc_page;
+		break;
+	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+		return kvm_hv_msr_get_crash_data(vcpu,
+						 msr - HV_X64_MSR_CRASH_P0,
+						 pdata);
+	case HV_X64_MSR_CRASH_CTL:
+		return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
+	default:
+		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+		return 1;
+	}
+
+	*pdata = data;
+	return 0;
+}
+
+static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	u64 data = 0;
+	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+
+	switch (msr) {
+	case HV_X64_MSR_VP_INDEX: {
+		int r;
+		struct kvm_vcpu *v;
+
+		kvm_for_each_vcpu(r, v, vcpu->kvm) {
+			if (v == vcpu) {
+				data = r;
+				break;
+			}
+		}
+		break;
+	}
+	case HV_X64_MSR_EOI:
+		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
+	case HV_X64_MSR_ICR:
+		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
+	case HV_X64_MSR_TPR:
+		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
+	case HV_X64_MSR_APIC_ASSIST_PAGE:
+		data = hv->hv_vapic;
+		break;
+	default:
+		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+		return 1;
+	}
+	*pdata = data;
+	return 0;
+}
+
+int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
+{
+	if (kvm_hv_msr_partition_wide(msr)) {
+		int r;
+
+		mutex_lock(&vcpu->kvm->lock);
+		r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
+		mutex_unlock(&vcpu->kvm->lock);
+		return r;
+	} else
+		return kvm_hv_set_msr(vcpu, msr, data);
+}
+
+int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	if (kvm_hv_msr_partition_wide(msr)) {
+		int r;
+
+		mutex_lock(&vcpu->kvm->lock);
+		r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
+		mutex_unlock(&vcpu->kvm->lock);
+		return r;
+	} else
+		return kvm_hv_get_msr(vcpu, msr, pdata);
+}
+
+bool kvm_hv_hypercall_enabled(struct kvm *kvm)
+{
+	return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
+}
+
+int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
+{
+	u64 param, ingpa, outgpa, ret;
+	uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
+	bool fast, longmode;
+
+	/*
+	 * hypercall generates UD from non zero cpl and real mode
+	 * per HYPER-V spec
+	 */
+	if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 0;
+	}
+
+	longmode = is_64_bit_mode(vcpu);
+
+	if (!longmode) {
+		param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
+			(kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
+		ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
+			(kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
+		outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
+			(kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
+	}
+#ifdef CONFIG_X86_64
+	else {
+		param = kvm_register_read(vcpu, VCPU_REGS_RCX);
+		ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
+		outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
+	}
+#endif
+
+	code = param & 0xffff;
+	fast = (param >> 16) & 0x1;
+	rep_cnt = (param >> 32) & 0xfff;
+	rep_idx = (param >> 48) & 0xfff;
+
+	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
+
+	switch (code) {
+	case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
+		kvm_vcpu_on_spin(vcpu);
+		break;
+	default:
+		res = HV_STATUS_INVALID_HYPERCALL_CODE;
+		break;
+	}
+
+	ret = res | (((u64)rep_done & 0xfff) << 32);
+	if (longmode) {
+		kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
+	} else {
+		kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
+		kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
+	}
+
+	return 1;
+}
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
new file mode 100644
index 0000000..c7bce55
--- /dev/null
+++ b/arch/x86/kvm/hyperv.h
@@ -0,0 +1,32 @@
+/*
+ * KVM Microsoft Hyper-V emulation
+ *
+ * derived from arch/x86/kvm/x86.c
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright (C) 2008 Qumranet, Inc.
+ * Copyright IBM Corporation, 2008
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * Authors:
+ *   Avi Kivity   <avi@qumranet.com>
+ *   Yaniv Kamay  <yaniv@qumranet.com>
+ *   Amit Shah    <amit.shah@qumranet.com>
+ *   Ben-Ami Yassour <benami@il.ibm.com>
+ *   Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef __ARCH_X86_KVM_HYPERV_H__
+#define __ARCH_X86_KVM_HYPERV_H__
+
+int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
+int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
+bool kvm_hv_hypercall_enabled(struct kvm *kvm);
+int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
+
+#endif
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index fef922f..7cc2360 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -651,15 +651,10 @@ fail_unlock:
 	return NULL;
 }
 
-void kvm_destroy_pic(struct kvm *kvm)
+void kvm_destroy_pic(struct kvm_pic *vpic)
 {
-	struct kvm_pic *vpic = kvm->arch.vpic;
-
-	if (vpic) {
-		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_master);
-		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_slave);
-		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_eclr);
-		kvm->arch.vpic = NULL;
-		kfree(vpic);
-	}
+	kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master);
+	kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave);
+	kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr);
+	kfree(vpic);
 }
diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c
index 7dbced3..5c520eb 100644
--- a/arch/x86/kvm/iommu.c
+++ b/arch/x86/kvm/iommu.c
@@ -200,6 +200,7 @@ int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev)
 			goto out_unmap;
 	}
 
+	kvm_arch_start_assignment(kvm);
 	pci_set_dev_assigned(pdev);
 
 	dev_info(&pdev->dev, "kvm assign device\n");
@@ -224,6 +225,7 @@ int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev)
 	iommu_detach_device(domain, &pdev->dev);
 
 	pci_clear_dev_assigned(pdev);
+	kvm_arch_end_assignment(kvm);
 
 	dev_info(&pdev->dev, "kvm deassign device\n");
 
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index ad68c73..3d782a2 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -74,7 +74,7 @@ struct kvm_pic {
 };
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm);
-void kvm_destroy_pic(struct kvm *kvm);
+void kvm_destroy_pic(struct kvm_pic *vpic);
 int kvm_pic_read_irq(struct kvm *kvm);
 void kvm_pic_update_irq(struct kvm_pic *s);
 
@@ -85,11 +85,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
 
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
-	int ret;
+	struct kvm_pic *vpic = pic_irqchip(kvm);
 
-	ret = (pic_irqchip(kvm) != NULL);
+	/* Read vpic before kvm->irq_routing.  */
 	smp_rmb();
-	return ret;
+	return vpic != NULL;
 }
 
 void kvm_pic_reset(struct kvm_kpic_state *s);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 954e98a..9a3e342 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1595,7 +1595,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 	for (i = 0; i < APIC_LVT_NUM; i++)
 		apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
 	apic_update_lvtt(apic);
-	if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_LINT0_REENABLED))
+	if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
 		apic_set_reg(apic, APIC_LVT0,
 			     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
 	apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0));
@@ -1900,8 +1900,9 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
 	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
 		return;
 
-	kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
-				sizeof(u32));
+	if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
+				  sizeof(u32)))
+		return;
 
 	apic_set_tpr(vcpu->arch.apic, data & 0xff);
 }
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 7195274..7640379 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -91,7 +91,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
 
 static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
+	return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
 }
 
 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f807496..fb16a8e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -357,12 +357,6 @@ static u64 __get_spte_lockless(u64 *sptep)
 {
 	return ACCESS_ONCE(*sptep);
 }
-
-static bool __check_direct_spte_mmio_pf(u64 spte)
-{
-	/* It is valid if the spte is zapped. */
-	return spte == 0ull;
-}
 #else
 union split_spte {
 	struct {
@@ -478,23 +472,6 @@ retry:
 
 	return spte.spte;
 }
-
-static bool __check_direct_spte_mmio_pf(u64 spte)
-{
-	union split_spte sspte = (union split_spte)spte;
-	u32 high_mmio_mask = shadow_mmio_mask >> 32;
-
-	/* It is valid if the spte is zapped. */
-	if (spte == 0ull)
-		return true;
-
-	/* It is valid if the spte is being zapped. */
-	if (sspte.spte_low == 0ull &&
-	    (sspte.spte_high & high_mmio_mask) == high_mmio_mask)
-		return true;
-
-	return false;
-}
 #endif
 
 static bool spte_is_locklessly_modifiable(u64 spte)
@@ -2479,6 +2456,14 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 	return 0;
 }
 
+static bool kvm_is_mmio_pfn(pfn_t pfn)
+{
+	if (pfn_valid(pfn))
+		return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
+
+	return true;
+}
+
 static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    unsigned pte_access, int level,
 		    gfn_t gfn, pfn_t pfn, bool speculative,
@@ -2506,7 +2491,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		spte |= PT_PAGE_SIZE_MASK;
 	if (tdp_enabled)
 		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
-			kvm_is_reserved_pfn(pfn));
+			kvm_is_mmio_pfn(pfn));
 
 	if (host_writable)
 		spte |= SPTE_HOST_WRITEABLE;
@@ -3283,54 +3268,89 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
 	return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception);
 }
 
-static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+static bool
+__is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level)
 {
-	if (direct)
-		return vcpu_match_mmio_gpa(vcpu, addr);
+	int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f;
 
-	return vcpu_match_mmio_gva(vcpu, addr);
+	return (pte & rsvd_check->rsvd_bits_mask[bit7][level-1]) |
+		((rsvd_check->bad_mt_xwr & (1ull << low6)) != 0);
 }
 
+static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
+{
+	return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level);
+}
 
-/*
- * On direct hosts, the last spte is only allows two states
- * for mmio page fault:
- *   - It is the mmio spte
- *   - It is zapped or it is being zapped.
- *
- * This function completely checks the spte when the last spte
- * is not the mmio spte.
- */
-static bool check_direct_spte_mmio_pf(u64 spte)
+static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
 {
-	return __check_direct_spte_mmio_pf(spte);
+	return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level);
 }
 
-static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
+static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+{
+	if (direct)
+		return vcpu_match_mmio_gpa(vcpu, addr);
+
+	return vcpu_match_mmio_gva(vcpu, addr);
+}
+
+/* return true if reserved bit is detected on spte. */
+static bool
+walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
 	struct kvm_shadow_walk_iterator iterator;
-	u64 spte = 0ull;
+	u64 sptes[PT64_ROOT_LEVEL], spte = 0ull;
+	int root, leaf;
+	bool reserved = false;
 
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
-		return spte;
+		goto exit;
 
 	walk_shadow_page_lockless_begin(vcpu);
-	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
+
+	for (shadow_walk_init(&iterator, vcpu, addr), root = iterator.level;
+	     shadow_walk_okay(&iterator);
+	     __shadow_walk_next(&iterator, spte)) {
+		leaf = iterator.level;
+		spte = mmu_spte_get_lockless(iterator.sptep);
+
+		sptes[leaf - 1] = spte;
+
 		if (!is_shadow_present_pte(spte))
 			break;
+
+		reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte,
+						    leaf);
+	}
+
 	walk_shadow_page_lockless_end(vcpu);
 
-	return spte;
+	if (reserved) {
+		pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
+		       __func__, addr);
+		while (root >= leaf) {
+			pr_err("------ spte 0x%llx level %d.\n",
+			       sptes[root - 1], root);
+			root--;
+		}
+	}
+exit:
+	*sptep = spte;
+	return reserved;
 }
 
 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 {
 	u64 spte;
+	bool reserved;
 
 	if (quickly_check_mmio_pf(vcpu, addr, direct))
 		return RET_MMIO_PF_EMULATE;
 
-	spte = walk_shadow_page_get_mmio_spte(vcpu, addr);
+	reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
+	if (unlikely(reserved))
+		return RET_MMIO_PF_BUG;
 
 	if (is_mmio_spte(spte)) {
 		gfn_t gfn = get_mmio_spte_gfn(spte);
@@ -3348,13 +3368,6 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 	}
 
 	/*
-	 * It's ok if the gva is remapped by other cpus on shadow guest,
-	 * it's a BUG if the gfn is not a mmio page.
-	 */
-	if (direct && !check_direct_spte_mmio_pf(spte))
-		return RET_MMIO_PF_BUG;
-
-	/*
 	 * If the page table is zapped by other cpus, let CPU fault again on
 	 * the address.
 	 */
@@ -3596,19 +3609,21 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gp
 #include "paging_tmpl.h"
 #undef PTTYPE
 
-static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
-				  struct kvm_mmu *context)
+static void
+__reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
+			struct rsvd_bits_validate *rsvd_check,
+			int maxphyaddr, int level, bool nx, bool gbpages,
+			bool pse)
 {
-	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 	u64 exb_bit_rsvd = 0;
 	u64 gbpages_bit_rsvd = 0;
 	u64 nonleaf_bit8_rsvd = 0;
 
-	context->bad_mt_xwr = 0;
+	rsvd_check->bad_mt_xwr = 0;
 
-	if (!context->nx)
+	if (!nx)
 		exb_bit_rsvd = rsvd_bits(63, 63);
-	if (!guest_cpuid_has_gbpages(vcpu))
+	if (!gbpages)
 		gbpages_bit_rsvd = rsvd_bits(7, 7);
 
 	/*
@@ -3618,80 +3633,95 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	if (guest_cpuid_is_amd(vcpu))
 		nonleaf_bit8_rsvd = rsvd_bits(8, 8);
 
-	switch (context->root_level) {
+	switch (level) {
 	case PT32_ROOT_LEVEL:
 		/* no rsvd bits for 2 level 4K page table entries */
-		context->rsvd_bits_mask[0][1] = 0;
-		context->rsvd_bits_mask[0][0] = 0;
-		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+		rsvd_check->rsvd_bits_mask[0][1] = 0;
+		rsvd_check->rsvd_bits_mask[0][0] = 0;
+		rsvd_check->rsvd_bits_mask[1][0] =
+			rsvd_check->rsvd_bits_mask[0][0];
 
-		if (!is_pse(vcpu)) {
-			context->rsvd_bits_mask[1][1] = 0;
+		if (!pse) {
+			rsvd_check->rsvd_bits_mask[1][1] = 0;
 			break;
 		}
 
 		if (is_cpuid_PSE36())
 			/* 36bits PSE 4MB page */
-			context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+			rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
 		else
 			/* 32 bits PSE 4MB page */
-			context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
+			rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
 		break;
 	case PT32E_ROOT_LEVEL:
-		context->rsvd_bits_mask[0][2] =
+		rsvd_check->rsvd_bits_mask[0][2] =
 			rsvd_bits(maxphyaddr, 63) |
 			rsvd_bits(5, 8) | rsvd_bits(1, 2);	/* PDPTE */
-		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62);	/* PDE */
-		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62); 	/* PTE */
-		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62) |
 			rsvd_bits(13, 20);		/* large page */
-		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+		rsvd_check->rsvd_bits_mask[1][0] =
+			rsvd_check->rsvd_bits_mask[0][0];
 		break;
 	case PT64_ROOT_LEVEL:
-		context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
-			nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51);
-		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
-			nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
-		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+			nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
 			rsvd_bits(maxphyaddr, 51);
-		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+			nonleaf_bit8_rsvd | gbpages_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51);
-		context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
-		context->rsvd_bits_mask[1][2] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51);
+		rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51);
+		rsvd_check->rsvd_bits_mask[1][3] =
+			rsvd_check->rsvd_bits_mask[0][3];
+		rsvd_check->rsvd_bits_mask[1][2] = exb_bit_rsvd |
 			gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) |
 			rsvd_bits(13, 29);
-		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+		rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51) |
 			rsvd_bits(13, 20);		/* large page */
-		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+		rsvd_check->rsvd_bits_mask[1][0] =
+			rsvd_check->rsvd_bits_mask[0][0];
 		break;
 	}
 }
 
-static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
-		struct kvm_mmu *context, bool execonly)
+static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
+				  struct kvm_mmu *context)
+{
+	__reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check,
+				cpuid_maxphyaddr(vcpu), context->root_level,
+				context->nx, guest_cpuid_has_gbpages(vcpu),
+				is_pse(vcpu));
+}
+
+static void
+__reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
+			    int maxphyaddr, bool execonly)
 {
-	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 	int pte;
 
-	context->rsvd_bits_mask[0][3] =
+	rsvd_check->rsvd_bits_mask[0][3] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
-	context->rsvd_bits_mask[0][2] =
+	rsvd_check->rsvd_bits_mask[0][2] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
-	context->rsvd_bits_mask[0][1] =
+	rsvd_check->rsvd_bits_mask[0][1] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
-	context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+	rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
 
 	/* large page */
-	context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
-	context->rsvd_bits_mask[1][2] =
+	rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
+	rsvd_check->rsvd_bits_mask[1][2] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
-	context->rsvd_bits_mask[1][1] =
+	rsvd_check->rsvd_bits_mask[1][1] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
-	context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+	rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0];
 
 	for (pte = 0; pte < 64; pte++) {
 		int rwx_bits = pte & 7;
@@ -3699,10 +3729,64 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
 		if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
 				rwx_bits == 0x2 || rwx_bits == 0x6 ||
 				(rwx_bits == 0x4 && !execonly))
-			context->bad_mt_xwr |= (1ull << pte);
+			rsvd_check->bad_mt_xwr |= (1ull << pte);
 	}
 }
 
+static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
+		struct kvm_mmu *context, bool execonly)
+{
+	__reset_rsvds_bits_mask_ept(&context->guest_rsvd_check,
+				    cpuid_maxphyaddr(vcpu), execonly);
+}
+
+/*
+ * the page table on host is the shadow page table for the page
+ * table in guest or amd nested guest, its mmu features completely
+ * follow the features in guest.
+ */
+void
+reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
+{
+	__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+				boot_cpu_data.x86_phys_bits,
+				context->shadow_root_level, context->nx,
+				guest_cpuid_has_gbpages(vcpu), is_pse(vcpu));
+}
+EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask);
+
+/*
+ * the direct page table on host, use as much mmu features as
+ * possible, however, kvm currently does not do execution-protection.
+ */
+static void
+reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
+				struct kvm_mmu *context)
+{
+	if (guest_cpuid_is_amd(vcpu))
+		__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+					boot_cpu_data.x86_phys_bits,
+					context->shadow_root_level, false,
+					cpu_has_gbpages, true);
+	else
+		__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
+					    boot_cpu_data.x86_phys_bits,
+					    false);
+
+}
+
+/*
+ * as the comments in reset_shadow_zero_bits_mask() except it
+ * is the shadow page table for intel nested guest.
+ */
+static void
+reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
+				struct kvm_mmu *context, bool execonly)
+{
+	__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
+				    boot_cpu_data.x86_phys_bits, execonly);
+}
+
 static void update_permission_bitmask(struct kvm_vcpu *vcpu,
 				      struct kvm_mmu *mmu, bool ept)
 {
@@ -3881,6 +3965,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 
 	update_permission_bitmask(vcpu, context, false);
 	update_last_pte_bitmap(vcpu, context);
+	reset_tdp_shadow_zero_bits_mask(vcpu, context);
 }
 
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
@@ -3908,6 +3993,7 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
 	context->base_role.smap_andnot_wp
 		= smap && !is_write_protection(vcpu);
 	context->base_role.smm = is_smm(vcpu);
+	reset_shadow_zero_bits_mask(vcpu, context);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
@@ -3931,6 +4017,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
 
 	update_permission_bitmask(vcpu, context, true);
 	reset_rsvds_bits_mask_ept(vcpu, context, execonly);
+	reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
 
@@ -4852,28 +4939,6 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 	return nr_mmu_pages;
 }
 
-int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
-{
-	struct kvm_shadow_walk_iterator iterator;
-	u64 spte;
-	int nr_sptes = 0;
-
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
-		return nr_sptes;
-
-	walk_shadow_page_lockless_begin(vcpu);
-	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
-		sptes[iterator.level-1] = spte;
-		nr_sptes++;
-		if (!is_shadow_present_pte(spte))
-			break;
-	}
-	walk_shadow_page_lockless_end(vcpu);
-
-	return nr_sptes;
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy);
-
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 {
 	kvm_mmu_unload(vcpu);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 398d21c..e4202e4 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -50,9 +50,11 @@ static inline u64 rsvd_bits(int s, int e)
 	return ((1ULL << (e - s + 1)) - 1) << s;
 }
 
-int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
 
+void
+reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
+
 /*
  * Return values of handle_mmio_page_fault_common:
  * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index de1d2d8..9e8bf13 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -120,6 +120,16 @@ static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state)
 	return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK;
 }
 
+static u8 mtrr_disabled_type(void)
+{
+	/*
+	 * Intel SDM 11.11.2.2: all MTRRs are disabled when
+	 * IA32_MTRR_DEF_TYPE.E bit is cleared, and the UC
+	 * memory type is applied to all of physical memory.
+	 */
+	return MTRR_TYPE_UNCACHABLE;
+}
+
 /*
 * Three terms are used in the following code:
 * - segment, it indicates the address segments covered by fixed MTRRs.
@@ -434,6 +444,8 @@ struct mtrr_iter {
 
 	/* output fields. */
 	int mem_type;
+	/* mtrr is completely disabled? */
+	bool mtrr_disabled;
 	/* [start, end) is not fully covered in MTRRs? */
 	bool partial_map;
 
@@ -549,7 +561,7 @@ static void mtrr_lookup_var_next(struct mtrr_iter *iter)
 static void mtrr_lookup_start(struct mtrr_iter *iter)
 {
 	if (!mtrr_is_enabled(iter->mtrr_state)) {
-		iter->partial_map = true;
+		iter->mtrr_disabled = true;
 		return;
 	}
 
@@ -563,6 +575,7 @@ static void mtrr_lookup_init(struct mtrr_iter *iter,
 	iter->mtrr_state = mtrr_state;
 	iter->start = start;
 	iter->end = end;
+	iter->mtrr_disabled = false;
 	iter->partial_map = false;
 	iter->fixed = false;
 	iter->range = NULL;
@@ -656,15 +669,19 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
 		return MTRR_TYPE_WRBACK;
 	}
 
-	/* It is not covered by MTRRs. */
-	if (iter.partial_map) {
-		/*
-		 * We just check one page, partially covered by MTRRs is
-		 * impossible.
-		 */
-		WARN_ON(type != -1);
-		type = mtrr_default_type(mtrr_state);
-	}
+	if (iter.mtrr_disabled)
+		return mtrr_disabled_type();
+
+	/* not contained in any MTRRs. */
+	if (type == -1)
+		return mtrr_default_type(mtrr_state);
+
+	/*
+	 * We just check one page, partially covered by MTRRs is
+	 * impossible.
+	 */
+	WARN_ON(iter.partial_map);
+
 	return type;
 }
 EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);
@@ -689,6 +706,9 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
 			return false;
 	}
 
+	if (iter.mtrr_disabled)
+		return true;
+
 	if (!iter.partial_map)
 		return true;
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 0f67d7e..736e6ab 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -128,14 +128,6 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
 	*access &= mask;
 }
 
-static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
-{
-	int bit7 = (gpte >> 7) & 1, low6 = gpte & 0x3f;
-
-	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) |
-		((mmu->bad_mt_xwr & (1ull << low6)) != 0);
-}
-
 static inline int FNAME(is_present_gpte)(unsigned long pte)
 {
 #if PTTYPE != PTTYPE_EPT
@@ -172,7 +164,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
 				  struct kvm_mmu_page *sp, u64 *spte,
 				  u64 gpte)
 {
-	if (FNAME(is_rsvd_bits_set)(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
+	if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
 		goto no_present;
 
 	if (!FNAME(is_present_gpte)(gpte))
@@ -353,8 +345,7 @@ retry_walk:
 		if (unlikely(!FNAME(is_present_gpte)(pte)))
 			goto error;
 
-		if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte,
-					             walker->level))) {
+		if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) {
 			errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
 			goto error;
 		}
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
index 886aa25..39b9112 100644
--- a/arch/x86/kvm/pmu_amd.c
+++ b/arch/x86/kvm/pmu_amd.c
@@ -133,8 +133,6 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	/* MSR_K7_PERFCTRn */
 	pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0);
 	if (pmc) {
-		if (!msr_info->host_initiated)
-			data = (s64)data;
 		pmc->counter += data - pmc_read_counter(pmc);
 		return 0;
 	}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 602b974..74d8257 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -865,6 +865,64 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
 	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
 }
 
+#define MTRR_TYPE_UC_MINUS	7
+#define MTRR2PROTVAL_INVALID 0xff
+
+static u8 mtrr2protval[8];
+
+static u8 fallback_mtrr_type(int mtrr)
+{
+	/*
+	 * WT and WP aren't always available in the host PAT.  Treat
+	 * them as UC and UC- respectively.  Everything else should be
+	 * there.
+	 */
+	switch (mtrr)
+	{
+	case MTRR_TYPE_WRTHROUGH:
+		return MTRR_TYPE_UNCACHABLE;
+	case MTRR_TYPE_WRPROT:
+		return MTRR_TYPE_UC_MINUS;
+	default:
+		BUG();
+	}
+}
+
+static void build_mtrr2protval(void)
+{
+	int i;
+	u64 pat;
+
+	for (i = 0; i < 8; i++)
+		mtrr2protval[i] = MTRR2PROTVAL_INVALID;
+
+	/* Ignore the invalid MTRR types.  */
+	mtrr2protval[2] = 0;
+	mtrr2protval[3] = 0;
+
+	/*
+	 * Use host PAT value to figure out the mapping from guest MTRR
+	 * values to nested page table PAT/PCD/PWT values.  We do not
+	 * want to change the host PAT value every time we enter the
+	 * guest.
+	 */
+	rdmsrl(MSR_IA32_CR_PAT, pat);
+	for (i = 0; i < 8; i++) {
+		u8 mtrr = pat >> (8 * i);
+
+		if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID)
+			mtrr2protval[mtrr] = __cm_idx2pte(i);
+	}
+
+	for (i = 0; i < 8; i++) {
+		if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) {
+			u8 fallback = fallback_mtrr_type(i);
+			mtrr2protval[i] = mtrr2protval[fallback];
+			BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID);
+		}
+	}
+}
+
 static __init int svm_hardware_setup(void)
 {
 	int cpu;
@@ -931,6 +989,7 @@ static __init int svm_hardware_setup(void)
 	} else
 		kvm_disable_tdp();
 
+	build_mtrr2protval();
 	return 0;
 
 err:
@@ -1085,6 +1144,43 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 	return target_tsc - tsc;
 }
 
+static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+
+	/* Unlike Intel, AMD takes the guest's CR0.CD into account.
+	 *
+	 * AMD does not have IPAT.  To emulate it for the case of guests
+	 * with no assigned devices, just set everything to WB.  If guests
+	 * have assigned devices, however, we cannot force WB for RAM
+	 * pages only, so use the guest PAT directly.
+	 */
+	if (!kvm_arch_has_assigned_device(vcpu->kvm))
+		*g_pat = 0x0606060606060606;
+	else
+		*g_pat = vcpu->arch.pat;
+}
+
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+	u8 mtrr;
+
+	/*
+	 * 1. MMIO: trust guest MTRR, so same as item 3.
+	 * 2. No passthrough: always map as WB, and force guest PAT to WB as well
+	 * 3. Passthrough: can't guarantee the result, try to trust guest.
+	 */
+	if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm))
+		return 0;
+
+	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) &&
+	    kvm_read_cr0(vcpu) & X86_CR0_CD)
+		return _PAGE_NOCACHE;
+
+	mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
+	return mtrr2protval[mtrr];
+}
+
 static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
@@ -1180,6 +1276,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 		clr_cr_intercept(svm, INTERCEPT_CR3_READ);
 		clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
 		save->g_pat = svm->vcpu.arch.pat;
+		svm_set_guest_pat(svm, &save->g_pat);
 		save->cr3 = 0;
 		save->cr4 = 0;
 	}
@@ -1574,13 +1671,10 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 	if (!vcpu->fpu_active)
 		cr0 |= X86_CR0_TS;
-	/*
-	 * re-enable caching here because the QEMU bios
-	 * does not do it - this results in some delay at
-	 * reboot
-	 */
-	if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_CD_NW_CLEARED))
-		cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+
+	/* These are emulated via page tables.  */
+	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+
 	svm->vmcb->save.cr0 = cr0;
 	mark_dirty(svm->vmcb, VMCB_CR);
 	update_cr0_intercept(svm);
@@ -2013,6 +2107,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
 	vcpu->arch.mmu.get_pdptr         = nested_svm_get_tdp_pdptr;
 	vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
 	vcpu->arch.mmu.shadow_root_level = get_npt_level();
+	reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
 	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
 }
 
@@ -3254,6 +3349,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	case MSR_VM_IGNNE:
 		vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
 		break;
+	case MSR_IA32_CR_PAT:
+		if (npt_enabled) {
+			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+				return 1;
+			vcpu->arch.pat = data;
+			svm_set_guest_pat(svm, &svm->vmcb->save.g_pat);
+			mark_dirty(svm->vmcb, VMCB_NPT);
+			break;
+		}
+		/* fall through */
 	default:
 		return kvm_set_msr_common(vcpu, msr);
 	}
@@ -4088,11 +4193,6 @@ static bool svm_has_high_real_mode_segbase(void)
 	return true;
 }
 
-static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
-{
-	return 0;
-}
-
 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 {
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e856dd5..da1590e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2443,10 +2443,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 		CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
 #endif
 		CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
-		CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
-		CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
-		CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW |
-		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+		CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
+		CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
+		CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
+		CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 	/*
 	 * We can allow some features even when not supported by the
 	 * hardware. For example, L1 can specify an MSR bitmap - and we
@@ -3423,12 +3423,12 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
 	vmx_segment_cache_clear(to_vmx(vcpu));
 
 	guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
-	if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
+	if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
 		pr_debug_ratelimited("%s: tss fixup for long mode. \n",
 				     __func__);
 		vmcs_write32(GUEST_TR_AR_BYTES,
-			     (guest_tr_ar & ~AR_TYPE_MASK)
-			     | AR_TYPE_BUSY_64_TSS);
+			     (guest_tr_ar & ~VMX_AR_TYPE_MASK)
+			     | VMX_AR_TYPE_BUSY_64_TSS);
 	}
 	vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
 }
@@ -3719,7 +3719,7 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu)
 		return 0;
 	else {
 		int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
-		return AR_DPL(ar);
+		return VMX_AR_DPL(ar);
 	}
 }
 
@@ -3847,11 +3847,11 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
 
 	if (cs.unusable)
 		return false;
-	if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK))
+	if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
 		return false;
 	if (!cs.s)
 		return false;
-	if (cs.type & AR_TYPE_WRITEABLE_MASK) {
+	if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
 		if (cs.dpl > cs_rpl)
 			return false;
 	} else {
@@ -3901,7 +3901,7 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
 		return false;
 	if (!var.present)
 		return false;
-	if (~var.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK)) {
+	if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
 		if (var.dpl < rpl) /* DPL < RPL */
 			return false;
 	}
@@ -5759,73 +5759,9 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
 }
 
-static u64 ept_rsvd_mask(u64 spte, int level)
-{
-	int i;
-	u64 mask = 0;
-
-	for (i = 51; i > boot_cpu_data.x86_phys_bits; i--)
-		mask |= (1ULL << i);
-
-	if (level == 4)
-		/* bits 7:3 reserved */
-		mask |= 0xf8;
-	else if (spte & (1ULL << 7))
-		/*
-		 * 1GB/2MB page, bits 29:12 or 20:12 reserved respectively,
-		 * level == 1 if the hypervisor is using the ignored bit 7.
-		 */
-		mask |= (PAGE_SIZE << ((level - 1) * 9)) - PAGE_SIZE;
-	else if (level > 1)
-		/* bits 6:3 reserved */
-		mask |= 0x78;
-
-	return mask;
-}
-
-static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,
-				       int level)
-{
-	printk(KERN_ERR "%s: spte 0x%llx level %d\n", __func__, spte, level);
-
-	/* 010b (write-only) */
-	WARN_ON((spte & 0x7) == 0x2);
-
-	/* 110b (write/execute) */
-	WARN_ON((spte & 0x7) == 0x6);
-
-	/* 100b (execute-only) and value not supported by logical processor */
-	if (!cpu_has_vmx_ept_execute_only())
-		WARN_ON((spte & 0x7) == 0x4);
-
-	/* not 000b */
-	if ((spte & 0x7)) {
-		u64 rsvd_bits = spte & ept_rsvd_mask(spte, level);
-
-		if (rsvd_bits != 0) {
-			printk(KERN_ERR "%s: rsvd_bits = 0x%llx\n",
-					 __func__, rsvd_bits);
-			WARN_ON(1);
-		}
-
-		/* bits 5:3 are _not_ reserved for large page or leaf page */
-		if ((rsvd_bits & 0x38) == 0) {
-			u64 ept_mem_type = (spte & 0x38) >> 3;
-
-			if (ept_mem_type == 2 || ept_mem_type == 3 ||
-			    ept_mem_type == 7) {
-				printk(KERN_ERR "%s: ept_mem_type=0x%llx\n",
-						__func__, ept_mem_type);
-				WARN_ON(1);
-			}
-		}
-	}
-}
-
 static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 {
-	u64 sptes[4];
-	int nr_sptes, i, ret;
+	int ret;
 	gpa_t gpa;
 
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
@@ -5846,13 +5782,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 		return 1;
 
 	/* It is the real ept misconfig */
-	printk(KERN_ERR "EPT: Misconfiguration.\n");
-	printk(KERN_ERR "EPT: GPA: 0x%llx\n", gpa);
-
-	nr_sptes = kvm_mmu_get_spte_hierarchy(vcpu, gpa, sptes);
-
-	for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i)
-		ept_misconfig_inspect_spte(vcpu, sptes[i-1], i);
+	WARN_ON(1);
 
 	vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
 	vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
@@ -6246,6 +6176,11 @@ static int handle_mwait(struct kvm_vcpu *vcpu)
 	return handle_nop(vcpu);
 }
 
+static int handle_monitor_trap(struct kvm_vcpu *vcpu)
+{
+	return 1;
+}
+
 static int handle_monitor(struct kvm_vcpu *vcpu)
 {
 	printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
@@ -6408,8 +6343,12 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
  */
 static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
 				 unsigned long exit_qualification,
-				 u32 vmx_instruction_info, gva_t *ret)
+				 u32 vmx_instruction_info, bool wr, gva_t *ret)
 {
+	gva_t off;
+	bool exn;
+	struct kvm_segment s;
+
 	/*
 	 * According to Vol. 3B, "Information for VM Exits Due to Instruction
 	 * Execution", on an exit, vmx_instruction_info holds most of the
@@ -6434,22 +6373,63 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
 
 	/* Addr = segment_base + offset */
 	/* offset = base + [index * scale] + displacement */
-	*ret = vmx_get_segment_base(vcpu, seg_reg);
+	off = exit_qualification; /* holds the displacement */
 	if (base_is_valid)
-		*ret += kvm_register_read(vcpu, base_reg);
+		off += kvm_register_read(vcpu, base_reg);
 	if (index_is_valid)
-		*ret += kvm_register_read(vcpu, index_reg)<<scaling;
-	*ret += exit_qualification; /* holds the displacement */
+		off += kvm_register_read(vcpu, index_reg)<<scaling;
+	vmx_get_segment(vcpu, &s, seg_reg);
+	*ret = s.base + off;
 
 	if (addr_size == 1) /* 32 bit */
 		*ret &= 0xffffffff;
 
-	/*
-	 * TODO: throw #GP (and return 1) in various cases that the VM*
-	 * instructions require it - e.g., offset beyond segment limit,
-	 * unusable or unreadable/unwritable segment, non-canonical 64-bit
-	 * address, and so on. Currently these are not checked.
-	 */
+	/* Checks for #GP/#SS exceptions. */
+	exn = false;
+	if (is_protmode(vcpu)) {
+		/* Protected mode: apply checks for segment validity in the
+		 * following order:
+		 * - segment type check (#GP(0) may be thrown)
+		 * - usability check (#GP(0)/#SS(0))
+		 * - limit check (#GP(0)/#SS(0))
+		 */
+		if (wr)
+			/* #GP(0) if the destination operand is located in a
+			 * read-only data segment or any code segment.
+			 */
+			exn = ((s.type & 0xa) == 0 || (s.type & 8));
+		else
+			/* #GP(0) if the source operand is located in an
+			 * execute-only code segment
+			 */
+			exn = ((s.type & 0xa) == 8);
+	}
+	if (exn) {
+		kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+		return 1;
+	}
+	if (is_long_mode(vcpu)) {
+		/* Long mode: #GP(0)/#SS(0) if the memory address is in a
+		 * non-canonical form. This is an only check for long mode.
+		 */
+		exn = is_noncanonical_address(*ret);
+	} else if (is_protmode(vcpu)) {
+		/* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
+		 */
+		exn = (s.unusable != 0);
+		/* Protected mode: #GP(0)/#SS(0) if the memory
+		 * operand is outside the segment limit.
+		 */
+		exn = exn || (off + sizeof(u64) > s.limit);
+	}
+	if (exn) {
+		kvm_queue_exception_e(vcpu,
+				      seg_reg == VCPU_SREG_SS ?
+						SS_VECTOR : GP_VECTOR,
+				      0);
+		return 1;
+	}
+
 	return 0;
 }
 
@@ -6471,7 +6451,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
 	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 
 	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
-			vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
+			vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
 		return 1;
 
 	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
@@ -6999,7 +6979,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 			field_value);
 	} else {
 		if (get_vmx_mem_address(vcpu, exit_qualification,
-				vmx_instruction_info, &gva))
+				vmx_instruction_info, true, &gva))
 			return 1;
 		/* _system ok, as nested_vmx_check_permission verified cpl=0 */
 		kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva,
@@ -7036,7 +7016,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 			(((vmx_instruction_info) >> 3) & 0xf));
 	else {
 		if (get_vmx_mem_address(vcpu, exit_qualification,
-				vmx_instruction_info, &gva))
+				vmx_instruction_info, false, &gva))
 			return 1;
 		if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva,
 			   &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
@@ -7128,7 +7108,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
 		return 1;
 
 	if (get_vmx_mem_address(vcpu, exit_qualification,
-			vmx_instruction_info, &vmcs_gva))
+			vmx_instruction_info, true, &vmcs_gva))
 		return 1;
 	/* ok to use *_system, as nested_vmx_check_permission verified cpl=0 */
 	if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva,
@@ -7184,7 +7164,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 	 * operand is read even if it isn't needed (e.g., for type==global)
 	 */
 	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
-			vmx_instruction_info, &gva))
+			vmx_instruction_info, false, &gva))
 		return 1;
 	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
 				sizeof(operand), &e)) {
@@ -7282,6 +7262,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_EPT_MISCONFIG]           = handle_ept_misconfig,
 	[EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
 	[EXIT_REASON_MWAIT_INSTRUCTION]	      = handle_mwait,
+	[EXIT_REASON_MONITOR_TRAP_FLAG]       = handle_monitor_trap,
 	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
 	[EXIT_REASON_INVEPT]                  = handle_invept,
 	[EXIT_REASON_INVVPID]                 = handle_invvpid,
@@ -7542,6 +7523,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		return true;
 	case EXIT_REASON_MWAIT_INSTRUCTION:
 		return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
+	case EXIT_REASON_MONITOR_TRAP_FLAG:
+		return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG);
 	case EXIT_REASON_MONITOR_INSTRUCTION:
 		return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
 	case EXIT_REASON_PAUSE_INSTRUCTION:
@@ -8632,22 +8615,17 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	u64 ipat = 0;
 
 	/* For VT-d and EPT combination
-	 * 1. MMIO: always map as UC
+	 * 1. MMIO: guest may want to apply WC, trust it.
 	 * 2. EPT with VT-d:
 	 *   a. VT-d without snooping control feature: can't guarantee the
-	 *	result, try to trust guest.
+	 *	result, try to trust guest.  So the same as item 1.
 	 *   b. VT-d with snooping control feature: snooping control feature of
 	 *	VT-d engine can guarantee the cache correctness. Just set it
 	 *	to WB to keep consistent with host. So the same as item 3.
 	 * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
 	 *    consistent with host MTRR
 	 */
-	if (is_mmio) {
-		cache = MTRR_TYPE_UNCACHABLE;
-		goto exit;
-	}
-
-	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
+	if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
 		ipat = VMX_EPT_IPAT_BIT;
 		cache = MTRR_TYPE_WRBACK;
 		goto exit;
@@ -8655,7 +8633,10 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 
 	if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
 		ipat = VMX_EPT_IPAT_BIT;
-		cache = MTRR_TYPE_UNCACHABLE;
+		if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+			cache = MTRR_TYPE_WRBACK;
+		else
+			cache = MTRR_TYPE_UNCACHABLE;
 		goto exit;
 	}
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bbaf44e..4bbc2a1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -29,6 +29,7 @@
 #include "cpuid.h"
 #include "assigned-dev.h"
 #include "pmu.h"
+#include "hyperv.h"
 
 #include <linux/clocksource.h>
 #include <linux/interrupt.h>
@@ -221,11 +222,9 @@ static void shared_msr_update(unsigned slot, u32 msr)
 void kvm_define_shared_msr(unsigned slot, u32 msr)
 {
 	BUG_ON(slot >= KVM_NR_SHARED_MSRS);
+	shared_msrs_global.msrs[slot] = msr;
 	if (slot >= shared_msrs_global.nr)
 		shared_msrs_global.nr = slot + 1;
-	shared_msrs_global.msrs[slot] = msr;
-	/* we need ensured the shared_msr_global have been updated */
-	smp_wmb();
 }
 EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
 
@@ -526,7 +525,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
 	}
 	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
 		if (is_present_gpte(pdpte[i]) &&
-		    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
+		    (pdpte[i] &
+		     vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) {
 			ret = 0;
 			goto out;
 		}
@@ -949,6 +949,8 @@ static u32 emulated_msrs[] = {
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
 	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
 	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
+	HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
+	HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
 	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
 	MSR_KVM_PV_EOI_EN,
 
@@ -1217,11 +1219,6 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
 		 __func__, base_khz, scaled_khz, shift, *pmultiplier);
 }
 
-static inline u64 get_kernel_ns(void)
-{
-	return ktime_get_boot_ns();
-}
-
 #ifdef CONFIG_X86_64
 static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
 #endif
@@ -1869,123 +1866,6 @@ out:
 	return r;
 }
 
-static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
-{
-	return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
-}
-
-static bool kvm_hv_msr_partition_wide(u32 msr)
-{
-	bool r = false;
-	switch (msr) {
-	case HV_X64_MSR_GUEST_OS_ID:
-	case HV_X64_MSR_HYPERCALL:
-	case HV_X64_MSR_REFERENCE_TSC:
-	case HV_X64_MSR_TIME_REF_COUNT:
-		r = true;
-		break;
-	}
-
-	return r;
-}
-
-static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
-{
-	struct kvm *kvm = vcpu->kvm;
-
-	switch (msr) {
-	case HV_X64_MSR_GUEST_OS_ID:
-		kvm->arch.hv_guest_os_id = data;
-		/* setting guest os id to zero disables hypercall page */
-		if (!kvm->arch.hv_guest_os_id)
-			kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
-		break;
-	case HV_X64_MSR_HYPERCALL: {
-		u64 gfn;
-		unsigned long addr;
-		u8 instructions[4];
-
-		/* if guest os id is not set hypercall should remain disabled */
-		if (!kvm->arch.hv_guest_os_id)
-			break;
-		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
-			kvm->arch.hv_hypercall = data;
-			break;
-		}
-		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
-		addr = gfn_to_hva(kvm, gfn);
-		if (kvm_is_error_hva(addr))
-			return 1;
-		kvm_x86_ops->patch_hypercall(vcpu, instructions);
-		((unsigned char *)instructions)[3] = 0xc3; /* ret */
-		if (__copy_to_user((void __user *)addr, instructions, 4))
-			return 1;
-		kvm->arch.hv_hypercall = data;
-		mark_page_dirty(kvm, gfn);
-		break;
-	}
-	case HV_X64_MSR_REFERENCE_TSC: {
-		u64 gfn;
-		HV_REFERENCE_TSC_PAGE tsc_ref;
-		memset(&tsc_ref, 0, sizeof(tsc_ref));
-		kvm->arch.hv_tsc_page = data;
-		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
-			break;
-		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
-		if (kvm_write_guest(kvm, gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
-			&tsc_ref, sizeof(tsc_ref)))
-			return 1;
-		mark_page_dirty(kvm, gfn);
-		break;
-	}
-	default:
-		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
-			    "data 0x%llx\n", msr, data);
-		return 1;
-	}
-	return 0;
-}
-
-static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
-{
-	switch (msr) {
-	case HV_X64_MSR_APIC_ASSIST_PAGE: {
-		u64 gfn;
-		unsigned long addr;
-
-		if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
-			vcpu->arch.hv_vapic = data;
-			if (kvm_lapic_enable_pv_eoi(vcpu, 0))
-				return 1;
-			break;
-		}
-		gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
-		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
-		if (kvm_is_error_hva(addr))
-			return 1;
-		if (__clear_user((void __user *)addr, PAGE_SIZE))
-			return 1;
-		vcpu->arch.hv_vapic = data;
-		kvm_vcpu_mark_page_dirty(vcpu, gfn);
-		if (kvm_lapic_enable_pv_eoi(vcpu, gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
-			return 1;
-		break;
-	}
-	case HV_X64_MSR_EOI:
-		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
-	case HV_X64_MSR_ICR:
-		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
-	case HV_X64_MSR_TPR:
-		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
-	default:
-		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
-			    "data 0x%llx\n", msr, data);
-		return 1;
-	}
-
-	return 0;
-}
-
 static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 {
 	gpa_t gpa = data & ~0x3f;
@@ -2105,7 +1985,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (guest_cpuid_has_tsc_adjust(vcpu)) {
 			if (!msr_info->host_initiated) {
 				s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
-				kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
+				adjust_tsc_offset_guest(vcpu, adj);
 			}
 			vcpu->arch.ia32_tsc_adjust_msr = data;
 		}
@@ -2224,15 +2104,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 */
 		break;
 	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
-		if (kvm_hv_msr_partition_wide(msr)) {
-			int r;
-			mutex_lock(&vcpu->kvm->lock);
-			r = set_msr_hyperv_pw(vcpu, msr, data);
-			mutex_unlock(&vcpu->kvm->lock);
-			return r;
-		} else
-			return set_msr_hyperv(vcpu, msr, data);
-		break;
+	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+	case HV_X64_MSR_CRASH_CTL:
+		return kvm_hv_set_msr_common(vcpu, msr, data,
+					     msr_info->host_initiated);
 	case MSR_IA32_BBL_CR_CTL3:
 		/* Drop writes to this legacy MSR -- see rdmsr
 		 * counterpart for further detail.
@@ -2315,68 +2190,6 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	return 0;
 }
 
-static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
-{
-	u64 data = 0;
-	struct kvm *kvm = vcpu->kvm;
-
-	switch (msr) {
-	case HV_X64_MSR_GUEST_OS_ID:
-		data = kvm->arch.hv_guest_os_id;
-		break;
-	case HV_X64_MSR_HYPERCALL:
-		data = kvm->arch.hv_hypercall;
-		break;
-	case HV_X64_MSR_TIME_REF_COUNT: {
-		data =
-		     div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
-		break;
-	}
-	case HV_X64_MSR_REFERENCE_TSC:
-		data = kvm->arch.hv_tsc_page;
-		break;
-	default:
-		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
-		return 1;
-	}
-
-	*pdata = data;
-	return 0;
-}
-
-static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
-{
-	u64 data = 0;
-
-	switch (msr) {
-	case HV_X64_MSR_VP_INDEX: {
-		int r;
-		struct kvm_vcpu *v;
-		kvm_for_each_vcpu(r, v, vcpu->kvm) {
-			if (v == vcpu) {
-				data = r;
-				break;
-			}
-		}
-		break;
-	}
-	case HV_X64_MSR_EOI:
-		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
-	case HV_X64_MSR_ICR:
-		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
-	case HV_X64_MSR_TPR:
-		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
-	case HV_X64_MSR_APIC_ASSIST_PAGE:
-		data = vcpu->arch.hv_vapic;
-		break;
-	default:
-		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
-		return 1;
-	}
-	*pdata = data;
-	return 0;
-}
-
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	switch (msr_info->index) {
@@ -2493,14 +2306,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = 0x20000000;
 		break;
 	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
-		if (kvm_hv_msr_partition_wide(msr_info->index)) {
-			int r;
-			mutex_lock(&vcpu->kvm->lock);
-			r = get_msr_hyperv_pw(vcpu, msr_info->index, &msr_info->data);
-			mutex_unlock(&vcpu->kvm->lock);
-			return r;
-		} else
-			return get_msr_hyperv(vcpu, msr_info->index, &msr_info->data);
+	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+	case HV_X64_MSR_CRASH_CTL:
+		return kvm_hv_get_msr_common(vcpu,
+					     msr_info->index, &msr_info->data);
 		break;
 	case MSR_IA32_BBL_CR_CTL3:
 		/* This legacy MSR exists but isn't fully documented in current
@@ -2651,6 +2460,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_TSC_DEADLINE_TIMER:
 	case KVM_CAP_ENABLE_CAP_VM:
 	case KVM_CAP_DISABLE_QUIRKS:
+	case KVM_CAP_SET_BOOT_CPU_ID:
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
@@ -3157,8 +2967,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 			cpuid_count(XSTATE_CPUID, index,
 				    &size, &offset, &ecx, &edx);
 			memcpy(dest, src + offset, size);
-		} else
-			WARN_ON_ONCE(1);
+		}
 
 		valid -= feature;
 	}
@@ -3818,30 +3627,25 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			r = kvm_ioapic_init(kvm);
 			if (r) {
 				mutex_lock(&kvm->slots_lock);
-				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
-							  &vpic->dev_master);
-				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
-							  &vpic->dev_slave);
-				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
-							  &vpic->dev_eclr);
+				kvm_destroy_pic(vpic);
 				mutex_unlock(&kvm->slots_lock);
-				kfree(vpic);
 				goto create_irqchip_unlock;
 			}
 		} else
 			goto create_irqchip_unlock;
-		smp_wmb();
-		kvm->arch.vpic = vpic;
-		smp_wmb();
 		r = kvm_setup_default_irq_routing(kvm);
 		if (r) {
 			mutex_lock(&kvm->slots_lock);
 			mutex_lock(&kvm->irq_lock);
 			kvm_ioapic_destroy(kvm);
-			kvm_destroy_pic(kvm);
+			kvm_destroy_pic(vpic);
 			mutex_unlock(&kvm->irq_lock);
 			mutex_unlock(&kvm->slots_lock);
+			goto create_irqchip_unlock;
 		}
+		/* Write kvm->irq_routing before kvm->arch.vpic.  */
+		smp_wmb();
+		kvm->arch.vpic = vpic;
 	create_irqchip_unlock:
 		mutex_unlock(&kvm->lock);
 		break;
@@ -3968,6 +3772,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_reinject(kvm, &control);
 		break;
 	}
+	case KVM_SET_BOOT_CPU_ID:
+		r = 0;
+		mutex_lock(&kvm->lock);
+		if (atomic_read(&kvm->online_vcpus) != 0)
+			r = -EBUSY;
+		else
+			kvm->arch.bsp_vcpu_id = arg;
+		mutex_unlock(&kvm->lock);
+		break;
 	case KVM_XEN_HVM_CONFIG: {
 		r = -EFAULT;
 		if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
@@ -5883,66 +5696,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
-int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
-{
-	u64 param, ingpa, outgpa, ret;
-	uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
-	bool fast, longmode;
-
-	/*
-	 * hypercall generates UD from non zero cpl and real mode
-	 * per HYPER-V spec
-	 */
-	if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
-		kvm_queue_exception(vcpu, UD_VECTOR);
-		return 0;
-	}
-
-	longmode = is_64_bit_mode(vcpu);
-
-	if (!longmode) {
-		param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
-			(kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
-		ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
-			(kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
-		outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
-			(kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
-	}
-#ifdef CONFIG_X86_64
-	else {
-		param = kvm_register_read(vcpu, VCPU_REGS_RCX);
-		ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
-		outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
-	}
-#endif
-
-	code = param & 0xffff;
-	fast = (param >> 16) & 0x1;
-	rep_cnt = (param >> 32) & 0xfff;
-	rep_idx = (param >> 48) & 0xfff;
-
-	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
-
-	switch (code) {
-	case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
-		kvm_vcpu_on_spin(vcpu);
-		break;
-	default:
-		res = HV_STATUS_INVALID_HYPERCALL_CODE;
-		break;
-	}
-
-	ret = res | (((u64)rep_done & 0xfff) << 32);
-	if (longmode) {
-		kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
-	} else {
-		kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
-		kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
-	}
-
-	return 1;
-}
-
 /*
  * kvm_pv_kick_cpu_op:  Kick a vcpu.
  *
@@ -6328,6 +6081,7 @@ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
 static void process_smi(struct kvm_vcpu *vcpu)
 {
 	struct kvm_segment cs, ds;
+	struct desc_ptr dt;
 	char buf[512];
 	u32 cr0;
 
@@ -6360,6 +6114,10 @@ static void process_smi(struct kvm_vcpu *vcpu)
 
 	kvm_x86_ops->set_cr4(vcpu, 0);
 
+	/* Undocumented: IDT limit is set to zero on entry to SMM.  */
+	dt.address = dt.size = 0;
+	kvm_x86_ops->set_idt(vcpu, &dt);
+
 	__kvm_set_dr(vcpu, 7, DR7_FIXED_1);
 
 	cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
@@ -6514,6 +6272,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			vcpu_scan_ioapic(vcpu);
 		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
 			kvm_vcpu_reload_apic_access_page(vcpu);
+		if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
+			vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+			vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
+			r = 0;
+			goto out;
+		}
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -7315,11 +7079,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
 	vcpu = kvm_x86_ops->vcpu_create(kvm, id);
 
-	/*
-	 * Activate fpu unconditionally in case the guest needs eager FPU.  It will be
-	 * deactivated soon if it doesn't.
-	 */
-	kvm_x86_ops->fpu_activate(vcpu);
 	return vcpu;
 }
 
@@ -7541,6 +7300,17 @@ void kvm_arch_check_processor_compat(void *rtn)
 	kvm_x86_ops->check_processor_compatibility(rtn);
 }
 
+bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
+{
+	return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
+
+bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
+}
+
 bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
 {
 	return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
@@ -8218,6 +7988,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
 			kvm_x86_ops->interrupt_allowed(vcpu);
 }
 
+void kvm_arch_start_assignment(struct kvm *kvm)
+{
+	atomic_inc(&kvm->arch.assigned_device_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
+
+void kvm_arch_end_assignment(struct kvm *kvm)
+{
+	atomic_dec(&kvm->arch.assigned_device_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
+
+bool kvm_arch_has_assigned_device(struct kvm *kvm)
+{
+	return atomic_read(&kvm->arch.assigned_device_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
+
 void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
 {
 	atomic_inc(&kvm->arch.noncoherent_dma_count);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index edc8cdc..2f822cd 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -147,6 +147,16 @@ static inline void kvm_register_writel(struct kvm_vcpu *vcpu,
 	return kvm_register_write(vcpu, reg, val);
 }
 
+static inline u64 get_kernel_ns(void)
+{
+	return ktime_get_boot_ns();
+}
+
+static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
+{
+	return !(kvm->arch.disabled_quirks & quirk);
+}
+
 void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
 void kvm_set_pending_timer(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index ddf9ecb..e342586 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -20,7 +20,7 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 	unsigned long ret;
 
 	if (__range_not_ok(from, n, TASK_SIZE))
-		return 0;
+		return n;
 
 	/*
 	 * Even though this function is typically called from NMI/IRQ context
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index f37e84a..3d8f2e4 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -29,7 +29,6 @@
 
 #include <asm/uaccess.h>
 #include <asm/traps.h>
-#include <asm/desc.h>
 #include <asm/user.h>
 #include <asm/fpu/internal.h>
 
@@ -181,7 +180,7 @@ void math_emulate(struct math_emu_info *info)
 			math_abort(FPU_info, SIGILL);
 		}
 
-		code_descriptor = LDT_DESCRIPTOR(FPU_CS);
+		code_descriptor = FPU_get_ldt_descriptor(FPU_CS);
 		if (SEG_D_SIZE(code_descriptor)) {
 			/* The above test may be wrong, the book is not clear */
 			/* Segmented 32 bit protected mode */
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index 9ccecb6..5e044d5 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -16,9 +16,24 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 
-/* s is always from a cpu register, and the cpu does bounds checking
- * during register load --> no further bounds checks needed */
-#define LDT_DESCRIPTOR(s)	(((struct desc_struct *)current->mm->context.ldt)[(s) >> 3])
+#include <asm/desc.h>
+#include <asm/mmu_context.h>
+
+static inline struct desc_struct FPU_get_ldt_descriptor(unsigned seg)
+{
+	static struct desc_struct zero_desc;
+	struct desc_struct ret = zero_desc;
+
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+	seg >>= 3;
+	mutex_lock(&current->mm->context.lock);
+	if (current->mm->context.ldt && seg < current->mm->context.ldt->size)
+		ret = current->mm->context.ldt->entries[seg];
+	mutex_unlock(&current->mm->context.lock);
+#endif
+	return ret;
+}
+
 #define SEG_D_SIZE(x)		((x).b & (3 << 21))
 #define SEG_G_BIT(x)		((x).b & (1 << 23))
 #define SEG_GRANULARITY(x)	(((x).b & (1 << 23)) ? 4096 : 1)
diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c
index 6ef5e99..8300db7 100644
--- a/arch/x86/math-emu/get_address.c
+++ b/arch/x86/math-emu/get_address.c
@@ -20,7 +20,6 @@
 #include <linux/stddef.h>
 
 #include <asm/uaccess.h>
-#include <asm/desc.h>
 
 #include "fpu_system.h"
 #include "exception.h"
@@ -158,7 +157,7 @@ static long pm_address(u_char FPU_modrm, u_char segment,
 		addr->selector = PM_REG_(segment);
 	}
 
-	descriptor = LDT_DESCRIPTOR(PM_REG_(segment));
+	descriptor = FPU_get_ldt_descriptor(addr->selector);
 	base_address = SEG_BASE_ADDR(descriptor);
 	address = base_address + offset;
 	limit = base_address
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index cc5ccc4..b9c78f3 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -63,8 +63,6 @@ static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
 		    !PageReserved(pfn_to_page(start_pfn + i)))
 			return 1;
 
-	WARN_ONCE(1, "ioremap on RAM pfn 0x%lx\n", start_pfn);
-
 	return 0;
 }
 
@@ -94,7 +92,6 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	pgprot_t prot;
 	int retval;
 	void __iomem *ret_addr;
-	int ram_region;
 
 	/* Don't allow wraparound or zero size */
 	last_addr = phys_addr + size - 1;
@@ -117,23 +114,15 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	/*
 	 * Don't allow anybody to remap normal RAM that we're using..
 	 */
-	/* First check if whole region can be identified as RAM or not */
-	ram_region = region_is_ram(phys_addr, size);
-	if (ram_region > 0) {
-		WARN_ONCE(1, "ioremap on RAM at 0x%lx - 0x%lx\n",
-				(unsigned long int)phys_addr,
-				(unsigned long int)last_addr);
+	pfn      = phys_addr >> PAGE_SHIFT;
+	last_pfn = last_addr >> PAGE_SHIFT;
+	if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL,
+					  __ioremap_check_ram) == 1) {
+		WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
+			  &phys_addr, &last_addr);
 		return NULL;
 	}
 
-	/* If could not be identified(-1), check page by page */
-	if (ram_region < 0) {
-		pfn      = phys_addr >> PAGE_SHIFT;
-		last_pfn = last_addr >> PAGE_SHIFT;
-		if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL,
-					  __ioremap_check_ram) == 1)
-			return NULL;
-	}
 	/*
 	 * Mappings have to be page-aligned
 	 */
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 4860906..9ce5da2 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -1,3 +1,4 @@
+#define pr_fmt(fmt) "kasan: " fmt
 #include <linux/bootmem.h>
 #include <linux/kasan.h>
 #include <linux/kdebug.h>
@@ -11,8 +12,6 @@
 extern pgd_t early_level4_pgt[PTRS_PER_PGD];
 extern struct range pfn_mapped[E820_X_MAX];
 
-extern unsigned char kasan_zero_page[PAGE_SIZE];
-
 static int __init map_range(struct range *range)
 {
 	unsigned long start;
@@ -36,7 +35,7 @@ static void __init clear_pgds(unsigned long start,
 		pgd_clear(pgd_offset_k(start));
 }
 
-void __init kasan_map_early_shadow(pgd_t *pgd)
+static void __init kasan_map_early_shadow(pgd_t *pgd)
 {
 	int i;
 	unsigned long start = KASAN_SHADOW_START;
@@ -49,106 +48,6 @@ void __init kasan_map_early_shadow(pgd_t *pgd)
 	}
 }
 
-static int __init zero_pte_populate(pmd_t *pmd, unsigned long addr,
-				unsigned long end)
-{
-	pte_t *pte = pte_offset_kernel(pmd, addr);
-
-	while (addr + PAGE_SIZE <= end) {
-		WARN_ON(!pte_none(*pte));
-		set_pte(pte, __pte(__pa_nodebug(kasan_zero_page)
-					| __PAGE_KERNEL_RO));
-		addr += PAGE_SIZE;
-		pte = pte_offset_kernel(pmd, addr);
-	}
-	return 0;
-}
-
-static int __init zero_pmd_populate(pud_t *pud, unsigned long addr,
-				unsigned long end)
-{
-	int ret = 0;
-	pmd_t *pmd = pmd_offset(pud, addr);
-
-	while (IS_ALIGNED(addr, PMD_SIZE) && addr + PMD_SIZE <= end) {
-		WARN_ON(!pmd_none(*pmd));
-		set_pmd(pmd, __pmd(__pa_nodebug(kasan_zero_pte)
-					| __PAGE_KERNEL_RO));
-		addr += PMD_SIZE;
-		pmd = pmd_offset(pud, addr);
-	}
-	if (addr < end) {
-		if (pmd_none(*pmd)) {
-			void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
-			if (!p)
-				return -ENOMEM;
-			set_pmd(pmd, __pmd(__pa_nodebug(p) | _KERNPG_TABLE));
-		}
-		ret = zero_pte_populate(pmd, addr, end);
-	}
-	return ret;
-}
-
-
-static int __init zero_pud_populate(pgd_t *pgd, unsigned long addr,
-				unsigned long end)
-{
-	int ret = 0;
-	pud_t *pud = pud_offset(pgd, addr);
-
-	while (IS_ALIGNED(addr, PUD_SIZE) && addr + PUD_SIZE <= end) {
-		WARN_ON(!pud_none(*pud));
-		set_pud(pud, __pud(__pa_nodebug(kasan_zero_pmd)
-					| __PAGE_KERNEL_RO));
-		addr += PUD_SIZE;
-		pud = pud_offset(pgd, addr);
-	}
-
-	if (addr < end) {
-		if (pud_none(*pud)) {
-			void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
-			if (!p)
-				return -ENOMEM;
-			set_pud(pud, __pud(__pa_nodebug(p) | _KERNPG_TABLE));
-		}
-		ret = zero_pmd_populate(pud, addr, end);
-	}
-	return ret;
-}
-
-static int __init zero_pgd_populate(unsigned long addr, unsigned long end)
-{
-	int ret = 0;
-	pgd_t *pgd = pgd_offset_k(addr);
-
-	while (IS_ALIGNED(addr, PGDIR_SIZE) && addr + PGDIR_SIZE <= end) {
-		WARN_ON(!pgd_none(*pgd));
-		set_pgd(pgd, __pgd(__pa_nodebug(kasan_zero_pud)
-					| __PAGE_KERNEL_RO));
-		addr += PGDIR_SIZE;
-		pgd = pgd_offset_k(addr);
-	}
-
-	if (addr < end) {
-		if (pgd_none(*pgd)) {
-			void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
-			if (!p)
-				return -ENOMEM;
-			set_pgd(pgd, __pgd(__pa_nodebug(p) | _KERNPG_TABLE));
-		}
-		ret = zero_pud_populate(pgd, addr, end);
-	}
-	return ret;
-}
-
-
-static void __init populate_zero_shadow(const void *start, const void *end)
-{
-	if (zero_pgd_populate((unsigned long)start, (unsigned long)end))
-		panic("kasan: unable to map zero shadow!");
-}
-
-
 #ifdef CONFIG_KASAN_INLINE
 static int kasan_die_handler(struct notifier_block *self,
 			     unsigned long val,
@@ -166,6 +65,26 @@ static struct notifier_block kasan_die_notifier = {
 };
 #endif
 
+void __init kasan_early_init(void)
+{
+	int i;
+	pteval_t pte_val = __pa_nodebug(kasan_zero_page) | __PAGE_KERNEL;
+	pmdval_t pmd_val = __pa_nodebug(kasan_zero_pte) | _KERNPG_TABLE;
+	pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE;
+
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		kasan_zero_pte[i] = __pte(pte_val);
+
+	for (i = 0; i < PTRS_PER_PMD; i++)
+		kasan_zero_pmd[i] = __pmd(pmd_val);
+
+	for (i = 0; i < PTRS_PER_PUD; i++)
+		kasan_zero_pud[i] = __pud(pud_val);
+
+	kasan_map_early_shadow(early_level4_pgt);
+	kasan_map_early_shadow(init_level4_pgt);
+}
+
 void __init kasan_init(void)
 {
 	int i;
@@ -176,10 +95,11 @@ void __init kasan_init(void)
 
 	memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt));
 	load_cr3(early_level4_pgt);
+	__flush_tlb_all();
 
 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
-	populate_zero_shadow((void *)KASAN_SHADOW_START,
+	kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
 			kasan_mem_to_shadow((void *)PAGE_OFFSET));
 
 	for (i = 0; i < E820_X_MAX; i++) {
@@ -189,18 +109,22 @@ void __init kasan_init(void)
 		if (map_range(&pfn_mapped[i]))
 			panic("kasan: unable to allocate shadow!");
 	}
-	populate_zero_shadow(kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
-			kasan_mem_to_shadow((void *)__START_KERNEL_map));
+	kasan_populate_zero_shadow(
+		kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
+		kasan_mem_to_shadow((void *)__START_KERNEL_map));
 
 	vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext),
 			(unsigned long)kasan_mem_to_shadow(_end),
 			NUMA_NO_NODE);
 
-	populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
+	kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
 			(void *)KASAN_SHADOW_END);
 
 	memset(kasan_zero_page, 0, PAGE_SIZE);
 
 	load_cr3(init_level4_pgt);
+	__flush_tlb_all();
 	init_task.kasan_depth = 0;
+
+	pr_info("Kernel address sanitizer initialized\n");
 }
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 9d518d6..844b06d 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -126,3 +126,10 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_MPX)
+		return "[mpx]";
+	return NULL;
+}
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 7a657f5..db1b0bc 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -20,20 +20,6 @@
 #define CREATE_TRACE_POINTS
 #include <asm/trace/mpx.h>
 
-static const char *mpx_mapping_name(struct vm_area_struct *vma)
-{
-	return "[mpx]";
-}
-
-static struct vm_operations_struct mpx_vma_ops = {
-	.name = mpx_mapping_name,
-};
-
-static int is_mpx_vma(struct vm_area_struct *vma)
-{
-	return (vma->vm_ops == &mpx_vma_ops);
-}
-
 static inline unsigned long mpx_bd_size_bytes(struct mm_struct *mm)
 {
 	if (is_64bit_mm(mm))
@@ -53,9 +39,6 @@ static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm)
 /*
  * This is really a simplified "vm_mmap". it only handles MPX
  * bounds tables (the bounds directory is user-allocated).
- *
- * Later on, we use the vma->vm_ops to uniquely identify these
- * VMAs.
  */
 static unsigned long mpx_mmap(unsigned long len)
 {
@@ -101,7 +84,6 @@ static unsigned long mpx_mmap(unsigned long len)
 		ret = -ENOMEM;
 		goto out;
 	}
-	vma->vm_ops = &mpx_vma_ops;
 
 	if (vm_flags & VM_LOCKED) {
 		up_write(&mm->mmap_sem);
@@ -812,7 +794,7 @@ static noinline int zap_bt_entries_mapping(struct mm_struct *mm,
 		 * so stop immediately and return an error.  This
 		 * probably results in a SIGSEGV.
 		 */
-		if (!is_mpx_vma(vma))
+		if (!(vma->vm_flags & VM_MPX))
 			return -EINVAL;
 
 		len = min(vma->vm_end, end) - addr;
@@ -945,9 +927,9 @@ static int try_unmap_single_bt(struct mm_struct *mm,
 	 * lots of tables even though we have no actual table
 	 * entries in use.
 	 */
-	while (next && is_mpx_vma(next))
+	while (next && (next->vm_flags & VM_MPX))
 		next = next->vm_next;
-	while (prev && is_mpx_vma(prev))
+	while (prev && (prev->vm_flags & VM_MPX))
 		prev = prev->vm_prev;
 	/*
 	 * We know 'start' and 'end' lie within an area controlled
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 3250f23..90b924a 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -117,7 +117,7 @@ static void flush_tlb_func(void *info)
 		} else {
 			unsigned long addr;
 			unsigned long nr_pages =
-				f->flush_end - f->flush_start / PAGE_SIZE;
+				(f->flush_end - f->flush_start) / PAGE_SIZE;
 			addr = f->flush_start;
 			while (addr < f->flush_end) {
 				__flush_tlb_single(addr);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 579a8fd..be2e7a2 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -269,7 +269,7 @@ static void emit_bpf_tail_call(u8 **pprog)
 	EMIT4(0x48, 0x8B, 0x46,                   /* mov rax, qword ptr [rsi + 16] */
 	      offsetof(struct bpf_array, map.max_entries));
 	EMIT3(0x48, 0x39, 0xD0);                  /* cmp rax, rdx */
-#define OFFSET1 44 /* number of bytes to jump */
+#define OFFSET1 47 /* number of bytes to jump */
 	EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
 	label1 = cnt;
 
@@ -278,15 +278,15 @@ static void emit_bpf_tail_call(u8 **pprog)
 	 */
 	EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */
 	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 33
+#define OFFSET2 36
 	EMIT2(X86_JA, OFFSET2);                   /* ja out */
 	label2 = cnt;
 	EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
 	EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */
 
 	/* prog = array->prog[index]; */
-	EMIT4(0x48, 0x8D, 0x44, 0xD6);            /* lea rax, [rsi + rdx * 8 + 0x50] */
-	EMIT1(offsetof(struct bpf_array, prog));
+	EMIT4_off32(0x48, 0x8D, 0x84, 0xD6,       /* lea rax, [rsi + rdx * 8 + offsetof(...)] */
+		    offsetof(struct bpf_array, prog));
 	EMIT3(0x48, 0x8B, 0x00);                  /* mov rax, qword ptr [rax] */
 
 	/* if (prog == NULL)
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 8fd6f44..09d3afc 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -166,7 +166,6 @@ void pcibios_fixup_bus(struct pci_bus *b)
 {
 	struct pci_dev *dev;
 
-	pci_read_bridge_bases(b);
 	list_for_each_entry(dev, &b->devices, bus_list)
 		pcibios_fixup_device_resources(dev);
 }
@@ -673,24 +672,22 @@ int pcibios_add_device(struct pci_dev *dev)
 	return 0;
 }
 
-int pcibios_enable_device(struct pci_dev *dev, int mask)
+int pcibios_alloc_irq(struct pci_dev *dev)
 {
-	int err;
-
-	if ((err = pci_enable_resources(dev, mask)) < 0)
-		return err;
-
-	if (!pci_dev_msi_enabled(dev))
-		return pcibios_enable_irq(dev);
-	return 0;
+	return pcibios_enable_irq(dev);
 }
 
-void pcibios_disable_device (struct pci_dev *dev)
+void pcibios_free_irq(struct pci_dev *dev)
 {
-	if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq)
+	if (pcibios_disable_irq)
 		pcibios_disable_irq(dev);
 }
 
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	return pci_enable_resources(dev, mask);
+}
+
 int pci_ext_cfg_avail(void)
 {
 	if (raw_pci_ext_ops)
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 9a2b710..e585655 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -62,19 +62,6 @@ static void pci_fixup_umc_ide(struct pci_dev *d)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide);
 
-static void pci_fixup_ncr53c810(struct pci_dev *d)
-{
-	/*
-	 * NCR 53C810 returns class code 0 (at least on some systems).
-	 * Fix class to be PCI_CLASS_STORAGE_SCSI
-	 */
-	if (!d->class) {
-		dev_warn(&d->dev, "Fixing NCR 53C810 class code\n");
-		d->class = PCI_CLASS_STORAGE_SCSI << 8;
-	}
-}
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810);
-
 static void pci_fixup_latency(struct pci_dev *d)
 {
 	/*
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 2706230..22aaefb 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -211,7 +211,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 	struct irq_alloc_info info;
 	int polarity;
 
-	if (dev->irq_managed && dev->irq > 0)
+	if (pci_has_managed_irq(dev))
 		return 0;
 
 	if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER)
@@ -234,10 +234,13 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 
 static void intel_mid_pci_irq_disable(struct pci_dev *dev)
 {
-	if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed &&
-	    dev->irq > 0) {
+	if (pci_has_managed_irq(dev)) {
 		mp_unmap_irq(dev->irq);
 		dev->irq_managed = 0;
+		/*
+		 * Don't reset dev->irq here, otherwise
+		 * intel_mid_pci_irq_enable() will fail on next call.
+		 */
 	}
 }
 
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 9bd1154..32e7034 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1202,7 +1202,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
 			struct pci_dev *temp_dev;
 			int irq;
 
-			if (dev->irq_managed && dev->irq > 0)
+			if (pci_has_managed_irq(dev))
 				return 0;
 
 			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
@@ -1230,8 +1230,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
-				dev->irq_managed = 1;
-				dev->irq = irq;
+				pci_set_managed_irq(dev, irq);
 				dev_info(&dev->dev, "PCI->APIC IRQ transform: "
 					 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
 				return 0;
@@ -1257,24 +1256,10 @@ static int pirq_enable_irq(struct pci_dev *dev)
 	return 0;
 }
 
-bool mp_should_keep_irq(struct device *dev)
-{
-	if (dev->power.is_prepared)
-		return true;
-#ifdef CONFIG_PM
-	if (dev->power.runtime_status == RPM_SUSPENDING)
-		return true;
-#endif
-
-	return false;
-}
-
 static void pirq_disable_irq(struct pci_dev *dev)
 {
-	if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) &&
-	    dev->irq_managed && dev->irq) {
+	if (io_apic_assign_pci_irqs && pci_has_managed_irq(dev)) {
 		mp_unmap_irq(dev->irq);
-		dev->irq = 0;
-		dev->irq_managed = 0;
+		pci_reset_managed_irq(dev);
 	}
 }
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index cfba30f..e4308fe 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -972,6 +972,11 @@ u64 efi_mem_attributes(unsigned long phys_addr)
 
 static int __init arch_parse_efi_cmdline(char *str)
 {
+	if (!str) {
+		pr_warn("need at least one option\n");
+		return -EINVAL;
+	}
+
 	if (parse_option_str(str, "old_map"))
 		set_bit(EFI_OLD_MEMMAP, &efi.flags);
 	if (parse_option_str(str, "debug"))
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 0d7dd1f..9ab5279 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -22,6 +22,7 @@
 #include <asm/fpu/internal.h>
 #include <asm/debugreg.h>
 #include <asm/cpu.h>
+#include <asm/mmu_context.h>
 
 #ifdef CONFIG_X86_32
 __visible unsigned long saved_context_ebx;
@@ -153,7 +154,7 @@ static void fix_processor_context(void)
 	syscall_init();				/* This sets MSR_*STAR and related */
 #endif
 	load_TR_desc();				/* This does ltr */
-	load_LDT(&current->active_mm->context);	/* This does lldt */
+	load_mm_ldt(current->active_mm);	/* This does lldt */
 
 	fpu__resume_cpu();
 }
diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig
new file mode 100644
index 0000000..10fea5f
--- /dev/null
+++ b/arch/x86/ras/Kconfig
@@ -0,0 +1,11 @@
+config AMD_MCE_INJ
+	tristate "Simple MCE injection interface for AMD processors"
+	depends on RAS && EDAC_DECODE_MCE && DEBUG_FS
+	default n
+	help
+	  This is a simple debugfs interface to inject MCEs and test different
+	  aspects of the MCE handling code.
+
+	  WARNING: Do not even assume this interface is staying stable!
+
+
diff --git a/arch/x86/ras/Makefile b/arch/x86/ras/Makefile
new file mode 100644
index 0000000..dd2c98b
--- /dev/null
+++ b/arch/x86/ras/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_AMD_MCE_INJ)		+= mce_amd_inj.o
+
diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c
new file mode 100644
index 0000000..17e35b5
--- /dev/null
+++ b/arch/x86/ras/mce_amd_inj.c
@@ -0,0 +1,375 @@
+/*
+ * A simple MCE injection facility for testing different aspects of the RAS
+ * code. This driver should be built as module so that it can be loaded
+ * on production kernels for testing purposes.
+ *
+ * This file may be distributed under the terms of the GNU General Public
+ * License version 2.
+ *
+ * Copyright (c) 2010-15:  Borislav Petkov <bp@alien8.de>
+ *			Advanced Micro Devices Inc.
+ */
+
+#include <linux/kobject.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <asm/mce.h>
+
+#include "../kernel/cpu/mcheck/mce-internal.h"
+
+/*
+ * Collect all the MCi_XXX settings
+ */
+static struct mce i_mce;
+static struct dentry *dfs_inj;
+
+static u8 n_banks;
+
+#define MAX_FLAG_OPT_SIZE	3
+
+enum injection_type {
+	SW_INJ = 0,	/* SW injection, simply decode the error */
+	HW_INJ,		/* Trigger a #MC */
+	N_INJ_TYPES,
+};
+
+static const char * const flags_options[] = {
+	[SW_INJ] = "sw",
+	[HW_INJ] = "hw",
+	NULL
+};
+
+/* Set default injection to SW_INJ */
+static enum injection_type inj_type = SW_INJ;
+
+#define MCE_INJECT_SET(reg)						\
+static int inj_##reg##_set(void *data, u64 val)				\
+{									\
+	struct mce *m = (struct mce *)data;				\
+									\
+	m->reg = val;							\
+	return 0;							\
+}
+
+MCE_INJECT_SET(status);
+MCE_INJECT_SET(misc);
+MCE_INJECT_SET(addr);
+
+#define MCE_INJECT_GET(reg)						\
+static int inj_##reg##_get(void *data, u64 *val)			\
+{									\
+	struct mce *m = (struct mce *)data;				\
+									\
+	*val = m->reg;							\
+	return 0;							\
+}
+
+MCE_INJECT_GET(status);
+MCE_INJECT_GET(misc);
+MCE_INJECT_GET(addr);
+
+DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
+DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
+DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
+
+/*
+ * Caller needs to be make sure this cpu doesn't disappear
+ * from under us, i.e.: get_cpu/put_cpu.
+ */
+static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
+{
+	u32 l, h;
+	int err;
+
+	err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
+	if (err) {
+		pr_err("%s: error reading HWCR\n", __func__);
+		return err;
+	}
+
+	enable ? (l |= BIT(18)) : (l &= ~BIT(18));
+
+	err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
+	if (err)
+		pr_err("%s: error writing HWCR\n", __func__);
+
+	return err;
+}
+
+static int __set_inj(const char *buf)
+{
+	int i;
+
+	for (i = 0; i < N_INJ_TYPES; i++) {
+		if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
+			inj_type = i;
+			return 0;
+		}
+	}
+	return -EINVAL;
+}
+
+static ssize_t flags_read(struct file *filp, char __user *ubuf,
+			  size_t cnt, loff_t *ppos)
+{
+	char buf[MAX_FLAG_OPT_SIZE];
+	int n;
+
+	n = sprintf(buf, "%s\n", flags_options[inj_type]);
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
+}
+
+static ssize_t flags_write(struct file *filp, const char __user *ubuf,
+			   size_t cnt, loff_t *ppos)
+{
+	char buf[MAX_FLAG_OPT_SIZE], *__buf;
+	int err;
+	size_t ret;
+
+	if (cnt > MAX_FLAG_OPT_SIZE)
+		cnt = MAX_FLAG_OPT_SIZE;
+
+	ret = cnt;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt - 1] = 0;
+
+	/* strip whitespace */
+	__buf = strstrip(buf);
+
+	err = __set_inj(__buf);
+	if (err) {
+		pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
+		return err;
+	}
+
+	*ppos += ret;
+
+	return ret;
+}
+
+static const struct file_operations flags_fops = {
+	.read           = flags_read,
+	.write          = flags_write,
+	.llseek         = generic_file_llseek,
+};
+
+/*
+ * On which CPU to inject?
+ */
+MCE_INJECT_GET(extcpu);
+
+static int inj_extcpu_set(void *data, u64 val)
+{
+	struct mce *m = (struct mce *)data;
+
+	if (val >= nr_cpu_ids || !cpu_online(val)) {
+		pr_err("%s: Invalid CPU: %llu\n", __func__, val);
+		return -EINVAL;
+	}
+	m->extcpu = val;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
+
+static void trigger_mce(void *info)
+{
+	asm volatile("int $18");
+}
+
+static void do_inject(void)
+{
+	u64 mcg_status = 0;
+	unsigned int cpu = i_mce.extcpu;
+	u8 b = i_mce.bank;
+
+	if (i_mce.misc)
+		i_mce.status |= MCI_STATUS_MISCV;
+
+	if (inj_type == SW_INJ) {
+		mce_inject_log(&i_mce);
+		return;
+	}
+
+	/* prep MCE global settings for the injection */
+	mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
+
+	if (!(i_mce.status & MCI_STATUS_PCC))
+		mcg_status |= MCG_STATUS_RIPV;
+
+	get_online_cpus();
+	if (!cpu_online(cpu))
+		goto err;
+
+	toggle_hw_mce_inject(cpu, true);
+
+	wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS,
+		     (u32)mcg_status, (u32)(mcg_status >> 32));
+
+	wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b),
+		     (u32)i_mce.status, (u32)(i_mce.status >> 32));
+
+	wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b),
+		     (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+
+	wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b),
+		     (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
+
+	toggle_hw_mce_inject(cpu, false);
+
+	smp_call_function_single(cpu, trigger_mce, NULL, 0);
+
+err:
+	put_online_cpus();
+
+}
+
+/*
+ * This denotes into which bank we're injecting and triggers
+ * the injection, at the same time.
+ */
+static int inj_bank_set(void *data, u64 val)
+{
+	struct mce *m = (struct mce *)data;
+
+	if (val >= n_banks) {
+		pr_err("Non-existent MCE bank: %llu\n", val);
+		return -EINVAL;
+	}
+
+	m->bank = val;
+	do_inject();
+
+	return 0;
+}
+
+MCE_INJECT_GET(bank);
+
+DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
+
+static const char readme_msg[] =
+"Description of the files and their usages:\n"
+"\n"
+"Note1: i refers to the bank number below.\n"
+"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
+"as they mirror the hardware registers.\n"
+"\n"
+"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
+"\t attributes of the error which caused the MCE.\n"
+"\n"
+"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
+"\t used for error thresholding purposes and its validity is indicated by\n"
+"\t MCi_STATUS[MiscV].\n"
+"\n"
+"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
+"\t associated with the error.\n"
+"\n"
+"cpu:\t The CPU to inject the error on.\n"
+"\n"
+"bank:\t Specify the bank you want to inject the error into: the number of\n"
+"\t banks in a processor varies and is family/model-specific, therefore, the\n"
+"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
+"\t injection.\n"
+"\n"
+"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
+"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
+"\t for AMD processors.\n"
+"\n"
+"\t Allowed error injection types:\n"
+"\t  - \"sw\": Software error injection. Decode error to a human-readable \n"
+"\t    format only. Safe to use.\n"
+"\t  - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
+"\t    handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
+"\t    is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
+"\t    before injecting.\n"
+"\n";
+
+static ssize_t
+inj_readme_read(struct file *filp, char __user *ubuf,
+		       size_t cnt, loff_t *ppos)
+{
+	return simple_read_from_buffer(ubuf, cnt, ppos,
+					readme_msg, strlen(readme_msg));
+}
+
+static const struct file_operations readme_fops = {
+	.read		= inj_readme_read,
+};
+
+static struct dfs_node {
+	char *name;
+	struct dentry *d;
+	const struct file_operations *fops;
+	umode_t perm;
+} dfs_fls[] = {
+	{ .name = "status",	.fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
+	{ .name = "misc",	.fops = &misc_fops,   .perm = S_IRUSR | S_IWUSR },
+	{ .name = "addr",	.fops = &addr_fops,   .perm = S_IRUSR | S_IWUSR },
+	{ .name = "bank",	.fops = &bank_fops,   .perm = S_IRUSR | S_IWUSR },
+	{ .name = "flags",	.fops = &flags_fops,  .perm = S_IRUSR | S_IWUSR },
+	{ .name = "cpu",	.fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
+	{ .name = "README",	.fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
+};
+
+static int __init init_mce_inject(void)
+{
+	int i;
+	u64 cap;
+
+	rdmsrl(MSR_IA32_MCG_CAP, cap);
+	n_banks = cap & MCG_BANKCNT_MASK;
+
+	dfs_inj = debugfs_create_dir("mce-inject", NULL);
+	if (!dfs_inj)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
+		dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
+						    dfs_fls[i].perm,
+						    dfs_inj,
+						    &i_mce,
+						    dfs_fls[i].fops);
+
+		if (!dfs_fls[i].d)
+			goto err_dfs_add;
+	}
+
+	return 0;
+
+err_dfs_add:
+	while (--i >= 0)
+		debugfs_remove(dfs_fls[i].d);
+
+	debugfs_remove(dfs_inj);
+	dfs_inj = NULL;
+
+	return -ENOMEM;
+}
+
+static void __exit exit_mce_inject(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
+		debugfs_remove(dfs_fls[i].d);
+
+	memset(&dfs_fls, 0, sizeof(dfs_fls));
+
+	debugfs_remove(dfs_inj);
+	dfs_inj = NULL;
+}
+module_init(init_mce_inject);
+module_exit(exit_mce_inject);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Borislav Petkov <bp@alien8.de>");
+MODULE_AUTHOR("AMD Inc.");
+MODULE_DESCRIPTION("MCE injection facility for RAS testing");
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index e88fda8..4841453 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -8,7 +8,7 @@ config XEN
 	select PARAVIRT_CLOCK
 	select XEN_HAVE_PVMMU
 	depends on X86_64 || (X86_32 && X86_PAE)
-	depends on X86_TSC
+	depends on X86_LOCAL_APIC && X86_TSC
 	help
 	  This is the Linux Xen port.  Enabling this will allow the
 	  kernel to boot in a paravirtualized environment under the
@@ -17,7 +17,7 @@ config XEN
 config XEN_DOM0
 	def_bool y
 	depends on XEN && PCI_XEN && SWIOTLB_XEN
-	depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI
+	depends on X86_IO_APIC && ACPI && PCI
 
 config XEN_PVHVM
 	def_bool y
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 7322755..4b6e29a 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -13,13 +13,13 @@ CFLAGS_mmu.o			:= $(nostackp)
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
 			time.o xen-asm.o xen-asm_$(BITS).o \
 			grant-table.o suspend.o platform-pci-unplug.o \
-			p2m.o
+			p2m.o apic.o
 
 obj-$(CONFIG_EVENT_TRACING) += trace.o
 
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
 obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
-obj-$(CONFIG_XEN_DOM0)		+= apic.o vga.o
+obj-$(CONFIG_XEN_DOM0)		+= vga.o
 obj-$(CONFIG_SWIOTLB_XEN)	+= pci-swiotlb-xen.o
 obj-$(CONFIG_XEN_EFI)		+= efi.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0b95c9b..11d6fb4 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -483,6 +483,7 @@ static void set_aliased_prot(void *v, pgprot_t prot)
 	pte_t pte;
 	unsigned long pfn;
 	struct page *page;
+	unsigned char dummy;
 
 	ptep = lookup_address((unsigned long)v, &level);
 	BUG_ON(ptep == NULL);
@@ -492,6 +493,32 @@ static void set_aliased_prot(void *v, pgprot_t prot)
 
 	pte = pfn_pte(pfn, prot);
 
+	/*
+	 * Careful: update_va_mapping() will fail if the virtual address
+	 * we're poking isn't populated in the page tables.  We don't
+	 * need to worry about the direct map (that's always in the page
+	 * tables), but we need to be careful about vmap space.  In
+	 * particular, the top level page table can lazily propagate
+	 * entries between processes, so if we've switched mms since we
+	 * vmapped the target in the first place, we might not have the
+	 * top-level page table entry populated.
+	 *
+	 * We disable preemption because we want the same mm active when
+	 * we probe the target and when we issue the hypercall.  We'll
+	 * have the same nominal mm, but if we're a kernel thread, lazy
+	 * mm dropping could change our pgd.
+	 *
+	 * Out of an abundance of caution, this uses __get_user() to fault
+	 * in the target address just in case there's some obscure case
+	 * in which the target address isn't readable.
+	 */
+
+	preempt_disable();
+
+	pagefault_disable();	/* Avoid warnings due to being atomic. */
+	__get_user(dummy, (unsigned char __user __force *)v);
+	pagefault_enable();
+
 	if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
 		BUG();
 
@@ -503,6 +530,8 @@ static void set_aliased_prot(void *v, pgprot_t prot)
 				BUG();
 	} else
 		kmap_flush_unused();
+
+	preempt_enable();
 }
 
 static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
@@ -510,6 +539,17 @@ static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
 	const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
 	int i;
 
+	/*
+	 * We need to mark the all aliases of the LDT pages RO.  We
+	 * don't need to call vm_flush_aliases(), though, since that's
+	 * only responsible for flushing aliases out the TLBs, not the
+	 * page tables, and Xen will flush the TLB for us if needed.
+	 *
+	 * To avoid confusing future readers: none of this is necessary
+	 * to load the LDT.  The hypervisor only checks this when the
+	 * LDT is faulted in due to subsequent descriptor access.
+	 */
+
 	for(i = 0; i < entries; i += entries_per_page)
 		set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
 }
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index c20fe29..2292721 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -101,17 +101,15 @@ struct dom0_vga_console_info;
 
 #ifdef CONFIG_XEN_DOM0
 void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
-void __init xen_init_apic(void);
 #else
 static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
 				       size_t size)
 {
 }
-static inline void __init xen_init_apic(void)
-{
-}
 #endif
 
+void __init xen_init_apic(void);
+
 #ifdef CONFIG_XEN_EFI
 extern void xen_efi_init(void);
 #else
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index e5b872b..3bd3504 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -14,12 +14,15 @@ config XTENSA
 	select GENERIC_IRQ_SHOW
 	select GENERIC_PCI_IOMAP
 	select GENERIC_SCHED_CLOCK
+	select HAVE_DMA_API_DEBUG
+	select HAVE_DMA_ATTRS
 	select HAVE_FUNCTION_TRACER
 	select HAVE_IRQ_TIME_ACCOUNTING
 	select HAVE_OPROFILE
 	select HAVE_PERF_EVENTS
 	select IRQ_DOMAIN
 	select MODULES_USE_ELF_RELA
+	select PERF_USE_VMALLOC
 	select VIRT_TO_BUS
 	help
 	  Xtensa processors are 32-bit RISC machines designed by Tensilica
@@ -61,9 +64,7 @@ config TRACE_IRQFLAGS_SUPPORT
 	def_bool y
 
 config MMU
-	bool
-	default n if !XTENSA_VARIANT_CUSTOM
-	default XTENSA_VARIANT_MMU if XTENSA_VARIANT_CUSTOM
+	def_bool n
 
 config VARIANT_IRQ_SWITCH
 	def_bool n
@@ -71,9 +72,6 @@ config VARIANT_IRQ_SWITCH
 config HAVE_XTENSA_GPIO32
 	def_bool n
 
-config MAY_HAVE_SMP
-	def_bool n
-
 menu "Processor type and features"
 
 choice
@@ -100,7 +98,6 @@ config XTENSA_VARIANT_DC233C
 
 config XTENSA_VARIANT_CUSTOM
 	bool "Custom Xtensa processor configuration"
-	select MAY_HAVE_SMP
 	select HAVE_XTENSA_GPIO32
 	help
 	  Select this variant to use a custom Xtensa processor configuration.
@@ -126,10 +123,21 @@ config XTENSA_VARIANT_MMU
 	bool "Core variant has a Full MMU (TLB, Pages, Protection, etc)"
 	depends on XTENSA_VARIANT_CUSTOM
 	default y
+	select MMU
 	help
 	  Build a Conventional Kernel with full MMU support,
 	  ie: it supports a TLB with auto-loading, page protection.
 
+config XTENSA_VARIANT_HAVE_PERF_EVENTS
+	bool "Core variant has Performance Monitor Module"
+	depends on XTENSA_VARIANT_CUSTOM
+	default n
+	help
+	  Enable if core variant has Performance Monitor Module with
+	  External Registers Interface.
+
+	  If unsure, say N.
+
 config XTENSA_UNALIGNED_USER
 	bool "Unaligned memory access in use space"
 	help
@@ -143,7 +151,7 @@ source "kernel/Kconfig.preempt"
 
 config HAVE_SMP
 	bool "System Supports SMP (MX)"
-	depends on MAY_HAVE_SMP
+	depends on XTENSA_VARIANT_CUSTOM
 	select XTENSA_MX
 	help
 	  This option is use to indicate that the system-on-a-chip (SOC)
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 14d15bf..63c223d 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -2,7 +2,6 @@ generic-y += bitsperlong.h
 generic-y += bug.h
 generic-y += clkdev.h
 generic-y += cputime.h
-generic-y += device.h
 generic-y += div64.h
 generic-y += emergency-restart.h
 generic-y += errno.h
@@ -19,6 +18,7 @@ generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += resource.h
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 00b7d46..ebcd1f6 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -29,7 +29,7 @@
  *
  * Locking interrupts looks like this:
  *
- *    rsil a15, LOCKLEVEL
+ *    rsil a15, TOPLEVEL
  *    <code>
  *    wsr  a15, PS
  *    rsync
@@ -106,7 +106,7 @@ static inline void atomic_##op(int i, atomic_t * v)			\
 	unsigned int vval;						\
 									\
 	__asm__ __volatile__(						\
-			"       rsil    a15, "__stringify(LOCKLEVEL)"\n"\
+			"       rsil    a15, "__stringify(TOPLEVEL)"\n"\
 			"       l32i    %0, %2, 0\n"			\
 			"       " #op " %0, %0, %1\n"			\
 			"       s32i    %0, %2, 0\n"			\
@@ -124,7 +124,7 @@ static inline int atomic_##op##_return(int i, atomic_t * v)		\
 	unsigned int vval;						\
 									\
 	__asm__ __volatile__(						\
-			"       rsil    a15,"__stringify(LOCKLEVEL)"\n"	\
+			"       rsil    a15,"__stringify(TOPLEVEL)"\n"	\
 			"       l32i    %0, %2, 0\n"			\
 			"       " #op " %0, %0, %1\n"			\
 			"       s32i    %0, %2, 0\n"			\
@@ -272,7 +272,7 @@ static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
 	unsigned int vval;
 
 	__asm__ __volatile__(
-			"       rsil    a15,"__stringify(LOCKLEVEL)"\n"
+			"       rsil    a15,"__stringify(TOPLEVEL)"\n"
 			"       l32i    %0, %2, 0\n"
 			"       xor     %1, %4, %3\n"
 			"       and     %0, %0, %4\n"
@@ -306,7 +306,7 @@ static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
 	unsigned int vval;
 
 	__asm__ __volatile__(
-			"       rsil    a15,"__stringify(LOCKLEVEL)"\n"
+			"       rsil    a15,"__stringify(TOPLEVEL)"\n"
 			"       l32i    %0, %2, 0\n"
 			"       or      %0, %0, %1\n"
 			"       s32i    %0, %2, 0\n"
diff --git a/arch/xtensa/include/asm/cmpxchg.h b/arch/xtensa/include/asm/cmpxchg.h
index 370b26f..201e900 100644
--- a/arch/xtensa/include/asm/cmpxchg.h
+++ b/arch/xtensa/include/asm/cmpxchg.h
@@ -34,7 +34,7 @@ __cmpxchg_u32(volatile int *p, int old, int new)
 	return new;
 #else
 	__asm__ __volatile__(
-			"       rsil    a15, "__stringify(LOCKLEVEL)"\n"
+			"       rsil    a15, "__stringify(TOPLEVEL)"\n"
 			"       l32i    %0, %1, 0\n"
 			"       bne     %0, %2, 1f\n"
 			"       s32i    %3, %1, 0\n"
@@ -123,7 +123,7 @@ static inline unsigned long xchg_u32(volatile int * m, unsigned long val)
 #else
 	unsigned long tmp;
 	__asm__ __volatile__(
-			"       rsil    a15, "__stringify(LOCKLEVEL)"\n"
+			"       rsil    a15, "__stringify(TOPLEVEL)"\n"
 			"       l32i    %0, %1, 0\n"
 			"       s32i    %2, %1, 0\n"
 			"       wsr     a15, ps\n"
diff --git a/arch/xtensa/include/asm/device.h b/arch/xtensa/include/asm/device.h
new file mode 100644
index 0000000..fe1f5c8
--- /dev/null
+++ b/arch/xtensa/include/asm/device.h
@@ -0,0 +1,19 @@
+/*
+ * Arch specific extensions to struct device
+ *
+ * This file is released under the GPLv2
+ */
+#ifndef _ASM_XTENSA_DEVICE_H
+#define _ASM_XTENSA_DEVICE_H
+
+struct dma_map_ops;
+
+struct dev_archdata {
+	/* DMA operations on that device */
+	struct dma_map_ops *dma_ops;
+};
+
+struct pdev_archdata {
+};
+
+#endif /* _ASM_XTENSA_DEVICE_H */
diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h
index 1f5f6dc..f01cb30 100644
--- a/arch/xtensa/include/asm/dma-mapping.h
+++ b/arch/xtensa/include/asm/dma-mapping.h
@@ -1,11 +1,10 @@
 /*
- * include/asm-xtensa/dma-mapping.h
- *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
  * Copyright (C) 2003 - 2005 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  */
 
 #ifndef _XTENSA_DMA_MAPPING_H
@@ -13,142 +12,67 @@
 
 #include <asm/cache.h>
 #include <asm/io.h>
+
+#include <asm-generic/dma-coherent.h>
+
 #include <linux/mm.h>
 #include <linux/scatterlist.h>
 
 #define DMA_ERROR_CODE		(~(dma_addr_t)0x0)
 
-/*
- * DMA-consistent mapping functions.
- */
-
-extern void *consistent_alloc(int, size_t, dma_addr_t, unsigned long);
-extern void consistent_free(void*, size_t, dma_addr_t);
-extern void consistent_sync(void*, size_t, int);
-
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
-			   dma_addr_t *dma_handle, gfp_t flag);
+extern struct dma_map_ops xtensa_dma_map_ops;
 
-void dma_free_coherent(struct device *dev, size_t size,
-			 void *vaddr, dma_addr_t dma_handle);
-
-static inline dma_addr_t
-dma_map_single(struct device *dev, void *ptr, size_t size,
-	       enum dma_data_direction direction)
-{
-	BUG_ON(direction == DMA_NONE);
-	consistent_sync(ptr, size, direction);
-	return virt_to_phys(ptr);
-}
-
-static inline void
-dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
-		 enum dma_data_direction direction)
+static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 {
-	BUG_ON(direction == DMA_NONE);
+	if (dev && dev->archdata.dma_ops)
+		return dev->archdata.dma_ops;
+	else
+		return &xtensa_dma_map_ops;
 }
 
-static inline int
-dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
-	   enum dma_data_direction direction)
-{
-	int i;
-	struct scatterlist *sg;
-
-	BUG_ON(direction == DMA_NONE);
-
-	for_each_sg(sglist, sg, nents, i) {
-		BUG_ON(!sg_page(sg));
+#include <asm-generic/dma-mapping-common.h>
 
-		sg->dma_address = sg_phys(sg);
-		consistent_sync(sg_virt(sg), sg->length, direction);
-	}
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_free_noncoherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
+#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL)
 
-	return nents;
-}
-
-static inline dma_addr_t
-dma_map_page(struct device *dev, struct page *page, unsigned long offset,
-	     size_t size, enum dma_data_direction direction)
-{
-	BUG_ON(direction == DMA_NONE);
-	return (dma_addr_t)(page_to_pfn(page)) * PAGE_SIZE + offset;
-}
-
-static inline void
-dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
-	       enum dma_data_direction direction)
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+				    dma_addr_t *dma_handle, gfp_t gfp,
+				    struct dma_attrs *attrs)
 {
-	BUG_ON(direction == DMA_NONE);
-}
+	void *ret;
+	struct dma_map_ops *ops = get_dma_ops(dev);
 
+	if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
+		return ret;
 
-static inline void
-dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
-	     enum dma_data_direction direction)
-{
-	BUG_ON(direction == DMA_NONE);
-}
-
-static inline void
-dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
-		enum dma_data_direction direction)
-{
-	consistent_sync((void *)bus_to_virt(dma_handle), size, direction);
-}
+	ret = ops->alloc(dev, size, dma_handle, gfp, attrs);
+	debug_dma_alloc_coherent(dev, size, *dma_handle, ret);
 
-static inline void
-dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
-		           size_t size, enum dma_data_direction direction)
-{
-	consistent_sync((void *)bus_to_virt(dma_handle), size, direction);
+	return ret;
 }
 
-static inline void
-dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
-		      unsigned long offset, size_t size,
-		      enum dma_data_direction direction)
-{
-
-	consistent_sync((void *)bus_to_virt(dma_handle)+offset,size,direction);
-}
-
-static inline void
-dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
-		      unsigned long offset, size_t size,
-		      enum dma_data_direction direction)
+static inline void dma_free_attrs(struct device *dev, size_t size,
+				  void *vaddr, dma_addr_t dma_handle,
+				  struct dma_attrs *attrs)
 {
+	struct dma_map_ops *ops = get_dma_ops(dev);
 
-	consistent_sync((void *)bus_to_virt(dma_handle)+offset,size,direction);
-}
-static inline void
-dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist, int nelems,
-		 enum dma_data_direction dir)
-{
-	int i;
-	struct scatterlist *sg;
+	if (dma_release_from_coherent(dev, get_order(size), vaddr))
+		return;
 
-	for_each_sg(sglist, sg, nelems, i)
-		consistent_sync(sg_virt(sg), sg->length, dir);
+	ops->free(dev, size, vaddr, dma_handle, attrs);
+	debug_dma_free_coherent(dev, size, vaddr, dma_handle);
 }
 
-static inline void
-dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
-		       int nelems, enum dma_data_direction dir)
-{
-	int i;
-	struct scatterlist *sg;
-
-	for_each_sg(sglist, sg, nelems, i)
-		consistent_sync(sg_virt(sg), sg->length, dir);
-}
 static inline int
 dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
-	return 0;
+	struct dma_map_ops *ops = get_dma_ops(dev);
+
+	debug_dma_mapping_error(dev, dma_addr);
+	return ops->mapping_error(dev, dma_addr);
 }
 
 static inline int
@@ -168,39 +92,7 @@ dma_set_mask(struct device *dev, u64 mask)
 	return 0;
 }
 
-static inline void
-dma_cache_sync(struct device *dev, void *vaddr, size_t size,
-	       enum dma_data_direction direction)
-{
-	consistent_sync(vaddr, size, direction);
-}
-
-/* Not supported for now */
-static inline int dma_mmap_coherent(struct device *dev,
-				    struct vm_area_struct *vma, void *cpu_addr,
-				    dma_addr_t dma_addr, size_t size)
-{
-	return -EINVAL;
-}
-
-static inline int dma_get_sgtable(struct device *dev, struct sg_table *sgt,
-				  void *cpu_addr, dma_addr_t dma_addr,
-				  size_t size)
-{
-	return -EINVAL;
-}
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
-				    dma_addr_t *dma_handle, gfp_t flag,
-				    struct dma_attrs *attrs)
-{
-	return NULL;
-}
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
-				  void *vaddr, dma_addr_t dma_handle,
-				  struct dma_attrs *attrs)
-{
-}
+void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+		    enum dma_data_direction direction);
 
 #endif	/* _XTENSA_DMA_MAPPING_H */
diff --git a/arch/xtensa/include/asm/irqflags.h b/arch/xtensa/include/asm/irqflags.h
index ea36674..8e090c7 100644
--- a/arch/xtensa/include/asm/irqflags.h
+++ b/arch/xtensa/include/asm/irqflags.h
@@ -6,6 +6,7 @@
  * for more details.
  *
  * Copyright (C) 2001 - 2005 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  */
 
 #ifndef _XTENSA_IRQFLAGS_H
@@ -23,8 +24,27 @@ static inline unsigned long arch_local_save_flags(void)
 static inline unsigned long arch_local_irq_save(void)
 {
 	unsigned long flags;
-	asm volatile("rsil %0, "__stringify(LOCKLEVEL)
+#if XTENSA_FAKE_NMI
+#if defined(CONFIG_DEBUG_KERNEL) && (LOCKLEVEL | TOPLEVEL) >= XCHAL_DEBUGLEVEL
+	unsigned long tmp;
+
+	asm volatile("rsr	%0, ps\t\n"
+		     "extui	%1, %0, 0, 4\t\n"
+		     "bgei	%1, "__stringify(LOCKLEVEL)", 1f\t\n"
+		     "rsil	%0, "__stringify(LOCKLEVEL)"\n"
+		     "1:"
+		     : "=a" (flags), "=a" (tmp) :: "memory");
+#else
+	asm volatile("rsr	%0, ps\t\n"
+		     "or	%0, %0, %1\t\n"
+		     "xsr	%0, ps\t\n"
+		     "rsync"
+		     : "=&a" (flags) : "a" (LOCKLEVEL) : "memory");
+#endif
+#else
+	asm volatile("rsil	%0, "__stringify(LOCKLEVEL)
 		     : "=a" (flags) :: "memory");
+#endif
 	return flags;
 }
 
diff --git a/arch/xtensa/include/asm/mm-arch-hooks.h b/arch/xtensa/include/asm/mm-arch-hooks.h
deleted file mode 100644
index d2e5cfd..0000000
--- a/arch/xtensa/include/asm/mm-arch-hooks.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Architecture specific mm hooks
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _ASM_XTENSA_MM_ARCH_HOOKS_H
-#define _ASM_XTENSA_MM_ARCH_HOOKS_H
-
-#endif /* _ASM_XTENSA_MM_ARCH_HOOKS_H */
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index b61bdf0..83e2e4bc 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -1,11 +1,10 @@
 /*
- * include/asm-xtensa/processor.h
- *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
  * Copyright (C) 2001 - 2008 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  */
 
 #ifndef _XTENSA_PROCESSOR_H
@@ -45,6 +44,14 @@
 #define STACK_TOP_MAX	STACK_TOP
 
 /*
+ * General exception cause assigned to fake NMI. Fake NMI needs to be handled
+ * differently from other interrupts, but it uses common kernel entry/exit
+ * code.
+ */
+
+#define EXCCAUSE_MAPPED_NMI	62
+
+/*
  * General exception cause assigned to debug exceptions. Debug exceptions go
  * to their own vector, rather than the general exception vectors (user,
  * kernel, double); and their specific causes are reported via DEBUGCAUSE
@@ -65,10 +72,30 @@
 
 #define VALID_DOUBLE_EXCEPTION_ADDRESS	64
 
+#define XTENSA_INT_LEVEL(intno) _XTENSA_INT_LEVEL(intno)
+#define _XTENSA_INT_LEVEL(intno) XCHAL_INT##intno##_LEVEL
+
+#define XTENSA_INTLEVEL_MASK(level) _XTENSA_INTLEVEL_MASK(level)
+#define _XTENSA_INTLEVEL_MASK(level) (XCHAL_INTLEVEL##level##_MASK)
+
+#define IS_POW2(v) (((v) & ((v) - 1)) == 0)
+
+#define PROFILING_INTLEVEL XTENSA_INT_LEVEL(XCHAL_PROFILING_INTERRUPT)
+
 /* LOCKLEVEL defines the interrupt level that masks all
  * general-purpose interrupts.
  */
+#if defined(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) && \
+	defined(XCHAL_PROFILING_INTERRUPT) && \
+	PROFILING_INTLEVEL == XCHAL_EXCM_LEVEL && \
+	XCHAL_EXCM_LEVEL > 1 && \
+	IS_POW2(XTENSA_INTLEVEL_MASK(PROFILING_INTLEVEL))
+#define LOCKLEVEL (XCHAL_EXCM_LEVEL - 1)
+#else
 #define LOCKLEVEL XCHAL_EXCM_LEVEL
+#endif
+#define TOPLEVEL XCHAL_EXCM_LEVEL
+#define XTENSA_FAKE_NMI (LOCKLEVEL < TOPLEVEL)
 
 /* WSBITS and WBBITS are the width of the WINDOWSTART and WINDOWBASE
  * registers
diff --git a/arch/xtensa/include/asm/stacktrace.h b/arch/xtensa/include/asm/stacktrace.h
index 6a05fcb..fe06e8e 100644
--- a/arch/xtensa/include/asm/stacktrace.h
+++ b/arch/xtensa/include/asm/stacktrace.h
@@ -33,4 +33,12 @@ void walk_stackframe(unsigned long *sp,
 		int (*fn)(struct stackframe *frame, void *data),
 		void *data);
 
+void xtensa_backtrace_kernel(struct pt_regs *regs, unsigned int depth,
+			     int (*kfn)(struct stackframe *frame, void *data),
+			     int (*ufn)(struct stackframe *frame, void *data),
+			     void *data);
+void xtensa_backtrace_user(struct pt_regs *regs, unsigned int depth,
+			   int (*ufn)(struct stackframe *frame, void *data),
+			   void *data);
+
 #endif /* _XTENSA_STACKTRACE_H */
diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index 677bfcf..28f33a8 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -25,30 +25,39 @@ static inline void spill_registers(void)
 {
 #if XCHAL_NUM_AREGS > 16
 	__asm__ __volatile__ (
-		"	call12	1f\n"
+		"	call8	1f\n"
 		"	_j	2f\n"
 		"	retw\n"
 		"	.align	4\n"
 		"1:\n"
+#if XCHAL_NUM_AREGS == 32
+		"	_entry	a1, 32\n"
+		"	addi	a8, a0, 3\n"
+		"	_entry	a1, 16\n"
+		"	mov	a12, a12\n"
+		"	retw\n"
+#else
 		"	_entry	a1, 48\n"
-		"	addi	a12, a0, 3\n"
-#if XCHAL_NUM_AREGS > 32
-		"	.rept	(" __stringify(XCHAL_NUM_AREGS) " - 32) / 12\n"
+		"	call12	1f\n"
+		"	retw\n"
+		"	.align	4\n"
+		"1:\n"
+		"	.rept	(" __stringify(XCHAL_NUM_AREGS) " - 16) / 12\n"
 		"	_entry	a1, 48\n"
 		"	mov	a12, a0\n"
 		"	.endr\n"
-#endif
-		"	_entry	a1, 48\n"
+		"	_entry	a1, 16\n"
 #if XCHAL_NUM_AREGS % 12 == 0
-		"	mov	a8, a8\n"
-#elif XCHAL_NUM_AREGS % 12 == 4
 		"	mov	a12, a12\n"
-#elif XCHAL_NUM_AREGS % 12 == 8
+#elif XCHAL_NUM_AREGS % 12 == 4
 		"	mov	a4, a4\n"
+#elif XCHAL_NUM_AREGS % 12 == 8
+		"	mov	a8, a8\n"
 #endif
 		"	retw\n"
+#endif
 		"2:\n"
-		: : : "a12", "a13", "memory");
+		: : : "a8", "a9", "memory");
 #else
 	__asm__ __volatile__ (
 		"	mov	a12, a12\n"
diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
index d3a0f0f..50137bc 100644
--- a/arch/xtensa/kernel/Makefile
+++ b/arch/xtensa/kernel/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_PCI) += pci.o
 obj-$(CONFIG_MODULES) += xtensa_ksyms.o module.o
 obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
 obj-$(CONFIG_SMP) += smp.o mxhead.o
+obj-$(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) += perf_event.o
 
 AFLAGS_head.o += -mtext-section-literals
 
@@ -27,10 +28,11 @@ AFLAGS_head.o += -mtext-section-literals
 #
 # Replicate rules in scripts/Makefile.build
 
-sed-y = -e 's/\*(\(\.[a-z]*it\|\.ref\|\)\.text)/*(\1.literal \1.text)/g' \
-	-e 's/\.text\.unlikely/.literal.unlikely .text.unlikely/g'	 \
-	-e 's/\*(\(\.text .*\))/*(.literal \1)/g'			 \
-	-e 's/\*(\(\.text\.[a-z]*\))/*(\1.literal \1)/g'
+sed-y = -e ':a; s/\*(\([^)]*\)\.text\.unlikely/*(\1.literal.unlikely .{text}.unlikely/; ta; ' \
+	-e ':b; s/\*(\([^)]*\)\.text\(\.[a-z]*\)/*(\1.{text}\2.literal .{text}\2/; tb; ' \
+	-e ':c; s/\*(\([^)]*\)\(\.[a-z]*it\|\.ref\)\.text/*(\1\2.literal \2.{text}/; tc; ' \
+	-e ':d; s/\*(\([^)]\+ \|\)\.text/*(\1.literal .{text}/; td; ' \
+	-e 's/\.{text}/.text/g'
 
 quiet_cmd__cpp_lds_S = LDS     $@
 cmd__cpp_lds_S = $(CPP) $(cpp_flags) -P -C -Uxtensa -D__ASSEMBLY__ $<    \
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 82bbfa5..5041303 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1,6 +1,4 @@
 /*
- * arch/xtensa/kernel/entry.S
- *
  * Low-level exception handling
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -8,6 +6,7 @@
  * for more details.
  *
  * Copyright (C) 2004 - 2008 by Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  *
  * Chris Zankel <chris@zankel.net>
  *
@@ -75,6 +74,27 @@
 #endif
 	.endm
 
+
+	.macro	irq_save flags tmp
+#if XTENSA_FAKE_NMI
+#if defined(CONFIG_DEBUG_KERNEL) && (LOCKLEVEL | TOPLEVEL) >= XCHAL_DEBUGLEVEL
+	rsr	\flags, ps
+	extui	\tmp, \flags, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+	bgei	\tmp, LOCKLEVEL, 99f
+	rsil	\tmp, LOCKLEVEL
+99:
+#else
+	movi	\tmp, LOCKLEVEL
+	rsr	\flags, ps
+	or	\flags, \flags, \tmp
+	xsr	\flags, ps
+	rsync
+#endif
+#else
+	rsil	\flags, LOCKLEVEL
+#endif
+	.endm
+
 /* ----------------- DEFAULT FIRST LEVEL EXCEPTION HANDLERS ----------------- */
 
 /*
@@ -122,6 +142,7 @@ _user_exception:
 	/* Save SAR and turn off single stepping */
 
 	movi	a2, 0
+	wsr	a2, depc		# terminate user stack trace with 0
 	rsr	a3, sar
 	xsr	a2, icountlevel
 	s32i	a3, a1, PT_SAR
@@ -301,7 +322,18 @@ _kernel_exception:
 	s32i	a14, a1, PT_AREG14
 	s32i	a15, a1, PT_AREG15
 
+	_bnei	a2, 1, 1f
+
+	/* Copy spill slots of a0 and a1 to imitate movsp
+	 * in order to keep exception stack continuous
+	 */
+	l32i	a3, a1, PT_SIZE
+	l32i	a0, a1, PT_SIZE + 4
+	s32e	a3, a1, -16
+	s32e	a0, a1, -12
 1:
+	l32i	a0, a1, PT_AREG0	# restore saved a0
+	wsr	a0, depc
 
 #ifdef KERNEL_STACK_OVERFLOW_CHECK
 
@@ -340,75 +372,88 @@ common_exception:
 
 	/* It is now save to restore the EXC_TABLE_FIXUP variable. */
 
-	rsr	a0, exccause
+	rsr	a2, exccause
 	movi	a3, 0
-	rsr	a2, excsave1
-	s32i	a0, a1, PT_EXCCAUSE
-	s32i	a3, a2, EXC_TABLE_FIXUP
-
-	/* All unrecoverable states are saved on stack, now, and a1 is valid,
-	 * so we can allow exceptions and interrupts (*) again.
-	 * Set PS(EXCM = 0, UM = 0, RING = 0, OWB = 0, WOE = 1, INTLEVEL = X)
+	rsr	a0, excsave1
+	s32i	a2, a1, PT_EXCCAUSE
+	s32i	a3, a0, EXC_TABLE_FIXUP
+
+	/* All unrecoverable states are saved on stack, now, and a1 is valid.
+	 * Now we can allow exceptions again. In case we've got an interrupt
+	 * PS.INTLEVEL is set to LOCKLEVEL disabling furhter interrupts,
+	 * otherwise it's left unchanged.
 	 *
-	 * (*) We only allow interrupts if they were previously enabled and
-	 *     we're not handling an IRQ
+	 * Set PS(EXCM = 0, UM = 0, RING = 0, OWB = 0, WOE = 1, INTLEVEL = X)
 	 */
 
 	rsr	a3, ps
-	addi	a0, a0, -EXCCAUSE_LEVEL1_INTERRUPT
-	movi	a2, LOCKLEVEL
+	s32i	a3, a1, PT_PS		# save ps
+
+#if XTENSA_FAKE_NMI
+	/* Correct PS needs to be saved in the PT_PS:
+	 * - in case of exception or level-1 interrupt it's in the PS,
+	 *   and is already saved.
+	 * - in case of medium level interrupt it's in the excsave2.
+	 */
+	movi	a0, EXCCAUSE_MAPPED_NMI
+	extui	a3, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+	beq	a2, a0, .Lmedium_level_irq
+	bnei	a2, EXCCAUSE_LEVEL1_INTERRUPT, .Lexception
+	beqz	a3, .Llevel1_irq	# level-1 IRQ sets ps.intlevel to 0
+
+.Lmedium_level_irq:
+	rsr	a0, excsave2
+	s32i	a0, a1, PT_PS		# save medium-level interrupt ps
+	bgei	a3, LOCKLEVEL, .Lexception
+
+.Llevel1_irq:
+	movi	a3, LOCKLEVEL
+
+.Lexception:
+	movi	a0, 1 << PS_WOE_BIT
+	or	a3, a3, a0
+#else
+	addi	a2, a2, -EXCCAUSE_LEVEL1_INTERRUPT
+	movi	a0, LOCKLEVEL
 	extui	a3, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
 					# a3 = PS.INTLEVEL
-	moveqz	a3, a2, a0		# a3 = LOCKLEVEL iff interrupt
+	moveqz	a3, a0, a2		# a3 = LOCKLEVEL iff interrupt
 	movi	a2, 1 << PS_WOE_BIT
 	or	a3, a3, a2
-	rsr	a0, exccause
-	xsr	a3, ps
+	rsr	a2, exccause
+#endif
 
-	s32i	a3, a1, PT_PS		# save ps
+	/* restore return address (or 0 if return to userspace) */
+	rsr	a0, depc
+	wsr	a3, ps
+	rsync				# PS.WOE => rsync => overflow
 
 	/* Save lbeg, lend */
 
-	rsr	a2, lbeg
+	rsr	a4, lbeg
 	rsr	a3, lend
-	s32i	a2, a1, PT_LBEG
+	s32i	a4, a1, PT_LBEG
 	s32i	a3, a1, PT_LEND
 
 	/* Save SCOMPARE1 */
 
 #if XCHAL_HAVE_S32C1I
-	rsr     a2, scompare1
-	s32i    a2, a1, PT_SCOMPARE1
+	rsr     a3, scompare1
+	s32i    a3, a1, PT_SCOMPARE1
 #endif
 
 	/* Save optional registers. */
 
-	save_xtregs_opt a1 a2 a4 a5 a6 a7 PT_XTREGS_OPT
+	save_xtregs_opt a1 a3 a4 a5 a6 a7 PT_XTREGS_OPT
 	
-#ifdef CONFIG_TRACE_IRQFLAGS
-	l32i	a4, a1, PT_DEPC
-	/* Double exception means we came here with an exception
-	 * while PS.EXCM was set, i.e. interrupts disabled.
-	 */
-	bgeui	a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
-	l32i	a4, a1, PT_EXCCAUSE
-	bnei	a4, EXCCAUSE_LEVEL1_INTERRUPT, 1f
-	/* We came here with an interrupt means interrupts were enabled
-	 * and we've just disabled them.
-	 */
-	movi	a4, trace_hardirqs_off
-	callx4	a4
-1:
-#endif
-
 	/* Go to second-level dispatcher. Set up parameters to pass to the
 	 * exception handler and call the exception handler.
 	 */
 
 	rsr	a4, excsave1
 	mov	a6, a1			# pass stack frame
-	mov	a7, a0			# pass EXCCAUSE
-	addx4	a4, a0, a4
+	mov	a7, a2			# pass EXCCAUSE
+	addx4	a4, a2, a4
 	l32i	a4, a4, EXC_TABLE_DEFAULT		# load handler
 
 	/* Call the second-level handler */
@@ -419,8 +464,17 @@ common_exception:
 	.global common_exception_return
 common_exception_return:
 
+#if XTENSA_FAKE_NMI
+	l32i	a2, a1, PT_EXCCAUSE
+	movi	a3, EXCCAUSE_MAPPED_NMI
+	beq	a2, a3, .LNMIexit
+#endif
 1:
-	rsil	a2, LOCKLEVEL
+	irq_save a2, a3
+#ifdef CONFIG_TRACE_IRQFLAGS
+	movi	a4, trace_hardirqs_off
+	callx4	a4
+#endif
 
 	/* Jump if we are returning from kernel exceptions. */
 
@@ -445,6 +499,10 @@ common_exception_return:
 
 	/* Call do_signal() */
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+	movi	a4, trace_hardirqs_on
+	callx4	a4
+#endif
 	rsil	a2, 0
 	movi	a4, do_notify_resume	# int do_notify_resume(struct pt_regs*)
 	mov	a6, a1
@@ -453,6 +511,10 @@ common_exception_return:
 
 3:	/* Reschedule */
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+	movi	a4, trace_hardirqs_on
+	callx4	a4
+#endif
 	rsil	a2, 0
 	movi	a4, schedule	# void schedule (void)
 	callx4	a4
@@ -471,6 +533,12 @@ common_exception_return:
 	j	1b
 #endif
 
+#if XTENSA_FAKE_NMI
+.LNMIexit:
+	l32i	a3, a1, PT_PS
+	_bbci.l	a3, PS_UM_BIT, 4f
+#endif
+
 5:
 #ifdef CONFIG_DEBUG_TLB_SANITY
 	l32i	a4, a1, PT_DEPC
@@ -481,16 +549,8 @@ common_exception_return:
 6:
 4:
 #ifdef CONFIG_TRACE_IRQFLAGS
-	l32i	a4, a1, PT_DEPC
-	/* Double exception means we came here with an exception
-	 * while PS.EXCM was set, i.e. interrupts disabled.
-	 */
-	bgeui	a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
-	l32i	a4, a1, PT_EXCCAUSE
-	bnei	a4, EXCCAUSE_LEVEL1_INTERRUPT, 1f
-	/* We came here with an interrupt means interrupts were enabled
-	 * and we'll reenable them on return.
-	 */
+	extui	a4, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+	bgei	a4, LOCKLEVEL, 1f
 	movi	a4, trace_hardirqs_on
 	callx4	a4
 1:
@@ -568,12 +628,13 @@ user_exception_exit:
 	 *	 (if we have restored WSBITS-1 frames).
 	 */
 
+2:
 #if XCHAL_HAVE_THREADPTR
 	l32i	a3, a1, PT_THREADPTR
 	wur	a3, threadptr
 #endif
 
-2:	j	common_exception_exit
+	j	common_exception_exit
 
 	/* This is the kernel exception exit.
 	 * We avoided to do a MOVSP when we entered the exception, but we
@@ -1561,6 +1622,13 @@ ENTRY(fast_second_level_miss)
 	rfde
 
 9:	l32i	a0, a1, TASK_ACTIVE_MM	# unlikely case mm == 0
+	bnez	a0, 8b
+
+	/* Even more unlikely case active_mm == 0.
+	 * We can get here with NMI in the middle of context_switch that
+	 * touches vmalloc area.
+	 */
+	movi	a0, init_mm
 	j	8b
 
 #if (DCACHE_WAY_SIZE > PAGE_SIZE)
@@ -1820,7 +1888,7 @@ ENDPROC(system_call)
 	mov	a12, a0
 	.endr
 #endif
-	_entry	a1, 48
+	_entry	a1, 16
 #if XCHAL_NUM_AREGS % 12 == 0
 	mov	a8, a8
 #elif XCHAL_NUM_AREGS % 12 == 4
@@ -1844,7 +1912,7 @@ ENDPROC(system_call)
 
 ENTRY(_switch_to)
 
-	entry	a1, 16
+	entry	a1, 48
 
 	mov	a11, a3			# and 'next' (a3)
 
@@ -1864,10 +1932,8 @@ ENTRY(_switch_to)
 
 	/* Disable ints while we manipulate the stack pointer. */
 
-	rsil	a14, LOCKLEVEL
-	rsr	a3, excsave1
+	irq_save a14, a3
 	rsync
-	s32i	a3, a3, EXC_TABLE_FIXUP	/* enter critical section */
 
 	/* Switch CPENABLE */
 
@@ -1888,9 +1954,7 @@ ENTRY(_switch_to)
 	 */
 
 	rsr	a3, excsave1		# exc_table
-	movi	a6, 0
 	addi	a7, a5, PT_REGS_OFFSET
-	s32i	a6, a3, EXC_TABLE_FIXUP
 	s32i	a7, a3, EXC_TABLE_KSTK
 
 	/* restore context of the task 'next' */
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index 3eee94f..6df31ca 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -28,7 +28,7 @@
 #include <asm/uaccess.h>
 #include <asm/platform.h>
 
-atomic_t irq_err_count;
+DECLARE_PER_CPU(unsigned long, nmi_count);
 
 asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
 {
@@ -57,11 +57,16 @@ asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
 
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
+	unsigned cpu __maybe_unused;
 #ifdef CONFIG_SMP
 	show_ipi_list(p, prec);
 #endif
-	seq_printf(p, "%*s: ", prec, "ERR");
-	seq_printf(p, "%10u\n", atomic_read(&irq_err_count));
+#if XTENSA_FAKE_NMI
+	seq_printf(p, "%*s:", prec, "NMI");
+	for_each_online_cpu(cpu)
+		seq_printf(p, " %10lu", per_cpu(nmi_count, cpu));
+	seq_puts(p, "   Non-maskable interrupts\n");
+#endif
 	return 0;
 }
 
@@ -106,6 +111,12 @@ int xtensa_irq_map(struct irq_domain *d, unsigned int irq,
 		irq_set_chip_and_handler_name(irq, irq_chip,
 				handle_percpu_irq, "timer");
 		irq_clear_status_flags(irq, IRQ_LEVEL);
+#ifdef XCHAL_INTTYPE_MASK_PROFILING
+	} else if (mask & XCHAL_INTTYPE_MASK_PROFILING) {
+		irq_set_chip_and_handler_name(irq, irq_chip,
+				handle_percpu_irq, "profiling");
+		irq_set_status_flags(irq, IRQ_LEVEL);
+#endif
 	} else {/* XCHAL_INTTYPE_MASK_WRITE_ERROR */
 		/* XCHAL_INTTYPE_MASK_NMI */
 		irq_set_chip_and_handler_name(irq, irq_chip,
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index e8b76b8..fb75ebf 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -1,6 +1,4 @@
 /*
- * arch/xtensa/kernel/pci-dma.c
- *
  * DMA coherent memory allocation.
  *
  * This program is free software; you can redistribute  it and/or modify it
@@ -9,6 +7,7 @@
  * option) any later version.
  *
  * Copyright (C) 2002 - 2005 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  *
  * Based on version for i386.
  *
@@ -25,13 +24,107 @@
 #include <asm/io.h>
 #include <asm/cacheflush.h>
 
+void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+		    enum dma_data_direction dir)
+{
+	switch (dir) {
+	case DMA_BIDIRECTIONAL:
+		__flush_invalidate_dcache_range((unsigned long)vaddr, size);
+		break;
+
+	case DMA_FROM_DEVICE:
+		__invalidate_dcache_range((unsigned long)vaddr, size);
+		break;
+
+	case DMA_TO_DEVICE:
+		__flush_dcache_range((unsigned long)vaddr, size);
+		break;
+
+	case DMA_NONE:
+		BUG();
+		break;
+	}
+}
+EXPORT_SYMBOL(dma_cache_sync);
+
+static void xtensa_sync_single_for_cpu(struct device *dev,
+				       dma_addr_t dma_handle, size_t size,
+				       enum dma_data_direction dir)
+{
+	void *vaddr;
+
+	switch (dir) {
+	case DMA_BIDIRECTIONAL:
+	case DMA_FROM_DEVICE:
+		vaddr = bus_to_virt(dma_handle);
+		__invalidate_dcache_range((unsigned long)vaddr, size);
+		break;
+
+	case DMA_NONE:
+		BUG();
+		break;
+
+	default:
+		break;
+	}
+}
+
+static void xtensa_sync_single_for_device(struct device *dev,
+					  dma_addr_t dma_handle, size_t size,
+					  enum dma_data_direction dir)
+{
+	void *vaddr;
+
+	switch (dir) {
+	case DMA_BIDIRECTIONAL:
+	case DMA_TO_DEVICE:
+		vaddr = bus_to_virt(dma_handle);
+		__flush_dcache_range((unsigned long)vaddr, size);
+		break;
+
+	case DMA_NONE:
+		BUG();
+		break;
+
+	default:
+		break;
+	}
+}
+
+static void xtensa_sync_sg_for_cpu(struct device *dev,
+				   struct scatterlist *sg, int nents,
+				   enum dma_data_direction dir)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		xtensa_sync_single_for_cpu(dev, sg_dma_address(s),
+					   sg_dma_len(s), dir);
+	}
+}
+
+static void xtensa_sync_sg_for_device(struct device *dev,
+				      struct scatterlist *sg, int nents,
+				      enum dma_data_direction dir)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		xtensa_sync_single_for_device(dev, sg_dma_address(s),
+					      sg_dma_len(s), dir);
+	}
+}
+
 /*
  * Note: We assume that the full memory space is always mapped to 'kseg'
  *	 Otherwise we have to use page attributes (not implemented).
  */
 
-void *
-dma_alloc_coherent(struct device *dev,size_t size,dma_addr_t *handle,gfp_t flag)
+static void *xtensa_dma_alloc(struct device *dev, size_t size,
+			      dma_addr_t *handle, gfp_t flag,
+			      struct dma_attrs *attrs)
 {
 	unsigned long ret;
 	unsigned long uncached = 0;
@@ -52,20 +145,15 @@ dma_alloc_coherent(struct device *dev,size_t size,dma_addr_t *handle,gfp_t flag)
 	BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR ||
 	       ret > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1);
 
+	uncached = ret + XCHAL_KSEG_BYPASS_VADDR - XCHAL_KSEG_CACHED_VADDR;
+	*handle = virt_to_bus((void *)ret);
+	__invalidate_dcache_range(ret, size);
 
-	if (ret != 0) {
-		memset((void*) ret, 0, size);
-		uncached = ret+XCHAL_KSEG_BYPASS_VADDR-XCHAL_KSEG_CACHED_VADDR;
-		*handle = virt_to_bus((void*)ret);
-		__flush_invalidate_dcache_range(ret, size);
-	}
-
-	return (void*)uncached;
+	return (void *)uncached;
 }
-EXPORT_SYMBOL(dma_alloc_coherent);
 
-void dma_free_coherent(struct device *hwdev, size_t size,
-			 void *vaddr, dma_addr_t dma_handle)
+static void xtensa_dma_free(struct device *hwdev, size_t size, void *vaddr,
+			    dma_addr_t dma_handle, struct dma_attrs *attrs)
 {
 	unsigned long addr = (unsigned long)vaddr +
 		XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
@@ -75,24 +163,79 @@ void dma_free_coherent(struct device *hwdev, size_t size,
 
 	free_pages(addr, get_order(size));
 }
-EXPORT_SYMBOL(dma_free_coherent);
 
+static dma_addr_t xtensa_map_page(struct device *dev, struct page *page,
+				  unsigned long offset, size_t size,
+				  enum dma_data_direction dir,
+				  struct dma_attrs *attrs)
+{
+	dma_addr_t dma_handle = page_to_phys(page) + offset;
+
+	BUG_ON(PageHighMem(page));
+	xtensa_sync_single_for_device(dev, dma_handle, size, dir);
+	return dma_handle;
+}
 
-void consistent_sync(void *vaddr, size_t size, int direction)
+static void xtensa_unmap_page(struct device *dev, dma_addr_t dma_handle,
+			      size_t size, enum dma_data_direction dir,
+			      struct dma_attrs *attrs)
 {
-	switch (direction) {
-	case PCI_DMA_NONE:
-		BUG();
-	case PCI_DMA_FROMDEVICE:        /* invalidate only */
-		__invalidate_dcache_range((unsigned long)vaddr,
-				          (unsigned long)size);
-		break;
+	xtensa_sync_single_for_cpu(dev, dma_handle, size, dir);
+}
 
-	case PCI_DMA_TODEVICE:          /* writeback only */
-	case PCI_DMA_BIDIRECTIONAL:     /* writeback and invalidate */
-		__flush_invalidate_dcache_range((unsigned long)vaddr,
-				    		(unsigned long)size);
-		break;
+static int xtensa_map_sg(struct device *dev, struct scatterlist *sg,
+			 int nents, enum dma_data_direction dir,
+			 struct dma_attrs *attrs)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		s->dma_address = xtensa_map_page(dev, sg_page(s), s->offset,
+						 s->length, dir, attrs);
+	}
+	return nents;
+}
+
+static void xtensa_unmap_sg(struct device *dev,
+			    struct scatterlist *sg, int nents,
+			    enum dma_data_direction dir,
+			    struct dma_attrs *attrs)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		xtensa_unmap_page(dev, sg_dma_address(s),
+				  sg_dma_len(s), dir, attrs);
 	}
 }
-EXPORT_SYMBOL(consistent_sync);
+
+int xtensa_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return 0;
+}
+
+struct dma_map_ops xtensa_dma_map_ops = {
+	.alloc = xtensa_dma_alloc,
+	.free = xtensa_dma_free,
+	.map_page = xtensa_map_page,
+	.unmap_page = xtensa_unmap_page,
+	.map_sg = xtensa_map_sg,
+	.unmap_sg = xtensa_unmap_sg,
+	.sync_single_for_cpu = xtensa_sync_single_for_cpu,
+	.sync_single_for_device = xtensa_sync_single_for_device,
+	.sync_sg_for_cpu = xtensa_sync_sg_for_cpu,
+	.sync_sg_for_device = xtensa_sync_sg_for_device,
+	.mapping_error = xtensa_dma_mapping_error,
+};
+EXPORT_SYMBOL(xtensa_dma_map_ops);
+
+#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
+
+static int __init xtensa_dma_init(void)
+{
+	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+	return 0;
+}
+fs_initcall(xtensa_dma_init);
diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c
index b848cc3..d27b4dc 100644
--- a/arch/xtensa/kernel/pci.c
+++ b/arch/xtensa/kernel/pci.c
@@ -210,10 +210,6 @@ subsys_initcall(pcibios_init);
 
 void pcibios_fixup_bus(struct pci_bus *bus)
 {
-	if (bus->parent) {
-		/* This is a subordinate bridge */
-		pci_read_bridge_bases(bus);
-	}
 }
 
 void pcibios_set_master(struct pci_dev *dev)
diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
new file mode 100644
index 0000000..54f0118
--- /dev/null
+++ b/arch/xtensa/kernel/perf_event.c
@@ -0,0 +1,454 @@
+/*
+ * Xtensa Performance Monitor Module driver
+ * See Tensilica Debug User's Guide for PMU registers documentation.
+ *
+ * Copyright (C) 2015 Cadence Design Systems Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+
+#include <asm/processor.h>
+#include <asm/stacktrace.h>
+
+/* Global control/status for all perf counters */
+#define XTENSA_PMU_PMG			0x1000
+/* Perf counter values */
+#define XTENSA_PMU_PM(i)		(0x1080 + (i) * 4)
+/* Perf counter control registers */
+#define XTENSA_PMU_PMCTRL(i)		(0x1100 + (i) * 4)
+/* Perf counter status registers */
+#define XTENSA_PMU_PMSTAT(i)		(0x1180 + (i) * 4)
+
+#define XTENSA_PMU_PMG_PMEN		0x1
+
+#define XTENSA_PMU_COUNTER_MASK		0xffffffffULL
+#define XTENSA_PMU_COUNTER_MAX		0x7fffffff
+
+#define XTENSA_PMU_PMCTRL_INTEN		0x00000001
+#define XTENSA_PMU_PMCTRL_KRNLCNT	0x00000008
+#define XTENSA_PMU_PMCTRL_TRACELEVEL	0x000000f0
+#define XTENSA_PMU_PMCTRL_SELECT_SHIFT	8
+#define XTENSA_PMU_PMCTRL_SELECT	0x00001f00
+#define XTENSA_PMU_PMCTRL_MASK_SHIFT	16
+#define XTENSA_PMU_PMCTRL_MASK		0xffff0000
+
+#define XTENSA_PMU_MASK(select, mask) \
+	(((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
+	 ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
+	 XTENSA_PMU_PMCTRL_TRACELEVEL | \
+	 XTENSA_PMU_PMCTRL_INTEN)
+
+#define XTENSA_PMU_PMSTAT_OVFL		0x00000001
+#define XTENSA_PMU_PMSTAT_INTASRT	0x00000010
+
+struct xtensa_pmu_events {
+	/* Array of events currently on this core */
+	struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
+	/* Bitmap of used hardware counters */
+	unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
+};
+static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);
+
+static const u32 xtensa_hw_ctl[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= XTENSA_PMU_MASK(0, 0x1),
+	[PERF_COUNT_HW_INSTRUCTIONS]		= XTENSA_PMU_MASK(2, 0xffff),
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= XTENSA_PMU_MASK(10, 0x1),
+	[PERF_COUNT_HW_CACHE_MISSES]		= XTENSA_PMU_MASK(12, 0x1),
+	/* Taken and non-taken branches + taken loop ends */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= XTENSA_PMU_MASK(2, 0x490),
+	/* Instruction-related + other global stall cycles */
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= XTENSA_PMU_MASK(4, 0x1ff),
+	/* Data-related global stall cycles */
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= XTENSA_PMU_MASK(3, 0x1ff),
+};
+
+#define C(_x) PERF_COUNT_HW_CACHE_##_x
+
+static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(10, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(10, 0x2),
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(11, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(11, 0x2),
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(8, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(8, 0x2),
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(9, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(9, 0x8),
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(7, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(7, 0x8),
+		},
+	},
+};
+
+static int xtensa_pmu_cache_event(u64 config)
+{
+	unsigned int cache_type, cache_op, cache_result;
+	int ret;
+
+	cache_type = (config >>  0) & 0xff;
+	cache_op = (config >>  8) & 0xff;
+	cache_result = (config >> 16) & 0xff;
+
+	if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) ||
+	    cache_op >= C(OP_MAX) ||
+	    cache_result >= C(RESULT_MAX))
+		return -EINVAL;
+
+	ret = xtensa_cache_ctl[cache_type][cache_op][cache_result];
+
+	if (ret == 0)
+		return -EINVAL;
+
+	return ret;
+}
+
+static inline uint32_t xtensa_pmu_read_counter(int idx)
+{
+	return get_er(XTENSA_PMU_PM(idx));
+}
+
+static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
+{
+	set_er(v, XTENSA_PMU_PM(idx));
+}
+
+static void xtensa_perf_event_update(struct perf_event *event,
+				     struct hw_perf_event *hwc, int idx)
+{
+	uint64_t prev_raw_count, new_raw_count;
+	int64_t delta;
+
+	do {
+		prev_raw_count = local64_read(&hwc->prev_count);
+		new_raw_count = xtensa_pmu_read_counter(event->hw.idx);
+	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+				 new_raw_count) != prev_raw_count);
+
+	delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+}
+
+static bool xtensa_perf_event_set_period(struct perf_event *event,
+					 struct hw_perf_event *hwc, int idx)
+{
+	bool rc = false;
+	s64 left;
+
+	if (!is_sampling_event(event)) {
+		left = XTENSA_PMU_COUNTER_MAX;
+	} else {
+		s64 period = hwc->sample_period;
+
+		left = local64_read(&hwc->period_left);
+		if (left <= -period) {
+			left = period;
+			local64_set(&hwc->period_left, left);
+			hwc->last_period = period;
+			rc = true;
+		} else if (left <= 0) {
+			left += period;
+			local64_set(&hwc->period_left, left);
+			hwc->last_period = period;
+			rc = true;
+		}
+		if (left > XTENSA_PMU_COUNTER_MAX)
+			left = XTENSA_PMU_COUNTER_MAX;
+	}
+
+	local64_set(&hwc->prev_count, -left);
+	xtensa_pmu_write_counter(idx, -left);
+	perf_event_update_userpage(event);
+
+	return rc;
+}
+
+static void xtensa_pmu_enable(struct pmu *pmu)
+{
+	set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
+}
+
+static void xtensa_pmu_disable(struct pmu *pmu)
+{
+	set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
+}
+
+static int xtensa_pmu_event_init(struct perf_event *event)
+{
+	int ret;
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
+		    xtensa_hw_ctl[event->attr.config] == 0)
+			return -EINVAL;
+		event->hw.config = xtensa_hw_ctl[event->attr.config];
+		return 0;
+
+	case PERF_TYPE_HW_CACHE:
+		ret = xtensa_pmu_cache_event(event->attr.config);
+		if (ret < 0)
+			return ret;
+		event->hw.config = ret;
+		return 0;
+
+	case PERF_TYPE_RAW:
+		/* Not 'previous counter' select */
+		if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
+		    (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
+			return -EINVAL;
+		event->hw.config = (event->attr.config &
+				    (XTENSA_PMU_PMCTRL_KRNLCNT |
+				     XTENSA_PMU_PMCTRL_TRACELEVEL |
+				     XTENSA_PMU_PMCTRL_SELECT |
+				     XTENSA_PMU_PMCTRL_MASK)) |
+			XTENSA_PMU_PMCTRL_INTEN;
+		return 0;
+
+	default:
+		return -ENOENT;
+	}
+}
+
+/*
+ * Starts/Stops a counter present on the PMU. The PMI handler
+ * should stop the counter when perf_event_overflow() returns
+ * !0. ->start() will be used to continue.
+ */
+static void xtensa_pmu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (WARN_ON_ONCE(idx == -1))
+		return;
+
+	if (flags & PERF_EF_RELOAD) {
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+		xtensa_perf_event_set_period(event, hwc, idx);
+	}
+
+	hwc->state = 0;
+
+	set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
+}
+
+static void xtensa_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		set_er(0, XTENSA_PMU_PMCTRL(idx));
+		set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
+		       XTENSA_PMU_PMSTAT(idx));
+		hwc->state |= PERF_HES_STOPPED;
+	}
+
+	if ((flags & PERF_EF_UPDATE) &&
+	    !(event->hw.state & PERF_HES_UPTODATE)) {
+		xtensa_perf_event_update(event, &event->hw, idx);
+		event->hw.state |= PERF_HES_UPTODATE;
+	}
+}
+
+/*
+ * Adds/Removes a counter to/from the PMU, can be done inside
+ * a transaction, see the ->*_txn() methods.
+ */
+static int xtensa_pmu_add(struct perf_event *event, int flags)
+{
+	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (__test_and_set_bit(idx, ev->used_mask)) {
+		idx = find_first_zero_bit(ev->used_mask,
+					  XCHAL_NUM_PERF_COUNTERS);
+		if (idx == XCHAL_NUM_PERF_COUNTERS)
+			return -EAGAIN;
+
+		__set_bit(idx, ev->used_mask);
+		hwc->idx = idx;
+	}
+	ev->event[idx] = event;
+
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		xtensa_pmu_start(event, PERF_EF_RELOAD);
+
+	perf_event_update_userpage(event);
+	return 0;
+}
+
+static void xtensa_pmu_del(struct perf_event *event, int flags)
+{
+	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
+
+	xtensa_pmu_stop(event, PERF_EF_UPDATE);
+	__clear_bit(event->hw.idx, ev->used_mask);
+	perf_event_update_userpage(event);
+}
+
+static void xtensa_pmu_read(struct perf_event *event)
+{
+	xtensa_perf_event_update(event, &event->hw, event->hw.idx);
+}
+
+static int callchain_trace(struct stackframe *frame, void *data)
+{
+	struct perf_callchain_entry *entry = data;
+
+	perf_callchain_store(entry, frame->pc);
+	return 0;
+}
+
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+			   struct pt_regs *regs)
+{
+	xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
+				callchain_trace, NULL, entry);
+}
+
+void perf_callchain_user(struct perf_callchain_entry *entry,
+			 struct pt_regs *regs)
+{
+	xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
+			      callchain_trace, entry);
+}
+
+void perf_event_print_debug(void)
+{
+	unsigned long flags;
+	unsigned i;
+
+	local_irq_save(flags);
+	pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
+		get_er(XTENSA_PMU_PMG));
+	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i)
+		pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
+			i, get_er(XTENSA_PMU_PM(i)),
+			i, get_er(XTENSA_PMU_PMCTRL(i)),
+			i, get_er(XTENSA_PMU_PMSTAT(i)));
+	local_irq_restore(flags);
+}
+
+irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
+{
+	irqreturn_t rc = IRQ_NONE;
+	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
+	unsigned i;
+
+	for (i = find_first_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS);
+	     i < XCHAL_NUM_PERF_COUNTERS;
+	     i = find_next_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS, i + 1)) {
+		uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
+		struct perf_event *event = ev->event[i];
+		struct hw_perf_event *hwc = &event->hw;
+		u64 last_period;
+
+		if (!(v & XTENSA_PMU_PMSTAT_OVFL))
+			continue;
+
+		set_er(v, XTENSA_PMU_PMSTAT(i));
+		xtensa_perf_event_update(event, hwc, i);
+		last_period = hwc->last_period;
+		if (xtensa_perf_event_set_period(event, hwc, i)) {
+			struct perf_sample_data data;
+			struct pt_regs *regs = get_irq_regs();
+
+			perf_sample_data_init(&data, 0, last_period);
+			if (perf_event_overflow(event, &data, regs))
+				xtensa_pmu_stop(event, 0);
+		}
+
+		rc = IRQ_HANDLED;
+	}
+	return rc;
+}
+
+static struct pmu xtensa_pmu = {
+	.pmu_enable = xtensa_pmu_enable,
+	.pmu_disable = xtensa_pmu_disable,
+	.event_init = xtensa_pmu_event_init,
+	.add = xtensa_pmu_add,
+	.del = xtensa_pmu_del,
+	.start = xtensa_pmu_start,
+	.stop = xtensa_pmu_stop,
+	.read = xtensa_pmu_read,
+};
+
+static void xtensa_pmu_setup(void)
+{
+	unsigned i;
+
+	set_er(0, XTENSA_PMU_PMG);
+	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
+		set_er(0, XTENSA_PMU_PMCTRL(i));
+		set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i));
+	}
+}
+
+static int xtensa_pmu_notifier(struct notifier_block *self,
+			       unsigned long action, void *data)
+{
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_STARTING:
+		xtensa_pmu_setup();
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int __init xtensa_pmu_init(void)
+{
+	int ret;
+	int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
+
+	perf_cpu_notifier(xtensa_pmu_notifier);
+#if XTENSA_FAKE_NMI
+	enable_irq(irq);
+#else
+	ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
+			  "pmu", NULL);
+	if (ret < 0)
+		return ret;
+#endif
+
+	ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
+	if (ret)
+		free_irq(irq, NULL);
+
+	return ret;
+}
+early_initcall(xtensa_pmu_init);
diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c
index 7d2c317..7538d80 100644
--- a/arch/xtensa/kernel/stacktrace.c
+++ b/arch/xtensa/kernel/stacktrace.c
@@ -1,11 +1,12 @@
 /*
- * arch/xtensa/kernel/stacktrace.c
+ * Kernel and userspace stack tracing.
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
  * Copyright (C) 2001 - 2013 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  */
 #include <linux/export.h>
 #include <linux/sched.h>
@@ -13,6 +14,170 @@
 
 #include <asm/stacktrace.h>
 #include <asm/traps.h>
+#include <asm/uaccess.h>
+
+#if IS_ENABLED(CONFIG_OPROFILE) || IS_ENABLED(CONFIG_PERF_EVENTS)
+
+/* Address of common_exception_return, used to check the
+ * transition from kernel to user space.
+ */
+extern int common_exception_return;
+
+/* A struct that maps to the part of the frame containing the a0 and
+ * a1 registers.
+ */
+struct frame_start {
+	unsigned long a0;
+	unsigned long a1;
+};
+
+void xtensa_backtrace_user(struct pt_regs *regs, unsigned int depth,
+			   int (*ufn)(struct stackframe *frame, void *data),
+			   void *data)
+{
+	unsigned long windowstart = regs->windowstart;
+	unsigned long windowbase = regs->windowbase;
+	unsigned long a0 = regs->areg[0];
+	unsigned long a1 = regs->areg[1];
+	unsigned long pc = regs->pc;
+	struct stackframe frame;
+	int index;
+
+	if (!depth--)
+		return;
+
+	frame.pc = pc;
+	frame.sp = a1;
+
+	if (pc == 0 || pc >= TASK_SIZE || ufn(&frame, data))
+		return;
+
+	/* Two steps:
+	 *
+	 * 1. Look through the register window for the
+	 * previous PCs in the call trace.
+	 *
+	 * 2. Look on the stack.
+	 */
+
+	/* Step 1.  */
+	/* Rotate WINDOWSTART to move the bit corresponding to
+	 * the current window to the bit #0.
+	 */
+	windowstart = (windowstart << WSBITS | windowstart) >> windowbase;
+
+	/* Look for bits that are set, they correspond to
+	 * valid windows.
+	 */
+	for (index = WSBITS - 1; (index > 0) && depth; depth--, index--)
+		if (windowstart & (1 << index)) {
+			/* Get the PC from a0 and a1. */
+			pc = MAKE_PC_FROM_RA(a0, pc);
+			/* Read a0 and a1 from the
+			 * corresponding position in AREGs.
+			 */
+			a0 = regs->areg[index * 4];
+			a1 = regs->areg[index * 4 + 1];
+
+			frame.pc = pc;
+			frame.sp = a1;
+
+			if (pc == 0 || pc >= TASK_SIZE || ufn(&frame, data))
+				return;
+		}
+
+	/* Step 2. */
+	/* We are done with the register window, we need to
+	 * look through the stack.
+	 */
+	if (!depth)
+		return;
+
+	/* Start from the a1 register. */
+	/* a1 = regs->areg[1]; */
+	while (a0 != 0 && depth--) {
+		struct frame_start frame_start;
+		/* Get the location for a1, a0 for the
+		 * previous frame from the current a1.
+		 */
+		unsigned long *psp = (unsigned long *)a1;
+
+		psp -= 4;
+
+		/* Check if the region is OK to access. */
+		if (!access_ok(VERIFY_READ, psp, sizeof(frame_start)))
+			return;
+		/* Copy a1, a0 from user space stack frame. */
+		if (__copy_from_user_inatomic(&frame_start, psp,
+					      sizeof(frame_start)))
+			return;
+
+		pc = MAKE_PC_FROM_RA(a0, pc);
+		a0 = frame_start.a0;
+		a1 = frame_start.a1;
+
+		frame.pc = pc;
+		frame.sp = a1;
+
+		if (pc == 0 || pc >= TASK_SIZE || ufn(&frame, data))
+			return;
+	}
+}
+EXPORT_SYMBOL(xtensa_backtrace_user);
+
+void xtensa_backtrace_kernel(struct pt_regs *regs, unsigned int depth,
+			     int (*kfn)(struct stackframe *frame, void *data),
+			     int (*ufn)(struct stackframe *frame, void *data),
+			     void *data)
+{
+	unsigned long pc = regs->depc > VALID_DOUBLE_EXCEPTION_ADDRESS ?
+		regs->depc : regs->pc;
+	unsigned long sp_start, sp_end;
+	unsigned long a0 = regs->areg[0];
+	unsigned long a1 = regs->areg[1];
+
+	sp_start = a1 & ~(THREAD_SIZE - 1);
+	sp_end = sp_start + THREAD_SIZE;
+
+	/* Spill the register window to the stack first. */
+	spill_registers();
+
+	/* Read the stack frames one by one and create the PC
+	 * from the a0 and a1 registers saved there.
+	 */
+	while (a1 > sp_start && a1 < sp_end && depth--) {
+		struct stackframe frame;
+		unsigned long *psp = (unsigned long *)a1;
+
+		frame.pc = pc;
+		frame.sp = a1;
+
+		if (kernel_text_address(pc) && kfn(&frame, data))
+			return;
+
+		if (pc == (unsigned long)&common_exception_return) {
+			regs = (struct pt_regs *)a1;
+			if (user_mode(regs)) {
+				if (ufn == NULL)
+					return;
+				xtensa_backtrace_user(regs, depth, ufn, data);
+				return;
+			}
+			a0 = regs->areg[0];
+			a1 = regs->areg[1];
+			continue;
+		}
+
+		sp_start = a1;
+
+		pc = MAKE_PC_FROM_RA(a0, pc);
+		a0 = *(psp - 4);
+		a1 = *(psp - 3);
+	}
+}
+EXPORT_SYMBOL(xtensa_backtrace_kernel);
+
+#endif
 
 void walk_stackframe(unsigned long *sp,
 		int (*fn)(struct stackframe *frame, void *data),
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 9d2f45f..42d441f 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -62,6 +62,7 @@ extern void fast_coprocessor(void);
 
 extern void do_illegal_instruction (struct pt_regs*);
 extern void do_interrupt (struct pt_regs*);
+extern void do_nmi(struct pt_regs *);
 extern void do_unaligned_user (struct pt_regs*);
 extern void do_multihit (struct pt_regs*, unsigned long);
 extern void do_page_fault (struct pt_regs*, unsigned long);
@@ -146,6 +147,9 @@ COPROCESSOR(6),
 #if XTENSA_HAVE_COPROCESSOR(7)
 COPROCESSOR(7),
 #endif
+#if XTENSA_FAKE_NMI
+{ EXCCAUSE_MAPPED_NMI,			0,		do_nmi },
+#endif
 { EXCCAUSE_MAPPED_DEBUG,		0,		do_debug },
 { -1, -1, 0 }
 
@@ -199,6 +203,28 @@ void do_multihit(struct pt_regs *regs, unsigned long exccause)
 
 extern void do_IRQ(int, struct pt_regs *);
 
+#if XTENSA_FAKE_NMI
+
+irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id);
+
+DEFINE_PER_CPU(unsigned long, nmi_count);
+
+void do_nmi(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs;
+
+	if ((regs->ps & PS_INTLEVEL_MASK) < LOCKLEVEL)
+		trace_hardirqs_off();
+
+	old_regs = set_irq_regs(regs);
+	nmi_enter();
+	++*this_cpu_ptr(&nmi_count);
+	xtensa_pmu_irq_handler(0, NULL);
+	nmi_exit();
+	set_irq_regs(old_regs);
+}
+#endif
+
 void do_interrupt(struct pt_regs *regs)
 {
 	static const unsigned int_level_mask[] = {
@@ -211,8 +237,11 @@ void do_interrupt(struct pt_regs *regs)
 		XCHAL_INTLEVEL6_MASK,
 		XCHAL_INTLEVEL7_MASK,
 	};
-	struct pt_regs *old_regs = set_irq_regs(regs);
+	struct pt_regs *old_regs;
+
+	trace_hardirqs_off();
 
+	old_regs = set_irq_regs(regs);
 	irq_enter();
 
 	for (;;) {
diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S
index 1b397a9..abcdb52 100644
--- a/arch/xtensa/kernel/vectors.S
+++ b/arch/xtensa/kernel/vectors.S
@@ -627,7 +627,11 @@ ENTRY(_Level\level\()InterruptVector)
 	wsr	a0, excsave2
 	rsr	a0, epc\level
 	wsr	a0, epc1
+	.if	\level <= LOCKLEVEL
 	movi	a0, EXCCAUSE_LEVEL1_INTERRUPT
+	.else
+	movi	a0, EXCCAUSE_MAPPED_NMI
+	.endif
 	wsr	a0, exccause
 	rsr	a0, eps\level
 					# branch to user or kernel vector
@@ -682,11 +686,13 @@ ENDPROC(_WindowOverflow4)
 	.align 4
 _SimulateUserKernelVectorException:
 	addi	a0, a0, (1 << PS_EXCM_BIT)
+#if !XTENSA_FAKE_NMI
 	wsr	a0, ps
+#endif
 	bbsi.l	a0, PS_UM_BIT, 1f	# branch if user mode
-	rsr	a0, excsave2		# restore a0
+	xsr	a0, excsave2		# restore a0
 	j	_KernelExceptionVector	# simulate kernel vector exception
-1:	rsr	a0, excsave2		# restore a0
+1:	xsr	a0, excsave2		# restore a0
 	j	_UserExceptionVector	# simulate user vector exception
 #endif
 
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 83a44a3..c9784c1 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -15,6 +15,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/hardirq.h>
+#include <linux/perf_event.h>
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
@@ -142,6 +143,12 @@ good_area:
 	}
 
 	up_read(&mm->mmap_sem);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+	if (flags & VM_FAULT_MAJOR)
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+	else if (flags & VM_FAULT_MINOR)
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
+
 	return;
 
 	/* Something tried to access memory that isn't in our memory map..
diff --git a/arch/xtensa/oprofile/backtrace.c b/arch/xtensa/oprofile/backtrace.c
index 5f03a59..8f95203 100644
--- a/arch/xtensa/oprofile/backtrace.c
+++ b/arch/xtensa/oprofile/backtrace.c
@@ -2,168 +2,26 @@
  * @file backtrace.c
  *
  * @remark Copyright 2008 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  * @remark Read the file COPYING
  *
  */
 
 #include <linux/oprofile.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
 #include <asm/ptrace.h>
-#include <asm/uaccess.h>
-#include <asm/traps.h>
+#include <asm/stacktrace.h>
 
-/* Address of common_exception_return, used to check the
- * transition from kernel to user space.
- */
-extern int common_exception_return;
-
-/* A struct that maps to the part of the frame containing the a0 and
- * a1 registers.
- */
-struct frame_start {
-	unsigned long a0;
-	unsigned long a1;
-};
-
-static void xtensa_backtrace_user(struct pt_regs *regs, unsigned int depth)
-{
-	unsigned long windowstart = regs->windowstart;
-	unsigned long windowbase = regs->windowbase;
-	unsigned long a0 = regs->areg[0];
-	unsigned long a1 = regs->areg[1];
-	unsigned long pc = MAKE_PC_FROM_RA(a0, regs->pc);
-	int index;
-
-	/* First add the current PC to the trace. */
-	if (pc != 0 && pc <= TASK_SIZE)
-		oprofile_add_trace(pc);
-	else
-		return;
-
-	/* Two steps:
-	 *
-	 * 1. Look through the register window for the
-	 * previous PCs in the call trace.
-	 *
-	 * 2. Look on the stack.
-	 */
-
-	/* Step 1.  */
-	/* Rotate WINDOWSTART to move the bit corresponding to
-	 * the current window to the bit #0.
-	 */
-	windowstart = (windowstart << WSBITS | windowstart) >> windowbase;
-
-	/* Look for bits that are set, they correspond to
-	 * valid windows.
-	 */
-	for (index = WSBITS - 1; (index > 0) && depth; depth--, index--)
-		if (windowstart & (1 << index)) {
-			/* Read a0 and a1 from the
-			 * corresponding position in AREGs.
-			 */
-			a0 = regs->areg[index * 4];
-			a1 = regs->areg[index * 4 + 1];
-			/* Get the PC from a0 and a1. */
-			pc = MAKE_PC_FROM_RA(a0, pc);
-
-			/* Add the PC to the trace. */
-			if (pc != 0 && pc <= TASK_SIZE)
-				oprofile_add_trace(pc);
-			else
-				return;
-		}
-
-	/* Step 2. */
-	/* We are done with the register window, we need to
-	 * look through the stack.
-	 */
-	if (depth > 0) {
-		/* Start from the a1 register. */
-		/* a1 = regs->areg[1]; */
-		while (a0 != 0 && depth--) {
-
-			struct frame_start frame_start;
-			/* Get the location for a1, a0 for the
-			 * previous frame from the current a1.
-			 */
-			unsigned long *psp = (unsigned long *)a1;
-			psp -= 4;
-
-			/* Check if the region is OK to access. */
-			if (!access_ok(VERIFY_READ, psp, sizeof(frame_start)))
-				return;
-			/* Copy a1, a0 from user space stack frame. */
-			if (__copy_from_user_inatomic(&frame_start, psp,
-						sizeof(frame_start)))
-				return;
-
-			a0 = frame_start.a0;
-			a1 = frame_start.a1;
-			pc = MAKE_PC_FROM_RA(a0, pc);
-
-			if (pc != 0 && pc <= TASK_SIZE)
-				oprofile_add_trace(pc);
-			else
-				return;
-		}
-	}
-}
-
-static void xtensa_backtrace_kernel(struct pt_regs *regs, unsigned int depth)
+static int xtensa_backtrace_cb(struct stackframe *frame, void *data)
 {
-	unsigned long pc = regs->pc;
-	unsigned long *psp;
-	unsigned long sp_start, sp_end;
-	unsigned long a0 = regs->areg[0];
-	unsigned long a1 = regs->areg[1];
-
-	sp_start = a1 & ~(THREAD_SIZE-1);
-	sp_end = sp_start + THREAD_SIZE;
-
-	/* Spill the register window to the stack first. */
-	spill_registers();
-
-	/* Read the stack frames one by one and create the PC
-	 * from the a0 and a1 registers saved there.
-	 */
-	while (a1 > sp_start && a1 < sp_end && depth--) {
-		pc = MAKE_PC_FROM_RA(a0, pc);
-
-		/* Add the PC to the trace. */
-		oprofile_add_trace(pc);
-		if (pc == (unsigned long) &common_exception_return) {
-			regs = (struct pt_regs *)a1;
-			if (user_mode(regs)) {
-				pc = regs->pc;
-				if (pc != 0 && pc <= TASK_SIZE)
-					oprofile_add_trace(pc);
-				else
-					return;
-				return xtensa_backtrace_user(regs, depth);
-			}
-			a0 = regs->areg[0];
-			a1 = regs->areg[1];
-			continue;
-		}
-
-		psp = (unsigned long *)a1;
-
-		a0 = *(psp - 4);
-		a1 = *(psp - 3);
-
-		if (a1 <= (unsigned long)psp)
-			return;
-
-	}
-	return;
+	oprofile_add_trace(frame->pc);
+	return 0;
 }
 
 void xtensa_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
 	if (user_mode(regs))
-		xtensa_backtrace_user(regs, depth);
+		xtensa_backtrace_user(regs, depth, xtensa_backtrace_cb, NULL);
 	else
-		xtensa_backtrace_kernel(regs, depth);
+		xtensa_backtrace_kernel(regs, depth, xtensa_backtrace_cb,
+					xtensa_backtrace_cb, NULL);
 }
diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c
index 8ab021b..976a385 100644
--- a/arch/xtensa/platforms/iss/network.c
+++ b/arch/xtensa/platforms/iss/network.c
@@ -105,13 +105,17 @@ static char *split_if_spec(char *str, ...)
 
 	va_start(ap, str);
 	while ((arg = va_arg(ap, char**)) != NULL) {
-		if (*str == '\0')
+		if (*str == '\0') {
+			va_end(ap);
 			return NULL;
+		}
 		end = strchr(str, ',');
 		if (end != str)
 			*arg = str;
-		if (end == NULL)
+		if (end == NULL) {
+			va_end(ap);
 			return NULL;
+		}
 		*end++ = '\0';
 		str = end;
 	}