summaryrefslogtreecommitdiff
path: root/arch/arm64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/Kconfig21
-rw-r--r--arch/arm64/Kconfig.debug25
-rw-r--r--arch/arm64/Makefile6
-rw-r--r--arch/arm64/boot/dts/lg/lg1312.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399.dtsi2
-rw-r--r--arch/arm64/include/asm/elf.h4
-rw-r--r--arch/arm64/include/asm/kgdb.h45
-rw-r--r--arch/arm64/include/asm/memory.h3
-rw-r--r--arch/arm64/include/asm/page.h12
-rw-r--r--arch/arm64/include/asm/pgalloc.h2
-rw-r--r--arch/arm64/include/asm/smp.h12
-rw-r--r--arch/arm64/include/asm/spinlock.h42
-rw-r--r--arch/arm64/include/asm/uaccess.h13
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h8
-rw-r--r--arch/arm64/kernel/cpuinfo.c8
-rw-r--r--arch/arm64/kernel/hibernate.c6
-rw-r--r--arch/arm64/kernel/kgdb.c14
-rw-r--r--arch/arm64/kernel/smp.c18
-rw-r--r--arch/arm64/kernel/traps.c31
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c36
-rw-r--r--arch/arm64/kvm/sys_regs.c13
-rw-r--r--arch/arm64/mm/context.c9
-rw-r--r--arch/arm64/mm/dump.c8
-rw-r--r--arch/arm64/mm/fault.c4
-rw-r--r--arch/arm64/mm/flush.c4
-rw-r--r--arch/arm64/mm/hugetlbpage.c14
27 files changed, 264 insertions, 100 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 76747d9..5a0a691 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -113,6 +113,18 @@ config ARCH_PHYS_ADDR_T_64BIT
config MMU
def_bool y
+config ARM64_PAGE_SHIFT
+ int
+ default 16 if ARM64_64K_PAGES
+ default 14 if ARM64_16K_PAGES
+ default 12
+
+config ARM64_CONT_SHIFT
+ int
+ default 5 if ARM64_64K_PAGES
+ default 7 if ARM64_16K_PAGES
+ default 4
+
config ARCH_MMAP_RND_BITS_MIN
default 14 if ARM64_64K_PAGES
default 16 if ARM64_16K_PAGES
@@ -426,6 +438,15 @@ config CAVIUM_ERRATUM_22375
If unsure, say Y.
+config CAVIUM_ERRATUM_23144
+ bool "Cavium erratum 23144: ITS SYNC hang on dual socket system"
+ depends on NUMA
+ default y
+ help
+ ITS SYNC command hang for cross node io and collections/cpu mapping.
+
+ If unsure, say Y.
+
config CAVIUM_ERRATUM_23154
bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed"
default y
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index 710fde4..0cc758c 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -12,7 +12,8 @@ config ARM64_PTDUMP
who are working in architecture specific areas of the kernel.
It is probably not a good idea to enable this feature in a production
kernel.
- If in doubt, say "N"
+
+ If in doubt, say N.
config PID_IN_CONTEXTIDR
bool "Write the current PID to the CONTEXTIDR register"
@@ -38,15 +39,15 @@ config ARM64_RANDOMIZE_TEXT_OFFSET
value.
config DEBUG_SET_MODULE_RONX
- bool "Set loadable kernel module data as NX and text as RO"
- depends on MODULES
- help
- This option helps catch unintended modifications to loadable
- kernel module's text and read-only data. It also prevents execution
- of module data. Such protection may interfere with run-time code
- patching and dynamic kernel tracing - and they might also protect
- against certain classes of kernel exploits.
- If in doubt, say "N".
+ bool "Set loadable kernel module data as NX and text as RO"
+ depends on MODULES
+ default y
+ help
+ Is this is set, kernel module text and rodata will be made read-only.
+ This is to help catch accidental or malicious attempts to change the
+ kernel's executable code.
+
+ If in doubt, say Y.
config DEBUG_RODATA
bool "Make kernel text and rodata read-only"
@@ -56,7 +57,7 @@ config DEBUG_RODATA
is to help catch accidental or malicious attempts to change the
kernel's executable code.
- If in doubt, say Y
+ If in doubt, say Y.
config DEBUG_ALIGN_RODATA
depends on DEBUG_RODATA
@@ -69,7 +70,7 @@ config DEBUG_ALIGN_RODATA
alignment and potentially wasted space. Turn on this option if
performance is more important than memory pressure.
- If in doubt, say N
+ If in doubt, say N.
source "drivers/hwtracing/coresight/Kconfig"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 354d754..648a32c 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -60,7 +60,9 @@ head-y := arch/arm64/kernel/head.o
# The byte offset of the kernel image in RAM from the start of RAM.
ifeq ($(CONFIG_ARM64_RANDOMIZE_TEXT_OFFSET), y)
-TEXT_OFFSET := $(shell awk 'BEGIN {srand(); printf "0x%03x000\n", int(512 * rand())}')
+TEXT_OFFSET := $(shell awk "BEGIN {srand(); printf \"0x%06x\n\", \
+ int(2 * 1024 * 1024 / (2 ^ $(CONFIG_ARM64_PAGE_SHIFT)) * \
+ rand()) * (2 ^ $(CONFIG_ARM64_PAGE_SHIFT))}")
else
TEXT_OFFSET := 0x00080000
endif
@@ -93,7 +95,7 @@ boot := arch/arm64/boot
Image: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
-Image.%: vmlinux
+Image.%: Image
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
zinstall install:
diff --git a/arch/arm64/boot/dts/lg/lg1312.dtsi b/arch/arm64/boot/dts/lg/lg1312.dtsi
index 3a4e9a2..fbafa24 100644
--- a/arch/arm64/boot/dts/lg/lg1312.dtsi
+++ b/arch/arm64/boot/dts/lg/lg1312.dtsi
@@ -125,7 +125,7 @@
#size-cells = <1>;
#interrupts-cells = <3>;
- compatible = "arm,amba-bus";
+ compatible = "simple-bus";
interrupt-parent = <&gic>;
ranges;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index 46f325a..d7f8e06 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -163,7 +163,7 @@
};
amba {
- compatible = "arm,amba-bus";
+ compatible = "simple-bus";
#address-cells = <2>;
#size-cells = <2>;
ranges;
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 7a09c48..579b6e6 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -160,14 +160,14 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12))
#endif
-#ifdef CONFIG_COMPAT
-
#ifdef __AARCH64EB__
#define COMPAT_ELF_PLATFORM ("v8b")
#else
#define COMPAT_ELF_PLATFORM ("v8l")
#endif
+#ifdef CONFIG_COMPAT
+
#define COMPAT_ELF_ET_DYN_BASE (2 * TASK_SIZE_32 / 3)
/* AArch32 registers. */
diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h
index f69f69c..da84645 100644
--- a/arch/arm64/include/asm/kgdb.h
+++ b/arch/arm64/include/asm/kgdb.h
@@ -38,25 +38,54 @@ extern int kgdb_fault_expected;
#endif /* !__ASSEMBLY__ */
/*
- * gdb is expecting the following registers layout.
+ * gdb remote procotol (well most versions of it) expects the following
+ * register layout.
*
* General purpose regs:
* r0-r30: 64 bit
* sp,pc : 64 bit
- * pstate : 64 bit
- * Total: 34
+ * pstate : 32 bit
+ * Total: 33 + 1
* FPU regs:
* f0-f31: 128 bit
- * Total: 32
- * Extra regs
* fpsr & fpcr: 32 bit
- * Total: 2
+ * Total: 32 + 2
*
+ * To expand a little on the "most versions of it"... when the gdb remote
+ * protocol for AArch64 was developed it depended on a statement in the
+ * Architecture Reference Manual that claimed "SPSR_ELx is a 32-bit register".
+ * and, as a result, allocated only 32-bits for the PSTATE in the remote
+ * protocol. In fact this statement is still present in ARM DDI 0487A.i.
+ *
+ * Unfortunately "is a 32-bit register" has a very special meaning for
+ * system registers. It means that "the upper bits, bits[63:32], are
+ * RES0.". RES0 is heavily used in the ARM architecture documents as a
+ * way to leave space for future architecture changes. So to translate a
+ * little for people who don't spend their spare time reading ARM architecture
+ * manuals, what "is a 32-bit register" actually means in this context is
+ * "is a 64-bit register but one with no meaning allocated to any of the
+ * upper 32-bits... *yet*".
+ *
+ * Perhaps then we should not be surprised that this has led to some
+ * confusion. Specifically a patch, influenced by the above translation,
+ * that extended PSTATE to 64-bit was accepted into gdb-7.7 but the patch
+ * was reverted in gdb-7.8.1 and all later releases, when this was
+ * discovered to be an undocumented protocol change.
+ *
+ * So... it is *not* wrong for us to only allocate 32-bits to PSTATE
+ * here even though the kernel itself allocates 64-bits for the same
+ * state. That is because this bit of code tells the kernel how the gdb
+ * remote protocol (well most versions of it) describes the register state.
+ *
+ * Note that if you are using one of the versions of gdb that supports
+ * the gdb-7.7 version of the protocol you cannot use kgdb directly
+ * without providing a custom register description (gdb can load new
+ * protocol descriptions at runtime).
*/
-#define _GP_REGS 34
+#define _GP_REGS 33
#define _FP_REGS 32
-#define _EXTRA_REGS 2
+#define _EXTRA_REGS 3
/*
* general purpose registers size in bytes.
* pstate is only 4 bytes. subtract 4 bytes
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 72a3025..31b7322 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -55,8 +55,9 @@
#define VMEMMAP_SIZE (UL(1) << (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT))
/*
- * PAGE_OFFSET - the virtual address of the start of the kernel image (top
+ * PAGE_OFFSET - the virtual address of the start of the linear map (top
* (VA_BITS - 1))
+ * KIMAGE_VADDR - the virtual address of the start of the kernel image
* VA_BITS - the maximum number of bits for virtual addresses.
* VA_START - the first kernel virtual address.
* TASK_SIZE - the maximum size of a user space task.
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 17b45f7..8472c6d 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -23,16 +23,8 @@
/* PAGE_SHIFT determines the page size */
/* CONT_SHIFT determines the number of pages which can be tracked together */
-#ifdef CONFIG_ARM64_64K_PAGES
-#define PAGE_SHIFT 16
-#define CONT_SHIFT 5
-#elif defined(CONFIG_ARM64_16K_PAGES)
-#define PAGE_SHIFT 14
-#define CONT_SHIFT 7
-#else
-#define PAGE_SHIFT 12
-#define CONT_SHIFT 4
-#endif
+#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT
+#define CONT_SHIFT CONFIG_ARM64_CONT_SHIFT
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index ff98585..d25f4f1 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -26,7 +26,7 @@
#define check_pgt_cache() do { } while (0)
-#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
#if CONFIG_PGTABLE_LEVELS > 2
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 433e504..0226447 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -124,6 +124,18 @@ static inline void cpu_panic_kernel(void)
cpu_park_loop();
}
+/*
+ * If a secondary CPU enters the kernel but fails to come online,
+ * (e.g. due to mismatched features), and cannot exit the kernel,
+ * we increment cpus_stuck_in_kernel and leave the CPU in a
+ * quiesecent loop within the kernel text. The memory containing
+ * this loop must not be re-used for anything else as the 'stuck'
+ * core is executing it.
+ *
+ * This function is used to inhibit features like kexec and hibernate.
+ */
+bool cpus_are_stuck_in_kernel(void);
+
#endif /* ifndef __ASSEMBLY__ */
#endif /* ifndef __ASM_SMP_H */
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index fc9682b..e875a5a 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -30,22 +30,53 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
unsigned int tmp;
arch_spinlock_t lockval;
+ u32 owner;
+
+ /*
+ * Ensure prior spin_lock operations to other locks have completed
+ * on this CPU before we test whether "lock" is locked.
+ */
+ smp_mb();
+ owner = READ_ONCE(lock->owner) << 16;
asm volatile(
" sevl\n"
"1: wfe\n"
"2: ldaxr %w0, %2\n"
+ /* Is the lock free? */
" eor %w1, %w0, %w0, ror #16\n"
-" cbnz %w1, 1b\n"
+" cbz %w1, 3f\n"
+ /* Lock taken -- has there been a subsequent unlock->lock transition? */
+" eor %w1, %w3, %w0, lsl #16\n"
+" cbz %w1, 1b\n"
+ /*
+ * The owner has been updated, so there was an unlock->lock
+ * transition that we missed. That means we can rely on the
+ * store-release of the unlock operation paired with the
+ * load-acquire of the lock operation to publish any of our
+ * previous stores to the new lock owner and therefore don't
+ * need to bother with the writeback below.
+ */
+" b 4f\n"
+"3:\n"
+ /*
+ * Serialise against any concurrent lockers by writing back the
+ * unlocked lock value
+ */
ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" stxr %w1, %w0, %2\n"
-" cbnz %w1, 2b\n", /* Serialise against any concurrent lockers */
- /* LSE atomics */
" nop\n"
-" nop\n")
+" nop\n",
+ /* LSE atomics */
+" mov %w1, %w0\n"
+" cas %w0, %w0, %2\n"
+" eor %w1, %w1, %w0\n")
+ /* Somebody else wrote to the lock, GOTO 10 and reload the value */
+" cbnz %w1, 2b\n"
+"4:"
: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
- :
+ : "r" (owner)
: "memory");
}
@@ -148,6 +179,7 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
+ smp_mb(); /* See arch_spin_unlock_wait */
return !arch_spin_value_unlocked(READ_ONCE(*lock));
}
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 0685d74..9e397a5 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -81,19 +81,6 @@ static inline void set_fs(mm_segment_t fs)
#define segment_eq(a, b) ((a) == (b))
/*
- * Return 1 if addr < current->addr_limit, 0 otherwise.
- */
-#define __addr_ok(addr) \
-({ \
- unsigned long flag; \
- asm("cmp %1, %0; cset %0, lo" \
- : "=&r" (flag) \
- : "r" (addr), "0" (current_thread_info()->addr_limit) \
- : "cc"); \
- flag; \
-})
-
-/*
* Test whether a block of memory is a valid user space address.
* Returns 1 if the range is valid, 0 otherwise.
*
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 41e58fe..e78ac26 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -44,7 +44,7 @@
#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2)
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5)
-#define __NR_compat_syscalls 390
+#define __NR_compat_syscalls 394
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 5b925b7..b7e8ef1 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -801,6 +801,14 @@ __SYSCALL(__NR_execveat, compat_sys_execveat)
__SYSCALL(__NR_userfaultfd, sys_userfaultfd)
#define __NR_membarrier 389
__SYSCALL(__NR_membarrier, sys_membarrier)
+#define __NR_mlock2 390
+__SYSCALL(__NR_mlock2, sys_mlock2)
+#define __NR_copy_file_range 391
+__SYSCALL(__NR_copy_file_range, sys_copy_file_range)
+#define __NR_preadv2 392
+__SYSCALL(__NR_preadv2, compat_sys_preadv2)
+#define __NR_pwritev2 393
+__SYSCALL(__NR_pwritev2, compat_sys_pwritev2)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 3808470..c173d32 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -22,6 +22,8 @@
#include <linux/bitops.h>
#include <linux/bug.h>
+#include <linux/compat.h>
+#include <linux/elf.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/personality.h>
@@ -104,6 +106,7 @@ static const char *const compat_hwcap2_str[] = {
static int c_show(struct seq_file *m, void *v)
{
int i, j;
+ bool compat = personality(current->personality) == PER_LINUX32;
for_each_online_cpu(i) {
struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
@@ -115,6 +118,9 @@ static int c_show(struct seq_file *m, void *v)
* "processor". Give glibc what it expects.
*/
seq_printf(m, "processor\t: %d\n", i);
+ if (compat)
+ seq_printf(m, "model name\t: ARMv8 Processor rev %d (%s)\n",
+ MIDR_REVISION(midr), COMPAT_ELF_PLATFORM);
seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
loops_per_jiffy / (500000UL/HZ),
@@ -127,7 +133,7 @@ static int c_show(struct seq_file *m, void *v)
* software which does already (at least for 32-bit).
*/
seq_puts(m, "Features\t:");
- if (personality(current->personality) == PER_LINUX32) {
+ if (compat) {
#ifdef CONFIG_COMPAT
for (j = 0; compat_hwcap_str[j]; j++)
if (compat_elf_hwcap & (1 << j))
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index f8df75d..21ab5df 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -33,6 +33,7 @@
#include <asm/pgtable.h>
#include <asm/pgtable-hwdef.h>
#include <asm/sections.h>
+#include <asm/smp.h>
#include <asm/suspend.h>
#include <asm/virt.h>
@@ -236,6 +237,11 @@ int swsusp_arch_suspend(void)
unsigned long flags;
struct sleep_stack_data state;
+ if (cpus_are_stuck_in_kernel()) {
+ pr_err("Can't hibernate: no mechanism to offline secondary CPUs.\n");
+ return -EBUSY;
+ }
+
local_dbg_save(flags);
if (__cpu_suspend_enter(&state)) {
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index b67531a..b5f063e 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -58,7 +58,17 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
{ "x30", 8, offsetof(struct pt_regs, regs[30])},
{ "sp", 8, offsetof(struct pt_regs, sp)},
{ "pc", 8, offsetof(struct pt_regs, pc)},
- { "pstate", 8, offsetof(struct pt_regs, pstate)},
+ /*
+ * struct pt_regs thinks PSTATE is 64-bits wide but gdb remote
+ * protocol disagrees. Therefore we must extract only the lower
+ * 32-bits. Look for the big comment in asm/kgdb.h for more
+ * detail.
+ */
+ { "pstate", 4, offsetof(struct pt_regs, pstate)
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ + 4
+#endif
+ },
{ "v0", 16, -1 },
{ "v1", 16, -1 },
{ "v2", 16, -1 },
@@ -128,6 +138,8 @@ sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
memset((char *)gdb_regs, 0, NUMREGBYTES);
thread_regs = task_pt_regs(task);
memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES);
+ /* Special case for PSTATE (check comments in asm/kgdb.h for details) */
+ dbg_get_reg(33, gdb_regs + GP_REG_BYTES, thread_regs);
}
void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 678e084..62ff3c0 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -909,3 +909,21 @@ int setup_profiling_timer(unsigned int multiplier)
{
return -EINVAL;
}
+
+static bool have_cpu_die(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ int any_cpu = raw_smp_processor_id();
+
+ if (cpu_ops[any_cpu]->cpu_die)
+ return true;
+#endif
+ return false;
+}
+
+bool cpus_are_stuck_in_kernel(void)
+{
+ bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die());
+
+ return !!cpus_stuck_in_kernel || smp_spin_tables;
+}
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index c539208..2a43012 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -64,8 +64,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
/*
* We need to switch to kernel mode so that we can use __get_user
- * to safely read from kernel space. Note that we now dump the
- * code first, just in case the backtrace kills us.
+ * to safely read from kernel space.
*/
fs = get_fs();
set_fs(KERNEL_DS);
@@ -111,21 +110,12 @@ static void dump_backtrace_entry(unsigned long where)
print_ip_sym(where);
}
-static void dump_instr(const char *lvl, struct pt_regs *regs)
+static void __dump_instr(const char *lvl, struct pt_regs *regs)
{
unsigned long addr = instruction_pointer(regs);
- mm_segment_t fs;
char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
int i;
- /*
- * We need to switch to kernel mode so that we can use __get_user
- * to safely read from kernel space. Note that we now dump the
- * code first, just in case the backtrace kills us.
- */
- fs = get_fs();
- set_fs(KERNEL_DS);
-
for (i = -4; i < 1; i++) {
unsigned int val, bad;
@@ -139,8 +129,18 @@ static void dump_instr(const char *lvl, struct pt_regs *regs)
}
}
printk("%sCode: %s\n", lvl, str);
+}
- set_fs(fs);
+static void dump_instr(const char *lvl, struct pt_regs *regs)
+{
+ if (!user_mode(regs)) {
+ mm_segment_t fs = get_fs();
+ set_fs(KERNEL_DS);
+ __dump_instr(lvl, regs);
+ set_fs(fs);
+ } else {
+ __dump_instr(lvl, regs);
+ }
}
static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
@@ -477,8 +477,9 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
void __user *pc = (void __user *)instruction_pointer(regs);
console_verbose();
- pr_crit("Bad mode in %s handler detected, code 0x%08x -- %s\n",
- handler[reason], esr, esr_get_class_string(esr));
+ pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n",
+ handler[reason], smp_processor_id(), esr,
+ esr_get_class_string(esr));
__show_regs(regs);
info.si_signo = SIGILL;
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index fff7cd4..5f8f80b 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -169,7 +169,8 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
* Make sure stores to the GIC via the memory mapped interface
* are now visible to the system register interface.
*/
- dsb(st);
+ if (!cpu_if->vgic_sre)
+ dsb(st);
cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
@@ -190,12 +191,11 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
continue;
- if (cpu_if->vgic_elrsr & (1 << i)) {
+ if (cpu_if->vgic_elrsr & (1 << i))
cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
- continue;
- }
+ else
+ cpu_if->vgic_lr[i] = __gic_v3_get_lr(i);
- cpu_if->vgic_lr[i] = __gic_v3_get_lr(i);
__gic_v3_set_lr(0, i);
}
@@ -236,8 +236,12 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
val = read_gicreg(ICC_SRE_EL2);
write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
- isb(); /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
- write_gicreg(1, ICC_SRE_EL1);
+
+ if (!cpu_if->vgic_sre) {
+ /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
+ isb();
+ write_gicreg(1, ICC_SRE_EL1);
+ }
}
void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
@@ -256,8 +260,10 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
* been actually programmed with the value we want before
* starting to mess with the rest of the GIC.
*/
- write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1);
- isb();
+ if (!cpu_if->vgic_sre) {
+ write_gicreg(0, ICC_SRE_EL1);
+ isb();
+ }
val = read_gicreg(ICH_VTR_EL2);
max_lr_idx = vtr_to_max_lr_idx(val);
@@ -306,18 +312,18 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
* (re)distributors. This ensure the guest will read the
* correct values from the memory-mapped interface.
*/
- isb();
- dsb(sy);
+ if (!cpu_if->vgic_sre) {
+ isb();
+ dsb(sy);
+ }
vcpu->arch.vgic_cpu.live_lrs = live_lrs;
/*
* Prevent the guest from touching the GIC system registers if
* SRE isn't enabled for GICv3 emulation.
*/
- if (!cpu_if->vgic_sre) {
- write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
- ICC_SRE_EL2);
- }
+ write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
+ ICC_SRE_EL2);
}
void __hyp_text __vgic_v3_init_lrs(void)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 7bbe3ff..a57d650 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -134,6 +134,17 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
return true;
}
+static bool access_gic_sre(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre;
+ return true;
+}
+
static bool trap_raz_wi(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
@@ -958,7 +969,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
access_gic_sgi },
/* ICC_SRE_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1100), Op2(0b101),
- trap_raz_wi },
+ access_gic_sre },
/* CONTEXTIDR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001),
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index b7b3978..efcf1f7 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -179,7 +179,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
&asid_generation);
flush_context(cpu);
- /* We have at least 1 ASID per CPU, so this will always succeed */
+ /* We have more ASIDs than CPUs, so this will always succeed */
asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
set_asid:
@@ -227,8 +227,11 @@ switch_mm_fastpath:
static int asids_init(void)
{
asid_bits = get_cpu_asid_bits();
- /* If we end up with more CPUs than ASIDs, expect things to crash */
- WARN_ON(NUM_USER_ASIDS < num_possible_cpus());
+ /*
+ * Expect allocation after rollover to fail if we don't have at least
+ * one more ASID than CPUs. ASID #0 is reserved for init_mm.
+ */
+ WARN_ON(NUM_USER_ASIDS - 1 <= num_possible_cpus());
atomic64_set(&asid_generation, ASID_FIRST_VERSION);
asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map),
GFP_KERNEL);
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 8404190..ccfde23 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -150,6 +150,7 @@ static const struct prot_bits pte_bits[] = {
struct pg_level {
const struct prot_bits *bits;
+ const char *name;
size_t num;
u64 mask;
};
@@ -157,15 +158,19 @@ struct pg_level {
static struct pg_level pg_level[] = {
{
}, { /* pgd */
+ .name = "PGD",
.bits = pte_bits,
.num = ARRAY_SIZE(pte_bits),
}, { /* pud */
+ .name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD",
.bits = pte_bits,
.num = ARRAY_SIZE(pte_bits),
}, { /* pmd */
+ .name = (CONFIG_PGTABLE_LEVELS > 2) ? "PMD" : "PGD",
.bits = pte_bits,
.num = ARRAY_SIZE(pte_bits),
}, { /* pte */
+ .name = "PTE",
.bits = pte_bits,
.num = ARRAY_SIZE(pte_bits),
},
@@ -214,7 +219,8 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
delta >>= 10;
unit++;
}
- seq_printf(st->seq, "%9lu%c", delta, *unit);
+ seq_printf(st->seq, "%9lu%c %s", delta, *unit,
+ pg_level[st->level].name);
if (pg_level[st->level].bits)
dump_prot(st, pg_level[st->level].bits,
pg_level[st->level].num);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 5954881..013e2cb 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -109,7 +109,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
* PTE_RDONLY is cleared by default in the asm below, so set it in
* back if necessary (read-only or clean PTE).
*/
- if (!pte_write(entry) || !dirty)
+ if (!pte_write(entry) || !pte_sw_dirty(entry))
pte_val(entry) |= PTE_RDONLY;
/*
@@ -441,7 +441,7 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
return 1;
}
-static struct fault_info {
+static const struct fault_info {
int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs);
int sig;
int code;
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index dbd12ea..43a76b0 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -71,10 +71,6 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr)
{
struct page *page = pte_page(pte);
- /* no flushing needed for anonymous pages */
- if (!page_mapping(page))
- return;
-
if (!test_and_set_bit(PG_dcache_clean, &page->flags))
sync_icache_aliases(page_address(page),
PAGE_SIZE << compound_order(page));
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index aa8aee7..2e49bd2 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -306,6 +306,10 @@ static __init int setup_hugepagesz(char *opt)
hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
} else if (ps == PUD_SIZE) {
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+ } else if (ps == (PAGE_SIZE * CONT_PTES)) {
+ hugetlb_add_hstate(CONT_PTE_SHIFT);
+ } else if (ps == (PMD_SIZE * CONT_PMDS)) {
+ hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT);
} else {
hugetlb_bad_size();
pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
@@ -314,3 +318,13 @@ static __init int setup_hugepagesz(char *opt)
return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
+
+#ifdef CONFIG_ARM64_64K_PAGES
+static __init int add_default_hugepagesz(void)
+{
+ if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
+ hugetlb_add_hstate(CONT_PMD_SHIFT);
+ return 0;
+}
+arch_initcall(add_default_hugepagesz);
+#endif