Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--  arch/arm64/kernel/Makefile            |    1
-rw-r--r--  arch/arm64/kernel/armv8_deprecated.c  |   11
-rw-r--r--  arch/arm64/kernel/asm-offsets.c       |   13
-rw-r--r--  arch/arm64/kernel/cpufeature.c        |    1
-rw-r--r--  arch/arm64/kernel/crash_dump.c        |   71
-rw-r--r--  arch/arm64/kernel/entry.S             |  110
-rw-r--r--  arch/arm64/kernel/head.S              |   17
-rw-r--r--  arch/arm64/kernel/hibernate.c         |   10
-rw-r--r--  arch/arm64/kernel/machine_kexec.c     |  170
-rw-r--r--  arch/arm64/kernel/process.c           |   36
-rw-r--r--  arch/arm64/kernel/return_address.c    |    1
-rw-r--r--  arch/arm64/kernel/setup.c             |   16
-rw-r--r--  arch/arm64/kernel/sleep.S             |    3
-rw-r--r--  arch/arm64/kernel/smp.c               |   82
-rw-r--r--  arch/arm64/kernel/stacktrace.c        |    7
-rw-r--r--  arch/arm64/kernel/suspend.c           |    6
-rw-r--r--  arch/arm64/kernel/traps.c             |   28
-rw-r--r--  arch/arm64/kernel/vmlinux.lds.S       |    5
18 files changed, 503 insertions(+), 85 deletions(-)
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7d66bba..6a7384e 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -50,6 +50,7 @@ arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \
cpu-reset.o
+arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index c0ede23..29d2ad8 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -14,7 +14,6 @@
#include <linux/slab.h>
#include <linux/sysctl.h>
-#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/insn.h>
#include <asm/opcodes.h>
@@ -285,10 +284,10 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
#define __SWP_LL_SC_LOOPS 4
#define __user_swpX_asm(data, addr, res, temp, temp2, B) \
+do { \
+ uaccess_enable(); \
__asm__ __volatile__( \
" mov %w3, %w7\n" \
- ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \
- CONFIG_ARM64_PAN) \
"0: ldxr"B" %w2, [%4]\n" \
"1: stxr"B" %w0, %w1, [%4]\n" \
" cbz %w0, 2f\n" \
@@ -306,13 +305,13 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
" .popsection" \
_ASM_EXTABLE(0b, 4b) \
_ASM_EXTABLE(1b, 4b) \
- ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \
- CONFIG_ARM64_PAN) \
: "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2) \
: "r" ((unsigned long)addr), "i" (-EAGAIN), \
"i" (-EFAULT), \
"i" (__SWP_LL_SC_LOOPS) \
- : "memory")
+ : "memory"); \
+ uaccess_disable(); \
+} while (0)
#define __user_swp_asm(data, addr, res, temp, temp2) \
__user_swpX_asm(data, addr, res, temp, temp2, "")
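
The conversion above replaces the open-coded ALTERNATIVE/SET_PSTATE_PAN sequences with uaccess_enable()/uaccess_disable(), which also cover the software TTBR0 "PAN" scheme introduced elsewhere in this series. The sketch below is an illustration of what the software variant amounts to on CPUs without hardware PAN; the helper names and bodies are simplified and are not the actual asm/uaccess.h code (in particular, the real disable path switches to the reserved_ttbr0 tables appended after swapper_pg_dir, see the vmlinux.lds.S change at the end of this diff).

/* Illustration only: simplified model of SW TTBR0 PAN user-access gating. */
static inline void sw_ttbr0_uaccess_enable(void)
{
	/* restore the user page tables saved in thread_info.ttbr0 */
	write_sysreg(current_thread_info()->ttbr0, ttbr0_el1);
	isb();
}

static inline void sw_ttbr0_uaccess_disable(void)
{
	/* point TTBR0_EL1 at empty tables so any user access faults */
	write_sysreg(virt_to_phys(empty_zero_page), ttbr0_el1);
	isb();
}
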
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index c58ddf8..b3bb7ef 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -36,11 +36,13 @@ int main(void)
{
DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
BLANK();
- DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
- DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
- DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
- DEFINE(TI_TASK, offsetof(struct thread_info, task));
- DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
+ DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
+ DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
+ DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
+#endif
+ DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
BLANK();
DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context));
BLANK();
@@ -123,6 +125,7 @@ int main(void)
DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
BLANK();
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
+ DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();
#ifdef CONFIG_KVM_ARM_HOST
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
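
The new TSK_* constants reflect the THREAD_INFO_IN_TASK layout, where thread_info is embedded at the start of task_struct and the tsk register (sp_el0) now carries a task_struct pointer rather than a stack address. Kbuild compiles asm-offsets.c and emits each DEFINE() as a plain constant in include/generated/asm-offsets.h, which the .S files consume as immediates (for example "ldr x19, [tsk, #TSK_TI_FLAGS]" in the entry.S changes below). The generated header looks roughly like this; the numeric values here are fabricated for illustration:

/* include/generated/asm-offsets.h (excerpt, illustrative values only) */
#define TSK_TI_FLAGS		0	/* offsetof(struct task_struct, thread_info.flags) */
#define TSK_TI_PREEMPT		16	/* offsetof(struct task_struct, thread_info.preempt_count) */
#define TSK_TI_ADDR_LIMIT	24	/* offsetof(struct task_struct, thread_info.addr_limit) */
#define TSK_STACK		32	/* offsetof(struct task_struct, stack) */
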
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 3a129d4..af5a1e3 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -47,6 +47,7 @@ unsigned int compat_elf_hwcap2 __read_mostly;
#endif
DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+EXPORT_SYMBOL(cpu_hwcaps);
DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS);
EXPORT_SYMBOL(cpu_hwcap_keys);
diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c
new file mode 100644
index 0000000..f46d57c
--- /dev/null
+++ b/arch/arm64/kernel/crash_dump.c
@@ -0,0 +1,71 @@
+/*
+ * Routines for doing kexec-based kdump
+ *
+ * Copyright (C) 2017 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/crash_dump.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
+#include <linux/uaccess.h>
+#include <asm/memory.h>
+
+/**
+ * copy_oldmem_page() - copy one page from old kernel memory
+ * @pfn: page frame number to be copied
+ * @buf: buffer where the copied page is placed
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page
+ * @userbuf: if set, @buf is in a user address space
+ *
+ * This function copies one page from old kernel memory into the buffer
+ * pointed to by @buf. If @buf is in userspace, set @userbuf to %1. Returns
+ * the number of bytes copied or a negative error in case of failure.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+ size_t csize, unsigned long offset,
+ int userbuf)
+{
+ void *vaddr;
+
+ if (!csize)
+ return 0;
+
+ vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB);
+ if (!vaddr)
+ return -ENOMEM;
+
+ if (userbuf) {
+ if (copy_to_user((char __user *)buf, vaddr + offset, csize)) {
+ memunmap(vaddr);
+ return -EFAULT;
+ }
+ } else {
+ memcpy(buf, vaddr + offset, csize);
+ }
+
+ memunmap(vaddr);
+
+ return csize;
+}
+
+/**
+ * elfcorehdr_read - read from ELF core header
+ * @buf: buffer where the data is placed
+ * @csize: number of bytes to read
+ * @ppos: address in the memory
+ *
+ * This function reads @count bytes from elf core header which exists
+ * on crash dump kernel's memory.
+ */
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+ memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count);
+ return count;
+}
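
copy_oldmem_page() is not called directly from arm64 code; it is the per-architecture hook behind /proc/vmcore. The sketch below is a simplified rendering of the generic caller in fs/proc/vmcore.c (the oldmem_pfn_is_ram() hook and zero-filling of holes are omitted), showing how reads are split into per-page calls:

/* Simplified sketch of fs/proc/vmcore.c's read path, for illustration. */
static ssize_t read_from_oldmem_sketch(char *buf, size_t count,
				       u64 *ppos, int userbuf)
{
	unsigned long pfn = (unsigned long)(*ppos / PAGE_SIZE);
	unsigned long offset = (unsigned long)(*ppos % PAGE_SIZE);
	ssize_t read = 0, tmp;

	while (count) {
		size_t nr_bytes = min(count, (size_t)(PAGE_SIZE - offset));

		/* hand one page (or less) to the arch-specific copy routine */
		tmp = copy_oldmem_page(pfn, buf, nr_bytes, offset, userbuf);
		if (tmp < 0)
			return tmp;

		*ppos += tmp;
		buf += tmp;
		count -= tmp;
		read += tmp;
		pfn++;
		offset = 0;
	}

	return read;
}
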
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index b4c7db4..bab8334 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -29,6 +29,8 @@
#include <asm/esr.h>
#include <asm/irq.h>
#include <asm/memory.h>
+#include <asm/ptrace.h>
+#include <asm/uaccess.h>
#include <asm/thread_info.h>
#include <asm/asm-uaccess.h>
#include <asm/unistd.h>
@@ -91,9 +93,8 @@
.if \el == 0
mrs x21, sp_el0
- mov tsk, sp
- and tsk, tsk, #~(THREAD_SIZE - 1) // Ensure MDSCR_EL1.SS is clear,
- ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug
+ ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear,
+ ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug
disable_step_tsk x19, x20 // exceptions when scheduling.
mov x29, xzr // fp pointed to user-space
@@ -101,15 +102,41 @@
add x21, sp, #S_FRAME_SIZE
get_thread_info tsk
/* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */
- ldr x20, [tsk, #TI_ADDR_LIMIT]
+ ldr x20, [tsk, #TSK_TI_ADDR_LIMIT]
str x20, [sp, #S_ORIG_ADDR_LIMIT]
mov x20, #TASK_SIZE_64
- str x20, [tsk, #TI_ADDR_LIMIT]
+ str x20, [tsk, #TSK_TI_ADDR_LIMIT]
/* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
.endif /* \el == 0 */
mrs x22, elr_el1
mrs x23, spsr_el1
stp lr, x21, [sp, #S_LR]
+
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ /*
+ * Set the TTBR0 PAN bit in SPSR. When the exception is taken from
+ * EL0, there is no need to check the state of TTBR0_EL1 since
+ * accesses are always enabled.
+ * Note that the meaning of this bit differs from the ARMv8.1 PAN
+ * feature as all TTBR0_EL1 accesses are disabled, not just those to
+ * user mappings.
+ */
+alternative_if ARM64_HAS_PAN
+ b 1f // skip TTBR0 PAN
+alternative_else_nop_endif
+
+ .if \el != 0
+ mrs x21, ttbr0_el1
+ tst x21, #0xffff << 48 // Check for the reserved ASID
+ orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR
+ b.eq 1f // TTBR0 access already disabled
+ and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR
+ .endif
+
+ __uaccess_ttbr0_disable x21
+1:
+#endif
+
stp x22, x23, [sp, #S_PC]
/*
@@ -140,7 +167,7 @@
.if \el != 0
/* Restore the task's original addr_limit. */
ldr x20, [sp, #S_ORIG_ADDR_LIMIT]
- str x20, [tsk, #TI_ADDR_LIMIT]
+ str x20, [tsk, #TSK_TI_ADDR_LIMIT]
/* No need to restore UAO, it will be restored from SPSR_EL1 */
.endif
@@ -148,6 +175,40 @@
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
.if \el == 0
ct_user_enter
+ .endif
+
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ /*
+ * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
+ * PAN bit checking.
+ */
+alternative_if ARM64_HAS_PAN
+ b 2f // skip TTBR0 PAN
+alternative_else_nop_endif
+
+ .if \el != 0
+ tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
+ .endif
+
+ __uaccess_ttbr0_enable x0
+
+ .if \el == 0
+ /*
+ * Enable errata workarounds only if returning to user. The only
+ * workaround currently required for TTBR0_EL1 changes are for the
+ * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
+ * corruption).
+ */
+ post_ttbr0_update_workaround
+ .endif
+1:
+ .if \el != 0
+ and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit
+ .endif
+2:
+#endif
+
+ .if \el == 0
ldr x23, [sp, #S_SP] // load return stack pointer
msr sp_el0, x23
#ifdef CONFIG_ARM64_ERRATUM_845719
@@ -163,6 +224,7 @@ alternative_if ARM64_WORKAROUND_845719
alternative_else_nop_endif
#endif
.endif
+
msr elr_el1, x21 // set up the return data
msr spsr_el1, x22
ldp x0, x1, [sp, #16 * 0]
@@ -185,23 +247,20 @@ alternative_else_nop_endif
eret // return to kernel
.endm
- .macro get_thread_info, rd
- mrs \rd, sp_el0
- .endm
-
.macro irq_stack_entry
mov x19, sp // preserve the original sp
/*
- * Compare sp with the current thread_info, if the top
- * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and
- * should switch to the irq stack.
+ * Compare sp with the base of the task stack.
+ * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack,
+ * and should switch to the irq stack.
*/
- and x25, x19, #~(THREAD_SIZE - 1)
- cmp x25, tsk
- b.ne 9998f
+ ldr x25, [tsk, TSK_STACK]
+ eor x25, x25, x19
+ and x25, x25, #~(THREAD_SIZE - 1)
+ cbnz x25, 9998f
- this_cpu_ptr irq_stack, x25, x26
+ adr_this_cpu x25, irq_stack, x26
mov x26, #IRQ_STACK_START_SP
add x26, x25, x26
@@ -429,9 +488,9 @@ el1_irq:
irq_handler
#ifdef CONFIG_PREEMPT
- ldr w24, [tsk, #TI_PREEMPT] // get preempt count
+ ldr w24, [tsk, #TSK_TI_PREEMPT] // get preempt count
cbnz w24, 1f // preempt count != 0
- ldr x0, [tsk, #TI_FLAGS] // get flags
+ ldr x0, [tsk, #TSK_TI_FLAGS] // get flags
tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
bl el1_preempt
1:
@@ -446,7 +505,7 @@ ENDPROC(el1_irq)
el1_preempt:
mov x24, lr
1: bl preempt_schedule_irq // irq en/disable is done inside
- ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS
+ ldr x0, [tsk, #TSK_TI_FLAGS] // get new tasks TI_FLAGS
tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
ret x24
#endif
@@ -676,8 +735,7 @@ ENTRY(cpu_switch_to)
ldp x29, x9, [x8], #16
ldr lr, [x8]
mov sp, x9
- and x9, x9, #~(THREAD_SIZE - 1)
- msr sp_el0, x9
+ msr sp_el0, x1
ret
ENDPROC(cpu_switch_to)
@@ -688,7 +746,7 @@ ENDPROC(cpu_switch_to)
ret_fast_syscall:
disable_irq // disable interrupts
str x0, [sp, #S_X0] // returned x0
- ldr x1, [tsk, #TI_FLAGS] // re-check for syscall tracing
+ ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing
and x2, x1, #_TIF_SYSCALL_WORK
cbnz x2, ret_fast_syscall_trace
and x2, x1, #_TIF_WORK_MASK
@@ -708,14 +766,14 @@ work_pending:
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_on // enabled while in userspace
#endif
- ldr x1, [tsk, #TI_FLAGS] // re-check for single-step
+ ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step
b finish_ret_to_user
/*
* "slow" syscall return path.
*/
ret_to_user:
disable_irq // disable interrupts
- ldr x1, [tsk, #TI_FLAGS]
+ ldr x1, [tsk, #TSK_TI_FLAGS]
and x2, x1, #_TIF_WORK_MASK
cbnz x2, work_pending
finish_ret_to_user:
@@ -748,7 +806,7 @@ el0_svc_naked: // compat entry point
enable_dbg_and_irq
ct_user_exit 1
- ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks
+ ldr x16, [tsk, #TSK_TI_FLAGS] // check for syscall hooks
tst x16, #_TIF_SYSCALL_WORK
b.ne __sys_trace
cmp scno, sc_nr // check upper syscall limit
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 332e331..7ee6d74 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -326,14 +326,14 @@ __create_page_tables:
* dirty cache lines being evicted.
*/
adrp x0, idmap_pg_dir
- adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE
+ adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
bl __inval_cache_range
/*
* Clear the idmap and swapper page tables.
*/
adrp x0, idmap_pg_dir
- adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE
+ adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
1: stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
@@ -412,7 +412,7 @@ __create_page_tables:
* tables again to remove any speculatively loaded cache lines.
*/
adrp x0, idmap_pg_dir
- adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE
+ adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
dmb sy
bl __inval_cache_range
@@ -428,7 +428,8 @@ ENDPROC(__create_page_tables)
__primary_switched:
adrp x4, init_thread_union
add sp, x4, #THREAD_SIZE
- msr sp_el0, x4 // Save thread_info
+ adr_l x5, init_task
+ msr sp_el0, x5 // Save thread_info
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
@@ -699,10 +700,10 @@ __secondary_switched:
isb
adr_l x0, secondary_data
- ldr x0, [x0, #CPU_BOOT_STACK] // get secondary_data.stack
- mov sp, x0
- and x0, x0, #~(THREAD_SIZE - 1)
- msr sp_el0, x0 // save thread_info
+ ldr x1, [x0, #CPU_BOOT_STACK] // get secondary_data.stack
+ mov sp, x1
+ ldr x2, [x0, #CPU_BOOT_TASK]
+ msr sp_el0, x2
mov x29, #0
b secondary_start_kernel
ENDPROC(__secondary_switched)
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index d55a7b0..4b10dc1 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -28,6 +28,7 @@
#include <asm/cacheflush.h>
#include <asm/cputype.h>
#include <asm/irqflags.h>
+#include <asm/kexec.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
@@ -105,7 +106,8 @@ int pfn_is_nosave(unsigned long pfn)
unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
- return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
+ return ((pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn)) ||
+ crash_is_nosave(pfn);
}
void notrace save_processor_state(void)
@@ -289,6 +291,9 @@ int swsusp_arch_suspend(void)
local_dbg_save(flags);
if (__cpu_suspend_enter(&state)) {
+ /* make the crash dump kernel image visible/saveable */
+ crash_prepare_suspend();
+
sleep_cpu = smp_processor_id();
ret = swsusp_save();
} else {
@@ -300,6 +305,9 @@ int swsusp_arch_suspend(void)
if (el2_reset_needed())
dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end);
+ /* make the crash dump kernel image protected again */
+ crash_post_resume();
+
/*
* Tell the hibernation core that we've just restored
* the memory
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index bc96c8a..481f54a 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -9,12 +9,19 @@
* published by the Free Software Foundation.
*/
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
#include <linux/kexec.h>
+#include <linux/page-flags.h>
#include <linux/smp.h>
#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
+#include <asm/memory.h>
+#include <asm/mmu.h>
#include <asm/mmu_context.h>
+#include <asm/page.h>
#include "cpu-reset.h"
@@ -22,8 +29,6 @@
extern const unsigned char arm64_relocate_new_kernel[];
extern const unsigned long arm64_relocate_new_kernel_size;
-static unsigned long kimage_start;
-
/**
* kexec_image_info - For debugging output.
*/
@@ -64,8 +69,6 @@ void machine_kexec_cleanup(struct kimage *kimage)
*/
int machine_kexec_prepare(struct kimage *kimage)
{
- kimage_start = kimage->start;
-
kexec_image_info(kimage);
if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
@@ -144,11 +147,15 @@ void machine_kexec(struct kimage *kimage)
{
phys_addr_t reboot_code_buffer_phys;
void *reboot_code_buffer;
+ bool in_kexec_crash = (kimage == kexec_crash_image);
+ bool stuck_cpus = cpus_are_stuck_in_kernel();
/*
* New cpus may have become stuck_in_kernel after we loaded the image.
*/
- BUG_ON(cpus_are_stuck_in_kernel() || (num_online_cpus() > 1));
+ BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1)));
+ WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
+ "Some CPUs may be stale, kdump will be unreliable.\n");
reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
@@ -183,7 +190,7 @@ void machine_kexec(struct kimage *kimage)
kexec_list_flush(kimage);
/* Flush the new image if already in place. */
- if (kimage->head & IND_DONE)
+ if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE))
kexec_segment_flush(kimage);
pr_info("Bye!\n");
@@ -200,13 +207,158 @@ void machine_kexec(struct kimage *kimage)
* relocation is complete.
*/
- cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head,
- kimage_start, 0);
+ cpu_soft_restart(kimage != kexec_crash_image,
+ reboot_code_buffer_phys, kimage->head, kimage->start, 0);
BUG(); /* Should never get here. */
}
+static void machine_kexec_mask_interrupts(void)
+{
+ unsigned int i;
+ struct irq_desc *desc;
+
+ for_each_irq_desc(i, desc) {
+ struct irq_chip *chip;
+ int ret;
+
+ chip = irq_desc_get_chip(desc);
+ if (!chip)
+ continue;
+
+ /*
+ * First try to remove the active state. If this
+ * fails, try to EOI the interrupt.
+ */
+ ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
+
+ if (ret && irqd_irq_inprogress(&desc->irq_data) &&
+ chip->irq_eoi)
+ chip->irq_eoi(&desc->irq_data);
+
+ if (chip->irq_mask)
+ chip->irq_mask(&desc->irq_data);
+
+ if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
+ chip->irq_disable(&desc->irq_data);
+ }
+}
+
+/**
+ * machine_crash_shutdown - shutdown non-crashing cpus and save registers
+ */
void machine_crash_shutdown(struct pt_regs *regs)
{
- /* Empty routine needed to avoid build errors. */
+ local_irq_disable();
+
+ /* shutdown non-crashing cpus */
+ smp_send_crash_stop();
+
+ /* for crashing cpu */
+ crash_save_cpu(regs, smp_processor_id());
+ machine_kexec_mask_interrupts();
+
+ pr_info("Starting crashdump kernel...\n");
+}
+
+void arch_kexec_protect_crashkres(void)
+{
+ int i;
+
+ kexec_segment_flush(kexec_crash_image);
+
+ for (i = 0; i < kexec_crash_image->nr_segments; i++)
+ set_memory_valid(
+ __phys_to_virt(kexec_crash_image->segment[i].mem),
+ kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0);
+}
+
+void arch_kexec_unprotect_crashkres(void)
+{
+ int i;
+
+ for (i = 0; i < kexec_crash_image->nr_segments; i++)
+ set_memory_valid(
+ __phys_to_virt(kexec_crash_image->segment[i].mem),
+ kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1);
+}
+
+#ifdef CONFIG_HIBERNATION
+/*
+ * To preserve the crash dump kernel image, the relevant memory segments
+ * should be mapped again around the hibernation.
+ */
+void crash_prepare_suspend(void)
+{
+ if (kexec_crash_image)
+ arch_kexec_unprotect_crashkres();
+}
+
+void crash_post_resume(void)
+{
+ if (kexec_crash_image)
+ arch_kexec_protect_crashkres();
+}
+
+/*
+ * crash_is_nosave
+ *
+ * Return true only if a page is part of reserved memory for crash dump kernel,
+ * but does not hold any data of loaded kernel image.
+ *
+ * Note that all the pages in crash dump kernel memory have been initially
+ * marked as Reserved in kexec_reserve_crashkres_pages().
+ *
+ * In hibernation, the pages which are Reserved and yet "nosave" are excluded
+ * from the hibernation image. crash_is_nosave() does this check for the
+ * crash dump kernel and will reduce the total size of the hibernation image.
+ */
+
+bool crash_is_nosave(unsigned long pfn)
+{
+ int i;
+ phys_addr_t addr;
+
+ if (!crashk_res.end)
+ return false;
+
+ /* in reserved memory? */
+ addr = __pfn_to_phys(pfn);
+ if ((addr < crashk_res.start) || (crashk_res.end < addr))
+ return false;
+
+ if (!kexec_crash_image)
+ return true;
+
+ /* not part of loaded kernel image? */
+ for (i = 0; i < kexec_crash_image->nr_segments; i++)
+ if (addr >= kexec_crash_image->segment[i].mem &&
+ addr < (kexec_crash_image->segment[i].mem +
+ kexec_crash_image->segment[i].memsz))
+ return false;
+
+ return true;
+}
+
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
+{
+ unsigned long addr;
+ struct page *page;
+
+ for (addr = begin; addr < end; addr += PAGE_SIZE) {
+ page = phys_to_page(addr);
+ ClearPageReserved(page);
+ free_reserved_page(page);
+ }
+}
+#endif /* CONFIG_HIBERNATION */
+
+void arch_crash_save_vmcoreinfo(void)
+{
+ VMCOREINFO_NUMBER(VA_BITS);
+ /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */
+ vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n",
+ kimage_voffset);
+ vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
+ PHYS_OFFSET);
}
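
arch_crash_save_vmcoreinfo() above exports VA_BITS, kimage_voffset and PHYS_OFFSET so that post-mortem tools can translate kernel virtual addresses found in the dump. The helper below illustrates how a userspace dump tool (makedumpfile, crash) might combine those three numbers, assuming the v4.10-era arm64 layout in which the linear map starts at PAGE_OFFSET and the kernel image is mapped below it; it is not code from this patch set.

#include <stdint.h>

static uint64_t page_offset(uint64_t va_bits)
{
	/* PAGE_OFFSET = 0xffff... << (VA_BITS - 1): start of the linear map */
	return 0xffffffffffffffffULL << (va_bits - 1);
}

static uint64_t arm64_vtop(uint64_t vaddr, uint64_t va_bits,
			   uint64_t phys_offset, uint64_t kimage_voffset)
{
	if (vaddr >= page_offset(va_bits))
		return vaddr - page_offset(va_bits) + phys_offset; /* linear map */

	return vaddr - kimage_voffset;	/* kernel image (text/data) mapping */
}
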
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 01753cd..a98b743 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -45,6 +45,7 @@
#include <linux/personality.h>
#include <linux/notifier.h>
#include <trace/events/power.h>
+#include <linux/percpu.h>
#include <asm/alternative.h>
#include <asm/compat.h>
@@ -322,6 +323,20 @@ void uao_thread_switch(struct task_struct *next)
}
/*
+ * We store our current task in sp_el0, which is clobbered by userspace. Keep a
+ * shadow copy so that we can restore this upon entry from userspace.
+ *
+ * This is *only* for exception entry from EL0, and is not valid until we
+ * __switch_to() a user task.
+ */
+DEFINE_PER_CPU(struct task_struct *, __entry_task);
+
+static void entry_task_switch(struct task_struct *next)
+{
+ __this_cpu_write(__entry_task, next);
+}
+
+/*
* Thread switching.
*/
struct task_struct *__switch_to(struct task_struct *prev,
@@ -333,6 +348,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
tls_thread_switch(next);
hw_breakpoint_thread_switch(next);
contextidr_thread_switch(next);
+ entry_task_switch(next);
uao_thread_switch(next);
/*
@@ -350,27 +366,35 @@ struct task_struct *__switch_to(struct task_struct *prev,
unsigned long get_wchan(struct task_struct *p)
{
struct stackframe frame;
- unsigned long stack_page;
+ unsigned long stack_page, ret = 0;
int count = 0;
if (!p || p == current || p->state == TASK_RUNNING)
return 0;
+ stack_page = (unsigned long)try_get_task_stack(p);
+ if (!stack_page)
+ return 0;
+
frame.fp = thread_saved_fp(p);
frame.sp = thread_saved_sp(p);
frame.pc = thread_saved_pc(p);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
frame.graph = p->curr_ret_stack;
#endif
- stack_page = (unsigned long)task_stack_page(p);
do {
if (frame.sp < stack_page ||
frame.sp >= stack_page + THREAD_SIZE ||
unwind_frame(p, &frame))
- return 0;
- if (!in_sched_functions(frame.pc))
- return frame.pc;
+ goto out;
+ if (!in_sched_functions(frame.pc)) {
+ ret = frame.pc;
+ goto out;
+ }
} while (count ++ < 16);
- return 0;
+
+out:
+ put_task_stack(p);
+ return ret;
}
unsigned long arch_align_stack(unsigned long sp)
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
index 1718706..12a87f2 100644
--- a/arch/arm64/kernel/return_address.c
+++ b/arch/arm64/kernel/return_address.c
@@ -12,6 +12,7 @@
#include <linux/export.h>
#include <linux/ftrace.h>
+#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
struct return_address_data {
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index f534f49..e4e2a5a 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -31,7 +31,6 @@
#include <linux/screen_info.h>
#include <linux/init.h>
#include <linux/kexec.h>
-#include <linux/crash_dump.h>
#include <linux/root_dev.h>
#include <linux/cpu.h>
#include <linux/interrupt.h>
@@ -224,6 +223,12 @@ static void __init request_standard_resources(void)
if (kernel_data.start >= res->start &&
kernel_data.end <= res->end)
request_resource(res, &kernel_data);
+#ifdef CONFIG_KEXEC_CORE
+ /* Userspace will find "Crash kernel" region in /proc/iomem. */
+ if (crashk_res.end && crashk_res.start >= res->start &&
+ crashk_res.end <= res->end)
+ request_resource(res, &crashk_res);
+#endif
}
}
@@ -291,6 +296,15 @@ void __init setup_arch(char **cmdline_p)
smp_init_cpus();
smp_build_mpidr_hash();
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ /*
+ * Make sure init_thread_info.ttbr0 always generates translation
+ * faults in case uaccess_enable() is inadvertently called by the init
+ * thread.
+ */
+ init_task.thread_info.ttbr0 = virt_to_phys(empty_zero_page);
+#endif
+
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
conswitchp = &vga_con;
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 1bec41b..df67652 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -125,9 +125,6 @@ ENTRY(_cpu_resume)
/* load sp from context */
ldr x2, [x0, #CPU_CTX_SP]
mov sp, x2
- /* save thread_info */
- and x2, x2, #~(THREAD_SIZE - 1)
- msr sp_el0, x2
/*
* cpu_do_resume expects x0 to contain context address pointer
*/
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 8507703..11f2890 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -37,6 +37,7 @@
#include <linux/completion.h>
#include <linux/of.h>
#include <linux/irq_work.h>
+#include <linux/kexec.h>
#include <asm/alternative.h>
#include <asm/atomic.h>
@@ -58,6 +59,9 @@
#define CREATE_TRACE_POINTS
#include <trace/events/ipi.h>
+DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
+EXPORT_PER_CPU_SYMBOL(cpu_number);
+
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
@@ -71,6 +75,7 @@ enum ipi_msg_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
IPI_CPU_STOP,
+ IPI_CPU_CRASH_STOP,
IPI_TIMER,
IPI_IRQ_WORK,
IPI_WAKEUP
@@ -146,6 +151,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
* We need to tell the secondary core where to find its stack and the
* page tables.
*/
+ secondary_data.task = idle;
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
update_cpu_boot_status(CPU_MMU_OFF);
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
@@ -170,6 +176,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
}
+ secondary_data.task = NULL;
secondary_data.stack = NULL;
status = READ_ONCE(secondary_data.status);
if (ret && status) {
@@ -208,7 +215,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
asmlinkage void secondary_start_kernel(void)
{
struct mm_struct *mm = &init_mm;
- unsigned int cpu = smp_processor_id();
+ unsigned int cpu;
+
+ cpu = task_cpu(current);
+ set_my_cpu_offset(per_cpu_offset(cpu));
/*
* All kernel threads share the same mm context; grab a
@@ -217,8 +227,6 @@ asmlinkage void secondary_start_kernel(void)
atomic_inc(&mm->mm_count);
current->active_mm = mm;
- set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
-
/*
* TTBR0 is only used for the identity mapping at this stage. Make it
* point to zero page to avoid speculatively fetching new entries.
@@ -718,6 +726,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
*/
for_each_possible_cpu(cpu) {
+ per_cpu(cpu_number, cpu) = cpu;
+
if (cpu == smp_processor_id())
continue;
@@ -745,6 +755,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
S(IPI_RESCHEDULE, "Rescheduling interrupts"),
S(IPI_CALL_FUNC, "Function call interrupts"),
S(IPI_CPU_STOP, "CPU stop interrupts"),
+ S(IPI_CPU_CRASH_STOP, "CPU stop (for crash dump) interrupts"),
S(IPI_TIMER, "Timer broadcast interrupts"),
S(IPI_IRQ_WORK, "IRQ work interrupts"),
S(IPI_WAKEUP, "CPU wake-up interrupts"),
@@ -819,6 +830,29 @@ static void ipi_cpu_stop(unsigned int cpu)
cpu_relax();
}
+#ifdef CONFIG_KEXEC_CORE
+static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
+#endif
+
+static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
+{
+#ifdef CONFIG_KEXEC_CORE
+ crash_save_cpu(regs, cpu);
+
+ atomic_dec(&waiting_for_crash_ipi);
+
+ local_irq_disable();
+
+#ifdef CONFIG_HOTPLUG_CPU
+ if (cpu_ops[cpu]->cpu_die)
+ cpu_ops[cpu]->cpu_die(cpu);
+#endif
+
+ /* just in case */
+ cpu_park_loop();
+#endif
+}
+
/*
* Main handler for inter-processor interrupts
*/
@@ -849,6 +883,15 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
irq_exit();
break;
+ case IPI_CPU_CRASH_STOP:
+ if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
+ irq_enter();
+ ipi_cpu_crash_stop(cpu, regs);
+
+ unreachable();
+ }
+ break;
+
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
case IPI_TIMER:
irq_enter();
@@ -921,6 +964,39 @@ void smp_send_stop(void)
cpumask_pr_args(cpu_online_mask));
}
+#ifdef CONFIG_KEXEC_CORE
+void smp_send_crash_stop(void)
+{
+ cpumask_t mask;
+ unsigned long timeout;
+
+ if (num_online_cpus() == 1)
+ return;
+
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
+ atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+
+ pr_crit("SMP: stopping secondary CPUs\n");
+ smp_cross_call(&mask, IPI_CPU_CRASH_STOP);
+
+ /* Wait up to one second for other CPUs to stop */
+ timeout = USEC_PER_SEC;
+ while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
+ udelay(1);
+
+ if (atomic_read(&waiting_for_crash_ipi) > 0)
+ pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
+ cpumask_pr_args(&mask));
+}
+
+bool smp_crash_stop_failed(void)
+{
+ return (atomic_read(&waiting_for_crash_ipi) > 0);
+}
+#endif
+
/*
* not supported here
*/
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index c2efddf..8a552a3 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -22,6 +22,7 @@
#include <linux/stacktrace.h>
#include <asm/irq.h>
+#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
/*
@@ -128,7 +129,6 @@ void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
break;
}
}
-EXPORT_SYMBOL(walk_stackframe);
#ifdef CONFIG_STACKTRACE
struct stack_trace_data {
@@ -181,6 +181,9 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
struct stack_trace_data data;
struct stackframe frame;
+ if (!try_get_task_stack(tsk))
+ return;
+
data.trace = trace;
data.skip = trace->skip;
@@ -202,6 +205,8 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
walk_stackframe(tsk, &frame, save_trace, &data);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
+
+ put_task_stack(tsk);
}
void save_stack_trace(struct stack_trace *trace)
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index bb0cd78..1e3be90 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -47,12 +47,6 @@ void notrace __cpu_suspend_exit(void)
cpu_uninstall_idmap();
/*
- * Restore per-cpu offset before any kernel
- * subsystem relying on it has a chance to run.
- */
- set_my_cpu_offset(per_cpu_offset(cpu));
-
- /*
* PSTATE was not saved over suspend/resume, re-enable any detected
* features that might not have been set correctly.
*/
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index f228261..7501b40 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -38,6 +38,7 @@
#include <asm/esr.h>
#include <asm/insn.h>
#include <asm/traps.h>
+#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
#include <asm/exception.h>
#include <asm/system_misc.h>
@@ -147,6 +148,9 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
if (!tsk)
tsk = current;
+ if (!try_get_task_stack(tsk))
+ return;
+
/*
* Switching between stacks is valid when tracing current and in
* non-preemptible context.
@@ -212,6 +216,8 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
stack + sizeof(struct pt_regs));
}
}
+
+ put_task_stack(tsk);
}
void show_stack(struct task_struct *tsk, unsigned long *sp)
@@ -227,10 +233,9 @@ void show_stack(struct task_struct *tsk, unsigned long *sp)
#endif
#define S_SMP " SMP"
-static int __die(const char *str, int err, struct thread_info *thread,
- struct pt_regs *regs)
+static int __die(const char *str, int err, struct pt_regs *regs)
{
- struct task_struct *tsk = thread->task;
+ struct task_struct *tsk = current;
static int die_counter;
int ret;
@@ -245,7 +250,8 @@ static int __die(const char *str, int err, struct thread_info *thread,
print_modules();
__show_regs(regs);
pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
- TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1);
+ TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk),
+ end_of_stack(tsk));
if (!user_mode(regs)) {
dump_mem(KERN_EMERG, "Stack: ", regs->sp,
@@ -264,7 +270,6 @@ static DEFINE_RAW_SPINLOCK(die_lock);
*/
void die(const char *str, struct pt_regs *regs, int err)
{
- struct thread_info *thread = current_thread_info();
int ret;
oops_enter();
@@ -272,9 +277,9 @@ void die(const char *str, struct pt_regs *regs, int err)
raw_spin_lock_irq(&die_lock);
console_verbose();
bust_spinlocks(1);
- ret = __die(str, err, thread, regs);
+ ret = __die(str, err, regs);
- if (regs && kexec_should_crash(thread->task))
+ if (regs && kexec_should_crash(current))
crash_kexec(regs);
bust_spinlocks(0);
@@ -435,9 +440,10 @@ int cpu_enable_cache_maint_trap(void *__unused)
}
#define __user_cache_maint(insn, address, res) \
- if (address >= user_addr_max()) \
+ if (address >= user_addr_max()) { \
res = -EFAULT; \
- else \
+ } else { \
+ uaccess_ttbr0_enable(); \
asm volatile ( \
"1: " insn ", %1\n" \
" mov %w0, #0\n" \
@@ -449,7 +455,9 @@ int cpu_enable_cache_maint_trap(void *__unused)
" .popsection\n" \
_ASM_EXTABLE(1b, 3b) \
: "=r" (res) \
- : "r" (address), "i" (-EFAULT) )
+ : "r" (address), "i" (-EFAULT)); \
+ uaccess_ttbr0_disable(); \
+ }
static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
{
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 1105aab..b8deffa 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -216,6 +216,11 @@ SECTIONS
swapper_pg_dir = .;
. += SWAPPER_DIR_SIZE;
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ reserved_ttbr0 = .;
+ . += RESERVED_TTBR0_SIZE;
+#endif
+
_end = .;
STABS_DEBUG