From c17af4dd96aa99e6e58b5d715a7c66db63a15106 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Jun 2015 16:15:43 -0400 Subject: sparc: perf: Disable pagefaults while walking userspace stacks Page faults generated walking userspace stacks can call schedule to switch out the task. When collecting callchains for scheduler tracepoints this causes a deadlock as the tracepoints can be hit with the runqueue lock held: [ 8138.159054] WARNING: CPU: 758 PID: 12488 at /opt/dahern/linux.git/arch/sparc/kernel/nmi.c:80 perfctr_irq+0x1f8/0x2b4() [ 8138.203152] Watchdog detected hard LOCKUP on cpu 758 [ 8138.410969] CPU: 758 PID: 12488 Comm: perf Not tainted 4.0.0-rc6+ #6 [ 8138.437146] Call Trace: [ 8138.447193] [000000000045cdd4] warn_slowpath_common+0x7c/0xa0 [ 8138.471238] [000000000045ce90] warn_slowpath_fmt+0x30/0x40 [ 8138.494189] [0000000000983e38] perfctr_irq+0x1f8/0x2b4 [ 8138.515716] [00000000004209f4] tl0_irq15+0x14/0x20 [ 8138.535791] [00000000009839ec] _raw_spin_trylock_bh+0x68/0x108 [ 8138.560180] [0000000000980018] __schedule+0xcc/0x710 [ 8138.580981] [00000000009806dc] preempt_schedule_common+0x10/0x3c [ 8138.606082] [000000000098077c] _cond_resched+0x34/0x44 [ 8138.627603] [0000000000565990] kmem_cache_alloc_node+0x24/0x1a0 [ 8138.652345] [0000000000450b60] tsb_grow+0xac/0x488 [ 8138.672429] [0000000000985040] do_sparc64_fault+0x4dc/0x6e4 [ 8138.695736] [0000000000407c2c] sparc64_realfault_common+0x10/0x20 [ 8138.721202] [00000000006f2e24] NG4copy_from_user+0xa4/0x3c0 [ 8138.744510] [000000000044f900] perf_callchain_user+0x5c/0x6c [ 8138.768182] [0000000000517b5c] perf_callchain+0x16c/0x19c [ 8138.790774] [0000000000515f84] perf_prepare_sample+0x68/0x218 [ 8138.814801] ---[ end trace 42ca6294b1ff7573 ]--- As with PowerPC (b59a1bfcc240, "powerpc/perf: Disable pagefaults during callchain stack read") disable pagefaults while walking userspace stacks. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 59cf917..48387be 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -21,7 +21,7 @@ #include <asm/stacktrace.h> #include <asm/cpudata.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/atomic.h> #include <asm/nmi.h> #include <asm/pcr.h> @@ -1803,8 +1803,13 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) return; flushw_user(); + + pagefault_disable(); + if (test_thread_flag(TIF_32BIT)) perf_callchain_user_32(entry, regs); else perf_callchain_user_64(entry, regs); + + pagefault_enable(); } -- cgit v0.10.2 From 2bf7c3efc393937d1e5f92681501a914dbfbae07 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Jun 2015 16:15:44 -0400 Subject: sparc64: Convert BUG_ON to warning Pagefault handling has a BUG_ON path that panics the system. Convert it to a warning instead. There is no need to bring down the system for this kind of failure. The following was hit while running: perf sched record -g -- make -j 16 [3609412.782801] kernel BUG at /opt/dahern/linux.git/arch/sparc/mm/fault_64.c:416! [3609412.782833] \|/ ____ \|/ [3609412.782833] "@'/ .. 
\`@" [3609412.782833] /_| \__/ |_\ [3609412.782833] \__U_/ [3609412.782870] cat(4516): Kernel bad sw trap 5 [#1] [3609412.782889] CPU: 0 PID: 4516 Comm: cat Tainted: G E 4.1.0-rc8+ #6 [3609412.782909] task: fff8000126e31f80 ti: fff8000110d90000 task.ti: fff8000110d90000 [3609412.782931] TSTATE: 0000004411001603 TPC: 000000000096b164 TNPC: 000000000096b168 Y: 0000004e Tainted: G E [3609412.782964] TPC: [3609412.782979] g0: 000000000096abe0 g1: 0000000000d314c4 g2: 0000000000000000 g3: 0000000000000001 [3609412.783009] g4: fff8000126e31f80 g5: fff80001302d2000 g6: fff8000110d90000 g7: 00000000000000ff [3609412.783045] o0: 0000000000aff6a8 o1: 00000000000001a0 o2: 0000000000000001 o3: 0000000000000054 [3609412.783080] o4: fff8000100026820 o5: 0000000000000001 sp: fff8000110d935f1 ret_pc: 000000000096b15c [3609412.783117] RPC: [3609412.783137] l0: 000007feff996000 l1: 0000000000030001 l2: 0000000000000004 l3: fff8000127bd0120 [3609412.783174] l4: 0000000000000054 l5: fff8000127bd0188 l6: 0000000000000000 l7: fff8000110d9dba8 [3609412.783210] i0: fff8000110d93f60 i1: fff8000110ca5530 i2: 000000000000003f i3: 0000000000000054 [3609412.783244] i4: fff800010000081a i5: fff8000100000398 i6: fff8000110d936a1 i7: 0000000000407c6c [3609412.783286] I7: [3609412.783308] Call Trace: [3609412.783329] [0000000000407c6c] sparc64_realfault_common+0x10/0x20 [3609412.783353] Disabling lock debugging due to kernel taint [3609412.783379] Caller[0000000000407c6c]: sparc64_realfault_common+0x10/0x20 [3609412.783449] Caller[fff80001002283e4]: 0xfff80001002283e4 [3609412.783471] Instruction DUMP: 921021a0 7feaff91 901222a8 <91d02005> 82086100 02f87f7b 808a2873 81cfe008 01000000 [3609412.783542] Kernel panic - not syncing: Fatal exception [3609412.784605] Press Stop-A (L1-A) to return to the boot prom [3609412.784615] ---[ end Kernel panic - not syncing: Fatal exception With this patch rather than a panic I occasionally get something like this: perf sched record -g -m 1024 -- make -j N where 
N is based on number of cpus (128 to 1024 for a T7-4 and 8 for an 8 cpu VM on a T5-2). WARNING: CPU: 211 PID: 52565 at /opt/dahern/linux.git/arch/sparc/mm/fault_64.c:417 do_sparc64_fault+0x340/0x70c() address (7feffcd6000) != regs->tpc (fff80001004873c0) Modules linked in: ipt_REJECT nf_reject_ipv4 nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_reject_ipv6 xt_tcpudp nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables x_tables ipv6 cdc_ether usbnet mii ixgbe mdio igb i2c_algo_bit i2c_core ptp crc32c_sparc64 camellia_sparc64 des_sparc64 des_generic md5_sparc64 sha512_sparc64 sha1_sparc64 uio_pdrv_genirq uio usb_storage mpt3sas scsi_transport_sas raid_class aes_sparc64 sunvnet sunvdc sha256_sparc64(E) sha256_generic(E) CPU: 211 PID: 52565 Comm: ld Tainted: G W E 4.1.0-rc8+ #19 Call Trace: [000000000045ce30] warn_slowpath_common+0x7c/0xa0 [000000000045ceec] warn_slowpath_fmt+0x30/0x40 [000000000098ad64] do_sparc64_fault+0x340/0x70c [0000000000407c2c] sparc64_realfault_common+0x10/0x20 ---[ end trace 62ee02065a01a049 ]--- ld[52565]: segfault at fff80001004873c0 ip fff80001004873c0 (rpc fff8000100158868) sp 000007feffcd70e1 error 30002 in libc-2.12.so[fff8000100410000+184000] The segfault is horrible, but better than a system panic. An 8-cpu VM on a T5-2 also showed the above traces from time to time, so it is a general problem and not specific to the T7 or baremetal. Signed-off-by: David Ahern Signed-off-by: David S. Miller diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index e9268ea..dbabe57 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -413,8 +413,9 @@ good_area: * that here. 
*/ if ((fault_code & FAULT_CODE_ITLB) && !(vma->vm_flags & VM_EXEC)) { - BUG_ON(address != regs->tpc); - BUG_ON(regs->tstate & TSTATE_PRIV); + WARN(address != regs->tpc, + "address (%lx) != regs->tpc (%lx)\n", address, regs->tpc); + WARN_ON(regs->tstate & TSTATE_PRIV); goto bad_area; } -- cgit v0.10.2 From b69fb7699c92f85991672fc144b0adb7c717fbc8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Jun 2015 16:15:45 -0400 Subject: sparc64: perf: Add sanity checking on addresses in user stack Processes are getting killed (sigbus or segv) while walking userspace callchains when using perf. In some instances I have seen ufp = 0x7ff which does not seem like a proper stack address. This patch adds a function to run validity checks against the address before attempting the copy_from_user. The checks are copied from the x86 version as a start point with the addition of a 4-byte alignment check. Signed-off-by: David Ahern Signed-off-by: David S. Miller diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index a35194b..ea6e9a2 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -49,6 +49,28 @@ do { \ __asm__ __volatile__ ("wr %%g0, %0, %%asi" : : "r" ((val).seg)); \ } while(0) +/* + * Test whether a block of memory is a valid user space address. + * Returns 0 if the range is valid, nonzero otherwise. 
+ */ +static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, unsigned long limit) +{ + if (__builtin_constant_p(size)) + return addr > limit - size; + + addr += size; + if (addr < size) + return true; + + return addr > limit; +} + +#define __range_not_ok(addr, size, limit) \ +({ \ + __chk_user_ptr(addr); \ + __chk_range_not_ok((unsigned long __force)(addr), size, limit); \ +}) + static inline int __access_ok(const void __user * addr, unsigned long size) { return 1; diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 48387be..a665e3f 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1741,6 +1741,16 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, } while (entry->nr < PERF_MAX_STACK_DEPTH); } +static inline int +valid_user_frame(const void __user *fp, unsigned long size) +{ + /* addresses should be at least 4-byte aligned */ + if (((unsigned long) fp) & 3) + return 0; + + return (__range_not_ok(fp, size, TASK_SIZE) == 0); +} + static void perf_callchain_user_64(struct perf_callchain_entry *entry, struct pt_regs *regs) { @@ -1753,6 +1763,9 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry, unsigned long pc; usf = (struct sparc_stackf __user *)ufp; + if (!valid_user_frame(usf, sizeof(sf))) + break; + if (__copy_from_user_inatomic(&sf, usf, sizeof(sf))) break; -- cgit v0.10.2 From 2d89cd8625c4af01a2683b18c3c8194cc3b3067c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Jun 2015 16:15:46 -0400 Subject: sparc64: perf: Use UREG_FP rather than UREG_I6 perf walks userspace callchains by following frame pointers. Use the UREG_FP macro to make it clearer that the %fp is being used. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index a665e3f..689db65 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1756,7 +1756,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry, { unsigned long ufp; - ufp = regs->u_regs[UREG_I6] + STACK_BIAS; + ufp = regs->u_regs[UREG_FP] + STACK_BIAS; do { struct sparc_stackf __user *usf; struct sparc_stackf sf; @@ -1780,7 +1780,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry, { unsigned long ufp; - ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; + ufp = regs->u_regs[UREG_FP] & 0xffffffffUL; do { unsigned long pc; -- cgit v0.10.2