From 9f2dfda2f2f1c6181c3732c16b85c59ab2d195e0 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Wed, 16 Dec 2015 21:59:56 +0100 Subject: uml: fix hostfs mknod() An inverted return value check in hostfs_mknod() caused the function to return success after handling it as an error (and cleaning up). It resulted in the following segfault when trying to bind() a named unix socket: Pid: 198, comm: a.out Not tainted 4.4.0-rc4 RIP: 0033:[<0000000061077df6>] RSP: 00000000daae5d60 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 000000006092a460 RCX: 00000000dfc54208 RDX: 0000000061073ef1 RSI: 0000000000000070 RDI: 00000000e027d600 RBP: 00000000daae5de0 R08: 00000000da980ac0 R09: 0000000000000000 R10: 0000000000000003 R11: 00007fb1ae08f72a R12: 0000000000000000 R13: 000000006092a460 R14: 00000000daaa97c0 R15: 00000000daaa9a88 Kernel panic - not syncing: Kernel mode fault at addr 0x40, ip 0x61077df6 CPU: 0 PID: 198 Comm: a.out Not tainted 4.4.0-rc4 #1 Stack: e027d620 dfc54208 0000006f da981398 61bee000 0000c1ed daae5de0 0000006e e027d620 dfcd4208 00000005 6092a460 Call Trace: [<60dedc67>] SyS_bind+0xf7/0x110 [<600587be>] handle_syscall+0x7e/0x80 [<60066ad7>] userspace+0x3e7/0x4e0 [<6006321f>] ? save_registers+0x1f/0x40 [<6006c88e>] ? arch_prctl+0x1be/0x1f0 [<60054985>] fork_handler+0x85/0x90 Let's also get rid of the "cosmic ray protection" while we're at it. Fixes: e9193059b1b3 "hostfs: fix races in dentry_name() and inode_name()" Signed-off-by: Vegard Nossum Cc: Jeff Dike Cc: Al Viro Cc: stable@vger.kernel.org Signed-off-by: Richard Weinberger diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 2ac99db..5a7b322 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -730,15 +730,13 @@ static int hostfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, init_special_inode(inode, mode, dev); err = do_mknod(name, mode, MAJOR(dev), MINOR(dev)); - if (!err) + if (err) goto out_free; err = read_name(inode, name); __putname(name); if (err) goto out_put; - if (err) - goto out_put; d_instantiate(dentry, inode); return 0; -- cgit v0.10.2 From 0754fb298f2f2719f0393491d010d46cfb25d043 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 18 Dec 2015 21:28:53 +0100 Subject: uml: flush stdout before forking I was seeing some really weird behaviour where piping UML's output somewhere would cause output to get duplicated: $ ./vmlinux | head -n 40 Checking that ptrace can change system call numbers...Core dump limits : soft - 0 hard - NONE OK Checking syscall emulation patch for ptrace...Core dump limits : soft - 0 hard - NONE OK Checking advanced syscall emulation patch for ptrace...Core dump limits : soft - 0 hard - NONE OK Core dump limits : soft - 0 hard - NONE This is because these tests do a fork() which duplicates the non-empty stdout buffer, then glibc flushes the duplicated buffer as each child exits. A simple workaround is to flush before forking. Cc: stable@vger.kernel.org Signed-off-by: Vegard Nossum Signed-off-by: Richard Weinberger diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 47f1ff0..22a358e 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -94,6 +94,8 @@ static int start_ptraced_child(void) { int pid, n, status; + fflush(stdout); + pid = fork(); if (pid == 0) ptrace_child(); -- cgit v0.10.2 From d5e3f5cbe5cee7fe6da26566559a978547179b37 Mon Sep 17 00:00:00 2001 From: Anton Ivanov Date: Mon, 21 Dec 2015 11:28:02 +0000 Subject: um: Prevent IRQ handler reentrancy The existing IRQ handler design in UML does not prevent reentrancy This is mitigated by fd-enable/fd-disable semantics for the IO portion of the UML subsystem. The timer, however, can and is re-entered resulting in very deep stack usage and occasional stack exhaustion. This patch prevents this by checking if there is a timer interrupt in-flight before processing any pending timer interrupts. Signed-off-by: Anton Ivanov Signed-off-by: Richard Weinberger diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index c211153..7801666 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -62,6 +62,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) static int signals_enabled; static unsigned int signals_pending; +static unsigned int signals_active = 0; void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) { @@ -101,7 +102,12 @@ void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) block_signals(); + signals_active |= SIGALRM_MASK; + timer_real_alarm_handler(mc); + + signals_active &= ~SIGALRM_MASK; + set_signals(enabled); } @@ -286,8 +292,16 @@ void unblock_signals(void) if (save_pending & SIGIO_MASK) sig_handler_common(SIGIO, NULL, NULL); - if (save_pending & SIGALRM_MASK) + /* Do not reenter the handler */ + + if ((save_pending & SIGALRM_MASK) && (!(signals_active & SIGALRM_MASK))) timer_real_alarm_handler(NULL); + + /* Rerun the loop only if there is still pending SIGIO and not in TIMER handler */ + + if (!(signals_pending & SIGIO_MASK) && (signals_active & SIGALRM_MASK)) + return; + } } -- cgit v0.10.2 From 470a166e8c5a4da4be88545b1c4dde308abac5b2 Mon Sep 17 00:00:00 2001 From: Anton Ivanov Date: Mon, 21 Dec 2015 11:28:03 +0000 Subject: um: Do not change hard IRQ flags in soft IRQ processing Software IRQ processing in generic architectures assumes that the exit out of hard IRQ may have re-enabled interrupts (some architectures may have an implicit EOI). It presumes them enabled and toggles the flags once more just in case unless this is turned off in the architecture specific hardirq.h by setting __ARCH_IRQ_EXIT_IRQS_DISABLED This patch adds this to UML where due to the way IRQs are handled it is an optimization (it works fine without it too). Signed-off-by: Anton Ivanov Signed-off-by: Richard Weinberger diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h new file mode 100644 index 0000000..756f077 --- /dev/null +++ b/arch/um/include/asm/hardirq.h @@ -0,0 +1,23 @@ +#ifndef __ASM_UM_HARDIRQ_H +#define __ASM_UM_HARDIRQ_H + +#include +#include + +typedef struct { + unsigned int __softirq_pending; +} ____cacheline_aligned irq_cpustat_t; + +#include /* Standard mappings for irq_cpustat_t above */ +#include + +#ifndef ack_bad_irq +static inline void ack_bad_irq(unsigned int irq) +{ + printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq); +} +#endif + +#define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 + +#endif /* __ASM_UM_HARDIRQ_H */ -- cgit v0.10.2 From 8c6157b6b30a765ec233a1be5f9446f24a5283de Mon Sep 17 00:00:00 2001 From: Anton Ivanov Date: Mon, 21 Dec 2015 18:54:00 +0000 Subject: um: Update UBD to use pread/pwrite family of functions This decreases the number of syscalls per read/write by half. Signed-off-by: Anton Ivanov Signed-off-by: Richard Weinberger diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index e8ab93c..39ba207 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -535,11 +535,7 @@ static int read_cow_bitmap(int fd, void *buf, int offset, int len) { int err; - err = os_seek_file(fd, offset); - if (err < 0) - return err; - - err = os_read_file(fd, buf, len); + err = os_pread_file(fd, buf, len, offset); if (err < 0) return err; @@ -1377,14 +1373,8 @@ static int update_bitmap(struct io_thread_req *req) if(req->cow_offset == -1) return 0; - n = os_seek_file(req->fds[1], req->cow_offset); - if(n < 0){ - printk("do_io - bitmap lseek failed : err = %d\n", -n); - return 1; - } - - n = os_write_file(req->fds[1], &req->bitmap_words, - sizeof(req->bitmap_words)); + n = os_pwrite_file(req->fds[1], &req->bitmap_words, + sizeof(req->bitmap_words), req->cow_offset); if(n != sizeof(req->bitmap_words)){ printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, req->fds[1]); @@ -1399,7 +1389,6 @@ static void do_io(struct io_thread_req *req) char *buf; unsigned long len; int n, nsectors, start, end, bit; - int err; __u64 off; if (req->op == UBD_FLUSH) { @@ -1428,18 +1417,12 @@ static void do_io(struct io_thread_req *req) len = (end - start) * req->sectorsize; buf = &req->buffer[start * req->sectorsize]; - err = os_seek_file(req->fds[bit], off); - if(err < 0){ - printk("do_io - lseek failed : err = %d\n", -err); - req->error = 1; - return; - } if(req->op == UBD_READ){ n = 0; do { buf = &buf[n]; len -= n; - n = os_read_file(req->fds[bit], buf, len); + n = os_pread_file(req->fds[bit], buf, len, off); if (n < 0) { printk("do_io - read failed, err = %d " "fd = %d\n", -n, req->fds[bit]); @@ -1449,7 +1432,7 @@ static void do_io(struct io_thread_req *req) } while((n < len) && (n != 0)); if (n < len) memset(&buf[n], 0, len - n); } else { - n = os_write_file(req->fds[bit], buf, len); + n = os_pwrite_file(req->fds[bit], buf, len, off); if(n != len){ printk("do_io - write failed err = %d " "fd = %d\n", -n, req->fds[bit]); diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 868e6c3..7a04ddd 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -146,6 +146,8 @@ extern int os_read_file(int fd, void *buf, int len); extern int os_write_file(int fd, const void *buf, int count); extern int os_sync_file(int fd); extern int os_file_size(const char *file, unsigned long long *size_out); +extern int os_pread_file(int fd, void *buf, int len, unsigned long long offset); +extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long offset); extern int os_file_modtime(const char *file, unsigned long *modtime); extern int os_pipe(int *fd, int stream, int close_on_exec); extern int os_set_fd_async(int fd); diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index 26e0164..2db18cb 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -264,6 +264,15 @@ int os_read_file(int fd, void *buf, int len) return n; } +int os_pread_file(int fd, void *buf, int len, unsigned long long offset) +{ + int n = pread(fd, buf, len, offset); + + if (n < 0) + return -errno; + return n; +} + int os_write_file(int fd, const void *buf, int len) { int n = write(fd, (void *) buf, len); @@ -282,6 +291,16 @@ int os_sync_file(int fd) return n; } +int os_pwrite_file(int fd, const void *buf, int len, unsigned long long offset) +{ + int n = pwrite(fd, (void *) buf, len, offset); + + if (n < 0) + return -errno; + return n; +} + + int os_file_size(const char *file, unsigned long long *size_out) { struct uml_stat buf; -- cgit v0.10.2 From a7df4716d19594b7b3f106f0bc0ca1c548e508e6 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 31 Dec 2015 17:06:17 +0100 Subject: um: link with -lpthread Similarly to commit fb1770aa78a43530940d0c2dd161e77bc705bdac, with gcc 5 on Ubuntu and CONFIG_STATIC_LINK=y I was seeing these linker errors: /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/librt.a(timer_create.o): In function `__timer_create_new': (.text+0xcd): undefined reference to `pthread_once' /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/librt.a(timer_create.o): In function `__timer_create_new': (.text+0x126): undefined reference to `pthread_attr_init' /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/librt.a(timer_create.o): In function `__timer_create_new': (.text+0x168): undefined reference to `pthread_attr_setdetachstate' [...] Obviously we also need -lpthread for librt.a. Cc: stable@vger.kernel.org # 4.4 Signed-off-by: Vegard Nossum Signed-off-by: Richard Weinberger diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index dacf71a..ba6c34e 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -62,7 +62,7 @@ vmlinux_link() -Wl,--start-group \ ${KBUILD_VMLINUX_MAIN} \ -Wl,--end-group \ - -lutil -lrt ${1} + -lutil -lrt -lpthread ${1} rm -f linux fi } -- cgit v0.10.2 From e04c989eb785af61d2895d76d38c09166296f9c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 29 Dec 2015 21:35:44 +0100 Subject: um: Fix ptrace GETREGS/SETREGS bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fix two related bugs: * PTRACE_GETREGS doesn't get the right orig_ax (syscall) value * PTRACE_SETREGS can't set the orig_ax value (erased by initial value) Get rid of the now useless and error-prone get_syscall(). Fix inconsistent behavior in the ptrace implementation for i386 when updating orig_eax automatically update the syscall number as well. This is now updated in handle_syscall(). Signed-off-by: Mickaël Salaün Cc: Jeff Dike Cc: Richard Weinberger Cc: Thomas Gleixner Cc: Kees Cook Cc: Andy Lutomirski Cc: Will Drewry Cc: Thomas Meyer Cc: Nicolas Iooss Cc: Anton Ivanov Cc: Meredydd Luff Cc: David Drysdale Signed-off-by: Richard Weinberger Acked-by: Kees Cook diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 7a04ddd..de5d572 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -284,7 +284,6 @@ extern void initial_thread_cb_skas(void (*proc)(void *), void *arg); extern void halt_skas(void); extern void reboot_skas(void); -extern int get_syscall(struct uml_pt_regs *regs); /* irq.c */ extern int os_waiting_for_events(struct irq_fd *active_fds); diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index 1683b8e..6cadce7 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -7,29 +7,31 @@ #include #include #include +#include #include -#include void handle_syscall(struct uml_pt_regs *r) { struct pt_regs *regs = container_of(r, struct pt_regs, regs); - long result; int syscall; - if (syscall_trace_enter(regs)) { - result = -ENOSYS; + /* Initialize the syscall number and default return value. */ + UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp); + PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS); + + if (syscall_trace_enter(regs)) goto out; - } - syscall = get_syscall(r); + /* Update the syscall number after orig_ax has potentially been updated + * with ptrace. + */ + UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp); + syscall = UPT_SYSCALL_NR(r); - if ((syscall > __NR_syscall_max) || syscall < 0) - result = -ENOSYS; - else - result = EXECUTE_SYSCALL(syscall, regs); + if (syscall >= 0 && syscall <= __NR_syscall_max) + PT_REGS_SET_SYSCALL_RETURN(regs, + EXECUTE_SYSCALL(syscall, regs)); out: - PT_REGS_SET_SYSCALL_RETURN(regs, result); - syscall_trace_leave(regs); } diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index b856c66..23025d6 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -172,13 +172,6 @@ static void handle_trap(int pid, struct uml_pt_regs *regs, handle_syscall(regs); } -int get_syscall(struct uml_pt_regs *regs) -{ - UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->gp); - - return UPT_SYSCALL_NR(regs); -} - extern char __syscall_stub_start[]; static int userspace_tramp(void *stack) diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index a29756f2..47c78d5 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c @@ -68,6 +68,7 @@ static const int reg_offsets[] = { [EFL] = HOST_EFLAGS, [UESP] = HOST_SP, [SS] = HOST_SS, + [ORIG_EAX] = HOST_ORIG_AX, }; int putreg(struct task_struct *child, int regno, unsigned long value) @@ -83,6 +84,7 @@ int putreg(struct task_struct *child, int regno, unsigned long value) case EAX: case EIP: case UESP: + case ORIG_EAX: break; case FS: if (value && (value & 3) != 3) @@ -108,9 +110,6 @@ int putreg(struct task_struct *child, int regno, unsigned long value) value &= FLAG_MASK; child->thread.regs.regs.gp[HOST_EFLAGS] |= value; return 0; - case ORIG_EAX: - child->thread.regs.regs.syscall = value; - return 0; default : panic("Bad register in putreg() : %d\n", regno); } @@ -143,8 +142,6 @@ unsigned long getreg(struct task_struct *child, int regno) regno >>= 2; switch (regno) { - case ORIG_EAX: - return child->thread.regs.regs.syscall; case FS: case GS: case DS: @@ -163,6 +160,7 @@ unsigned long getreg(struct task_struct *child, int regno) case EDI: case EBP: case EFL: + case ORIG_EAX: break; default: panic("Bad register in getreg() : %d\n", regno); -- cgit v0.10.2 From 4a0b88070406323487bad730d8945f482151a145 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 29 Dec 2015 21:35:45 +0100 Subject: selftests/seccomp: Remove the need for HAVE_ARCH_TRACEHOOK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some architectures do not implement PTRACE_GETREGSET nor PTRACE_SETREGSET (required by HAVE_ARCH_TRACEHOOK) but only implement PTRACE_GETREGS and PTRACE_SETREGS (e.g. User-mode Linux). This improve seccomp selftest portability for architectures without HAVE_ARCH_TRACEHOOK support by defining a new trigger HAVE_GETREGS. For now, this is only enabled for i386 and x86_64 architectures. This is required to be able to run this tests on User-mode Linux. Signed-off-by: Mickaël Salaün Cc: Jeff Dike Cc: Richard Weinberger Cc: Kees Cook Cc: Andy Lutomirski Cc: Will Drewry Cc: Shuah Khan Cc: Meredydd Luff Cc: David Drysdale Signed-off-by: Richard Weinberger Acked-by: Kees Cook diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 882fe83..b9453b8 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1246,11 +1246,24 @@ TEST_F(TRACE_poke, getpid_runs_normally) # error "Do not know how to find your architecture's registers and syscalls" #endif +/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for + * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). + */ +#if defined(__x86_64__) || defined(__i386__) +#define HAVE_GETREGS +#endif + /* Architecture-specific syscall fetching routine. */ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) { - struct iovec iov; ARCH_REGS regs; +#ifdef HAVE_GETREGS + EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, ®s)) { + TH_LOG("PTRACE_GETREGS failed"); + return -1; + } +#else + struct iovec iov; iov.iov_base = ®s; iov.iov_len = sizeof(regs); @@ -1258,6 +1271,7 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) TH_LOG("PTRACE_GETREGSET failed"); return -1; } +#endif return regs.SYSCALL_NUM; } @@ -1266,13 +1280,16 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) void change_syscall(struct __test_metadata *_metadata, pid_t tracee, int syscall) { - struct iovec iov; int ret; ARCH_REGS regs; - +#ifdef HAVE_GETREGS + ret = ptrace(PTRACE_GETREGS, tracee, 0, ®s); +#else + struct iovec iov; iov.iov_base = ®s; iov.iov_len = sizeof(regs); ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); +#endif EXPECT_EQ(0, ret); #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ @@ -1312,9 +1329,13 @@ void change_syscall(struct __test_metadata *_metadata, if (syscall == -1) regs.SYSCALL_RET = 1; +#ifdef HAVE_GETREGS + ret = ptrace(PTRACE_SETREGS, tracee, 0, ®s); +#else iov.iov_base = ®s; iov.iov_len = sizeof(regs); ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov); +#endif EXPECT_EQ(0, ret); } -- cgit v0.10.2 From d8f8b8445648c267a24f30a72533e77cb6543f21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 29 Dec 2015 21:35:46 +0100 Subject: um: Add full asm/syscall.h support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add subarchitecture-independent implementation of asm-generic/syscall.h allowing access to user system call parameters and results: * syscall_get_nr() * syscall_rollback() * syscall_get_error() * syscall_get_return_value() * syscall_set_return_value() * syscall_get_arguments() * syscall_set_arguments() * syscall_get_arch() provided by arch/x86/um/asm/syscall.h This provides the necessary syscall helpers needed by HAVE_ARCH_SECCOMP_FILTER plus syscall_get_error(). This is inspired from Meredydd Luff's patch (https://gerrit.chromium.org/gerrit/21425). Signed-off-by: Mickaël Salaün Cc: Jeff Dike Cc: Richard Weinberger Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Kees Cook Cc: Andy Lutomirski Cc: Will Drewry Cc: Meredydd Luff Cc: David Drysdale Signed-off-by: Richard Weinberger Acked-by: Kees Cook diff --git a/arch/um/include/asm/syscall-generic.h b/arch/um/include/asm/syscall-generic.h new file mode 100644 index 0000000..9fb9cf8 --- /dev/null +++ b/arch/um/include/asm/syscall-generic.h @@ -0,0 +1,138 @@ +/* + * Access to user system call parameters and results + * + * See asm-generic/syscall.h for function descriptions. + * + * Copyright (C) 2015 Mickaël Salaün + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __UM_SYSCALL_GENERIC_H +#define __UM_SYSCALL_GENERIC_H + +#include +#include +#include +#include + +static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) +{ + + return PT_REGS_SYSCALL_NR(regs); +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + /* do nothing */ +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + const long error = regs_return_value(regs); + + return IS_ERR_VALUE(error) ? error : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs_return_value(regs); +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + PT_REGS_SET_SYSCALL_RETURN(regs, (long) error ?: val); +} + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + const struct uml_pt_regs *r = ®s->regs; + + switch (i) { + case 0: + if (!n--) + break; + *args++ = UPT_SYSCALL_ARG1(r); + case 1: + if (!n--) + break; + *args++ = UPT_SYSCALL_ARG2(r); + case 2: + if (!n--) + break; + *args++ = UPT_SYSCALL_ARG3(r); + case 3: + if (!n--) + break; + *args++ = UPT_SYSCALL_ARG4(r); + case 4: + if (!n--) + break; + *args++ = UPT_SYSCALL_ARG5(r); + case 5: + if (!n--) + break; + *args++ = UPT_SYSCALL_ARG6(r); + case 6: + if (!n--) + break; + default: + BUG(); + break; + } +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ + struct uml_pt_regs *r = ®s->regs; + + switch (i) { + case 0: + if (!n--) + break; + UPT_SYSCALL_ARG1(r) = *args++; + case 1: + if (!n--) + break; + UPT_SYSCALL_ARG2(r) = *args++; + case 2: + if (!n--) + break; + UPT_SYSCALL_ARG3(r) = *args++; + case 3: + if (!n--) + break; + UPT_SYSCALL_ARG4(r) = *args++; + case 4: + if (!n--) + break; + UPT_SYSCALL_ARG5(r) = *args++; + case 5: + if (!n--) + break; + UPT_SYSCALL_ARG6(r) = *args++; + case 6: + if (!n--) + break; + default: + BUG(); + break; + } +} + +/* See arch/x86/um/asm/syscall.h for syscall_get_arch() definition. */ + +#endif /* __UM_SYSCALL_GENERIC_H */ diff --git a/arch/x86/um/asm/syscall.h b/arch/x86/um/asm/syscall.h index 81d6562..11ab90d 100644 --- a/arch/x86/um/asm/syscall.h +++ b/arch/x86/um/asm/syscall.h @@ -1,6 +1,7 @@ #ifndef __UM_ASM_SYSCALL_H #define __UM_ASM_SYSCALL_H +#include #include typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long, -- cgit v0.10.2 From c50b4659e444b020657e01bdf769c965e5597cb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 29 Dec 2015 21:35:47 +0100 Subject: um: Add seccomp support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This brings SECCOMP_MODE_STRICT and SECCOMP_MODE_FILTER support through prctl(2) and seccomp(2) to User-mode Linux for i386 and x86_64 subarchitectures. secure_computing() is called first in handle_syscall() so that the syscall emulation will be aborted quickly if matching a seccomp rule. This is inspired from Meredydd Luff's patch (https://gerrit.chromium.org/gerrit/21425). Signed-off-by: Mickaël Salaün Cc: Jeff Dike Cc: Richard Weinberger Cc: Ingo Molnar Cc: Kees Cook Cc: Andy Lutomirski Cc: Will Drewry Cc: Chris Metcalf Cc: Michael Ellerman Cc: James Hogan Cc: Meredydd Luff Cc: David Drysdale Signed-off-by: Richard Weinberger Acked-by: Kees Cook diff --git a/Documentation/features/seccomp/seccomp-filter/arch-support.txt b/Documentation/features/seccomp/seccomp-filter/arch-support.txt index 76d39d6..4f66ec1 100644 --- a/Documentation/features/seccomp/seccomp-filter/arch-support.txt +++ b/Documentation/features/seccomp/seccomp-filter/arch-support.txt @@ -33,7 +33,7 @@ | sh: | TODO | | sparc: | TODO | | tile: | ok | - | um: | TODO | + | um: | ok | | unicore32: | TODO | | x86: | ok | | xtensa: | TODO | diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common index d195a87..cc00134 100644 --- a/arch/um/Kconfig.common +++ b/arch/um/Kconfig.common @@ -2,6 +2,7 @@ config UML bool default y select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_SECCOMP_FILTER select HAVE_UID16 select HAVE_FUTEX_CMPXCHG if FUTEX select GENERIC_IRQ_SHOW diff --git a/arch/um/Kconfig.um b/arch/um/Kconfig.um index 28a9885..4b2ed58 100644 --- a/arch/um/Kconfig.um +++ b/arch/um/Kconfig.um @@ -104,3 +104,19 @@ config PGTABLE_LEVELS int default 3 if 3_LEVEL_PGTABLES default 2 + +config SECCOMP + def_bool y + prompt "Enable seccomp to safely compute untrusted bytecode" + ---help--- + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + If unsure, say Y. diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index 53968aa..053baff 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -62,11 +62,13 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_AUDIT 6 #define TIF_RESTORE_SIGMASK 7 #define TIF_NOTIFY_RESUME 8 +#define TIF_SECCOMP 9 /* secure computing */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_MEMDIE (1 << TIF_MEMDIE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) #endif diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index 6cadce7..48b0dcb 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -19,6 +20,10 @@ void handle_syscall(struct uml_pt_regs *r) UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp); PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS); + /* Do the secure computing check first; failures should be fast. */ + if (secure_computing() == -1) + return; + if (syscall_trace_enter(regs)) goto out; -- cgit v0.10.2 From 42d91f612c879627c925d3779c36877cd440f9f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 24 Dec 2015 13:12:11 +0100 Subject: um: Fix build error and kconfig for i386 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix build error by generating elfcore.o only when ELF_CORE (depending on COREDUMP) is selected: arch/x86/um/built-in.o: In function `elf_core_write_extra_phdrs': (.text+0x3e62): undefined reference to `dump_emit' arch/x86/um/built-in.o: In function `elf_core_write_extra_data': (.text+0x3eef): undefined reference to `dump_emit' Fixes: 5d2acfc7b974 ("kconfig: make allnoconfig disable options behind EMBEDDED and EXPERT") Signed-off-by: Mickaël Salaün Cc: Jeff Dike Cc: Richard Weinberger Cc: Josh Triplett Cc: Paul E. McKenney Cc: Michal Marek Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Richard Weinberger Reviewed-by: Josh Triplett diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index a8fecc2..3ee2bb6 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -17,7 +17,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \ ifeq ($(CONFIG_X86_32),y) obj-y += checksum_32.o -obj-$(CONFIG_BINFMT_ELF) += elfcore.o +obj-$(CONFIG_ELF_CORE) += elfcore.o subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o -- cgit v0.10.2 From 571d2f0c341fa15dbbb4fb73c80bd740ef37a9f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 22 Dec 2015 22:15:09 +0100 Subject: um: Do not set unsecure permission for temporary file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the insecure 0777 mode for temporary file to prohibit other users to change the executable mapped code. An attacker could gain access to the mapped file descriptor from the temporary file (before it is unlinked) in a read-only mode but it should not be accessible in write mode to avoid arbitrary code execution. To not change the hostfs behavior, the temporary file creation permission now depends on the current umask(2) and the implementation of mkstemp(3). Signed-off-by: Mickaël Salaün Cc: Jeff Dike Cc: Richard Weinberger Acked-by: Tristan Schmelcher Signed-off-by: Richard Weinberger diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c index 897e9ad..840d573 100644 --- a/arch/um/os-Linux/mem.c +++ b/arch/um/os-Linux/mem.c @@ -142,12 +142,6 @@ static int __init create_tmp_file(unsigned long long len) if (fd < 0) exit(1); - err = fchmod(fd, 0777); - if (err < 0) { - perror("fchmod"); - exit(1); - } - /* * Seek to len - 1 because writing a character there will * increase the file size by one byte, to the desired length. -- cgit v0.10.2 From 3e46b25376321db119bc8507ce8c8841c580e736 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 22 Dec 2015 22:15:10 +0100 Subject: um: Use race-free temporary file creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Open the memory mapped file with the O_TMPFILE flag when available. Signed-off-by: Mickaël Salaün Cc: Jeff Dike Cc: Richard Weinberger Acked-by: Tristan Schmelcher Signed-off-by: Richard Weinberger diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c index 840d573..8b17676 100644 --- a/arch/um/os-Linux/mem.c +++ b/arch/um/os-Linux/mem.c @@ -106,6 +106,17 @@ static int __init make_tempfile(const char *template) } } +#ifdef O_TMPFILE + fd = open(tempdir, O_CLOEXEC | O_RDWR | O_EXCL | O_TMPFILE, 0700); + /* + * If the running system does not support O_TMPFILE flag then retry + * without it. + */ + if (fd != -1 || (errno != EINVAL && errno != EISDIR && + errno != EOPNOTSUPP)) + return fd; +#endif + tempname = malloc(strlen(tempdir) + strlen(template) + 1); if (tempname == NULL) return -1; -- cgit v0.10.2