From 7721d3c2083c27bfb8e4c1335d343e25ae1a663f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Sep 2012 18:08:40 -0400 Subject: alpha: simplify TIF_NEED_RESCHED handling In case we have both NEED_RESCHED and SIGPENDING/NOTIFY_RESUME, handle the latter first. We'll get to original priorities in the next commit, but now that allows to simplify the treatment of NEED_RESCHED-only case nicely. Namely, now there no need to preserve the data for restarts across the call of schedule() in $work_resched; we can get there only if we had either returned from syscall without SIGPENDING (in which case we should've had no restart-worthy return value and want no restarts) or already got through do_notify_resume() call (in which case we want no restarts anymore). So we can just slap 0 into $19 instead of preserving it (and $20). Signed-off-by: Al Viro diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index 2a359c9..802108b 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -372,26 +372,18 @@ $ret_success: .align 4 .ent work_pending work_pending: - and $5, _TIF_NEED_RESCHED, $2 - beq $2, $work_notifysig + and $5, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING, $2 + bne $2, $work_notifysig $work_resched: - subq $sp, 16, $sp - stq $19, 0($sp) /* save syscall nr */ - stq $20, 8($sp) /* and error indication (a3) */ + /* + * We can get here only if we returned from syscall without SIGPENDING + * or got through work_notifysig already. Either case means no syscall + * restarts for us, so let $19 and $20 burn. + */ jsr $26, schedule - ldq $19, 0($sp) - ldq $20, 8($sp) - addq $sp, 16, $sp - /* Make sure need_resched and sigpending don't change between - sampling and the rti. */ - lda $16, 7 - call_pal PAL_swpipl - ldl $5, TI_FLAGS($8) - and $5, _TIF_WORK_MASK, $2 - beq $2, restore_all - and $5, _TIF_NEED_RESCHED, $2 - bne $2, $work_resched + mov 0, $19 + br ret_to_user $work_notifysig: mov $sp, $16 -- cgit v0.10.2 From 6972d6f25d21e3da58ff1309256c787078405c7f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Sep 2012 18:30:34 -0400 Subject: alpha: take SIGPENDING/NOTIFY_RESUME loop into signal.c Turn the slow side of work_pending into C function, including all the looping. What we get out of that: * we do _not_ call get_signal_to_deliver() with IRQs disabled anymore * no need to save/restore volatiles on each pass if there turns to be more than one (unlikely, but still) * all double-restart prevention is in C now. * glue gets simpler. Signed-off-by: Al Viro diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index 802108b..0c30cce 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -390,15 +390,9 @@ $work_notifysig: bsr $1, do_switch_stack mov $sp, $17 mov $5, $18 - mov $19, $9 /* save old syscall number */ - mov $20, $10 /* save old a3 */ - and $5, _TIF_SIGPENDING, $2 - cmovne $2, 0, $9 /* we don't want double syscall restarts */ - jsr $26, do_notify_resume - mov $9, $19 - mov $10, $20 + jsr $26, do_work_pending bsr $1, undo_switch_stack - br ret_to_user + br restore_all .end work_pending /* diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index a8c97d4..235a867 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -568,15 +568,24 @@ do_signal(struct pt_regs * regs, struct switch_stack * sw, } void -do_notify_resume(struct pt_regs *regs, struct switch_stack *sw, - unsigned long thread_info_flags, +do_work_pending(struct pt_regs *regs, struct switch_stack *sw, + unsigned long thread_flags, unsigned long r0, unsigned long r19) { - if (thread_info_flags & _TIF_SIGPENDING) - do_signal(regs, sw, r0, r19); - - if (thread_info_flags & _TIF_NOTIFY_RESUME) { - clear_thread_flag(TIF_NOTIFY_RESUME); - tracehook_notify_resume(regs); - } + do { + if (thread_flags & _TIF_NEED_RESCHED) { + schedule(); + } else { + local_irq_enable(); + if (thread_flags & _TIF_SIGPENDING) { + do_signal(regs, sw, r0, r19); + r0 = 0; + } else { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } + } + local_irq_disable(); + thread_flags = current_thread_info()->flags; + } while (thread_flags & _TIF_WORK_MASK); } -- cgit v0.10.2 From d9d0738a898dd26a417d00a6923eef1015d33735 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Sep 2012 18:53:18 -0400 Subject: alpha: don't bother passing switch_stack separately from regs It's needed only in setup_sigcontext() and it's always reg - ; no point passing it all way down through the call chain. This is just the signal.c side of that stuff; next will come the asm glue one... Signed-off-by: Al Viro diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index 235a867..d0b3507 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -298,8 +298,9 @@ get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size) static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, - struct switch_stack *sw, unsigned long mask, unsigned long sp) + unsigned long mask, unsigned long sp) { + struct switch_stack *sw = (struct switch_stack *)regs - 1; long i, err = 0; err |= __put_user(on_sig_stack((unsigned long)sc), &sc->sc_onstack); @@ -354,7 +355,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, - struct pt_regs *regs, struct switch_stack * sw) + struct pt_regs *regs) { unsigned long oldsp, r26, err = 0; struct sigframe __user *frame; @@ -364,7 +365,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; - err |= setup_sigcontext(&frame->sc, regs, sw, set->sig[0], oldsp); + err |= setup_sigcontext(&frame->sc, regs, set->sig[0], oldsp); if (err) return -EFAULT; @@ -401,7 +402,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs, struct switch_stack * sw) + sigset_t *set, struct pt_regs *regs) { unsigned long oldsp, r26, err = 0; struct rt_sigframe __user *frame; @@ -420,7 +421,7 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(oldsp), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, sw, + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], oldsp); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) @@ -464,15 +465,15 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, */ static inline void handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info, - struct pt_regs * regs, struct switch_stack *sw) + struct pt_regs * regs) { sigset_t *oldset = sigmask_to_save(); int ret; if (ka->sa.sa_flags & SA_SIGINFO) - ret = setup_rt_frame(sig, ka, info, oldset, regs, sw); + ret = setup_rt_frame(sig, ka, info, oldset, regs); else - ret = setup_frame(sig, ka, oldset, regs, sw); + ret = setup_frame(sig, ka, oldset, regs); if (ret) { force_sigsegv(sig, current); @@ -519,8 +520,7 @@ syscall_restart(unsigned long r0, unsigned long r19, * all (if we get here from anything but a syscall return, it will be 0) */ static void -do_signal(struct pt_regs * regs, struct switch_stack * sw, - unsigned long r0, unsigned long r19) +do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19) { siginfo_t info; int signr; @@ -537,7 +537,7 @@ do_signal(struct pt_regs * regs, struct switch_stack * sw, /* Whee! Actually deliver the signal. */ if (r0) syscall_restart(r0, r19, regs, &ka); - handle_signal(signr, &ka, &info, regs, sw); + handle_signal(signr, &ka, &info, regs); if (single_stepping) ptrace_set_bpt(current); /* re-set bpt */ return; @@ -568,7 +568,7 @@ do_signal(struct pt_regs * regs, struct switch_stack * sw, } void -do_work_pending(struct pt_regs *regs, struct switch_stack *sw, +do_work_pending(struct pt_regs *regs, struct switch_stack *unused, unsigned long thread_flags, unsigned long r0, unsigned long r19) { @@ -578,7 +578,7 @@ do_work_pending(struct pt_regs *regs, struct switch_stack *sw, } else { local_irq_enable(); if (thread_flags & _TIF_SIGPENDING) { - do_signal(regs, sw, r0, r19); + do_signal(regs, r0, r19); r0 = 0; } else { clear_thread_flag(TIF_NOTIFY_RESUME); -- cgit v0.10.2 From cb450766bcafc7bd7d40e9a5a0050745e8c68b3e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Oct 2012 23:50:59 -0400 Subject: alpha: get rid of switch_stack argument of do_work_pending() ... and now the asm glue side of that. Signed-off-by: Al Viro diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index 0c30cce..16e074f 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -311,7 +311,7 @@ entSys: .align 4 ret_from_sys_call: - cmovne $26, 0, $19 /* $19 = 0 => non-restartable */ + cmovne $26, 0, $18 /* $18 = 0 => non-restartable */ ldq $0, SP_OFF($sp) and $0, 8, $0 beq $0, ret_to_kernel @@ -320,8 +320,8 @@ ret_to_user: sampling and the rti. */ lda $16, 7 call_pal PAL_swpipl - ldl $5, TI_FLAGS($8) - and $5, _TIF_WORK_MASK, $2 + ldl $17, TI_FLAGS($8) + and $17, _TIF_WORK_MASK, $2 bne $2, work_pending restore_all: RESTORE_ALL @@ -341,10 +341,10 @@ $syscall_error: * frame to indicate that a negative return value wasn't an * error number.. */ - ldq $19, 0($sp) /* old syscall nr (zero if success) */ - beq $19, $ret_success + ldq $18, 0($sp) /* old syscall nr (zero if success) */ + beq $18, $ret_success - ldq $20, 72($sp) /* .. and this a3 */ + ldq $19, 72($sp) /* .. and this a3 */ subq $31, $0, $0 /* with error in v0 */ addq $31, 1, $1 /* set a3 for errno return */ stq $0, 0($sp) @@ -362,34 +362,32 @@ $ret_success: * Do all cleanup when returning from all interrupts and system calls. * * Arguments: - * $5: TI_FLAGS. * $8: current. - * $19: The old syscall number, or zero if this is not a return + * $17: TI_FLAGS. + * $18: The old syscall number, or zero if this is not a return * from a syscall that errored and is possibly restartable. - * $20: The old a3 value + * $19: The old a3 value */ .align 4 .ent work_pending work_pending: - and $5, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING, $2 + and $17, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING, $2 bne $2, $work_notifysig $work_resched: /* * We can get here only if we returned from syscall without SIGPENDING * or got through work_notifysig already. Either case means no syscall - * restarts for us, so let $19 and $20 burn. + * restarts for us, so let $18 and $19 burn. */ jsr $26, schedule - mov 0, $19 + mov 0, $18 br ret_to_user $work_notifysig: mov $sp, $16 bsr $1, do_switch_stack - mov $sp, $17 - mov $5, $18 jsr $26, do_work_pending bsr $1, undo_switch_stack br restore_all @@ -440,9 +438,9 @@ $strace_success: .align 3 $strace_error: - ldq $19, 0($sp) /* old syscall nr (zero if success) */ - beq $19, $strace_success - ldq $20, 72($sp) /* .. and this a3 */ + ldq $18, 0($sp) /* old syscall nr (zero if success) */ + beq $18, $strace_success + ldq $19, 72($sp) /* .. and this a3 */ subq $31, $0, $0 /* with error in v0 */ addq $31, 1, $1 /* set a3 for errno return */ @@ -450,11 +448,11 @@ $strace_error: stq $1, 72($sp) /* a3 for return */ bsr $1, do_switch_stack - mov $19, $9 /* save old syscall number */ - mov $20, $10 /* save old a3 */ + mov $18, $9 /* save old syscall number */ + mov $19, $10 /* save old a3 */ jsr $26, syscall_trace_leave - mov $9, $19 - mov $10, $20 + mov $9, $18 + mov $10, $19 bsr $1, undo_switch_stack mov $31, $26 /* tell "ret_from_sys_call" we can restart */ diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index d0b3507..32575f8 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -568,8 +568,7 @@ do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19) } void -do_work_pending(struct pt_regs *regs, struct switch_stack *unused, - unsigned long thread_flags, +do_work_pending(struct pt_regs *regs, unsigned long thread_flags, unsigned long r0, unsigned long r19) { do { -- cgit v0.10.2 From ba4df2808a86f8b103c4db0b8807649383e9bd13 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 2 Oct 2012 15:29:10 -0400 Subject: don't bother with kernel_thread/kernel_execve for launching linuxrc exec_usermodehelper_fns() will do just fine... Signed-off-by: Al Viro diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 135959a2..5e4ded5 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -16,13 +16,13 @@ #include #include #include +#include #include "do_mounts.h" unsigned long initrd_start, initrd_end; int initrd_below_start_ok; unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */ -static int __initdata old_fd, root_fd; static int __initdata mount_initrd = 1; static int __init no_initrd(char *str) @@ -33,33 +33,29 @@ static int __init no_initrd(char *str) __setup("noinitrd", no_initrd); -static int __init do_linuxrc(void *_shell) +static int init_linuxrc(struct subprocess_info *info, struct cred *new) { - static const char *argv[] = { "linuxrc", NULL, }; - extern const char *envp_init[]; - const char *shell = _shell; - - sys_close(old_fd);sys_close(root_fd); + sys_unshare(CLONE_FS | CLONE_FILES); + /* move initrd over / and chdir/chroot in initrd root */ + sys_chdir("/root"); + sys_mount(".", "/", NULL, MS_MOVE, NULL); + sys_chroot("."); sys_setsid(); - return kernel_execve(shell, argv, envp_init); + return 0; } static void __init handle_initrd(void) { + static char *argv[] = { "linuxrc", NULL, }; + extern char *envp_init[]; int error; - int pid; real_root_dev = new_encode_dev(ROOT_DEV); create_dev("/dev/root.old", Root_RAM0); /* mount initrd on rootfs' /root */ mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY); sys_mkdir("/old", 0700); - root_fd = sys_open("/", 0, 0); - old_fd = sys_open("/old", 0, 0); - /* move initrd over / and chdir/chroot in initrd root */ - sys_chdir("/root"); - sys_mount(".", "/", NULL, MS_MOVE, NULL); - sys_chroot("."); + sys_chdir("/old"); /* * In case that a resume from disk is carried out by linuxrc or one of @@ -67,27 +63,22 @@ static void __init handle_initrd(void) */ current->flags |= PF_FREEZER_SKIP; - pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD); - if (pid > 0) - while (pid != sys_wait4(-1, NULL, 0, NULL)) - yield(); + call_usermodehelper_fns("/linuxrc", argv, envp_init, UMH_WAIT_PROC, + init_linuxrc, NULL, NULL); current->flags &= ~PF_FREEZER_SKIP; /* move initrd to rootfs' /old */ - sys_fchdir(old_fd); - sys_mount("/", ".", NULL, MS_MOVE, NULL); + sys_mount("..", ".", NULL, MS_MOVE, NULL); /* switch root and cwd back to / of rootfs */ - sys_fchdir(root_fd); - sys_chroot("."); - sys_close(old_fd); - sys_close(root_fd); + sys_chroot(".."); if (new_decode_dev(real_root_dev) == Root_RAM0) { sys_chdir("/old"); return; } + sys_chdir("/"); ROOT_DEV = new_decode_dev(real_root_dev); mount_root(); -- cgit v0.10.2 From ecf89e581acce83e8cd2a5530858be22c64441f7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 2 Oct 2012 15:32:10 -0400 Subject: ppc: eeh_event should just use kthread_run() Signed-off-by: Al Viro diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c index fb50631..75e2ecc 100644 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ b/arch/powerpc/platforms/pseries/eeh_event.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -59,8 +60,6 @@ static int eeh_event_handler(void * dummy) struct eeh_event *event; struct eeh_dev *edev; - set_task_comm(current, "eehd"); - spin_lock_irqsave(&eeh_eventlist_lock, flags); event = NULL; @@ -113,7 +112,7 @@ static int eeh_event_handler(void * dummy) */ static void eeh_thread_launcher(struct work_struct *dummy) { - if (kernel_thread(eeh_event_handler, NULL, CLONE_KERNEL) < 0) + if (IS_ERR(kthread_run(eeh_event_handler, NULL, "eehd"))) printk(KERN_ERR "Failed to start EEH daemon\n"); } -- cgit v0.10.2 From d6b2123802d2b7eee8c62cd0ebd73e8636cbb068 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Oct 2012 19:57:26 -0400 Subject: make sure that we always have a return path from kernel_execve() The only place where kernel_execve() is called without a way to return to the caller of kernel_thread() callback is kernel_post(). Reorganize kernel_init()/kernel_post() - instead of the former calling the latter in the end (and getting freed by it), have the latter *begin* with calling the former (and turn the latter into kernel_thread() callback, of course). Signed-off-by: Al Viro diff --git a/init/main.c b/init/main.c index b286730..a490ffe 100644 --- a/init/main.c +++ b/init/main.c @@ -794,11 +794,11 @@ static void run_init_process(const char *init_filename) kernel_execve(init_filename, argv_init, envp_init); } -/* This is a non __init function. Force it to be noinline otherwise gcc - * makes it inline to init() and it becomes part of init.text section - */ -static noinline int init_post(void) +static void __init kernel_init_freeable(void); + +static int __ref kernel_init(void *unused) { + kernel_init_freeable(); /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); free_initmem(); @@ -835,7 +835,7 @@ static noinline int init_post(void) "See Linux Documentation/init.txt for guidance."); } -static int __init kernel_init(void * unused) +static void __init kernel_init_freeable(void) { /* * Wait until kthreadd is all set-up. @@ -890,7 +890,4 @@ static int __init kernel_init(void * unused) * we're essentially up and running. Get rid of the * initmem segments and start the user-mode stuff.. */ - - init_post(); - return 0; } -- cgit v0.10.2 From fb45550d76bb584857cf0ea3be79fa78207a3cff Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Oct 2012 20:09:44 -0400 Subject: make sure that kernel_thread() callbacks call do_exit() themselves Most of them never returned anyway - only two functions had to be changed. That allows to simplify their callers a whole lot. Note that this does *not* apply to kthread_run() callbacks - all of those had been called from the same kernel_thread() callback, which did do_exit() already. This is strictly about very few low-level kernel_thread() callbacks (there are only 6 of those, mostly as part of kthread.h and kmod.h exported mechanisms, plus kernel_init() itself). Signed-off-by: Al Viro diff --git a/kernel/kmod.c b/kernel/kmod.c index 6f99aea..b6e5ca9 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -225,7 +225,7 @@ static int ____call_usermodehelper(void *data) /* Exec failed? */ fail: sub_info->retval = retval; - return 0; + do_exit(0); } static int call_helper(void *data) @@ -292,7 +292,7 @@ static int wait_for_helper(void *data) } umh_complete(sub_info); - return 0; + do_exit(0); } /* This is run by khelper thread */ -- cgit v0.10.2 From a74fb73c12398b250fdc5e333a11e15a9e3a84fc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Oct 2012 21:28:25 -0400 Subject: infrastructure for saner ret_from_kernel_thread semantics * allow kernel_execve() leave the actual return to userland to caller (selected by CONFIG_GENERIC_KERNEL_EXECVE). Callers updated accordingly. * architecture that does select GENERIC_KERNEL_EXECVE in its Kconfig should have its ret_from_kernel_thread() do this: call schedule_tail call the callback left for it by copy_thread(); if it ever returns, that's because it has just done successful kernel_execve() jump to return from syscall IOW, its only difference from ret_from_fork() is that it does call the callback. * such an architecture should also get rid of ret_from_kernel_execve() and __ARCH_WANT_KERNEL_EXECVE This is the last part of infrastructure patches in that area - from that point on work on different architectures can live independently. Signed-off-by: Al Viro diff --git a/arch/Kconfig b/arch/Kconfig index d397e11..d27efb9 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -261,6 +261,9 @@ config ARCH_WANT_OLD_COMPAT_IPC config GENERIC_KERNEL_THREAD bool +config GENERIC_KERNEL_EXECVE + bool + config HAVE_ARCH_SECCOMP_FILTER bool help diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 19439c7..727f0cd7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -827,7 +827,15 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, const char __user *pathname); asmlinkage long sys_syncfs(int fd); +#ifndef CONFIG_GENERIC_KERNEL_EXECVE int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]); +#else +#define kernel_execve(filename, argv, envp) \ + do_execve(filename, \ + (const char __user *const __user *)argv, \ + (const char __user *const __user *)envp, \ + current_pt_regs()) +#endif asmlinkage long sys_perf_event_open( diff --git a/init/main.c b/init/main.c index a490ffe..02df2dd 100644 --- a/init/main.c +++ b/init/main.c @@ -69,6 +69,7 @@ #include #include #include +#include #include #include @@ -788,10 +789,10 @@ static void __init do_pre_smp_initcalls(void) do_one_initcall(*fn); } -static void run_init_process(const char *init_filename) +static int run_init_process(const char *init_filename) { argv_init[0] = init_filename; - kernel_execve(init_filename, argv_init, envp_init); + return kernel_execve(init_filename, argv_init, envp_init); } static void __init kernel_init_freeable(void); @@ -810,7 +811,8 @@ static int __ref kernel_init(void *unused) flush_delayed_fput(); if (ramdisk_execute_command) { - run_init_process(ramdisk_execute_command); + if (!run_init_process(ramdisk_execute_command)) + return 0; printk(KERN_WARNING "Failed to execute %s\n", ramdisk_execute_command); } @@ -822,14 +824,16 @@ static int __ref kernel_init(void *unused) * trying to recover a really broken machine. */ if (execute_command) { - run_init_process(execute_command); + if (!run_init_process(execute_command)) + return 0; printk(KERN_WARNING "Failed to execute %s. Attempting " "defaults...\n", execute_command); } - run_init_process("/sbin/init"); - run_init_process("/etc/init"); - run_init_process("/bin/init"); - run_init_process("/bin/sh"); + if (!run_init_process("/sbin/init") || + !run_init_process("/etc/init") || + !run_init_process("/bin/init") || + !run_init_process("/bin/sh")) + return 0; panic("No init found. Try passing init= option to kernel. " "See Linux Documentation/init.txt for guidance."); diff --git a/kernel/kmod.c b/kernel/kmod.c index b6e5ca9..1c317e3 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -221,6 +222,8 @@ static int ____call_usermodehelper(void *data) retval = kernel_execve(sub_info->path, (const char *const *)sub_info->argv, (const char *const *)sub_info->envp); + if (!retval) + return 0; /* Exec failed? */ fail: diff --git a/kernel/kthread.c b/kernel/kthread.c index b579af5..7ba65c1 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -16,6 +16,7 @@ #include #include #include +#include #include static DEFINE_SPINLOCK(kthread_create_lock); -- cgit v0.10.2 From 22e2430d60dbdfcdd732a086e9ef2dbd74c266d1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Oct 2012 21:35:42 -0400 Subject: x86, um: convert to saner kernel_execve() semantics Signed-off-by: Al Viro diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 5d9ab0c..62435a0 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -26,7 +26,6 @@ struct thread_struct { jmp_buf *fault_catcher; struct task_struct *prev_sched; unsigned long temp_stack; - jmp_buf *exec_buf; struct arch_thread arch; jmp_buf switch_buf; int mm_count; @@ -54,7 +53,6 @@ struct thread_struct { .fault_addr = NULL, \ .prev_sched = NULL, \ .temp_stack = 0, \ - .exec_buf = NULL, \ .arch = INIT_ARCH_THREAD, \ .request = { 0 } \ } diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 89b686c1..25dbd37 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -191,7 +191,6 @@ extern int os_getpid(void); extern int os_getpgrp(void); extern void init_new_thread_signals(void); -extern int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr); extern int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, int r, int w, int x); diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index e427301..565ca39 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -47,8 +47,3 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) #endif } EXPORT_SYMBOL(start_thread); - -void __noreturn ret_from_kernel_execve(struct pt_regs *unused) -{ - UML_LONGJMP(current->thread.exec_buf, 1); -} diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index a1b50ad..94b0d8b 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -135,14 +135,10 @@ void new_thread_handler(void) arg = current->thread.request.u.thread.arg; /* - * The return value is 1 if the kernel thread execs a process, - * 0 if it just exits + * callback returns only if the kernel thread execs a process */ - n = run_kernel_thread(fn, arg, ¤t->thread.exec_buf); - if (n == 1) - userspace(¤t->thread.regs.regs); - else - do_exit(0); + n = fn(arg); + userspace(¤t->thread.regs.regs); } /* Called magically, see new_thread_handler above */ diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index 307f173..a04ec16 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -244,16 +244,3 @@ void init_new_thread_signals(void) signal(SIGWINCH, SIG_IGN); signal(SIGTERM, SIG_DFL); } - -int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr) -{ - jmp_buf buf; - int n; - - *jmp_ptr = &buf; - n = UML_SETJMP(&buf); - if (n != 0) - return n; - (*fn)(arg); - return 0; -} diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d93eb9d..45edcba 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -98,6 +98,7 @@ config X86 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_KERNEL_THREAD + select GENERIC_KERNEL_EXECVE config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS || UPROBES) diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 55d1555..16f3fc6 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -51,7 +51,6 @@ # define __ARCH_WANT_SYS_UTIME # define __ARCH_WANT_SYS_WAITPID # define __ARCH_WANT_SYS_EXECVE -# define __ARCH_WANT_KERNEL_EXECVE /* * "Conditional" syscalls diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index fe4cc30..91d2959 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -298,12 +298,20 @@ ENTRY(ret_from_fork) CFI_ENDPROC END(ret_from_fork) -ENTRY(ret_from_kernel_execve) - movl %eax, %esp - movl $0,PT_EAX(%esp) +ENTRY(ret_from_kernel_thread) + CFI_STARTPROC + pushl_cfi %eax + call schedule_tail GET_THREAD_INFO(%ebp) + popl_cfi %eax + pushl_cfi $0x0202 # Reset kernel eflags + popfl_cfi + movl PT_EBP(%esp),%eax + call *PT_EBX(%esp) + movl $0,PT_EAX(%esp) jmp syscall_exit -END(ret_from_kernel_execve) + CFI_ENDPROC +ENDPROC(ret_from_kernel_thread) /* * Interrupt exit functions should be protected against kprobes @@ -994,21 +1002,6 @@ END(spurious_interrupt_bug) */ .popsection -ENTRY(ret_from_kernel_thread) - CFI_STARTPROC - pushl_cfi %eax - call schedule_tail - GET_THREAD_INFO(%ebp) - popl_cfi %eax - pushl_cfi $0x0202 # Reset kernel eflags - popfl_cfi - movl PT_EBP(%esp),%eax - call *PT_EBX(%esp) - call do_exit - ud2 # padding for call trace - CFI_ENDPROC -ENDPROC(ret_from_kernel_thread) - #ifdef CONFIG_XEN /* Xen doesn't set %esp to be precisely what the normal sysenter entrypoint expects, so fix it up before using the normal path. */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 053c955..e1f98c2 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -459,15 +459,13 @@ ENTRY(ret_from_fork) jmp ret_from_sys_call # go to the SYSRET fastpath 1: - subq $REST_SKIP, %rsp # move the stack pointer back + subq $REST_SKIP, %rsp # leave space for volatiles CFI_ADJUST_CFA_OFFSET REST_SKIP movq %rbp, %rdi call *%rbx - # exit - mov %eax, %edi - call do_exit - ud2 # padding for call trace - + movl $0, RAX(%rsp) + RESTORE_REST + jmp int_ret_from_sys_call CFI_ENDPROC END(ret_from_fork) @@ -1214,20 +1212,6 @@ bad_gs: jmp 2b .previous -ENTRY(ret_from_kernel_execve) - movq %rdi, %rsp - movl $0, RAX(%rsp) - // RESTORE_REST - movq 0*8(%rsp), %r15 - movq 1*8(%rsp), %r14 - movq 2*8(%rsp), %r13 - movq 3*8(%rsp), %r12 - movq 4*8(%rsp), %rbp - movq 5*8(%rsp), %rbx - addq $(6*8), %rsp - jmp int_ret_from_sys_call -END(ret_from_kernel_execve) - /* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(call_softirq) CFI_STARTPROC diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index da85b6f..cab8eb8 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -14,6 +14,7 @@ config UML_X86 def_bool y select GENERIC_FIND_FIRST_BIT select GENERIC_KERNEL_THREAD + select GENERIC_KERNEL_EXECVE config 64BIT bool "64-bit kernel" if SUBARCH = "x86" -- cgit v0.10.2 From 9fff2fa0db911b0b75ec1f9bec72460c0a676ef5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Oct 2012 22:23:29 -0400 Subject: arm: switch to saner kernel_execve() semantics Signed-off-by: Al Viro diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a949eec2..ea3ad06 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -50,6 +50,7 @@ config ARM select GENERIC_STRNLEN_USER select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN select GENERIC_KERNEL_THREAD + select GENERIC_KERNEL_EXECVE help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 6a70aa4..984ad42 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -470,7 +470,6 @@ #define __ARCH_WANT_SYS_SOCKETCALL #endif #define __ARCH_WANT_SYS_EXECVE -#define __ARCH_WANT_KERNEL_EXECVE /* * "Conditional" syscalls diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index ed79412..9106966 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -86,35 +86,14 @@ ENDPROC(ret_to_user) */ ENTRY(ret_from_fork) bl schedule_tail + cmp r5, #0 + movne r0, r4 + movne lr, pc + movne pc, r5 get_thread_info tsk - mov why, #1 b ret_slow_syscall ENDPROC(ret_from_fork) -ENTRY(ret_from_kernel_thread) - UNWIND(.fnstart) - UNWIND(.cantunwind) - bl schedule_tail - mov r0, r4 - adr lr, BSYM(1f) @ kernel threads should not exit - mov pc, r5 -1: bl do_exit - nop - UNWIND(.fnend) -ENDPROC(ret_from_kernel_thread) - -/* - * turn a kernel thread into userland process - * use: ret_from_kernel_execve(struct pt_regs *normal) - */ -ENTRY(ret_from_kernel_execve) - mov why, #0 @ not a syscall - str why, [r0, #S_R0] @ ... and we want 0 in ->ARM_r0 as well - get_thread_info tsk @ thread structure - mov sp, r0 @ stack pointer just under pt_regs - b ret_slow_syscall -ENDPROC(ret_from_kernel_execve) - .equ NR_syscalls,0 #define CALL(x) .equ NR_syscalls,NR_syscalls+1 #include "calls.S" diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index c10e439..0f83fa2 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -373,7 +373,6 @@ void release_thread(struct task_struct *dead_task) } asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); -asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); int copy_thread(unsigned long clone_flags, unsigned long stack_start, @@ -388,13 +387,13 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, *childregs = *regs; childregs->ARM_r0 = 0; childregs->ARM_sp = stack_start; - thread->cpu_context.pc = (unsigned long)ret_from_fork; } else { + memset(childregs, 0, sizeof(struct pt_regs)); thread->cpu_context.r4 = stk_sz; thread->cpu_context.r5 = stack_start; - thread->cpu_context.pc = (unsigned long)ret_from_kernel_thread; childregs->ARM_cpsr = SVC_MODE; } + thread->cpu_context.pc = (unsigned long)ret_from_fork; thread->cpu_context.sp = (unsigned long)childregs; clear_ptrace_hw_breakpoint(p); -- cgit v0.10.2 From 5522be6a4624a5f505555569e4d9cee946630686 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Oct 2012 23:12:01 -0400 Subject: alpha: switch to saner kernel_execve() semantics Signed-off-by: Al Viro diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 7da9124..7a08cfb 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -21,6 +21,7 @@ config ALPHA select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_KERNEL_THREAD + select GENERIC_KERNEL_EXECVE help The Alpha is a 64-bit general-purpose processor designed and marketed by the Digital Equipment Corporation of blessed memory, diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index 3cb6c11..7826e22 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -482,7 +482,6 @@ #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_EXECVE -#define __ARCH_WANT_KERNEL_EXECVE /* "Conditional" syscalls. What we want is diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index 16e074f..a760783 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -603,24 +603,9 @@ ret_from_kernel_thread: mov $9, $27 mov $10, $16 jsr $26, ($9) - ldgp $gp, 0($26) - mov $0, $16 - mov $31, $26 - jmp $31, sys_exit -.end ret_from_kernel_thread - - .globl ret_from_kernel_execve - .align 4 - .ent ret_from_kernel_execve -ret_from_kernel_execve: - mov $16, $sp - /* Avoid the HAE being gratuitously wrong, to avoid restoring it. */ - ldq $2, alpha_mv+HAE_CACHE - stq $2, 152($sp) /* HAE */ mov $31, $19 /* to disable syscall restarts */ br $31, ret_to_user - -.end ret_from_kernel_execve +.end ret_from_kernel_thread /* -- cgit v0.10.2