From 616d24835eeafa8ef3466479db028abfdfc77531 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 13 Aug 2016 12:38:20 -0400 Subject: sched/x86: Pass kernel thread parameters in 'struct fork_frame' Instead of setting up a fake pt_regs context, put the kernel thread function pointer and arg into the unused callee-restored registers of 'struct fork_frame'. Signed-off-by: Brian Gerst Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1471106302-10159-6-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index bf8f221..b75a8bc 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -240,35 +240,34 @@ END(__switch_to_asm) * A newly forked process directly context switches into this address. * * eax: prev task we switched from + * ebx: kernel thread func (NULL for user thread) + * edi: kernel thread arg */ ENTRY(ret_from_fork) pushl %eax call schedule_tail popl %eax + testl %ebx, %ebx + jnz 1f /* kernel threads are uncommon */ + +2: /* When we fork, we trace the syscall return in the child, too. */ movl %esp, %eax call syscall_return_slowpath jmp restore_all -END(ret_from_fork) - -ENTRY(ret_from_kernel_thread) - pushl %eax - call schedule_tail - popl %eax - movl PT_EBP(%esp), %eax - call *PT_EBX(%esp) - movl $0, PT_EAX(%esp) + /* kernel thread */ +1: movl %edi, %eax + call *%ebx /* - * Kernel threads return to userspace as if returning from a syscall. - * We should check whether anything actually uses this path and, if so, - * consider switching it over to ret_from_fork. + * A kernel thread is allowed to return here after successfully + * calling do_execve(). Exit to userspace to complete the execve() + * syscall. */ - movl %esp, %eax - call syscall_return_slowpath - jmp restore_all -ENDPROC(ret_from_kernel_thread) + movl $0, PT_EAX(%esp) + jmp 2b +END(ret_from_fork) /* * Return to user mode is not as complex as all this looks, diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index c1af8ac..c0373d6 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -407,37 +407,34 @@ END(__switch_to_asm) * A newly forked process directly context switches into this address. * * rax: prev task we switched from + * rbx: kernel thread func (NULL for user thread) + * r12: kernel thread arg */ ENTRY(ret_from_fork) movq %rax, %rdi call schedule_tail /* rdi: 'prev' task parameter */ - testb $3, CS(%rsp) /* from kernel_thread? */ - jnz 1f - - /* - * We came from kernel_thread. This code path is quite twisted, and - * someone should clean it up. - * - * copy_thread_tls stashes the function pointer in RBX and the - * parameter to be passed in RBP. The called function is permitted - * to call do_execve and thereby jump to user mode. - */ - movq RBP(%rsp), %rdi - call *RBX(%rsp) - movl $0, RAX(%rsp) - - /* - * Fall through as though we're exiting a syscall. This makes a - * twisted sort of sense if we just called do_execve. - */ + testq %rbx, %rbx /* from kernel_thread? */ + jnz 1f /* kernel threads are uncommon */ -1: +2: movq %rsp, %rdi call syscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ SWAPGS jmp restore_regs_and_iret + +1: + /* kernel thread */ + movq %r12, %rdi + call *%rbx + /* + * A kernel thread is allowed to return here after successfully + * calling do_execve(). Exit to userspace to complete the execve() + * syscall. + */ + movq $0, RAX(%rsp) + jmp 2b END(ret_from_fork) /* diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 886d5ea..5cb436a 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -34,6 +34,8 @@ static inline void prepare_switch_to(struct task_struct *prev, #endif } +asmlinkage void ret_from_fork(void); + /* data that is pointed to by thread.sp */ struct inactive_task_frame { #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4bedbc0..18714a1 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -55,9 +55,6 @@ #include #include -asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); -asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); - /* * Return saved PC of a blocked thread. */ @@ -139,6 +136,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, int err; frame->bp = 0; + frame->ret_addr = (unsigned long) ret_from_fork; p->thread.sp = (unsigned long) fork_frame; p->thread.sp0 = (unsigned long) (childregs+1); memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); @@ -146,25 +144,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - frame->ret_addr = (unsigned long) ret_from_kernel_thread; - task_user_gs(p) = __KERNEL_STACK_CANARY; - childregs->ds = __USER_DS; - childregs->es = __USER_DS; - childregs->fs = __KERNEL_PERCPU; - childregs->bx = sp; /* function */ - childregs->bp = arg; - childregs->orig_ax = -1; - childregs->cs = __KERNEL_CS | get_kernel_rpl(); - childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; + frame->bx = sp; /* function */ + frame->di = arg; p->thread.io_bitmap_ptr = NULL; return 0; } + frame->bx = 0; *childregs = *current_pt_regs(); childregs->ax = 0; if (sp) childregs->sp = sp; - frame->ret_addr = (unsigned long) ret_from_fork; task_user_gs(p) = get_user_gs(current_pt_regs()); p->thread.io_bitmap_ptr = NULL; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 827eeed..b812cd0 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -50,8 +50,6 @@ #include #include -asmlinkage extern void ret_from_fork(void); - __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); /* Prints also some state that isn't saved in the pt_regs */ @@ -165,15 +163,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->sp = (unsigned long)childregs; - childregs->ss = __KERNEL_DS; - childregs->bx = sp; /* function */ - childregs->bp = arg; - childregs->orig_ax = -1; - childregs->cs = __KERNEL_CS | get_kernel_rpl(); - childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; + frame->bx = sp; /* function */ + frame->r12 = arg; return 0; } + frame->bx = 0; *childregs = *current_pt_regs(); childregs->ax = 0; -- cgit v0.10.2