From 0e087a7329f7bbcbe14d936da93d0026d4ea3860 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 29 Aug 2013 18:21:04 +0200 Subject: ptrace: fix ptrace vs tasklist_lock race As explained by Alexander Fyodorov : |read_lock(&tasklist_lock) in ptrace_stop() is converted to mutex on RT kernel, |and it can remove __TASK_TRACED from task->state (by moving it to |task->saved_state). If parent does wait() on child followed by a sys_ptrace |call, the following race can happen: | |- child sets __TASK_TRACED in ptrace_stop() |- parent does wait() which eventually calls wait_task_stopped() and returns | child's pid |- child blocks on read_lock(&tasklist_lock) in ptrace_stop() and moves | __TASK_TRACED flag to saved_state |- parent calls sys_ptrace, which calls ptrace_check_attach() and wait_task_inactive() The patch is based on his initial patch where an additional check is added in case the __TASK_TRACED moved to ->saved_state. The pi_lock is taken in case the caller is interrupted between looking into ->state and ->saved_state. Signed-off-by: Sebastian Andrzej Siewior diff --git a/include/linux/sched.h b/include/linux/sched.h index dffe621..0ab4eab 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -169,11 +169,8 @@ extern char ___assert_task_state[1 - 2*!!( TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ __TASK_TRACED) -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) #define task_is_dead(task) ((task)->exit_state != 0) -#define task_is_stopped_or_traced(task) \ - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) #define task_contributes_to_load(task) \ ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ (task->flags & PF_FROZEN) == 0) @@ -2453,6 +2450,51 @@ static inline int need_resched(void) return unlikely(test_thread_flag(TIF_NEED_RESCHED)); } +static inline bool __task_is_stopped_or_traced(struct task_struct *task) +{ + if (task->state & (__TASK_STOPPED | __TASK_TRACED)) + return true; +#ifdef CONFIG_PREEMPT_RT_FULL + if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED)) + return true; +#endif + return false; +} + +static inline bool task_is_stopped_or_traced(struct task_struct *task) +{ + bool traced_stopped; + +#ifdef CONFIG_PREEMPT_RT_FULL + unsigned long flags; + + raw_spin_lock_irqsave(&task->pi_lock, flags); + traced_stopped = __task_is_stopped_or_traced(task); + raw_spin_unlock_irqrestore(&task->pi_lock, flags); +#else + traced_stopped = __task_is_stopped_or_traced(task); +#endif + return traced_stopped; +} + +static inline bool task_is_traced(struct task_struct *task) +{ + bool traced = false; + + if (task->state & __TASK_TRACED) + return true; +#ifdef CONFIG_PREEMPT_RT_FULL + /* in case the task is sleeping on tasklist_lock */ + raw_spin_lock_irq(&task->pi_lock); + if (task->state & __TASK_TRACED) + traced = true; + else if (task->saved_state & __TASK_TRACED) + traced = true; + raw_spin_unlock_irq(&task->pi_lock); +#endif + return traced; +} + /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1f4bcb3..fddaf65 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -135,7 +135,12 @@ static bool ptrace_freeze_traced(struct task_struct *task) spin_lock_irq(&task->sighand->siglock); if (task_is_traced(task) && !__fatal_signal_pending(task)) { - task->state = __TASK_TRACED; + raw_spin_lock_irq(&task->pi_lock); + if (task->state & __TASK_TRACED) + task->state = __TASK_TRACED; + else + task->saved_state = __TASK_TRACED; + raw_spin_unlock_irq(&task->pi_lock); ret = true; } spin_unlock_irq(&task->sighand->siglock); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 72801e0..ae6b232 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1024,6 +1024,18 @@ struct migration_arg { static int migration_cpu_stop(void *data); +static bool check_task_state(struct task_struct *p, long match_state) +{ + bool match = false; + + raw_spin_lock_irq(&p->pi_lock); + if (p->state == match_state || p->saved_state == match_state) + match = true; + raw_spin_unlock_irq(&p->pi_lock); + + return match; +} + /* * wait_task_inactive - wait for a thread to unschedule. * @@ -1068,7 +1080,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) * is actually now running somewhere else! */ while (task_running(rq, p)) { - if (match_state && unlikely(p->state != match_state)) + if (match_state && !check_task_state(p, match_state)) return 0; cpu_relax(); } @@ -1083,7 +1095,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) running = task_running(rq, p); on_rq = p->on_rq; ncsw = 0; - if (!match_state || p->state == match_state) + if (!match_state || p->state == match_state + || p->saved_state == match_state) ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ task_rq_unlock(rq, p, &flags); @@ -1579,7 +1592,7 @@ out: */ int wake_up_process(struct task_struct *p) { - WARN_ON(task_is_stopped_or_traced(p)); + WARN_ON(__task_is_stopped_or_traced(p)); return try_to_wake_up(p, TASK_NORMAL, 0); } EXPORT_SYMBOL(wake_up_process); -- cgit v0.10.2