Diffstat (limited to 'kernel/sched')
-rw-r--r--   kernel/sched/core.c     |  569
-rw-r--r--   kernel/sched/cputime.c  |   62
-rw-r--r--   kernel/sched/debug.c    |    7
-rw-r--r--   kernel/sched/fair.c     |   16
-rw-r--r--   kernel/sched/features.h |    7
-rw-r--r--   kernel/sched/rt.c       |    1
-rw-r--r--   kernel/sched/sched.h    |   10
7 files changed, 131 insertions, 541 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8749d20..a494ace 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -272,11 +272,7 @@ late_initcall(sched_init_debug);
  * Number of tasks to iterate in a single balance run.
  * Limited because this is done with IRQs disabled.
  */
-#ifndef CONFIG_PREEMPT_RT_FULL
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
-#else
-const_debug unsigned int sysctl_sched_nr_migrate = 8;
-#endif
 
 /*
  * period over which we average the RT time consumption, measured
@@ -495,7 +491,6 @@ static void init_rq_hrtick(struct rq *rq)
 
     hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
     rq->hrtick_timer.function = hrtick;
-    rq->hrtick_timer.irqsafe = 1;
 }
 #else	/* CONFIG_SCHED_HRTICK */
 static inline void hrtick_clear(struct rq *rq)
@@ -540,37 +535,6 @@ void resched_task(struct task_struct *p)
         smp_send_reschedule(cpu);
 }
 
-#ifdef CONFIG_PREEMPT_LAZY
-void resched_task_lazy(struct task_struct *p)
-{
-    int cpu;
-
-    if (!sched_feat(PREEMPT_LAZY)) {
-        resched_task(p);
-        return;
-    }
-
-    assert_raw_spin_locked(&task_rq(p)->lock);
-
-    if (test_tsk_need_resched(p))
-        return;
-
-    if (test_tsk_need_resched_lazy(p))
-        return;
-
-    set_tsk_need_resched_lazy(p);
-
-    cpu = task_cpu(p);
-    if (cpu == smp_processor_id())
-        return;
-
-    /* NEED_RESCHED_LAZY must be visible before we test polling */
-    smp_mb();
-    if (!tsk_is_polling(p))
-        smp_send_reschedule(cpu);
-}
-#endif
-
 void resched_cpu(int cpu)
 {
     struct rq *rq = cpu_rq(cpu);
@@ -735,17 +699,6 @@ void resched_task(struct task_struct *p)
     assert_raw_spin_locked(&task_rq(p)->lock);
     set_tsk_need_resched(p);
 }
-#ifdef CONFIG_PREEMPT_LAZY
-void resched_task_lazy(struct task_struct *p)
-{
-    if (!sched_feat(PREEMPT_LAZY)) {
-        resched_task(p);
-        return;
-    }
-    assert_raw_spin_locked(&task_rq(p)->lock);
-    set_tsk_need_resched_lazy(p);
-}
-#endif
 #endif /* CONFIG_SMP */
 
 #if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
@@ -1071,18 +1024,6 @@ struct migration_arg {
 
 static int migration_cpu_stop(void *data);
 
-static bool check_task_state(struct task_struct *p, long match_state)
-{
-    bool match = false;
-
-    raw_spin_lock_irq(&p->pi_lock);
-    if (p->state == match_state || p->saved_state == match_state)
-        match = true;
-    raw_spin_unlock_irq(&p->pi_lock);
-
-    return match;
-}
-
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
@@ -1127,7 +1068,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
          * is actually now running somewhere else!
          */
         while (task_running(rq, p)) {
-            if (match_state && !check_task_state(p, match_state))
+            if (match_state && unlikely(p->state != match_state))
                 return 0;
             cpu_relax();
         }
@@ -1142,8 +1083,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
         running = task_running(rq, p);
         on_rq = p->on_rq;
         ncsw = 0;
-        if (!match_state || p->state == match_state
-            || p->saved_state == match_state)
+        if (!match_state || p->state == match_state)
             ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
         task_rq_unlock(rq, p, &flags);
 
@@ -1289,12 +1229,6 @@ out:
         }
     }
 
-    /*
-     * Clear PF_NO_SETAFFINITY, otherwise we wreckage
-     * migrate_disable/enable. See optimization for
-     * PF_NO_SETAFFINITY tasks there.
-     */
-    p->flags &= ~PF_NO_SETAFFINITY;
 
     return dest_cpu;
 }
@@ -1374,6 +1308,10 @@ static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
 {
     activate_task(rq, p, en_flags);
     p->on_rq = 1;
+
+    /* if a worker is waking up, notify workqueue */
+    if (p->flags & PF_WQ_WORKER)
+        wq_worker_waking_up(p, cpu_of(rq));
 }
 
 /*
@@ -1548,27 +1486,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
      */
     smp_mb__before_spinlock();
     raw_spin_lock_irqsave(&p->pi_lock, flags);
-    if (!(p->state & state)) {
-        /*
-         * The task might be running due to a spinlock sleeper
-         * wakeup. Check the saved state and set it to running
-         * if the wakeup condition is true.
-         */
-        if (!(wake_flags & WF_LOCK_SLEEPER)) {
-            if (p->saved_state & state) {
-                p->saved_state = TASK_RUNNING;
-                success = 1;
-            }
-        }
+    if (!(p->state & state))
         goto out;
-    }
-
-    /*
-     * If this is a regular wakeup, then we can unconditionally
-     * clear the saved state of a "lock sleeper".
-     */
-    if (!(wake_flags & WF_LOCK_SLEEPER))
-        p->saved_state = TASK_RUNNING;
 
     success = 1; /* we're going to change ->state */
     cpu = task_cpu(p);
@@ -1611,6 +1530,42 @@ out:
 }
 
 /**
+ * try_to_wake_up_local - try to wake up a local task with rq lock held
+ * @p: the thread to be awakened
+ *
+ * Put @p on the run-queue if it's not already there. The caller must
+ * ensure that this_rq() is locked, @p is bound to this_rq() and not
+ * the current task.
+ */
+static void try_to_wake_up_local(struct task_struct *p)
+{
+    struct rq *rq = task_rq(p);
+
+    if (WARN_ON_ONCE(rq != this_rq()) ||
+        WARN_ON_ONCE(p == current))
+        return;
+
+    lockdep_assert_held(&rq->lock);
+
+    if (!raw_spin_trylock(&p->pi_lock)) {
+        raw_spin_unlock(&rq->lock);
+        raw_spin_lock(&p->pi_lock);
+        raw_spin_lock(&rq->lock);
+    }
+
+    if (!(p->state & TASK_NORMAL))
+        goto out;
+
+    if (!p->on_rq)
+        ttwu_activate(rq, p, ENQUEUE_WAKEUP);
+
+    ttwu_do_wakeup(rq, p, 0);
+    ttwu_stat(p, smp_processor_id(), 0);
+out:
+    raw_spin_unlock(&p->pi_lock);
+}
+
+/**
  * wake_up_process - Wake up a specific process
  * @p: The process to be woken up.
  *
@@ -1624,23 +1579,11 @@ out:
  */
 int wake_up_process(struct task_struct *p)
 {
-    WARN_ON(__task_is_stopped_or_traced(p));
+    WARN_ON(task_is_stopped_or_traced(p));
     return try_to_wake_up(p, TASK_NORMAL, 0);
 }
 EXPORT_SYMBOL(wake_up_process);
 
-/**
- * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
- * @p: The process to be woken up.
- *
- * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
- * the nature of the wakeup.
- */
-int wake_up_lock_sleeper(struct task_struct *p)
-{
-    return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
-}
-
 int wake_up_state(struct task_struct *p, unsigned int state)
 {
     return try_to_wake_up(p, state, 0);
@@ -1778,9 +1721,6 @@ void sched_fork(struct task_struct *p)
     /* Want to start with kernel preemption disabled. */
     task_thread_info(p)->preempt_count = 1;
 #endif
-#ifdef CONFIG_HAVE_PREEMPT_LAZY
-    task_thread_info(p)->preempt_lazy_count = 0;
-#endif
 #ifdef CONFIG_SMP
     plist_node_init(&p->pushable_tasks, MAX_PRIO);
 #endif
@@ -1947,12 +1887,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
     finish_arch_post_lock_switch();
 
     fire_sched_in_preempt_notifiers(current);
-    /*
-     * We use mmdrop_delayed() here so we don't have to do the
-     * full __mmdrop() when we are the last user.
-     */
     if (mm)
-        mmdrop_delayed(mm);
+        mmdrop(mm);
     if (unlikely(prev_state == TASK_DEAD)) {
         /*
          * Remove function-return probe instances associated with this
@@ -2296,13 +2232,8 @@ void __kprobes add_preempt_count(int val)
     DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
                 PREEMPT_MASK - 10);
 #endif
-    if (preempt_count() == val) {
-        unsigned long ip = get_parent_ip(CALLER_ADDR1);
-#ifdef CONFIG_DEBUG_PREEMPT
-        current->preempt_disable_ip = ip;
-#endif
-        trace_preempt_off(CALLER_ADDR0, ip);
-    }
+    if (preempt_count() == val)
+        trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
 }
 EXPORT_SYMBOL(add_preempt_count);
 
@@ -2345,13 +2276,6 @@ static noinline void __schedule_bug(struct task_struct *prev)
     print_modules();
     if (irqs_disabled())
         print_irqtrace_events(prev);
-#ifdef CONFIG_DEBUG_PREEMPT
-    if (in_atomic_preempt_off()) {
-        pr_err("Preemption disabled at:");
-        print_ip_sym(current->preempt_disable_ip);
-        pr_cont("\n");
-    }
-#endif
     dump_stack();
     add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 }
@@ -2375,133 +2299,6 @@ static inline void schedule_debug(struct task_struct *prev)
     schedstat_inc(this_rq(), sched_count);
 }
 
-#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
-#define MIGRATE_DISABLE_SET_AFFIN	(1<<30) /* Can't make a negative */
-#define migrate_disabled_updated(p)	((p)->migrate_disable & MIGRATE_DISABLE_SET_AFFIN)
-#define migrate_disable_count(p)	((p)->migrate_disable & ~MIGRATE_DISABLE_SET_AFFIN)
-
-static inline void update_migrate_disable(struct task_struct *p)
-{
-    const struct cpumask *mask;
-
-    if (likely(!p->migrate_disable))
-        return;
-
-    /* Did we already update affinity? */
-    if (unlikely(migrate_disabled_updated(p)))
-        return;
-
-    /*
-     * Since this is always current we can get away with only locking
-     * rq->lock, the ->cpus_allowed value can normally only be changed
-     * while holding both p->pi_lock and rq->lock, but seeing that this
-     * is current, we cannot actually be waking up, so all code that
-     * relies on serialization against p->pi_lock is out of scope.
-     *
-     * Having rq->lock serializes us against things like
-     * set_cpus_allowed_ptr() that can still happen concurrently.
-     */
-    mask = tsk_cpus_allowed(p);
-
-    if (p->sched_class->set_cpus_allowed)
-        p->sched_class->set_cpus_allowed(p, mask);
-    /* mask==cpumask_of(task_cpu(p)) which has a cpumask_weight==1 */
-    p->nr_cpus_allowed = 1;
-
-    /* Let migrate_enable know to fix things back up */
-    p->migrate_disable |= MIGRATE_DISABLE_SET_AFFIN;
-}
-
-void migrate_disable(void)
-{
-    struct task_struct *p = current;
-
-    if (in_atomic()) {
-#ifdef CONFIG_SCHED_DEBUG
-        p->migrate_disable_atomic++;
-#endif
-        return;
-    }
-
-#ifdef CONFIG_SCHED_DEBUG
-    if (unlikely(p->migrate_disable_atomic)) {
-        tracing_off();
-        WARN_ON_ONCE(1);
-    }
-#endif
-
-    if (p->migrate_disable) {
-        p->migrate_disable++;
-        return;
-    }
-
-    preempt_disable();
-    preempt_lazy_disable();
-    pin_current_cpu();
-    p->migrate_disable = 1;
-    preempt_enable();
-}
-EXPORT_SYMBOL(migrate_disable);
-
-void migrate_enable(void)
-{
-    struct task_struct *p = current;
-    const struct cpumask *mask;
-    unsigned long flags;
-    struct rq *rq;
-
-    if (in_atomic()) {
-#ifdef CONFIG_SCHED_DEBUG
-        p->migrate_disable_atomic--;
-#endif
-        return;
-    }
-
-#ifdef CONFIG_SCHED_DEBUG
-    if (unlikely(p->migrate_disable_atomic)) {
-        tracing_off();
-        WARN_ON_ONCE(1);
-    }
-#endif
-    WARN_ON_ONCE(p->migrate_disable <= 0);
-
-    if (migrate_disable_count(p) > 1) {
-        p->migrate_disable--;
-        return;
-    }
-
-    preempt_disable();
-    if (unlikely(migrate_disabled_updated(p))) {
-        /*
-         * Undo whatever update_migrate_disable() did, also see there
-         * about locking.
-         */
-        rq = this_rq();
-        raw_spin_lock_irqsave(&rq->lock, flags);
-
-        /*
-         * Clearing migrate_disable causes tsk_cpus_allowed to
-         * show the tasks original cpu affinity.
-         */
-        p->migrate_disable = 0;
-        mask = tsk_cpus_allowed(p);
-        if (p->sched_class->set_cpus_allowed)
-            p->sched_class->set_cpus_allowed(p, mask);
-        p->nr_cpus_allowed = cpumask_weight(mask);
-        raw_spin_unlock_irqrestore(&rq->lock, flags);
-    } else
-        p->migrate_disable = 0;
-
-    unpin_current_cpu();
-    preempt_enable();
-    preempt_lazy_enable();
-}
-EXPORT_SYMBOL(migrate_enable);
-#else
-static inline void update_migrate_disable(struct task_struct *p) { }
-#define migrate_disabled_updated(p) 0
-#endif
-
 static void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
     if (prev->on_rq || rq->skip_clock_update < 0)
@@ -2601,8 +2398,6 @@ need_resched:
     smp_mb__before_spinlock();
     raw_spin_lock_irq(&rq->lock);
 
-    update_migrate_disable(prev);
-
     switch_count = &prev->nivcsw;
     if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
         if (unlikely(signal_pending_state(prev->state, prev))) {
@@ -2610,6 +2405,19 @@ need_resched:
         } else {
             deactivate_task(rq, prev, DEQUEUE_SLEEP);
             prev->on_rq = 0;
+
+            /*
+             * If a worker went to sleep, notify and ask workqueue
+             * whether it wants to wake up a task to maintain
+             * concurrency.
+             */
+            if (prev->flags & PF_WQ_WORKER) {
+                struct task_struct *to_wakeup;
+
+                to_wakeup = wq_worker_sleeping(prev, cpu);
+                if (to_wakeup)
+                    try_to_wake_up_local(to_wakeup);
+            }
         }
         switch_count = &prev->nvcsw;
     }
@@ -2622,7 +2430,6 @@ need_resched:
     put_prev_task(rq, prev);
     next = pick_next_task(rq);
     clear_tsk_need_resched(prev);
-    clear_tsk_need_resched_lazy(prev);
     rq->skip_clock_update = 0;
 
     if (likely(prev != next)) {
@@ -2653,14 +2460,6 @@ static inline void sched_submit_work(struct task_struct *tsk)
 {
     if (!tsk->state || tsk_is_pi_blocked(tsk))
         return;
-
-    /*
-     * If a worker went to sleep, notify and ask workqueue whether
-     * it wants to wake up a task to maintain concurrency.
-     */
-    if (tsk->flags & PF_WQ_WORKER)
-        wq_worker_sleeping(tsk);
-
     /*
      * If we are going to sleep and we have plugged IO queued,
      * make sure to submit it to avoid deadlocks.
@@ -2669,19 +2468,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
         blk_schedule_flush_plug(tsk);
 }
 
-static inline void sched_update_worker(struct task_struct *tsk)
-{
-    if (tsk->flags & PF_WQ_WORKER)
-        wq_worker_running(tsk);
-}
-
 asmlinkage void __sched schedule(void)
 {
     struct task_struct *tsk = current;
 
     sched_submit_work(tsk);
     __schedule();
-    sched_update_worker(tsk);
 }
 EXPORT_SYMBOL(schedule);
 
@@ -2727,26 +2519,9 @@ asmlinkage void __sched notrace preempt_schedule(void)
     if (likely(!preemptible()))
         return;
 
-#ifdef CONFIG_PREEMPT_LAZY
-    /*
-     * Check for lazy preemption
-     */
-    if (current_thread_info()->preempt_lazy_count &&
-        !test_thread_flag(TIF_NEED_RESCHED))
-        return;
-#endif
     do {
         add_preempt_count_notrace(PREEMPT_ACTIVE);
-        /*
-         * The add/subtract must not be traced by the function
-         * tracer. But we still want to account for the
-         * preempt off latency tracer. Since the _notrace versions
-         * of add/subtract skip the accounting for latency tracer
-         * we must force it manually.
-         */
-        start_critical_timings();
         __schedule();
-        stop_critical_timings();
         sub_preempt_count_notrace(PREEMPT_ACTIVE);
 
         /*
@@ -2919,10 +2694,10 @@ void complete(struct completion *x)
 {
     unsigned long flags;
 
-    raw_spin_lock_irqsave(&x->wait.lock, flags);
+    spin_lock_irqsave(&x->wait.lock, flags);
     x->done++;
-    __swait_wake_locked(&x->wait, TASK_NORMAL, 1);
-    raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+    __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
+    spin_unlock_irqrestore(&x->wait.lock, flags);
 }
 EXPORT_SYMBOL(complete);
 
@@ -2939,10 +2714,10 @@ void complete_all(struct completion *x)
 {
     unsigned long flags;
 
-    raw_spin_lock_irqsave(&x->wait.lock, flags);
+    spin_lock_irqsave(&x->wait.lock, flags);
     x->done += UINT_MAX/2;
-    __swait_wake_locked(&x->wait, TASK_NORMAL, 0);
-    raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+    __wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL);
+    spin_unlock_irqrestore(&x->wait.lock, flags);
 }
 EXPORT_SYMBOL(complete_all);
 
@@ -2951,20 +2726,20 @@ do_wait_for_common(struct completion *x,
            long (*action)(long), long timeout, int state)
 {
     if (!x->done) {
-        DEFINE_SWAITER(wait);
+        DECLARE_WAITQUEUE(wait, current);
 
-        swait_prepare_locked(&x->wait, &wait);
+        __add_wait_queue_tail_exclusive(&x->wait, &wait);
         do {
             if (signal_pending_state(state, current)) {
                 timeout = -ERESTARTSYS;
                 break;
             }
             __set_current_state(state);
-            raw_spin_unlock_irq(&x->wait.lock);
+            spin_unlock_irq(&x->wait.lock);
             timeout = action(timeout);
-            raw_spin_lock_irq(&x->wait.lock);
+            spin_lock_irq(&x->wait.lock);
         } while (!x->done && timeout);
-        swait_finish_locked(&x->wait, &wait);
+        __remove_wait_queue(&x->wait, &wait);
         if (!x->done)
             return timeout;
     }
@@ -2978,9 +2753,9 @@ __wait_for_common(struct completion *x,
 {
     might_sleep();
 
-    raw_spin_lock_irq(&x->wait.lock);
+    spin_lock_irq(&x->wait.lock);
     timeout = do_wait_for_common(x, action, timeout, state);
-    raw_spin_unlock_irq(&x->wait.lock);
+    spin_unlock_irq(&x->wait.lock);
     return timeout;
 }
 
@@ -3156,12 +2931,12 @@ bool try_wait_for_completion(struct completion *x)
     unsigned long flags;
     int ret = 1;
 
-    raw_spin_lock_irqsave(&x->wait.lock, flags);
+    spin_lock_irqsave(&x->wait.lock, flags);
     if (!x->done)
         ret = 0;
     else
         x->done--;
-    raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+    spin_unlock_irqrestore(&x->wait.lock, flags);
     return ret;
 }
 EXPORT_SYMBOL(try_wait_for_completion);
@@ -3179,10 +2954,10 @@ bool completion_done(struct completion *x)
     unsigned long flags;
     int ret = 1;
 
-    raw_spin_lock_irqsave(&x->wait.lock, flags);
+    spin_lock_irqsave(&x->wait.lock, flags);
     if (!x->done)
         ret = 0;
-    raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+    spin_unlock_irqrestore(&x->wait.lock, flags);
     return ret;
 }
 EXPORT_SYMBOL(completion_done);
@@ -3243,8 +3018,7 @@ EXPORT_SYMBOL(sleep_on_timeout);
  * This function changes the 'effective' priority of a task. It does
  * not touch ->normal_prio like __setscheduler().
  *
- * Used by the rt_mutex code to implement priority inheritance
- * logic. Call site only calls if the priority of the task changed.
+ * Used by the rt_mutex code to implement priority inheritance logic.
  */
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
@@ -3475,25 +3249,20 @@ static struct task_struct *find_process_by_pid(pid_t pid)
     return pid ? find_task_by_vpid(pid) : current;
 }
 
-static void __setscheduler_params(struct task_struct *p, int policy, int prio)
-{
-    p->policy = policy;
-    p->rt_priority = prio;
-    p->normal_prio = normal_prio(p);
-    set_load_weight(p);
-}
-
 /* Actually do priority change: must hold rq lock. */
 static void
 __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
 {
-    __setscheduler_params(p, policy, prio);
+    p->policy = policy;
+    p->rt_priority = prio;
+    p->normal_prio = normal_prio(p);
     /* we are holding p->pi_lock already */
     p->prio = rt_mutex_getprio(p);
     if (rt_prio(p->prio))
         p->sched_class = &rt_sched_class;
     else
         p->sched_class = &fair_sched_class;
+    set_load_weight(p);
 }
 
 /*
@@ -3515,7 +3284,6 @@ static bool check_same_owner(struct task_struct *p)
 static int __sched_setscheduler(struct task_struct *p, int policy,
                 const struct sched_param *param, bool user)
 {
-    int newprio = MAX_RT_PRIO - 1 - param->sched_priority;
     int retval, oldprio, oldpolicy = -1, on_rq, running;
     unsigned long flags;
     const struct sched_class *prev_class;
@@ -3611,13 +3379,10 @@ recheck:
     }
 
     /*
-     * If not changing anything there's no need to proceed
-     * further, but store a possible modification of
-     * reset_on_fork.
+     * If not changing anything there's no need to proceed further:
      */
     if (unlikely(policy == p->policy && (!rt_policy(policy) ||
             param->sched_priority == p->rt_priority))) {
-        p->sched_reset_on_fork = reset_on_fork;
         task_rq_unlock(rq, p, &flags);
         return 0;
     }
@@ -3643,25 +3408,6 @@ recheck:
         task_rq_unlock(rq, p, &flags);
         goto recheck;
     }
-
-    p->sched_reset_on_fork = reset_on_fork;
-    oldprio = p->prio;
-
-    /*
-     * Special case for priority boosted tasks.
-     *
-     * If the new priority is lower or equal (user space view)
-     * than the current (boosted) priority, we just store the new
-     * normal parameters and do not touch the scheduler class and
-     * the runqueue. This will be done when the task deboost
-     * itself.
-     */
-    if (rt_mutex_check_prio(p, newprio)) {
-        __setscheduler_params(p, policy, param->sched_priority);
-        task_rq_unlock(rq, p, &flags);
-        return 0;
-    }
-
     on_rq = p->on_rq;
     running = task_current(rq, p);
     if (on_rq)
@@ -3669,18 +3415,17 @@ recheck:
     if (running)
         p->sched_class->put_prev_task(rq, p);
 
+    p->sched_reset_on_fork = reset_on_fork;
+
+    oldprio = p->prio;
     prev_class = p->sched_class;
     __setscheduler(rq, p, policy, param->sched_priority);
 
     if (running)
         p->sched_class->set_curr_task(rq);
-    if (on_rq) {
-        /*
-         * We enqueue to tail when the priority of a task is
-         * increased (user space view).
-         */
-        enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0);
-    }
+    if (on_rq)
+        enqueue_task(rq, p, 0);
+
     check_class_changed(rq, p, prev_class, oldprio);
     task_rq_unlock(rq, p, &flags);
 
@@ -4056,17 +3801,9 @@ static inline int should_resched(void)
 
 static void __cond_resched(void)
 {
-    do {
-        add_preempt_count(PREEMPT_ACTIVE);
-        __schedule();
-        sub_preempt_count(PREEMPT_ACTIVE);
-        /*
-         * Check again in case we missed a preemption
-         * opportunity between schedule and now.
-         */
-        barrier();
-
-    } while (need_resched());
+    add_preempt_count(PREEMPT_ACTIVE);
+    __schedule();
+    sub_preempt_count(PREEMPT_ACTIVE);
 }
 
 int __sched _cond_resched(void)
@@ -4107,7 +3844,6 @@ int __cond_resched_lock(spinlock_t *lock)
 }
 EXPORT_SYMBOL(__cond_resched_lock);
 
-#ifndef CONFIG_PREEMPT_RT_FULL
 int __sched __cond_resched_softirq(void)
 {
     BUG_ON(!in_softirq());
@@ -4121,7 +3857,6 @@ int __sched __cond_resched_softirq(void)
     return 0;
 }
 EXPORT_SYMBOL(__cond_resched_softirq);
-#endif
 
 /**
  * yield - yield the current processor to other threads.
@@ -4471,7 +4206,6 @@ void init_idle(struct task_struct *idle, int cpu)
     rcu_read_unlock();
 
     rq->curr = rq->idle = idle;
-    idle->on_rq = 1;
 #if defined(CONFIG_SMP)
     idle->on_cpu = 1;
 #endif
@@ -4479,9 +4213,7 @@ void init_idle(struct task_struct *idle, int cpu)
 
     /* Set the preempt count _outside_ the spinlocks! */
     task_thread_info(idle)->preempt_count = 0;
-#ifdef CONFIG_HAVE_PREEMPT_LAZY
-    task_thread_info(idle)->preempt_lazy_count = 0;
-#endif
+
     /*
      * The idle tasks have their own, simple scheduling class:
      */
@@ -4496,90 +4228,11 @@ void init_idle(struct task_struct *idle, int cpu)
 #ifdef CONFIG_SMP
 void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 {
-    if (!migrate_disabled_updated(p)) {
-        if (p->sched_class && p->sched_class->set_cpus_allowed)
-            p->sched_class->set_cpus_allowed(p, new_mask);
-        p->nr_cpus_allowed = cpumask_weight(new_mask);
-    }
-    cpumask_copy(&p->cpus_allowed, new_mask);
-}
-
-static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
-static DEFINE_MUTEX(sched_down_mutex);
-static cpumask_t sched_down_cpumask;
-
-void tell_sched_cpu_down_begin(int cpu)
-{
-    mutex_lock(&sched_down_mutex);
-    cpumask_set_cpu(cpu, &sched_down_cpumask);
-    mutex_unlock(&sched_down_mutex);
-}
-
-void tell_sched_cpu_down_done(int cpu)
-{
-    mutex_lock(&sched_down_mutex);
-    cpumask_clear_cpu(cpu, &sched_down_cpumask);
-    mutex_unlock(&sched_down_mutex);
-}
+    if (p->sched_class && p->sched_class->set_cpus_allowed)
+        p->sched_class->set_cpus_allowed(p, new_mask);
 
-/**
- * migrate_me - try to move the current task off this cpu
- *
- * Used by the pin_current_cpu() code to try to get tasks
- * to move off the current CPU as it is going down.
- * It will only move the task if the task isn't pinned to
- * the CPU (with migrate_disable, affinity or NO_SETAFFINITY)
- * and the task has to be in a RUNNING state. Otherwise the
- * movement of the task will wake it up (change its state
- * to running) when the task did not expect it.
- *
- * Returns 1 if it succeeded in moving the current task
- *         0 otherwise.
- */
-int migrate_me(void)
-{
-    struct task_struct *p = current;
-    struct migration_arg arg;
-    struct cpumask *cpumask;
-    struct cpumask *mask;
-    unsigned long flags;
-    unsigned int dest_cpu;
-    struct rq *rq;
-
-    /*
-     * We can not migrate tasks bounded to a CPU or tasks not
-     * running. The movement of the task will wake it up.
-     */
-    if (p->flags & PF_NO_SETAFFINITY || p->state)
-        return 0;
-
-    mutex_lock(&sched_down_mutex);
-    rq = task_rq_lock(p, &flags);
-
-    cpumask = &__get_cpu_var(sched_cpumasks);
-    mask = &p->cpus_allowed;
-
-    cpumask_andnot(cpumask, mask, &sched_down_cpumask);
-
-    if (!cpumask_weight(cpumask)) {
-        /* It's only on this CPU? */
-        task_rq_unlock(rq, p, &flags);
-        mutex_unlock(&sched_down_mutex);
-        return 0;
-    }
-
-    dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
-
-    arg.task = p;
-    arg.dest_cpu = dest_cpu;
-
-    task_rq_unlock(rq, p, &flags);
-
-    stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
-    tlb_migrate_finish(p->mm);
-    mutex_unlock(&sched_down_mutex);
-
-    return 1;
+    cpumask_copy(&p->cpus_allowed, new_mask);
+    p->nr_cpus_allowed = cpumask_weight(new_mask);
 }
 
 /*
@@ -4625,7 +4278,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
     do_set_cpus_allowed(p, new_mask);
 
     /* Can the task run on the task's current CPU? If so, we're done */
-    if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
+    if (cpumask_test_cpu(task_cpu(p), new_mask))
         goto out;
 
     dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
@@ -4714,8 +4367,6 @@ static int migration_cpu_stop(void *data)
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
-
 /*
  * Ensures that the idle task is using init_mm right before its cpu goes
  * offline.
@@ -4728,12 +4379,7 @@ void idle_task_exit(void)
 
     if (mm != &init_mm)
         switch_mm(mm, &init_mm, current);
-
-    /*
-     * Defer the cleanup to an alive cpu. On RT we can neither
-     * call mmdrop() nor mmdrop_delayed() from here.
-     */
-    per_cpu(idle_last_mm, smp_processor_id()) = mm;
+    mmdrop(mm);
 }
 
 /*
@@ -5057,10 +4703,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 
     case CPU_DEAD:
         calc_load_migrate(rq);
-        if (per_cpu(idle_last_mm, cpu)) {
-            mmdrop(per_cpu(idle_last_mm, cpu));
-            per_cpu(idle_last_mm, cpu) = NULL;
-        }
         break;
 #endif
     }
@@ -6933,8 +6575,7 @@ void __init sched_init(void)
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 static inline int preempt_count_equals(int preempt_offset)
 {
-    int nested = (preempt_count() & ~PREEMPT_ACTIVE) +
-        sched_rcu_preempt_depth();
+    int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
 
     return (nested == preempt_offset);
 }
@@ -6944,8 +6585,7 @@ void __might_sleep(const char *file, int line, int preempt_offset)
     static unsigned long prev_jiffy;	/* ratelimiting */
 
     rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
-    if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
-         !is_idle_task(current)) ||
+    if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
         system_state != SYSTEM_RUNNING || oops_in_progress)
         return;
     if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
@@ -6963,13 +6603,6 @@ void __might_sleep(const char *file, int line, int preempt_offset)
     debug_show_held_locks(current);
     if (irqs_disabled())
         print_irqtrace_events(current);
-#ifdef CONFIG_DEBUG_PREEMPT
-    if (!preempt_count_equals(preempt_offset)) {
-        pr_err("Preemption disabled at:");
-        print_ip_sym(current->preempt_disable_ip);
-        pr_cont("\n");
-    }
-#endif
     dump_stack();
 }
 EXPORT_SYMBOL(__might_sleep);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 1681f49..9994791 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -655,45 +655,37 @@ static void __vtime_account_system(struct task_struct *tsk)
 
 void vtime_account_system(struct task_struct *tsk)
 {
-    raw_spin_lock(&tsk->vtime_lock);
-    write_seqcount_begin(&tsk->vtime_seq);
+    write_seqlock(&tsk->vtime_seqlock);
     __vtime_account_system(tsk);
-    write_seqcount_end(&tsk->vtime_seq);
-    raw_spin_unlock(&tsk->vtime_lock);
+    write_sequnlock(&tsk->vtime_seqlock);
 }
 
 void vtime_gen_account_irq_exit(struct task_struct *tsk)
 {
-    raw_spin_lock(&tsk->vtime_lock);
-    write_seqcount_begin(&tsk->vtime_seq);
+    write_seqlock(&tsk->vtime_seqlock);
     __vtime_account_system(tsk);
     if (context_tracking_in_user())
         tsk->vtime_snap_whence = VTIME_USER;
-    write_seqcount_end(&tsk->vtime_seq);
-    raw_spin_unlock(&tsk->vtime_lock);
+    write_sequnlock(&tsk->vtime_seqlock);
 }
 
 void vtime_account_user(struct task_struct *tsk)
 {
     cputime_t delta_cpu;
 
-    raw_spin_lock(&tsk->vtime_lock);
-    write_seqcount_begin(&tsk->vtime_seq);
+    write_seqlock(&tsk->vtime_seqlock);
     delta_cpu = get_vtime_delta(tsk);
     tsk->vtime_snap_whence = VTIME_SYS;
     account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
-    write_seqcount_end(&tsk->vtime_seq);
-    raw_spin_unlock(&tsk->vtime_lock);
+    write_sequnlock(&tsk->vtime_seqlock);
 }
 
 void vtime_user_enter(struct task_struct *tsk)
 {
-    raw_spin_lock(&tsk->vtime_lock);
-    write_seqcount_begin(&tsk->vtime_seq);
+    write_seqlock(&tsk->vtime_seqlock);
     __vtime_account_system(tsk);
     tsk->vtime_snap_whence = VTIME_USER;
-    write_seqcount_end(&tsk->vtime_seq);
-    raw_spin_unlock(&tsk->vtime_lock);
+    write_sequnlock(&tsk->vtime_seqlock);
 }
 
 void vtime_guest_enter(struct task_struct *tsk)
@@ -705,23 +697,19 @@ void vtime_guest_enter(struct task_struct *tsk)
      * synchronization against the reader (task_gtime())
      * that can thus safely catch up with a tickless delta.
      */
-    raw_spin_lock(&tsk->vtime_lock);
-    write_seqcount_begin(&tsk->vtime_seq);
+    write_seqlock(&tsk->vtime_seqlock);
     __vtime_account_system(tsk);
     current->flags |= PF_VCPU;
-    write_seqcount_end(&tsk->vtime_seq);
-    raw_spin_unlock(&tsk->vtime_lock);
+    write_sequnlock(&tsk->vtime_seqlock);
 }
 EXPORT_SYMBOL_GPL(vtime_guest_enter);
 
 void vtime_guest_exit(struct task_struct *tsk)
 {
-    raw_spin_lock(&tsk->vtime_lock);
-    write_seqcount_begin(&tsk->vtime_seq);
+    write_seqlock(&tsk->vtime_seqlock);
     __vtime_account_system(tsk);
     current->flags &= ~PF_VCPU;
-    write_seqcount_end(&tsk->vtime_seq);
-    raw_spin_unlock(&tsk->vtime_lock);
+    write_sequnlock(&tsk->vtime_seqlock);
 }
 EXPORT_SYMBOL_GPL(vtime_guest_exit);
 
@@ -734,30 +722,24 @@ void vtime_account_idle(struct task_struct *tsk)
 
 void arch_vtime_task_switch(struct task_struct *prev)
 {
-    raw_spin_lock(&prev->vtime_lock);
-    write_seqcount_begin(&prev->vtime_seq);
+    write_seqlock(&prev->vtime_seqlock);
     prev->vtime_snap_whence = VTIME_SLEEPING;
-    write_seqcount_end(&prev->vtime_seq);
-    raw_spin_unlock(&prev->vtime_lock);
+    write_sequnlock(&prev->vtime_seqlock);
 
-    raw_spin_lock(&current->vtime_lock);
-    write_seqcount_begin(&current->vtime_seq);
+    write_seqlock(&current->vtime_seqlock);
     current->vtime_snap_whence = VTIME_SYS;
     current->vtime_snap = sched_clock_cpu(smp_processor_id());
-    write_seqcount_end(&current->vtime_seq);
-    raw_spin_unlock(&current->vtime_lock);
+    write_sequnlock(&current->vtime_seqlock);
 }
 
 void vtime_init_idle(struct task_struct *t, int cpu)
 {
     unsigned long flags;
 
-    raw_spin_lock_irqsave(&t->vtime_lock, flags);
-    write_seqcount_begin(&t->vtime_seq);
+    write_seqlock_irqsave(&t->vtime_seqlock, flags);
     t->vtime_snap_whence = VTIME_SYS;
     t->vtime_snap = sched_clock_cpu(cpu);
-    write_seqcount_end(&t->vtime_seq);
-    raw_spin_unlock_irqrestore(&t->vtime_lock, flags);
+    write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
 }
 
 cputime_t task_gtime(struct task_struct *t)
@@ -766,13 +748,13 @@ cputime_t task_gtime(struct task_struct *t)
     cputime_t gtime;
 
     do {
-        seq = read_seqcount_begin(&t->vtime_seq);
+        seq = read_seqbegin(&t->vtime_seqlock);
 
         gtime = t->gtime;
         if (t->flags & PF_VCPU)
             gtime += vtime_delta(t);
 
-    } while (read_seqcount_retry(&t->vtime_seq, seq));
+    } while (read_seqretry(&t->vtime_seqlock, seq));
 
     return gtime;
 }
@@ -795,7 +777,7 @@ fetch_task_cputime(struct task_struct *t,
         *udelta = 0;
         *sdelta = 0;
 
-        seq = read_seqcount_begin(&t->vtime_seq);
+        seq = read_seqbegin(&t->vtime_seqlock);
 
         if (u_dst)
             *u_dst = *u_src;
@@ -819,7 +801,7 @@ fetch_task_cputime(struct task_struct *t,
             if (t->vtime_snap_whence == VTIME_SYS)
                 *sdelta = delta;
         }
-    } while (read_seqcount_retry(&t->vtime_seq, seq));
+    } while (read_seqretry(&t->vtime_seqlock, seq));
 }
 
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 70812af..fd9ca1d 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -256,9 +256,6 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
     P(rt_throttled);
     PN(rt_time);
     PN(rt_runtime);
-#ifdef CONFIG_SMP
-    P(rt_nr_migratory);
-#endif
 
 #undef PN
 #undef P
@@ -588,10 +585,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 #endif
     P(policy);
     P(prio);
-#ifdef CONFIG_PREEMPT_RT_FULL
-    P(migrate_disable);
-#endif
-    P(nr_cpus_allowed);
 #undef PN
 #undef __PN
 #undef P
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0af1448..790e2fc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1902,7 +1902,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
     ideal_runtime = sched_slice(cfs_rq, curr);
     delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
     if (delta_exec > ideal_runtime) {
-        resched_task_lazy(rq_of(cfs_rq)->curr);
+        resched_task(rq_of(cfs_rq)->curr);
         /*
          * The current task ran long enough, ensure it doesn't get
          * re-elected due to buddy favours.
@@ -1926,7 +1926,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
         return;
 
     if (delta > ideal_runtime)
-        resched_task_lazy(rq_of(cfs_rq)->curr);
+        resched_task(rq_of(cfs_rq)->curr);
 }
 
 static void
@@ -2047,7 +2047,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
      * validating it and just reschedule.
      */
     if (queued) {
-        resched_task_lazy(rq_of(cfs_rq)->curr);
+        resched_task(rq_of(cfs_rq)->curr);
         return;
     }
     /*
@@ -2237,7 +2237,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
      * hierarchy can be throttled
      */
     if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
-        resched_task_lazy(rq_of(cfs_rq)->curr);
+        resched_task(rq_of(cfs_rq)->curr);
 }
 
 static __always_inline
@@ -2837,7 +2837,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
 
         if (delta < 0) {
             if (rq->curr == p)
-                resched_task_lazy(p);
+                resched_task(p);
             return;
         }
 
@@ -3704,7 +3704,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
     return;
 
 preempt:
-    resched_task_lazy(curr);
+    resched_task(curr);
     /*
      * Only set the backward buddy when the current task is still
      * on the rq. This can happen when a wakeup gets interleaved
@@ -5979,7 +5979,7 @@ static void task_fork_fair(struct task_struct *p)
          * 'current' within the tree based on its new key value.
          */
         swap(curr->vruntime, se->vruntime);
-        resched_task_lazy(rq->curr);
+        resched_task(rq->curr);
     }
 
     se->vruntime -= cfs_rq->min_vruntime;
@@ -6004,7 +6004,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
      */
     if (rq->curr == p) {
         if (p->prio > oldprio)
-            resched_task_lazy(rq->curr);
+            resched_task(rq->curr);
     } else
         check_preempt_curr(rq, p, 0);
 }
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 4594051..99399f8 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -50,18 +50,11 @@ SCHED_FEAT(LB_BIAS, true)
  */
 SCHED_FEAT(NONTASK_POWER, true)
 
-#ifndef CONFIG_PREEMPT_RT_FULL
 /*
  * Queue remote wakeups on the target CPU and process them
  * using the scheduler IPI. Reduces rq->lock contention/bounces.
  */
 SCHED_FEAT(TTWU_QUEUE, true)
-#else
-SCHED_FEAT(TTWU_QUEUE, false)
-# ifdef CONFIG_PREEMPT_LAZY
-SCHED_FEAT(PREEMPT_LAZY, true)
-# endif
-#endif
 
 SCHED_FEAT(FORCE_SD_OVERLAP, false)
 SCHED_FEAT(RT_RUNTIME_SHARE, true)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 240fc60..ff04e1a 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -43,7 +43,6 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
 
     hrtimer_init(&rt_b->rt_period_timer,
             CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-    rt_b->rt_period_timer.irqsafe = 1;
     rt_b->rt_period_timer.function = sched_rt_period_timer;
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2843303..4f31059 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -898,7 +898,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 #define WF_SYNC		0x01		/* waker goes to sleep after wakeup */
 #define WF_FORK		0x02		/* child wakeup after fork */
 #define WF_MIGRATED	0x4		/* internal use, task got migrated */
-#define WF_LOCK_SLEEPER	0x08		/* wakeup spinlock "sleeper" */
 
 /*
  * To aid in avoiding the subversion of "niceness" due to uneven distribution
@@ -1046,15 +1045,6 @@ extern void init_sched_fair_class(void);
 extern void resched_task(struct task_struct *p);
 extern void resched_cpu(int cpu);
 
-#ifdef CONFIG_PREEMPT_LAZY
-extern void resched_task_lazy(struct task_struct *tsk);
-#else
-static inline void resched_task_lazy(struct task_struct *tsk)
-{
-    resched_task(tsk);
-}
-#endif
-
 extern struct rt_bandwidth def_rt_bandwidth;
 extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);