Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/core.c      | 569
-rw-r--r--  kernel/sched/cputime.c   |  62
-rw-r--r--  kernel/sched/debug.c     |   7
-rw-r--r--  kernel/sched/fair.c      |  16
-rw-r--r--  kernel/sched/features.h  |   7
-rw-r--r--  kernel/sched/rt.c        |   1
-rw-r--r--  kernel/sched/sched.h     |  10
7 files changed, 131 insertions(+), 541 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8749d20..a494ace 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -272,11 +272,7 @@ late_initcall(sched_init_debug);
* Number of tasks to iterate in a single balance run.
* Limited because this is done with IRQs disabled.
*/
-#ifndef CONFIG_PREEMPT_RT_FULL
const_debug unsigned int sysctl_sched_nr_migrate = 32;
-#else
-const_debug unsigned int sysctl_sched_nr_migrate = 8;
-#endif
/*
* period over which we average the RT time consumption, measured
@@ -495,7 +491,6 @@ static void init_rq_hrtick(struct rq *rq)
hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
rq->hrtick_timer.function = hrtick;
- rq->hrtick_timer.irqsafe = 1;
}
#else /* CONFIG_SCHED_HRTICK */
static inline void hrtick_clear(struct rq *rq)
@@ -540,37 +535,6 @@ void resched_task(struct task_struct *p)
smp_send_reschedule(cpu);
}
-#ifdef CONFIG_PREEMPT_LAZY
-void resched_task_lazy(struct task_struct *p)
-{
- int cpu;
-
- if (!sched_feat(PREEMPT_LAZY)) {
- resched_task(p);
- return;
- }
-
- assert_raw_spin_locked(&task_rq(p)->lock);
-
- if (test_tsk_need_resched(p))
- return;
-
- if (test_tsk_need_resched_lazy(p))
- return;
-
- set_tsk_need_resched_lazy(p);
-
- cpu = task_cpu(p);
- if (cpu == smp_processor_id())
- return;
-
- /* NEED_RESCHED_LAZY must be visible before we test polling */
- smp_mb();
- if (!tsk_is_polling(p))
- smp_send_reschedule(cpu);
-}
-#endif
-
void resched_cpu(int cpu)
{
struct rq *rq = cpu_rq(cpu);
@@ -735,17 +699,6 @@ void resched_task(struct task_struct *p)
assert_raw_spin_locked(&task_rq(p)->lock);
set_tsk_need_resched(p);
}
-#ifdef CONFIG_PREEMPT_LAZY
-void resched_task_lazy(struct task_struct *p)
-{
- if (!sched_feat(PREEMPT_LAZY)) {
- resched_task(p);
- return;
- }
- assert_raw_spin_locked(&task_rq(p)->lock);
- set_tsk_need_resched_lazy(p);
-}
-#endif
#endif /* CONFIG_SMP */
#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
@@ -1071,18 +1024,6 @@ struct migration_arg {
static int migration_cpu_stop(void *data);
-static bool check_task_state(struct task_struct *p, long match_state)
-{
- bool match = false;
-
- raw_spin_lock_irq(&p->pi_lock);
- if (p->state == match_state || p->saved_state == match_state)
- match = true;
- raw_spin_unlock_irq(&p->pi_lock);
-
- return match;
-}
-
/*
* wait_task_inactive - wait for a thread to unschedule.
*
@@ -1127,7 +1068,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
* is actually now running somewhere else!
*/
while (task_running(rq, p)) {
- if (match_state && !check_task_state(p, match_state))
+ if (match_state && unlikely(p->state != match_state))
return 0;
cpu_relax();
}
@@ -1142,8 +1083,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
running = task_running(rq, p);
on_rq = p->on_rq;
ncsw = 0;
- if (!match_state || p->state == match_state
- || p->saved_state == match_state)
+ if (!match_state || p->state == match_state)
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
task_rq_unlock(rq, p, &flags);
@@ -1289,12 +1229,6 @@ out:
}
}
- /*
- * Clear PF_NO_SETAFFINITY, otherwise we wreckage
- * migrate_disable/enable. See optimization for
- * PF_NO_SETAFFINITY tasks there.
- */
- p->flags &= ~PF_NO_SETAFFINITY;
return dest_cpu;
}
@@ -1374,6 +1308,10 @@ static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
{
activate_task(rq, p, en_flags);
p->on_rq = 1;
+
+ /* if a worker is waking up, notify workqueue */
+ if (p->flags & PF_WQ_WORKER)
+ wq_worker_waking_up(p, cpu_of(rq));
}
/*
@@ -1548,27 +1486,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
*/
smp_mb__before_spinlock();
raw_spin_lock_irqsave(&p->pi_lock, flags);
- if (!(p->state & state)) {
- /*
- * The task might be running due to a spinlock sleeper
- * wakeup. Check the saved state and set it to running
- * if the wakeup condition is true.
- */
- if (!(wake_flags & WF_LOCK_SLEEPER)) {
- if (p->saved_state & state) {
- p->saved_state = TASK_RUNNING;
- success = 1;
- }
- }
+ if (!(p->state & state))
goto out;
- }
-
- /*
- * If this is a regular wakeup, then we can unconditionally
- * clear the saved state of a "lock sleeper".
- */
- if (!(wake_flags & WF_LOCK_SLEEPER))
- p->saved_state = TASK_RUNNING;
success = 1; /* we're going to change ->state */
cpu = task_cpu(p);
@@ -1611,6 +1530,42 @@ out:
}
/**
+ * try_to_wake_up_local - try to wake up a local task with rq lock held
+ * @p: the thread to be awakened
+ *
+ * Put @p on the run-queue if it's not already there. The caller must
+ * ensure that this_rq() is locked, @p is bound to this_rq() and not
+ * the current task.
+ */
+static void try_to_wake_up_local(struct task_struct *p)
+{
+ struct rq *rq = task_rq(p);
+
+ if (WARN_ON_ONCE(rq != this_rq()) ||
+ WARN_ON_ONCE(p == current))
+ return;
+
+ lockdep_assert_held(&rq->lock);
+
+ if (!raw_spin_trylock(&p->pi_lock)) {
+ raw_spin_unlock(&rq->lock);
+ raw_spin_lock(&p->pi_lock);
+ raw_spin_lock(&rq->lock);
+ }
+
+ if (!(p->state & TASK_NORMAL))
+ goto out;
+
+ if (!p->on_rq)
+ ttwu_activate(rq, p, ENQUEUE_WAKEUP);
+
+ ttwu_do_wakeup(rq, p, 0);
+ ttwu_stat(p, smp_processor_id(), 0);
+out:
+ raw_spin_unlock(&p->pi_lock);
+}
+
+/**
* wake_up_process - Wake up a specific process
* @p: The process to be woken up.
*
@@ -1624,23 +1579,11 @@ out:
*/
int wake_up_process(struct task_struct *p)
{
- WARN_ON(__task_is_stopped_or_traced(p));
+ WARN_ON(task_is_stopped_or_traced(p));
return try_to_wake_up(p, TASK_NORMAL, 0);
}
EXPORT_SYMBOL(wake_up_process);
-/**
- * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
- * @p: The process to be woken up.
- *
- * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
- * the nature of the wakeup.
- */
-int wake_up_lock_sleeper(struct task_struct *p)
-{
- return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
-}
-
int wake_up_state(struct task_struct *p, unsigned int state)
{
return try_to_wake_up(p, state, 0);
@@ -1778,9 +1721,6 @@ void sched_fork(struct task_struct *p)
/* Want to start with kernel preemption disabled. */
task_thread_info(p)->preempt_count = 1;
#endif
-#ifdef CONFIG_HAVE_PREEMPT_LAZY
- task_thread_info(p)->preempt_lazy_count = 0;
-#endif
#ifdef CONFIG_SMP
plist_node_init(&p->pushable_tasks, MAX_PRIO);
#endif
@@ -1947,12 +1887,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
finish_arch_post_lock_switch();
fire_sched_in_preempt_notifiers(current);
- /*
- * We use mmdrop_delayed() here so we don't have to do the
- * full __mmdrop() when we are the last user.
- */
if (mm)
- mmdrop_delayed(mm);
+ mmdrop(mm);
if (unlikely(prev_state == TASK_DEAD)) {
/*
* Remove function-return probe instances associated with this
@@ -2296,13 +2232,8 @@ void __kprobes add_preempt_count(int val)
DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
PREEMPT_MASK - 10);
#endif
- if (preempt_count() == val) {
- unsigned long ip = get_parent_ip(CALLER_ADDR1);
-#ifdef CONFIG_DEBUG_PREEMPT
- current->preempt_disable_ip = ip;
-#endif
- trace_preempt_off(CALLER_ADDR0, ip);
- }
+ if (preempt_count() == val)
+ trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
EXPORT_SYMBOL(add_preempt_count);
@@ -2345,13 +2276,6 @@ static noinline void __schedule_bug(struct task_struct *prev)
print_modules();
if (irqs_disabled())
print_irqtrace_events(prev);
-#ifdef CONFIG_DEBUG_PREEMPT
- if (in_atomic_preempt_off()) {
- pr_err("Preemption disabled at:");
- print_ip_sym(current->preempt_disable_ip);
- pr_cont("\n");
- }
-#endif
dump_stack();
add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
}
@@ -2375,133 +2299,6 @@ static inline void schedule_debug(struct task_struct *prev)
schedstat_inc(this_rq(), sched_count);
}
-#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
-#define MIGRATE_DISABLE_SET_AFFIN (1<<30) /* Can't make a negative */
-#define migrate_disabled_updated(p) ((p)->migrate_disable & MIGRATE_DISABLE_SET_AFFIN)
-#define migrate_disable_count(p) ((p)->migrate_disable & ~MIGRATE_DISABLE_SET_AFFIN)
-
-static inline void update_migrate_disable(struct task_struct *p)
-{
- const struct cpumask *mask;
-
- if (likely(!p->migrate_disable))
- return;
-
- /* Did we already update affinity? */
- if (unlikely(migrate_disabled_updated(p)))
- return;
-
- /*
- * Since this is always current we can get away with only locking
- * rq->lock, the ->cpus_allowed value can normally only be changed
- * while holding both p->pi_lock and rq->lock, but seeing that this
- * is current, we cannot actually be waking up, so all code that
- * relies on serialization against p->pi_lock is out of scope.
- *
- * Having rq->lock serializes us against things like
- * set_cpus_allowed_ptr() that can still happen concurrently.
- */
- mask = tsk_cpus_allowed(p);
-
- if (p->sched_class->set_cpus_allowed)
- p->sched_class->set_cpus_allowed(p, mask);
- /* mask==cpumask_of(task_cpu(p)) which has a cpumask_weight==1 */
- p->nr_cpus_allowed = 1;
-
- /* Let migrate_enable know to fix things back up */
- p->migrate_disable |= MIGRATE_DISABLE_SET_AFFIN;
-}
-
-void migrate_disable(void)
-{
- struct task_struct *p = current;
-
- if (in_atomic()) {
-#ifdef CONFIG_SCHED_DEBUG
- p->migrate_disable_atomic++;
-#endif
- return;
- }
-
-#ifdef CONFIG_SCHED_DEBUG
- if (unlikely(p->migrate_disable_atomic)) {
- tracing_off();
- WARN_ON_ONCE(1);
- }
-#endif
-
- if (p->migrate_disable) {
- p->migrate_disable++;
- return;
- }
-
- preempt_disable();
- preempt_lazy_disable();
- pin_current_cpu();
- p->migrate_disable = 1;
- preempt_enable();
-}
-EXPORT_SYMBOL(migrate_disable);
-
-void migrate_enable(void)
-{
- struct task_struct *p = current;
- const struct cpumask *mask;
- unsigned long flags;
- struct rq *rq;
-
- if (in_atomic()) {
-#ifdef CONFIG_SCHED_DEBUG
- p->migrate_disable_atomic--;
-#endif
- return;
- }
-
-#ifdef CONFIG_SCHED_DEBUG
- if (unlikely(p->migrate_disable_atomic)) {
- tracing_off();
- WARN_ON_ONCE(1);
- }
-#endif
- WARN_ON_ONCE(p->migrate_disable <= 0);
-
- if (migrate_disable_count(p) > 1) {
- p->migrate_disable--;
- return;
- }
-
- preempt_disable();
- if (unlikely(migrate_disabled_updated(p))) {
- /*
- * Undo whatever update_migrate_disable() did, also see there
- * about locking.
- */
- rq = this_rq();
- raw_spin_lock_irqsave(&rq->lock, flags);
-
- /*
- * Clearing migrate_disable causes tsk_cpus_allowed to
- * show the tasks original cpu affinity.
- */
- p->migrate_disable = 0;
- mask = tsk_cpus_allowed(p);
- if (p->sched_class->set_cpus_allowed)
- p->sched_class->set_cpus_allowed(p, mask);
- p->nr_cpus_allowed = cpumask_weight(mask);
- raw_spin_unlock_irqrestore(&rq->lock, flags);
- } else
- p->migrate_disable = 0;
-
- unpin_current_cpu();
- preempt_enable();
- preempt_lazy_enable();
-}
-EXPORT_SYMBOL(migrate_enable);
-#else
-static inline void update_migrate_disable(struct task_struct *p) { }
-#define migrate_disabled_updated(p) 0
-#endif
-
static void put_prev_task(struct rq *rq, struct task_struct *prev)
{
if (prev->on_rq || rq->skip_clock_update < 0)
@@ -2601,8 +2398,6 @@ need_resched:
smp_mb__before_spinlock();
raw_spin_lock_irq(&rq->lock);
- update_migrate_disable(prev);
-
switch_count = &prev->nivcsw;
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
if (unlikely(signal_pending_state(prev->state, prev))) {
@@ -2610,6 +2405,19 @@ need_resched:
} else {
deactivate_task(rq, prev, DEQUEUE_SLEEP);
prev->on_rq = 0;
+
+ /*
+ * If a worker went to sleep, notify and ask workqueue
+ * whether it wants to wake up a task to maintain
+ * concurrency.
+ */
+ if (prev->flags & PF_WQ_WORKER) {
+ struct task_struct *to_wakeup;
+
+ to_wakeup = wq_worker_sleeping(prev, cpu);
+ if (to_wakeup)
+ try_to_wake_up_local(to_wakeup);
+ }
}
switch_count = &prev->nvcsw;
}
@@ -2622,7 +2430,6 @@ need_resched:
put_prev_task(rq, prev);
next = pick_next_task(rq);
clear_tsk_need_resched(prev);
- clear_tsk_need_resched_lazy(prev);
rq->skip_clock_update = 0;
if (likely(prev != next)) {
@@ -2653,14 +2460,6 @@ static inline void sched_submit_work(struct task_struct *tsk)
{
if (!tsk->state || tsk_is_pi_blocked(tsk))
return;
-
- /*
- * If a worker went to sleep, notify and ask workqueue whether
- * it wants to wake up a task to maintain concurrency.
- */
- if (tsk->flags & PF_WQ_WORKER)
- wq_worker_sleeping(tsk);
-
/*
* If we are going to sleep and we have plugged IO queued,
* make sure to submit it to avoid deadlocks.
@@ -2669,19 +2468,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
blk_schedule_flush_plug(tsk);
}
-static inline void sched_update_worker(struct task_struct *tsk)
-{
- if (tsk->flags & PF_WQ_WORKER)
- wq_worker_running(tsk);
-}
-
asmlinkage void __sched schedule(void)
{
struct task_struct *tsk = current;
sched_submit_work(tsk);
__schedule();
- sched_update_worker(tsk);
}
EXPORT_SYMBOL(schedule);
@@ -2727,26 +2519,9 @@ asmlinkage void __sched notrace preempt_schedule(void)
if (likely(!preemptible()))
return;
-#ifdef CONFIG_PREEMPT_LAZY
- /*
- * Check for lazy preemption
- */
- if (current_thread_info()->preempt_lazy_count &&
- !test_thread_flag(TIF_NEED_RESCHED))
- return;
-#endif
do {
add_preempt_count_notrace(PREEMPT_ACTIVE);
- /*
- * The add/subtract must not be traced by the function
- * tracer. But we still want to account for the
- * preempt off latency tracer. Since the _notrace versions
- * of add/subtract skip the accounting for latency tracer
- * we must force it manually.
- */
- start_critical_timings();
__schedule();
- stop_critical_timings();
sub_preempt_count_notrace(PREEMPT_ACTIVE);
/*
@@ -2919,10 +2694,10 @@ void complete(struct completion *x)
{
unsigned long flags;
- raw_spin_lock_irqsave(&x->wait.lock, flags);
+ spin_lock_irqsave(&x->wait.lock, flags);
x->done++;
- __swait_wake_locked(&x->wait, TASK_NORMAL, 1);
- raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+ __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
+ spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete);
@@ -2939,10 +2714,10 @@ void complete_all(struct completion *x)
{
unsigned long flags;
- raw_spin_lock_irqsave(&x->wait.lock, flags);
+ spin_lock_irqsave(&x->wait.lock, flags);
x->done += UINT_MAX/2;
- __swait_wake_locked(&x->wait, TASK_NORMAL, 0);
- raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+ __wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL);
+ spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete_all);
@@ -2951,20 +2726,20 @@ do_wait_for_common(struct completion *x,
long (*action)(long), long timeout, int state)
{
if (!x->done) {
- DEFINE_SWAITER(wait);
+ DECLARE_WAITQUEUE(wait, current);
- swait_prepare_locked(&x->wait, &wait);
+ __add_wait_queue_tail_exclusive(&x->wait, &wait);
do {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
break;
}
__set_current_state(state);
- raw_spin_unlock_irq(&x->wait.lock);
+ spin_unlock_irq(&x->wait.lock);
timeout = action(timeout);
- raw_spin_lock_irq(&x->wait.lock);
+ spin_lock_irq(&x->wait.lock);
} while (!x->done && timeout);
- swait_finish_locked(&x->wait, &wait);
+ __remove_wait_queue(&x->wait, &wait);
if (!x->done)
return timeout;
}
@@ -2978,9 +2753,9 @@ __wait_for_common(struct completion *x,
{
might_sleep();
- raw_spin_lock_irq(&x->wait.lock);
+ spin_lock_irq(&x->wait.lock);
timeout = do_wait_for_common(x, action, timeout, state);
- raw_spin_unlock_irq(&x->wait.lock);
+ spin_unlock_irq(&x->wait.lock);
return timeout;
}
@@ -3156,12 +2931,12 @@ bool try_wait_for_completion(struct completion *x)
unsigned long flags;
int ret = 1;
- raw_spin_lock_irqsave(&x->wait.lock, flags);
+ spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = 0;
else
x->done--;
- raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+ spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
}
EXPORT_SYMBOL(try_wait_for_completion);
@@ -3179,10 +2954,10 @@ bool completion_done(struct completion *x)
unsigned long flags;
int ret = 1;
- raw_spin_lock_irqsave(&x->wait.lock, flags);
+ spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = 0;
- raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+ spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
}
EXPORT_SYMBOL(completion_done);
@@ -3243,8 +3018,7 @@ EXPORT_SYMBOL(sleep_on_timeout);
* This function changes the 'effective' priority of a task. It does
* not touch ->normal_prio like __setscheduler().
*
- * Used by the rt_mutex code to implement priority inheritance
- * logic. Call site only calls if the priority of the task changed.
+ * Used by the rt_mutex code to implement priority inheritance logic.
*/
void rt_mutex_setprio(struct task_struct *p, int prio)
{
@@ -3475,25 +3249,20 @@ static struct task_struct *find_process_by_pid(pid_t pid)
return pid ? find_task_by_vpid(pid) : current;
}
-static void __setscheduler_params(struct task_struct *p, int policy, int prio)
-{
- p->policy = policy;
- p->rt_priority = prio;
- p->normal_prio = normal_prio(p);
- set_load_weight(p);
-}
-
/* Actually do priority change: must hold rq lock. */
static void
__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
{
- __setscheduler_params(p, policy, prio);
+ p->policy = policy;
+ p->rt_priority = prio;
+ p->normal_prio = normal_prio(p);
/* we are holding p->pi_lock already */
p->prio = rt_mutex_getprio(p);
if (rt_prio(p->prio))
p->sched_class = &rt_sched_class;
else
p->sched_class = &fair_sched_class;
+ set_load_weight(p);
}
/*
@@ -3515,7 +3284,6 @@ static bool check_same_owner(struct task_struct *p)
static int __sched_setscheduler(struct task_struct *p, int policy,
const struct sched_param *param, bool user)
{
- int newprio = MAX_RT_PRIO - 1 - param->sched_priority;
int retval, oldprio, oldpolicy = -1, on_rq, running;
unsigned long flags;
const struct sched_class *prev_class;
@@ -3611,13 +3379,10 @@ recheck:
}
/*
- * If not changing anything there's no need to proceed
- * further, but store a possible modification of
- * reset_on_fork.
+ * If not changing anything there's no need to proceed further:
*/
if (unlikely(policy == p->policy && (!rt_policy(policy) ||
param->sched_priority == p->rt_priority))) {
- p->sched_reset_on_fork = reset_on_fork;
task_rq_unlock(rq, p, &flags);
return 0;
}
@@ -3643,25 +3408,6 @@ recheck:
task_rq_unlock(rq, p, &flags);
goto recheck;
}
-
- p->sched_reset_on_fork = reset_on_fork;
- oldprio = p->prio;
-
- /*
- * Special case for priority boosted tasks.
- *
- * If the new priority is lower or equal (user space view)
- * than the current (boosted) priority, we just store the new
- * normal parameters and do not touch the scheduler class and
- * the runqueue. This will be done when the task deboost
- * itself.
- */
- if (rt_mutex_check_prio(p, newprio)) {
- __setscheduler_params(p, policy, param->sched_priority);
- task_rq_unlock(rq, p, &flags);
- return 0;
- }
-
on_rq = p->on_rq;
running = task_current(rq, p);
if (on_rq)
@@ -3669,18 +3415,17 @@ recheck:
if (running)
p->sched_class->put_prev_task(rq, p);
+ p->sched_reset_on_fork = reset_on_fork;
+
+ oldprio = p->prio;
prev_class = p->sched_class;
__setscheduler(rq, p, policy, param->sched_priority);
if (running)
p->sched_class->set_curr_task(rq);
- if (on_rq) {
- /*
- * We enqueue to tail when the priority of a task is
- * increased (user space view).
- */
- enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0);
- }
+ if (on_rq)
+ enqueue_task(rq, p, 0);
+
check_class_changed(rq, p, prev_class, oldprio);
task_rq_unlock(rq, p, &flags);
@@ -4056,17 +3801,9 @@ static inline int should_resched(void)
static void __cond_resched(void)
{
- do {
- add_preempt_count(PREEMPT_ACTIVE);
- __schedule();
- sub_preempt_count(PREEMPT_ACTIVE);
- /*
- * Check again in case we missed a preemption
- * opportunity between schedule and now.
- */
- barrier();
-
- } while (need_resched());
+ add_preempt_count(PREEMPT_ACTIVE);
+ __schedule();
+ sub_preempt_count(PREEMPT_ACTIVE);
}
int __sched _cond_resched(void)
@@ -4107,7 +3844,6 @@ int __cond_resched_lock(spinlock_t *lock)
}
EXPORT_SYMBOL(__cond_resched_lock);
-#ifndef CONFIG_PREEMPT_RT_FULL
int __sched __cond_resched_softirq(void)
{
BUG_ON(!in_softirq());
@@ -4121,7 +3857,6 @@ int __sched __cond_resched_softirq(void)
return 0;
}
EXPORT_SYMBOL(__cond_resched_softirq);
-#endif
/**
* yield - yield the current processor to other threads.
@@ -4471,7 +4206,6 @@ void init_idle(struct task_struct *idle, int cpu)
rcu_read_unlock();
rq->curr = rq->idle = idle;
- idle->on_rq = 1;
#if defined(CONFIG_SMP)
idle->on_cpu = 1;
#endif
@@ -4479,9 +4213,7 @@ void init_idle(struct task_struct *idle, int cpu)
/* Set the preempt count _outside_ the spinlocks! */
task_thread_info(idle)->preempt_count = 0;
-#ifdef CONFIG_HAVE_PREEMPT_LAZY
- task_thread_info(idle)->preempt_lazy_count = 0;
-#endif
+
/*
* The idle tasks have their own, simple scheduling class:
*/
@@ -4496,90 +4228,11 @@ void init_idle(struct task_struct *idle, int cpu)
#ifdef CONFIG_SMP
void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
- if (!migrate_disabled_updated(p)) {
- if (p->sched_class && p->sched_class->set_cpus_allowed)
- p->sched_class->set_cpus_allowed(p, new_mask);
- p->nr_cpus_allowed = cpumask_weight(new_mask);
- }
- cpumask_copy(&p->cpus_allowed, new_mask);
-}
-
-static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
-static DEFINE_MUTEX(sched_down_mutex);
-static cpumask_t sched_down_cpumask;
-
-void tell_sched_cpu_down_begin(int cpu)
-{
- mutex_lock(&sched_down_mutex);
- cpumask_set_cpu(cpu, &sched_down_cpumask);
- mutex_unlock(&sched_down_mutex);
-}
-
-void tell_sched_cpu_down_done(int cpu)
-{
- mutex_lock(&sched_down_mutex);
- cpumask_clear_cpu(cpu, &sched_down_cpumask);
- mutex_unlock(&sched_down_mutex);
-}
+ if (p->sched_class && p->sched_class->set_cpus_allowed)
+ p->sched_class->set_cpus_allowed(p, new_mask);
-/**
- * migrate_me - try to move the current task off this cpu
- *
- * Used by the pin_current_cpu() code to try to get tasks
- * to move off the current CPU as it is going down.
- * It will only move the task if the task isn't pinned to
- * the CPU (with migrate_disable, affinity or NO_SETAFFINITY)
- * and the task has to be in a RUNNING state. Otherwise the
- * movement of the task will wake it up (change its state
- * to running) when the task did not expect it.
- *
- * Returns 1 if it succeeded in moving the current task
- * 0 otherwise.
- */
-int migrate_me(void)
-{
- struct task_struct *p = current;
- struct migration_arg arg;
- struct cpumask *cpumask;
- struct cpumask *mask;
- unsigned long flags;
- unsigned int dest_cpu;
- struct rq *rq;
-
- /*
- * We can not migrate tasks bounded to a CPU or tasks not
- * running. The movement of the task will wake it up.
- */
- if (p->flags & PF_NO_SETAFFINITY || p->state)
- return 0;
-
- mutex_lock(&sched_down_mutex);
- rq = task_rq_lock(p, &flags);
-
- cpumask = &__get_cpu_var(sched_cpumasks);
- mask = &p->cpus_allowed;
-
- cpumask_andnot(cpumask, mask, &sched_down_cpumask);
-
- if (!cpumask_weight(cpumask)) {
- /* It's only on this CPU? */
- task_rq_unlock(rq, p, &flags);
- mutex_unlock(&sched_down_mutex);
- return 0;
- }
-
- dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
-
- arg.task = p;
- arg.dest_cpu = dest_cpu;
-
- task_rq_unlock(rq, p, &flags);
-
- stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
- tlb_migrate_finish(p->mm);
- mutex_unlock(&sched_down_mutex);
-
- return 1;
+ cpumask_copy(&p->cpus_allowed, new_mask);
+ p->nr_cpus_allowed = cpumask_weight(new_mask);
}
/*
@@ -4625,7 +4278,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
do_set_cpus_allowed(p, new_mask);
/* Can the task run on the task's current CPU? If so, we're done */
- if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
+ if (cpumask_test_cpu(task_cpu(p), new_mask))
goto out;
dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
@@ -4714,8 +4367,6 @@ static int migration_cpu_stop(void *data)
#ifdef CONFIG_HOTPLUG_CPU
-static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
-
/*
* Ensures that the idle task is using init_mm right before its cpu goes
* offline.
@@ -4728,12 +4379,7 @@ void idle_task_exit(void)
if (mm != &init_mm)
switch_mm(mm, &init_mm, current);
-
- /*
- * Defer the cleanup to an alive cpu. On RT we can neither
- * call mmdrop() nor mmdrop_delayed() from here.
- */
- per_cpu(idle_last_mm, smp_processor_id()) = mm;
+ mmdrop(mm);
}
/*
@@ -5057,10 +4703,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_DEAD:
calc_load_migrate(rq);
- if (per_cpu(idle_last_mm, cpu)) {
- mmdrop(per_cpu(idle_last_mm, cpu));
- per_cpu(idle_last_mm, cpu) = NULL;
- }
break;
#endif
}
@@ -6933,8 +6575,7 @@ void __init sched_init(void)
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
static inline int preempt_count_equals(int preempt_offset)
{
- int nested = (preempt_count() & ~PREEMPT_ACTIVE) +
- sched_rcu_preempt_depth();
+ int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
return (nested == preempt_offset);
}
@@ -6944,8 +6585,7 @@ void __might_sleep(const char *file, int line, int preempt_offset)
static unsigned long prev_jiffy; /* ratelimiting */
rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
- if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
- !is_idle_task(current)) ||
+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
system_state != SYSTEM_RUNNING || oops_in_progress)
return;
if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
@@ -6963,13 +6603,6 @@ void __might_sleep(const char *file, int line, int preempt_offset)
debug_show_held_locks(current);
if (irqs_disabled())
print_irqtrace_events(current);
-#ifdef CONFIG_DEBUG_PREEMPT
- if (!preempt_count_equals(preempt_offset)) {
- pr_err("Preemption disabled at:");
- print_ip_sym(current->preempt_disable_ip);
- pr_cont("\n");
- }
-#endif
dump_stack();
}
EXPORT_SYMBOL(__might_sleep);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 1681f49..9994791 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -655,45 +655,37 @@ static void __vtime_account_system(struct task_struct *tsk)
void vtime_account_system(struct task_struct *tsk)
{
- raw_spin_lock(&tsk->vtime_lock);
- write_seqcount_begin(&tsk->vtime_seq);
+ write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
- write_seqcount_end(&tsk->vtime_seq);
- raw_spin_unlock(&tsk->vtime_lock);
+ write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_gen_account_irq_exit(struct task_struct *tsk)
{
- raw_spin_lock(&tsk->vtime_lock);
- write_seqcount_begin(&tsk->vtime_seq);
+ write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
if (context_tracking_in_user())
tsk->vtime_snap_whence = VTIME_USER;
- write_seqcount_end(&tsk->vtime_seq);
- raw_spin_unlock(&tsk->vtime_lock);
+ write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_account_user(struct task_struct *tsk)
{
cputime_t delta_cpu;
- raw_spin_lock(&tsk->vtime_lock);
- write_seqcount_begin(&tsk->vtime_seq);
+ write_seqlock(&tsk->vtime_seqlock);
delta_cpu = get_vtime_delta(tsk);
tsk->vtime_snap_whence = VTIME_SYS;
account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
- write_seqcount_end(&tsk->vtime_seq);
- raw_spin_unlock(&tsk->vtime_lock);
+ write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_user_enter(struct task_struct *tsk)
{
- raw_spin_lock(&tsk->vtime_lock);
- write_seqcount_begin(&tsk->vtime_seq);
+ write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
tsk->vtime_snap_whence = VTIME_USER;
- write_seqcount_end(&tsk->vtime_seq);
- raw_spin_unlock(&tsk->vtime_lock);
+ write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_guest_enter(struct task_struct *tsk)
@@ -705,23 +697,19 @@ void vtime_guest_enter(struct task_struct *tsk)
* synchronization against the reader (task_gtime())
* that can thus safely catch up with a tickless delta.
*/
- raw_spin_lock(&tsk->vtime_lock);
- write_seqcount_begin(&tsk->vtime_seq);
+ write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
current->flags |= PF_VCPU;
- write_seqcount_end(&tsk->vtime_seq);
- raw_spin_unlock(&tsk->vtime_lock);
+ write_sequnlock(&tsk->vtime_seqlock);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);
void vtime_guest_exit(struct task_struct *tsk)
{
- raw_spin_lock(&tsk->vtime_lock);
- write_seqcount_begin(&tsk->vtime_seq);
+ write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
current->flags &= ~PF_VCPU;
- write_seqcount_end(&tsk->vtime_seq);
- raw_spin_unlock(&tsk->vtime_lock);
+ write_sequnlock(&tsk->vtime_seqlock);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);
@@ -734,30 +722,24 @@ void vtime_account_idle(struct task_struct *tsk)
void arch_vtime_task_switch(struct task_struct *prev)
{
- raw_spin_lock(&prev->vtime_lock);
- write_seqcount_begin(&prev->vtime_seq);
+ write_seqlock(&prev->vtime_seqlock);
prev->vtime_snap_whence = VTIME_SLEEPING;
- write_seqcount_end(&prev->vtime_seq);
- raw_spin_unlock(&prev->vtime_lock);
+ write_sequnlock(&prev->vtime_seqlock);
- raw_spin_lock(&current->vtime_lock);
- write_seqcount_begin(&current->vtime_seq);
+ write_seqlock(&current->vtime_seqlock);
current->vtime_snap_whence = VTIME_SYS;
current->vtime_snap = sched_clock_cpu(smp_processor_id());
- write_seqcount_end(&current->vtime_seq);
- raw_spin_unlock(&current->vtime_lock);
+ write_sequnlock(&current->vtime_seqlock);
}
void vtime_init_idle(struct task_struct *t, int cpu)
{
unsigned long flags;
- raw_spin_lock_irqsave(&t->vtime_lock, flags);
- write_seqcount_begin(&t->vtime_seq);
+ write_seqlock_irqsave(&t->vtime_seqlock, flags);
t->vtime_snap_whence = VTIME_SYS;
t->vtime_snap = sched_clock_cpu(cpu);
- write_seqcount_end(&t->vtime_seq);
- raw_spin_unlock_irqrestore(&t->vtime_lock, flags);
+ write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
}
cputime_t task_gtime(struct task_struct *t)
@@ -766,13 +748,13 @@ cputime_t task_gtime(struct task_struct *t)
cputime_t gtime;
do {
- seq = read_seqcount_begin(&t->vtime_seq);
+ seq = read_seqbegin(&t->vtime_seqlock);
gtime = t->gtime;
if (t->flags & PF_VCPU)
gtime += vtime_delta(t);
- } while (read_seqcount_retry(&t->vtime_seq, seq));
+ } while (read_seqretry(&t->vtime_seqlock, seq));
return gtime;
}
@@ -795,7 +777,7 @@ fetch_task_cputime(struct task_struct *t,
*udelta = 0;
*sdelta = 0;
- seq = read_seqcount_begin(&t->vtime_seq);
+ seq = read_seqbegin(&t->vtime_seqlock);
if (u_dst)
*u_dst = *u_src;
@@ -819,7 +801,7 @@ fetch_task_cputime(struct task_struct *t,
if (t->vtime_snap_whence == VTIME_SYS)
*sdelta = delta;
}
- } while (read_seqcount_retry(&t->vtime_seq, seq));
+ } while (read_seqretry(&t->vtime_seqlock, seq));
}
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 70812af..fd9ca1d 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -256,9 +256,6 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
P(rt_throttled);
PN(rt_time);
PN(rt_runtime);
-#ifdef CONFIG_SMP
- P(rt_nr_migratory);
-#endif
#undef PN
#undef P
@@ -588,10 +585,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
#endif
P(policy);
P(prio);
-#ifdef CONFIG_PREEMPT_RT_FULL
- P(migrate_disable);
-#endif
- P(nr_cpus_allowed);
#undef PN
#undef __PN
#undef P
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0af1448..790e2fc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1902,7 +1902,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
ideal_runtime = sched_slice(cfs_rq, curr);
delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
if (delta_exec > ideal_runtime) {
- resched_task_lazy(rq_of(cfs_rq)->curr);
+ resched_task(rq_of(cfs_rq)->curr);
/*
* The current task ran long enough, ensure it doesn't get
* re-elected due to buddy favours.
@@ -1926,7 +1926,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
return;
if (delta > ideal_runtime)
- resched_task_lazy(rq_of(cfs_rq)->curr);
+ resched_task(rq_of(cfs_rq)->curr);
}
static void
@@ -2047,7 +2047,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
* validating it and just reschedule.
*/
if (queued) {
- resched_task_lazy(rq_of(cfs_rq)->curr);
+ resched_task(rq_of(cfs_rq)->curr);
return;
}
/*
@@ -2237,7 +2237,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
* hierarchy can be throttled
*/
if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
- resched_task_lazy(rq_of(cfs_rq)->curr);
+ resched_task(rq_of(cfs_rq)->curr);
}
static __always_inline
@@ -2837,7 +2837,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
if (delta < 0) {
if (rq->curr == p)
- resched_task_lazy(p);
+ resched_task(p);
return;
}
@@ -3704,7 +3704,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
return;
preempt:
- resched_task_lazy(curr);
+ resched_task(curr);
/*
* Only set the backward buddy when the current task is still
* on the rq. This can happen when a wakeup gets interleaved
@@ -5979,7 +5979,7 @@ static void task_fork_fair(struct task_struct *p)
* 'current' within the tree based on its new key value.
*/
swap(curr->vruntime, se->vruntime);
- resched_task_lazy(rq->curr);
+ resched_task(rq->curr);
}
se->vruntime -= cfs_rq->min_vruntime;
@@ -6004,7 +6004,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
*/
if (rq->curr == p) {
if (p->prio > oldprio)
- resched_task_lazy(rq->curr);
+ resched_task(rq->curr);
} else
check_preempt_curr(rq, p, 0);
}
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 4594051..99399f8 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -50,18 +50,11 @@ SCHED_FEAT(LB_BIAS, true)
*/
SCHED_FEAT(NONTASK_POWER, true)
-#ifndef CONFIG_PREEMPT_RT_FULL
/*
* Queue remote wakeups on the target CPU and process them
* using the scheduler IPI. Reduces rq->lock contention/bounces.
*/
SCHED_FEAT(TTWU_QUEUE, true)
-#else
-SCHED_FEAT(TTWU_QUEUE, false)
-# ifdef CONFIG_PREEMPT_LAZY
-SCHED_FEAT(PREEMPT_LAZY, true)
-# endif
-#endif
SCHED_FEAT(FORCE_SD_OVERLAP, false)
SCHED_FEAT(RT_RUNTIME_SHARE, true)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 240fc60..ff04e1a 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -43,7 +43,6 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
hrtimer_init(&rt_b->rt_period_timer,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- rt_b->rt_period_timer.irqsafe = 1;
rt_b->rt_period_timer.function = sched_rt_period_timer;
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2843303..4f31059 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -898,7 +898,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */
#define WF_FORK 0x02 /* child wakeup after fork */
#define WF_MIGRATED 0x4 /* internal use, task got migrated */
-#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */
/*
* To aid in avoiding the subversion of "niceness" due to uneven distribution
@@ -1046,15 +1045,6 @@ extern void init_sched_fair_class(void);
extern void resched_task(struct task_struct *p);
extern void resched_cpu(int cpu);
-#ifdef CONFIG_PREEMPT_LAZY
-extern void resched_task_lazy(struct task_struct *tsk);
-#else
-static inline void resched_task_lazy(struct task_struct *tsk)
-{
- resched_task(tsk);
-}
-#endif
-
extern struct rt_bandwidth def_rt_bandwidth;
extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);