Diffstat (limited to 'kernel/sched')
-rw-r--r--   kernel/sched/core.c     |  569
-rw-r--r--   kernel/sched/cputime.c  |   62
-rw-r--r--   kernel/sched/debug.c    |    7
-rw-r--r--   kernel/sched/fair.c     |   16
-rw-r--r--   kernel/sched/features.h |    7
-rw-r--r--   kernel/sched/rt.c       |    1
-rw-r--r--   kernel/sched/sched.h    |   10
7 files changed, 131 insertions, 541 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8749d20..a494ace 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -272,11 +272,7 @@ late_initcall(sched_init_debug);
  * Number of tasks to iterate in a single balance run.
  * Limited because this is done with IRQs disabled.
  */
-#ifndef CONFIG_PREEMPT_RT_FULL
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
-#else
-const_debug unsigned int sysctl_sched_nr_migrate = 8;
-#endif
 
 /*
  * period over which we average the RT time consumption, measured
@@ -495,7 +491,6 @@ static void init_rq_hrtick(struct rq *rq)
 
     hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
     rq->hrtick_timer.function = hrtick;
-    rq->hrtick_timer.irqsafe = 1;
 }
 #else	/* CONFIG_SCHED_HRTICK */
 static inline void hrtick_clear(struct rq *rq)
@@ -540,37 +535,6 @@ void resched_task(struct task_struct *p)
         smp_send_reschedule(cpu);
 }
 
-#ifdef CONFIG_PREEMPT_LAZY
-void resched_task_lazy(struct task_struct *p)
-{
-    int cpu;
-
-    if (!sched_feat(PREEMPT_LAZY)) {
-        resched_task(p);
-        return;
-    }
-
-    assert_raw_spin_locked(&task_rq(p)->lock);
-
-    if (test_tsk_need_resched(p))
-        return;
-
-    if (test_tsk_need_resched_lazy(p))
-        return;
-
-    set_tsk_need_resched_lazy(p);
-
-    cpu = task_cpu(p);
-    if (cpu == smp_processor_id())
-        return;
-
-    /* NEED_RESCHED_LAZY must be visible before we test polling */
-    smp_mb();
-    if (!tsk_is_polling(p))
-        smp_send_reschedule(cpu);
-}
-#endif
-
 void resched_cpu(int cpu)
 {
     struct rq *rq = cpu_rq(cpu);
@@ -735,17 +699,6 @@ void resched_task(struct task_struct *p)
     assert_raw_spin_locked(&task_rq(p)->lock);
     set_tsk_need_resched(p);
 }
-#ifdef CONFIG_PREEMPT_LAZY
-void resched_task_lazy(struct task_struct *p)
-{
-    if (!sched_feat(PREEMPT_LAZY)) {
-        resched_task(p);
-        return;
-    }
-    assert_raw_spin_locked(&task_rq(p)->lock);
-    set_tsk_need_resched_lazy(p);
-}
-#endif
 #endif /* CONFIG_SMP */
 
 #if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
@@ -1071,18 +1024,6 @@ struct migration_arg {
 
 static int migration_cpu_stop(void *data);
 
-static bool check_task_state(struct task_struct *p, long match_state)
-{
-    bool match = false;
-
-    raw_spin_lock_irq(&p->pi_lock);
-    if (p->state == match_state || p->saved_state == match_state)
-        match = true;
-    raw_spin_unlock_irq(&p->pi_lock);
-
-    return match;
-}
-
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
@@ -1127,7 +1068,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
          * is actually now running somewhere else!
          */
         while (task_running(rq, p)) {
-            if (match_state && !check_task_state(p, match_state))
+            if (match_state && unlikely(p->state != match_state))
                 return 0;
             cpu_relax();
         }
@@ -1142,8 +1083,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
         running = task_running(rq, p);
         on_rq = p->on_rq;
         ncsw = 0;
-        if (!match_state || p->state == match_state
-            || p->saved_state == match_state)
+        if (!match_state || p->state == match_state)
             ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
         task_rq_unlock(rq, p, &flags);
 
@@ -1289,12 +1229,6 @@ out:
         }
     }
 
-    /*
-     * Clear PF_NO_SETAFFINITY, otherwise we wreckage
-     * migrate_disable/enable. See optimization for
-     * PF_NO_SETAFFINITY tasks there.
-     */
-    p->flags &= ~PF_NO_SETAFFINITY;
 
     return dest_cpu;
 }
@@ -1374,6 +1308,10 @@ static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
 {
     activate_task(rq, p, en_flags);
     p->on_rq = 1;
+
+    /* if a worker is waking up, notify workqueue */
+    if (p->flags & PF_WQ_WORKER)
+        wq_worker_waking_up(p, cpu_of(rq));
 }
 
 /*
@@ -1548,27 +1486,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
      */
     smp_mb__before_spinlock();
     raw_spin_lock_irqsave(&p->pi_lock, flags);
-    if (!(p->state & state)) {
-        /*
-         * The task might be running due to a spinlock sleeper
-         * wakeup. Check the saved state and set it to running
-         * if the wakeup condition is true.
-         */
-        if (!(wake_flags & WF_LOCK_SLEEPER)) {
-            if (p->saved_state & state) {
-                p->saved_state = TASK_RUNNING;
-                success = 1;
-            }
-        }
+    if (!(p->state & state))
         goto out;
-    }
-
-    /*
-     * If this is a regular wakeup, then we can unconditionally
-     * clear the saved state of a "lock sleeper".
-     */
-    if (!(wake_flags & WF_LOCK_SLEEPER))
-        p->saved_state = TASK_RUNNING;
 
     success = 1; /* we're going to change ->state */
     cpu = task_cpu(p);
@@ -1611,6 +1530,42 @@ out:
 }
 
 /**
+ * try_to_wake_up_local - try to wake up a local task with rq lock held
+ * @p: the thread to be awakened
+ *
+ * Put @p on the run-queue if it's not already there. The caller must
+ * ensure that this_rq() is locked, @p is bound to this_rq() and not
+ * the current task.
+ */
+static void try_to_wake_up_local(struct task_struct *p)
+{
+    struct rq *rq = task_rq(p);
+
+    if (WARN_ON_ONCE(rq != this_rq()) ||
+        WARN_ON_ONCE(p == current))
+        return;
+
+    lockdep_assert_held(&rq->lock);
+
+    if (!raw_spin_trylock(&p->pi_lock)) {
+        raw_spin_unlock(&rq->lock);
+        raw_spin_lock(&p->pi_lock);
+        raw_spin_lock(&rq->lock);
+    }
+
+    if (!(p->state & TASK_NORMAL))
+        goto out;
+
+    if (!p->on_rq)
+        ttwu_activate(rq, p, ENQUEUE_WAKEUP);
+
+    ttwu_do_wakeup(rq, p, 0);
+    ttwu_stat(p, smp_processor_id(), 0);
+out:
+    raw_spin_unlock(&p->pi_lock);
+}
+
+/**
  * wake_up_process - Wake up a specific process
  * @p: The process to be woken up.
  *
@@ -1624,23 +1579,11 @@ out:
  */
 int wake_up_process(struct task_struct *p)
 {
-    WARN_ON(__task_is_stopped_or_traced(p));
+    WARN_ON(task_is_stopped_or_traced(p));
     return try_to_wake_up(p, TASK_NORMAL, 0);
 }
 EXPORT_SYMBOL(wake_up_process);
 
-/**
- * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
- * @p: The process to be woken up.
- *
- * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
- * the nature of the wakeup.
- */
-int wake_up_lock_sleeper(struct task_struct *p)
-{
-    return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
-}
-
 int wake_up_state(struct task_struct *p, unsigned int state)
 {
     return try_to_wake_up(p, state, 0);
@@ -1778,9 +1721,6 @@ void sched_fork(struct task_struct *p)
     /* Want to start with kernel preemption disabled. */
     task_thread_info(p)->preempt_count = 1;
 #endif
-#ifdef CONFIG_HAVE_PREEMPT_LAZY
-    task_thread_info(p)->preempt_lazy_count = 0;
-#endif
 #ifdef CONFIG_SMP
     plist_node_init(&p->pushable_tasks, MAX_PRIO);
 #endif
@@ -1947,12 +1887,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
     finish_arch_post_lock_switch();
 
     fire_sched_in_preempt_notifiers(current);
-    /*
-     * We use mmdrop_delayed() here so we don't have to do the
-     * full __mmdrop() when we are the last user.
-     */
     if (mm)
-        mmdrop_delayed(mm);
+        mmdrop(mm);
     if (unlikely(prev_state == TASK_DEAD)) {
         /*
          * Remove function-return probe instances associated with this
@@ -2296,13 +2232,8 @@ void __kprobes add_preempt_count(int val)
     DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
                 PREEMPT_MASK - 10);
 #endif
-    if (preempt_count() == val) {
-        unsigned long ip = get_parent_ip(CALLER_ADDR1);
-#ifdef CONFIG_DEBUG_PREEMPT
-        current->preempt_disable_ip = ip;
-#endif
-        trace_preempt_off(CALLER_ADDR0, ip);
-    }
+    if (preempt_count() == val)
+        trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
 }
 EXPORT_SYMBOL(add_preempt_count);
 
@@ -2345,13 +2276,6 @@ static noinline void __schedule_bug(struct task_struct *prev)
     print_modules();
     if (irqs_disabled())
         print_irqtrace_events(prev);
-#ifdef CONFIG_DEBUG_PREEMPT
-    if (in_atomic_preempt_off()) {
-        pr_err("Preemption disabled at:");
-        print_ip_sym(current->preempt_disable_ip);
-        pr_cont("\n");
-    }
-#endif
     dump_stack();
     add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 }
@@ -2375,133 +2299,6 @@ static inline void schedule_debug(struct task_struct *prev)
     schedstat_inc(this_rq(), sched_count);
 }
 
-#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
-#define MIGRATE_DISABLE_SET_AFFIN	(1<<30) /* Can't make a negative */
-#define migrate_disabled_updated(p)	((p)->migrate_disable & MIGRATE_DISABLE_SET_AFFIN)
-#define migrate_disable_count(p)	((p)->migrate_disable & ~MIGRATE_DISABLE_SET_AFFIN)
-
-static inline void update_migrate_disable(struct task_struct *p)
-{
-    const struct cpumask *mask;
-
-    if (likely(!p->migrate_disable))
-        return;
-
-    /* Did we already update affinity? */
-    if (unlikely(migrate_disabled_updated(p)))
-        return;
-
-    /*
-     * Since this is always current we can get away with only locking
-     * rq->lock, the ->cpus_allowed value can normally only be changed
-     * while holding both p->pi_lock and rq->lock, but seeing that this
-     * is current, we cannot actually be waking up, so all code that
-     * relies on serialization against p->pi_lock is out of scope.
-     *
-     * Having rq->lock serializes us against things like
-     * set_cpus_allowed_ptr() that can still happen concurrently.
-     */
-    mask = tsk_cpus_allowed(p);
-
-    if (p->sched_class->set_cpus_allowed)
-        p->sched_class->set_cpus_allowed(p, mask);
-    /* mask==cpumask_of(task_cpu(p)) which has a cpumask_weight==1 */
-    p->nr_cpus_allowed = 1;
-
-    /* Let migrate_enable know to fix things back up */
-    p->migrate_disable |= MIGRATE_DISABLE_SET_AFFIN;
-}
-
-void migrate_disable(void)
-{
-    struct task_struct *p = current;
-
-    if (in_atomic()) {
-#ifdef CONFIG_SCHED_DEBUG
-        p->migrate_disable_atomic++;
-#endif
-        return;
-    }
-
-#ifdef CONFIG_SCHED_DEBUG
-    if (unlikely(p->migrate_disable_atomic)) {
-        tracing_off();
-        WARN_ON_ONCE(1);
-    }
-#endif
-
-    if (p->migrate_disable) {
-        p->migrate_disable++;
-        return;
-    }
-
-    preempt_disable();
-    preempt_lazy_disable();
-    pin_current_cpu();
-    p->migrate_disable = 1;
-    preempt_enable();
-}
-EXPORT_SYMBOL(migrate_disable);
-
-void migrate_enable(void)
-{
-    struct task_struct *p = current;
-    const struct cpumask *mask;
-    unsigned long flags;
-    struct rq *rq;
-
-    if (in_atomic()) {
-#ifdef CONFIG_SCHED_DEBUG
-        p->migrate_disable_atomic--;
-#endif
-        return;
-    }
-
-#ifdef CONFIG_SCHED_DEBUG
-    if (unlikely(p->migrate_disable_atomic)) {
-        tracing_off();
-        WARN_ON_ONCE(1);
-    }
-#endif
-    WARN_ON_ONCE(p->migrate_disable <= 0);
-
-    if (migrate_disable_count(p) > 1) {
-        p->migrate_disable--;
-        return;
-    }
-
-    preempt_disable();
-    if (unlikely(migrate_disabled_updated(p))) {
-        /*
-         * Undo whatever update_migrate_disable() did, also see there
-         * about locking.
-         */
-        rq = this_rq();
-        raw_spin_lock_irqsave(&rq->lock, flags);
-
-        /*
-         * Clearing migrate_disable causes tsk_cpus_allowed to
-         * show the tasks original cpu affinity.
-         */
-        p->migrate_disable = 0;
-        mask = tsk_cpus_allowed(p);
-        if (p->sched_class->set_cpus_allowed)
-            p->sched_class->set_cpus_allowed(p, mask);
-        p->nr_cpus_allowed = cpumask_weight(mask);
-        raw_spin_unlock_irqrestore(&rq->lock, flags);
-    } else
-        p->migrate_disable = 0;
-
-    unpin_current_cpu();
-    preempt_enable();
-    preempt_lazy_enable();
-}
-EXPORT_SYMBOL(migrate_enable);
-#else
-static inline void update_migrate_disable(struct task_struct *p) { }
-#define migrate_disabled_updated(p) 0
-#endif
-
 static void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
     if (prev->on_rq || rq->skip_clock_update < 0)
@@ -2601,8 +2398,6 @@ need_resched:
     smp_mb__before_spinlock();
     raw_spin_lock_irq(&rq->lock);
 
-    update_migrate_disable(prev);
-
     switch_count = &prev->nivcsw;
     if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
         if (unlikely(signal_pending_state(prev->state, prev))) {
@@ -2610,6 +2405,19 @@ need_resched:
         } else {
             deactivate_task(rq, prev, DEQUEUE_SLEEP);
             prev->on_rq = 0;
+
+            /*
+             * If a worker went to sleep, notify and ask workqueue
+             * whether it wants to wake up a task to maintain
+             * concurrency.
+             */
+            if (prev->flags & PF_WQ_WORKER) {
+                struct task_struct *to_wakeup;
+
+                to_wakeup = wq_worker_sleeping(prev, cpu);
+                if (to_wakeup)
+                    try_to_wake_up_local(to_wakeup);
+            }
         }
         switch_count = &prev->nvcsw;
     }
@@ -2622,7 +2430,6 @@ need_resched:
     put_prev_task(rq, prev);
     next = pick_next_task(rq);
     clear_tsk_need_resched(prev);
-    clear_tsk_need_resched_lazy(prev);
     rq->skip_clock_update = 0;
 
     if (likely(prev != next)) {
@@ -2653,14 +2460,6 @@ static inline void sched_submit_work(struct task_struct *tsk)
 {
     if (!tsk->state || tsk_is_pi_blocked(tsk))
         return;
-
-    /*
-     * If a worker went to sleep, notify and ask workqueue whether
-     * it wants to wake up a task to maintain concurrency.
-     */
-    if (tsk->flags & PF_WQ_WORKER)
-        wq_worker_sleeping(tsk);
-
     /*
      * If we are going to sleep and we have plugged IO queued,
      * make sure to submit it to avoid deadlocks.
@@ -2669,19 +2468,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
         blk_schedule_flush_plug(tsk);
 }
 
-static inline void sched_update_worker(struct task_struct *tsk)
-{
-    if (tsk->flags & PF_WQ_WORKER)
-        wq_worker_running(tsk);
-}
-
 asmlinkage void __sched schedule(void)
 {
     struct task_struct *tsk = current;
 
     sched_submit_work(tsk);
     __schedule();
-    sched_update_worker(tsk);
 }
 EXPORT_SYMBOL(schedule);
 
@@ -2727,26 +2519,9 @@ asmlinkage void __sched notrace preempt_schedule(void)
     if (likely(!preemptible()))
         return;
 
-#ifdef CONFIG_PREEMPT_LAZY
-    /*
-     * Check for lazy preemption
-     */
-    if (current_thread_info()->preempt_lazy_count &&
-        !test_thread_flag(TIF_NEED_RESCHED))
-        return;
-#endif
     do {
         add_preempt_count_notrace(PREEMPT_ACTIVE);
-        /*
-         * The add/subtract must not be traced by the function
-         * tracer. But we still want to account for the
-         * preempt off latency tracer. Since the _notrace versions
-         * of add/subtract skip the accounting for latency tracer
-         * we must force it manually.
-         */
-        start_critical_timings();
         __schedule();
-        stop_critical_timings();
         sub_preempt_count_notrace(PREEMPT_ACTIVE);
 
         /*
@@ -2919,10 +2694,10 @@ void complete(struct completion *x)
 {
     unsigned long flags;
 
-    raw_spin_lock_irqsave(&x->wait.lock, flags);
+    spin_lock_irqsave(&x->wait.lock, flags);
     x->done++;
-    __swait_wake_locked(&x->wait, TASK_NORMAL, 1);
-    raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+    __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
+    spin_unlock_irqrestore(&x->wait.lock, flags);
 }
 EXPORT_SYMBOL(complete);
 
@@ -2939,10 +2714,10 @@ void complete_all(struct completion *x)
 {
     unsigned long flags;
 
-    raw_spin_lock_irqsave(&x->wait.lock, flags);
+    spin_lock_irqsave(&x->wait.lock, flags);
     x->done += UINT_MAX/2;
-    __swait_wake_locked(&x->wait, TASK_NORMAL, 0);
-    raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+    __wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL);
+    spin_unlock_irqrestore(&x->wait.lock, flags);
 }
 EXPORT_SYMBOL(complete_all);
 
@@ -2951,20 +2726,20 @@ do_wait_for_common(struct completion *x,
            long (*action)(long), long timeout, int state)
 {
     if (!x->done) {
-        DEFINE_SWAITER(wait);
+        DECLARE_WAITQUEUE(wait, current);
 
-        swait_prepare_locked(&x->wait, &wait);
+        __add_wait_queue_tail_exclusive(&x->wait, &wait);
         do {
             if (signal_pending_state(state, current)) {
                 timeout = -ERESTARTSYS;
                 break;
             }
             __set_current_state(state);
-            raw_spin_unlock_irq(&x->wait.lock);
+            spin_unlock_irq(&x->wait.lock);
             timeout = action(timeout);
-            raw_spin_lock_irq(&x->wait.lock);
+            spin_lock_irq(&x->wait.lock);
         } while (!x->done && timeout);
-        swait_finish_locked(&x->wait, &wait);
+        __remove_wait_queue(&x->wait, &wait);
         if (!x->done)
             return timeout;
     }
@@ -2978,9 +2753,9 @@ __wait_for_common(struct completion *x,
 {
     might_sleep();
 
-    raw_spin_lock_irq(&x->wait.lock);
+    spin_lock_irq(&x->wait.lock);
     timeout = do_wait_for_common(x, action, timeout, state);
-    raw_spin_unlock_irq(&x->wait.lock);
+    spin_unlock_irq(&x->wait.lock);
     return timeout;
 }
 
@@ -3156,12 +2931,12 @@ bool try_wait_for_completion(struct completion *x)
     unsigned long flags;
     int ret = 1;
 
-    raw_spin_lock_irqsave(&x->wait.lock, flags);
+    spin_lock_irqsave(&x->wait.lock, flags);
     if (!x->done)
         ret = 0;
     else
         x->done--;
-    raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+    spin_unlock_irqrestore(&x->wait.lock, flags);
     return ret;
 }
 EXPORT_SYMBOL(try_wait_for_completion);
@@ -3179,10 +2954,10 @@ bool completion_done(struct completion *x)
     unsigned long flags;
     int ret = 1;
 
-    raw_spin_lock_irqsave(&x->wait.lock, flags);
+    spin_lock_irqsave(&x->wait.lock, flags);
     if (!x->done)
         ret = 0;
-    raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+    spin_unlock_irqrestore(&x->wait.lock, flags);
     return ret;
 }
 EXPORT_SYMBOL(completion_done);
@@ -3243,8 +3018,7 @@ EXPORT_SYMBOL(sleep_on_timeout);
  * This function changes the 'effective' priority of a task. It does
  * not touch ->normal_prio like __setscheduler().
  *
- * Used by the rt_mutex code to implement priority inheritance
- * logic. Call site only calls if the priority of the task changed.
+ * Used by the rt_mutex code to implement priority inheritance logic.
  */
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
@@ -3475,25 +3249,20 @@ static struct task_struct *find_process_by_pid(pid_t pid)
     return pid ? find_task_by_vpid(pid) : current;
 }
 
-static void __setscheduler_params(struct task_struct *p, int policy, int prio)
-{
-    p->policy = policy;
-    p->rt_priority = prio;
-    p->normal_prio = normal_prio(p);
-    set_load_weight(p);
-}
-
 /* Actually do priority change: must hold rq lock. */
 static void
 __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
 {
-    __setscheduler_params(p, policy, prio);
+    p->policy = policy;
+    p->rt_priority = prio;
+    p->normal_prio = normal_prio(p);
     /* we are holding p->pi_lock already */
     p->prio = rt_mutex_getprio(p);
     if (rt_prio(p->prio))
         p->sched_class = &rt_sched_class;
     else
         p->sched_class = &fair_sched_class;
+    set_load_weight(p);
 }
 
 /*
@@ -3515,7 +3284,6 @@ static bool check_same_owner(struct task_struct *p)
 static int __sched_setscheduler(struct task_struct *p, int policy,
                 const struct sched_param *param, bool user)
 {
-    int newprio = MAX_RT_PRIO - 1 - param->sched_priority;
     int retval, oldprio, oldpolicy = -1, on_rq, running;
     unsigned long flags;
     const struct sched_class *prev_class;
@@ -3611,13 +3379,10 @@ recheck:
     }
 
     /*
-     * If not changing anything there's no need to proceed
-     * further, but store a possible modification of
-     * reset_on_fork.
+     * If not changing anything there's no need to proceed further:
      */
     if (unlikely(policy == p->policy && (!rt_policy(policy) ||
             param->sched_priority == p->rt_priority))) {
-        p->sched_reset_on_fork = reset_on_fork;
         task_rq_unlock(rq, p, &flags);
         return 0;
     }
@@ -3643,25 +3408,6 @@ recheck:
         task_rq_unlock(rq, p, &flags);
         goto recheck;
     }
-
-    p->sched_reset_on_fork = reset_on_fork;
-    oldprio = p->prio;
-
-    /*
-     * Special case for priority boosted tasks.
-     *
-     * If the new priority is lower or equal (user space view)
-     * than the current (boosted) priority, we just store the new
-     * normal parameters and do not touch the scheduler class and
-     * the runqueue. This will be done when the task deboost
-     * itself.
-     */
-    if (rt_mutex_check_prio(p, newprio)) {
-        __setscheduler_params(p, policy, param->sched_priority);
-        task_rq_unlock(rq, p, &flags);
-        return 0;
-    }
-
     on_rq = p->on_rq;
     running = task_current(rq, p);
     if (on_rq)
@@ -3669,18 +3415,17 @@ recheck:
     if (running)
         p->sched_class->put_prev_task(rq, p);
 
+    p->sched_reset_on_fork = reset_on_fork;
+
+    oldprio = p->prio;
     prev_class = p->sched_class;
     __setscheduler(rq, p, policy, param->sched_priority);
 
     if (running)
         p->sched_class->set_curr_task(rq);
-    if (on_rq) {
-        /*
-         * We enqueue to tail when the priority of a task is
-         * increased (user space view).
-         */
-        enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0);
-    }
+    if (on_rq)
+        enqueue_task(rq, p, 0);
+
     check_class_changed(rq, p, prev_class, oldprio);
     task_rq_unlock(rq, p, &flags);
 
@@ -4056,17 +3801,9 @@ static inline int should_resched(void)
 
 static void __cond_resched(void)
 {
-    do {
-        add_preempt_count(PREEMPT_ACTIVE);
-        __schedule();
-        sub_preempt_count(PREEMPT_ACTIVE);
-        /*
-         * Check again in case we missed a preemption
-         * opportunity between schedule and now.
-         */
-        barrier();
-
-    } while (need_resched());
+    add_preempt_count(PREEMPT_ACTIVE);
+    __schedule();
+    sub_preempt_count(PREEMPT_ACTIVE);
 }
 
 int __sched _cond_resched(void)
@@ -4107,7 +3844,6 @@ int __cond_resched_lock(spinlock_t *lock)
 }
 EXPORT_SYMBOL(__cond_resched_lock);
 
-#ifndef CONFIG_PREEMPT_RT_FULL
 int __sched __cond_resched_softirq(void)
 {
     BUG_ON(!in_softirq());
@@ -4121,7 +3857,6 @@ int __sched __cond_resched_softirq(void)
     return 0;
 }
 EXPORT_SYMBOL(__cond_resched_softirq);
-#endif
 
 /**
  * yield - yield the current processor to other threads.
@@ -4471,7 +4206,6 @@ void init_idle(struct task_struct *idle, int cpu)
     rcu_read_unlock();
 
     rq->curr = rq->idle = idle;
-    idle->on_rq = 1;
 #if defined(CONFIG_SMP)
     idle->on_cpu = 1;
 #endif
@@ -4479,9 +4213,7 @@ void init_idle(struct task_struct *idle, int cpu)
 
     /* Set the preempt count _outside_ the spinlocks! */
     task_thread_info(idle)->preempt_count = 0;
-#ifdef CONFIG_HAVE_PREEMPT_LAZY
-    task_thread_info(idle)->preempt_lazy_count = 0;
-#endif
+
     /*
      * The idle tasks have their own, simple scheduling class:
      */
@@ -4496,90 +4228,11 @@ void init_idle(struct task_struct *idle, int cpu)
 #ifdef CONFIG_SMP
 void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 {
-    if (!migrate_disabled_updated(p)) {
-        if (p->sched_class && p->sched_class->set_cpus_allowed)
-            p->sched_class->set_cpus_allowed(p, new_mask);
-        p->nr_cpus_allowed = cpumask_weight(new_mask);
-    }
-    cpumask_copy(&p->cpus_allowed, new_mask);
-}
-
-static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
-static DEFINE_MUTEX(sched_down_mutex);
-static cpumask_t sched_down_cpumask;
-
-void tell_sched_cpu_down_begin(int cpu)
-{
-    mutex_lock(&sched_down_mutex);
-    cpumask_set_cpu(cpu, &sched_down_cpumask);
-    mutex_unlock(&sched_down_mutex);
-}
-
-void tell_sched_cpu_down_done(int cpu)
-{
-    mutex_lock(&sched_down_mutex);
-    cpumask_clear_cpu(cpu, &sched_down_cpumask);
-    mutex_unlock(&sched_down_mutex);
-}
+    if (p->sched_class && p->sched_class->set_cpus_allowed)
+        p->sched_class->set_cpus_allowed(p, new_mask);
 
-/**
- * migrate_me - try to move the current task off this cpu
- *
- * Used by the pin_current_cpu() code to try to get tasks
- * to move off the current CPU as it is going down.
- * It will only move the task if the task isn't pinned to
- * the CPU (with migrate_disable, affinity or NO_SETAFFINITY)
- * and the task has to be in a RUNNING state. Otherwise the
- * movement of the task will wake it up (change its state
- * to running) when the task did not expect it.
- *
- * Returns 1 if it succeeded in moving the current task
- *         0 otherwise.
- */
-int migrate_me(void)
-{
-    struct task_struct *p = current;
-    struct migration_arg arg;
-    struct cpumask *cpumask;
-    struct cpumask *mask;
-    unsigned long flags;
-    unsigned int dest_cpu;
-    struct rq *rq;
-
-    /*
-     * We can not migrate tasks bounded to a CPU or tasks not
-     * running. The movement of the task will wake it up.
-     */
-    if (p->flags & PF_NO_SETAFFINITY || p->state)
-        return 0;
-
-    mutex_lock(&sched_down_mutex);
-    rq = task_rq_lock(p, &flags);
-
-    cpumask = &__get_cpu_var(sched_cpumasks);
-    mask = &p->cpus_allowed;
-
-    cpumask_andnot(cpumask, mask, &sched_down_cpumask);
-
-    if (!cpumask_weight(cpumask)) {
-        /* It's only on this CPU? */
-        task_rq_unlock(rq, p, &flags);
-        mutex_unlock(&sched_down_mutex);
-        return 0;
-    }
-
-    dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
-
-    arg.task = p;
-    arg.dest_cpu = dest_cpu;
-
-    task_rq_unlock(rq, p, &flags);
-
-    stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
-    tlb_migrate_finish(p->mm);
-    mutex_unlock(&sched_down_mutex);
-
-    return 1;
+    cpumask_copy(&p->cpus_allowed, new_mask);
+    p->nr_cpus_allowed = cpumask_weight(new_mask);
 }
 
 /*
@@ -4625,7 +4278,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
     do_set_cpus_allowed(p, new_mask);
 
     /* Can the task run on the task's current CPU? If so, we're done */
-    if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
+    if (cpumask_test_cpu(task_cpu(p), new_mask))
         goto out;
 
     dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
@@ -4714,8 +4367,6 @@ static int migration_cpu_stop(void *data)
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
-
 /*
  * Ensures that the idle task is using init_mm right before its cpu goes
  * offline.
@@ -4728,12 +4379,7 @@ void idle_task_exit(void)
 
     if (mm != &init_mm)
         switch_mm(mm, &init_mm, current);
-
-    /*
-     * Defer the cleanup to an alive cpu. On RT we can neither
-     * call mmdrop() nor mmdrop_delayed() from here.
-     */
-    per_cpu(idle_last_mm, smp_processor_id()) = mm;
+    mmdrop(mm);
 }
 
 /*
@@ -5057,10 +4703,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 
     case CPU_DEAD:
         calc_load_migrate(rq);
-        if (per_cpu(idle_last_mm, cpu)) {
-            mmdrop(per_cpu(idle_last_mm, cpu));
-            per_cpu(idle_last_mm, cpu) = NULL;
-        }
         break;
 #endif
     }
@@ -6933,8 +6575,7 @@ void __init sched_init(void)
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 static inline int preempt_count_equals(int preempt_offset)
 {
-    int nested = (preempt_count() & ~PREEMPT_ACTIVE) +
-        sched_rcu_preempt_depth();
+    int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
 
     return (nested == preempt_offset);
 }
@@ -6944,8 +6585,7 @@ void __might_sleep(const char *file, int line, int preempt_offset)
     static unsigned long prev_jiffy;	/* ratelimiting */
 
     rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
-    if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
-         !is_idle_task(current)) ||
+    if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
         system_state != SYSTEM_RUNNING || oops_in_progress)
         return;
     if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
@@ -6963,13 +6603,6 @@ void __might_sleep(const char *file, int line, int preempt_offset)
     debug_show_held_locks(current);
     if (irqs_disabled())
         print_irqtrace_events(current);
-#ifdef CONFIG_DEBUG_PREEMPT
-    if (!preempt_count_equals(preempt_offset)) {
-        pr_err("Preemption disabled at:");
-        print_ip_sym(current->preempt_disable_ip);
-        pr_cont("\n");
-    }
-#endif
     dump_stack();
 }
 EXPORT_SYMBOL(__might_sleep);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 1681f49..9994791 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -655,45 +655,37 @@ static void __vtime_account_system(struct task_struct *tsk)
 
 void vtime_account_system(struct task_struct *tsk)
 {
-    raw_spin_lock(&tsk->vtime_lock);
-    write_seqcount_begin(&tsk->vtime_seq);
+    write_seqlock(&tsk->vtime_seqlock);
     __vtime_account_system(tsk);
-    write_seqcount_end(&tsk->vtime_seq);
-    raw_spin_unlock(&tsk->vtime_lock);
+    write_sequnlock(&tsk->vtime_seqlock);
 }
 
 void vtime_gen_account_irq_exit(struct task_struct *tsk)
 {
-    raw_spin_lock(&tsk->vtime_lock);
-    write_seqcount_begin(&tsk->vtime_seq);
+    write_seqlock(&tsk->vtime_seqlock);
     __vtime_account_system(tsk);
     if (context_tracking_in_user())
         tsk->vtime_snap_whence = VTIME_USER;
-    write_seqcount_end(&tsk->vtime_seq);
-    raw_spin_unlock(&tsk->vtime_lock);
+    write_sequnlock(&tsk->vtime_seqlock);
 }
 
 void vtime_account_user(struct task_struct *tsk)
 {
     cputime_t delta_cpu;
 
-    raw_spin_lock(&tsk->vtime_lock);
-    write_seqcount_begin(&tsk->vtime_seq);
+    write_seqlock(&tsk->vtime_seqlock);
     delta_cpu = get_vtime_delta(tsk);
     tsk->vtime_snap_whence = VTIME_SYS;
     account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
-    write_seqcount_end(&tsk->vtime_seq);
-    raw_spin_unlock(&tsk->vtime_lock);
+    write_sequnlock(&tsk->vtime_seqlock);
 }
 
 void vtime_user_enter(struct task_struct *tsk)
 {
-    raw_spin_lock(&tsk->vtime_lock);
-    write_seqcount_begin(&tsk->vtime_seq);
+    write_seqlock(&tsk->vtime_seqlock);
     __vtime_account_system(tsk);
     tsk->vtime_snap_whence = VTIME_USER;
-    write_seqcount_end(&tsk->vtime_seq);
-    raw_spin_unlock(&tsk->vtime_lock);
+    write_sequnlock(&tsk->vtime_seqlock);
 }
 
 void vtime_guest_enter(struct task_struct *tsk)
@@ -705,23 +697,19 @@ void vtime_guest_enter(struct task_struct *tsk)
      * synchronization against the reader (task_gtime())
      * that can thus safely catch up with a tickless delta.
      */
-    raw_spin_lock(&tsk->vtime_lock);
-    write_seqcount_begin(&tsk->vtime_seq);
+    write_seqlock(&tsk->vtime_seqlock);
     __vtime_account_system(tsk);
     current->flags |= PF_VCPU;
-    write_seqcount_end(&tsk->vtime_seq);
-    raw_spin_unlock(&tsk->vtime_lock);
+    write_sequnlock(&tsk->vtime_seqlock);
 }
 EXPORT_SYMBOL_GPL(vtime_guest_enter);
 
 void vtime_guest_exit(struct task_struct *tsk)
 {
-    raw_spin_lock(&tsk->vtime_lock);
-    write_seqcount_begin(&tsk->vtime_seq);
+    write_seqlock(&tsk->vtime_seqlock);
     __vtime_account_system(tsk);
     current->flags &= ~PF_VCPU;
-    write_seqcount_end(&tsk->vtime_seq);
-    raw_spin_unlock(&tsk->vtime_lock);
+    write_sequnlock(&tsk->vtime_seqlock);
 }
 EXPORT_SYMBOL_GPL(vtime_guest_exit);
 
@@ -734,30 +722,24 @@ void vtime_account_idle(struct task_struct *tsk)
 
 void arch_vtime_task_switch(struct task_struct *prev)
 {
-    raw_spin_lock(&prev->vtime_lock);
-    write_seqcount_begin(&prev->vtime_seq);
+    write_seqlock(&prev->vtime_seqlock);
     prev->vtime_snap_whence = VTIME_SLEEPING;
-    write_seqcount_end(&prev->vtime_seq);
-    raw_spin_unlock(&prev->vtime_lock);
+    write_sequnlock(&prev->vtime_seqlock);
 
-    raw_spin_lock(&current->vtime_lock);
-    write_seqcount_begin(&current->vtime_seq);
+    write_seqlock(&current->vtime_seqlock);
     current->vtime_snap_whence = VTIME_SYS;
     current->vtime_snap = sched_clock_cpu(smp_processor_id());
-    write_seqcount_end(&current->vtime_seq);
-    raw_spin_unlock(&current->vtime_lock);
+    write_sequnlock(&current->vtime_seqlock);
 }
 
 void vtime_init_idle(struct task_struct *t, int cpu)
 {
     unsigned long flags;
 
-    raw_spin_lock_irqsave(&t->vtime_lock, flags);
-    write_seqcount_begin(&t->vtime_seq);
+    write_seqlock_irqsave(&t->vtime_seqlock, flags);
     t->vtime_snap_whence = VTIME_SYS;
     t->vtime_snap = sched_clock_cpu(cpu);
-    write_seqcount_end(&t->vtime_seq);
-    raw_spin_unlock_irqrestore(&t->vtime_lock, flags);
+    write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
 }
 
 cputime_t task_gtime(struct task_struct *t)
@@ -766,13 +748,13 @@ cputime_t task_gtime(struct task_struct *t)
     cputime_t gtime;
 
     do {
-        seq = read_seqcount_begin(&t->vtime_seq);
+        seq = read_seqbegin(&t->vtime_seqlock);
 
         gtime = t->gtime;
         if (t->flags & PF_VCPU)
             gtime += vtime_delta(t);
 
-    } while (read_seqcount_retry(&t->vtime_seq, seq));
+    } while (read_seqretry(&t->vtime_seqlock, seq));
 
     return gtime;
 }
@@ -795,7 +777,7 @@ fetch_task_cputime(struct task_struct *t,
         *udelta = 0;
         *sdelta = 0;
 
-        seq = read_seqcount_begin(&t->vtime_seq);
+        seq = read_seqbegin(&t->vtime_seqlock);
 
         if (u_dst)
             *u_dst = *u_src;
@@ -819,7 +801,7 @@ fetch_task_cputime(struct task_struct *t,
             if (t->vtime_snap_whence == VTIME_SYS)
                 *sdelta = delta;
         }
-    } while (read_seqcount_retry(&t->vtime_seq, seq));
+    } while (read_seqretry(&t->vtime_seqlock, seq));
 }
 
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 70812af..fd9ca1d 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -256,9 +256,6 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
     P(rt_throttled);
     PN(rt_time);
     PN(rt_runtime);
-#ifdef CONFIG_SMP
-    P(rt_nr_migratory);
-#endif
 
 #undef PN
 #undef P
@@ -588,10 +585,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 #endif
     P(policy);
     P(prio);
-#ifdef CONFIG_PREEMPT_RT_FULL
-    P(migrate_disable);
-#endif
-    P(nr_cpus_allowed);
 #undef PN
 #undef __PN
 #undef P
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0af1448..790e2fc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1902,7 +1902,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
     ideal_runtime = sched_slice(cfs_rq, curr);
     delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
     if (delta_exec > ideal_runtime) {
-        resched_task_lazy(rq_of(cfs_rq)->curr);
+        resched_task(rq_of(cfs_rq)->curr);
         /*
          * The current task ran long enough, ensure it doesn't get
          * re-elected due to buddy favours.
@@ -1926,7 +1926,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
         return;
 
     if (delta > ideal_runtime)
-        resched_task_lazy(rq_of(cfs_rq)->curr);
+        resched_task(rq_of(cfs_rq)->curr);
 }
 
 static void
@@ -2047,7 +2047,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
      * validating it and just reschedule.
      */
     if (queued) {
-        resched_task_lazy(rq_of(cfs_rq)->curr);
+        resched_task(rq_of(cfs_rq)->curr);
         return;
     }
     /*
@@ -2237,7 +2237,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
      * hierarchy can be throttled
      */
     if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
-        resched_task_lazy(rq_of(cfs_rq)->curr);
+        resched_task(rq_of(cfs_rq)->curr);
 }
 
 static __always_inline
@@ -2837,7 +2837,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
 
         if (delta < 0) {
             if (rq->curr == p)
-                resched_task_lazy(p);
+                resched_task(p);
             return;
         }
 
@@ -3704,7 +3704,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
     return;
 
 preempt:
-    resched_task_lazy(curr);
+    resched_task(curr);
     /*
      * Only set the backward buddy when the current task is still
      * on the rq. This can happen when a wakeup gets interleaved
@@ -5979,7 +5979,7 @@ static void task_fork_fair(struct task_struct *p)
          * 'current' within the tree based on its new key value.
          */
         swap(curr->vruntime, se->vruntime);
-        resched_task_lazy(rq->curr);
+        resched_task(rq->curr);
     }
 
     se->vruntime -= cfs_rq->min_vruntime;
@@ -6004,7 +6004,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
      */
     if (rq->curr == p) {
         if (p->prio > oldprio)
-            resched_task_lazy(rq->curr);
+            resched_task(rq->curr);
     } else
         check_preempt_curr(rq, p, 0);
 }
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 4594051..99399f8 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -50,18 +50,11 @@ SCHED_FEAT(LB_BIAS, true)
  */
 SCHED_FEAT(NONTASK_POWER, true)
 
-#ifndef CONFIG_PREEMPT_RT_FULL
 /*
  * Queue remote wakeups on the target CPU and process them
  * using the scheduler IPI. Reduces rq->lock contention/bounces.
  */
 SCHED_FEAT(TTWU_QUEUE, true)
-#else
-SCHED_FEAT(TTWU_QUEUE, false)
-# ifdef CONFIG_PREEMPT_LAZY
-SCHED_FEAT(PREEMPT_LAZY, true)
-# endif
-#endif
 
 SCHED_FEAT(FORCE_SD_OVERLAP, false)
 SCHED_FEAT(RT_RUNTIME_SHARE, true)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 240fc60..ff04e1a 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -43,7 +43,6 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
 
     hrtimer_init(&rt_b->rt_period_timer,
             CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-    rt_b->rt_period_timer.irqsafe = 1;
     rt_b->rt_period_timer.function = sched_rt_period_timer;
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2843303..4f31059 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -898,7 +898,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 #define WF_SYNC		0x01		/* waker goes to sleep after wakeup */
 #define WF_FORK		0x02		/* child wakeup after fork */
 #define WF_MIGRATED	0x4		/* internal use, task got migrated */
-#define WF_LOCK_SLEEPER	0x08		/* wakeup spinlock "sleeper" */
 
 /*
  * To aid in avoiding the subversion of "niceness" due to uneven distribution
@@ -1046,15 +1045,6 @@ extern void init_sched_fair_class(void);
 extern void resched_task(struct task_struct *p);
 extern void resched_cpu(int cpu);
 
-#ifdef CONFIG_PREEMPT_LAZY
-extern void resched_task_lazy(struct task_struct *tsk);
-#else
-static inline void resched_task_lazy(struct task_struct *tsk)
-{
-    resched_task(tsk);
-}
-#endif
-
 extern struct rt_bandwidth def_rt_bandwidth;
 extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);