author     Scott Wood <scottwood@freescale.com>    2014-05-14 18:19:12 (GMT)
committer  Scott Wood <scottwood@freescale.com>    2014-05-14 18:37:18 (GMT)
commit     86ba38e6f5f2fbfe9b49e153ea89593b26482019 (patch)
tree       f99d2906b0eafca507f37289e68052fc105cc2dc /kernel
parent     07c8b57b111585a617b2b456497fc9b33c00743c (diff)
download   linux-fsl-qoriq-86ba38e6f5f2fbfe9b49e153ea89593b26482019.tar.xz
Reset to 3.12.19
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Kconfig.locks          |    2
-rw-r--r--  kernel/Kconfig.preempt        |   33
-rw-r--r--  kernel/Makefile               |   11
-rw-r--r--  kernel/cgroup.c               |   26
-rw-r--r--  kernel/cpu.c                  |  328
-rw-r--r--  kernel/debug/kdb/kdb_io.c     |    6
-rw-r--r--  kernel/events/core.c          |    1
-rw-r--r--  kernel/exit.c                 |   23
-rw-r--r--  kernel/fork.c                 |   42
-rw-r--r--  kernel/futex.c                |   96
-rw-r--r--  kernel/hrtimer.c              |  370
-rw-r--r--  kernel/irq/handle.c           |    8
-rw-r--r--  kernel/irq/manage.c           |  100
-rw-r--r--  kernel/irq/settings.h         |   12
-rw-r--r--  kernel/irq/spurious.c         |    8
-rw-r--r--  kernel/irq_work.c             |   27
-rw-r--r--  kernel/itimer.c               |    1
-rw-r--r--  kernel/ksysfs.c               |   12
-rw-r--r--  kernel/lglock.c               |   54
-rw-r--r--  kernel/lockdep.c              |    2
-rw-r--r--  kernel/panic.c                |    2
-rw-r--r--  kernel/pid_namespace.c        |    4
-rw-r--r--  kernel/posix-cpu-timers.c     |  198
-rw-r--r--  kernel/posix-timers.c         |   37
-rw-r--r--  kernel/power/hibernate.c      |    7
-rw-r--r--  kernel/power/suspend.c        |    4
-rw-r--r--  kernel/printk/printk.c        |  148
-rw-r--r--  kernel/ptrace.c               |    7
-rw-r--r--  kernel/rcupdate.c             |    2
-rw-r--r--  kernel/rcutiny.c              |    2
-rw-r--r--  kernel/rcutree.c              |  152
-rw-r--r--  kernel/rcutree.h              |   11
-rw-r--r--  kernel/rcutree_plugin.h       |  178
-rw-r--r--  kernel/relay.c                |   14
-rw-r--r--  kernel/res_counter.c          |    8
-rw-r--r--  kernel/rt.c                   |  452
-rw-r--r--  kernel/rtmutex.c              |  743
-rw-r--r--  kernel/rtmutex_common.h       |   12
-rw-r--r--  kernel/sched/core.c           |  569
-rw-r--r--  kernel/sched/cputime.c        |   62
-rw-r--r--  kernel/sched/debug.c          |    7
-rw-r--r--  kernel/sched/fair.c           |   16
-rw-r--r--  kernel/sched/features.h       |    7
-rw-r--r--  kernel/sched/rt.c             |    1
-rw-r--r--  kernel/sched/sched.h          |   10
-rw-r--r--  kernel/signal.c               |  135
-rw-r--r--  kernel/softirq.c              |  742
-rw-r--r--  kernel/spinlock.c             |    7
-rw-r--r--  kernel/stop_machine.c         |   93
-rw-r--r--  kernel/time/jiffies.c         |    7
-rw-r--r--  kernel/time/ntp.c             |   40
-rw-r--r--  kernel/time/tick-common.c     |   10
-rw-r--r--  kernel/time/tick-internal.h   |    3
-rw-r--r--  kernel/time/tick-sched.c      |   27
-rw-r--r--  kernel/time/timekeeping.c     |    6
-rw-r--r--  kernel/timer.c                |  141
-rw-r--r--  kernel/trace/Kconfig          |  104
-rw-r--r--  kernel/trace/Makefile         |    4
-rw-r--r--  kernel/trace/latency_hist.c   | 1178
-rw-r--r--  kernel/trace/trace.c          |   44
-rw-r--r--  kernel/trace/trace.h          |    2
-rw-r--r--  kernel/trace/trace_events.c   |    8
-rw-r--r--  kernel/trace/trace_export.c   |    7
-rw-r--r--  kernel/trace/trace_irqsoff.c  |   11
-rw-r--r--  kernel/trace/trace_output.c   |   18
-rw-r--r--  kernel/user.c                 |    4
-rw-r--r--  kernel/user_namespace.c       |   11
-rw-r--r--  kernel/wait-simple.c          |  115
-rw-r--r--  kernel/watchdog.c             |   16
-rw-r--r--  kernel/workqueue.c            |  174
-rw-r--r--  kernel/workqueue_internal.h   |    5
71 files changed, 869 insertions(+), 5858 deletions(-)
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 8bb92eb..d2b32ac 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -222,4 +222,4 @@ endif
config MUTEX_SPIN_ON_OWNER
def_bool y
- depends on SMP && !DEBUG_MUTEXES && !PREEMPT_RT_FULL
+ depends on SMP && !DEBUG_MUTEXES
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 11dbe26..3f9c974 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -1,16 +1,3 @@
-config PREEMPT
- bool
- select PREEMPT_COUNT
-
-config PREEMPT_RT_BASE
- bool
- select PREEMPT
-
-config HAVE_PREEMPT_LAZY
- bool
-
-config PREEMPT_LAZY
- def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL
choice
prompt "Preemption Model"
@@ -46,9 +33,9 @@ config PREEMPT_VOLUNTARY
Select this if you are building a kernel for a desktop system.
-config PREEMPT__LL
+config PREEMPT
bool "Preemptible Kernel (Low-Latency Desktop)"
- select PREEMPT
+ select PREEMPT_COUNT
select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
help
This option reduces the latency of the kernel by making
@@ -65,22 +52,6 @@ config PREEMPT__LL
embedded system with latency requirements in the milliseconds
range.
-config PREEMPT_RTB
- bool "Preemptible Kernel (Basic RT)"
- select PREEMPT_RT_BASE
- help
- This option is basically the same as (Low-Latency Desktop) but
- enables changes which are preliminary for the full preemptible
- RT kernel.
-
-config PREEMPT_RT_FULL
- bool "Fully Preemptible Kernel (RT)"
- depends on IRQ_FORCED_THREADING
- select PREEMPT_RT_BASE
- select PREEMPT_RCU
- help
- All and everything
-
endchoice
config PREEMPT_COUNT
diff --git a/kernel/Makefile b/kernel/Makefile
index b3ff0a8..1ce4755 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -7,10 +7,10 @@ obj-y = fork.o exec_domain.o panic.o \
sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
signal.o sys.o kmod.o workqueue.o pid.o task_work.o \
rcupdate.o extable.o params.o posix-timers.o \
- kthread.o wait.o sys_ni.o posix-cpu-timers.o \
- hrtimer.o nsproxy.o srcu.o semaphore.o \
+ kthread.o wait.o sys_ni.o posix-cpu-timers.o mutex.o \
+ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o cred.o reboot.o \
- async.o range.o groups.o lglock.o smpboot.o wait-simple.o
+ async.o range.o groups.o lglock.o smpboot.o
ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files
@@ -33,11 +33,7 @@ obj-$(CONFIG_FREEZER) += freezer.o
obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
-ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
-obj-y += mutex.o
obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
-obj-y += rwsem.o
-endif
obj-$(CONFIG_LOCKDEP) += lockdep.o
ifeq ($(CONFIG_PROC_FS),y)
obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
@@ -49,7 +45,6 @@ endif
obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
-obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_SMP) += smp.o
ifneq ($(CONFIG_SMP),y)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c4f8bc79..1c204fd 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4410,16 +4410,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
rcu_assign_pointer(cgrp->name, name);
/*
- * Temporarily set the pointer to NULL, so idr_find() won't return
- * a half-baked cgroup.
- */
- cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
- if (cgrp->id < 0) {
- err = -ENOMEM;
- goto err_free_name;
- }
-
- /*
* Only live parents can have children. Note that the liveliness
* check isn't strictly necessary because cgroup_mkdir() and
* cgroup_rmdir() are fully synchronized by i_mutex; however, do it
@@ -4428,7 +4418,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
*/
if (!cgroup_lock_live_group(parent)) {
err = -ENODEV;
- goto err_free_id;
+ goto err_free_name;
}
/* Grab a reference on the superblock so the hierarchy doesn't
@@ -4438,6 +4428,16 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
* fs */
atomic_inc(&sb->s_active);
+ /*
+ * Temporarily set the pointer to NULL, so idr_find() won't return
+ * a half-baked cgroup.
+ */
+ cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
+ if (cgrp->id < 0) {
+ err = -ENOMEM;
+ goto err_unlock;
+ }
+
init_cgroup_housekeeping(cgrp);
dentry->d_fsdata = cgrp;
@@ -4544,11 +4544,11 @@ err_free_all:
ss->css_free(css);
}
}
+ idr_remove(&root->cgroup_idr, cgrp->id);
+err_unlock:
mutex_unlock(&cgroup_mutex);
/* Release the reference count that we took on the superblock */
deactivate_super(sb);
-err_free_id:
- idr_remove(&root->cgroup_idr, cgrp->id);
err_free_name:
kfree(rcu_dereference_raw(cgrp->name));
err_free_cgrp:
diff --git a/kernel/cpu.c b/kernel/cpu.c
index ba7416b..d7f07a2 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -63,290 +63,6 @@ static struct {
.refcount = 0,
};
-/**
- * hotplug_pcp - per cpu hotplug descriptor
- * @unplug: set when pin_current_cpu() needs to sync tasks
- * @sync_tsk: the task that waits for tasks to finish pinned sections
- * @refcount: counter of tasks in pinned sections
- * @grab_lock: set when the tasks entering pinned sections should wait
- * @synced: notifier for @sync_tsk to tell cpu_down it's finished
- * @mutex: the mutex to make tasks wait (used when @grab_lock is true)
- * @mutex_init: zero if the mutex hasn't been initialized yet.
- *
- * Although @unplug and @sync_tsk may point to the same task, the @unplug
- * is used as a flag and still exists after @sync_tsk has exited and
- * @sync_tsk set to NULL.
- */
-struct hotplug_pcp {
- struct task_struct *unplug;
- struct task_struct *sync_tsk;
- int refcount;
- int grab_lock;
- struct completion synced;
- struct completion unplug_wait;
-#ifdef CONFIG_PREEMPT_RT_FULL
- /*
- * Note, on PREEMPT_RT, the hotplug lock must save the state of
- * the task, otherwise the mutex will cause the task to fail
- * to sleep when required. (Because it's called from migrate_disable())
- *
- * The spinlock_t on PREEMPT_RT is a mutex that saves the task's
- * state.
- */
- spinlock_t lock;
-#else
- struct mutex mutex;
-#endif
- int mutex_init;
-};
-
-#ifdef CONFIG_PREEMPT_RT_FULL
-# define hotplug_lock(hp) rt_spin_lock(&(hp)->lock)
-# define hotplug_unlock(hp) rt_spin_unlock(&(hp)->lock)
-#else
-# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
-# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
-#endif
-
-static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
-
-/**
- * pin_current_cpu - Prevent the current cpu from being unplugged
- *
- * Lightweight version of get_online_cpus() to prevent cpu from being
- * unplugged when code runs in a migration disabled region.
- *
- * Must be called with preemption disabled (preempt_count = 1)!
- */
-void pin_current_cpu(void)
-{
- struct hotplug_pcp *hp;
- int force = 0;
-
-retry:
- hp = &__get_cpu_var(hotplug_pcp);
-
- if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
- hp->unplug == current) {
- hp->refcount++;
- return;
- }
- if (hp->grab_lock) {
- preempt_enable();
- hotplug_lock(hp);
- hotplug_unlock(hp);
- } else {
- preempt_enable();
- /*
- * Try to push this task off of this CPU.
- */
- if (!migrate_me()) {
- preempt_disable();
- hp = &__get_cpu_var(hotplug_pcp);
- if (!hp->grab_lock) {
- /*
- * Just let it continue it's already pinned
- * or about to sleep.
- */
- force = 1;
- goto retry;
- }
- preempt_enable();
- }
- }
- preempt_disable();
- goto retry;
-}
-
-/**
- * unpin_current_cpu - Allow unplug of current cpu
- *
- * Must be called with preemption or interrupts disabled!
- */
-void unpin_current_cpu(void)
-{
- struct hotplug_pcp *hp = &__get_cpu_var(hotplug_pcp);
-
- WARN_ON(hp->refcount <= 0);
-
- /* This is safe. sync_unplug_thread is pinned to this cpu */
- if (!--hp->refcount && hp->unplug && hp->unplug != current)
- wake_up_process(hp->unplug);
-}
-
-static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
-{
- set_current_state(TASK_UNINTERRUPTIBLE);
- while (hp->refcount) {
- schedule_preempt_disabled();
- set_current_state(TASK_UNINTERRUPTIBLE);
- }
-}
-
-static int sync_unplug_thread(void *data)
-{
- struct hotplug_pcp *hp = data;
-
- wait_for_completion(&hp->unplug_wait);
- preempt_disable();
- hp->unplug = current;
- wait_for_pinned_cpus(hp);
-
- /*
- * This thread will synchronize the cpu_down() with threads
- * that have pinned the CPU. When the pinned CPU count reaches
- * zero, we inform the cpu_down code to continue to the next step.
- */
- set_current_state(TASK_UNINTERRUPTIBLE);
- preempt_enable();
- complete(&hp->synced);
-
- /*
- * If all succeeds, the next step will need tasks to wait till
- * the CPU is offline before continuing. To do this, the grab_lock
- * is set and tasks going into pin_current_cpu() will block on the
- * mutex. But we still need to wait for those that are already in
- * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
- * will kick this thread out.
- */
- while (!hp->grab_lock && !kthread_should_stop()) {
- schedule();
- set_current_state(TASK_UNINTERRUPTIBLE);
- }
-
- /* Make sure grab_lock is seen before we see a stale completion */
- smp_mb();
-
- /*
- * Now just before cpu_down() enters stop machine, we need to make
- * sure all tasks that are in pinned CPU sections are out, and new
- * tasks will now grab the lock, keeping them from entering pinned
- * CPU sections.
- */
- if (!kthread_should_stop()) {
- preempt_disable();
- wait_for_pinned_cpus(hp);
- preempt_enable();
- complete(&hp->synced);
- }
-
- set_current_state(TASK_UNINTERRUPTIBLE);
- while (!kthread_should_stop()) {
- schedule();
- set_current_state(TASK_UNINTERRUPTIBLE);
- }
- set_current_state(TASK_RUNNING);
-
- /*
- * Force this thread off this CPU as it's going down and
- * we don't want any more work on this CPU.
- */
- current->flags &= ~PF_NO_SETAFFINITY;
- do_set_cpus_allowed(current, cpu_present_mask);
- migrate_me();
- return 0;
-}
-
-static void __cpu_unplug_sync(struct hotplug_pcp *hp)
-{
- wake_up_process(hp->sync_tsk);
- wait_for_completion(&hp->synced);
-}
-
-static void __cpu_unplug_wait(unsigned int cpu)
-{
- struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
-
- complete(&hp->unplug_wait);
- wait_for_completion(&hp->synced);
-}
-
-/*
- * Start the sync_unplug_thread on the target cpu and wait for it to
- * complete.
- */
-static int cpu_unplug_begin(unsigned int cpu)
-{
- struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
- int err;
-
- /* Protected by cpu_hotplug.lock */
- if (!hp->mutex_init) {
-#ifdef CONFIG_PREEMPT_RT_FULL
- spin_lock_init(&hp->lock);
-#else
- mutex_init(&hp->mutex);
-#endif
- hp->mutex_init = 1;
- }
-
- /* Inform the scheduler to migrate tasks off this CPU */
- tell_sched_cpu_down_begin(cpu);
-
- init_completion(&hp->synced);
- init_completion(&hp->unplug_wait);
-
- hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
- if (IS_ERR(hp->sync_tsk)) {
- err = PTR_ERR(hp->sync_tsk);
- hp->sync_tsk = NULL;
- return err;
- }
- kthread_bind(hp->sync_tsk, cpu);
-
- /*
- * Wait for tasks to get out of the pinned sections,
- * it's still OK if new tasks enter. Some CPU notifiers will
- * wait for tasks that are going to enter these sections and
- * we must not have them block.
- */
- wake_up_process(hp->sync_tsk);
- return 0;
-}
-
-static void cpu_unplug_sync(unsigned int cpu)
-{
- struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
-
- init_completion(&hp->synced);
- /* The completion needs to be initialzied before setting grab_lock */
- smp_wmb();
-
- /* Grab the mutex before setting grab_lock */
- hotplug_lock(hp);
- hp->grab_lock = 1;
-
- /*
- * The CPU notifiers have been completed.
- * Wait for tasks to get out of pinned CPU sections and have new
- * tasks block until the CPU is completely down.
- */
- __cpu_unplug_sync(hp);
-
- /* All done with the sync thread */
- kthread_stop(hp->sync_tsk);
- hp->sync_tsk = NULL;
-}
-
-static void cpu_unplug_done(unsigned int cpu)
-{
- struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
-
- hp->unplug = NULL;
- /* Let all tasks know cpu unplug is finished before cleaning up */
- smp_wmb();
-
- if (hp->sync_tsk)
- kthread_stop(hp->sync_tsk);
-
- if (hp->grab_lock) {
- hotplug_unlock(hp);
- /* protected by cpu_hotplug.lock */
- hp->grab_lock = 0;
- }
- tell_sched_cpu_down_done(cpu);
-}
-
void get_online_cpus(void)
{
might_sleep();
@@ -363,14 +79,15 @@ void put_online_cpus(void)
{
if (cpu_hotplug.active_writer == current)
return;
-
mutex_lock(&cpu_hotplug.lock);
+
if (WARN_ON(!cpu_hotplug.refcount))
cpu_hotplug.refcount++; /* try to fix things up */
if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
wake_up_process(cpu_hotplug.active_writer);
mutex_unlock(&cpu_hotplug.lock);
+
}
EXPORT_SYMBOL_GPL(put_online_cpus);
@@ -565,15 +282,13 @@ static int __ref take_cpu_down(void *_param)
/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
- int mycpu, err, nr_calls = 0;
+ int err, nr_calls = 0;
void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
struct take_cpu_down_param tcd_param = {
.mod = mod,
.hcpu = hcpu,
};
- cpumask_var_t cpumask;
- cpumask_var_t cpumask_org;
if (num_online_cpus() == 1)
return -EBUSY;
@@ -581,34 +296,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
if (!cpu_online(cpu))
return -EINVAL;
- /* Move the downtaker off the unplug cpu */
- if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
- return -ENOMEM;
- if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) {
- free_cpumask_var(cpumask);
- return -ENOMEM;
- }
-
- cpumask_copy(cpumask_org, tsk_cpus_allowed(current));
- cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
- set_cpus_allowed_ptr(current, cpumask);
- free_cpumask_var(cpumask);
- migrate_disable();
- mycpu = smp_processor_id();
- if (mycpu == cpu) {
- printk(KERN_ERR "Yuck! Still on unplug CPU\n!");
- migrate_enable();
- err = -EBUSY;
- goto restore_cpus;
- }
- migrate_enable();
-
cpu_hotplug_begin();
- err = cpu_unplug_begin(cpu);
- if (err) {
- printk("cpu_unplug_begin(%d) failed\n", cpu);
- goto out_cancel;
- }
err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
if (err) {
@@ -618,13 +306,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
__func__, cpu);
goto out_release;
}
-
- __cpu_unplug_wait(cpu);
smpboot_park_threads(cpu);
- /* Notifiers are done. Don't let any more tasks pin this CPU. */
- cpu_unplug_sync(cpu);
-
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
if (err) {
/* CPU didn't die: tell everyone. Can't complain. */
@@ -653,14 +336,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
check_for_tasks(cpu);
out_release:
- cpu_unplug_done(cpu);
-out_cancel:
cpu_hotplug_done();
if (!err)
cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
-restore_cpus:
- set_cpus_allowed_ptr(current, cpumask_org);
- free_cpumask_var(cpumask_org);
return err;
}
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 399dba6..14ff484 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -554,6 +554,7 @@ int vkdb_printf(const char *fmt, va_list ap)
int linecount;
int colcount;
int logging, saved_loglevel = 0;
+ int saved_trap_printk;
int got_printf_lock = 0;
int retlen = 0;
int fnd, len;
@@ -564,6 +565,8 @@ int vkdb_printf(const char *fmt, va_list ap)
unsigned long uninitialized_var(flags);
preempt_disable();
+ saved_trap_printk = kdb_trap_printk;
+ kdb_trap_printk = 0;
/* Serialize kdb_printf if multiple cpus try to write at once.
* But if any cpu goes recursive in kdb, just print the output,
@@ -830,6 +833,7 @@ kdb_print_out:
} else {
__release(kdb_printf_lock);
}
+ kdb_trap_printk = saved_trap_printk;
preempt_enable();
return retlen;
}
@@ -839,11 +843,9 @@ int kdb_printf(const char *fmt, ...)
va_list ap;
int r;
- kdb_trap_printk++;
va_start(ap, fmt);
r = vkdb_printf(fmt, ap);
va_end(ap);
- kdb_trap_printk--;
return r;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 420de7f..fea4f6c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6029,7 +6029,6 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hwc->hrtimer.function = perf_swevent_hrtimer;
- hwc->hrtimer.irqsafe = 1;
/*
* Since hrtimers have a fixed rate, we can do a static freq->period
diff --git a/kernel/exit.c b/kernel/exit.c
index 7493b32..dcde2c4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -145,7 +145,7 @@ static void __exit_signal(struct task_struct *tsk)
* Do this under ->siglock, we can race with another thread
* doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
*/
- flush_task_sigqueue(tsk);
+ flush_sigqueue(&tsk->pending);
tsk->sighand = NULL;
spin_unlock(&sighand->siglock);
@@ -559,9 +559,6 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p,
struct list_head *dead)
{
list_move_tail(&p->sibling, &p->real_parent->children);
-
- if (p->exit_state == EXIT_DEAD)
- return;
/*
* If this is a threaded reparent there is no need to
* notify anyone anything has happened.
@@ -569,9 +566,19 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p,
if (same_thread_group(p->real_parent, father))
return;
- /* We don't want people slaying init. */
+ /*
+ * We don't want people slaying init.
+ *
+ * Note: we do this even if it is EXIT_DEAD, wait_task_zombie()
+ * can change ->exit_state to EXIT_ZOMBIE. If this is the final
+ * state, do_notify_parent() was already called and ->exit_signal
+ * doesn't matter.
+ */
p->exit_signal = SIGCHLD;
+ if (p->exit_state == EXIT_DEAD)
+ return;
+
/* If it has exited notify the new parent about this child's death. */
if (!p->ptrace &&
p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
@@ -783,6 +790,8 @@ void do_exit(long code)
exit_shm(tsk);
exit_files(tsk);
exit_fs(tsk);
+ if (group_dead)
+ disassociate_ctty(1);
exit_task_namespaces(tsk);
exit_task_work(tsk);
check_stack_usage();
@@ -798,13 +807,9 @@ void do_exit(long code)
cgroup_exit(tsk, 1);
- if (group_dead)
- disassociate_ctty(1);
-
module_put(task_thread_info(tsk)->exec_domain->module);
proc_exit_connector(tsk);
-
/*
* FIXME: do that only when needed, using sched_exit tracepoint
*/
diff --git a/kernel/fork.c b/kernel/fork.c
index ae9a1a4..458953c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -94,7 +94,7 @@ int max_threads; /* tunable limit on nr_threads */
DEFINE_PER_CPU(unsigned long, process_counts) = 0;
-DEFINE_RWLOCK(tasklist_lock); /* outer */
+__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
#ifdef CONFIG_PROVE_RCU
int lockdep_tasklist_lock_is_held(void)
@@ -230,9 +230,7 @@ static inline void put_signal_struct(struct signal_struct *sig)
if (atomic_dec_and_test(&sig->sigcnt))
free_signal_struct(sig);
}
-#ifdef CONFIG_PREEMPT_RT_BASE
-static
-#endif
+
void __put_task_struct(struct task_struct *tsk)
{
WARN_ON(!tsk->exit_state);
@@ -247,18 +245,7 @@ void __put_task_struct(struct task_struct *tsk)
if (!profile_handoff_task(tsk))
free_task(tsk);
}
-#ifndef CONFIG_PREEMPT_RT_BASE
EXPORT_SYMBOL_GPL(__put_task_struct);
-#else
-void __put_task_struct_cb(struct rcu_head *rhp)
-{
- struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu);
-
- __put_task_struct(tsk);
-
-}
-EXPORT_SYMBOL_GPL(__put_task_struct_cb);
-#endif
void __init __weak arch_task_cache_init(void) { }
@@ -611,19 +598,6 @@ void __mmdrop(struct mm_struct *mm)
}
EXPORT_SYMBOL_GPL(__mmdrop);
-#ifdef CONFIG_PREEMPT_RT_BASE
-/*
- * RCU callback for delayed mm drop. Not strictly rcu, but we don't
- * want another facility to make this work.
- */
-void __mmdrop_delayed(struct rcu_head *rhp)
-{
- struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
-
- __mmdrop(mm);
-}
-#endif
-
/*
* Decrement the use count and release all resources for an mm.
*/
@@ -1133,9 +1107,6 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
*/
static void posix_cpu_timers_init(struct task_struct *tsk)
{
-#ifdef CONFIG_PREEMPT_RT_BASE
- tsk->posix_timer_list = NULL;
-#endif
tsk->cputime_expires.prof_exp = 0;
tsk->cputime_expires.virt_exp = 0;
tsk->cputime_expires.sched_exp = 0;
@@ -1264,7 +1235,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
spin_lock_init(&p->alloc_lock);
init_sigpending(&p->pending);
- p->sigqueue_cache = NULL;
p->utime = p->stime = p->gtime = 0;
p->utimescaled = p->stimescaled = 0;
@@ -1272,8 +1242,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->prev_cputime.utime = p->prev_cputime.stime = 0;
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
- raw_spin_lock_init(&p->vtime_lock);
- seqcount_init(&p->vtime_seq);
+ seqlock_init(&p->vtime_seqlock);
p->vtime_snap = 0;
p->vtime_snap_whence = VTIME_SLEEPING;
#endif
@@ -1326,9 +1295,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->hardirq_context = 0;
p->softirq_context = 0;
#endif
-#ifdef CONFIG_PREEMPT_RT_FULL
- p->pagefault_disabled = 0;
-#endif
#ifdef CONFIG_LOCKDEP
p->lockdep_depth = 0; /* no locks held yet */
p->curr_chain_key = 0;
@@ -1690,7 +1656,7 @@ SYSCALL_DEFINE0(fork)
#ifdef __ARCH_WANT_SYS_VFORK
SYSCALL_DEFINE0(vfork)
{
- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
+ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
0, NULL, NULL);
}
#endif
diff --git a/kernel/futex.c b/kernel/futex.c
index 3b85a95..d8347b7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -68,7 +68,9 @@
#include "rtmutex_common.h"
+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
int __read_mostly futex_cmpxchg_enabled;
+#endif
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
@@ -573,9 +575,7 @@ void exit_pi_state_list(struct task_struct *curr)
* task still owns the PI-state:
*/
if (head->next != next) {
- raw_spin_unlock_irq(&curr->pi_lock);
spin_unlock(&hb->lock);
- raw_spin_lock_irq(&curr->pi_lock);
continue;
}
@@ -1449,16 +1449,6 @@ retry_private:
requeue_pi_wake_futex(this, &key2, hb2);
drop_count++;
continue;
- } else if (ret == -EAGAIN) {
- /*
- * Waiter was woken by timeout or
- * signal and has set pi_blocked_on to
- * PI_WAKEUP_INPROGRESS before we
- * tried to enqueue it on the rtmutex.
- */
- this->pi_state = NULL;
- free_pi_state(pi_state);
- continue;
} else if (ret) {
/* -EDEADLK */
this->pi_state = NULL;
@@ -2302,7 +2292,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
struct hrtimer_sleeper timeout, *to = NULL;
struct rt_mutex_waiter rt_waiter;
struct rt_mutex *pi_mutex = NULL;
- struct futex_hash_bucket *hb, *hb2;
+ struct futex_hash_bucket *hb;
union futex_key key2 = FUTEX_KEY_INIT;
struct futex_q q = futex_q_init;
int res, ret;
@@ -2327,7 +2317,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
* The waiter is allocated on our stack, manipulated by the requeue
* code while we sleep on uaddr.
*/
- rt_mutex_init_waiter(&rt_waiter, false);
+ debug_rt_mutex_init_waiter(&rt_waiter);
+ rt_waiter.task = NULL;
ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
if (unlikely(ret != 0))
@@ -2348,55 +2339,20 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
/* Queue the futex_q, drop the hb lock, wait for wakeup. */
futex_wait_queue_me(hb, &q, to);
- /*
- * On RT we must avoid races with requeue and trying to block
- * on two mutexes (hb->lock and uaddr2's rtmutex) by
- * serializing access to pi_blocked_on with pi_lock.
- */
- raw_spin_lock_irq(&current->pi_lock);
- if (current->pi_blocked_on) {
- /*
- * We have been requeued or are in the process of
- * being requeued.
- */
- raw_spin_unlock_irq(&current->pi_lock);
- } else {
- /*
- * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS
- * prevents a concurrent requeue from moving us to the
- * uaddr2 rtmutex. After that we can safely acquire
- * (and possibly block on) hb->lock.
- */
- current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
- raw_spin_unlock_irq(&current->pi_lock);
-
- spin_lock(&hb->lock);
-
- /*
- * Clean up pi_blocked_on. We might leak it otherwise
- * when we succeeded with the hb->lock in the fast
- * path.
- */
- raw_spin_lock_irq(&current->pi_lock);
- current->pi_blocked_on = NULL;
- raw_spin_unlock_irq(&current->pi_lock);
-
- ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
- spin_unlock(&hb->lock);
- if (ret)
- goto out_put_keys;
- }
+ spin_lock(&hb->lock);
+ ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
+ spin_unlock(&hb->lock);
+ if (ret)
+ goto out_put_keys;
/*
- * In order to be here, we have either been requeued, are in
- * the process of being requeued, or requeue successfully
- * acquired uaddr2 on our behalf. If pi_blocked_on was
- * non-null above, we may be racing with a requeue. Do not
- * rely on q->lock_ptr to be hb2->lock until after blocking on
- * hb->lock or hb2->lock. The futex_requeue dropped our key1
- * reference and incremented our key2 reference count.
+ * In order for us to be here, we know our q.key == key2, and since
+ * we took the hb->lock above, we also know that futex_requeue() has
+ * completed and we no longer have to concern ourselves with a wakeup
+ * race with the atomic proxy lock acquisition by the requeue code. The
+ * futex_requeue dropped our key1 reference and incremented our key2
+ * reference count.
*/
- hb2 = hash_futex(&key2);
/* Check if the requeue code acquired the second futex for us. */
if (!q.rt_waiter) {
@@ -2405,10 +2361,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
* did a lock-steal - fix up the PI-state in that case.
*/
if (q.pi_state && (q.pi_state->owner != current)) {
- spin_lock(&hb2->lock);
- BUG_ON(&hb2->lock != q.lock_ptr);
+ spin_lock(q.lock_ptr);
ret = fixup_pi_state_owner(uaddr2, &q, current);
- spin_unlock(&hb2->lock);
+ spin_unlock(q.lock_ptr);
}
} else {
/*
@@ -2421,8 +2376,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
debug_rt_mutex_free_waiter(&rt_waiter);
- spin_lock(&hb2->lock);
- BUG_ON(&hb2->lock != q.lock_ptr);
+ spin_lock(q.lock_ptr);
/*
* Fixup the pi_state owner and possibly acquire the lock if we
* haven't already.
@@ -2779,10 +2733,10 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
-static int __init futex_init(void)
+static void __init futex_detect_cmpxchg(void)
{
+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
u32 curval;
- int i;
/*
* This will fail and we want it. Some arch implementations do
@@ -2796,6 +2750,14 @@ static int __init futex_init(void)
*/
if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
futex_cmpxchg_enabled = 1;
+#endif
+}
+
+static int __init futex_init(void)
+{
+ int i;
+
+ futex_detect_cmpxchg();
for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
plist_head_init(&futex_queues[i].chain);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index c19183d..383319b 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -47,13 +47,11 @@
#include <linux/sched/sysctl.h>
#include <linux/sched/rt.h>
#include <linux/timer.h>
-#include <linux/kthread.h>
#include <linux/freezer.h>
#include <asm/uaccess.h>
#include <trace/events/timer.h>
-#include <trace/events/hist.h>
/*
* The timer bases:
@@ -609,7 +607,8 @@ static int hrtimer_reprogram(struct hrtimer *timer,
* When the callback is running, we do not reprogram the clock event
* device. The timer callback is either running on a different CPU or
* the callback is executed in the hrtimer_interrupt context. The
- * reprogramming is handled at the end of the hrtimer_interrupt.
+ * reprogramming is handled either by the softirq, which called the
+ * callback or at the end of the hrtimer_interrupt.
*/
if (hrtimer_callback_running(timer))
return 0;
@@ -644,9 +643,6 @@ static int hrtimer_reprogram(struct hrtimer *timer,
return res;
}
-static void __run_hrtimer(struct hrtimer *timer, ktime_t *now);
-static int hrtimer_rt_defer(struct hrtimer *timer);
-
/*
* Initialize the high resolution related parts of cpu_base
*/
@@ -663,18 +659,9 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
* and expiry check is done in the hrtimer_interrupt or in the softirq.
*/
static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
- struct hrtimer_clock_base *base,
- int wakeup)
+ struct hrtimer_clock_base *base)
{
- if (!(base->cpu_base->hres_active && hrtimer_reprogram(timer, base)))
- return 0;
- if (!wakeup)
- return -ETIME;
-#ifdef CONFIG_PREEMPT_RT_BASE
- if (!hrtimer_rt_defer(timer))
- return -ETIME;
-#endif
- return 1;
+ return base->cpu_base->hres_active && hrtimer_reprogram(timer, base);
}
static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
@@ -742,44 +729,6 @@ static void clock_was_set_work(struct work_struct *work)
static DECLARE_WORK(hrtimer_work, clock_was_set_work);
-#ifdef CONFIG_PREEMPT_RT_FULL
-/*
- * RT can not call schedule_work from real interrupt context.
- * Need to make a thread to do the real work.
- */
-static struct task_struct *clock_set_delay_thread;
-static bool do_clock_set_delay;
-
-static int run_clock_set_delay(void *ignore)
-{
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (do_clock_set_delay) {
- do_clock_set_delay = false;
- schedule_work(&hrtimer_work);
- }
- schedule();
- }
- __set_current_state(TASK_RUNNING);
- return 0;
-}
-
-void clock_was_set_delayed(void)
-{
- do_clock_set_delay = true;
- /* Make visible before waking up process */
- smp_wmb();
- wake_up_process(clock_set_delay_thread);
-}
-
-static __init int create_clock_set_delay_thread(void)
-{
- clock_set_delay_thread = kthread_run(run_clock_set_delay, NULL, "kclksetdelayd");
- BUG_ON(!clock_set_delay_thread);
- return 0;
-}
-early_initcall(create_clock_set_delay_thread);
-#else /* PREEMPT_RT_FULL */
/*
* Called from timekeeping and resume code to reprogramm the hrtimer
* interrupt device on all cpus.
@@ -788,7 +737,6 @@ void clock_was_set_delayed(void)
{
schedule_work(&hrtimer_work);
}
-#endif
#else
@@ -798,18 +746,12 @@ static inline int hrtimer_switch_to_hres(void) { return 0; }
static inline void
hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
- struct hrtimer_clock_base *base,
- int wakeup)
+ struct hrtimer_clock_base *base)
{
return 0;
}
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
static inline void retrigger_next_event(void *arg) { }
-static inline int hrtimer_reprogram(struct hrtimer *timer,
- struct hrtimer_clock_base *base)
-{
- return 0;
-}
#endif /* CONFIG_HIGH_RES_TIMERS */
@@ -928,32 +870,6 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
}
EXPORT_SYMBOL_GPL(hrtimer_forward);
-#ifdef CONFIG_PREEMPT_RT_BASE
-# define wake_up_timer_waiters(b) wake_up(&(b)->wait)
-
-/**
- * hrtimer_wait_for_timer - Wait for a running timer
- *
- * @timer: timer to wait for
- *
- * The function waits in case the timers callback function is
- * currently executed on the waitqueue of the timer base. The
- * waitqueue is woken up after the timer callback function has
- * finished execution.
- */
-void hrtimer_wait_for_timer(const struct hrtimer *timer)
-{
- struct hrtimer_clock_base *base = timer->base;
-
- if (base && base->cpu_base && !timer->irqsafe)
- wait_event(base->cpu_base->wait,
- !(timer->state & HRTIMER_STATE_CALLBACK));
-}
-
-#else
-# define wake_up_timer_waiters(b) do { } while (0)
-#endif
-
/*
* enqueue_hrtimer - internal function to (re)start a timer
*
@@ -997,11 +913,6 @@ static void __remove_hrtimer(struct hrtimer *timer,
if (!(timer->state & HRTIMER_STATE_ENQUEUED))
goto out;
- if (unlikely(!list_empty(&timer->cb_entry))) {
- list_del_init(&timer->cb_entry);
- goto out;
- }
-
next_timer = timerqueue_getnext(&base->active);
timerqueue_del(&base->active, &timer->node);
if (&timer->node == next_timer) {
@@ -1086,17 +997,6 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
#endif
}
-#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
- {
- ktime_t now = new_base->get_time();
-
- if (ktime_to_ns(tim) < ktime_to_ns(now))
- timer->praecox = now;
- else
- timer->praecox = ktime_set(0, 0);
- }
-#endif
-
hrtimer_set_expires_range_ns(timer, tim, delta_ns);
timer_stats_hrtimer_set_start_info(timer);
@@ -1109,19 +1009,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
*
* XXX send_remote_softirq() ?
*/
- if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)) {
- ret = hrtimer_enqueue_reprogram(timer, new_base, wakeup);
- if (ret < 0) {
- /*
- * In case we failed to reprogram the timer (mostly
- * because out current timer is already elapsed),
- * remove it again and report a failure. This avoids
- * stale base->first entries.
- */
- debug_deactivate(timer);
- __remove_hrtimer(timer, new_base,
- timer->state & HRTIMER_STATE_CALLBACK, 0);
- } else if (ret > 0) {
+ if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)
+ && hrtimer_enqueue_reprogram(timer, new_base)) {
+ if (wakeup) {
/*
* We need to drop cpu_base->lock to avoid a
* lock ordering issue vs. rq->lock.
@@ -1129,7 +1019,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
raw_spin_unlock(&new_base->cpu_base->lock);
raise_softirq_irqoff(HRTIMER_SOFTIRQ);
local_irq_restore(flags);
- return 0;
+ return ret;
+ } else {
+ __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
}
}
@@ -1219,7 +1111,7 @@ int hrtimer_cancel(struct hrtimer *timer)
if (ret >= 0)
return ret;
- hrtimer_wait_for_timer(timer);
+ cpu_relax();
}
}
EXPORT_SYMBOL_GPL(hrtimer_cancel);
@@ -1298,7 +1190,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
base = hrtimer_clockid_to_base(clock_id);
timer->base = &cpu_base->clock_base[base];
- INIT_LIST_HEAD(&timer->cb_entry);
timerqueue_init(&timer->node);
#ifdef CONFIG_TIMER_STATS
@@ -1382,126 +1273,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
timer->state &= ~HRTIMER_STATE_CALLBACK;
}
-static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
-
-#ifdef CONFIG_PREEMPT_RT_BASE
-static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
- struct hrtimer_clock_base *base)
-{
- /*
- * Note, we clear the callback flag before we requeue the
- * timer otherwise we trigger the callback_running() check
- * in hrtimer_reprogram().
- */
- timer->state &= ~HRTIMER_STATE_CALLBACK;
-
- if (restart != HRTIMER_NORESTART) {
- BUG_ON(hrtimer_active(timer));
- /*
- * Enqueue the timer, if it's the leftmost timer then
- * we need to reprogram it.
- */
- if (!enqueue_hrtimer(timer, base))
- return;
-
-#ifndef CONFIG_HIGH_RES_TIMERS
- }
-#else
- if (base->cpu_base->hres_active &&
- hrtimer_reprogram(timer, base))
- goto requeue;
-
- } else if (hrtimer_active(timer)) {
- /*
- * If the timer was rearmed on another CPU, reprogram
- * the event device.
- */
- if (&timer->node == base->active.next &&
- base->cpu_base->hres_active &&
- hrtimer_reprogram(timer, base))
- goto requeue;
- }
- return;
-
-requeue:
- /*
- * Timer is expired. Thus move it from tree to pending list
- * again.
- */
- __remove_hrtimer(timer, base, timer->state, 0);
- list_add_tail(&timer->cb_entry, &base->expired);
-#endif
-}
-
-/*
- * The changes in mainline which removed the callback modes from
- * hrtimer are not yet working with -rt. The non wakeup_process()
- * based callbacks which involve sleeping locks need to be treated
- * seperately.
- */
-static void hrtimer_rt_run_pending(void)
-{
- enum hrtimer_restart (*fn)(struct hrtimer *);
- struct hrtimer_cpu_base *cpu_base;
- struct hrtimer_clock_base *base;
- struct hrtimer *timer;
- int index, restart;
-
- local_irq_disable();
- cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
-
- raw_spin_lock(&cpu_base->lock);
-
- for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
- base = &cpu_base->clock_base[index];
-
- while (!list_empty(&base->expired)) {
- timer = list_first_entry(&base->expired,
- struct hrtimer, cb_entry);
-
- /*
- * Same as the above __run_hrtimer function
- * just we run with interrupts enabled.
- */
- debug_hrtimer_deactivate(timer);
- __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
- timer_stats_account_hrtimer(timer);
- fn = timer->function;
-
- raw_spin_unlock_irq(&cpu_base->lock);
- restart = fn(timer);
- raw_spin_lock_irq(&cpu_base->lock);
-
- hrtimer_rt_reprogram(restart, timer, base);
- }
- }
-
- raw_spin_unlock_irq(&cpu_base->lock);
-
- wake_up_timer_waiters(cpu_base);
-}
-
-static int hrtimer_rt_defer(struct hrtimer *timer)
-{
- if (timer->irqsafe)
- return 0;
-
- __remove_hrtimer(timer, timer->base, timer->state, 0);
- list_add_tail(&timer->cb_entry, &timer->base->expired);
- return 1;
-}
-
-#else
-
-static inline void hrtimer_rt_run_pending(void)
-{
- hrtimer_peek_ahead_timers();
-}
-
-static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
-
-#endif
-
#ifdef CONFIG_HIGH_RES_TIMERS
/*
@@ -1512,7 +1283,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
{
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
ktime_t expires_next, now, entry_time, delta;
- int i, retries = 0, raise = 0;
+ int i, retries = 0;
BUG_ON(!cpu_base->hres_active);
cpu_base->nr_events++;
@@ -1547,15 +1318,6 @@ retry:
timer = container_of(node, struct hrtimer, node);
- trace_hrtimer_interrupt(raw_smp_processor_id(),
- ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ?
- timer->praecox : hrtimer_get_expires(timer),
- basenow)),
- current,
- timer->function == hrtimer_wakeup ?
- container_of(timer, struct hrtimer_sleeper,
- timer)->task : NULL);
-
/*
* The immediate goal for using the softexpires is
* minimizing wakeups, not running timers at the
@@ -1581,10 +1343,7 @@ retry:
break;
}
- if (!hrtimer_rt_defer(timer))
- __run_hrtimer(timer, &basenow);
- else
- raise = 1;
+ __run_hrtimer(timer, &basenow);
}
}
@@ -1599,7 +1358,7 @@ retry:
if (expires_next.tv64 == KTIME_MAX ||
!tick_program_event(expires_next, 0)) {
cpu_base->hang_detected = 0;
- goto out;
+ return;
}
/*
@@ -1643,9 +1402,6 @@ retry:
tick_program_event(expires_next, 1);
printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
ktime_to_ns(delta));
-out:
- if (raise)
- raise_softirq_irqoff(HRTIMER_SOFTIRQ);
}
/*
@@ -1681,16 +1437,40 @@ void hrtimer_peek_ahead_timers(void)
__hrtimer_peek_ahead_timers();
local_irq_restore(flags);
}
+
+static void run_hrtimer_softirq(struct softirq_action *h)
+{
+ hrtimer_peek_ahead_timers();
+}
+
#else /* CONFIG_HIGH_RES_TIMERS */
static inline void __hrtimer_peek_ahead_timers(void) { }
#endif /* !CONFIG_HIGH_RES_TIMERS */
-
-static void run_hrtimer_softirq(struct softirq_action *h)
+/*
+ * Called from timer softirq every jiffy, expire hrtimers:
+ *
+ * For HRT its the fall back code to run the softirq in the timer
+ * softirq context in case the hrtimer initialization failed or has
+ * not been done yet.
+ */
+void hrtimer_run_pending(void)
{
- hrtimer_rt_run_pending();
+ if (hrtimer_hres_active())
+ return;
+
+ /*
+ * This _is_ ugly: We have to check in the softirq context,
+ * whether we can switch to highres and / or nohz mode. The
+ * clocksource switch happens in the timer interrupt with
+ * xtime_lock held. Notification from there only sets the
+ * check bit in the tick_oneshot code, otherwise we might
+ * deadlock vs. xtime_lock.
+ */
+ if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
+ hrtimer_switch_to_hres();
}
/*
@@ -1701,18 +1481,11 @@ void hrtimer_run_queues(void)
struct timerqueue_node *node;
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
struct hrtimer_clock_base *base;
- int index, gettime = 1, raise = 0;
+ int index, gettime = 1;
if (hrtimer_hres_active())
return;
- /*
- * Check whether we can switch to highres mode.
- */
- if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())
- && hrtimer_switch_to_hres())
- return;
-
for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
base = &cpu_base->clock_base[index];
if (!timerqueue_getnext(&base->active))
@@ -1733,16 +1506,10 @@ void hrtimer_run_queues(void)
hrtimer_get_expires_tv64(timer))
break;
- if (!hrtimer_rt_defer(timer))
- __run_hrtimer(timer, &base->softirq_time);
- else
- raise = 1;
+ __run_hrtimer(timer, &base->softirq_time);
}
raw_spin_unlock(&cpu_base->lock);
}
-
- if (raise)
- raise_softirq_irqoff(HRTIMER_SOFTIRQ);
}
/*
@@ -1764,18 +1531,16 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
{
sl->timer.function = hrtimer_wakeup;
- sl->timer.irqsafe = 1;
sl->task = task;
}
EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
-static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode,
- unsigned long state)
+static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
{
hrtimer_init_sleeper(t, current);
do {
- set_current_state(state);
+ set_current_state(TASK_INTERRUPTIBLE);
hrtimer_start_expires(&t->timer, mode);
if (!hrtimer_active(&t->timer))
t->task = NULL;
@@ -1819,8 +1584,7 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
HRTIMER_MODE_ABS);
hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
- /* cpu_chill() does not care about restart state. */
- if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE))
+ if (do_nanosleep(&t, HRTIMER_MODE_ABS))
goto out;
rmtp = restart->nanosleep.rmtp;
@@ -1837,10 +1601,8 @@ out:
return ret;
}
-static long
-__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
- const enum hrtimer_mode mode, const clockid_t clockid,
- unsigned long state)
+long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
+ const enum hrtimer_mode mode, const clockid_t clockid)
{
struct restart_block *restart;
struct hrtimer_sleeper t;
@@ -1853,7 +1615,7 @@ __hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
hrtimer_init_on_stack(&t.timer, clockid, mode);
hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
- if (do_nanosleep(&t, mode, state))
+ if (do_nanosleep(&t, mode))
goto out;
/* Absolute timers do not update the rmtp value and restart: */
@@ -1880,12 +1642,6 @@ out:
return ret;
}
-long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
- const enum hrtimer_mode mode, const clockid_t clockid)
-{
- return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE);
-}
-
SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
struct timespec __user *, rmtp)
{
@@ -1900,26 +1656,6 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
}
-#ifdef CONFIG_PREEMPT_RT_FULL
-/*
- * Sleep for 1 ms in hope whoever holds what we want will let it go.
- */
-void cpu_chill(void)
-{
- struct timespec tu = {
- .tv_nsec = NSEC_PER_MSEC,
- };
- unsigned int freeze_flag = current->flags & PF_NOFREEZE;
-
- current->flags |= PF_NOFREEZE;
- __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC,
- TASK_UNINTERRUPTIBLE);
- if (!freeze_flag)
- current->flags &= ~PF_NOFREEZE;
-}
-EXPORT_SYMBOL(cpu_chill);
-#endif
-
/*
* Functions related to boot-time initialization:
*/
@@ -1931,13 +1667,9 @@ static void init_hrtimers_cpu(int cpu)
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
cpu_base->clock_base[i].cpu_base = cpu_base;
timerqueue_init_head(&cpu_base->clock_base[i].active);
- INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
}
hrtimer_init_hres(cpu_base);
-#ifdef CONFIG_PREEMPT_RT_BASE
- init_waitqueue_head(&cpu_base->wait);
-#endif
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -2050,7 +1782,9 @@ void __init hrtimers_init(void)
hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
register_cpu_notifier(&hrtimers_nb);
+#ifdef CONFIG_HIGH_RES_TIMERS
open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
+#endif
}
/**
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 7f50c55..131ca17 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -132,8 +132,6 @@ static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
irqreturn_t
handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
{
- struct pt_regs *regs = get_irq_regs();
- u64 ip = regs ? instruction_pointer(regs) : 0;
irqreturn_t retval = IRQ_NONE;
unsigned int flags = 0, irq = desc->irq_data.irq;
@@ -174,11 +172,7 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
action = action->next;
} while (action);
-#ifndef CONFIG_PREEMPT_RT_FULL
- add_interrupt_randomness(irq, flags, ip);
-#else
- desc->random_ip = ip;
-#endif
+ add_interrupt_randomness(irq, flags);
if (!noirqdebug)
note_interrupt(irq, desc, retval);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 252bf10..4c84746 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -22,7 +22,6 @@
#include "internals.h"
#ifdef CONFIG_IRQ_FORCED_THREADING
-# ifndef CONFIG_PREEMPT_RT_BASE
__read_mostly bool force_irqthreads;
static int __init setup_forced_irqthreads(char *arg)
@@ -31,7 +30,6 @@ static int __init setup_forced_irqthreads(char *arg)
return 0;
}
early_param("threadirqs", setup_forced_irqthreads);
-# endif
#endif
/**
@@ -164,62 +162,6 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
return ret;
}
-#ifdef CONFIG_PREEMPT_RT_FULL
-static void _irq_affinity_notify(struct irq_affinity_notify *notify);
-static struct task_struct *set_affinity_helper;
-static LIST_HEAD(affinity_list);
-static DEFINE_RAW_SPINLOCK(affinity_list_lock);
-
-static int set_affinity_thread(void *unused)
-{
- while (1) {
- struct irq_affinity_notify *notify;
- int empty;
-
- set_current_state(TASK_INTERRUPTIBLE);
-
- raw_spin_lock_irq(&affinity_list_lock);
- empty = list_empty(&affinity_list);
- raw_spin_unlock_irq(&affinity_list_lock);
-
- if (empty)
- schedule();
- if (kthread_should_stop())
- break;
- set_current_state(TASK_RUNNING);
-try_next:
- notify = NULL;
-
- raw_spin_lock_irq(&affinity_list_lock);
- if (!list_empty(&affinity_list)) {
- notify = list_first_entry(&affinity_list,
- struct irq_affinity_notify, list);
- list_del_init(&notify->list);
- }
- raw_spin_unlock_irq(&affinity_list_lock);
-
- if (!notify)
- continue;
- _irq_affinity_notify(notify);
- goto try_next;
- }
- return 0;
-}
-
-static void init_helper_thread(void)
-{
- if (set_affinity_helper)
- return;
- set_affinity_helper = kthread_run(set_affinity_thread, NULL,
- "affinity-cb");
- WARN_ON(IS_ERR(set_affinity_helper));
-}
-#else
-
-static inline void init_helper_thread(void) { }
-
-#endif
-
int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask)
{
struct irq_chip *chip = irq_data_get_irq_chip(data);
@@ -238,17 +180,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask)
if (desc->affinity_notify) {
kref_get(&desc->affinity_notify->kref);
-
-#ifdef CONFIG_PREEMPT_RT_FULL
- raw_spin_lock(&affinity_list_lock);
- if (list_empty(&desc->affinity_notify->list))
- list_add_tail(&affinity_list,
- &desc->affinity_notify->list);
- raw_spin_unlock(&affinity_list_lock);
- wake_up_process(set_affinity_helper);
-#else
schedule_work(&desc->affinity_notify->work);
-#endif
}
irqd_set(data, IRQD_AFFINITY_SET);
@@ -289,8 +221,10 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
}
EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
-static void _irq_affinity_notify(struct irq_affinity_notify *notify)
+static void irq_affinity_notify(struct work_struct *work)
{
+ struct irq_affinity_notify *notify =
+ container_of(work, struct irq_affinity_notify, work);
struct irq_desc *desc = irq_to_desc(notify->irq);
cpumask_var_t cpumask;
unsigned long flags;
@@ -312,13 +246,6 @@ out:
kref_put(&notify->kref, notify->release);
}
-static void irq_affinity_notify(struct work_struct *work)
-{
- struct irq_affinity_notify *notify =
- container_of(work, struct irq_affinity_notify, work);
- _irq_affinity_notify(notify);
-}
-
/**
* irq_set_affinity_notifier - control notification of IRQ affinity changes
* @irq: Interrupt for which to enable/disable notification
@@ -348,8 +275,6 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
notify->irq = irq;
kref_init(&notify->kref);
INIT_WORK(&notify->work, irq_affinity_notify);
- INIT_LIST_HEAD(&notify->list);
- init_helper_thread();
}
raw_spin_lock_irqsave(&desc->lock, flags);
@@ -856,15 +781,7 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
local_bh_disable();
ret = action->thread_fn(action->irq, action->dev_id);
irq_finalize_oneshot(desc, action);
- /*
- * Interrupts which have real time requirements can be set up
- * to avoid softirq processing in the thread handler. This is
- * safe as these interrupts do not raise soft interrupts.
- */
- if (irq_settings_no_softirq_call(desc))
- _local_bh_enable();
- else
- local_bh_enable();
+ local_bh_enable();
return ret;
}
@@ -947,12 +864,6 @@ static int irq_thread(void *data)
if (!noirqdebug)
note_interrupt(action->irq, desc, action_ret);
-#ifdef CONFIG_PREEMPT_RT_FULL
- migrate_disable();
- add_interrupt_randomness(action->irq, 0,
- desc->random_ip ^ (unsigned long) action);
- migrate_enable();
-#endif
wake_threads_waitq(desc);
}
@@ -1215,9 +1126,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
}
- if (new->flags & IRQF_NO_SOFTIRQ_CALL)
- irq_settings_set_no_softirq_call(desc);
-
/* Set default affinity mask once everything is setup */
setup_affinity(irq, desc, mask);
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
index 0d2c381..1162f10 100644
--- a/kernel/irq/settings.h
+++ b/kernel/irq/settings.h
@@ -14,7 +14,6 @@ enum {
_IRQ_NO_BALANCING = IRQ_NO_BALANCING,
_IRQ_NESTED_THREAD = IRQ_NESTED_THREAD,
_IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID,
- _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL,
_IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
};
@@ -27,7 +26,6 @@ enum {
#define IRQ_NOAUTOEN GOT_YOU_MORON
#define IRQ_NESTED_THREAD GOT_YOU_MORON
#define IRQ_PER_CPU_DEVID GOT_YOU_MORON
-#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON
#undef IRQF_MODIFY_MASK
#define IRQF_MODIFY_MASK GOT_YOU_MORON
@@ -38,16 +36,6 @@ irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set)
desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK);
}
-static inline bool irq_settings_no_softirq_call(struct irq_desc *desc)
-{
- return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL;
-}
-
-static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc)
-{
- desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL;
-}
-
static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
{
return desc->status_use_accessors & _IRQ_PER_CPU;
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index e5a309a..7b5f012 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -340,10 +340,6 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");
static int __init irqfixup_setup(char *str)
{
-#ifdef CONFIG_PREEMPT_RT_BASE
- pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
- return 1;
-#endif
irqfixup = 1;
printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
printk(KERN_WARNING "This may impact system performance.\n");
@@ -356,10 +352,6 @@ module_param(irqfixup, int, 0644);
static int __init irqpoll_setup(char *str)
{
-#ifdef CONFIG_PREEMPT_RT_BASE
- pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
- return 1;
-#endif
irqfixup = 2;
printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
"enabled\n");
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 35d21f9..55fcce6 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -20,9 +20,6 @@
static DEFINE_PER_CPU(struct llist_head, irq_work_list);
-#ifdef CONFIG_PREEMPT_RT_FULL
-static DEFINE_PER_CPU(struct llist_head, hirq_work_list);
-#endif
static DEFINE_PER_CPU(int, irq_work_raised);
/*
@@ -51,11 +48,7 @@ static bool irq_work_claim(struct irq_work *work)
return true;
}
-#ifdef CONFIG_PREEMPT_RT_FULL
-void arch_irq_work_raise(void)
-#else
void __weak arch_irq_work_raise(void)
-#endif
{
/*
* Lame architectures will get the timer tick callback
@@ -77,12 +70,8 @@ void irq_work_queue(struct irq_work *work)
/* Queue the entry and raise the IPI if needed. */
preempt_disable();
-#ifdef CONFIG_PREEMPT_RT_FULL
- if (work->flags & IRQ_WORK_HARD_IRQ)
- llist_add(&work->llnode, &__get_cpu_var(hirq_work_list));
- else
-#endif
- llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
+ llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
+
/*
* If the work is not "lazy" or the tick is stopped, raise the irq
* work interrupt (if supported by the arch), otherwise, just wait
@@ -126,18 +115,12 @@ static void __irq_work_run(void)
__this_cpu_write(irq_work_raised, 0);
barrier();
-#ifdef CONFIG_PREEMPT_RT_FULL
- if (in_irq())
- this_list = &__get_cpu_var(hirq_work_list);
- else
-#endif
- this_list = &__get_cpu_var(irq_work_list);
+ this_list = &__get_cpu_var(irq_work_list);
if (llist_empty(this_list))
return;
-#ifndef CONFIG_PREEMPT_RT_FULL
BUG_ON(!irqs_disabled());
-#endif
+
llnode = llist_del_all(this_list);
while (llnode != NULL) {
work = llist_entry(llnode, struct irq_work, llnode);
@@ -169,9 +152,7 @@ static void __irq_work_run(void)
*/
void irq_work_run(void)
{
-#ifndef CONFIG_PREEMPT_RT_FULL
BUG_ON(!in_irq());
-#endif
__irq_work_run();
}
EXPORT_SYMBOL_GPL(irq_work_run);
diff --git a/kernel/itimer.c b/kernel/itimer.c
index d051390..8d262b4 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -213,7 +213,6 @@ again:
/* We are sharing ->siglock with it_real_fn() */
if (hrtimer_try_to_cancel(timer) < 0) {
spin_unlock_irq(&tsk->sighand->siglock);
- hrtimer_wait_for_timer(&tsk->signal->real_timer);
goto again;
}
expires = timeval_to_ktime(value->it_value);
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index b66ab9e..9659d38 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -132,15 +132,6 @@ KERNEL_ATTR_RO(vmcoreinfo);
#endif /* CONFIG_KEXEC */
-#if defined(CONFIG_PREEMPT_RT_FULL)
-static ssize_t realtime_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
-{
- return sprintf(buf, "%d\n", 1);
-}
-KERNEL_ATTR_RO(realtime);
-#endif
-
/* whether file capabilities are enabled */
static ssize_t fscaps_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
@@ -206,9 +197,6 @@ static struct attribute * kernel_attrs[] = {
&vmcoreinfo_attr.attr,
#endif
&rcu_expedited_attr.attr,
-#ifdef CONFIG_PREEMPT_RT_FULL
- &realtime_attr.attr,
-#endif
NULL
};
diff --git a/kernel/lglock.c b/kernel/lglock.c
index f2356df..86ae2ae 100644
--- a/kernel/lglock.c
+++ b/kernel/lglock.c
@@ -4,15 +4,6 @@
#include <linux/cpu.h>
#include <linux/string.h>
-#ifndef CONFIG_PREEMPT_RT_FULL
-# define lg_lock_ptr arch_spinlock_t
-# define lg_do_lock(l) arch_spin_lock(l)
-# define lg_do_unlock(l) arch_spin_unlock(l)
-#else
-# define lg_lock_ptr struct rt_mutex
-# define lg_do_lock(l) __rt_spin_lock(l)
-# define lg_do_unlock(l) __rt_spin_unlock(l)
-#endif
/*
* Note there is no uninit, so lglocks cannot be defined in
* modules (but it's fine to use them from there)
@@ -21,60 +12,51 @@
void lg_lock_init(struct lglock *lg, char *name)
{
-#ifdef CONFIG_PREEMPT_RT_FULL
- int i;
-
- for_each_possible_cpu(i) {
- struct rt_mutex *lock = per_cpu_ptr(lg->lock, i);
-
- rt_mutex_init(lock);
- }
-#endif
LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0);
}
EXPORT_SYMBOL(lg_lock_init);
void lg_local_lock(struct lglock *lg)
{
- lg_lock_ptr *lock;
+ arch_spinlock_t *lock;
- migrate_disable();
+ preempt_disable();
lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
lock = this_cpu_ptr(lg->lock);
- lg_do_lock(lock);
+ arch_spin_lock(lock);
}
EXPORT_SYMBOL(lg_local_lock);
void lg_local_unlock(struct lglock *lg)
{
- lg_lock_ptr *lock;
+ arch_spinlock_t *lock;
lock_release(&lg->lock_dep_map, 1, _RET_IP_);
lock = this_cpu_ptr(lg->lock);
- lg_do_unlock(lock);
- migrate_enable();
+ arch_spin_unlock(lock);
+ preempt_enable();
}
EXPORT_SYMBOL(lg_local_unlock);
void lg_local_lock_cpu(struct lglock *lg, int cpu)
{
- lg_lock_ptr *lock;
+ arch_spinlock_t *lock;
- preempt_disable_nort();
+ preempt_disable();
lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
lock = per_cpu_ptr(lg->lock, cpu);
- lg_do_lock(lock);
+ arch_spin_lock(lock);
}
EXPORT_SYMBOL(lg_local_lock_cpu);
void lg_local_unlock_cpu(struct lglock *lg, int cpu)
{
- lg_lock_ptr *lock;
+ arch_spinlock_t *lock;
lock_release(&lg->lock_dep_map, 1, _RET_IP_);
lock = per_cpu_ptr(lg->lock, cpu);
- lg_do_unlock(lock);
- preempt_enable_nort();
+ arch_spin_unlock(lock);
+ preempt_enable();
}
EXPORT_SYMBOL(lg_local_unlock_cpu);
@@ -82,12 +64,12 @@ void lg_global_lock(struct lglock *lg)
{
int i;
- preempt_disable_nort();
+ preempt_disable();
lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
for_each_possible_cpu(i) {
- lg_lock_ptr *lock;
+ arch_spinlock_t *lock;
lock = per_cpu_ptr(lg->lock, i);
- lg_do_lock(lock);
+ arch_spin_lock(lock);
}
}
EXPORT_SYMBOL(lg_global_lock);
@@ -98,10 +80,10 @@ void lg_global_unlock(struct lglock *lg)
lock_release(&lg->lock_dep_map, 1, _RET_IP_);
for_each_possible_cpu(i) {
- lg_lock_ptr *lock;
+ arch_spinlock_t *lock;
lock = per_cpu_ptr(lg->lock, i);
- lg_do_unlock(lock);
+ arch_spin_unlock(lock);
}
- preempt_enable_nort();
+ preempt_enable();
}
EXPORT_SYMBOL(lg_global_unlock);
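With the revert, lglocks are again plain per-CPU arch spinlocks: the local path only needs preempt_disable(), while the global path takes every possible CPU's lock in turn. A hedged usage sketch (lock and helper names are illustrative only):

#include <linux/init.h>
#include <linux/lglock.h>

static DEFINE_STATIC_LGLOCK(demo_lglock);

static void demo_touch_this_cpu(void)
{
        /* Fast path: excludes only other users of this CPU's lock. */
        lg_local_lock(&demo_lglock);
        /* ... update this CPU's slice of the data ... */
        lg_local_unlock(&demo_lglock);
}

static void demo_walk_all_cpus(void)
{
        /* Slow path: excludes everyone by locking each per-CPU lock. */
        lg_global_lock(&demo_lglock);
        /* ... scan the per-CPU state consistently ... */
        lg_global_unlock(&demo_lglock);
}

static int __init demo_lglock_init(void)
{
        lg_lock_init(&demo_lglock, "demo_lglock");
        return 0;
}
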
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index b74f7a5..e16c45b 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3541,7 +3541,6 @@ static void check_flags(unsigned long flags)
}
}
-#ifndef CONFIG_PREEMPT_RT_FULL
/*
* We dont accurately track softirq state in e.g.
* hardirq contexts (such as on 4KSTACKS), so only
@@ -3556,7 +3555,6 @@ static void check_flags(unsigned long flags)
DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
}
}
-#endif
if (!debug_locks)
print_irqtrace_events(current);
diff --git a/kernel/panic.c b/kernel/panic.c
index 936d00f..b6c482c 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -368,11 +368,9 @@ static u64 oops_id;
static int init_oops_id(void)
{
-#ifndef CONFIG_PREEMPT_RT_FULL
if (!oops_id)
get_random_bytes(&oops_id, sizeof(oops_id));
else
-#endif
oops_id++;
return 0;
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 4208655..55e9560 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -312,7 +312,9 @@ static void *pidns_get(struct task_struct *task)
struct pid_namespace *ns;
rcu_read_lock();
- ns = get_pid_ns(task_active_pid_ns(task));
+ ns = task_active_pid_ns(task);
+ if (ns)
+ get_pid_ns(ns);
rcu_read_unlock();
return ns;
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 4bf82f8..c7f31aa 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -3,7 +3,6 @@
*/
#include <linux/sched.h>
-#include <linux/sched/rt.h>
#include <linux/posix-timers.h>
#include <linux/errno.h>
#include <linux/math64.h>
@@ -664,7 +663,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
/*
* Disarm any old timer after extracting its expiry time.
*/
- BUG_ON_NONRT(!irqs_disabled());
+ BUG_ON(!irqs_disabled());
ret = 0;
old_incr = timer->it.cpu.incr;
@@ -1111,7 +1110,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
/*
* Now re-arm for the new expiry time.
*/
- BUG_ON_NONRT(!irqs_disabled());
+ BUG_ON(!irqs_disabled());
arm_timer(timer);
spin_unlock(&p->sighand->siglock);
@@ -1178,11 +1177,10 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
sig = tsk->signal;
if (sig->cputimer.running) {
struct task_cputime group_sample;
- unsigned long flags;
- raw_spin_lock_irqsave(&sig->cputimer.lock, flags);
+ raw_spin_lock(&sig->cputimer.lock);
group_sample = sig->cputimer.cputime;
- raw_spin_unlock_irqrestore(&sig->cputimer.lock, flags);
+ raw_spin_unlock(&sig->cputimer.lock);
if (task_cputime_expired(&group_sample, &sig->cputime_expires))
return 1;
@@ -1196,13 +1194,13 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
* already updated our counts. We need to check if any timers fire now.
* Interrupts are disabled.
*/
-static void __run_posix_cpu_timers(struct task_struct *tsk)
+void run_posix_cpu_timers(struct task_struct *tsk)
{
LIST_HEAD(firing);
struct k_itimer *timer, *next;
unsigned long flags;
- BUG_ON_NONRT(!irqs_disabled());
+ BUG_ON(!irqs_disabled());
/*
* The fast path checks that there are no expired thread or thread
@@ -1267,190 +1265,6 @@ static void __run_posix_cpu_timers(struct task_struct *tsk)
posix_cpu_timer_kick_nohz();
}
-#ifdef CONFIG_PREEMPT_RT_BASE
-#include <linux/kthread.h>
-#include <linux/cpu.h>
-DEFINE_PER_CPU(struct task_struct *, posix_timer_task);
-DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);
-
-static int posix_cpu_timers_thread(void *data)
-{
- int cpu = (long)data;
-
- BUG_ON(per_cpu(posix_timer_task,cpu) != current);
-
- while (!kthread_should_stop()) {
- struct task_struct *tsk = NULL;
- struct task_struct *next = NULL;
-
- if (cpu_is_offline(cpu))
- goto wait_to_die;
-
- /* grab task list */
- raw_local_irq_disable();
- tsk = per_cpu(posix_timer_tasklist, cpu);
- per_cpu(posix_timer_tasklist, cpu) = NULL;
- raw_local_irq_enable();
-
- /* its possible the list is empty, just return */
- if (!tsk) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- __set_current_state(TASK_RUNNING);
- continue;
- }
-
- /* Process task list */
- while (1) {
- /* save next */
- next = tsk->posix_timer_list;
-
- /* run the task timers, clear its ptr and
- * unreference it
- */
- __run_posix_cpu_timers(tsk);
- tsk->posix_timer_list = NULL;
- put_task_struct(tsk);
-
- /* check if this is the last on the list */
- if (next == tsk)
- break;
- tsk = next;
- }
- }
- return 0;
-
-wait_to_die:
- /* Wait for kthread_stop */
- set_current_state(TASK_INTERRUPTIBLE);
- while (!kthread_should_stop()) {
- schedule();
- set_current_state(TASK_INTERRUPTIBLE);
- }
- __set_current_state(TASK_RUNNING);
- return 0;
-}
-
-static inline int __fastpath_timer_check(struct task_struct *tsk)
-{
- /* tsk == current, ensure it is safe to use ->signal/sighand */
- if (unlikely(tsk->exit_state))
- return 0;
-
- if (!task_cputime_zero(&tsk->cputime_expires))
- return 1;
-
- if (!task_cputime_zero(&tsk->signal->cputime_expires))
- return 1;
-
- return 0;
-}
-
-void run_posix_cpu_timers(struct task_struct *tsk)
-{
- unsigned long cpu = smp_processor_id();
- struct task_struct *tasklist;
-
- BUG_ON(!irqs_disabled());
- if(!per_cpu(posix_timer_task, cpu))
- return;
- /* get per-cpu references */
- tasklist = per_cpu(posix_timer_tasklist, cpu);
-
- /* check to see if we're already queued */
- if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) {
- get_task_struct(tsk);
- if (tasklist) {
- tsk->posix_timer_list = tasklist;
- } else {
- /*
- * The list is terminated by a self-pointing
- * task_struct
- */
- tsk->posix_timer_list = tsk;
- }
- per_cpu(posix_timer_tasklist, cpu) = tsk;
-
- wake_up_process(per_cpu(posix_timer_task, cpu));
- }
-}
-
-/*
- * posix_cpu_thread_call - callback that gets triggered when a CPU is added.
- * Here we can start up the necessary migration thread for the new CPU.
- */
-static int posix_cpu_thread_call(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- int cpu = (long)hcpu;
- struct task_struct *p;
- struct sched_param param;
-
- switch (action) {
- case CPU_UP_PREPARE:
- p = kthread_create(posix_cpu_timers_thread, hcpu,
- "posixcputmr/%d",cpu);
- if (IS_ERR(p))
- return NOTIFY_BAD;
- p->flags |= PF_NOFREEZE;
- kthread_bind(p, cpu);
- /* Must be high prio to avoid getting starved */
- param.sched_priority = MAX_RT_PRIO-1;
- sched_setscheduler(p, SCHED_FIFO, &param);
- per_cpu(posix_timer_task,cpu) = p;
- break;
- case CPU_ONLINE:
- /* Strictly unneccessary, as first user will wake it. */
- wake_up_process(per_cpu(posix_timer_task,cpu));
- break;
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_UP_CANCELED:
- /* Unbind it from offline cpu so it can run. Fall thru. */
- kthread_bind(per_cpu(posix_timer_task, cpu),
- cpumask_any(cpu_online_mask));
- kthread_stop(per_cpu(posix_timer_task,cpu));
- per_cpu(posix_timer_task,cpu) = NULL;
- break;
- case CPU_DEAD:
- kthread_stop(per_cpu(posix_timer_task,cpu));
- per_cpu(posix_timer_task,cpu) = NULL;
- break;
-#endif
- }
- return NOTIFY_OK;
-}
-
-/* Register at highest priority so that task migration (migrate_all_tasks)
- * happens before everything else.
- */
-static struct notifier_block posix_cpu_thread_notifier = {
- .notifier_call = posix_cpu_thread_call,
- .priority = 10
-};
-
-static int __init posix_cpu_thread_init(void)
-{
- void *hcpu = (void *)(long)smp_processor_id();
- /* Start one for boot CPU. */
- unsigned long cpu;
-
- /* init the per-cpu posix_timer_tasklets */
- for_each_possible_cpu(cpu)
- per_cpu(posix_timer_tasklist, cpu) = NULL;
-
- posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu);
- posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu);
- register_cpu_notifier(&posix_cpu_thread_notifier);
- return 0;
-}
-early_initcall(posix_cpu_thread_init);
-#else /* CONFIG_PREEMPT_RT_BASE */
-void run_posix_cpu_timers(struct task_struct *tsk)
-{
- __run_posix_cpu_timers(tsk);
-}
-#endif /* CONFIG_PREEMPT_RT_BASE */
-
/*
* Set one of the process-wide special case CPU timers or RLIMIT_CPU.
* The tsk->sighand->siglock must be held by the caller.
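From user space, the machinery above backs CPU-time timers such as those armed against CLOCK_PROCESS_CPUTIME_ID; run_posix_cpu_timers() is what eventually fires them from the tick. A small, self-contained illustration of such a timer (userspace C, link with -lrt on older glibc; all names are demo-only):

#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

static volatile sig_atomic_t fired;

static void on_cpu_timer(int sig)
{
        fired = 1;
}

int main(void)
{
        struct sigevent sev;
        struct itimerspec its;
        timer_t timerid;

        signal(SIGRTMIN, on_cpu_timer);

        memset(&sev, 0, sizeof(sev));
        sev.sigev_notify = SIGEV_SIGNAL;
        sev.sigev_signo = SIGRTMIN;     /* must satisfy the kernel's good_sigevent() checks */
        if (timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, &timerid) != 0) {
                perror("timer_create");
                return 1;
        }

        memset(&its, 0, sizeof(its));
        its.it_value.tv_sec = 1;        /* expire after one second of consumed CPU time */
        if (timer_settime(timerid, 0, &its, NULL) != 0) {
                perror("timer_settime");
                return 1;
        }

        while (!fired)
                ;                       /* burn CPU until the kernel delivers the signal */

        puts("CPU-time timer expired");
        return 0;
}
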
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index a22b931..424c2d4 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -497,7 +497,6 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
static struct pid *good_sigevent(sigevent_t * event)
{
struct task_struct *rtn = current->group_leader;
- int sig = event->sigev_signo;
if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
(!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
@@ -506,8 +505,7 @@ static struct pid *good_sigevent(sigevent_t * event)
return NULL;
if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
- (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) ||
- sig_kernel_coredump(sig)))
+ ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
return NULL;
return task_pid(rtn);
@@ -818,20 +816,6 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
return overrun;
}
-/*
- * Protected by RCU!
- */
-static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr)
-{
-#ifdef CONFIG_PREEMPT_RT_FULL
- if (kc->timer_set == common_timer_set)
- hrtimer_wait_for_timer(&timr->it.real.timer);
- else
- /* FIXME: Whacky hack for posix-cpu-timers */
- schedule_timeout(1);
-#endif
-}
-
/* Set a POSIX.1b interval timer. */
/* timr->it_lock is taken. */
static int
@@ -909,7 +893,6 @@ retry:
if (!timr)
return -EINVAL;
- rcu_read_lock();
kc = clockid_to_kclock(timr->it_clock);
if (WARN_ON_ONCE(!kc || !kc->timer_set))
error = -EINVAL;
@@ -918,12 +901,9 @@ retry:
unlock_timer(timr, flag);
if (error == TIMER_RETRY) {
- timer_wait_for_callback(kc, timr);
rtn = NULL; // We already got the old time...
- rcu_read_unlock();
goto retry;
}
- rcu_read_unlock();
if (old_setting && !error &&
copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
@@ -961,15 +941,10 @@ retry_delete:
if (!timer)
return -EINVAL;
- rcu_read_lock();
if (timer_delete_hook(timer) == TIMER_RETRY) {
unlock_timer(timer, flags);
- timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
- timer);
- rcu_read_unlock();
goto retry_delete;
}
- rcu_read_unlock();
spin_lock(&current->sighand->siglock);
list_del(&timer->list);
@@ -995,18 +970,8 @@ static void itimer_delete(struct k_itimer *timer)
retry_delete:
spin_lock_irqsave(&timer->it_lock, flags);
- /* On RT we can race with a deletion */
- if (!timer->it_signal) {
- unlock_timer(timer, flags);
- return;
- }
-
if (timer_delete_hook(timer) == TIMER_RETRY) {
- rcu_read_lock();
unlock_timer(timer, flags);
- timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
- timer);
- rcu_read_unlock();
goto retry_delete;
}
list_del(&timer->list);
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index d26958b..0121dab 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -275,8 +275,6 @@ static int create_image(int platform_mode)
local_irq_disable();
- system_state = SYSTEM_SUSPEND;
-
error = syscore_suspend();
if (error) {
printk(KERN_ERR "PM: Some system devices failed to power down, "
@@ -304,7 +302,6 @@ static int create_image(int platform_mode)
syscore_resume();
Enable_irqs:
- system_state = SYSTEM_RUNNING;
local_irq_enable();
Enable_cpus:
@@ -430,7 +427,6 @@ static int resume_target_kernel(bool platform_mode)
goto Enable_cpus;
local_irq_disable();
- system_state = SYSTEM_SUSPEND;
error = syscore_suspend();
if (error)
@@ -464,7 +460,6 @@ static int resume_target_kernel(bool platform_mode)
syscore_resume();
Enable_irqs:
- system_state = SYSTEM_RUNNING;
local_irq_enable();
Enable_cpus:
@@ -547,7 +542,6 @@ int hibernation_platform_enter(void)
goto Platform_finish;
local_irq_disable();
- system_state = SYSTEM_SUSPEND;
syscore_suspend();
if (pm_wakeup_pending()) {
error = -EAGAIN;
@@ -560,7 +554,6 @@ int hibernation_platform_enter(void)
Power_up:
syscore_resume();
- system_state = SYSTEM_RUNNING;
local_irq_enable();
enable_nonboot_cpus();
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index e6703bb..62ee437 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -218,8 +218,6 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
arch_suspend_disable_irqs();
BUG_ON(!irqs_disabled());
- system_state = SYSTEM_SUSPEND;
-
error = syscore_suspend();
if (!error) {
*wakeup = pm_wakeup_pending();
@@ -230,8 +228,6 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
syscore_resume();
}
- system_state = SYSTEM_RUNNING;
-
arch_suspend_enable_irqs();
BUG_ON(irqs_disabled());
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 0a63f7b..c59896c 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1029,7 +1029,6 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
{
char *text;
int len = 0;
- int attempts = 0;
text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
if (!text)
@@ -1041,14 +1040,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
u64 seq;
u32 idx;
enum log_flags prev;
- int num_msg;
-try_again:
- attempts++;
- if (attempts > 10) {
- len = -EBUSY;
- goto out;
- }
- num_msg = 0;
+
if (clear_seq < log_first_seq) {
/* messages are gone, move to first available one */
clear_seq = log_first_seq;
@@ -1069,14 +1061,6 @@ try_again:
prev = msg->flags;
idx = log_next(idx);
seq++;
- num_msg++;
- if (num_msg > 5) {
- num_msg = 0;
- raw_spin_unlock_irq(&logbuf_lock);
- raw_spin_lock_irq(&logbuf_lock);
- if (clear_seq < log_first_seq)
- goto try_again;
- }
}
/* move first record forward until length fits into the buffer */
@@ -1090,21 +1074,12 @@ try_again:
prev = msg->flags;
idx = log_next(idx);
seq++;
- num_msg++;
- if (num_msg > 5) {
- num_msg = 0;
- raw_spin_unlock_irq(&logbuf_lock);
- raw_spin_lock_irq(&logbuf_lock);
- if (clear_seq < log_first_seq)
- goto try_again;
- }
}
/* last message fitting into this dump */
next_seq = log_next_seq;
len = 0;
- prev = 0;
while (len >= 0 && seq < next_seq) {
struct printk_log *msg = log_from_idx(idx);
int textlen;
@@ -1139,7 +1114,6 @@ try_again:
clear_seq = log_next_seq;
clear_idx = log_next_idx;
}
-out:
raw_spin_unlock_irq(&logbuf_lock);
kfree(text);
@@ -1297,7 +1271,6 @@ static void call_console_drivers(int level, const char *text, size_t len)
if (!console_drivers)
return;
- migrate_disable();
for_each_console(con) {
if (exclusive_console && con != exclusive_console)
continue;
@@ -1310,7 +1283,6 @@ static void call_console_drivers(int level, const char *text, size_t len)
continue;
con->write(con, text, len);
}
- migrate_enable();
}
/*
@@ -1370,18 +1342,12 @@ static inline int can_use_console(unsigned int cpu)
* interrupts disabled. It should return with 'lockbuf_lock'
* released but interrupts still disabled.
*/
-static int console_trylock_for_printk(unsigned int cpu, unsigned long flags)
+static int console_trylock_for_printk(unsigned int cpu)
__releases(&logbuf_lock)
{
int retval = 0, wake = 0;
-#ifdef CONFIG_PREEMPT_RT_FULL
- int lock = !early_boot_irqs_disabled && !irqs_disabled_flags(flags) &&
- (preempt_count() <= 1);
-#else
- int lock = 1;
-#endif
- if (lock && console_trylock()) {
+ if (console_trylock()) {
retval = 1;
/*
@@ -1521,62 +1487,6 @@ static size_t cont_print_text(char *text, size_t size)
return textlen;
}
-#ifdef CONFIG_EARLY_PRINTK
-struct console *early_console;
-
-void early_vprintk(const char *fmt, va_list ap)
-{
- if (early_console) {
- char buf[512];
- int n = vscnprintf(buf, sizeof(buf), fmt, ap);
-
- early_console->write(early_console, buf, n);
- }
-}
-
-asmlinkage void early_printk(const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- early_vprintk(fmt, ap);
- va_end(ap);
-}
-
-/*
- * This is independent of any log levels - a global
- * kill switch that turns off all of printk.
- *
- * Used by the NMI watchdog if early-printk is enabled.
- */
-static bool __read_mostly printk_killswitch;
-
-static int __init force_early_printk_setup(char *str)
-{
- printk_killswitch = true;
- return 0;
-}
-early_param("force_early_printk", force_early_printk_setup);
-
-void printk_kill(void)
-{
- printk_killswitch = true;
-}
-
-static int forced_early_printk(const char *fmt, va_list ap)
-{
- if (!printk_killswitch)
- return 0;
- early_vprintk(fmt, ap);
- return 1;
-}
-#else
-static inline int forced_early_printk(const char *fmt, va_list ap)
-{
- return 0;
-}
-#endif
-
asmlinkage int vprintk_emit(int facility, int level,
const char *dict, size_t dictlen,
const char *fmt, va_list args)
@@ -1590,13 +1500,6 @@ asmlinkage int vprintk_emit(int facility, int level,
int this_cpu;
int printed_len = 0;
- /*
- * Fall back to early_printk if a debugging subsystem has
- * killed printk output
- */
- if (unlikely(forced_early_printk(fmt, args)))
- return 1;
-
boot_delay_msec(level);
printk_delay();
@@ -1716,15 +1619,8 @@ asmlinkage int vprintk_emit(int facility, int level,
* The console_trylock_for_printk() function will release 'logbuf_lock'
* regardless of whether it actually gets the console semaphore or not.
*/
- if (console_trylock_for_printk(this_cpu, flags)) {
-#ifndef CONFIG_PREEMPT_RT_FULL
+ if (console_trylock_for_printk(this_cpu))
console_unlock();
-#else
- raw_local_irq_restore(flags);
- console_unlock();
- raw_local_irq_save(flags);
-#endif
- }
lockdep_on();
out_restore_irqs:
@@ -1826,6 +1722,29 @@ static size_t cont_print_text(char *text, size_t size) { return 0; }
#endif /* CONFIG_PRINTK */
+#ifdef CONFIG_EARLY_PRINTK
+struct console *early_console;
+
+void early_vprintk(const char *fmt, va_list ap)
+{
+ if (early_console) {
+ char buf[512];
+ int n = vscnprintf(buf, sizeof(buf), fmt, ap);
+
+ early_console->write(early_console, buf, n);
+ }
+}
+
+asmlinkage void early_printk(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ early_vprintk(fmt, ap);
+ va_end(ap);
+}
+#endif
+
static int __add_preferred_console(char *name, int idx, char *options,
char *brl_options)
{
@@ -2066,16 +1985,11 @@ static void console_cont_flush(char *text, size_t size)
goto out;
len = cont_print_text(text, size);
-#ifndef CONFIG_PREEMPT_RT_FULL
raw_spin_unlock(&logbuf_lock);
stop_critical_timings();
call_console_drivers(cont.level, text, len);
start_critical_timings();
local_irq_restore(flags);
-#else
- raw_spin_unlock_irqrestore(&logbuf_lock, flags);
- call_console_drivers(cont.level, text, len);
-#endif
return;
out:
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
@@ -2158,17 +2072,12 @@ skip:
console_idx = log_next(console_idx);
console_seq++;
console_prev = msg->flags;
-
-#ifndef CONFIG_PREEMPT_RT_FULL
raw_spin_unlock(&logbuf_lock);
+
stop_critical_timings(); /* don't trace print latency */
call_console_drivers(level, text, len);
start_critical_timings();
local_irq_restore(flags);
-#else
- raw_spin_unlock_irqrestore(&logbuf_lock, flags);
- call_console_drivers(level, text, len);
-#endif
}
console_locked = 0;
mutex_release(&console_lock_dep_map, 1, _RET_IP_);
@@ -2880,7 +2789,6 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
next_idx = idx;
l = 0;
- prev = 0;
while (seq < dumper->next_seq) {
struct printk_log *msg = log_from_idx(idx);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index fddaf65..1f4bcb3 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -135,12 +135,7 @@ static bool ptrace_freeze_traced(struct task_struct *task)
spin_lock_irq(&task->sighand->siglock);
if (task_is_traced(task) && !__fatal_signal_pending(task)) {
- raw_spin_lock_irq(&task->pi_lock);
- if (task->state & __TASK_TRACED)
- task->state = __TASK_TRACED;
- else
- task->saved_state = __TASK_TRACED;
- raw_spin_unlock_irq(&task->pi_lock);
+ task->state = __TASK_TRACED;
ret = true;
}
spin_unlock_irq(&task->sighand->siglock);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 7e1dd3e..b02a339 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -129,7 +129,6 @@ int notrace debug_lockdep_rcu_enabled(void)
}
EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
-#ifndef CONFIG_PREEMPT_RT_FULL
/**
* rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
*
@@ -156,7 +155,6 @@ int rcu_read_lock_bh_held(void)
return in_softirq() || irqs_disabled();
}
EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
-#endif
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index f202b26..9ed6075 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -369,7 +369,6 @@ void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
}
EXPORT_SYMBOL_GPL(call_rcu_sched);
-#ifndef CONFIG_PREEMPT_RT_FULL
/*
* Post an RCU bottom-half callback to be invoked after any subsequent
* quiescent state.
@@ -379,7 +378,6 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
__call_rcu(head, func, &rcu_bh_ctrlblk);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
-#endif
void rcu_init(void)
{
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 507fab1..32618b3 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -55,11 +55,6 @@
#include <linux/random.h>
#include <linux/ftrace_event.h>
#include <linux/suspend.h>
-#include <linux/delay.h>
-#include <linux/gfp.h>
-#include <linux/oom.h>
-#include <linux/smpboot.h>
-#include "time/tick-internal.h"
#include "rcutree.h"
#include <trace/events/rcu.h>
@@ -150,6 +145,8 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active);
*/
static int rcu_scheduler_fully_active __read_mostly;
+#ifdef CONFIG_RCU_BOOST
+
/*
* Control variables for per-CPU and per-rcu_node kthreads. These
* handle all flavors of RCU.
@@ -159,6 +156,8 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
DEFINE_PER_CPU(char, rcu_cpu_has_work);
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
static void invoke_rcu_core(void);
static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
@@ -200,19 +199,6 @@ void rcu_sched_qs(int cpu)
rdp->passed_quiesce = 1;
}
-#ifdef CONFIG_PREEMPT_RT_FULL
-static void rcu_preempt_qs(int cpu);
-
-void rcu_bh_qs(int cpu)
-{
- unsigned long flags;
-
- /* Callers to this function, rcu_preempt_qs(), must disable irqs. */
- local_irq_save(flags);
- rcu_preempt_qs(cpu);
- local_irq_restore(flags);
-}
-#else
void rcu_bh_qs(int cpu)
{
struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
@@ -221,7 +207,6 @@ void rcu_bh_qs(int cpu)
trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs"));
rdp->passed_quiesce = 1;
}
-#endif
/*
* Note a context switch. This is a quiescent state for RCU-sched,
@@ -278,7 +263,6 @@ long rcu_batches_completed_sched(void)
}
EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
-#ifndef CONFIG_PREEMPT_RT_FULL
/*
* Return the number of RCU BH batches processed thus far for debug & stats.
*/
@@ -296,7 +280,6 @@ void rcu_bh_force_quiescent_state(void)
force_quiescent_state(&rcu_bh_state);
}
EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
-#endif
/*
* Record the number of times rcutorture tests have been initiated and
@@ -1488,7 +1471,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
/* Handle grace-period start. */
for (;;) {
- swait_event_interruptible(rsp->gp_wq,
+ wait_event_interruptible(rsp->gp_wq,
rsp->gp_flags &
RCU_GP_FLAG_INIT);
if ((rsp->gp_flags & RCU_GP_FLAG_INIT) &&
@@ -1507,7 +1490,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
}
for (;;) {
rsp->jiffies_force_qs = jiffies + j;
- ret = swait_event_interruptible_timeout(rsp->gp_wq,
+ ret = wait_event_interruptible_timeout(rsp->gp_wq,
(rsp->gp_flags & RCU_GP_FLAG_FQS) ||
(!ACCESS_ONCE(rnp->qsmask) &&
!rcu_preempt_blocked_readers_cgp(rnp)),
@@ -1545,7 +1528,7 @@ static void rsp_wakeup(struct irq_work *work)
struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work);
/* Wake up rcu_gp_kthread() to start the grace period. */
- swait_wake(&rsp->gp_wq);
+ wake_up(&rsp->gp_wq);
}
/*
@@ -1619,7 +1602,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
{
WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
- swait_wake(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
+ wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
}
/*
@@ -2189,8 +2172,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
}
rsp->gp_flags |= RCU_GP_FLAG_FQS;
raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
- /* Memory barrier implied by wake_up() path. */
- swait_wake(&rsp->gp_wq);
+ wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
}
/*
@@ -2227,14 +2209,16 @@ __rcu_process_callbacks(struct rcu_state *rsp)
/*
* Do RCU core processing for the current CPU.
*/
-static void rcu_process_callbacks(void)
+static void rcu_process_callbacks(struct softirq_action *unused)
{
struct rcu_state *rsp;
if (cpu_is_offline(smp_processor_id()))
return;
+ trace_rcu_utilization(TPS("Start RCU core"));
for_each_rcu_flavor(rsp)
__rcu_process_callbacks(rsp);
+ trace_rcu_utilization(TPS("End RCU core"));
}
/*
@@ -2248,105 +2232,18 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
{
if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active)))
return;
- rcu_do_batch(rsp, rdp);
-}
-
-static void rcu_wake_cond(struct task_struct *t, int status)
-{
- /*
- * If the thread is yielding, only wake it when this
- * is invoked from idle
- */
- if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
- wake_up_process(t);
-}
-
-/*
- * Wake up this CPU's rcuc kthread to do RCU core processing.
- */
-static void invoke_rcu_core(void)
-{
- unsigned long flags;
- struct task_struct *t;
-
- if (!cpu_online(smp_processor_id()))
+ if (likely(!rsp->boost)) {
+ rcu_do_batch(rsp, rdp);
return;
- local_irq_save(flags);
- __this_cpu_write(rcu_cpu_has_work, 1);
- t = __this_cpu_read(rcu_cpu_kthread_task);
- if (t != NULL && current != t)
- rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));
- local_irq_restore(flags);
-}
-
-static void rcu_cpu_kthread_park(unsigned int cpu)
-{
- per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
-}
-
-static int rcu_cpu_kthread_should_run(unsigned int cpu)
-{
- return __this_cpu_read(rcu_cpu_has_work);
-}
-
-/*
- * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
- * RCU softirq used in flavors and configurations of RCU that do not
- * support RCU priority boosting.
- */
-static void rcu_cpu_kthread(unsigned int cpu)
-{
- unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status);
- char work, *workp = &__get_cpu_var(rcu_cpu_has_work);
- int spincnt;
-
- for (spincnt = 0; spincnt < 10; spincnt++) {
- trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
- local_bh_disable();
- *statusp = RCU_KTHREAD_RUNNING;
- this_cpu_inc(rcu_cpu_kthread_loops);
- local_irq_disable();
- work = *workp;
- *workp = 0;
- local_irq_enable();
- if (work)
- rcu_process_callbacks();
- local_bh_enable();
- if (*workp == 0) {
- trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
- *statusp = RCU_KTHREAD_WAITING;
- return;
- }
}
- *statusp = RCU_KTHREAD_YIELDING;
- trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
- schedule_timeout_interruptible(2);
- trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
- *statusp = RCU_KTHREAD_WAITING;
+ invoke_rcu_callbacks_kthread();
}
-static struct smp_hotplug_thread rcu_cpu_thread_spec = {
- .store = &rcu_cpu_kthread_task,
- .thread_should_run = rcu_cpu_kthread_should_run,
- .thread_fn = rcu_cpu_kthread,
- .thread_comm = "rcuc/%u",
- .setup = rcu_cpu_kthread_setup,
- .park = rcu_cpu_kthread_park,
-};
-
-/*
- * Spawn per-CPU RCU core processing kthreads.
- */
-static int __init rcu_spawn_core_kthreads(void)
+static void invoke_rcu_core(void)
{
- int cpu;
-
- for_each_possible_cpu(cpu)
- per_cpu(rcu_cpu_has_work, cpu) = 0;
- BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
- return 0;
+ if (cpu_online(smp_processor_id()))
+ raise_softirq(RCU_SOFTIRQ);
}
-early_initcall(rcu_spawn_core_kthreads);
/*
* Handle any core-RCU processing required by a call_rcu() invocation.
@@ -2476,7 +2373,6 @@ void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
}
EXPORT_SYMBOL_GPL(call_rcu_sched);
-#ifndef CONFIG_PREEMPT_RT_FULL
/*
* Queue an RCU callback for invocation after a quicker grace period.
*/
@@ -2485,7 +2381,6 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
__call_rcu(head, func, &rcu_bh_state, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
-#endif
/*
* Because a context switch is a grace period for RCU-sched and RCU-bh,
@@ -2563,7 +2458,6 @@ void synchronize_sched(void)
}
EXPORT_SYMBOL_GPL(synchronize_sched);
-#ifndef CONFIG_PREEMPT_RT_FULL
/**
* synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
*
@@ -2590,7 +2484,6 @@ void synchronize_rcu_bh(void)
wait_rcu_gp(call_rcu_bh);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
-#endif
static int synchronize_sched_expedited_cpu_stop(void *data)
{
@@ -2765,10 +2658,6 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
/* Check for CPU stalls, if enabled. */
check_cpu_stall(rsp, rdp);
- /* Is this CPU a NO_HZ_FULL CPU that should ignore RCU? */
- if (rcu_nohz_full_cpu(rsp))
- return 0;
-
/* Is the RCU core waiting for a quiescent state from this CPU? */
if (rcu_scheduler_fully_active &&
rdp->qs_pending && !rdp->passed_quiesce) {
@@ -3002,7 +2891,6 @@ static void _rcu_barrier(struct rcu_state *rsp)
mutex_unlock(&rsp->barrier_mutex);
}
-#ifndef CONFIG_PREEMPT_RT_FULL
/**
* rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
*/
@@ -3011,7 +2899,6 @@ void rcu_barrier_bh(void)
_rcu_barrier(&rcu_bh_state);
}
EXPORT_SYMBOL_GPL(rcu_barrier_bh);
-#endif
/**
* rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
@@ -3315,7 +3202,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
}
rsp->rda = rda;
- init_swait_head(&rsp->gp_wq);
+ init_waitqueue_head(&rsp->gp_wq);
init_irq_work(&rsp->wakeup_work, rsp_wakeup);
rnp = rsp->level[rcu_num_lvls - 1];
for_each_possible_cpu(i) {
@@ -3411,6 +3298,7 @@ void __init rcu_init(void)
rcu_init_one(&rcu_sched_state, &rcu_sched_data);
rcu_init_one(&rcu_bh_state, &rcu_bh_data);
__rcu_init_preempt();
+ open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
/*
* We don't need protection against CPU-hotplug here because
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 1df8d9e..52be957 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -28,7 +28,6 @@
#include <linux/cpumask.h>
#include <linux/seqlock.h>
#include <linux/irq_work.h>
-#include <linux/wait-simple.h>
/*
* Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
@@ -201,7 +200,7 @@ struct rcu_node {
/* This can happen due to race conditions. */
#endif /* #ifdef CONFIG_RCU_BOOST */
#ifdef CONFIG_RCU_NOCB_CPU
- struct swait_head nocb_gp_wq[2];
+ wait_queue_head_t nocb_gp_wq[2];
/* Place for rcu_nocb_kthread() to wait GP. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
int need_future_gp[2];
@@ -334,7 +333,7 @@ struct rcu_data {
atomic_long_t nocb_q_count_lazy; /* (approximate). */
int nocb_p_count; /* # CBs being invoked by kthread */
int nocb_p_count_lazy; /* (approximate). */
- struct swait_head nocb_wq; /* For nocb kthreads to sleep on. */
+ wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
struct task_struct *nocb_kthread;
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
@@ -404,7 +403,7 @@ struct rcu_state {
unsigned long gpnum; /* Current gp number. */
unsigned long completed; /* # of last completed gp. */
struct task_struct *gp_kthread; /* Task for grace periods. */
- struct swait_head gp_wq; /* Where GP task waits. */
+ wait_queue_head_t gp_wq; /* Where GP task waits. */
int gp_flags; /* Commands for GP task. */
/* End of fields guarded by root rcu_node's lock. */
@@ -528,9 +527,10 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
static void __init __rcu_init_preempt(void);
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
+static void invoke_rcu_callbacks_kthread(void);
static bool rcu_is_callbacks_kthread(void);
-static void rcu_cpu_kthread_setup(unsigned int cpu);
#ifdef CONFIG_RCU_BOOST
+static void rcu_preempt_do_callbacks(void);
static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
struct rcu_node *rnp);
#endif /* #ifdef CONFIG_RCU_BOOST */
@@ -564,7 +564,6 @@ static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
unsigned long maxj);
static void rcu_bind_gp_kthread(void);
static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
-static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
#endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index c849bd4..511e6b4 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -24,6 +24,12 @@
* Paul E. McKenney <paulmck@linux.vnet.ibm.com>
*/
+#include <linux/delay.h>
+#include <linux/gfp.h>
+#include <linux/oom.h>
+#include <linux/smpboot.h>
+#include "time/tick-internal.h"
+
#define RCU_KTHREAD_PRIO 1
#ifdef CONFIG_RCU_BOOST
@@ -353,7 +359,7 @@ void rcu_read_unlock_special(struct task_struct *t)
}
/* Hardware IRQ handlers cannot block. */
- if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) {
+ if (in_irq() || in_serving_softirq()) {
local_irq_restore(flags);
return;
}
@@ -650,6 +656,15 @@ static void rcu_preempt_check_callbacks(int cpu)
t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
}
+#ifdef CONFIG_RCU_BOOST
+
+static void rcu_preempt_do_callbacks(void)
+{
+ rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data));
+}
+
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
/*
* Queue a preemptible-RCU callback for invocation after a grace period.
*/
@@ -1111,19 +1126,6 @@ void exit_rcu(void)
#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
-/*
- * If boosting, set rcuc kthreads to realtime priority.
- */
-static void rcu_cpu_kthread_setup(unsigned int cpu)
-{
-#ifdef CONFIG_RCU_BOOST
- struct sched_param sp;
-
- sp.sched_priority = RCU_KTHREAD_PRIO;
- sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
-#endif /* #ifdef CONFIG_RCU_BOOST */
-}
-
#ifdef CONFIG_RCU_BOOST
#include "rtmutex_common.h"
@@ -1155,6 +1157,16 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp)
#endif /* #else #ifdef CONFIG_RCU_TRACE */
+static void rcu_wake_cond(struct task_struct *t, int status)
+{
+ /*
+ * If the thread is yielding, only wake it when this
+ * is invoked from idle
+ */
+ if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
+ wake_up_process(t);
+}
+
/*
* Carry out RCU priority boosting on the task indicated by ->exp_tasks
* or ->boost_tasks, advancing the pointer to the next task in the
@@ -1298,6 +1310,23 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
}
/*
+ * Wake up the per-CPU kthread to invoke RCU callbacks.
+ */
+static void invoke_rcu_callbacks_kthread(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __this_cpu_write(rcu_cpu_has_work, 1);
+ if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
+ current != __this_cpu_read(rcu_cpu_kthread_task)) {
+ rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
+ __this_cpu_read(rcu_cpu_kthread_status));
+ }
+ local_irq_restore(flags);
+}
+
+/*
* Is the current CPU running the RCU-callbacks kthread?
* Caller must have preemption disabled.
*/
@@ -1351,6 +1380,67 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
return 0;
}
+static void rcu_kthread_do_work(void)
+{
+ rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
+ rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
+ rcu_preempt_do_callbacks();
+}
+
+static void rcu_cpu_kthread_setup(unsigned int cpu)
+{
+ struct sched_param sp;
+
+ sp.sched_priority = RCU_KTHREAD_PRIO;
+ sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+}
+
+static void rcu_cpu_kthread_park(unsigned int cpu)
+{
+ per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
+}
+
+static int rcu_cpu_kthread_should_run(unsigned int cpu)
+{
+ return __get_cpu_var(rcu_cpu_has_work);
+}
+
+/*
+ * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
+ * RCU softirq used in flavors and configurations of RCU that do not
+ * support RCU priority boosting.
+ */
+static void rcu_cpu_kthread(unsigned int cpu)
+{
+ unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status);
+ char work, *workp = &__get_cpu_var(rcu_cpu_has_work);
+ int spincnt;
+
+ for (spincnt = 0; spincnt < 10; spincnt++) {
+ trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
+ local_bh_disable();
+ *statusp = RCU_KTHREAD_RUNNING;
+ this_cpu_inc(rcu_cpu_kthread_loops);
+ local_irq_disable();
+ work = *workp;
+ *workp = 0;
+ local_irq_enable();
+ if (work)
+ rcu_kthread_do_work();
+ local_bh_enable();
+ if (*workp == 0) {
+ trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
+ *statusp = RCU_KTHREAD_WAITING;
+ return;
+ }
+ }
+ *statusp = RCU_KTHREAD_YIELDING;
+ trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
+ schedule_timeout_interruptible(2);
+ trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
+ *statusp = RCU_KTHREAD_WAITING;
+}
+
/*
* Set the per-rcu_node kthread's affinity to cover all CPUs that are
* served by the rcu_node in question. The CPU hotplug lock is still
@@ -1384,14 +1474,27 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
free_cpumask_var(cm);
}
+static struct smp_hotplug_thread rcu_cpu_thread_spec = {
+ .store = &rcu_cpu_kthread_task,
+ .thread_should_run = rcu_cpu_kthread_should_run,
+ .thread_fn = rcu_cpu_kthread,
+ .thread_comm = "rcuc/%u",
+ .setup = rcu_cpu_kthread_setup,
+ .park = rcu_cpu_kthread_park,
+};
+
/*
* Spawn all kthreads -- called as soon as the scheduler is running.
*/
static int __init rcu_spawn_kthreads(void)
{
struct rcu_node *rnp;
+ int cpu;
rcu_scheduler_fully_active = 1;
+ for_each_possible_cpu(cpu)
+ per_cpu(rcu_cpu_has_work, cpu) = 0;
+ BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
rnp = rcu_get_root(rcu_state);
(void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
if (NUM_RCU_NODES > 1) {
@@ -1419,6 +1522,11 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
+static void invoke_rcu_callbacks_kthread(void)
+{
+ WARN_ON_ONCE(1);
+}
+
static bool rcu_is_callbacks_kthread(void)
{
return false;
@@ -1445,7 +1553,7 @@ static void rcu_prepare_kthreads(int cpu)
#endif /* #else #ifdef CONFIG_RCU_BOOST */
-#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL)
+#if !defined(CONFIG_RCU_FAST_NO_HZ)
/*
* Check to see if any future RCU-related work will need to be done
@@ -1461,9 +1569,6 @@ int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
*delta_jiffies = ULONG_MAX;
return rcu_cpu_has_callbacks(cpu, NULL);
}
-#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */
-
-#if !defined(CONFIG_RCU_FAST_NO_HZ)
/*
* Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
@@ -1561,8 +1666,6 @@ static bool rcu_try_advance_all_cbs(void)
return cbs_ready;
}
-#ifndef CONFIG_PREEMPT_RT_FULL
-
/*
* Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
* to invoke. If the CPU has callbacks, try to advance them. Tell the
@@ -1601,7 +1704,6 @@ int rcu_needs_cpu(int cpu, unsigned long *dj)
}
return 0;
}
-#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */
/*
* Prepare a CPU for idle from an RCU perspective. The first major task
@@ -1959,7 +2061,7 @@ static int rcu_nocb_needs_gp(struct rcu_state *rsp)
*/
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
{
- swait_wake_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
+ wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
}
/*
@@ -1977,8 +2079,8 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
static void rcu_init_one_nocb(struct rcu_node *rnp)
{
- init_swait_head(&rnp->nocb_gp_wq[0]);
- init_swait_head(&rnp->nocb_gp_wq[1]);
+ init_waitqueue_head(&rnp->nocb_gp_wq[0]);
+ init_waitqueue_head(&rnp->nocb_gp_wq[1]);
}
/* Is the specified CPU a no-CPUs CPU? */
@@ -2018,7 +2120,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
return;
len = atomic_long_read(&rdp->nocb_q_count);
if (old_rhpp == &rdp->nocb_head) {
- swait_wake(&rdp->nocb_wq); /* ... only if queue was empty ... */
+ wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
rdp->qlen_last_fqs_check = 0;
} else if (len > rdp->qlen_last_fqs_check + qhimark) {
wake_up_process(t); /* ... or if many callbacks queued. */
@@ -2108,7 +2210,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
*/
trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
for (;;) {
- swait_event_interruptible(
+ wait_event_interruptible(
rnp->nocb_gp_wq[c & 0x1],
(d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
if (likely(d))
@@ -2136,7 +2238,7 @@ static int rcu_nocb_kthread(void *arg)
for (;;) {
/* If not polling, wait for next batch of callbacks. */
if (!rcu_nocb_poll)
- swait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
+ wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
list = ACCESS_ONCE(rdp->nocb_head);
if (!list) {
schedule_timeout_interruptible(1);
@@ -2186,7 +2288,7 @@ static int rcu_nocb_kthread(void *arg)
static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
{
rdp->nocb_tail = &rdp->nocb_head;
- init_swait_head(&rdp->nocb_wq);
+ init_waitqueue_head(&rdp->nocb_wq);
}
/* Create a kthread for each RCU flavor for each no-CBs CPU. */
@@ -2701,23 +2803,3 @@ static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
}
#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-
-/*
- * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
- * grace-period kthread will do force_quiescent_state() processing?
- * The idea is to avoid waking up RCU core processing on such a
- * CPU unless the grace period has extended for too long.
- *
- * This code relies on the fact that all NO_HZ_FULL CPUs are also
- * CONFIG_RCU_NOCB_CPUs.
- */
-static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
-{
-#ifdef CONFIG_NO_HZ_FULL
- if (tick_nohz_full_cpu(smp_processor_id()) &&
- (!rcu_gp_in_progress(rsp) ||
- ULONG_CMP_LT(jiffies, ACCESS_ONCE(rsp->gp_start) + HZ)))
- return 1;
-#endif /* #ifdef CONFIG_NO_HZ_FULL */
- return 0;
-}
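Taken together, the RCU hunks make call_rcu_bh() and friends unconditional again and run core RCU processing from RCU_SOFTIRQ rather than per-CPU rcuc kthreads, while the simple-wait primitives give way to ordinary wait queues. As a reminder of the call_rcu_bh() contract this restores, a short hypothetical caller (the demo structure is not from the patch):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_node {
        int payload;
        struct rcu_head rcu;
};

static void demo_free_rcu(struct rcu_head *head)
{
        struct demo_node *node = container_of(head, struct demo_node, rcu);

        kfree(node);
}

static void demo_retire(struct demo_node *node)
{
        /* Defer the free until every softirq-disabled (BH) reader is done;
         * after this revert the callback typically runs from RCU_SOFTIRQ
         * (or an rcuc kthread when RCU_BOOST is enabled). */
        call_rcu_bh(&node->rcu, demo_free_rcu);
}
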
diff --git a/kernel/relay.c b/kernel/relay.c
index b915513..5001c98 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -339,10 +339,6 @@ static void wakeup_readers(unsigned long data)
{
struct rchan_buf *buf = (struct rchan_buf *)data;
wake_up_interruptible(&buf->read_wait);
- /*
- * Stupid polling for now:
- */
- mod_timer(&buf->timer, jiffies + 1);
}
/**
@@ -360,7 +356,6 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init)
init_waitqueue_head(&buf->read_wait);
kref_init(&buf->kref);
setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf);
- mod_timer(&buf->timer, jiffies + 1);
} else
del_timer_sync(&buf->timer);
@@ -744,6 +739,15 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
else
buf->early_bytes += buf->chan->subbuf_size -
buf->padding[old_subbuf];
+ smp_mb();
+ if (waitqueue_active(&buf->read_wait))
+ /*
+ * Calling wake_up_interruptible() from here
+ * will deadlock if we happen to be logging
+ * from the scheduler (trying to re-grab
+ * rq->lock), so defer it.
+ */
+ mod_timer(&buf->timer, jiffies + 1);
}
old = buf->data;
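The comment restored above captures the reasoning: relay can log from inside the scheduler, where waking a reader directly could try to re-take rq->lock, so the wakeup is pushed out to a timer. A minimal sketch of that deferred-wakeup pattern (demo names only):

#include <linux/jiffies.h>
#include <linux/smp.h>
#include <linux/timer.h>
#include <linux/wait.h>

struct demo_buf {
        wait_queue_head_t read_wait;
        struct timer_list timer;
};

static void demo_wakeup_readers(unsigned long data)
{
        struct demo_buf *buf = (struct demo_buf *)data;

        /* Timer (softirq) context: safe to enter the scheduler here. */
        wake_up_interruptible(&buf->read_wait);
}

static void demo_buf_init(struct demo_buf *buf)
{
        init_waitqueue_head(&buf->read_wait);
        setup_timer(&buf->timer, demo_wakeup_readers, (unsigned long)buf);
}

static void demo_data_ready(struct demo_buf *buf)
{
        smp_mb();
        if (waitqueue_active(&buf->read_wait))
                /* Do not wake directly: the caller may hold rq->lock.
                 * Let the timer fire on the next tick instead. */
                mod_timer(&buf->timer, jiffies + 1);
}
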
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 3fbcb0d..4aa8a30 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -49,7 +49,7 @@ static int __res_counter_charge(struct res_counter *counter, unsigned long val,
r = ret = 0;
*limit_fail_at = NULL;
- local_irq_save_nort(flags);
+ local_irq_save(flags);
for (c = counter; c != NULL; c = c->parent) {
spin_lock(&c->lock);
r = res_counter_charge_locked(c, val, force);
@@ -69,7 +69,7 @@ static int __res_counter_charge(struct res_counter *counter, unsigned long val,
spin_unlock(&u->lock);
}
}
- local_irq_restore_nort(flags);
+ local_irq_restore(flags);
return ret;
}
@@ -103,7 +103,7 @@ u64 res_counter_uncharge_until(struct res_counter *counter,
struct res_counter *c;
u64 ret = 0;
- local_irq_save_nort(flags);
+ local_irq_save(flags);
for (c = counter; c != top; c = c->parent) {
u64 r;
spin_lock(&c->lock);
@@ -112,7 +112,7 @@ u64 res_counter_uncharge_until(struct res_counter *counter,
ret = r;
spin_unlock(&c->lock);
}
- local_irq_restore_nort(flags);
+ local_irq_restore(flags);
return ret;
}
diff --git a/kernel/rt.c b/kernel/rt.c
deleted file mode 100644
index 5d17727..0000000
--- a/kernel/rt.c
+++ /dev/null
@@ -1,452 +0,0 @@
-/*
- * kernel/rt.c
- *
- * Real-Time Preemption Support
- *
- * started by Ingo Molnar:
- *
- * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
- *
- * historic credit for proving that Linux spinlocks can be implemented via
- * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow
- * and others) who prototyped it on 2.4 and did lots of comparative
- * research and analysis; TimeSys, for proving that you can implement a
- * fully preemptible kernel via the use of IRQ threading and mutexes;
- * Bill Huey for persuasively arguing on lkml that the mutex model is the
- * right one; and to MontaVista, who ported pmutexes to 2.6.
- *
- * This code is a from-scratch implementation and is not based on pmutexes,
- * but the idea of converting spinlocks to mutexes is used here too.
- *
- * lock debugging, locking tree, deadlock detection:
- *
- * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
- * Released under the General Public License (GPL).
- *
- * Includes portions of the generic R/W semaphore implementation from:
- *
- * Copyright (c) 2001 David Howells (dhowells@redhat.com).
- * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
- * - Derived also from comments by Linus
- *
- * Pending ownership of locks and ownership stealing:
- *
- * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt
- *
- * (also by Steven Rostedt)
- * - Converted single pi_lock to individual task locks.
- *
- * By Esben Nielsen:
- * Doing priority inheritance with help of the scheduler.
- *
- * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
- * - major rework based on Esben Nielsens initial patch
- * - replaced thread_info references by task_struct refs
- * - removed task->pending_owner dependency
- * - BKL drop/reacquire for semaphore style locks to avoid deadlocks
- * in the scheduler return path as discussed with Steven Rostedt
- *
- * Copyright (C) 2006, Kihon Technologies Inc.
- * Steven Rostedt <rostedt@goodmis.org>
- * - debugged and patched Thomas Gleixner's rework.
- * - added back the cmpxchg to the rework.
- * - turned atomic require back on for SMP.
- */
-
-#include <linux/spinlock.h>
-#include <linux/rtmutex.h>
-#include <linux/sched.h>
-#include <linux/delay.h>
-#include <linux/module.h>
-#include <linux/kallsyms.h>
-#include <linux/syscalls.h>
-#include <linux/interrupt.h>
-#include <linux/plist.h>
-#include <linux/fs.h>
-#include <linux/futex.h>
-#include <linux/hrtimer.h>
-
-#include "rtmutex_common.h"
-
-/*
- * struct mutex functions
- */
-void __mutex_do_init(struct mutex *mutex, const char *name,
- struct lock_class_key *key)
-{
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- /*
- * Make sure we are not reinitializing a held lock:
- */
- debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
- lockdep_init_map(&mutex->dep_map, name, key, 0);
-#endif
- mutex->lock.save_state = 0;
-}
-EXPORT_SYMBOL(__mutex_do_init);
-
-void __lockfunc _mutex_lock(struct mutex *lock)
-{
- mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
- rt_mutex_lock(&lock->lock);
-}
-EXPORT_SYMBOL(_mutex_lock);
-
-int __lockfunc _mutex_lock_interruptible(struct mutex *lock)
-{
- int ret;
-
- mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
- ret = rt_mutex_lock_interruptible(&lock->lock, 0);
- if (ret)
- mutex_release(&lock->dep_map, 1, _RET_IP_);
- return ret;
-}
-EXPORT_SYMBOL(_mutex_lock_interruptible);
-
-int __lockfunc _mutex_lock_killable(struct mutex *lock)
-{
- int ret;
-
- mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
- ret = rt_mutex_lock_killable(&lock->lock, 0);
- if (ret)
- mutex_release(&lock->dep_map, 1, _RET_IP_);
- return ret;
-}
-EXPORT_SYMBOL(_mutex_lock_killable);
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass)
-{
- mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
- rt_mutex_lock(&lock->lock);
-}
-EXPORT_SYMBOL(_mutex_lock_nested);
-
-void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
-{
- mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_);
- rt_mutex_lock(&lock->lock);
-}
-EXPORT_SYMBOL(_mutex_lock_nest_lock);
-
-int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass)
-{
- int ret;
-
- mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
- ret = rt_mutex_lock_interruptible(&lock->lock, 0);
- if (ret)
- mutex_release(&lock->dep_map, 1, _RET_IP_);
- return ret;
-}
-EXPORT_SYMBOL(_mutex_lock_interruptible_nested);
-
-int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass)
-{
- int ret;
-
- mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
- ret = rt_mutex_lock_killable(&lock->lock, 0);
- if (ret)
- mutex_release(&lock->dep_map, 1, _RET_IP_);
- return ret;
-}
-EXPORT_SYMBOL(_mutex_lock_killable_nested);
-#endif
-
-int __lockfunc _mutex_trylock(struct mutex *lock)
-{
- int ret = rt_mutex_trylock(&lock->lock);
-
- if (ret)
- mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
-
- return ret;
-}
-EXPORT_SYMBOL(_mutex_trylock);
-
-void __lockfunc _mutex_unlock(struct mutex *lock)
-{
- mutex_release(&lock->dep_map, 1, _RET_IP_);
- rt_mutex_unlock(&lock->lock);
-}
-EXPORT_SYMBOL(_mutex_unlock);
-
-/*
- * rwlock_t functions
- */
-int __lockfunc rt_write_trylock(rwlock_t *rwlock)
-{
- int ret = rt_mutex_trylock(&rwlock->lock);
-
- if (ret) {
- rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
- migrate_disable();
- }
-
- return ret;
-}
-EXPORT_SYMBOL(rt_write_trylock);
-
-int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags)
-{
- int ret;
-
- *flags = 0;
- ret = rt_write_trylock(rwlock);
- return ret;
-}
-EXPORT_SYMBOL(rt_write_trylock_irqsave);
-
-int __lockfunc rt_read_trylock(rwlock_t *rwlock)
-{
- struct rt_mutex *lock = &rwlock->lock;
- int ret = 1;
-
- /*
- * recursive read locks succeed when current owns the lock,
- * but not when read_depth == 0 which means that the lock is
- * write locked.
- */
- if (rt_mutex_owner(lock) != current) {
- ret = rt_mutex_trylock(lock);
- if (ret) {
- rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
- migrate_disable();
- }
- } else if (!rwlock->read_depth) {
- ret = 0;
- }
-
- if (ret)
- rwlock->read_depth++;
-
- return ret;
-}
-EXPORT_SYMBOL(rt_read_trylock);
-
-void __lockfunc rt_write_lock(rwlock_t *rwlock)
-{
- rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
- migrate_disable();
- __rt_spin_lock(&rwlock->lock);
-}
-EXPORT_SYMBOL(rt_write_lock);
-
-void __lockfunc rt_read_lock(rwlock_t *rwlock)
-{
- struct rt_mutex *lock = &rwlock->lock;
-
- /*
- * recursive read locks succeed when current owns the lock
- */
- if (rt_mutex_owner(lock) != current) {
- rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
- __rt_spin_lock(lock);
- migrate_disable();
- }
- rwlock->read_depth++;
-}
-
-EXPORT_SYMBOL(rt_read_lock);
-
-void __lockfunc rt_write_unlock(rwlock_t *rwlock)
-{
- /* NOTE: we always pass in '1' for nested, for simplicity */
- rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
- __rt_spin_unlock(&rwlock->lock);
- migrate_enable();
-}
-EXPORT_SYMBOL(rt_write_unlock);
-
-void __lockfunc rt_read_unlock(rwlock_t *rwlock)
-{
- /* Release the lock only when read_depth is down to 0 */
- if (--rwlock->read_depth == 0) {
- rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
- __rt_spin_unlock(&rwlock->lock);
- migrate_enable();
- }
-}
-EXPORT_SYMBOL(rt_read_unlock);
-
-unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock)
-{
- rt_write_lock(rwlock);
-
- return 0;
-}
-EXPORT_SYMBOL(rt_write_lock_irqsave);
-
-unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock)
-{
- rt_read_lock(rwlock);
-
- return 0;
-}
-EXPORT_SYMBOL(rt_read_lock_irqsave);
-
-void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
-{
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- /*
- * Make sure we are not reinitializing a held lock:
- */
- debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
- lockdep_init_map(&rwlock->dep_map, name, key, 0);
-#endif
- rwlock->lock.save_state = 1;
- rwlock->read_depth = 0;
-}
-EXPORT_SYMBOL(__rt_rwlock_init);
-
-/*
- * rw_semaphores
- */
-
-void rt_up_write(struct rw_semaphore *rwsem)
-{
- rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
- rt_mutex_unlock(&rwsem->lock);
-}
-EXPORT_SYMBOL(rt_up_write);
-
-void rt_up_read(struct rw_semaphore *rwsem)
-{
- if (--rwsem->read_depth == 0) {
- rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
- rt_mutex_unlock(&rwsem->lock);
- }
-}
-EXPORT_SYMBOL(rt_up_read);
-
-/*
- * downgrade a write lock into a read lock
- * - just wake up any readers at the front of the queue
- */
-void rt_downgrade_write(struct rw_semaphore *rwsem)
-{
- BUG_ON(rt_mutex_owner(&rwsem->lock) != current);
- rwsem->read_depth = 1;
-}
-EXPORT_SYMBOL(rt_downgrade_write);
-
-int rt_down_write_trylock(struct rw_semaphore *rwsem)
-{
- int ret = rt_mutex_trylock(&rwsem->lock);
-
- if (ret)
- rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
- return ret;
-}
-EXPORT_SYMBOL(rt_down_write_trylock);
-
-void rt_down_write(struct rw_semaphore *rwsem)
-{
- rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_);
- rt_mutex_lock(&rwsem->lock);
-}
-EXPORT_SYMBOL(rt_down_write);
-
-void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass)
-{
- rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
- rt_mutex_lock(&rwsem->lock);
-}
-EXPORT_SYMBOL(rt_down_write_nested);
-
-void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
- struct lockdep_map *nest)
-{
- rwsem_acquire_nest(&rwsem->dep_map, 0, 0, nest, _RET_IP_);
- rt_mutex_lock(&rwsem->lock);
-}
-
-int rt_down_read_trylock(struct rw_semaphore *rwsem)
-{
- struct rt_mutex *lock = &rwsem->lock;
- int ret = 1;
-
- /*
- * recursive read locks succeed when current owns the rwsem,
- * but not when read_depth == 0 which means that the rwsem is
- * write locked.
- */
- if (rt_mutex_owner(lock) != current) {
- ret = rt_mutex_trylock(&rwsem->lock);
- if (ret)
- rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
- } else if (!rwsem->read_depth) {
- ret = 0;
- }
-
- if (ret)
- rwsem->read_depth++;
- return ret;
-}
-EXPORT_SYMBOL(rt_down_read_trylock);
-
-static void __rt_down_read(struct rw_semaphore *rwsem, int subclass)
-{
- struct rt_mutex *lock = &rwsem->lock;
-
- if (rt_mutex_owner(lock) != current) {
- rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
- rt_mutex_lock(&rwsem->lock);
- }
- rwsem->read_depth++;
-}
-
-void rt_down_read(struct rw_semaphore *rwsem)
-{
- __rt_down_read(rwsem, 0);
-}
-EXPORT_SYMBOL(rt_down_read);
-
-void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass)
-{
- __rt_down_read(rwsem, subclass);
-}
-EXPORT_SYMBOL(rt_down_read_nested);
-
-void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
- struct lock_class_key *key)
-{
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- /*
- * Make sure we are not reinitializing a held lock:
- */
- debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem));
- lockdep_init_map(&rwsem->dep_map, name, key, 0);
-#endif
- rwsem->read_depth = 0;
- rwsem->lock.save_state = 0;
-}
-EXPORT_SYMBOL(__rt_rwsem_init);
-
-/**
- * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
- * @cnt: the atomic which we are to dec
- * @lock: the mutex to return holding if we dec to 0
- *
- * return true and hold lock if we dec to 0, return false otherwise
- */
-int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
-{
- /* dec if we can't possibly hit 0 */
- if (atomic_add_unless(cnt, -1, 1))
- return 0;
- /* we might hit 0, so take the lock */
- mutex_lock(lock);
- if (!atomic_dec_and_test(cnt)) {
- /* when we actually did the dec, we didn't hit 0 */
- mutex_unlock(lock);
- return 0;
- }
- /* we hit 0, and we hold the lock */
- return 1;
-}
-EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
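atomic_dec_and_mutex_lock() above is the mutex flavour of the classic dec-and-lock pattern: the common case decrements the reference count without touching the lock, and only a decrement that could reach zero takes the mutex first, so the caller ends up holding the lock exactly when it has to tear the object down. A user-space sketch of the same idea with C11 atomics and a pthread mutex (illustrative only, not the kernel implementation):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Returns true, with *lock held, iff this call dropped the count to zero. */
static bool dec_and_mutex_lock_sketch(atomic_int *cnt, pthread_mutex_t *lock)
{
        int old = atomic_load(cnt);

        /* Fast path: decrement while the result cannot possibly reach zero. */
        while (old > 1) {
                if (atomic_compare_exchange_weak(cnt, &old, old - 1))
                        return false;
        }

        /* We might hit zero, so take the lock before the final decrement. */
        pthread_mutex_lock(lock);
        if (atomic_fetch_sub(cnt, 1) != 1) {
                /* The count did not actually drop to zero after all. */
                pthread_mutex_unlock(lock);
                return false;
        }
        return true;    /* count is zero and *lock is held */
}

A caller that gets true back owns the final reference and already holds the lock, mirroring how the kernel helper is used around object teardown.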
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 4057bc6..0dd6aec 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -8,12 +8,6 @@
* Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
* Copyright (C) 2006 Esben Nielsen
*
- * Adaptive Spinlocks:
- * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
- * and Peter Morreale,
- * Adaptive Spinlocks simplification:
- * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
- *
* See Documentation/rt-mutex-design.txt for details.
*/
#include <linux/spinlock.h>
@@ -21,7 +15,6 @@
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/timer.h>
-#include <linux/ww_mutex.h>
#include "rtmutex_common.h"
@@ -75,12 +68,6 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
clear_rt_mutex_waiters(lock);
}
-static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
-{
- return waiter && waiter != PI_WAKEUP_INPROGRESS &&
- waiter != PI_REQUEUE_INPROGRESS;
-}
-
/*
* We can speed up the acquire/release, if the architecture
* supports cmpxchg and if there's no debugging state to be set up
@@ -104,12 +91,6 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
}
#endif
-static inline void init_lists(struct rt_mutex *lock)
-{
- if (unlikely(!lock->wait_list.node_list.prev))
- plist_head_init(&lock->wait_list);
-}
-
/*
* Calculate task priority from the waiter list priority
*
@@ -126,18 +107,6 @@ int rt_mutex_getprio(struct task_struct *task)
}
/*
- * Called by sched_setscheduler() to check whether the priority change
- * is overruled by a possible priority boosting.
- */
-int rt_mutex_check_prio(struct task_struct *task, int newprio)
-{
- if (!task_has_pi_waiters(task))
- return 0;
-
- return task_top_pi_waiter(task)->pi_list_entry.prio <= newprio;
-}
-
-/*
* Adjust the priority of a task, after its pi_waiters got modified.
*
* This can be both boosting and unboosting. task->pi_lock must be held.
@@ -168,14 +137,6 @@ static void rt_mutex_adjust_prio(struct task_struct *task)
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
}
-static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
-{
- if (waiter->savestate)
- wake_up_lock_sleeper(waiter->task);
- else
- wake_up_process(waiter->task);
-}
-
/*
* Max number of times we'll walk the boosting chain:
*/
@@ -249,7 +210,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* reached or the state of the chain has changed while we
* dropped the locks.
*/
- if (!rt_mutex_real_waiter(waiter))
+ if (!waiter)
goto out_unlock_pi;
/*
@@ -300,15 +261,13 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
/* Release the task */
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
if (!rt_mutex_owner(lock)) {
- struct rt_mutex_waiter *lock_top_waiter;
-
/*
* If the requeue above changed the top waiter, then we need
* to wake the new top waiter up to try to get the lock.
*/
- lock_top_waiter = rt_mutex_top_waiter(lock);
- if (top_waiter != lock_top_waiter)
- rt_mutex_wake_waiter(lock_top_waiter);
+
+ if (top_waiter != rt_mutex_top_waiter(lock))
+ wake_up_process(rt_mutex_top_waiter(lock)->task);
raw_spin_unlock(&lock->wait_lock);
goto out_put_task;
}
@@ -353,25 +312,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
return ret;
}
-
-#define STEAL_NORMAL 0
-#define STEAL_LATERAL 1
-
-/*
- * Note that RT tasks are excluded from lateral-steals to prevent the
- * introduction of an unbounded latency
- */
-static inline int lock_is_stealable(struct task_struct *task,
- struct task_struct *pendowner, int mode)
-{
- if (mode == STEAL_NORMAL || rt_task(task)) {
- if (task->prio >= pendowner->prio)
- return 0;
- } else if (task->prio > pendowner->prio)
- return 0;
- return 1;
-}
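lock_is_stealable() encodes the lateral-steal rule: kernel prio values are inverted (a lower number means higher priority), so under STEAL_NORMAL, and always for RT tasks, a task may only take the lock ahead of the pending owner when it is strictly higher priority, while STEAL_LATERAL additionally lets a non-RT task steal at equal priority, which is what the spinlock slow path relies on. A condensed sketch of just the comparison (the rt_task() exclusion is deliberately left out here):

/* Lower numeric prio means higher priority, as in the kernel. */
static int is_stealable_sketch(int task_prio, int pendowner_prio, int lateral)
{
        if (!lateral)
                return task_prio < pendowner_prio;      /* strictly better only */
        return task_prio <= pendowner_prio;             /* equal prio may steal */
}

/*
 * Example with two normal tasks at prio 120:
 *   lateral == 0 (STEAL_NORMAL)  -> 0, the task queues behind the pending owner
 *   lateral == 1 (STEAL_LATERAL) -> 1, the spin_lock slow path takes the lock
 */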
-
/*
* Try to take an rt-mutex
*
@@ -381,9 +321,8 @@ static inline int lock_is_stealable(struct task_struct *task,
* @task: the task which wants to acquire the lock
* @waiter: the waiter that is queued to the lock's wait list. (could be NULL)
*/
-static int
-__try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
- struct rt_mutex_waiter *waiter, int mode)
+static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+ struct rt_mutex_waiter *waiter)
{
/*
* We have to be careful here if the atomic speedups are
@@ -416,14 +355,12 @@ __try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
* 3) it is top waiter
*/
if (rt_mutex_has_waiters(lock)) {
- struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
-
- if (task != pown && !lock_is_stealable(task, pown, mode))
- return 0;
+ if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
+ if (!waiter || waiter != rt_mutex_top_waiter(lock))
+ return 0;
+ }
}
- /* We got the lock. */
-
if (waiter || rt_mutex_has_waiters(lock)) {
unsigned long flags;
struct rt_mutex_waiter *top;
@@ -448,6 +385,7 @@ __try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
}
+ /* We got the lock. */
debug_rt_mutex_lock(lock);
rt_mutex_set_owner(lock, task);
@@ -457,13 +395,6 @@ __try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
return 1;
}
-static inline int
-try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
- struct rt_mutex_waiter *waiter)
-{
- return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
-}
-
/*
* Task blocks on lock.
*
@@ -482,23 +413,6 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
int chain_walk = 0, res;
raw_spin_lock_irqsave(&task->pi_lock, flags);
-
- /*
- * In the case of futex requeue PI, this will be a proxy
- * lock. The task will wake unaware that it is enqueued on
 - * lock. The task will wake unaware that it is enqueued on
- * this lock. Avoid blocking on two locks and corrupting
- * pi_blocked_on via the PI_WAKEUP_INPROGRESS
- * flag. futex_wait_requeue_pi() sets this when it wakes up
- * before requeue (due to a signal or timeout). Do not enqueue
- * the task if PI_WAKEUP_INPROGRESS is set.
- */
- if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
- return -EAGAIN;
- }
-
- BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
-
__rt_mutex_adjust_prio(task);
waiter->task = task;
waiter->lock = lock;
@@ -523,7 +437,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
__rt_mutex_adjust_prio(owner);
- if (rt_mutex_real_waiter(owner->pi_blocked_on))
+ if (owner->pi_blocked_on)
chain_walk = 1;
raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
}
@@ -578,7 +492,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
raw_spin_unlock_irqrestore(&current->pi_lock, flags);
- rt_mutex_wake_waiter(waiter);
+ wake_up_process(waiter->task);
}
/*
@@ -617,7 +531,7 @@ static void remove_waiter(struct rt_mutex *lock,
}
__rt_mutex_adjust_prio(owner);
- if (rt_mutex_real_waiter(owner->pi_blocked_on))
+ if (owner->pi_blocked_on)
chain_walk = 1;
raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
@@ -651,371 +565,23 @@ void rt_mutex_adjust_pi(struct task_struct *task)
raw_spin_lock_irqsave(&task->pi_lock, flags);
waiter = task->pi_blocked_on;
- if (!rt_mutex_real_waiter(waiter) ||
- waiter->list_entry.prio == task->prio) {
+ if (!waiter || waiter->list_entry.prio == task->prio) {
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
return;
}
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+
/* gets dropped in rt_mutex_adjust_prio_chain()! */
get_task_struct(task);
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
}
-#ifdef CONFIG_PREEMPT_RT_FULL
-/*
- * preemptible spin_lock functions:
- */
-static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
- void (*slowfn)(struct rt_mutex *lock))
-{
- might_sleep();
-
- if (likely(rt_mutex_cmpxchg(lock, NULL, current)))
- rt_mutex_deadlock_account_lock(lock, current);
- else
- slowfn(lock);
-}
-
-static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
- void (*slowfn)(struct rt_mutex *lock))
-{
- if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
- rt_mutex_deadlock_account_unlock(current);
- else
- slowfn(lock);
-}
-
-#ifdef CONFIG_SMP
-/*
- * Note that owner is a speculative pointer and dereferencing relies
- * on rcu_read_lock() and the check against the lock owner.
- */
-static int adaptive_wait(struct rt_mutex *lock,
- struct task_struct *owner)
-{
- int res = 0;
-
- rcu_read_lock();
- for (;;) {
- if (owner != rt_mutex_owner(lock))
- break;
- /*
- * Ensure that owner->on_cpu is dereferenced _after_
- * checking the above to be valid.
- */
- barrier();
- if (!owner->on_cpu) {
- res = 1;
- break;
- }
- cpu_relax();
- }
- rcu_read_unlock();
- return res;
-}
-#else
-static int adaptive_wait(struct rt_mutex *lock,
- struct task_struct *orig_owner)
-{
- return 1;
-}
-#endif
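adaptive_wait() above is what makes these sleeping spinlocks "adaptive": a contending task spins only while the current owner is unchanged and still running on another CPU, and falls back to blocking the moment the owner is scheduled out. A stripped-down user-space analogue of that decision loop using C11 atomics (the running flag stands in for owner->on_cpu; all names are illustrative):

#include <sched.h>
#include <stdatomic.h>

struct owner_sketch {
        atomic_int running;             /* stands in for owner->on_cpu */
};

/*
 * Spin while @owner still holds the lock and is on a CPU; return 1 when the
 * waiter should stop spinning and block instead.
 */
static int adaptive_wait_sketch(_Atomic(struct owner_sketch *) *lock_owner,
                                struct owner_sketch *owner)
{
        for (;;) {
                if (atomic_load(lock_owner) != owner)
                        return 0;       /* owner changed: retry the trylock */
                if (!atomic_load(&owner->running))
                        return 1;       /* owner was scheduled out: block */
                sched_yield();          /* stand-in for cpu_relax() */
        }
}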
-
-# define pi_lock(lock) raw_spin_lock_irq(lock)
-# define pi_unlock(lock) raw_spin_unlock_irq(lock)
-
-/*
- * Slow path lock function spin_lock style: this variant is very
- * careful not to miss any non-lock wakeups.
- *
- * We store the current state under p->pi_lock in p->saved_state and
- * the try_to_wake_up() code handles this accordingly.
- */
-static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock)
-{
- struct task_struct *lock_owner, *self = current;
- struct rt_mutex_waiter waiter, *top_waiter;
- int ret;
-
- rt_mutex_init_waiter(&waiter, true);
-
- raw_spin_lock(&lock->wait_lock);
- init_lists(lock);
-
- if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
- raw_spin_unlock(&lock->wait_lock);
- return;
- }
-
- BUG_ON(rt_mutex_owner(lock) == self);
-
- /*
- * We save whatever state the task is in and we'll restore it
- * after acquiring the lock taking real wakeups into account
- * as well. We are serialized via pi_lock against wakeups. See
- * try_to_wake_up().
- */
- pi_lock(&self->pi_lock);
- self->saved_state = self->state;
- __set_current_state(TASK_UNINTERRUPTIBLE);
- pi_unlock(&self->pi_lock);
-
- ret = task_blocks_on_rt_mutex(lock, &waiter, self, 0);
- BUG_ON(ret);
-
- for (;;) {
- /* Try to acquire the lock again. */
- if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
- break;
-
- top_waiter = rt_mutex_top_waiter(lock);
- lock_owner = rt_mutex_owner(lock);
-
- raw_spin_unlock(&lock->wait_lock);
-
- debug_rt_mutex_print_deadlock(&waiter);
-
- if (top_waiter != &waiter || adaptive_wait(lock, lock_owner))
- schedule_rt_mutex(lock);
-
- raw_spin_lock(&lock->wait_lock);
-
- pi_lock(&self->pi_lock);
- __set_current_state(TASK_UNINTERRUPTIBLE);
- pi_unlock(&self->pi_lock);
- }
-
- /*
- * Restore the task state to current->saved_state. We set it
- * to the original state above and the try_to_wake_up() code
- * has possibly updated it when a real (non-rtmutex) wakeup
- * happened while we were blocked. Clear saved_state so
- * try_to_wake_up() does not get confused.
- */
- pi_lock(&self->pi_lock);
- __set_current_state(self->saved_state);
- self->saved_state = TASK_RUNNING;
- pi_unlock(&self->pi_lock);
-
- /*
- * try_to_take_rt_mutex() sets the waiter bit
- * unconditionally. We might have to fix that up:
- */
- fixup_rt_mutex_waiters(lock);
-
- BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
- BUG_ON(!plist_node_empty(&waiter.list_entry));
-
- raw_spin_unlock(&lock->wait_lock);
-
- debug_rt_mutex_free_waiter(&waiter);
-}
-
-/*
- * Slow path to release a rt_mutex spin_lock style
- */
-static void __sched __rt_spin_lock_slowunlock(struct rt_mutex *lock)
-{
- debug_rt_mutex_unlock(lock);
-
- rt_mutex_deadlock_account_unlock(current);
-
- if (!rt_mutex_has_waiters(lock)) {
- lock->owner = NULL;
- raw_spin_unlock(&lock->wait_lock);
- return;
- }
-
- wakeup_next_waiter(lock);
-
- raw_spin_unlock(&lock->wait_lock);
-
- /* Undo pi boosting when necessary */
- rt_mutex_adjust_prio(current);
-}
-
-static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
-{
- raw_spin_lock(&lock->wait_lock);
- __rt_spin_lock_slowunlock(lock);
-}
-
-static void noinline __sched rt_spin_lock_slowunlock_hirq(struct rt_mutex *lock)
-{
- int ret;
-
- do {
- ret = raw_spin_trylock(&lock->wait_lock);
- } while (!ret);
-
- __rt_spin_lock_slowunlock(lock);
-}
-
-void __lockfunc rt_spin_lock(spinlock_t *lock)
-{
- rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
- spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-}
-EXPORT_SYMBOL(rt_spin_lock);
-
-void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
-{
- rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock);
-}
-EXPORT_SYMBOL(__rt_spin_lock);
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
-{
- rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
- spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
-}
-EXPORT_SYMBOL(rt_spin_lock_nested);
-#endif
-
-void __lockfunc rt_spin_unlock(spinlock_t *lock)
-{
- /* NOTE: we always pass in '1' for nested, for simplicity */
- spin_release(&lock->dep_map, 1, _RET_IP_);
- rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
-}
-EXPORT_SYMBOL(rt_spin_unlock);
-
-void __lockfunc rt_spin_unlock_after_trylock_in_irq(spinlock_t *lock)
-{
- /* NOTE: we always pass in '1' for nested, for simplicity */
- spin_release(&lock->dep_map, 1, _RET_IP_);
- rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock_hirq);
-}
-
-void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
-{
- rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
-}
-EXPORT_SYMBOL(__rt_spin_unlock);
-
-/*
- * Wait for the lock to get unlocked: instead of polling for an unlock
- * (like raw spinlocks do), we lock and unlock, to force the kernel to
- * schedule if there's contention:
- */
-void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
-{
- spin_lock(lock);
- spin_unlock(lock);
-}
-EXPORT_SYMBOL(rt_spin_unlock_wait);
-
-int __lockfunc rt_spin_trylock(spinlock_t *lock)
-{
- int ret = rt_mutex_trylock(&lock->lock);
-
- if (ret)
- spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
- return ret;
-}
-EXPORT_SYMBOL(rt_spin_trylock);
-
-int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
-{
- int ret;
-
- local_bh_disable();
- ret = rt_mutex_trylock(&lock->lock);
- if (ret) {
- migrate_disable();
- spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
- } else
- local_bh_enable();
- return ret;
-}
-EXPORT_SYMBOL(rt_spin_trylock_bh);
-
-int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
-{
- int ret;
-
- *flags = 0;
- ret = rt_mutex_trylock(&lock->lock);
- if (ret) {
- migrate_disable();
- spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
- }
- return ret;
-}
-EXPORT_SYMBOL(rt_spin_trylock_irqsave);
-
-int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
-{
- /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
- if (atomic_add_unless(atomic, -1, 1))
- return 0;
- rt_spin_lock(lock);
- if (atomic_dec_and_test(atomic)) {
- migrate_disable();
- return 1;
- }
- rt_spin_unlock(lock);
- return 0;
-}
-EXPORT_SYMBOL(atomic_dec_and_spin_lock);
-
-void
-__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
-{
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- /*
- * Make sure we are not reinitializing a held lock:
- */
- debug_check_no_locks_freed((void *)lock, sizeof(*lock));
- lockdep_init_map(&lock->dep_map, name, key, 0);
-#endif
-}
-EXPORT_SYMBOL(__rt_spin_lock_init);
-
-#endif /* PREEMPT_RT_FULL */
-
-#ifdef CONFIG_PREEMPT_RT_FULL
-static inline int __sched
-__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
-{
- struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
- struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
-
- if (!hold_ctx)
- return 0;
-
- if (unlikely(ctx == hold_ctx))
- return -EALREADY;
-
- if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
- (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
-#ifdef CONFIG_DEBUG_MUTEXES
- DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
- ctx->contending_lock = ww;
-#endif
- return -EDEADLK;
- }
-
- return 0;
-}
-#else
-static inline int __sched
-__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
-{
- BUG();
- return 0;
-}
-
-#endif
-
/**
* __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
* @lock: the rt_mutex to take
* @state: the state the task should block in (TASK_INTERRUPTIBLE
- * or TASK_UNINTERRUPTIBLE)
+ * or TASK_UNINTERRUPTIBLE)
* @timeout: the pre-initialized and started timer, or NULL for none
* @waiter: the pre-initialized rt_mutex_waiter
*
@@ -1024,8 +590,7 @@ __mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
- struct rt_mutex_waiter *waiter,
- struct ww_acquire_ctx *ww_ctx)
+ struct rt_mutex_waiter *waiter)
{
int ret = 0;
@@ -1048,12 +613,6 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
break;
}
- if (ww_ctx && ww_ctx->acquired > 0) {
- ret = __mutex_lock_check_stamp(lock, ww_ctx);
- if (ret)
- break;
- }
-
raw_spin_unlock(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
@@ -1067,102 +626,23 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
return ret;
}
-static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
- struct ww_acquire_ctx *ww_ctx)
-{
-#ifdef CONFIG_DEBUG_MUTEXES
- /*
- * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
- * but released with a normal mutex_unlock in this call.
- *
- * This should never happen, always use ww_mutex_unlock.
- */
- DEBUG_LOCKS_WARN_ON(ww->ctx);
-
- /*
- * Not quite done after calling ww_acquire_done() ?
- */
- DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
-
- if (ww_ctx->contending_lock) {
- /*
- * After -EDEADLK you tried to
- * acquire a different ww_mutex? Bad!
- */
- DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
-
- /*
- * You called ww_mutex_lock after receiving -EDEADLK,
- * but 'forgot' to unlock everything else first?
- */
- DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
- ww_ctx->contending_lock = NULL;
- }
-
- /*
- * Naughty, using a different class will lead to undefined behavior!
- */
- DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
-#endif
- ww_ctx->acquired++;
-}
-
-#ifdef CONFIG_PREEMPT_RT_FULL
-static void ww_mutex_account_lock(struct rt_mutex *lock,
- struct ww_acquire_ctx *ww_ctx)
-{
- struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
- struct rt_mutex_waiter *waiter;
-
- /*
- * This branch gets optimized out for the common case,
- * and is only important for ww_mutex_lock.
- */
- ww_mutex_lock_acquired(ww, ww_ctx);
- ww->ctx = ww_ctx;
-
- /*
- * Give any possible sleeping processes the chance to wake up,
- * so they can recheck if they have to back off.
- */
- plist_for_each_entry(waiter, &lock->wait_list, list_entry) {
-
- /* XXX debug rt mutex waiter wakeup */
-
- BUG_ON(waiter->lock != lock);
- rt_mutex_wake_waiter(waiter);
- }
-}
-
-#else
-
-static void ww_mutex_account_lock(struct rt_mutex *lock,
- struct ww_acquire_ctx *ww_ctx)
-{
- BUG();
-}
-#endif
-
/*
* Slow path lock function:
*/
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
- int detect_deadlock, struct ww_acquire_ctx *ww_ctx)
+ int detect_deadlock)
{
struct rt_mutex_waiter waiter;
int ret = 0;
- rt_mutex_init_waiter(&waiter, false);
+ debug_rt_mutex_init_waiter(&waiter);
raw_spin_lock(&lock->wait_lock);
- init_lists(lock);
/* Try to acquire the lock again: */
if (try_to_take_rt_mutex(lock, current, NULL)) {
- if (ww_ctx)
- ww_mutex_account_lock(lock, ww_ctx);
raw_spin_unlock(&lock->wait_lock);
return 0;
}
@@ -1179,14 +659,12 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock);
if (likely(!ret))
- ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, ww_ctx);
+ ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
set_current_state(TASK_RUNNING);
if (unlikely(ret))
remove_waiter(lock, &waiter);
- else if (ww_ctx)
- ww_mutex_account_lock(lock, ww_ctx);
/*
* try_to_take_rt_mutex() sets the waiter bit
@@ -1213,9 +691,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
{
int ret = 0;
- if (!raw_spin_trylock(&lock->wait_lock))
- return ret;
- init_lists(lock);
+ raw_spin_lock(&lock->wait_lock);
if (likely(rt_mutex_owner(lock) != current)) {
@@ -1266,33 +742,30 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
*/
static inline int
rt_mutex_fastlock(struct rt_mutex *lock, int state,
- int detect_deadlock, struct ww_acquire_ctx *ww_ctx,
+ int detect_deadlock,
int (*slowfn)(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
- int detect_deadlock,
- struct ww_acquire_ctx *ww_ctx))
+ int detect_deadlock))
{
if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
return 0;
} else
- return slowfn(lock, state, NULL, detect_deadlock, ww_ctx);
+ return slowfn(lock, state, NULL, detect_deadlock);
}
static inline int
rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout, int detect_deadlock,
- struct ww_acquire_ctx *ww_ctx,
int (*slowfn)(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
- int detect_deadlock,
- struct ww_acquire_ctx *ww_ctx))
+ int detect_deadlock))
{
if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
return 0;
} else
- return slowfn(lock, state, timeout, detect_deadlock, ww_ctx);
+ return slowfn(lock, state, timeout, detect_deadlock);
}
static inline int
@@ -1325,19 +798,19 @@ void __sched rt_mutex_lock(struct rt_mutex *lock)
{
might_sleep();
- rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, NULL, rt_mutex_slowlock);
+ rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);
/**
* rt_mutex_lock_interruptible - lock a rt_mutex interruptible
*
- * @lock: the rt_mutex to be locked
+ * @lock: the rt_mutex to be locked
* @detect_deadlock: deadlock detection on/off
*
* Returns:
- * 0 on success
- * -EINTR when interrupted by a signal
+ * 0 on success
+ * -EINTR when interrupted by a signal
* -EDEADLK when the lock would deadlock (when deadlock detection is on)
*/
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
@@ -1346,43 +819,22 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
might_sleep();
return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE,
- detect_deadlock, NULL, rt_mutex_slowlock);
+ detect_deadlock, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
/**
- * rt_mutex_lock_killable - lock a rt_mutex killable
- *
- * @lock: the rt_mutex to be locked
- * @detect_deadlock: deadlock detection on/off
- *
- * Returns:
- * 0 on success
- * -EINTR when interrupted by a signal
- * -EDEADLK when the lock would deadlock (when deadlock detection is on)
- */
-int __sched rt_mutex_lock_killable(struct rt_mutex *lock,
- int detect_deadlock)
-{
- might_sleep();
-
- return rt_mutex_fastlock(lock, TASK_KILLABLE,
- detect_deadlock, NULL, rt_mutex_slowlock);
-}
-EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
-
-/**
* rt_mutex_timed_lock - lock a rt_mutex interruptible
* the timeout structure is provided
* by the caller
*
- * @lock: the rt_mutex to be locked
+ * @lock: the rt_mutex to be locked
* @timeout: timeout structure or NULL (no timeout)
* @detect_deadlock: deadlock detection on/off
*
* Returns:
- * 0 on success
- * -EINTR when interrupted by a signal
+ * 0 on success
+ * -EINTR when interrupted by a signal
* -ETIMEDOUT when the timeout expired
* -EDEADLK when the lock would deadlock (when deadlock detection is on)
*/
@@ -1393,7 +845,7 @@ rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout,
might_sleep();
return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
- detect_deadlock, NULL, rt_mutex_slowlock);
+ detect_deadlock, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
@@ -1451,11 +903,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
void __rt_mutex_init(struct rt_mutex *lock, const char *name)
{
lock->owner = NULL;
+ raw_spin_lock_init(&lock->wait_lock);
plist_head_init(&lock->wait_list);
debug_rt_mutex_init(lock, name);
}
-EXPORT_SYMBOL(__rt_mutex_init);
+EXPORT_SYMBOL_GPL(__rt_mutex_init);
/**
* rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
@@ -1470,7 +923,7 @@ EXPORT_SYMBOL(__rt_mutex_init);
void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
struct task_struct *proxy_owner)
{
- rt_mutex_init(lock);
+ __rt_mutex_init(lock, NULL);
debug_rt_mutex_proxy_lock(lock, proxy_owner);
rt_mutex_set_owner(lock, proxy_owner);
rt_mutex_deadlock_account_lock(lock, proxy_owner);
@@ -1519,35 +972,6 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
return 1;
}
-#ifdef CONFIG_PREEMPT_RT_FULL
- /*
- * In PREEMPT_RT there's an added race.
- * If the task that we are about to requeue times out,
- * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
- * to skip this task. But right after the task sets
- * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
- * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
- * This will replace the PI_WAKEUP_INPROGRESS with the actual
- * lock that it blocks on. We *must not* place this task
- * on this proxy lock in that case.
- *
- * To prevent this race, we first take the task's pi_lock
- * and check if it has updated its pi_blocked_on. If it has,
- * we assume that it woke up and we return -EAGAIN.
- * Otherwise, we set the task's pi_blocked_on to
- * PI_REQUEUE_INPROGRESS, so that if the task is waking up
- * it will know that we are in the process of requeuing it.
- */
- raw_spin_lock_irq(&task->pi_lock);
- if (task->pi_blocked_on) {
- raw_spin_unlock_irq(&task->pi_lock);
- raw_spin_unlock(&lock->wait_lock);
- return -EAGAIN;
- }
- task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
- raw_spin_unlock_irq(&task->pi_lock);
-#endif
-
ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
if (ret && !rt_mutex_owner(lock)) {
@@ -1617,7 +1041,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
set_current_state(TASK_INTERRUPTIBLE);
- ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
+ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
set_current_state(TASK_RUNNING);
@@ -1634,88 +1058,3 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
return ret;
}
-
-static inline int
-ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
-{
-#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
- unsigned tmp;
-
- if (ctx->deadlock_inject_countdown-- == 0) {
- tmp = ctx->deadlock_inject_interval;
- if (tmp > UINT_MAX/4)
- tmp = UINT_MAX;
- else
- tmp = tmp*2 + tmp + tmp/2;
-
- ctx->deadlock_inject_interval = tmp;
- ctx->deadlock_inject_countdown = tmp;
- ctx->contending_lock = lock;
-
- ww_mutex_unlock(lock);
-
- return -EDEADLK;
- }
-#endif
-
- return 0;
-}
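The deadlock-injection helper above (only active with CONFIG_DEBUG_WW_MUTEX_SLOWPATH) forces an artificial -EDEADLK once every deadlock_inject_interval acquisitions and then grows the interval by tmp*2 + tmp + tmp/2, i.e. by a factor of roughly 3.5, so the injected fault gets rarer as the test runs. A small stand-alone C program reproducing the interval sequence (illustrative arithmetic only; the starting value of 1 is an assumption):

#include <limits.h>
#include <stdio.h>

int main(void)
{
        unsigned int interval = 1;      /* assumed starting inject interval */
        int i;

        for (i = 0; i < 8; i++) {
                printf("injection #%d after %u acquisitions\n", i + 1, interval);
                if (interval > UINT_MAX / 4)
                        interval = UINT_MAX;    /* clamp, as the code above does */
                else
                        interval = interval * 2 + interval + interval / 2;  /* x3.5 */
        }
        return 0;
}

Run as written it prints the intervals 1, 3, 10, 35, 122, 427, 1494, 5229, so each successive injection is about 3.5 times further apart than the previous one.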
-
-#ifdef CONFIG_PREEMPT_RT_FULL
-int __sched
-__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
-{
- int ret;
-
- might_sleep();
-
- mutex_acquire(&lock->base.dep_map, 0, 0, _RET_IP_);
- ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx);
- if (ret)
- mutex_release(&lock->base.dep_map, 1, _RET_IP_);
- else if (!ret && ww_ctx->acquired > 1)
- return ww_mutex_deadlock_injection(lock, ww_ctx);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
-
-int __sched
-__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
-{
- int ret;
-
- might_sleep();
-
- mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map,
- _RET_IP_);
- ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx);
- if (ret)
- mutex_release(&lock->base.dep_map, 1, _RET_IP_);
- else if (!ret && ww_ctx->acquired > 1)
- return ww_mutex_deadlock_injection(lock, ww_ctx);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(__ww_mutex_lock);
-
-void __sched ww_mutex_unlock(struct ww_mutex *lock)
-{
- /*
- * The unlocking fastpath is the 0->1 transition from 'locked'
- * into 'unlocked' state:
- */
- if (lock->ctx) {
-#ifdef CONFIG_DEBUG_MUTEXES
- DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
-#endif
- if (lock->ctx->acquired > 0)
- lock->ctx->acquired--;
- lock->ctx = NULL;
- }
-
- mutex_release(&lock->base.dep_map, 1, _RET_IP_);
- rt_mutex_unlock(&lock->base.lock);
-}
-EXPORT_SYMBOL(ww_mutex_unlock);
-#endif
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h
index 6ec3dc1..53a66c8 100644
--- a/kernel/rtmutex_common.h
+++ b/kernel/rtmutex_common.h
@@ -49,7 +49,6 @@ struct rt_mutex_waiter {
struct plist_node pi_list_entry;
struct task_struct *task;
struct rt_mutex *lock;
- bool savestate;
#ifdef CONFIG_DEBUG_RT_MUTEXES
unsigned long ip;
struct pid *deadlock_task_pid;
@@ -104,9 +103,6 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
/*
* PI-futex support (proxy locking functions, etc.):
*/
-#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
-#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2)
-
extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
struct task_struct *proxy_owner);
@@ -127,12 +123,4 @@ extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
# include "rtmutex.h"
#endif
-static inline void
-rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
-{
- debug_rt_mutex_init_waiter(waiter);
- waiter->task = NULL;
- waiter->savestate = savestate;
-}
-
#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8749d20..a494ace 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -272,11 +272,7 @@ late_initcall(sched_init_debug);
* Number of tasks to iterate in a single balance run.
* Limited because this is done with IRQs disabled.
*/
-#ifndef CONFIG_PREEMPT_RT_FULL
const_debug unsigned int sysctl_sched_nr_migrate = 32;
-#else
-const_debug unsigned int sysctl_sched_nr_migrate = 8;
-#endif
/*
* period over which we average the RT time consumption, measured
@@ -495,7 +491,6 @@ static void init_rq_hrtick(struct rq *rq)
hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
rq->hrtick_timer.function = hrtick;
- rq->hrtick_timer.irqsafe = 1;
}
#else /* CONFIG_SCHED_HRTICK */
static inline void hrtick_clear(struct rq *rq)
@@ -540,37 +535,6 @@ void resched_task(struct task_struct *p)
smp_send_reschedule(cpu);
}
-#ifdef CONFIG_PREEMPT_LAZY
-void resched_task_lazy(struct task_struct *p)
-{
- int cpu;
-
- if (!sched_feat(PREEMPT_LAZY)) {
- resched_task(p);
- return;
- }
-
- assert_raw_spin_locked(&task_rq(p)->lock);
-
- if (test_tsk_need_resched(p))
- return;
-
- if (test_tsk_need_resched_lazy(p))
- return;
-
- set_tsk_need_resched_lazy(p);
-
- cpu = task_cpu(p);
- if (cpu == smp_processor_id())
- return;
-
- /* NEED_RESCHED_LAZY must be visible before we test polling */
- smp_mb();
- if (!tsk_is_polling(p))
- smp_send_reschedule(cpu);
-}
-#endif
-
void resched_cpu(int cpu)
{
struct rq *rq = cpu_rq(cpu);
@@ -735,17 +699,6 @@ void resched_task(struct task_struct *p)
assert_raw_spin_locked(&task_rq(p)->lock);
set_tsk_need_resched(p);
}
-#ifdef CONFIG_PREEMPT_LAZY
-void resched_task_lazy(struct task_struct *p)
-{
- if (!sched_feat(PREEMPT_LAZY)) {
- resched_task(p);
- return;
- }
- assert_raw_spin_locked(&task_rq(p)->lock);
- set_tsk_need_resched_lazy(p);
-}
-#endif
#endif /* CONFIG_SMP */
#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
@@ -1071,18 +1024,6 @@ struct migration_arg {
static int migration_cpu_stop(void *data);
-static bool check_task_state(struct task_struct *p, long match_state)
-{
- bool match = false;
-
- raw_spin_lock_irq(&p->pi_lock);
- if (p->state == match_state || p->saved_state == match_state)
- match = true;
- raw_spin_unlock_irq(&p->pi_lock);
-
- return match;
-}
-
/*
* wait_task_inactive - wait for a thread to unschedule.
*
@@ -1127,7 +1068,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
* is actually now running somewhere else!
*/
while (task_running(rq, p)) {
- if (match_state && !check_task_state(p, match_state))
+ if (match_state && unlikely(p->state != match_state))
return 0;
cpu_relax();
}
@@ -1142,8 +1083,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
running = task_running(rq, p);
on_rq = p->on_rq;
ncsw = 0;
- if (!match_state || p->state == match_state
- || p->saved_state == match_state)
+ if (!match_state || p->state == match_state)
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
task_rq_unlock(rq, p, &flags);
@@ -1289,12 +1229,6 @@ out:
}
}
- /*
- * Clear PF_NO_SETAFFINITY, otherwise we wreck
- * migrate_disable/enable. See optimization for
- * PF_NO_SETAFFINITY tasks there.
- */
- p->flags &= ~PF_NO_SETAFFINITY;
return dest_cpu;
}
@@ -1374,6 +1308,10 @@ static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
{
activate_task(rq, p, en_flags);
p->on_rq = 1;
+
+ /* if a worker is waking up, notify workqueue */
+ if (p->flags & PF_WQ_WORKER)
+ wq_worker_waking_up(p, cpu_of(rq));
}
/*
@@ -1548,27 +1486,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
*/
smp_mb__before_spinlock();
raw_spin_lock_irqsave(&p->pi_lock, flags);
- if (!(p->state & state)) {
- /*
- * The task might be running due to a spinlock sleeper
- * wakeup. Check the saved state and set it to running
- * if the wakeup condition is true.
- */
- if (!(wake_flags & WF_LOCK_SLEEPER)) {
- if (p->saved_state & state) {
- p->saved_state = TASK_RUNNING;
- success = 1;
- }
- }
+ if (!(p->state & state))
goto out;
- }
-
- /*
- * If this is a regular wakeup, then we can unconditionally
- * clear the saved state of a "lock sleeper".
- */
- if (!(wake_flags & WF_LOCK_SLEEPER))
- p->saved_state = TASK_RUNNING;
success = 1; /* we're going to change ->state */
cpu = task_cpu(p);
@@ -1611,6 +1530,42 @@ out:
}
/**
+ * try_to_wake_up_local - try to wake up a local task with rq lock held
+ * @p: the thread to be awakened
+ *
+ * Put @p on the run-queue if it's not already there. The caller must
+ * ensure that this_rq() is locked, @p is bound to this_rq() and not
+ * the current task.
+ */
+static void try_to_wake_up_local(struct task_struct *p)
+{
+ struct rq *rq = task_rq(p);
+
+ if (WARN_ON_ONCE(rq != this_rq()) ||
+ WARN_ON_ONCE(p == current))
+ return;
+
+ lockdep_assert_held(&rq->lock);
+
+ if (!raw_spin_trylock(&p->pi_lock)) {
+ raw_spin_unlock(&rq->lock);
+ raw_spin_lock(&p->pi_lock);
+ raw_spin_lock(&rq->lock);
+ }
+
+ if (!(p->state & TASK_NORMAL))
+ goto out;
+
+ if (!p->on_rq)
+ ttwu_activate(rq, p, ENQUEUE_WAKEUP);
+
+ ttwu_do_wakeup(rq, p, 0);
+ ttwu_stat(p, smp_processor_id(), 0);
+out:
+ raw_spin_unlock(&p->pi_lock);
+}
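try_to_wake_up_local() needs both p->pi_lock and rq->lock, but the documented order is pi_lock first; since the caller already holds rq->lock, the function try-locks pi_lock and, on failure, drops rq->lock and reacquires both in the proper order. The same ordering idiom, reduced to a user-space sketch with pthreads (names are illustrative):

#include <pthread.h>

/*
 * We already hold @b, but the documented lock order is A before B.
 * Try-lock A; if that fails, drop B and retake both in order.
 */
static void lock_a_while_holding_b(pthread_mutex_t *a, pthread_mutex_t *b)
{
        if (pthread_mutex_trylock(a)) {         /* non-zero return: try failed */
                pthread_mutex_unlock(b);
                pthread_mutex_lock(a);
                pthread_mutex_lock(b);
        }
}

The price, just as in the scheduler code above, is that the state protected by B may have changed across the unlock/relock window, so the caller has to recheck it afterwards (here: p->state and p->on_rq).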
+
+/**
* wake_up_process - Wake up a specific process
* @p: The process to be woken up.
*
@@ -1624,23 +1579,11 @@ out:
*/
int wake_up_process(struct task_struct *p)
{
- WARN_ON(__task_is_stopped_or_traced(p));
+ WARN_ON(task_is_stopped_or_traced(p));
return try_to_wake_up(p, TASK_NORMAL, 0);
}
EXPORT_SYMBOL(wake_up_process);
-/**
- * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
- * @p: The process to be woken up.
- *
- * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
- * the nature of the wakeup.
- */
-int wake_up_lock_sleeper(struct task_struct *p)
-{
- return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
-}
-
int wake_up_state(struct task_struct *p, unsigned int state)
{
return try_to_wake_up(p, state, 0);
@@ -1778,9 +1721,6 @@ void sched_fork(struct task_struct *p)
/* Want to start with kernel preemption disabled. */
task_thread_info(p)->preempt_count = 1;
#endif
-#ifdef CONFIG_HAVE_PREEMPT_LAZY
- task_thread_info(p)->preempt_lazy_count = 0;
-#endif
#ifdef CONFIG_SMP
plist_node_init(&p->pushable_tasks, MAX_PRIO);
#endif
@@ -1947,12 +1887,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
finish_arch_post_lock_switch();
fire_sched_in_preempt_notifiers(current);
- /*
- * We use mmdrop_delayed() here so we don't have to do the
- * full __mmdrop() when we are the last user.
- */
if (mm)
- mmdrop_delayed(mm);
+ mmdrop(mm);
if (unlikely(prev_state == TASK_DEAD)) {
/*
* Remove function-return probe instances associated with this
@@ -2296,13 +2232,8 @@ void __kprobes add_preempt_count(int val)
DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
PREEMPT_MASK - 10);
#endif
- if (preempt_count() == val) {
- unsigned long ip = get_parent_ip(CALLER_ADDR1);
-#ifdef CONFIG_DEBUG_PREEMPT
- current->preempt_disable_ip = ip;
-#endif
- trace_preempt_off(CALLER_ADDR0, ip);
- }
+ if (preempt_count() == val)
+ trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
EXPORT_SYMBOL(add_preempt_count);
@@ -2345,13 +2276,6 @@ static noinline void __schedule_bug(struct task_struct *prev)
print_modules();
if (irqs_disabled())
print_irqtrace_events(prev);
-#ifdef CONFIG_DEBUG_PREEMPT
- if (in_atomic_preempt_off()) {
- pr_err("Preemption disabled at:");
- print_ip_sym(current->preempt_disable_ip);
- pr_cont("\n");
- }
-#endif
dump_stack();
add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
}
@@ -2375,133 +2299,6 @@ static inline void schedule_debug(struct task_struct *prev)
schedstat_inc(this_rq(), sched_count);
}
-#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
-#define MIGRATE_DISABLE_SET_AFFIN (1<<30) /* Can't make a negative */
-#define migrate_disabled_updated(p) ((p)->migrate_disable & MIGRATE_DISABLE_SET_AFFIN)
-#define migrate_disable_count(p) ((p)->migrate_disable & ~MIGRATE_DISABLE_SET_AFFIN)
-
-static inline void update_migrate_disable(struct task_struct *p)
-{
- const struct cpumask *mask;
-
- if (likely(!p->migrate_disable))
- return;
-
- /* Did we already update affinity? */
- if (unlikely(migrate_disabled_updated(p)))
- return;
-
- /*
- * Since this is always current we can get away with only locking
- * rq->lock, the ->cpus_allowed value can normally only be changed
- * while holding both p->pi_lock and rq->lock, but seeing that this
- * is current, we cannot actually be waking up, so all code that
- * relies on serialization against p->pi_lock is out of scope.
- *
- * Having rq->lock serializes us against things like
- * set_cpus_allowed_ptr() that can still happen concurrently.
- */
- mask = tsk_cpus_allowed(p);
-
- if (p->sched_class->set_cpus_allowed)
- p->sched_class->set_cpus_allowed(p, mask);
- /* mask==cpumask_of(task_cpu(p)) which has a cpumask_weight==1 */
- p->nr_cpus_allowed = 1;
-
- /* Let migrate_enable know to fix things back up */
- p->migrate_disable |= MIGRATE_DISABLE_SET_AFFIN;
-}
-
-void migrate_disable(void)
-{
- struct task_struct *p = current;
-
- if (in_atomic()) {
-#ifdef CONFIG_SCHED_DEBUG
- p->migrate_disable_atomic++;
-#endif
- return;
- }
-
-#ifdef CONFIG_SCHED_DEBUG
- if (unlikely(p->migrate_disable_atomic)) {
- tracing_off();
- WARN_ON_ONCE(1);
- }
-#endif
-
- if (p->migrate_disable) {
- p->migrate_disable++;
- return;
- }
-
- preempt_disable();
- preempt_lazy_disable();
- pin_current_cpu();
- p->migrate_disable = 1;
- preempt_enable();
-}
-EXPORT_SYMBOL(migrate_disable);
-
-void migrate_enable(void)
-{
- struct task_struct *p = current;
- const struct cpumask *mask;
- unsigned long flags;
- struct rq *rq;
-
- if (in_atomic()) {
-#ifdef CONFIG_SCHED_DEBUG
- p->migrate_disable_atomic--;
-#endif
- return;
- }
-
-#ifdef CONFIG_SCHED_DEBUG
- if (unlikely(p->migrate_disable_atomic)) {
- tracing_off();
- WARN_ON_ONCE(1);
- }
-#endif
- WARN_ON_ONCE(p->migrate_disable <= 0);
-
- if (migrate_disable_count(p) > 1) {
- p->migrate_disable--;
- return;
- }
-
- preempt_disable();
- if (unlikely(migrate_disabled_updated(p))) {
- /*
- * Undo whatever update_migrate_disable() did, also see there
- * about locking.
- */
- rq = this_rq();
- raw_spin_lock_irqsave(&rq->lock, flags);
-
- /*
- * Clearing migrate_disable causes tsk_cpus_allowed to
- * show the tasks original cpu affinity.
- */
- p->migrate_disable = 0;
- mask = tsk_cpus_allowed(p);
- if (p->sched_class->set_cpus_allowed)
- p->sched_class->set_cpus_allowed(p, mask);
- p->nr_cpus_allowed = cpumask_weight(mask);
- raw_spin_unlock_irqrestore(&rq->lock, flags);
- } else
- p->migrate_disable = 0;
-
- unpin_current_cpu();
- preempt_enable();
- preempt_lazy_enable();
-}
-EXPORT_SYMBOL(migrate_enable);
-#else
-static inline void update_migrate_disable(struct task_struct *p) { }
-#define migrate_disabled_updated(p) 0
-#endif
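The migrate_disable()/migrate_enable() pair removed above nests by way of a per-task counter: only the outermost disable pins the task to its CPU and only the matching outermost enable restores the original affinity, so nested sections stay cheap. A minimal user-space analogue of the nesting bookkeeping using a thread-local counter (purely illustrative; the real code also has to fix up cpus_allowed and the scheduler class, as the removed lines show):

static __thread int migrate_disable_depth;      /* per-task nesting counter */

static void migrate_disable_sketch(void)
{
        if (migrate_disable_depth++ == 0) {
                /* Outermost call: this is the point where the RT code pins
                 * the task to its current CPU (pin_current_cpu()). */
        }
}

static void migrate_enable_sketch(void)
{
        if (--migrate_disable_depth == 0) {
                /* Outermost enable: restore the original affinity, which is
                 * what the migrate_disabled_updated() fixup above handles. */
        }
}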
-
static void put_prev_task(struct rq *rq, struct task_struct *prev)
{
if (prev->on_rq || rq->skip_clock_update < 0)
@@ -2601,8 +2398,6 @@ need_resched:
smp_mb__before_spinlock();
raw_spin_lock_irq(&rq->lock);
- update_migrate_disable(prev);
-
switch_count = &prev->nivcsw;
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
if (unlikely(signal_pending_state(prev->state, prev))) {
@@ -2610,6 +2405,19 @@ need_resched:
} else {
deactivate_task(rq, prev, DEQUEUE_SLEEP);
prev->on_rq = 0;
+
+ /*
+ * If a worker went to sleep, notify and ask workqueue
+ * whether it wants to wake up a task to maintain
+ * concurrency.
+ */
+ if (prev->flags & PF_WQ_WORKER) {
+ struct task_struct *to_wakeup;
+
+ to_wakeup = wq_worker_sleeping(prev, cpu);
+ if (to_wakeup)
+ try_to_wake_up_local(to_wakeup);
+ }
}
switch_count = &prev->nvcsw;
}
@@ -2622,7 +2430,6 @@ need_resched:
put_prev_task(rq, prev);
next = pick_next_task(rq);
clear_tsk_need_resched(prev);
- clear_tsk_need_resched_lazy(prev);
rq->skip_clock_update = 0;
if (likely(prev != next)) {
@@ -2653,14 +2460,6 @@ static inline void sched_submit_work(struct task_struct *tsk)
{
if (!tsk->state || tsk_is_pi_blocked(tsk))
return;
-
- /*
- * If a worker went to sleep, notify and ask workqueue whether
- * it wants to wake up a task to maintain concurrency.
- */
- if (tsk->flags & PF_WQ_WORKER)
- wq_worker_sleeping(tsk);
-
/*
* If we are going to sleep and we have plugged IO queued,
* make sure to submit it to avoid deadlocks.
@@ -2669,19 +2468,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
blk_schedule_flush_plug(tsk);
}
-static inline void sched_update_worker(struct task_struct *tsk)
-{
- if (tsk->flags & PF_WQ_WORKER)
- wq_worker_running(tsk);
-}
-
asmlinkage void __sched schedule(void)
{
struct task_struct *tsk = current;
sched_submit_work(tsk);
__schedule();
- sched_update_worker(tsk);
}
EXPORT_SYMBOL(schedule);
@@ -2727,26 +2519,9 @@ asmlinkage void __sched notrace preempt_schedule(void)
if (likely(!preemptible()))
return;
-#ifdef CONFIG_PREEMPT_LAZY
- /*
- * Check for lazy preemption
- */
- if (current_thread_info()->preempt_lazy_count &&
- !test_thread_flag(TIF_NEED_RESCHED))
- return;
-#endif
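The removed hunk gates preemption on the lazy-preempt counter: a task inside a preempt_lazy_disable() section is only preempted early if a hard TIF_NEED_RESCHED is pending, while a merely lazy resched request waits until the section ends. A compact sketch of that gate (the flag arguments are illustrative stand-ins for the thread-info bits):

/* Returns 1 when preempt_schedule() should go ahead and reschedule. */
static int lazy_preempt_gate(int preempt_lazy_count,
                             int need_resched, int need_resched_lazy)
{
        if (need_resched)
                return 1;       /* hard request: always preempt */
        if (need_resched_lazy && preempt_lazy_count == 0)
                return 1;       /* lazy request and no lazy-disabled section */
        return 0;               /* defer until preempt_lazy_enable() */
}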
do {
add_preempt_count_notrace(PREEMPT_ACTIVE);
- /*
- * The add/subtract must not be traced by the function
- * tracer. But we still want to account for the
- * preempt off latency tracer. Since the _notrace versions
- * of add/subtract skip the accounting for latency tracer
- * we must force it manually.
- */
- start_critical_timings();
__schedule();
- stop_critical_timings();
sub_preempt_count_notrace(PREEMPT_ACTIVE);
/*
@@ -2919,10 +2694,10 @@ void complete(struct completion *x)
{
unsigned long flags;
- raw_spin_lock_irqsave(&x->wait.lock, flags);
+ spin_lock_irqsave(&x->wait.lock, flags);
x->done++;
- __swait_wake_locked(&x->wait, TASK_NORMAL, 1);
- raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+ __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
+ spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete);
@@ -2939,10 +2714,10 @@ void complete_all(struct completion *x)
{
unsigned long flags;
- raw_spin_lock_irqsave(&x->wait.lock, flags);
+ spin_lock_irqsave(&x->wait.lock, flags);
x->done += UINT_MAX/2;
- __swait_wake_locked(&x->wait, TASK_NORMAL, 0);
- raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+ __wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL);
+ spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete_all);
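Both versions of complete_all() rely on the same trick: bumping done by UINT_MAX/2 makes the counter effectively inexhaustible, so every current and future waiter sees done > 0 and proceeds, whereas complete() hands out exactly one token. A compact user-space completion built on a pthread mutex and condition variable, showing the same counter semantics (illustrative, not the kernel structure):

#include <limits.h>
#include <pthread.h>

struct completion_sketch {
        pthread_mutex_t lock;
        pthread_cond_t  wait;
        unsigned int    done;
};

static void complete_sketch(struct completion_sketch *x)
{
        pthread_mutex_lock(&x->lock);
        x->done++;                      /* exactly one waiter may proceed */
        pthread_cond_signal(&x->wait);
        pthread_mutex_unlock(&x->lock);
}

static void complete_all_sketch(struct completion_sketch *x)
{
        pthread_mutex_lock(&x->lock);
        x->done += UINT_MAX / 2;        /* every waiter, now and later, proceeds */
        pthread_cond_broadcast(&x->wait);
        pthread_mutex_unlock(&x->lock);
}

static void wait_for_completion_sketch(struct completion_sketch *x)
{
        pthread_mutex_lock(&x->lock);
        while (!x->done)
                pthread_cond_wait(&x->wait, &x->lock);
        x->done--;                      /* consume one token, as the kernel does */
        pthread_mutex_unlock(&x->lock);
}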
@@ -2951,20 +2726,20 @@ do_wait_for_common(struct completion *x,
long (*action)(long), long timeout, int state)
{
if (!x->done) {
- DEFINE_SWAITER(wait);
+ DECLARE_WAITQUEUE(wait, current);
- swait_prepare_locked(&x->wait, &wait);
+ __add_wait_queue_tail_exclusive(&x->wait, &wait);
do {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
break;
}
__set_current_state(state);
- raw_spin_unlock_irq(&x->wait.lock);
+ spin_unlock_irq(&x->wait.lock);
timeout = action(timeout);
- raw_spin_lock_irq(&x->wait.lock);
+ spin_lock_irq(&x->wait.lock);
} while (!x->done && timeout);
- swait_finish_locked(&x->wait, &wait);
+ __remove_wait_queue(&x->wait, &wait);
if (!x->done)
return timeout;
}
@@ -2978,9 +2753,9 @@ __wait_for_common(struct completion *x,
{
might_sleep();
- raw_spin_lock_irq(&x->wait.lock);
+ spin_lock_irq(&x->wait.lock);
timeout = do_wait_for_common(x, action, timeout, state);
- raw_spin_unlock_irq(&x->wait.lock);
+ spin_unlock_irq(&x->wait.lock);
return timeout;
}
@@ -3156,12 +2931,12 @@ bool try_wait_for_completion(struct completion *x)
unsigned long flags;
int ret = 1;
- raw_spin_lock_irqsave(&x->wait.lock, flags);
+ spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = 0;
else
x->done--;
- raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+ spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
}
EXPORT_SYMBOL(try_wait_for_completion);
@@ -3179,10 +2954,10 @@ bool completion_done(struct completion *x)
unsigned long flags;
int ret = 1;
- raw_spin_lock_irqsave(&x->wait.lock, flags);
+ spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = 0;
- raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+ spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
}
EXPORT_SYMBOL(completion_done);
@@ -3243,8 +3018,7 @@ EXPORT_SYMBOL(sleep_on_timeout);
* This function changes the 'effective' priority of a task. It does
* not touch ->normal_prio like __setscheduler().
*
- * Used by the rt_mutex code to implement priority inheritance
- * logic. Call site only calls if the priority of the task changed.
+ * Used by the rt_mutex code to implement priority inheritance logic.
*/
void rt_mutex_setprio(struct task_struct *p, int prio)
{
@@ -3475,25 +3249,20 @@ static struct task_struct *find_process_by_pid(pid_t pid)
return pid ? find_task_by_vpid(pid) : current;
}
-static void __setscheduler_params(struct task_struct *p, int policy, int prio)
-{
- p->policy = policy;
- p->rt_priority = prio;
- p->normal_prio = normal_prio(p);
- set_load_weight(p);
-}
-
/* Actually do priority change: must hold rq lock. */
static void
__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
{
- __setscheduler_params(p, policy, prio);
+ p->policy = policy;
+ p->rt_priority = prio;
+ p->normal_prio = normal_prio(p);
/* we are holding p->pi_lock already */
p->prio = rt_mutex_getprio(p);
if (rt_prio(p->prio))
p->sched_class = &rt_sched_class;
else
p->sched_class = &fair_sched_class;
+ set_load_weight(p);
}
/*
@@ -3515,7 +3284,6 @@ static bool check_same_owner(struct task_struct *p)
static int __sched_setscheduler(struct task_struct *p, int policy,
const struct sched_param *param, bool user)
{
- int newprio = MAX_RT_PRIO - 1 - param->sched_priority;
int retval, oldprio, oldpolicy = -1, on_rq, running;
unsigned long flags;
const struct sched_class *prev_class;
@@ -3611,13 +3379,10 @@ recheck:
}
/*
- * If not changing anything there's no need to proceed
- * further, but store a possible modification of
- * reset_on_fork.
+ * If not changing anything there's no need to proceed further:
*/
if (unlikely(policy == p->policy && (!rt_policy(policy) ||
param->sched_priority == p->rt_priority))) {
- p->sched_reset_on_fork = reset_on_fork;
task_rq_unlock(rq, p, &flags);
return 0;
}
@@ -3643,25 +3408,6 @@ recheck:
task_rq_unlock(rq, p, &flags);
goto recheck;
}
-
- p->sched_reset_on_fork = reset_on_fork;
- oldprio = p->prio;
-
- /*
- * Special case for priority boosted tasks.
- *
- * If the new priority is lower or equal (user space view)
- * than the current (boosted) priority, we just store the new
- * normal parameters and do not touch the scheduler class and
- * the runqueue. This will be done when the task deboost
- * itself.
- */
- if (rt_mutex_check_prio(p, newprio)) {
- __setscheduler_params(p, policy, param->sched_priority);
- task_rq_unlock(rq, p, &flags);
- return 0;
- }
-
on_rq = p->on_rq;
running = task_current(rq, p);
if (on_rq)
@@ -3669,18 +3415,17 @@ recheck:
if (running)
p->sched_class->put_prev_task(rq, p);
+ p->sched_reset_on_fork = reset_on_fork;
+
+ oldprio = p->prio;
prev_class = p->sched_class;
__setscheduler(rq, p, policy, param->sched_priority);
if (running)
p->sched_class->set_curr_task(rq);
- if (on_rq) {
- /*
- * We enqueue to tail when the priority of a task is
- * increased (user space view).
- */
- enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0);
- }
+ if (on_rq)
+ enqueue_task(rq, p, 0);
+
check_class_changed(rq, p, prev_class, oldprio);
task_rq_unlock(rq, p, &flags);
@@ -4056,17 +3801,9 @@ static inline int should_resched(void)
static void __cond_resched(void)
{
- do {
- add_preempt_count(PREEMPT_ACTIVE);
- __schedule();
- sub_preempt_count(PREEMPT_ACTIVE);
- /*
- * Check again in case we missed a preemption
- * opportunity between schedule and now.
- */
- barrier();
-
- } while (need_resched());
+ add_preempt_count(PREEMPT_ACTIVE);
+ __schedule();
+ sub_preempt_count(PREEMPT_ACTIVE);
}
int __sched _cond_resched(void)
@@ -4107,7 +3844,6 @@ int __cond_resched_lock(spinlock_t *lock)
}
EXPORT_SYMBOL(__cond_resched_lock);
-#ifndef CONFIG_PREEMPT_RT_FULL
int __sched __cond_resched_softirq(void)
{
BUG_ON(!in_softirq());
@@ -4121,7 +3857,6 @@ int __sched __cond_resched_softirq(void)
return 0;
}
EXPORT_SYMBOL(__cond_resched_softirq);
-#endif
/**
* yield - yield the current processor to other threads.
@@ -4471,7 +4206,6 @@ void init_idle(struct task_struct *idle, int cpu)
rcu_read_unlock();
rq->curr = rq->idle = idle;
- idle->on_rq = 1;
#if defined(CONFIG_SMP)
idle->on_cpu = 1;
#endif
@@ -4479,9 +4213,7 @@ void init_idle(struct task_struct *idle, int cpu)
/* Set the preempt count _outside_ the spinlocks! */
task_thread_info(idle)->preempt_count = 0;
-#ifdef CONFIG_HAVE_PREEMPT_LAZY
- task_thread_info(idle)->preempt_lazy_count = 0;
-#endif
+
/*
* The idle tasks have their own, simple scheduling class:
*/
@@ -4496,90 +4228,11 @@ void init_idle(struct task_struct *idle, int cpu)
#ifdef CONFIG_SMP
void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
- if (!migrate_disabled_updated(p)) {
- if (p->sched_class && p->sched_class->set_cpus_allowed)
- p->sched_class->set_cpus_allowed(p, new_mask);
- p->nr_cpus_allowed = cpumask_weight(new_mask);
- }
- cpumask_copy(&p->cpus_allowed, new_mask);
-}
-
-static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
-static DEFINE_MUTEX(sched_down_mutex);
-static cpumask_t sched_down_cpumask;
-
-void tell_sched_cpu_down_begin(int cpu)
-{
- mutex_lock(&sched_down_mutex);
- cpumask_set_cpu(cpu, &sched_down_cpumask);
- mutex_unlock(&sched_down_mutex);
-}
-
-void tell_sched_cpu_down_done(int cpu)
-{
- mutex_lock(&sched_down_mutex);
- cpumask_clear_cpu(cpu, &sched_down_cpumask);
- mutex_unlock(&sched_down_mutex);
-}
+ if (p->sched_class && p->sched_class->set_cpus_allowed)
+ p->sched_class->set_cpus_allowed(p, new_mask);
-/**
- * migrate_me - try to move the current task off this cpu
- *
- * Used by the pin_current_cpu() code to try to get tasks
- * to move off the current CPU as it is going down.
- * It will only move the task if the task isn't pinned to
- * the CPU (with migrate_disable, affinity or NO_SETAFFINITY)
- * and the task has to be in a RUNNING state. Otherwise the
- * movement of the task will wake it up (change its state
- * to running) when the task did not expect it.
- *
- * Returns 1 if it succeeded in moving the current task
- * 0 otherwise.
- */
-int migrate_me(void)
-{
- struct task_struct *p = current;
- struct migration_arg arg;
- struct cpumask *cpumask;
- struct cpumask *mask;
- unsigned long flags;
- unsigned int dest_cpu;
- struct rq *rq;
-
- /*
- * We can not migrate tasks bounded to a CPU or tasks not
- * running. The movement of the task will wake it up.
- */
- if (p->flags & PF_NO_SETAFFINITY || p->state)
- return 0;
-
- mutex_lock(&sched_down_mutex);
- rq = task_rq_lock(p, &flags);
-
- cpumask = &__get_cpu_var(sched_cpumasks);
- mask = &p->cpus_allowed;
-
- cpumask_andnot(cpumask, mask, &sched_down_cpumask);
-
- if (!cpumask_weight(cpumask)) {
- /* It's only on this CPU? */
- task_rq_unlock(rq, p, &flags);
- mutex_unlock(&sched_down_mutex);
- return 0;
- }
-
- dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
-
- arg.task = p;
- arg.dest_cpu = dest_cpu;
-
- task_rq_unlock(rq, p, &flags);
-
- stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
- tlb_migrate_finish(p->mm);
- mutex_unlock(&sched_down_mutex);
-
- return 1;
+ cpumask_copy(&p->cpus_allowed, new_mask);
+ p->nr_cpus_allowed = cpumask_weight(new_mask);
}
/*
@@ -4625,7 +4278,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
do_set_cpus_allowed(p, new_mask);
/* Can the task run on the task's current CPU? If so, we're done */
- if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
+ if (cpumask_test_cpu(task_cpu(p), new_mask))
goto out;
dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
@@ -4714,8 +4367,6 @@ static int migration_cpu_stop(void *data)
#ifdef CONFIG_HOTPLUG_CPU
-static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
-
/*
* Ensures that the idle task is using init_mm right before its cpu goes
* offline.
@@ -4728,12 +4379,7 @@ void idle_task_exit(void)
if (mm != &init_mm)
switch_mm(mm, &init_mm, current);
-
- /*
- * Defer the cleanup to an alive cpu. On RT we can neither
- * call mmdrop() nor mmdrop_delayed() from here.
- */
- per_cpu(idle_last_mm, smp_processor_id()) = mm;
+ mmdrop(mm);
}
/*
@@ -5057,10 +4703,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_DEAD:
calc_load_migrate(rq);
- if (per_cpu(idle_last_mm, cpu)) {
- mmdrop(per_cpu(idle_last_mm, cpu));
- per_cpu(idle_last_mm, cpu) = NULL;
- }
break;
#endif
}
@@ -6933,8 +6575,7 @@ void __init sched_init(void)
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
static inline int preempt_count_equals(int preempt_offset)
{
- int nested = (preempt_count() & ~PREEMPT_ACTIVE) +
- sched_rcu_preempt_depth();
+ int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
return (nested == preempt_offset);
}
@@ -6944,8 +6585,7 @@ void __might_sleep(const char *file, int line, int preempt_offset)
static unsigned long prev_jiffy; /* ratelimiting */
rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
- if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
- !is_idle_task(current)) ||
+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
system_state != SYSTEM_RUNNING || oops_in_progress)
return;
if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
@@ -6963,13 +6603,6 @@ void __might_sleep(const char *file, int line, int preempt_offset)
debug_show_held_locks(current);
if (irqs_disabled())
print_irqtrace_events(current);
-#ifdef CONFIG_DEBUG_PREEMPT
- if (!preempt_count_equals(preempt_offset)) {
- pr_err("Preemption disabled at:");
- print_ip_sym(current->preempt_disable_ip);
- pr_cont("\n");
- }
-#endif
dump_stack();
}
EXPORT_SYMBOL(__might_sleep);
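The core.c hunks above strip the RT migrate_me()/migrate-disable machinery and leave do_set_cpus_allowed() and set_cpus_allowed_ptr() with plain cpumask handling. Below is a minimal, hedged sketch of that pattern using the stock cpumask helpers; demo_pick_dest() and its behaviour are illustrative only, not code from the patch.

    #include <linux/cpumask.h>
    #include <linux/sched.h>

    /* Sketch only: publish the new mask, cache its weight, and look for a
     * migration target only when the current CPU is no longer allowed. */
    static int demo_pick_dest(struct task_struct *p,
                              const struct cpumask *new_mask)
    {
            cpumask_copy(&p->cpus_allowed, new_mask);        /* publish the mask   */
            p->nr_cpus_allowed = cpumask_weight(new_mask);   /* cache the popcount */

            if (cpumask_test_cpu(task_cpu(p), new_mask))
                    return -1;                               /* no migration needed */

            /* any active CPU inside the new mask will do as a destination */
            return cpumask_any_and(cpu_active_mask, new_mask);
    }
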
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 1681f49..9994791 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -655,45 +655,37 @@ static void __vtime_account_system(struct task_struct *tsk)
void vtime_account_system(struct task_struct *tsk)
{
- raw_spin_lock(&tsk->vtime_lock);
- write_seqcount_begin(&tsk->vtime_seq);
+ write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
- write_seqcount_end(&tsk->vtime_seq);
- raw_spin_unlock(&tsk->vtime_lock);
+ write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_gen_account_irq_exit(struct task_struct *tsk)
{
- raw_spin_lock(&tsk->vtime_lock);
- write_seqcount_begin(&tsk->vtime_seq);
+ write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
if (context_tracking_in_user())
tsk->vtime_snap_whence = VTIME_USER;
- write_seqcount_end(&tsk->vtime_seq);
- raw_spin_unlock(&tsk->vtime_lock);
+ write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_account_user(struct task_struct *tsk)
{
cputime_t delta_cpu;
- raw_spin_lock(&tsk->vtime_lock);
- write_seqcount_begin(&tsk->vtime_seq);
+ write_seqlock(&tsk->vtime_seqlock);
delta_cpu = get_vtime_delta(tsk);
tsk->vtime_snap_whence = VTIME_SYS;
account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
- write_seqcount_end(&tsk->vtime_seq);
- raw_spin_unlock(&tsk->vtime_lock);
+ write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_user_enter(struct task_struct *tsk)
{
- raw_spin_lock(&tsk->vtime_lock);
- write_seqcount_begin(&tsk->vtime_seq);
+ write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
tsk->vtime_snap_whence = VTIME_USER;
- write_seqcount_end(&tsk->vtime_seq);
- raw_spin_unlock(&tsk->vtime_lock);
+ write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_guest_enter(struct task_struct *tsk)
@@ -705,23 +697,19 @@ void vtime_guest_enter(struct task_struct *tsk)
* synchronization against the reader (task_gtime())
* that can thus safely catch up with a tickless delta.
*/
- raw_spin_lock(&tsk->vtime_lock);
- write_seqcount_begin(&tsk->vtime_seq);
+ write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
current->flags |= PF_VCPU;
- write_seqcount_end(&tsk->vtime_seq);
- raw_spin_unlock(&tsk->vtime_lock);
+ write_sequnlock(&tsk->vtime_seqlock);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);
void vtime_guest_exit(struct task_struct *tsk)
{
- raw_spin_lock(&tsk->vtime_lock);
- write_seqcount_begin(&tsk->vtime_seq);
+ write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
current->flags &= ~PF_VCPU;
- write_seqcount_end(&tsk->vtime_seq);
- raw_spin_unlock(&tsk->vtime_lock);
+ write_sequnlock(&tsk->vtime_seqlock);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);
@@ -734,30 +722,24 @@ void vtime_account_idle(struct task_struct *tsk)
void arch_vtime_task_switch(struct task_struct *prev)
{
- raw_spin_lock(&prev->vtime_lock);
- write_seqcount_begin(&prev->vtime_seq);
+ write_seqlock(&prev->vtime_seqlock);
prev->vtime_snap_whence = VTIME_SLEEPING;
- write_seqcount_end(&prev->vtime_seq);
- raw_spin_unlock(&prev->vtime_lock);
+ write_sequnlock(&prev->vtime_seqlock);
- raw_spin_lock(&current->vtime_lock);
- write_seqcount_begin(&current->vtime_seq);
+ write_seqlock(&current->vtime_seqlock);
current->vtime_snap_whence = VTIME_SYS;
current->vtime_snap = sched_clock_cpu(smp_processor_id());
- write_seqcount_end(&current->vtime_seq);
- raw_spin_unlock(&current->vtime_lock);
+ write_sequnlock(&current->vtime_seqlock);
}
void vtime_init_idle(struct task_struct *t, int cpu)
{
unsigned long flags;
- raw_spin_lock_irqsave(&t->vtime_lock, flags);
- write_seqcount_begin(&t->vtime_seq);
+ write_seqlock_irqsave(&t->vtime_seqlock, flags);
t->vtime_snap_whence = VTIME_SYS;
t->vtime_snap = sched_clock_cpu(cpu);
- write_seqcount_end(&t->vtime_seq);
- raw_spin_unlock_irqrestore(&t->vtime_lock, flags);
+ write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
}
cputime_t task_gtime(struct task_struct *t)
@@ -766,13 +748,13 @@ cputime_t task_gtime(struct task_struct *t)
cputime_t gtime;
do {
- seq = read_seqcount_begin(&t->vtime_seq);
+ seq = read_seqbegin(&t->vtime_seqlock);
gtime = t->gtime;
if (t->flags & PF_VCPU)
gtime += vtime_delta(t);
- } while (read_seqcount_retry(&t->vtime_seq, seq));
+ } while (read_seqretry(&t->vtime_seqlock, seq));
return gtime;
}
@@ -795,7 +777,7 @@ fetch_task_cputime(struct task_struct *t,
*udelta = 0;
*sdelta = 0;
- seq = read_seqcount_begin(&t->vtime_seq);
+ seq = read_seqbegin(&t->vtime_seqlock);
if (u_dst)
*u_dst = *u_src;
@@ -819,7 +801,7 @@ fetch_task_cputime(struct task_struct *t,
if (t->vtime_snap_whence == VTIME_SYS)
*sdelta = delta;
}
- } while (read_seqcount_retry(&t->vtime_seq, seq));
+ } while (read_seqretry(&t->vtime_seqlock, seq));
}
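The cputime.c hunks above swap the RT raw_spinlock plus seqcount pair back to the stock seqlock_t API. A minimal sketch of that writer/reader pattern follows; stats_lock and stats_value are hypothetical names, not from the patch.

    #include <linux/seqlock.h>
    #include <linux/types.h>

    static DEFINE_SEQLOCK(stats_lock);      /* hypothetical seqlock         */
    static u64 stats_value;                 /* datum it protects (made up)  */

    static void stats_update(u64 delta)
    {
            write_seqlock(&stats_lock);     /* writers serialize, bump seq  */
            stats_value += delta;
            write_sequnlock(&stats_lock);
    }

    static u64 stats_read(void)
    {
            unsigned int seq;
            u64 v;

            do {                            /* lockless read, retry on race */
                    seq = read_seqbegin(&stats_lock);
                    v = stats_value;
            } while (read_seqretry(&stats_lock, seq));

            return v;
    }
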
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 70812af..fd9ca1d 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -256,9 +256,6 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
P(rt_throttled);
PN(rt_time);
PN(rt_runtime);
-#ifdef CONFIG_SMP
- P(rt_nr_migratory);
-#endif
#undef PN
#undef P
@@ -588,10 +585,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
#endif
P(policy);
P(prio);
-#ifdef CONFIG_PREEMPT_RT_FULL
- P(migrate_disable);
-#endif
- P(nr_cpus_allowed);
#undef PN
#undef __PN
#undef P
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0af1448..790e2fc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1902,7 +1902,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
ideal_runtime = sched_slice(cfs_rq, curr);
delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
if (delta_exec > ideal_runtime) {
- resched_task_lazy(rq_of(cfs_rq)->curr);
+ resched_task(rq_of(cfs_rq)->curr);
/*
* The current task ran long enough, ensure it doesn't get
* re-elected due to buddy favours.
@@ -1926,7 +1926,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
return;
if (delta > ideal_runtime)
- resched_task_lazy(rq_of(cfs_rq)->curr);
+ resched_task(rq_of(cfs_rq)->curr);
}
static void
@@ -2047,7 +2047,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
* validating it and just reschedule.
*/
if (queued) {
- resched_task_lazy(rq_of(cfs_rq)->curr);
+ resched_task(rq_of(cfs_rq)->curr);
return;
}
/*
@@ -2237,7 +2237,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
* hierarchy can be throttled
*/
if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
- resched_task_lazy(rq_of(cfs_rq)->curr);
+ resched_task(rq_of(cfs_rq)->curr);
}
static __always_inline
@@ -2837,7 +2837,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
if (delta < 0) {
if (rq->curr == p)
- resched_task_lazy(p);
+ resched_task(p);
return;
}
@@ -3704,7 +3704,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
return;
preempt:
- resched_task_lazy(curr);
+ resched_task(curr);
/*
* Only set the backward buddy when the current task is still
* on the rq. This can happen when a wakeup gets interleaved
@@ -5979,7 +5979,7 @@ static void task_fork_fair(struct task_struct *p)
* 'current' within the tree based on its new key value.
*/
swap(curr->vruntime, se->vruntime);
- resched_task_lazy(rq->curr);
+ resched_task(rq->curr);
}
se->vruntime -= cfs_rq->min_vruntime;
@@ -6004,7 +6004,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
*/
if (rq->curr == p) {
if (p->prio > oldprio)
- resched_task_lazy(rq->curr);
+ resched_task(rq->curr);
} else
check_preempt_curr(rq, p, 0);
}
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 4594051..99399f8 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -50,18 +50,11 @@ SCHED_FEAT(LB_BIAS, true)
*/
SCHED_FEAT(NONTASK_POWER, true)
-#ifndef CONFIG_PREEMPT_RT_FULL
/*
* Queue remote wakeups on the target CPU and process them
* using the scheduler IPI. Reduces rq->lock contention/bounces.
*/
SCHED_FEAT(TTWU_QUEUE, true)
-#else
-SCHED_FEAT(TTWU_QUEUE, false)
-# ifdef CONFIG_PREEMPT_LAZY
-SCHED_FEAT(PREEMPT_LAZY, true)
-# endif
-#endif
SCHED_FEAT(FORCE_SD_OVERLAP, false)
SCHED_FEAT(RT_RUNTIME_SHARE, true)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 240fc60..ff04e1a 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -43,7 +43,6 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
hrtimer_init(&rt_b->rt_period_timer,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- rt_b->rt_period_timer.irqsafe = 1;
rt_b->rt_period_timer.function = sched_rt_period_timer;
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2843303..4f31059 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -898,7 +898,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */
#define WF_FORK 0x02 /* child wakeup after fork */
#define WF_MIGRATED 0x4 /* internal use, task got migrated */
-#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */
/*
* To aid in avoiding the subversion of "niceness" due to uneven distribution
@@ -1046,15 +1045,6 @@ extern void init_sched_fair_class(void);
extern void resched_task(struct task_struct *p);
extern void resched_cpu(int cpu);
-#ifdef CONFIG_PREEMPT_LAZY
-extern void resched_task_lazy(struct task_struct *tsk);
-#else
-static inline void resched_task_lazy(struct task_struct *tsk)
-{
- resched_task(tsk);
-}
-#endif
-
extern struct rt_bandwidth def_rt_bandwidth;
extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
diff --git a/kernel/signal.c b/kernel/signal.c
index 3d32f54..ded28b9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -14,7 +14,6 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/sched.h>
-#include <linux/sched/rt.h>
#include <linux/fs.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
@@ -350,45 +349,13 @@ static bool task_participate_group_stop(struct task_struct *task)
return false;
}
-#ifdef __HAVE_ARCH_CMPXCHG
-static inline struct sigqueue *get_task_cache(struct task_struct *t)
-{
- struct sigqueue *q = t->sigqueue_cache;
-
- if (cmpxchg(&t->sigqueue_cache, q, NULL) != q)
- return NULL;
- return q;
-}
-
-static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
-{
- if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL)
- return 0;
- return 1;
-}
-
-#else
-
-static inline struct sigqueue *get_task_cache(struct task_struct *t)
-{
- return NULL;
-}
-
-static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
-{
- return 1;
-}
-
-#endif
-
/*
* allocate a new signal queue record
* - this may be called without locks if and only if t == current, otherwise an
* appropriate lock must be held to stop the target task from exiting
*/
static struct sigqueue *
-__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags,
- int override_rlimit, int fromslab)
+__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
{
struct sigqueue *q = NULL;
struct user_struct *user;
@@ -405,10 +372,7 @@ __sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags,
if (override_rlimit ||
atomic_read(&user->sigpending) <=
task_rlimit(t, RLIMIT_SIGPENDING)) {
- if (!fromslab)
- q = get_task_cache(t);
- if (!q)
- q = kmem_cache_alloc(sigqueue_cachep, flags);
+ q = kmem_cache_alloc(sigqueue_cachep, flags);
} else {
print_dropped_signal(sig);
}
@@ -425,13 +389,6 @@ __sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags,
return q;
}
-static struct sigqueue *
-__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags,
- int override_rlimit)
-{
- return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0);
-}
-
static void __sigqueue_free(struct sigqueue *q)
{
if (q->flags & SIGQUEUE_PREALLOC)
@@ -441,21 +398,6 @@ static void __sigqueue_free(struct sigqueue *q)
kmem_cache_free(sigqueue_cachep, q);
}
-static void sigqueue_free_current(struct sigqueue *q)
-{
- struct user_struct *up;
-
- if (q->flags & SIGQUEUE_PREALLOC)
- return;
-
- up = q->user;
- if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) {
- atomic_dec(&up->sigpending);
- free_uid(up);
- } else
- __sigqueue_free(q);
-}
-
void flush_sigqueue(struct sigpending *queue)
{
struct sigqueue *q;
@@ -469,21 +411,6 @@ void flush_sigqueue(struct sigpending *queue)
}
/*
- * Called from __exit_signal. Flush tsk->pending and
- * tsk->sigqueue_cache
- */
-void flush_task_sigqueue(struct task_struct *tsk)
-{
- struct sigqueue *q;
-
- flush_sigqueue(&tsk->pending);
-
- q = get_task_cache(tsk);
- if (q)
- kmem_cache_free(sigqueue_cachep, q);
-}
-
-/*
* Flush all pending signals for a task.
*/
void __flush_signals(struct task_struct *t)
@@ -635,7 +562,7 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
still_pending:
list_del_init(&first->list);
copy_siginfo(info, &first->info);
- sigqueue_free_current(first);
+ __sigqueue_free(first);
} else {
/*
* Ok, it wasn't in the queue. This must be
@@ -681,8 +608,6 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
{
int signr;
- WARN_ON_ONCE(tsk != current);
-
/* We only dequeue private signals from ourselves, we don't let
* signalfd steal them
*/
@@ -1305,8 +1230,8 @@ int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
* We don't want to have recursive SIGSEGV's etc, for example,
* that is why we also clear SIGNAL_UNKILLABLE.
*/
-static int
-do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
+int
+force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
{
unsigned long int flags;
int ret, blocked, ignored;
@@ -1331,39 +1256,6 @@ do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
return ret;
}
-int force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
-{
-/*
- * On some archs, PREEMPT_RT has to delay sending a signal from a trap
- * since it can not enable preemption, and the signal code's spin_locks
- * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
- * send the signal on exit of the trap.
- */
-#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
- if (in_atomic()) {
- if (WARN_ON_ONCE(t != current))
- return 0;
- if (WARN_ON_ONCE(t->forced_info.si_signo))
- return 0;
-
- if (is_si_special(info)) {
- WARN_ON_ONCE(info != SEND_SIG_PRIV);
- t->forced_info.si_signo = sig;
- t->forced_info.si_errno = 0;
- t->forced_info.si_code = SI_KERNEL;
- t->forced_info.si_pid = 0;
- t->forced_info.si_uid = 0;
- } else {
- t->forced_info = *info;
- }
-
- set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
- return 0;
- }
-#endif
- return do_force_sig_info(sig, info, t);
-}
-
/*
* Nuke all other threads in the group.
*/
@@ -1394,12 +1286,12 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
struct sighand_struct *sighand;
for (;;) {
- local_irq_save_nort(*flags);
+ local_irq_save(*flags);
rcu_read_lock();
sighand = rcu_dereference(tsk->sighand);
if (unlikely(sighand == NULL)) {
rcu_read_unlock();
- local_irq_restore_nort(*flags);
+ local_irq_restore(*flags);
break;
}
@@ -1410,7 +1302,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
}
spin_unlock(&sighand->siglock);
rcu_read_unlock();
- local_irq_restore_nort(*flags);
+ local_irq_restore(*flags);
}
return sighand;
@@ -1655,8 +1547,7 @@ EXPORT_SYMBOL(kill_pid);
*/
struct sigqueue *sigqueue_alloc(void)
{
- /* Preallocated sigqueue objects always from the slabcache ! */
- struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1);
+ struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0);
if (q)
q->flags |= SIGQUEUE_PREALLOC;
@@ -2017,7 +1908,15 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
if (gstop_done && ptrace_reparented(current))
do_notify_parent_cldstop(current, false, why);
+ /*
+ * Don't want to allow preemption here, because
+ * sys_ptrace() needs this task to be inactive.
+ *
+ * XXX: implement read_unlock_no_resched().
+ */
+ preempt_disable();
read_unlock(&tasklist_lock);
+ preempt_enable_no_resched();
freezable_schedule();
} else {
/*
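The signal.c hunks above drop the RT per-task sigqueue cache, whose core trick was claiming and refilling a single slot with cmpxchg() instead of taking a lock. A hedged sketch of that one-slot cache technique is below; struct item, slot and the helpers are illustrative stand-ins, not code from the patch, and cmpxchg() availability is assumed via linux/atomic.h.

    #include <linux/atomic.h>

    struct item { int payload; };           /* placeholder object type */
    static struct item *slot;               /* single cached object    */

    static struct item *cache_get(void)
    {
            struct item *q = slot;

            /* Claim the slot only if it still holds the value we read;
             * a concurrent claimer makes the cmpxchg() fail. */
            if (cmpxchg(&slot, q, NULL) != q)
                    return NULL;
            return q;                       /* may be NULL if slot was empty */
    }

    static int cache_put(struct item *q)
    {
            /* Refill only an empty slot: 0 = cached, 1 = caller must free. */
            return cmpxchg(&slot, NULL, q) == NULL ? 0 : 1;
    }
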
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 15ad603..d7d498d 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -21,12 +21,10 @@
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
-#include <linux/delay.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <linux/tick.h>
-#include <linux/locallock.h>
#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>
@@ -64,98 +62,6 @@ char *softirq_to_name[NR_SOFTIRQS] = {
"TASKLET", "SCHED", "HRTIMER", "RCU"
};
-#ifdef CONFIG_NO_HZ_COMMON
-# ifdef CONFIG_PREEMPT_RT_FULL
-
-struct softirq_runner {
- struct task_struct *runner[NR_SOFTIRQS];
-};
-
-static DEFINE_PER_CPU(struct softirq_runner, softirq_runners);
-
-static inline void softirq_set_runner(unsigned int sirq)
-{
- struct softirq_runner *sr = &__get_cpu_var(softirq_runners);
-
- sr->runner[sirq] = current;
-}
-
-static inline void softirq_clr_runner(unsigned int sirq)
-{
- struct softirq_runner *sr = &__get_cpu_var(softirq_runners);
-
- sr->runner[sirq] = NULL;
-}
-
-/*
- * On preempt-rt a softirq running context might be blocked on a
- * lock. There might be no other runnable task on this CPU because the
- * lock owner runs on some other CPU. So we have to go into idle with
- * the pending bit set. Therefor we need to check this otherwise we
- * warn about false positives which confuses users and defeats the
- * whole purpose of this test.
- *
- * This code is called with interrupts disabled.
- */
-void softirq_check_pending_idle(void)
-{
- static int rate_limit;
- struct softirq_runner *sr = &__get_cpu_var(softirq_runners);
- u32 warnpending;
- int i;
-
- if (rate_limit >= 10)
- return;
-
- warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
- for (i = 0; i < NR_SOFTIRQS; i++) {
- struct task_struct *tsk = sr->runner[i];
-
- /*
- * The wakeup code in rtmutex.c wakes up the task
- * _before_ it sets pi_blocked_on to NULL under
- * tsk->pi_lock. So we need to check for both: state
- * and pi_blocked_on.
- */
- if (tsk) {
- raw_spin_lock(&tsk->pi_lock);
- if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
- /* Clear all bits pending in that task */
- warnpending &= ~(tsk->softirqs_raised);
- warnpending &= ~(1 << i);
- }
- raw_spin_unlock(&tsk->pi_lock);
- }
- }
-
- if (warnpending) {
- printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
- warnpending);
- rate_limit++;
- }
-}
-# else
-/*
- * On !PREEMPT_RT we just printk rate limited:
- */
-void softirq_check_pending_idle(void)
-{
- static int rate_limit;
-
- if (rate_limit < 10 &&
- (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
- printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
- local_softirq_pending());
- rate_limit++;
- }
-}
-# endif
-
-#else /* !CONFIG_NO_HZ_COMMON */
-static inline void softirq_set_runner(unsigned int sirq) { }
-static inline void softirq_clr_runner(unsigned int sirq) { }
-#endif
-
/*
* we cannot loop indefinitely here to avoid userspace starvation,
* but we also don't want to introduce a worst case 1/HZ latency
@@ -171,57 +77,6 @@ static void wakeup_softirqd(void)
wake_up_process(tsk);
}
-static void handle_softirq(unsigned int vec_nr, int cpu, int need_rcu_bh_qs)
-{
- struct softirq_action *h = softirq_vec + vec_nr;
- unsigned int prev_count = preempt_count();
-
- kstat_incr_softirqs_this_cpu(vec_nr);
- trace_softirq_entry(vec_nr);
- h->action(h);
- trace_softirq_exit(vec_nr);
-
- if (unlikely(prev_count != preempt_count())) {
- pr_err("softirq %u %s %p preempt count leak: %08x -> %08x\n",
- vec_nr, softirq_to_name[vec_nr], h->action,
- prev_count, (unsigned int) preempt_count());
- preempt_count() = prev_count;
- }
- if (need_rcu_bh_qs)
- rcu_bh_qs(cpu);
-}
-
-#ifndef CONFIG_PREEMPT_RT_FULL
-static inline int ksoftirqd_softirq_pending(void)
-{
- return local_softirq_pending();
-}
-
-static void handle_pending_softirqs(u32 pending, int cpu, int need_rcu_bh_qs)
-{
- unsigned int vec_nr;
-
- local_irq_enable();
- for (vec_nr = 0; pending; vec_nr++, pending >>= 1) {
- if (pending & 1)
- handle_softirq(vec_nr, cpu, need_rcu_bh_qs);
- }
- local_irq_disable();
-}
-
-static void run_ksoftirqd(unsigned int cpu)
-{
- local_irq_disable();
- if (ksoftirqd_softirq_pending()) {
- __do_softirq();
- rcu_note_context_switch(cpu);
- local_irq_enable();
- cond_resched();
- return;
- }
- local_irq_enable();
-}
-
/*
* preempt_count and SOFTIRQ_OFFSET usage:
* - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
@@ -354,51 +209,14 @@ EXPORT_SYMBOL(local_bh_enable_ip);
#define MAX_SOFTIRQ_TIME msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART 10
-#ifdef CONFIG_TRACE_IRQFLAGS
-/*
- * Convoluted means of passing __do_softirq() a message through the various
- * architecture execute_on_stack() bits.
- *
- * When we run softirqs from irq_exit() and thus on the hardirq stack we need
- * to keep the lockdep irq context tracking as tight as possible in order to
- * not miss-qualify lock contexts and miss possible deadlocks.
- */
-static DEFINE_PER_CPU(int, softirq_from_hardirq);
-
-static inline void lockdep_softirq_from_hardirq(void)
-{
- this_cpu_write(softirq_from_hardirq, 1);
-}
-
-static inline void lockdep_softirq_start(void)
-{
- if (this_cpu_read(softirq_from_hardirq))
- trace_hardirq_exit();
- lockdep_softirq_enter();
-}
-
-static inline void lockdep_softirq_end(void)
-{
- lockdep_softirq_exit();
- if (this_cpu_read(softirq_from_hardirq)) {
- this_cpu_write(softirq_from_hardirq, 0);
- trace_hardirq_enter();
- }
-}
-
-#else
-static inline void lockdep_softirq_from_hardirq(void) { }
-static inline void lockdep_softirq_start(void) { }
-static inline void lockdep_softirq_end(void) { }
-#endif
-
asmlinkage void __do_softirq(void)
{
+ struct softirq_action *h;
+ __u32 pending;
unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
+ int cpu;
unsigned long old_flags = current->flags;
int max_restart = MAX_SOFTIRQ_RESTART;
- __u32 pending;
- int cpu;
/*
* Mask out PF_MEMALLOC s current task context is borrowed for the
@@ -411,14 +229,43 @@ asmlinkage void __do_softirq(void)
account_irq_enter_time(current);
__local_bh_disable(_RET_IP_, SOFTIRQ_OFFSET);
- lockdep_softirq_start();
+ lockdep_softirq_enter();
cpu = smp_processor_id();
restart:
/* Reset the pending bitmask before enabling irqs */
set_softirq_pending(0);
- handle_pending_softirqs(pending, cpu, 1);
+ local_irq_enable();
+
+ h = softirq_vec;
+
+ do {
+ if (pending & 1) {
+ unsigned int vec_nr = h - softirq_vec;
+ int prev_count = preempt_count();
+
+ kstat_incr_softirqs_this_cpu(vec_nr);
+
+ trace_softirq_entry(vec_nr);
+ h->action(h);
+ trace_softirq_exit(vec_nr);
+ if (unlikely(prev_count != preempt_count())) {
+ printk(KERN_ERR "huh, entered softirq %u %s %p"
+ "with preempt_count %08x,"
+ " exited with %08x?\n", vec_nr,
+ softirq_to_name[vec_nr], h->action,
+ prev_count, preempt_count());
+ preempt_count() = prev_count;
+ }
+
+ rcu_bh_qs(cpu);
+ }
+ h++;
+ pending >>= 1;
+ } while (pending);
+
+ local_irq_disable();
pending = local_softirq_pending();
if (pending) {
@@ -429,7 +276,8 @@ restart:
wakeup_softirqd();
}
- lockdep_softirq_end();
+ lockdep_softirq_exit();
+
account_irq_exit_time(current);
__local_bh_enable(SOFTIRQ_OFFSET);
tsk_restore_flags(current, old_flags, PF_MEMALLOC);
@@ -458,261 +306,6 @@ asmlinkage void do_softirq(void)
#endif
/*
- * This function must run with irqs disabled!
- */
-void raise_softirq_irqoff(unsigned int nr)
-{
- __raise_softirq_irqoff(nr);
-
- /*
- * If we're in an interrupt or softirq, we're done
- * (this also catches softirq-disabled code). We will
- * actually run the softirq once we return from
- * the irq or softirq.
- *
- * Otherwise we wake up ksoftirqd to make sure we
- * schedule the softirq soon.
- */
- if (!in_interrupt())
- wakeup_softirqd();
-}
-
-void __raise_softirq_irqoff(unsigned int nr)
-{
- trace_softirq_raise(nr);
- or_softirq_pending(1UL << nr);
-}
-
-static inline void local_bh_disable_nort(void) { local_bh_disable(); }
-static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
-static void ksoftirqd_set_sched_params(unsigned int cpu) { }
-static void ksoftirqd_clr_sched_params(unsigned int cpu, bool online) { }
-
-#else /* !PREEMPT_RT_FULL */
-
-/*
- * On RT we serialize softirq execution with a cpu local lock per softirq
- */
-static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks);
-
-void __init softirq_early_init(void)
-{
- int i;
-
- for (i = 0; i < NR_SOFTIRQS; i++)
- local_irq_lock_init(local_softirq_locks[i]);
-}
-
-static void lock_softirq(int which)
-{
- local_lock(local_softirq_locks[which]);
-}
-
-static void unlock_softirq(int which)
-{
- local_unlock(local_softirq_locks[which]);
-}
-
-static void do_single_softirq(int which, int need_rcu_bh_qs)
-{
- unsigned long old_flags = current->flags;
-
- current->flags &= ~PF_MEMALLOC;
- vtime_account_irq_enter(current);
- current->flags |= PF_IN_SOFTIRQ;
- lockdep_softirq_enter();
- local_irq_enable();
- handle_softirq(which, smp_processor_id(), need_rcu_bh_qs);
- local_irq_disable();
- lockdep_softirq_exit();
- current->flags &= ~PF_IN_SOFTIRQ;
- vtime_account_irq_enter(current);
- tsk_restore_flags(current, old_flags, PF_MEMALLOC);
-}
-
-/*
- * Called with interrupts disabled. Process softirqs which were raised
- * in current context (or on behalf of ksoftirqd).
- */
-static void do_current_softirqs(int need_rcu_bh_qs)
-{
- while (current->softirqs_raised) {
- int i = __ffs(current->softirqs_raised);
- unsigned int pending, mask = (1U << i);
-
- current->softirqs_raised &= ~mask;
- local_irq_enable();
-
- /*
- * If the lock is contended, we boost the owner to
- * process the softirq or leave the critical section
- * now.
- */
- lock_softirq(i);
- local_irq_disable();
- softirq_set_runner(i);
- /*
- * Check with the local_softirq_pending() bits,
- * whether we need to process this still or if someone
- * else took care of it.
- */
- pending = local_softirq_pending();
- if (pending & mask) {
- set_softirq_pending(pending & ~mask);
- do_single_softirq(i, need_rcu_bh_qs);
- }
- softirq_clr_runner(i);
- unlock_softirq(i);
- WARN_ON(current->softirq_nestcnt != 1);
- }
-}
-
-void local_bh_disable(void)
-{
- if (++current->softirq_nestcnt == 1)
- migrate_disable();
-}
-EXPORT_SYMBOL(local_bh_disable);
-
-void local_bh_enable(void)
-{
- if (WARN_ON(current->softirq_nestcnt == 0))
- return;
-
- local_irq_disable();
- if (current->softirq_nestcnt == 1 && current->softirqs_raised)
- do_current_softirqs(1);
- local_irq_enable();
-
- if (--current->softirq_nestcnt == 0)
- migrate_enable();
-}
-EXPORT_SYMBOL(local_bh_enable);
-
-void local_bh_enable_ip(unsigned long ip)
-{
- local_bh_enable();
-}
-EXPORT_SYMBOL(local_bh_enable_ip);
-
-void _local_bh_enable(void)
-{
- if (WARN_ON(current->softirq_nestcnt == 0))
- return;
- if (--current->softirq_nestcnt == 0)
- migrate_enable();
-}
-EXPORT_SYMBOL(_local_bh_enable);
-
-int in_serving_softirq(void)
-{
- return current->flags & PF_IN_SOFTIRQ;
-}
-EXPORT_SYMBOL(in_serving_softirq);
-
-/* Called with preemption disabled */
-static void run_ksoftirqd(unsigned int cpu)
-{
- local_irq_disable();
- current->softirq_nestcnt++;
- do_current_softirqs(1);
- current->softirq_nestcnt--;
- rcu_note_context_switch(cpu);
- local_irq_enable();
-}
-
-/*
- * Called from netif_rx_ni(). Preemption enabled, but migration
- * disabled. So the cpu can't go away under us.
- */
-void thread_do_softirq(void)
-{
- if (!in_serving_softirq() && current->softirqs_raised) {
- current->softirq_nestcnt++;
- do_current_softirqs(0);
- current->softirq_nestcnt--;
- }
-}
-
-static void do_raise_softirq_irqoff(unsigned int nr)
-{
- trace_softirq_raise(nr);
- or_softirq_pending(1UL << nr);
-
- /*
- * If we are not in a hard interrupt and inside a bh disabled
- * region, we simply raise the flag on current. local_bh_enable()
- * will make sure that the softirq is executed. Otherwise we
- * delegate it to ksoftirqd.
- */
- if (!in_irq() && current->softirq_nestcnt)
- current->softirqs_raised |= (1U << nr);
- else if (__this_cpu_read(ksoftirqd))
- __this_cpu_read(ksoftirqd)->softirqs_raised |= (1U << nr);
-}
-
-void __raise_softirq_irqoff(unsigned int nr)
-{
- do_raise_softirq_irqoff(nr);
- if (!in_irq() && !current->softirq_nestcnt)
- wakeup_softirqd();
-}
-
-/*
- * This function must run with irqs disabled!
- */
-void raise_softirq_irqoff(unsigned int nr)
-{
- do_raise_softirq_irqoff(nr);
-
- /*
- * If we're in an hard interrupt we let irq return code deal
- * with the wakeup of ksoftirqd.
- */
- if (in_irq())
- return;
-
- /*
- * If we are in thread context but outside of a bh disabled
- * region, we need to wake ksoftirqd as well.
- *
- * CHECKME: Some of the places which do that could be wrapped
- * into local_bh_disable/enable pairs. Though it's unclear
- * whether this is worth the effort. To find those places just
- * raise a WARN() if the condition is met.
- */
- if (!current->softirq_nestcnt)
- wakeup_softirqd();
-}
-
-static inline int ksoftirqd_softirq_pending(void)
-{
- return current->softirqs_raised;
-}
-
-static inline void local_bh_disable_nort(void) { }
-static inline void _local_bh_enable_nort(void) { }
-
-static inline void ksoftirqd_set_sched_params(unsigned int cpu)
-{
- struct sched_param param = { .sched_priority = 1 };
-
- sched_setscheduler(current, SCHED_FIFO, &param);
- /* Take over all pending softirqs when starting */
- local_irq_disable();
- current->softirqs_raised = local_softirq_pending();
- local_irq_enable();
-}
-
-static inline void ksoftirqd_clr_sched_params(unsigned int cpu, bool online)
-{
- struct sched_param param = { .sched_priority = 0 };
-
- sched_setscheduler(current, SCHED_NORMAL, &param);
-}
-
-#endif /* PREEMPT_RT_FULL */
-/*
* Enter an interrupt context.
*/
void irq_enter(void)
@@ -725,9 +318,9 @@ void irq_enter(void)
* Prevent raise_softirq from needlessly waking up ksoftirqd
* here, as softirq will be serviced on return from interrupt.
*/
- local_bh_disable_nort();
+ local_bh_disable();
tick_check_idle(cpu);
- _local_bh_enable_nort();
+ _local_bh_enable();
}
__irq_enter();
@@ -735,9 +328,7 @@ void irq_enter(void)
static inline void invoke_softirq(void)
{
-#ifndef CONFIG_PREEMPT_RT_FULL
if (!force_irqthreads) {
- lockdep_softirq_from_hardirq();
/*
* We can safely execute softirq on the current stack if
* it is the irq stack, because it should be near empty
@@ -750,15 +341,6 @@ static inline void invoke_softirq(void)
} else {
wakeup_softirqd();
}
-#else /* PREEMPT_RT_FULL */
- unsigned long flags;
-
- local_irq_save(flags);
- if (__this_cpu_read(ksoftirqd) &&
- __this_cpu_read(ksoftirqd)->softirqs_raised)
- wakeup_softirqd();
- local_irq_restore(flags);
-#endif
}
static inline void tick_irq_exit(void)
@@ -786,13 +368,33 @@ void irq_exit(void)
#endif
account_irq_exit_time(current);
+ trace_hardirq_exit();
sub_preempt_count(HARDIRQ_OFFSET);
if (!in_interrupt() && local_softirq_pending())
invoke_softirq();
tick_irq_exit();
rcu_irq_exit();
- trace_hardirq_exit(); /* must be last! */
+}
+
+/*
+ * This function must run with irqs disabled!
+ */
+inline void raise_softirq_irqoff(unsigned int nr)
+{
+ __raise_softirq_irqoff(nr);
+
+ /*
+ * If we're in an interrupt or softirq, we're done
+ * (this also catches softirq-disabled code). We will
+ * actually run the softirq once we return from
+ * the irq or softirq.
+ *
+ * Otherwise we wake up ksoftirqd to make sure we
+ * schedule the softirq soon.
+ */
+ if (!in_interrupt())
+ wakeup_softirqd();
}
void raise_softirq(unsigned int nr)
@@ -804,6 +406,12 @@ void raise_softirq(unsigned int nr)
local_irq_restore(flags);
}
+void __raise_softirq_irqoff(unsigned int nr)
+{
+ trace_softirq_raise(nr);
+ or_softirq_pending(1UL << nr);
+}
+
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
softirq_vec[nr].action = action;
@@ -821,45 +429,15 @@ struct tasklet_head
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
-static void inline
-__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr)
-{
- if (tasklet_trylock(t)) {
-again:
- /* We may have been preempted before tasklet_trylock
- * and __tasklet_action may have already run.
- * So double check the sched bit while the takslet
- * is locked before adding it to the list.
- */
- if (test_bit(TASKLET_STATE_SCHED, &t->state)) {
- t->next = NULL;
- *head->tail = t;
- head->tail = &(t->next);
- raise_softirq_irqoff(nr);
- tasklet_unlock(t);
- } else {
- /* This is subtle. If we hit the corner case above
- * It is possible that we get preempted right here,
- * and another task has successfully called
- * tasklet_schedule(), then this function, and
- * failed on the trylock. Thus we must be sure
- * before releasing the tasklet lock, that the
- * SCHED_BIT is clear. Otherwise the tasklet
- * may get its SCHED_BIT set, but not added to the
- * list
- */
- if (!tasklet_tryunlock(t))
- goto again;
- }
- }
-}
-
void __tasklet_schedule(struct tasklet_struct *t)
{
unsigned long flags;
local_irq_save(flags);
- __tasklet_common_schedule(t, &__get_cpu_var(tasklet_vec), TASKLET_SOFTIRQ);
+ t->next = NULL;
+ *__this_cpu_read(tasklet_vec.tail) = t;
+ __this_cpu_write(tasklet_vec.tail, &(t->next));
+ raise_softirq_irqoff(TASKLET_SOFTIRQ);
local_irq_restore(flags);
}
@@ -870,7 +448,10 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
unsigned long flags;
local_irq_save(flags);
- __tasklet_common_schedule(t, &__get_cpu_var(tasklet_hi_vec), HI_SOFTIRQ);
+ t->next = NULL;
+ *__this_cpu_read(tasklet_hi_vec.tail) = t;
+ __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
+ raise_softirq_irqoff(HI_SOFTIRQ);
local_irq_restore(flags);
}
@@ -878,117 +459,48 @@ EXPORT_SYMBOL(__tasklet_hi_schedule);
void __tasklet_hi_schedule_first(struct tasklet_struct *t)
{
- __tasklet_hi_schedule(t);
-}
+ BUG_ON(!irqs_disabled());
-EXPORT_SYMBOL(__tasklet_hi_schedule_first);
-
-void tasklet_enable(struct tasklet_struct *t)
-{
- if (!atomic_dec_and_test(&t->count))
- return;
- if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
- tasklet_schedule(t);
+ t->next = __this_cpu_read(tasklet_hi_vec.head);
+ __this_cpu_write(tasklet_hi_vec.head, t);
+ __raise_softirq_irqoff(HI_SOFTIRQ);
}
-EXPORT_SYMBOL(tasklet_enable);
+EXPORT_SYMBOL(__tasklet_hi_schedule_first);
-void tasklet_hi_enable(struct tasklet_struct *t)
+static void tasklet_action(struct softirq_action *a)
{
- if (!atomic_dec_and_test(&t->count))
- return;
- if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
- tasklet_hi_schedule(t);
-}
-
-EXPORT_SYMBOL(tasklet_hi_enable);
+ struct tasklet_struct *list;
-static void
-__tasklet_action(struct softirq_action *a, struct tasklet_struct *list)
-{
- int loops = 1000000;
+ local_irq_disable();
+ list = __this_cpu_read(tasklet_vec.head);
+ __this_cpu_write(tasklet_vec.head, NULL);
+ __this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head);
+ local_irq_enable();
while (list) {
struct tasklet_struct *t = list;
list = list->next;
- /*
- * Should always succeed - after a tasklist got on the
- * list (after getting the SCHED bit set from 0 to 1),
- * nothing but the tasklet softirq it got queued to can
- * lock it:
- */
- if (!tasklet_trylock(t)) {
- WARN_ON(1);
- continue;
- }
-
- t->next = NULL;
-
- /*
- * If we cannot handle the tasklet because it's disabled,
- * mark it as pending. tasklet_enable() will later
- * re-schedule the tasklet.
- */
- if (unlikely(atomic_read(&t->count))) {
-out_disabled:
- /* implicit unlock: */
- wmb();
- t->state = TASKLET_STATEF_PENDING;
- continue;
- }
-
- /*
- * After this point on the tasklet might be rescheduled
- * on another CPU, but it can only be added to another
- * CPU's tasklet list if we unlock the tasklet (which we
- * dont do yet).
- */
- if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
- WARN_ON(1);
-
-again:
- t->func(t->data);
-
- /*
- * Try to unlock the tasklet. We must use cmpxchg, because
- * another CPU might have scheduled or disabled the tasklet.
- * We only allow the STATE_RUN -> 0 transition here.
- */
- while (!tasklet_tryunlock(t)) {
- /*
- * If it got disabled meanwhile, bail out:
- */
- if (atomic_read(&t->count))
- goto out_disabled;
- /*
- * If it got scheduled meanwhile, re-execute
- * the tasklet function:
- */
- if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
- goto again;
- if (!--loops) {
- printk("hm, tasklet state: %08lx\n", t->state);
- WARN_ON(1);
+ if (tasklet_trylock(t)) {
+ if (!atomic_read(&t->count)) {
+ if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
+ BUG();
+ t->func(t->data);
tasklet_unlock(t);
- break;
+ continue;
}
+ tasklet_unlock(t);
}
- }
-}
-
-static void tasklet_action(struct softirq_action *a)
-{
- struct tasklet_struct *list;
- local_irq_disable();
- list = __get_cpu_var(tasklet_vec).head;
- __get_cpu_var(tasklet_vec).head = NULL;
- __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
- local_irq_enable();
-
- __tasklet_action(a, list);
+ local_irq_disable();
+ t->next = NULL;
+ *__this_cpu_read(tasklet_vec.tail) = t;
+ __this_cpu_write(tasklet_vec.tail, &(t->next));
+ __raise_softirq_irqoff(TASKLET_SOFTIRQ);
+ local_irq_enable();
+ }
}
static void tasklet_hi_action(struct softirq_action *a)
@@ -1001,7 +513,29 @@ static void tasklet_hi_action(struct softirq_action *a)
__this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
local_irq_enable();
- __tasklet_action(a, list);
+ while (list) {
+ struct tasklet_struct *t = list;
+
+ list = list->next;
+
+ if (tasklet_trylock(t)) {
+ if (!atomic_read(&t->count)) {
+ if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
+ BUG();
+ t->func(t->data);
+ tasklet_unlock(t);
+ continue;
+ }
+ tasklet_unlock(t);
+ }
+
+ local_irq_disable();
+ t->next = NULL;
+ *__this_cpu_read(tasklet_hi_vec.tail) = t;
+ __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
+ __raise_softirq_irqoff(HI_SOFTIRQ);
+ local_irq_enable();
+ }
}
@@ -1024,7 +558,7 @@ void tasklet_kill(struct tasklet_struct *t)
while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
do {
- msleep(1);
+ yield();
} while (test_bit(TASKLET_STATE_SCHED, &t->state));
}
tasklet_unlock_wait(t);
@@ -1228,26 +762,22 @@ void __init softirq_init(void)
open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}
-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
-void tasklet_unlock_wait(struct tasklet_struct *t)
+static int ksoftirqd_should_run(unsigned int cpu)
{
- while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
- /*
- * Hack for now to avoid this busy-loop:
- */
-#ifdef CONFIG_PREEMPT_RT_FULL
- msleep(1);
-#else
- barrier();
-#endif
- }
+ return local_softirq_pending();
}
-EXPORT_SYMBOL(tasklet_unlock_wait);
-#endif
-static int ksoftirqd_should_run(unsigned int cpu)
+static void run_ksoftirqd(unsigned int cpu)
{
- return ksoftirqd_softirq_pending();
+ local_irq_disable();
+ if (local_softirq_pending()) {
+ __do_softirq();
+ rcu_note_context_switch(cpu);
+ local_irq_enable();
+ cond_resched();
+ return;
+ }
+ local_irq_enable();
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -1330,8 +860,6 @@ static struct notifier_block cpu_nfb = {
static struct smp_hotplug_thread softirq_threads = {
.store = &ksoftirqd,
- .setup = ksoftirqd_set_sched_params,
- .cleanup = ksoftirqd_clr_sched_params,
.thread_should_run = ksoftirqd_should_run,
.thread_fn = run_ksoftirqd,
.thread_comm = "ksoftirqd/%u",
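The restored tasklet code above queues work by appending through a per-CPU tail pointer (t->next = NULL; *tail = t; tail = &t->next) and services it by detaching the whole chain at once. A small self-contained sketch of that tail-pointer list follows; the struct and function names are illustrative only.

    #include <stddef.h>

    struct node { struct node *next; };

    struct queue {
            struct node  *head;
            struct node **tail;     /* slot the next append writes into */
    };

    static void queue_init(struct queue *q)
    {
            q->head = NULL;
            q->tail = &q->head;     /* empty list: tail points at head  */
    }

    static void queue_append(struct queue *q, struct node *n)
    {
            n->next  = NULL;
            *q->tail = n;           /* link into the current tail slot  */
            q->tail  = &n->next;    /* future appends go after n        */
    }

    static struct node *queue_drain(struct queue *q)
    {
            struct node *list = q->head;

            q->head = NULL;         /* detach the whole chain, as       */
            q->tail = &q->head;     /* tasklet_action() does above      */
            return list;            /* caller walks 'list' via ->next   */
    }
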
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 5c76166..4b082b5 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -124,11 +124,8 @@ void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \
* __[spin|read|write]_lock_bh()
*/
BUILD_LOCK_OPS(spin, raw_spinlock);
-
-#ifndef CONFIG_PREEMPT_RT_FULL
BUILD_LOCK_OPS(read, rwlock);
BUILD_LOCK_OPS(write, rwlock);
-#endif
#endif
@@ -212,8 +209,6 @@ void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
EXPORT_SYMBOL(_raw_spin_unlock_bh);
#endif
-#ifndef CONFIG_PREEMPT_RT_FULL
-
#ifndef CONFIG_INLINE_READ_TRYLOCK
int __lockfunc _raw_read_trylock(rwlock_t *lock)
{
@@ -358,8 +353,6 @@ void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
EXPORT_SYMBOL(_raw_write_unlock_bh);
#endif
-#endif /* !PREEMPT_RT_FULL */
-
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 5f02a3f..c09f295 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -29,12 +29,12 @@ struct cpu_stop_done {
atomic_t nr_todo; /* nr left to execute */
bool executed; /* actually executed? */
int ret; /* collected return value */
- struct task_struct *waiter; /* woken when nr_todo reaches 0 */
+ struct completion completion; /* fired if nr_todo reaches 0 */
};
/* the actual stopper, one per every possible cpu, enabled on online cpus */
struct cpu_stopper {
- raw_spinlock_t lock;
+ spinlock_t lock;
bool enabled; /* is this stopper enabled? */
struct list_head works; /* list of pending works */
};
@@ -47,7 +47,7 @@ static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
memset(done, 0, sizeof(*done));
atomic_set(&done->nr_todo, nr_todo);
- done->waiter = current;
+ init_completion(&done->completion);
}
/* signal completion unless @done is NULL */
@@ -56,10 +56,8 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
if (done) {
if (executed)
done->executed = true;
- if (atomic_dec_and_test(&done->nr_todo)) {
- wake_up_process(done->waiter);
- done->waiter = NULL;
- }
+ if (atomic_dec_and_test(&done->nr_todo))
+ complete(&done->completion);
}
}
@@ -71,7 +69,7 @@ static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
unsigned long flags;
- raw_spin_lock_irqsave(&stopper->lock, flags);
+ spin_lock_irqsave(&stopper->lock, flags);
if (stopper->enabled) {
list_add_tail(&work->list, &stopper->works);
@@ -79,23 +77,7 @@ static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
} else
cpu_stop_signal_done(work->done, false);
- raw_spin_unlock_irqrestore(&stopper->lock, flags);
-}
-
-static void wait_for_stop_done(struct cpu_stop_done *done)
-{
- set_current_state(TASK_UNINTERRUPTIBLE);
- while (atomic_read(&done->nr_todo)) {
- schedule();
- set_current_state(TASK_UNINTERRUPTIBLE);
- }
- /*
- * We need to wait until cpu_stop_signal_done() has cleared
- * done->waiter.
- */
- while (done->waiter)
- cpu_relax();
- set_current_state(TASK_RUNNING);
+ spin_unlock_irqrestore(&stopper->lock, flags);
}
/**
@@ -129,7 +111,7 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
cpu_stop_init_done(&done, 1);
cpu_stop_queue_work(cpu, &work);
- wait_for_stop_done(&done);
+ wait_for_completion(&done.completion);
return done.executed ? done.ret : -ENOENT;
}
@@ -155,12 +137,11 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
-static DEFINE_MUTEX(stopper_lock);
static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
static void queue_stop_cpus_work(const struct cpumask *cpumask,
cpu_stop_fn_t fn, void *arg,
- struct cpu_stop_done *done, bool inactive)
+ struct cpu_stop_done *done)
{
struct cpu_stop_work *work;
unsigned int cpu;
@@ -174,18 +155,14 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
}
/*
- * Make sure that all work is queued on all cpus before we
- * any of the cpus can execute it.
+ * Disable preemption while queueing to avoid getting
+ * preempted by a stopper which might wait for other stoppers
+ * to enter @fn which can lead to deadlock.
*/
- if (!inactive) {
- mutex_lock(&stopper_lock);
- } else {
- while (!mutex_trylock(&stopper_lock))
- cpu_relax();
- }
+ preempt_disable();
for_each_cpu(cpu, cpumask)
cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
- mutex_unlock(&stopper_lock);
+ preempt_enable();
}
static int __stop_cpus(const struct cpumask *cpumask,
@@ -194,8 +171,8 @@ static int __stop_cpus(const struct cpumask *cpumask,
struct cpu_stop_done done;
cpu_stop_init_done(&done, cpumask_weight(cpumask));
- queue_stop_cpus_work(cpumask, fn, arg, &done, false);
- wait_for_stop_done(&done);
+ queue_stop_cpus_work(cpumask, fn, arg, &done);
+ wait_for_completion(&done.completion);
return done.executed ? done.ret : -ENOENT;
}
@@ -274,9 +251,9 @@ static int cpu_stop_should_run(unsigned int cpu)
unsigned long flags;
int run;
- raw_spin_lock_irqsave(&stopper->lock, flags);
+ spin_lock_irqsave(&stopper->lock, flags);
run = !list_empty(&stopper->works);
- raw_spin_unlock_irqrestore(&stopper->lock, flags);
+ spin_unlock_irqrestore(&stopper->lock, flags);
return run;
}
@@ -288,13 +265,13 @@ static void cpu_stopper_thread(unsigned int cpu)
repeat:
work = NULL;
- raw_spin_lock_irq(&stopper->lock);
+ spin_lock_irq(&stopper->lock);
if (!list_empty(&stopper->works)) {
work = list_first_entry(&stopper->works,
struct cpu_stop_work, list);
list_del_init(&work->list);
}
- raw_spin_unlock_irq(&stopper->lock);
+ spin_unlock_irq(&stopper->lock);
if (work) {
cpu_stop_fn_t fn = work->fn;
@@ -302,16 +279,6 @@ repeat:
struct cpu_stop_done *done = work->done;
char ksym_buf[KSYM_NAME_LEN] __maybe_unused;
- /*
- * Wait until the stopper finished scheduling on all
- * cpus
- */
- mutex_lock(&stopper_lock);
- /*
- * Let other cpu threads continue as well
- */
- mutex_unlock(&stopper_lock);
-
/* cpu stop callbacks are not allowed to sleep */
preempt_disable();
@@ -326,13 +293,7 @@ repeat:
kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
ksym_buf), arg);
- /*
- * Make sure that the wakeup and setting done->waiter
- * to NULL is atomic.
- */
- local_irq_disable();
cpu_stop_signal_done(done, true);
- local_irq_enable();
goto repeat;
}
}
@@ -351,20 +312,20 @@ static void cpu_stop_park(unsigned int cpu)
unsigned long flags;
/* drain remaining works */
- raw_spin_lock_irqsave(&stopper->lock, flags);
+ spin_lock_irqsave(&stopper->lock, flags);
list_for_each_entry(work, &stopper->works, list)
cpu_stop_signal_done(work->done, false);
stopper->enabled = false;
- raw_spin_unlock_irqrestore(&stopper->lock, flags);
+ spin_unlock_irqrestore(&stopper->lock, flags);
}
static void cpu_stop_unpark(unsigned int cpu)
{
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
- raw_spin_lock_irq(&stopper->lock);
+ spin_lock_irq(&stopper->lock);
stopper->enabled = true;
- raw_spin_unlock_irq(&stopper->lock);
+ spin_unlock_irq(&stopper->lock);
}
static struct smp_hotplug_thread cpu_stop_threads = {
@@ -386,7 +347,7 @@ static int __init cpu_stop_init(void)
for_each_possible_cpu(cpu) {
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
- raw_spin_lock_init(&stopper->lock);
+ spin_lock_init(&stopper->lock);
INIT_LIST_HEAD(&stopper->works);
}
@@ -569,11 +530,11 @@ int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
set_state(&smdata, STOPMACHINE_PREPARE);
cpu_stop_init_done(&done, num_active_cpus());
queue_stop_cpus_work(cpu_active_mask, stop_machine_cpu_stop, &smdata,
- &done, true);
+ &done);
ret = stop_machine_cpu_stop(&smdata);
/* Busy wait for completion. */
- while (atomic_read(&done.nr_todo))
+ while (!completion_done(&done.completion))
cpu_relax();
mutex_unlock(&stop_cpus_mutex);
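The stop_machine.c hunks above go back to completions for waiting on stoppers: every worker decrements a todo counter and the last one fires the completion the requester sleeps on. A hedged sketch of that pattern follows; demo_done and its helpers are made-up names, not from the patch.

    #include <linux/completion.h>
    #include <linux/atomic.h>

    struct demo_done {
            atomic_t          todo;         /* workers still outstanding */
            struct completion completion;   /* fired by the last worker  */
    };

    static void demo_init(struct demo_done *d, int nr)
    {
            atomic_set(&d->todo, nr);
            init_completion(&d->completion);
    }

    static void demo_worker_finished(struct demo_done *d)
    {
            if (atomic_dec_and_test(&d->todo))   /* last one out ...        */
                    complete(&d->completion);    /* ... wakes the requester */
    }

    static void demo_wait(struct demo_done *d)
    {
            wait_for_completion(&d->completion); /* sleeps until complete() */
    }
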
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 23d7203..a6a5bf5 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -73,8 +73,7 @@ static struct clocksource clocksource_jiffies = {
.shift = JIFFIES_SHIFT,
};
-__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
-__cacheline_aligned_in_smp seqcount_t jiffies_seq;
+__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
#if (BITS_PER_LONG < 64)
u64 get_jiffies_64(void)
@@ -83,9 +82,9 @@ u64 get_jiffies_64(void)
u64 ret;
do {
- seq = read_seqcount_begin(&jiffies_seq);
+ seq = read_seqbegin(&jiffies_lock);
ret = jiffies_64;
- } while (read_seqcount_retry(&jiffies_seq, seq));
+ } while (read_seqretry(&jiffies_lock, seq));
return ret;
}
EXPORT_SYMBOL(get_jiffies_64);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index d6132cd..af8d1d4 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -10,7 +10,6 @@
#include <linux/workqueue.h>
#include <linux/hrtimer.h>
#include <linux/jiffies.h>
-#include <linux/kthread.h>
#include <linux/math64.h>
#include <linux/timex.h>
#include <linux/time.h>
@@ -518,49 +517,10 @@ static void sync_cmos_clock(struct work_struct *work)
schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next));
}
-#ifdef CONFIG_PREEMPT_RT_FULL
-/*
- * RT can not call schedule_delayed_work from real interrupt context.
- * Need to make a thread to do the real work.
- */
-static struct task_struct *cmos_delay_thread;
-static bool do_cmos_delay;
-
-static int run_cmos_delay(void *ignore)
-{
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (do_cmos_delay) {
- do_cmos_delay = false;
- schedule_delayed_work(&sync_cmos_work, 0);
- }
- schedule();
- }
- __set_current_state(TASK_RUNNING);
- return 0;
-}
-
-void ntp_notify_cmos_timer(void)
-{
- do_cmos_delay = true;
- /* Make visible before waking up process */
- smp_wmb();
- wake_up_process(cmos_delay_thread);
-}
-
-static __init int create_cmos_delay_thread(void)
-{
- cmos_delay_thread = kthread_run(run_cmos_delay, NULL, "kcmosdelayd");
- BUG_ON(!cmos_delay_thread);
- return 0;
-}
-early_initcall(create_cmos_delay_thread);
-#else
void ntp_notify_cmos_timer(void)
{
schedule_delayed_work(&sync_cmos_work, 0);
}
-#endif /* CONFIG_PREEMPT_RT_FULL */
#else
void ntp_notify_cmos_timer(void) { }
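With the RT-only kcmosdelayd thread removed, ntp_notify_cmos_timer() simply queues delayed work again. A minimal sketch of that deferral pattern on the system workqueue; demo_sync and demo_sync_fn() are made-up names, not from the patch.

    #include <linux/workqueue.h>
    #include <linux/jiffies.h>

    static void demo_sync_fn(struct work_struct *work)
    {
            /* runs later in process context on the system workqueue */
    }
    static DECLARE_DELAYED_WORK(demo_sync, demo_sync_fn);

    static void demo_kick(void)
    {
            /* queue the handler to run roughly half a second from now */
            schedule_delayed_work(&demo_sync, msecs_to_jiffies(500));
    }
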
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 1b80eb0..64522ec 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -63,15 +63,13 @@ int tick_is_oneshot_available(void)
static void tick_periodic(int cpu)
{
if (tick_do_timer_cpu == cpu) {
- raw_spin_lock(&jiffies_lock);
- write_seqcount_begin(&jiffies_seq);
+ write_seqlock(&jiffies_lock);
/* Keep track of the next tick event */
tick_next_period = ktime_add(tick_next_period, tick_period);
do_timer(1);
- write_seqcount_end(&jiffies_seq);
- raw_spin_unlock(&jiffies_lock);
+ write_sequnlock(&jiffies_lock);
}
update_process_times(user_mode(get_irq_regs()));
@@ -132,9 +130,9 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
ktime_t next;
do {
- seq = read_seqcount_begin(&jiffies_seq);
+ seq = read_seqbegin(&jiffies_lock);
next = tick_next_period;
- } while (read_seqcount_retry(&jiffies_seq, seq));
+ } while (read_seqretry(&jiffies_lock, seq));
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 7e5e7f8..bc906ca 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -4,8 +4,7 @@
#include <linux/hrtimer.h>
#include <linux/tick.h>
-extern raw_spinlock_t jiffies_lock;
-extern seqcount_t jiffies_seq;
+extern seqlock_t jiffies_lock;
#define CS_NAME_LEN 32
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3740f28..ea20f7d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -62,8 +62,7 @@ static void tick_do_update_jiffies64(ktime_t now)
return;
/* Reevalute with jiffies_lock held */
- raw_spin_lock(&jiffies_lock);
- write_seqcount_begin(&jiffies_seq);
+ write_seqlock(&jiffies_lock);
delta = ktime_sub(now, last_jiffies_update);
if (delta.tv64 >= tick_period.tv64) {
@@ -86,8 +85,7 @@ static void tick_do_update_jiffies64(ktime_t now)
/* Keep the tick_next_period variable up to date */
tick_next_period = ktime_add(last_jiffies_update, tick_period);
}
- write_seqcount_end(&jiffies_seq);
- raw_spin_unlock(&jiffies_lock);
+ write_sequnlock(&jiffies_lock);
}
/*
@@ -97,14 +95,12 @@ static ktime_t tick_init_jiffy_update(void)
{
ktime_t period;
- raw_spin_lock(&jiffies_lock);
- write_seqcount_begin(&jiffies_seq);
+ write_seqlock(&jiffies_lock);
/* Did we start the jiffies update yet ? */
if (last_jiffies_update.tv64 == 0)
last_jiffies_update = tick_next_period;
period = last_jiffies_update;
- write_seqcount_end(&jiffies_seq);
- raw_spin_unlock(&jiffies_lock);
+ write_sequnlock(&jiffies_lock);
return period;
}
@@ -221,7 +217,6 @@ static void nohz_full_kick_work_func(struct irq_work *work)
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
.func = nohz_full_kick_work_func,
- .flags = IRQ_WORK_HARD_IRQ,
};
/*
@@ -543,11 +538,11 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
/* Read jiffies and the time when jiffies were updated last */
do {
- seq = read_seqcount_begin(&jiffies_seq);
+ seq = read_seqbegin(&jiffies_lock);
last_update = last_jiffies_update;
last_jiffies = jiffies;
time_delta = timekeeping_max_deferment();
- } while (read_seqcount_retry(&jiffies_seq, seq));
+ } while (read_seqretry(&jiffies_lock, seq));
if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) ||
arch_needs_cpu(cpu) || irq_work_needs_cpu()) {
@@ -725,7 +720,14 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
return false;
if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
- softirq_check_pending_idle();
+ static int ratelimit;
+
+ if (ratelimit < 10 &&
+ (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
+ pr_warn("NOHZ: local_softirq_pending %02x\n",
+ (unsigned int) local_softirq_pending());
+ ratelimit++;
+ }
return false;
}
@@ -1110,7 +1112,6 @@ void tick_setup_sched_timer(void)
* Emulate tick processing via per-CPU hrtimers:
*/
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
- ts->sched_timer.irqsafe = 1;
ts->sched_timer.function = tick_sched_timer;
/* Get the next period (per cpu) */
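The tick-sched.c hunk above keeps the stock hrtimer setup for the per-CPU tick emulation and only drops the RT-specific .irqsafe flag. A hedged sketch of that setup follows; demo_timer and demo_timer_fn() are illustrative names, not from the patch.

    #include <linux/hrtimer.h>
    #include <linux/ktime.h>

    static struct hrtimer demo_timer;

    static enum hrtimer_restart demo_timer_fn(struct hrtimer *t)
    {
            /* runs in hard-irq context when the timer expires */
            return HRTIMER_NORESTART;       /* one-shot for this sketch */
    }

    static void demo_timer_setup(void)
    {
            hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
            demo_timer.function = demo_timer_fn;
            /* fire once, roughly 10ms from now */
            hrtimer_start(&demo_timer, ktime_set(0, 10 * NSEC_PER_MSEC),
                          HRTIMER_MODE_REL);
    }
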
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d3150a7..bfca770 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1754,9 +1754,7 @@ EXPORT_SYMBOL(hardpps);
*/
void xtime_update(unsigned long ticks)
{
- raw_spin_lock(&jiffies_lock);
- write_seqcount_begin(&jiffies_seq);
+ write_seqlock(&jiffies_lock);
do_timer(ticks);
- write_seqcount_end(&jiffies_seq);
- raw_spin_unlock(&jiffies_lock);
+ write_sequnlock(&jiffies_lock);
}
diff --git a/kernel/timer.c b/kernel/timer.c
index cc34e42..4296d13 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -78,9 +78,6 @@ struct tvec_root {
struct tvec_base {
spinlock_t lock;
struct timer_list *running_timer;
-#ifdef CONFIG_PREEMPT_RT_FULL
- wait_queue_head_t wait_for_running_timer;
-#endif
unsigned long timer_jiffies;
unsigned long next_timer;
unsigned long active_timers;
@@ -723,36 +720,6 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
}
}
-#ifndef CONFIG_PREEMPT_RT_FULL
-static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
- struct tvec_base *old,
- struct tvec_base *new)
-{
- /* See the comment in lock_timer_base() */
- timer_set_base(timer, NULL);
- spin_unlock(&old->lock);
- spin_lock(&new->lock);
- timer_set_base(timer, new);
- return new;
-}
-#else
-static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
- struct tvec_base *old,
- struct tvec_base *new)
-{
- /*
- * We cannot do the above because we might be preempted and
- * then the preempter would see NULL and loop forever.
- */
- if (spin_trylock(&new->lock)) {
- timer_set_base(timer, new);
- spin_unlock(&old->lock);
- return new;
- }
- return old;
-}
-#endif
-
static inline int
__mod_timer(struct timer_list *timer, unsigned long expires,
bool pending_only, int pinned)
@@ -772,15 +739,12 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
debug_activate(timer, expires);
- preempt_disable_rt();
cpu = smp_processor_id();
#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP)
if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
cpu = get_nohz_timer_target();
#endif
- preempt_enable_rt();
-
new_base = per_cpu(tvec_bases, cpu);
if (base != new_base) {
@@ -791,8 +755,14 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
* handler yet has not finished. This also guarantees that
* the timer is serialized wrt itself.
*/
- if (likely(base->running_timer != timer))
- base = switch_timer_base(timer, base, new_base);
+ if (likely(base->running_timer != timer)) {
+ /* See the comment in lock_timer_base() */
+ timer_set_base(timer, NULL);
+ spin_unlock(&base->lock);
+ base = new_base;
+ spin_lock(&base->lock);
+ timer_set_base(timer, base);
+ }
}
timer->expires = expires;
@@ -975,29 +945,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
}
EXPORT_SYMBOL_GPL(add_timer_on);
-#ifdef CONFIG_PREEMPT_RT_FULL
-/*
- * Wait for a running timer
- */
-static void wait_for_running_timer(struct timer_list *timer)
-{
- struct tvec_base *base = timer->base;
-
- if (base->running_timer == timer)
- wait_event(base->wait_for_running_timer,
- base->running_timer != timer);
-}
-
-# define wakeup_timer_waiters(b) wake_up(&(b)->wait_for_running_timer)
-#else
-static inline void wait_for_running_timer(struct timer_list *timer)
-{
- cpu_relax();
-}
-
-# define wakeup_timer_waiters(b) do { } while (0)
-#endif
-
/**
* del_timer - deactive a timer.
* @timer: the timer to be deactivated
@@ -1055,7 +1002,7 @@ int try_to_del_timer_sync(struct timer_list *timer)
}
EXPORT_SYMBOL(try_to_del_timer_sync);
-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
+#ifdef CONFIG_SMP
/**
* del_timer_sync - deactivate a timer and wait for the handler to finish.
* @timer: the timer to be deactivated
@@ -1115,7 +1062,7 @@ int del_timer_sync(struct timer_list *timer)
int ret = try_to_del_timer_sync(timer);
if (ret >= 0)
return ret;
- wait_for_running_timer(timer);
+ cpu_relax();
}
}
EXPORT_SYMBOL(del_timer_sync);
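With wait_for_running_timer() gone, del_timer_sync() is back to the mainline loop: try to cancel, and if the callback is mid-flight, relax and retry. A toy user-space rendition of that loop; try_cancel() and struct job are made up for the sketch and stand in for try_to_del_timer_sync() and the timer:

    #include <sched.h>

    struct job { int running; int pending; };

    /* Hypothetical helper: succeed unless the callback is mid-flight. */
    static int try_cancel(struct job *job)
    {
        int was_pending;

        if (job->running)
            return -1;
        was_pending = job->pending;
        job->pending = 0;
        return was_pending;
    }

    /* Mirror of the del_timer_sync() loop above: retry until the handler is
       no longer running, yielding instead of a kernel cpu_relax(). */
    static int cancel_sync(struct job *job)
    {
        for (;;) {
            int ret = try_cancel(job);
            if (ret >= 0)
                return ret;
            sched_yield();
        }
    }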
@@ -1232,17 +1179,15 @@ static inline void __run_timers(struct tvec_base *base)
if (irqsafe) {
spin_unlock(&base->lock);
call_timer_fn(timer, fn, data);
- base->running_timer = NULL;
spin_lock(&base->lock);
} else {
spin_unlock_irq(&base->lock);
call_timer_fn(timer, fn, data);
- base->running_timer = NULL;
spin_lock_irq(&base->lock);
}
}
}
- wakeup_timer_waiters(base);
+ base->running_timer = NULL;
spin_unlock_irq(&base->lock);
}
@@ -1382,31 +1327,17 @@ unsigned long get_next_timer_interrupt(unsigned long now)
if (cpu_is_offline(smp_processor_id()))
return expires;
-#ifdef CONFIG_PREEMPT_RT_FULL
- /*
- * On PREEMPT_RT we cannot sleep here. If the trylock does not
- * succeed then we return the worst-case 'expires in 1 tick'
- * value. We use the rt functions here directly to avoid a
- * migrate_disable() call.
- */
- if (!spin_do_trylock(&base->lock))
- return now + 1;
-#else
spin_lock(&base->lock);
-#endif
if (base->active_timers) {
if (time_before_eq(base->next_timer, base->timer_jiffies))
base->next_timer = __next_timer_interrupt(base);
expires = base->next_timer;
}
-#ifdef CONFIG_PREEMPT_RT_FULL
- rt_spin_unlock_after_trylock_in_irq(&base->lock);
-#else
spin_unlock(&base->lock);
-#endif
if (time_before_eq(expires, now))
return now;
+
return cmp_next_hrtimer_event(now, expires);
}
#endif
@@ -1422,13 +1353,13 @@ void update_process_times(int user_tick)
/* Note: this timer irq context must be accounted for as well. */
account_process_tick(p, user_tick);
- scheduler_tick();
run_local_timers();
rcu_check_callbacks(cpu, user_tick);
-#if defined(CONFIG_IRQ_WORK)
+#ifdef CONFIG_IRQ_WORK
if (in_irq())
irq_work_run();
#endif
+ scheduler_tick();
run_posix_cpu_timers(p);
}
@@ -1439,9 +1370,7 @@ static void run_timer_softirq(struct softirq_action *h)
{
struct tvec_base *base = __this_cpu_read(tvec_bases);
-#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
- irq_work_run();
-#endif
+ hrtimer_run_pending();
if (time_after_eq(jiffies, base->timer_jiffies))
__run_timers(base);
@@ -1452,39 +1381,8 @@ static void run_timer_softirq(struct softirq_action *h)
*/
void run_local_timers(void)
{
- struct tvec_base *base = __this_cpu_read(tvec_bases);
-
hrtimer_run_queues();
- /*
- * We can access this lockless as we are in the timer
- * interrupt. If there are no timers queued, nothing to do in
- * the timer softirq.
- */
-#ifdef CONFIG_PREEMPT_RT_FULL
- /* On RT, irq work runs from softirq */
- if (irq_work_needs_cpu()) {
- raise_softirq(TIMER_SOFTIRQ);
- return;
- }
-
- if (!spin_do_trylock(&base->lock)) {
- raise_softirq(TIMER_SOFTIRQ);
- return;
- }
-#endif
-
- if (!base->active_timers)
- goto out;
-
- /* Check whether the next pending timer has expired */
- if (time_before_eq(base->next_timer, jiffies))
- raise_softirq(TIMER_SOFTIRQ);
-out:
-#ifdef CONFIG_PREEMPT_RT_FULL
- rt_spin_unlock_after_trylock_in_irq(&base->lock);
-#endif
- /* The ; ensures that gcc won't complain in the !RT case */
- ;
+ raise_softirq(TIMER_SOFTIRQ);
}
#ifdef __ARCH_WANT_SYS_ALARM
@@ -1649,9 +1547,6 @@ static int init_timers_cpu(int cpu)
base = per_cpu(tvec_bases, cpu);
}
-#ifdef CONFIG_PREEMPT_RT_FULL
- init_waitqueue_head(&base->wait_for_running_timer);
-#endif
for (j = 0; j < TVN_SIZE; j++) {
INIT_LIST_HEAD(base->tv5.vec + j);
@@ -1690,7 +1585,7 @@ static void migrate_timers(int cpu)
BUG_ON(cpu_online(cpu));
old_base = per_cpu(tvec_bases, cpu);
- new_base = get_local_var(tvec_bases);
+ new_base = get_cpu_var(tvec_bases);
/*
* The caller is globally serialized and nobody else
* takes two locks at once, deadlock is not possible.
@@ -1711,7 +1606,7 @@ static void migrate_timers(int cpu)
spin_unlock(&old_base->lock);
spin_unlock_irq(&new_base->lock);
- put_local_var(tvec_bases);
+ put_cpu_var(tvec_bases);
}
#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index bbe95b9..015f85a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -192,24 +192,6 @@ config IRQSOFF_TRACER
enabled. This option and the preempt-off timing option can be
used together or separately.)
-config INTERRUPT_OFF_HIST
- bool "Interrupts-off Latency Histogram"
- depends on IRQSOFF_TRACER
- help
- This option generates continuously updated histograms (one per cpu)
- of the duration of time periods with interrupts disabled. The
- histograms are disabled by default. To enable them, write a non-zero
- number to
-
- /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
-
- If PREEMPT_OFF_HIST is also selected, additional histograms (one
- per cpu) are generated that accumulate the duration of time periods
- when both interrupts and preemption are disabled. The histogram data
- will be located in the debug file system at
-
- /sys/kernel/debug/tracing/latency_hist/irqsoff
-
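For reference, the enable knob described in the help text removed above is an ordinary debugfs file: writing any non-zero number switches the histograms on. A tiny sketch of doing that from user space; the path is taken verbatim from the help text and only exists on kernels still carrying these histogram patches:

    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff", "w");

        if (!f) {
            perror("open enable file");
            return 1;
        }
        fputs("1\n", f);        /* any non-zero value enables the histograms */
        fclose(f);
        return 0;
    }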
config PREEMPT_TRACER
bool "Preemption-off Latency Tracer"
default n
@@ -234,24 +216,6 @@ config PREEMPT_TRACER
enabled. This option and the irqs-off timing option can be
used together or separately.)
-config PREEMPT_OFF_HIST
- bool "Preemption-off Latency Histogram"
- depends on PREEMPT_TRACER
- help
- This option generates continuously updated histograms (one per cpu)
- of the duration of time periods with preemption disabled. The
- histograms are disabled by default. To enable them, write a non-zero
- number to
-
- /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
-
- If INTERRUPT_OFF_HIST is also selected, additional histograms (one
- per cpu) are generated that accumulate the duration of time periods
- when both interrupts and preemption are disabled. The histogram data
- will be located in the debug file system at
-
- /sys/kernel/debug/tracing/latency_hist/preemptoff
-
config SCHED_TRACER
bool "Scheduling Latency Tracer"
select GENERIC_TRACER
@@ -262,74 +226,6 @@ config SCHED_TRACER
This tracer tracks the latency of the highest priority task
to be scheduled in, starting from the point it has woken up.
-config WAKEUP_LATENCY_HIST
- bool "Scheduling Latency Histogram"
- depends on SCHED_TRACER
- help
- This option generates continuously updated histograms (one per cpu)
- of the scheduling latency of the highest priority task.
- The histograms are disabled by default. To enable them, write a
- non-zero number to
-
- /sys/kernel/debug/tracing/latency_hist/enable/wakeup
-
- Two different algorithms are used, one to determine the latency of
- processes that exclusively use the highest priority of the system and
- another one to determine the latency of processes that share the
- highest system priority with other processes. The former is used to
- improve hardware and system software, the latter to optimize the
- priority design of a given system. The histogram data will be
- located in the debug file system at
-
- /sys/kernel/debug/tracing/latency_hist/wakeup
-
- and
-
- /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio
-
- If both Scheduling Latency Histogram and Missed Timer Offsets
- Histogram are selected, additional histogram data will be collected
- that contain, in addition to the wakeup latency, the timer latency, in
- case the wakeup was triggered by an expired timer. These histograms
- are available in the
-
- /sys/kernel/debug/tracing/latency_hist/timerandwakeup
-
- directory. They reflect the apparent interrupt and scheduling latency
- and are best suitable to determine the worst-case latency of a given
- system. To enable these histograms, write a non-zero number to
-
- /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
-
-config MISSED_TIMER_OFFSETS_HIST
- depends on HIGH_RES_TIMERS
- select GENERIC_TRACER
- bool "Missed Timer Offsets Histogram"
- help
- Generate a histogram of missed timer offsets in microseconds. The
- histograms are disabled by default. To enable them, write a non-zero
- number to
-
- /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets
-
- The histogram data will be located in the debug file system at
-
- /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets
-
- If both Scheduling Latency Histogram and Missed Timer Offsets
- Histogram are selected, additional histogram data will be collected
- that contain, in addition to the wakeup latency, the timer latency, in
- case the wakeup was triggered by an expired timer. These histograms
- are available in the
-
- /sys/kernel/debug/tracing/latency_hist/timerandwakeup
-
- directory. They reflect the apparent interrupt and scheduling latency
- and are best suitable to determine the worst-case latency of a given
- system. To enable these histograms, write a non-zero number to
-
- /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
-
config ENABLE_DEFAULT_TRACERS
bool "Trace process context switches and events"
depends on !GENERIC_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index f5e0243..d7e2068 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -34,10 +34,6 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
-obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o
-obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o
-obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o
-obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o
obj-$(CONFIG_NOP_TRACER) += trace_nop.o
obj-$(CONFIG_STACK_TRACER) += trace_stack.o
obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
diff --git a/kernel/trace/latency_hist.c b/kernel/trace/latency_hist.c
deleted file mode 100644
index 66a69eb..0000000
--- a/kernel/trace/latency_hist.c
+++ /dev/null
@@ -1,1178 +0,0 @@
-/*
- * kernel/trace/latency_hist.c
- *
- * Add support for histograms of preemption-off latency and
- * interrupt-off latency and wakeup latency, it depends on
- * Real-Time Preemption Support.
- *
- * Copyright (C) 2005 MontaVista Software, Inc.
- * Yi Yang <yyang@ch.mvista.com>
- *
- * Converted to work with the new latency tracer.
- * Copyright (C) 2008 Red Hat, Inc.
- * Steven Rostedt <srostedt@redhat.com>
- *
- */
-#include <linux/module.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-#include <linux/percpu.h>
-#include <linux/kallsyms.h>
-#include <linux/uaccess.h>
-#include <linux/sched.h>
-#include <linux/sched/rt.h>
-#include <linux/slab.h>
-#include <linux/atomic.h>
-#include <asm/div64.h>
-
-#include "trace.h"
-#include <trace/events/sched.h>
-
-#define NSECS_PER_USECS 1000L
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/hist.h>
-
-enum {
- IRQSOFF_LATENCY = 0,
- PREEMPTOFF_LATENCY,
- PREEMPTIRQSOFF_LATENCY,
- WAKEUP_LATENCY,
- WAKEUP_LATENCY_SHAREDPRIO,
- MISSED_TIMER_OFFSETS,
- TIMERANDWAKEUP_LATENCY,
- MAX_LATENCY_TYPE,
-};
-
-#define MAX_ENTRY_NUM 10240
-
-struct hist_data {
- atomic_t hist_mode; /* 0 log, 1 don't log */
- long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */
- long min_lat;
- long max_lat;
- unsigned long long below_hist_bound_samples;
- unsigned long long above_hist_bound_samples;
- long long accumulate_lat;
- unsigned long long total_samples;
- unsigned long long hist_array[MAX_ENTRY_NUM];
-};
-
-struct enable_data {
- int latency_type;
- int enabled;
-};
-
-static char *latency_hist_dir_root = "latency_hist";
-
-#ifdef CONFIG_INTERRUPT_OFF_HIST
-static DEFINE_PER_CPU(struct hist_data, irqsoff_hist);
-static char *irqsoff_hist_dir = "irqsoff";
-static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start);
-static DEFINE_PER_CPU(int, hist_irqsoff_counting);
-#endif
-
-#ifdef CONFIG_PREEMPT_OFF_HIST
-static DEFINE_PER_CPU(struct hist_data, preemptoff_hist);
-static char *preemptoff_hist_dir = "preemptoff";
-static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start);
-static DEFINE_PER_CPU(int, hist_preemptoff_counting);
-#endif
-
-#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
-static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist);
-static char *preemptirqsoff_hist_dir = "preemptirqsoff";
-static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start);
-static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting);
-#endif
-
-#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST)
-static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start);
-static struct enable_data preemptirqsoff_enabled_data = {
- .latency_type = PREEMPTIRQSOFF_LATENCY,
- .enabled = 0,
-};
-#endif
-
-#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
-struct maxlatproc_data {
- char comm[FIELD_SIZEOF(struct task_struct, comm)];
- char current_comm[FIELD_SIZEOF(struct task_struct, comm)];
- int pid;
- int current_pid;
- int prio;
- int current_prio;
- long latency;
- long timeroffset;
- cycle_t timestamp;
-};
-#endif
-
-#ifdef CONFIG_WAKEUP_LATENCY_HIST
-static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist);
-static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio);
-static char *wakeup_latency_hist_dir = "wakeup";
-static char *wakeup_latency_hist_dir_sharedprio = "sharedprio";
-static notrace void probe_wakeup_latency_hist_start(void *v,
- struct task_struct *p, int success);
-static notrace void probe_wakeup_latency_hist_stop(void *v,
- struct task_struct *prev, struct task_struct *next);
-static notrace void probe_sched_migrate_task(void *,
- struct task_struct *task, int cpu);
-static struct enable_data wakeup_latency_enabled_data = {
- .latency_type = WAKEUP_LATENCY,
- .enabled = 0,
-};
-static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc);
-static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio);
-static DEFINE_PER_CPU(struct task_struct *, wakeup_task);
-static DEFINE_PER_CPU(int, wakeup_sharedprio);
-static unsigned long wakeup_pid;
-#endif
-
-#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
-static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets);
-static char *missed_timer_offsets_dir = "missed_timer_offsets";
-static notrace void probe_hrtimer_interrupt(void *v, int cpu,
- long long offset, struct task_struct *curr, struct task_struct *task);
-static struct enable_data missed_timer_offsets_enabled_data = {
- .latency_type = MISSED_TIMER_OFFSETS,
- .enabled = 0,
-};
-static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc);
-static unsigned long missed_timer_offsets_pid;
-#endif
-
-#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
-static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist);
-static char *timerandwakeup_latency_hist_dir = "timerandwakeup";
-static struct enable_data timerandwakeup_enabled_data = {
- .latency_type = TIMERANDWAKEUP_LATENCY,
- .enabled = 0,
-};
-static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc);
-#endif
-
-void notrace latency_hist(int latency_type, int cpu, long latency,
- long timeroffset, cycle_t stop,
- struct task_struct *p)
-{
- struct hist_data *my_hist;
-#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
- struct maxlatproc_data *mp = NULL;
-#endif
-
- if (!cpu_possible(cpu) || latency_type < 0 ||
- latency_type >= MAX_LATENCY_TYPE)
- return;
-
- switch (latency_type) {
-#ifdef CONFIG_INTERRUPT_OFF_HIST
- case IRQSOFF_LATENCY:
- my_hist = &per_cpu(irqsoff_hist, cpu);
- break;
-#endif
-#ifdef CONFIG_PREEMPT_OFF_HIST
- case PREEMPTOFF_LATENCY:
- my_hist = &per_cpu(preemptoff_hist, cpu);
- break;
-#endif
-#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
- case PREEMPTIRQSOFF_LATENCY:
- my_hist = &per_cpu(preemptirqsoff_hist, cpu);
- break;
-#endif
-#ifdef CONFIG_WAKEUP_LATENCY_HIST
- case WAKEUP_LATENCY:
- my_hist = &per_cpu(wakeup_latency_hist, cpu);
- mp = &per_cpu(wakeup_maxlatproc, cpu);
- break;
- case WAKEUP_LATENCY_SHAREDPRIO:
- my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
- mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
- break;
-#endif
-#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
- case MISSED_TIMER_OFFSETS:
- my_hist = &per_cpu(missed_timer_offsets, cpu);
- mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
- break;
-#endif
-#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
- case TIMERANDWAKEUP_LATENCY:
- my_hist = &per_cpu(timerandwakeup_latency_hist, cpu);
- mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
- break;
-#endif
-
- default:
- return;
- }
-
- latency += my_hist->offset;
-
- if (atomic_read(&my_hist->hist_mode) == 0)
- return;
-
- if (latency < 0 || latency >= MAX_ENTRY_NUM) {
- if (latency < 0)
- my_hist->below_hist_bound_samples++;
- else
- my_hist->above_hist_bound_samples++;
- } else
- my_hist->hist_array[latency]++;
-
- if (unlikely(latency > my_hist->max_lat ||
- my_hist->min_lat == LONG_MAX)) {
-#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
- if (latency_type == WAKEUP_LATENCY ||
- latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
- latency_type == MISSED_TIMER_OFFSETS ||
- latency_type == TIMERANDWAKEUP_LATENCY) {
- strncpy(mp->comm, p->comm, sizeof(mp->comm));
- strncpy(mp->current_comm, current->comm,
- sizeof(mp->current_comm));
- mp->pid = task_pid_nr(p);
- mp->current_pid = task_pid_nr(current);
- mp->prio = p->prio;
- mp->current_prio = current->prio;
- mp->latency = latency;
- mp->timeroffset = timeroffset;
- mp->timestamp = stop;
- }
-#endif
- my_hist->max_lat = latency;
- }
- if (unlikely(latency < my_hist->min_lat))
- my_hist->min_lat = latency;
- my_hist->total_samples++;
- my_hist->accumulate_lat += latency;
-}
-
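latency_hist() above is essentially a bounded histogram with an offset so that negative latencies can be binned on a bipolar scale. A stripped-down user-space version of the same accounting, with the per-CPU and max-latency-process bookkeeping left out and field names shortened; callers are assumed to initialize min_lat to LONG_MAX and max_lat to LONG_MIN, as hist_reset() does:

    #include <limits.h>

    #define MAX_ENTRY_NUM 10240

    struct hist {
        long offset;                        /* MAX_ENTRY_NUM/2 for a bipolar scale */
        long min_lat, max_lat;
        unsigned long long below, above, total;
        long long accumulate;
        unsigned long long slot[MAX_ENTRY_NUM];
    };

    /* Shift by the offset, clamp out-of-range samples into the below/above
       counters, and track min/max/average exactly as latency_hist() does. */
    static void hist_add(struct hist *h, long latency_us)
    {
        long idx = latency_us + h->offset;

        if (idx < 0)
            h->below++;
        else if (idx >= MAX_ENTRY_NUM)
            h->above++;
        else
            h->slot[idx]++;

        if (idx > h->max_lat || h->min_lat == LONG_MAX)
            h->max_lat = idx;
        if (idx < h->min_lat)
            h->min_lat = idx;
        h->total++;
        h->accumulate += idx;
    }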
-static void *l_start(struct seq_file *m, loff_t *pos)
-{
- loff_t *index_ptr = NULL;
- loff_t index = *pos;
- struct hist_data *my_hist = m->private;
-
- if (index == 0) {
- char minstr[32], avgstr[32], maxstr[32];
-
- atomic_dec(&my_hist->hist_mode);
-
- if (likely(my_hist->total_samples)) {
- long avg = (long) div64_s64(my_hist->accumulate_lat,
- my_hist->total_samples);
- snprintf(minstr, sizeof(minstr), "%ld",
- my_hist->min_lat - my_hist->offset);
- snprintf(avgstr, sizeof(avgstr), "%ld",
- avg - my_hist->offset);
- snprintf(maxstr, sizeof(maxstr), "%ld",
- my_hist->max_lat - my_hist->offset);
- } else {
- strcpy(minstr, "<undef>");
- strcpy(avgstr, minstr);
- strcpy(maxstr, minstr);
- }
-
- seq_printf(m, "#Minimum latency: %s microseconds\n"
- "#Average latency: %s microseconds\n"
- "#Maximum latency: %s microseconds\n"
- "#Total samples: %llu\n"
- "#There are %llu samples lower than %ld"
- " microseconds.\n"
- "#There are %llu samples greater or equal"
- " than %ld microseconds.\n"
- "#usecs\t%16s\n",
- minstr, avgstr, maxstr,
- my_hist->total_samples,
- my_hist->below_hist_bound_samples,
- -my_hist->offset,
- my_hist->above_hist_bound_samples,
- MAX_ENTRY_NUM - my_hist->offset,
- "samples");
- }
- if (index < MAX_ENTRY_NUM) {
- index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL);
- if (index_ptr)
- *index_ptr = index;
- }
-
- return index_ptr;
-}
-
-static void *l_next(struct seq_file *m, void *p, loff_t *pos)
-{
- loff_t *index_ptr = p;
- struct hist_data *my_hist = m->private;
-
- if (++*pos >= MAX_ENTRY_NUM) {
- atomic_inc(&my_hist->hist_mode);
- return NULL;
- }
- *index_ptr = *pos;
- return index_ptr;
-}
-
-static void l_stop(struct seq_file *m, void *p)
-{
- kfree(p);
-}
-
-static int l_show(struct seq_file *m, void *p)
-{
- int index = *(loff_t *) p;
- struct hist_data *my_hist = m->private;
-
- seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset,
- my_hist->hist_array[index]);
- return 0;
-}
-
-static const struct seq_operations latency_hist_seq_op = {
- .start = l_start,
- .next = l_next,
- .stop = l_stop,
- .show = l_show
-};
-
-static int latency_hist_open(struct inode *inode, struct file *file)
-{
- int ret;
-
- ret = seq_open(file, &latency_hist_seq_op);
- if (!ret) {
- struct seq_file *seq = file->private_data;
- seq->private = inode->i_private;
- }
- return ret;
-}
-
-static const struct file_operations latency_hist_fops = {
- .open = latency_hist_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
-static void clear_maxlatprocdata(struct maxlatproc_data *mp)
-{
- mp->comm[0] = mp->current_comm[0] = '\0';
- mp->prio = mp->current_prio = mp->pid = mp->current_pid =
- mp->latency = mp->timeroffset = -1;
- mp->timestamp = 0;
-}
-#endif
-
-static void hist_reset(struct hist_data *hist)
-{
- atomic_dec(&hist->hist_mode);
-
- memset(hist->hist_array, 0, sizeof(hist->hist_array));
- hist->below_hist_bound_samples = 0ULL;
- hist->above_hist_bound_samples = 0ULL;
- hist->min_lat = LONG_MAX;
- hist->max_lat = LONG_MIN;
- hist->total_samples = 0ULL;
- hist->accumulate_lat = 0LL;
-
- atomic_inc(&hist->hist_mode);
-}
-
-static ssize_t
-latency_hist_reset(struct file *file, const char __user *a,
- size_t size, loff_t *off)
-{
- int cpu;
- struct hist_data *hist = NULL;
-#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
- struct maxlatproc_data *mp = NULL;
-#endif
- off_t latency_type = (off_t) file->private_data;
-
- for_each_online_cpu(cpu) {
-
- switch (latency_type) {
-#ifdef CONFIG_PREEMPT_OFF_HIST
- case PREEMPTOFF_LATENCY:
- hist = &per_cpu(preemptoff_hist, cpu);
- break;
-#endif
-#ifdef CONFIG_INTERRUPT_OFF_HIST
- case IRQSOFF_LATENCY:
- hist = &per_cpu(irqsoff_hist, cpu);
- break;
-#endif
-#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
- case PREEMPTIRQSOFF_LATENCY:
- hist = &per_cpu(preemptirqsoff_hist, cpu);
- break;
-#endif
-#ifdef CONFIG_WAKEUP_LATENCY_HIST
- case WAKEUP_LATENCY:
- hist = &per_cpu(wakeup_latency_hist, cpu);
- mp = &per_cpu(wakeup_maxlatproc, cpu);
- break;
- case WAKEUP_LATENCY_SHAREDPRIO:
- hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
- mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
- break;
-#endif
-#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
- case MISSED_TIMER_OFFSETS:
- hist = &per_cpu(missed_timer_offsets, cpu);
- mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
- break;
-#endif
-#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
- case TIMERANDWAKEUP_LATENCY:
- hist = &per_cpu(timerandwakeup_latency_hist, cpu);
- mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
- break;
-#endif
- }
-
- hist_reset(hist);
-#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
- if (latency_type == WAKEUP_LATENCY ||
- latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
- latency_type == MISSED_TIMER_OFFSETS ||
- latency_type == TIMERANDWAKEUP_LATENCY)
- clear_maxlatprocdata(mp);
-#endif
- }
-
- return size;
-}
-
-#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
-static ssize_t
-show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
-{
- char buf[64];
- int r;
- unsigned long *this_pid = file->private_data;
-
- r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid);
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-}
-
-static ssize_t do_pid(struct file *file, const char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- char buf[64];
- unsigned long pid;
- unsigned long *this_pid = file->private_data;
-
- if (cnt >= sizeof(buf))
- return -EINVAL;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = '\0';
-
- if (kstrtoul(buf, 10, &pid))
- return -EINVAL;
-
- *this_pid = pid;
-
- return cnt;
-}
-#endif
-
-#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
-static ssize_t
-show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
-{
- int r;
- struct maxlatproc_data *mp = file->private_data;
- int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8);
- unsigned long long t;
- unsigned long usecs, secs;
- char *buf;
-
- if (mp->pid == -1 || mp->current_pid == -1) {
- buf = "(none)\n";
- return simple_read_from_buffer(ubuf, cnt, ppos, buf,
- strlen(buf));
- }
-
- buf = kmalloc(strmaxlen, GFP_KERNEL);
- if (buf == NULL)
- return -ENOMEM;
-
- t = ns2usecs(mp->timestamp);
- usecs = do_div(t, USEC_PER_SEC);
- secs = (unsigned long) t;
- r = snprintf(buf, strmaxlen,
- "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid,
- MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm,
- mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
- secs, usecs);
- r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
- kfree(buf);
- return r;
-}
-#endif
-
-static ssize_t
-show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
-{
- char buf[64];
- struct enable_data *ed = file->private_data;
- int r;
-
- r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled);
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-}
-
-static ssize_t
-do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos)
-{
- char buf[64];
- long enable;
- struct enable_data *ed = file->private_data;
-
- if (cnt >= sizeof(buf))
- return -EINVAL;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
-
- if (kstrtoul(buf, 10, &enable))
- return -EINVAL;
-
- if ((enable && ed->enabled) || (!enable && !ed->enabled))
- return cnt;
-
- if (enable) {
- int ret;
-
- switch (ed->latency_type) {
-#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
- case PREEMPTIRQSOFF_LATENCY:
- ret = register_trace_preemptirqsoff_hist(
- probe_preemptirqsoff_hist, NULL);
- if (ret) {
- pr_info("wakeup trace: Couldn't assign "
- "probe_preemptirqsoff_hist "
- "to trace_preemptirqsoff_hist\n");
- return ret;
- }
- break;
-#endif
-#ifdef CONFIG_WAKEUP_LATENCY_HIST
- case WAKEUP_LATENCY:
- ret = register_trace_sched_wakeup(
- probe_wakeup_latency_hist_start, NULL);
- if (ret) {
- pr_info("wakeup trace: Couldn't assign "
- "probe_wakeup_latency_hist_start "
- "to trace_sched_wakeup\n");
- return ret;
- }
- ret = register_trace_sched_wakeup_new(
- probe_wakeup_latency_hist_start, NULL);
- if (ret) {
- pr_info("wakeup trace: Couldn't assign "
- "probe_wakeup_latency_hist_start "
- "to trace_sched_wakeup_new\n");
- unregister_trace_sched_wakeup(
- probe_wakeup_latency_hist_start, NULL);
- return ret;
- }
- ret = register_trace_sched_switch(
- probe_wakeup_latency_hist_stop, NULL);
- if (ret) {
- pr_info("wakeup trace: Couldn't assign "
- "probe_wakeup_latency_hist_stop "
- "to trace_sched_switch\n");
- unregister_trace_sched_wakeup(
- probe_wakeup_latency_hist_start, NULL);
- unregister_trace_sched_wakeup_new(
- probe_wakeup_latency_hist_start, NULL);
- return ret;
- }
- ret = register_trace_sched_migrate_task(
- probe_sched_migrate_task, NULL);
- if (ret) {
- pr_info("wakeup trace: Couldn't assign "
- "probe_sched_migrate_task "
- "to trace_sched_migrate_task\n");
- unregister_trace_sched_wakeup(
- probe_wakeup_latency_hist_start, NULL);
- unregister_trace_sched_wakeup_new(
- probe_wakeup_latency_hist_start, NULL);
- unregister_trace_sched_switch(
- probe_wakeup_latency_hist_stop, NULL);
- return ret;
- }
- break;
-#endif
-#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
- case MISSED_TIMER_OFFSETS:
- ret = register_trace_hrtimer_interrupt(
- probe_hrtimer_interrupt, NULL);
- if (ret) {
- pr_info("wakeup trace: Couldn't assign "
- "probe_hrtimer_interrupt "
- "to trace_hrtimer_interrupt\n");
- return ret;
- }
- break;
-#endif
-#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
- case TIMERANDWAKEUP_LATENCY:
- if (!wakeup_latency_enabled_data.enabled ||
- !missed_timer_offsets_enabled_data.enabled)
- return -EINVAL;
- break;
-#endif
- default:
- break;
- }
- } else {
- switch (ed->latency_type) {
-#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
- case PREEMPTIRQSOFF_LATENCY:
- {
- int cpu;
-
- unregister_trace_preemptirqsoff_hist(
- probe_preemptirqsoff_hist, NULL);
- for_each_online_cpu(cpu) {
-#ifdef CONFIG_INTERRUPT_OFF_HIST
- per_cpu(hist_irqsoff_counting,
- cpu) = 0;
-#endif
-#ifdef CONFIG_PREEMPT_OFF_HIST
- per_cpu(hist_preemptoff_counting,
- cpu) = 0;
-#endif
-#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
- per_cpu(hist_preemptirqsoff_counting,
- cpu) = 0;
-#endif
- }
- }
- break;
-#endif
-#ifdef CONFIG_WAKEUP_LATENCY_HIST
- case WAKEUP_LATENCY:
- {
- int cpu;
-
- unregister_trace_sched_wakeup(
- probe_wakeup_latency_hist_start, NULL);
- unregister_trace_sched_wakeup_new(
- probe_wakeup_latency_hist_start, NULL);
- unregister_trace_sched_switch(
- probe_wakeup_latency_hist_stop, NULL);
- unregister_trace_sched_migrate_task(
- probe_sched_migrate_task, NULL);
-
- for_each_online_cpu(cpu) {
- per_cpu(wakeup_task, cpu) = NULL;
- per_cpu(wakeup_sharedprio, cpu) = 0;
- }
- }
-#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
- timerandwakeup_enabled_data.enabled = 0;
-#endif
- break;
-#endif
-#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
- case MISSED_TIMER_OFFSETS:
- unregister_trace_hrtimer_interrupt(
- probe_hrtimer_interrupt, NULL);
-#ifdef CONFIG_WAKEUP_LATENCY_HIST
- timerandwakeup_enabled_data.enabled = 0;
-#endif
- break;
-#endif
- default:
- break;
- }
- }
- ed->enabled = enable;
- return cnt;
-}
-
-static const struct file_operations latency_hist_reset_fops = {
- .open = tracing_open_generic,
- .write = latency_hist_reset,
-};
-
-static const struct file_operations enable_fops = {
- .open = tracing_open_generic,
- .read = show_enable,
- .write = do_enable,
-};
-
-#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
-static const struct file_operations pid_fops = {
- .open = tracing_open_generic,
- .read = show_pid,
- .write = do_pid,
-};
-
-static const struct file_operations maxlatproc_fops = {
- .open = tracing_open_generic,
- .read = show_maxlatproc,
-};
-#endif
-
-#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
-static notrace void probe_preemptirqsoff_hist(void *v, int reason,
- int starthist)
-{
- int cpu = raw_smp_processor_id();
- int time_set = 0;
-
- if (starthist) {
- cycle_t uninitialized_var(start);
-
- if (!preempt_count() && !irqs_disabled())
- return;
-
-#ifdef CONFIG_INTERRUPT_OFF_HIST
- if ((reason == IRQS_OFF || reason == TRACE_START) &&
- !per_cpu(hist_irqsoff_counting, cpu)) {
- per_cpu(hist_irqsoff_counting, cpu) = 1;
- start = ftrace_now(cpu);
- time_set++;
- per_cpu(hist_irqsoff_start, cpu) = start;
- }
-#endif
-
-#ifdef CONFIG_PREEMPT_OFF_HIST
- if ((reason == PREEMPT_OFF || reason == TRACE_START) &&
- !per_cpu(hist_preemptoff_counting, cpu)) {
- per_cpu(hist_preemptoff_counting, cpu) = 1;
- if (!(time_set++))
- start = ftrace_now(cpu);
- per_cpu(hist_preemptoff_start, cpu) = start;
- }
-#endif
-
-#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
- if (per_cpu(hist_irqsoff_counting, cpu) &&
- per_cpu(hist_preemptoff_counting, cpu) &&
- !per_cpu(hist_preemptirqsoff_counting, cpu)) {
- per_cpu(hist_preemptirqsoff_counting, cpu) = 1;
- if (!time_set)
- start = ftrace_now(cpu);
- per_cpu(hist_preemptirqsoff_start, cpu) = start;
- }
-#endif
- } else {
- cycle_t uninitialized_var(stop);
-
-#ifdef CONFIG_INTERRUPT_OFF_HIST
- if ((reason == IRQS_ON || reason == TRACE_STOP) &&
- per_cpu(hist_irqsoff_counting, cpu)) {
- cycle_t start = per_cpu(hist_irqsoff_start, cpu);
-
- stop = ftrace_now(cpu);
- time_set++;
- if (start) {
- long latency = ((long) (stop - start)) /
- NSECS_PER_USECS;
-
- latency_hist(IRQSOFF_LATENCY, cpu, latency, 0,
- stop, NULL);
- }
- per_cpu(hist_irqsoff_counting, cpu) = 0;
- }
-#endif
-
-#ifdef CONFIG_PREEMPT_OFF_HIST
- if ((reason == PREEMPT_ON || reason == TRACE_STOP) &&
- per_cpu(hist_preemptoff_counting, cpu)) {
- cycle_t start = per_cpu(hist_preemptoff_start, cpu);
-
- if (!(time_set++))
- stop = ftrace_now(cpu);
- if (start) {
- long latency = ((long) (stop - start)) /
- NSECS_PER_USECS;
-
- latency_hist(PREEMPTOFF_LATENCY, cpu, latency,
- 0, stop, NULL);
- }
- per_cpu(hist_preemptoff_counting, cpu) = 0;
- }
-#endif
-
-#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
- if ((!per_cpu(hist_irqsoff_counting, cpu) ||
- !per_cpu(hist_preemptoff_counting, cpu)) &&
- per_cpu(hist_preemptirqsoff_counting, cpu)) {
- cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu);
-
- if (!time_set)
- stop = ftrace_now(cpu);
- if (start) {
- long latency = ((long) (stop - start)) /
- NSECS_PER_USECS;
-
- latency_hist(PREEMPTIRQSOFF_LATENCY, cpu,
- latency, 0, stop, NULL);
- }
- per_cpu(hist_preemptirqsoff_counting, cpu) = 0;
- }
-#endif
- }
-}
-#endif
-
-#ifdef CONFIG_WAKEUP_LATENCY_HIST
-static DEFINE_RAW_SPINLOCK(wakeup_lock);
-static notrace void probe_sched_migrate_task(void *v, struct task_struct *task,
- int cpu)
-{
- int old_cpu = task_cpu(task);
-
- if (cpu != old_cpu) {
- unsigned long flags;
- struct task_struct *cpu_wakeup_task;
-
- raw_spin_lock_irqsave(&wakeup_lock, flags);
-
- cpu_wakeup_task = per_cpu(wakeup_task, old_cpu);
- if (task == cpu_wakeup_task) {
- put_task_struct(cpu_wakeup_task);
- per_cpu(wakeup_task, old_cpu) = NULL;
- cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task;
- get_task_struct(cpu_wakeup_task);
- }
-
- raw_spin_unlock_irqrestore(&wakeup_lock, flags);
- }
-}
-
-static notrace void probe_wakeup_latency_hist_start(void *v,
- struct task_struct *p, int success)
-{
- unsigned long flags;
- struct task_struct *curr = current;
- int cpu = task_cpu(p);
- struct task_struct *cpu_wakeup_task;
-
- raw_spin_lock_irqsave(&wakeup_lock, flags);
-
- cpu_wakeup_task = per_cpu(wakeup_task, cpu);
-
- if (wakeup_pid) {
- if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
- p->prio == curr->prio)
- per_cpu(wakeup_sharedprio, cpu) = 1;
- if (likely(wakeup_pid != task_pid_nr(p)))
- goto out;
- } else {
- if (likely(!rt_task(p)) ||
- (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) ||
- p->prio > curr->prio)
- goto out;
- if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
- p->prio == curr->prio)
- per_cpu(wakeup_sharedprio, cpu) = 1;
- }
-
- if (cpu_wakeup_task)
- put_task_struct(cpu_wakeup_task);
- cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p;
- get_task_struct(cpu_wakeup_task);
- cpu_wakeup_task->preempt_timestamp_hist =
- ftrace_now(raw_smp_processor_id());
-out:
- raw_spin_unlock_irqrestore(&wakeup_lock, flags);
-}
-
-static notrace void probe_wakeup_latency_hist_stop(void *v,
- struct task_struct *prev, struct task_struct *next)
-{
- unsigned long flags;
- int cpu = task_cpu(next);
- long latency;
- cycle_t stop;
- struct task_struct *cpu_wakeup_task;
-
- raw_spin_lock_irqsave(&wakeup_lock, flags);
-
- cpu_wakeup_task = per_cpu(wakeup_task, cpu);
-
- if (cpu_wakeup_task == NULL)
- goto out;
-
- /* Already running? */
- if (unlikely(current == cpu_wakeup_task))
- goto out_reset;
-
- if (next != cpu_wakeup_task) {
- if (next->prio < cpu_wakeup_task->prio)
- goto out_reset;
-
- if (next->prio == cpu_wakeup_task->prio)
- per_cpu(wakeup_sharedprio, cpu) = 1;
-
- goto out;
- }
-
- if (current->prio == cpu_wakeup_task->prio)
- per_cpu(wakeup_sharedprio, cpu) = 1;
-
- /*
- * The task we are waiting for is about to be switched to.
- * Calculate latency and store it in histogram.
- */
- stop = ftrace_now(raw_smp_processor_id());
-
- latency = ((long) (stop - next->preempt_timestamp_hist)) /
- NSECS_PER_USECS;
-
- if (per_cpu(wakeup_sharedprio, cpu)) {
- latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop,
- next);
- per_cpu(wakeup_sharedprio, cpu) = 0;
- } else {
- latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next);
-#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
- if (timerandwakeup_enabled_data.enabled) {
- latency_hist(TIMERANDWAKEUP_LATENCY, cpu,
- next->timer_offset + latency, next->timer_offset,
- stop, next);
- }
-#endif
- }
-
-out_reset:
-#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
- next->timer_offset = 0;
-#endif
- put_task_struct(cpu_wakeup_task);
- per_cpu(wakeup_task, cpu) = NULL;
-out:
- raw_spin_unlock_irqrestore(&wakeup_lock, flags);
-}
-#endif
-
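The two probes above carry the actual measurement: probe_wakeup_latency_hist_start() timestamps the highest-priority task when it is woken, and probe_wakeup_latency_hist_stop() computes the delta once that task is switched in. Reduced to its core, with a single slot and none of the priority, PID or shared-priority filtering, the bookkeeping looks roughly like this:

    #include <stddef.h>
    #include <time.h>

    struct wake_slot {
        const void *task;        /* task being timed, NULL if none */
        struct timespec woken;   /* timestamp taken at wakeup */
    };

    static struct wake_slot slot;

    static void on_wakeup(const void *task)
    {
        slot.task = task;
        clock_gettime(CLOCK_MONOTONIC, &slot.woken);
    }

    /* Returns the wakeup-to-run latency in microseconds, or -1 if the task
       switched in is not the one being timed. */
    static long on_switch_in(const void *task)
    {
        struct timespec now;
        long us;

        if (slot.task != task)
            return -1;

        clock_gettime(CLOCK_MONOTONIC, &now);
        us = (now.tv_sec - slot.woken.tv_sec) * 1000000L +
             (now.tv_nsec - slot.woken.tv_nsec) / 1000L;
        slot.task = NULL;
        return us;
    }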
-#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
-static notrace void probe_hrtimer_interrupt(void *v, int cpu,
- long long latency_ns, struct task_struct *curr,
- struct task_struct *task)
-{
- if (latency_ns <= 0 && task != NULL && rt_task(task) &&
- (task->prio < curr->prio ||
- (task->prio == curr->prio &&
- !cpumask_test_cpu(cpu, &task->cpus_allowed)))) {
- long latency;
- cycle_t now;
-
- if (missed_timer_offsets_pid) {
- if (likely(missed_timer_offsets_pid !=
- task_pid_nr(task)))
- return;
- }
-
- now = ftrace_now(cpu);
- latency = (long) div_s64(-latency_ns, NSECS_PER_USECS);
- latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now,
- task);
-#ifdef CONFIG_WAKEUP_LATENCY_HIST
- task->timer_offset = latency;
-#endif
- }
-}
-#endif
-
-static __init int latency_hist_init(void)
-{
- struct dentry *latency_hist_root = NULL;
- struct dentry *dentry;
-#ifdef CONFIG_WAKEUP_LATENCY_HIST
- struct dentry *dentry_sharedprio;
-#endif
- struct dentry *entry;
- struct dentry *enable_root;
- int i = 0;
- struct hist_data *my_hist;
- char name[64];
- char *cpufmt = "CPU%d";
-#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
- char *cpufmt_maxlatproc = "max_latency-CPU%d";
- struct maxlatproc_data *mp = NULL;
-#endif
-
- dentry = tracing_init_dentry();
- latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry);
- enable_root = debugfs_create_dir("enable", latency_hist_root);
-
-#ifdef CONFIG_INTERRUPT_OFF_HIST
- dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root);
- for_each_possible_cpu(i) {
- sprintf(name, cpufmt, i);
- entry = debugfs_create_file(name, 0444, dentry,
- &per_cpu(irqsoff_hist, i), &latency_hist_fops);
- my_hist = &per_cpu(irqsoff_hist, i);
- atomic_set(&my_hist->hist_mode, 1);
- my_hist->min_lat = LONG_MAX;
- }
- entry = debugfs_create_file("reset", 0644, dentry,
- (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops);
-#endif
-
-#ifdef CONFIG_PREEMPT_OFF_HIST
- dentry = debugfs_create_dir(preemptoff_hist_dir,
- latency_hist_root);
- for_each_possible_cpu(i) {
- sprintf(name, cpufmt, i);
- entry = debugfs_create_file(name, 0444, dentry,
- &per_cpu(preemptoff_hist, i), &latency_hist_fops);
- my_hist = &per_cpu(preemptoff_hist, i);
- atomic_set(&my_hist->hist_mode, 1);
- my_hist->min_lat = LONG_MAX;
- }
- entry = debugfs_create_file("reset", 0644, dentry,
- (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops);
-#endif
-
-#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
- dentry = debugfs_create_dir(preemptirqsoff_hist_dir,
- latency_hist_root);
- for_each_possible_cpu(i) {
- sprintf(name, cpufmt, i);
- entry = debugfs_create_file(name, 0444, dentry,
- &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops);
- my_hist = &per_cpu(preemptirqsoff_hist, i);
- atomic_set(&my_hist->hist_mode, 1);
- my_hist->min_lat = LONG_MAX;
- }
- entry = debugfs_create_file("reset", 0644, dentry,
- (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops);
-#endif
-
-#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
- entry = debugfs_create_file("preemptirqsoff", 0644,
- enable_root, (void *)&preemptirqsoff_enabled_data,
- &enable_fops);
-#endif
-
-#ifdef CONFIG_WAKEUP_LATENCY_HIST
- dentry = debugfs_create_dir(wakeup_latency_hist_dir,
- latency_hist_root);
- dentry_sharedprio = debugfs_create_dir(
- wakeup_latency_hist_dir_sharedprio, dentry);
- for_each_possible_cpu(i) {
- sprintf(name, cpufmt, i);
-
- entry = debugfs_create_file(name, 0444, dentry,
- &per_cpu(wakeup_latency_hist, i),
- &latency_hist_fops);
- my_hist = &per_cpu(wakeup_latency_hist, i);
- atomic_set(&my_hist->hist_mode, 1);
- my_hist->min_lat = LONG_MAX;
-
- entry = debugfs_create_file(name, 0444, dentry_sharedprio,
- &per_cpu(wakeup_latency_hist_sharedprio, i),
- &latency_hist_fops);
- my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i);
- atomic_set(&my_hist->hist_mode, 1);
- my_hist->min_lat = LONG_MAX;
-
- sprintf(name, cpufmt_maxlatproc, i);
-
- mp = &per_cpu(wakeup_maxlatproc, i);
- entry = debugfs_create_file(name, 0444, dentry, mp,
- &maxlatproc_fops);
- clear_maxlatprocdata(mp);
-
- mp = &per_cpu(wakeup_maxlatproc_sharedprio, i);
- entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp,
- &maxlatproc_fops);
- clear_maxlatprocdata(mp);
- }
- entry = debugfs_create_file("pid", 0644, dentry,
- (void *)&wakeup_pid, &pid_fops);
- entry = debugfs_create_file("reset", 0644, dentry,
- (void *)WAKEUP_LATENCY, &latency_hist_reset_fops);
- entry = debugfs_create_file("reset", 0644, dentry_sharedprio,
- (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops);
- entry = debugfs_create_file("wakeup", 0644,
- enable_root, (void *)&wakeup_latency_enabled_data,
- &enable_fops);
-#endif
-
-#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
- dentry = debugfs_create_dir(missed_timer_offsets_dir,
- latency_hist_root);
- for_each_possible_cpu(i) {
- sprintf(name, cpufmt, i);
- entry = debugfs_create_file(name, 0444, dentry,
- &per_cpu(missed_timer_offsets, i), &latency_hist_fops);
- my_hist = &per_cpu(missed_timer_offsets, i);
- atomic_set(&my_hist->hist_mode, 1);
- my_hist->min_lat = LONG_MAX;
-
- sprintf(name, cpufmt_maxlatproc, i);
- mp = &per_cpu(missed_timer_offsets_maxlatproc, i);
- entry = debugfs_create_file(name, 0444, dentry, mp,
- &maxlatproc_fops);
- clear_maxlatprocdata(mp);
- }
- entry = debugfs_create_file("pid", 0644, dentry,
- (void *)&missed_timer_offsets_pid, &pid_fops);
- entry = debugfs_create_file("reset", 0644, dentry,
- (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops);
- entry = debugfs_create_file("missed_timer_offsets", 0644,
- enable_root, (void *)&missed_timer_offsets_enabled_data,
- &enable_fops);
-#endif
-
-#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
- defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
- dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir,
- latency_hist_root);
- for_each_possible_cpu(i) {
- sprintf(name, cpufmt, i);
- entry = debugfs_create_file(name, 0444, dentry,
- &per_cpu(timerandwakeup_latency_hist, i),
- &latency_hist_fops);
- my_hist = &per_cpu(timerandwakeup_latency_hist, i);
- atomic_set(&my_hist->hist_mode, 1);
- my_hist->min_lat = LONG_MAX;
-
- sprintf(name, cpufmt_maxlatproc, i);
- mp = &per_cpu(timerandwakeup_maxlatproc, i);
- entry = debugfs_create_file(name, 0444, dentry, mp,
- &maxlatproc_fops);
- clear_maxlatprocdata(mp);
- }
- entry = debugfs_create_file("reset", 0644, dentry,
- (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops);
- entry = debugfs_create_file("timerandwakeup", 0644,
- enable_root, (void *)&timerandwakeup_enabled_data,
- &enable_fops);
-#endif
- return 0;
-}
-
-device_initcall(latency_hist_init);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f9401ed..138077b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -442,7 +442,7 @@ int __trace_puts(unsigned long ip, const char *str, int size)
local_save_flags(irq_flags);
buffer = global_trace.trace_buffer.buffer;
- event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
+ event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
irq_flags, preempt_count());
if (!event)
return 0;
@@ -1509,7 +1509,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
struct task_struct *tsk = current;
entry->preempt_count = pc & 0xff;
- entry->preempt_lazy_count = preempt_lazy_count();
entry->pid = (tsk) ? tsk->pid : 0;
entry->flags =
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
@@ -1519,10 +1518,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
#endif
((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
- (need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) |
- (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0);
-
- entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0;
+ (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
}
EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
@@ -2412,17 +2408,14 @@ get_total_entries(struct trace_buffer *buf,
static void print_lat_help_header(struct seq_file *m)
{
- seq_puts(m, "# _--------=> CPU# \n");
- seq_puts(m, "# / _-------=> irqs-off \n");
- seq_puts(m, "# | / _------=> need-resched \n");
- seq_puts(m, "# || / _-----=> need-resched_lazy \n");
- seq_puts(m, "# ||| / _----=> hardirq/softirq \n");
- seq_puts(m, "# |||| / _---=> preempt-depth \n");
- seq_puts(m, "# ||||| / _--=> preempt-lazy-depth\n");
- seq_puts(m, "# |||||| / _-=> migrate-disable \n");
- seq_puts(m, "# ||||||| / delay \n");
- seq_puts(m, "# cmd pid |||||||| time | caller \n");
- seq_puts(m, "# \\ / |||||||| \\ | / \n");
+ seq_puts(m, "# _------=> CPU# \n");
+ seq_puts(m, "# / _-----=> irqs-off \n");
+ seq_puts(m, "# | / _----=> need-resched \n");
+ seq_puts(m, "# || / _---=> hardirq/softirq \n");
+ seq_puts(m, "# ||| / _--=> preempt-depth \n");
+ seq_puts(m, "# |||| / delay \n");
+ seq_puts(m, "# cmd pid ||||| time | caller \n");
+ seq_puts(m, "# \\ / ||||| \\ | / \n");
}
static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
@@ -2446,16 +2439,13 @@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
{
print_event_info(buf, m);
- seq_puts(m, "# _-------=> irqs-off \n");
- seq_puts(m, "# / _------=> need-resched \n");
- seq_puts(m, "# |/ _-----=> need-resched_lazy \n");
- seq_puts(m, "# ||/ _----=> hardirq/softirq \n");
- seq_puts(m, "# |||/ _---=> preempt-depth \n");
- seq_puts(m, "# ||||/ _--=> preempt-lazy-depth\n");
- seq_puts(m, "# ||||| / _-=> migrate-disable \n");
- seq_puts(m, "# |||||| / delay\n");
- seq_puts(m, "# TASK-PID CPU# |||||| TIMESTAMP FUNCTION\n");
- seq_puts(m, "# | | | |||||| | |\n");
+ seq_puts(m, "# _-----=> irqs-off\n");
+ seq_puts(m, "# / _----=> need-resched\n");
+ seq_puts(m, "# | / _---=> hardirq/softirq\n");
+ seq_puts(m, "# || / _--=> preempt-depth\n");
+ seq_puts(m, "# ||| / delay\n");
+ seq_puts(m, "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n");
+ seq_puts(m, "# | | | |||| | |\n");
}
void
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 109291a..10c86fb 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -117,7 +117,6 @@ struct kretprobe_trace_entry_head {
* NEED_RESCHED - reschedule is requested
* HARDIRQ - inside an interrupt handler
* SOFTIRQ - inside a softirq handler
- * NEED_RESCHED_LAZY - lazy reschedule is requested
*/
enum trace_flag_type {
TRACE_FLAG_IRQS_OFF = 0x01,
@@ -125,7 +124,6 @@ enum trace_flag_type {
TRACE_FLAG_NEED_RESCHED = 0x04,
TRACE_FLAG_HARDIRQ = 0x08,
TRACE_FLAG_SOFTIRQ = 0x10,
- TRACE_FLAG_NEED_RESCHED_LAZY = 0x20,
};
#define TRACE_BUF_SIZE 1024
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 7531ded..bc1bd20 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -27,12 +27,6 @@
DEFINE_MUTEX(event_mutex);
-DEFINE_MUTEX(event_storage_mutex);
-EXPORT_SYMBOL_GPL(event_storage_mutex);
-
-char event_storage[EVENT_STORAGE_SIZE];
-EXPORT_SYMBOL_GPL(event_storage);
-
LIST_HEAD(ftrace_events);
static LIST_HEAD(ftrace_common_fields);
@@ -166,8 +160,6 @@ static int trace_define_common_fields(void)
__common_field(unsigned char, flags);
__common_field(unsigned char, preempt_count);
__common_field(int, pid);
- __common_field(unsigned short, migrate_disable);
- __common_field(unsigned short, padding);
return ret;
}
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d21a746..d7d0b50 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -95,15 +95,12 @@ static void __always_unused ____ftrace_check_##name(void) \
#undef __array
#define __array(type, item, len) \
do { \
+ char *type_str = #type"["__stringify(len)"]"; \
BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
- mutex_lock(&event_storage_mutex); \
- snprintf(event_storage, sizeof(event_storage), \
- "%s[%d]", #type, len); \
- ret = trace_define_field(event_call, event_storage, #item, \
+ ret = trace_define_field(event_call, type_str, #item, \
offsetof(typeof(field), item), \
sizeof(field.item), \
is_signed_type(type), filter_type); \
- mutex_unlock(&event_storage_mutex); \
if (ret) \
return ret; \
} while (0);
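The replacement above builds the "type[len]" string at compile time from string-literal pieces instead of formatting it into a shared, mutex-protected buffer. The trick is ordinary preprocessor stringification plus adjacent-literal concatenation, for example:

    #include <stdio.h>

    #define __stringify_1(x) #x
    #define __stringify(x)   __stringify_1(x)

    /* "#type" stringifies the macro argument; adjacent string literals are
       concatenated by the compiler, so no runtime buffer or lock is needed. */
    #define TYPE_STR(type, len) #type "[" __stringify(len) "]"

    int main(void)
    {
        puts(TYPE_STR(char, 16));   /* prints: char[16] */
        return 0;
    }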
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 2f4eb37..2aefbee 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -17,7 +17,6 @@
#include <linux/fs.h>
#include "trace.h"
-#include <trace/events/hist.h>
static struct trace_array *irqsoff_trace __read_mostly;
static int tracer_enabled __read_mostly;
@@ -440,13 +439,11 @@ void start_critical_timings(void)
{
if (preempt_trace() || irq_trace())
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
- trace_preemptirqsoff_hist(TRACE_START, 1);
}
EXPORT_SYMBOL_GPL(start_critical_timings);
void stop_critical_timings(void)
{
- trace_preemptirqsoff_hist(TRACE_STOP, 0);
if (preempt_trace() || irq_trace())
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
@@ -456,7 +453,6 @@ EXPORT_SYMBOL_GPL(stop_critical_timings);
#ifdef CONFIG_PROVE_LOCKING
void time_hardirqs_on(unsigned long a0, unsigned long a1)
{
- trace_preemptirqsoff_hist(IRQS_ON, 0);
if (!preempt_trace() && irq_trace())
stop_critical_timing(a0, a1);
}
@@ -465,7 +461,6 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1)
{
if (!preempt_trace() && irq_trace())
start_critical_timing(a0, a1);
- trace_preemptirqsoff_hist(IRQS_OFF, 1);
}
#else /* !CONFIG_PROVE_LOCKING */
@@ -491,7 +486,6 @@ inline void print_irqtrace_events(struct task_struct *curr)
*/
void trace_hardirqs_on(void)
{
- trace_preemptirqsoff_hist(IRQS_ON, 0);
if (!preempt_trace() && irq_trace())
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
@@ -501,13 +495,11 @@ void trace_hardirqs_off(void)
{
if (!preempt_trace() && irq_trace())
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
- trace_preemptirqsoff_hist(IRQS_OFF, 1);
}
EXPORT_SYMBOL(trace_hardirqs_off);
void trace_hardirqs_on_caller(unsigned long caller_addr)
{
- trace_preemptirqsoff_hist(IRQS_ON, 0);
if (!preempt_trace() && irq_trace())
stop_critical_timing(CALLER_ADDR0, caller_addr);
}
@@ -517,7 +509,6 @@ void trace_hardirqs_off_caller(unsigned long caller_addr)
{
if (!preempt_trace() && irq_trace())
start_critical_timing(CALLER_ADDR0, caller_addr);
- trace_preemptirqsoff_hist(IRQS_OFF, 1);
}
EXPORT_SYMBOL(trace_hardirqs_off_caller);
@@ -527,14 +518,12 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller);
#ifdef CONFIG_PREEMPT_TRACER
void trace_preempt_on(unsigned long a0, unsigned long a1)
{
- trace_preemptirqsoff_hist(PREEMPT_ON, 0);
if (preempt_trace() && !irq_trace())
stop_critical_timing(a0, a1);
}
void trace_preempt_off(unsigned long a0, unsigned long a1)
{
- trace_preemptirqsoff_hist(PREEMPT_ON, 1);
if (preempt_trace() && !irq_trace())
start_critical_timing(a0, a1);
}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 46b6467..34e7cba 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -606,7 +606,6 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
{
char hardsoft_irq;
char need_resched;
- char need_resched_lazy;
char irqs_off;
int hardirq;
int softirq;
@@ -621,17 +620,14 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
'.';
need_resched =
(entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.';
- need_resched_lazy =
- (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
hardsoft_irq =
(hardirq && softirq) ? 'H' :
hardirq ? 'h' :
softirq ? 's' :
'.';
- if (!trace_seq_printf(s, "%c%c%c%c",
- irqs_off, need_resched, need_resched_lazy,
- hardsoft_irq))
+ if (!trace_seq_printf(s, "%c%c%c",
+ irqs_off, need_resched, hardsoft_irq))
return 0;
if (entry->preempt_count)
@@ -639,16 +635,6 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
else
ret = trace_seq_putc(s, '.');
- if (entry->preempt_lazy_count)
- ret = trace_seq_printf(s, "%x", entry->preempt_lazy_count);
- else
- ret = trace_seq_putc(s, '.');
-
- if (entry->migrate_disable)
- ret = trace_seq_printf(s, "%x", entry->migrate_disable);
- else
- ret = trace_seq_putc(s, '.');
-
return ret;
}
diff --git a/kernel/user.c b/kernel/user.c
index 2800008..5bbb919 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -154,11 +154,11 @@ void free_uid(struct user_struct *up)
if (!up)
return;
- local_irq_save_nort(flags);
+ local_irq_save(flags);
if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
free_user(up, flags);
else
- local_irq_restore_nort(flags);
+ local_irq_restore(flags);
}
struct user_struct *alloc_uid(kuid_t uid)
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 13fb113..6991139 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -146,7 +146,7 @@ static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
/* Find the matching extent */
extents = map->nr_extents;
- smp_read_barrier_depends();
+ smp_rmb();
for (idx = 0; idx < extents; idx++) {
first = map->extent[idx].first;
last = first + map->extent[idx].count - 1;
@@ -170,7 +170,7 @@ static u32 map_id_down(struct uid_gid_map *map, u32 id)
/* Find the matching extent */
extents = map->nr_extents;
- smp_read_barrier_depends();
+ smp_rmb();
for (idx = 0; idx < extents; idx++) {
first = map->extent[idx].first;
last = first + map->extent[idx].count - 1;
@@ -193,7 +193,7 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id)
/* Find the matching extent */
extents = map->nr_extents;
- smp_read_barrier_depends();
+ smp_rmb();
for (idx = 0; idx < extents; idx++) {
first = map->extent[idx].lower_first;
last = first + map->extent[idx].count - 1;
@@ -609,9 +609,8 @@ static ssize_t map_write(struct file *file, const char __user *buf,
* were written before the count of the extents.
*
* To achieve this smp_wmb() is used on guarantee the write
- * order and smp_read_barrier_depends() is guaranteed that we
- * don't have crazy architectures returning stale data.
- *
+ * order and smp_rmb() is guaranteed that we don't have crazy
+ * architectures returning stale data.
*/
mutex_lock(&id_map_mutex);
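The hunks above replace smp_read_barrier_depends() with smp_rmb() when walking the extent array, and the rewritten comment spells out the publish pattern: write the extents first, barrier, then write the count. In C11 terms this is a release store of the count paired with an acquire load on the reader side, roughly as sketched below (illustrative types and functions, not the kernel's uid_gid_map code):

    #include <stdatomic.h>

    struct extent { unsigned int first, count; };

    static struct extent extents[8];
    static atomic_uint   nr_extents;     /* published element count */

    /* Writer: fill the extents first, then release-store the new count. */
    static void publish(const struct extent *src, unsigned int n)
    {
        for (unsigned int i = 0; i < n; i++)
            extents[i] = src[i];
        atomic_store_explicit(&nr_extents, n, memory_order_release);
    }

    /* Reader: acquire-load the count so every extent it covers is visible
       before the loop walks the array. */
    static int lookup(unsigned int id)
    {
        unsigned int n = atomic_load_explicit(&nr_extents, memory_order_acquire);

        for (unsigned int i = 0; i < n; i++) {
            unsigned int first = extents[i].first;
            unsigned int last  = first + extents[i].count - 1;

            if (id >= first && id <= last)
                return (int)i;
        }
        return -1;
    }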
diff --git a/kernel/wait-simple.c b/kernel/wait-simple.c
deleted file mode 100644
index 7dfa86d..0000000
--- a/kernel/wait-simple.c
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Simple waitqueues without fancy flags and callbacks
- *
- * (C) 2011 Thomas Gleixner <tglx@linutronix.de>
- *
- * Based on kernel/wait.c
- *
- * For licencing details see kernel-base/COPYING
- */
-#include <linux/init.h>
-#include <linux/export.h>
-#include <linux/sched.h>
-#include <linux/wait-simple.h>
-
-/* Adds w to head->list. Must be called with head->lock locked. */
-static inline void __swait_enqueue(struct swait_head *head, struct swaiter *w)
-{
- list_add(&w->node, &head->list);
- /* We can't let the condition leak before the setting of head */
- smp_mb();
-}
-
-/* Removes w from head->list. Must be called with head->lock locked. */
-static inline void __swait_dequeue(struct swaiter *w)
-{
- list_del_init(&w->node);
-}
-
-void __init_swait_head(struct swait_head *head, struct lock_class_key *key)
-{
- raw_spin_lock_init(&head->lock);
- lockdep_set_class(&head->lock, key);
- INIT_LIST_HEAD(&head->list);
-}
-EXPORT_SYMBOL(__init_swait_head);
-
-void swait_prepare_locked(struct swait_head *head, struct swaiter *w)
-{
- w->task = current;
- if (list_empty(&w->node))
- __swait_enqueue(head, w);
-}
-
-void swait_prepare(struct swait_head *head, struct swaiter *w, int state)
-{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&head->lock, flags);
- swait_prepare_locked(head, w);
- __set_current_state(state);
- raw_spin_unlock_irqrestore(&head->lock, flags);
-}
-EXPORT_SYMBOL(swait_prepare);
-
-void swait_finish_locked(struct swait_head *head, struct swaiter *w)
-{
- __set_current_state(TASK_RUNNING);
- if (w->task)
- __swait_dequeue(w);
-}
-
-void swait_finish(struct swait_head *head, struct swaiter *w)
-{
- unsigned long flags;
-
- __set_current_state(TASK_RUNNING);
- if (w->task) {
- raw_spin_lock_irqsave(&head->lock, flags);
- __swait_dequeue(w);
- raw_spin_unlock_irqrestore(&head->lock, flags);
- }
-}
-EXPORT_SYMBOL(swait_finish);
-
-unsigned int
-__swait_wake_locked(struct swait_head *head, unsigned int state, unsigned int num)
-{
- struct swaiter *curr, *next;
- int woken = 0;
-
- list_for_each_entry_safe(curr, next, &head->list, node) {
- if (wake_up_state(curr->task, state)) {
- __swait_dequeue(curr);
- /*
- * The waiting task can free the waiter as
- * soon as curr->task = NULL is written,
- * without taking any locks. A memory barrier
- * is required here to prevent the following
- * store to curr->task from getting ahead of
- * the dequeue operation.
- */
- smp_wmb();
- curr->task = NULL;
- if (++woken == num)
- break;
- }
- }
- return woken;
-}
-
-unsigned int
-__swait_wake(struct swait_head *head, unsigned int state, unsigned int num)
-{
- unsigned long flags;
- int woken;
-
- if (!swaitqueue_active(head))
- return 0;
-
- raw_spin_lock_irqsave(&head->lock, flags);
- woken = __swait_wake_locked(head, state, num);
- raw_spin_unlock_irqrestore(&head->lock, flags);
- return woken;
-}
-EXPORT_SYMBOL(__swait_wake);
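
For reference, a rough usage sketch of the simple-waitqueue API deleted above (the condition flag and the wake state are invented for illustration; the real users were RT-specific callers elsewhere in the tree):

	static struct swait_head wait_head;	/* initialized via __init_swait_head() */
	static bool condition;

	static void waiter(void)
	{
		struct swaiter w = { .node = LIST_HEAD_INIT(w.node) };

		for (;;) {
			swait_prepare(&wait_head, &w, TASK_UNINTERRUPTIBLE);
			if (condition)
				break;
			schedule();
		}
		swait_finish(&wait_head, &w);
	}

	static void waker(void)
	{
		condition = true;
		__swait_wake(&wait_head, TASK_NORMAL, 1);	/* wake at most one waiter */
	}
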
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 870b748..4431610 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -205,8 +205,6 @@ static int is_softlockup(unsigned long touch_ts)
#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static DEFINE_RAW_SPINLOCK(watchdog_output_lock);
-
static struct perf_event_attr wd_hw_attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
@@ -241,19 +239,10 @@ static void watchdog_overflow_callback(struct perf_event *event,
if (__this_cpu_read(hard_watchdog_warn) == true)
return;
- /*
- * If early-printk is enabled then make sure we do not
- * lock up in printk() and kill console logging:
- */
- printk_kill();
-
- if (hardlockup_panic) {
+ if (hardlockup_panic)
panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
- } else {
- raw_spin_lock(&watchdog_output_lock);
+ else
WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
- raw_spin_unlock(&watchdog_output_lock);
- }
__this_cpu_write(hard_watchdog_warn, true);
return;
@@ -357,7 +346,6 @@ static void watchdog_enable(unsigned int cpu)
/* kick off the timer for the hardlockup detector */
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = watchdog_timer_fn;
- hrtimer->irqsafe = 1;
/* Enable the perf event */
watchdog_nmi_enable(cpu);
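
For context, a generic sketch of the hrtimer setup pattern watchdog_enable() is left with after the revert (the callback and the one-second period here are placeholders; the RT-only ->irqsafe flag is gone):

	static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
	{
		/* periodic check would go here */
		hrtimer_forward_now(t, ns_to_ktime(NSEC_PER_SEC));
		return HRTIMER_RESTART;
	}

	static void start_my_timer(struct hrtimer *t)
	{
		hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		t->function = my_timer_fn;
		hrtimer_start(t, ns_to_ktime(NSEC_PER_SEC), HRTIMER_MODE_REL);
	}
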
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9efb7ce..60fee69 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -48,8 +48,6 @@
#include <linux/nodemask.h>
#include <linux/moduleparam.h>
#include <linux/uaccess.h>
-#include <linux/locallock.h>
-#include <linux/delay.h>
#include "workqueue_internal.h"
@@ -131,11 +129,11 @@ enum {
*
* PL: wq_pool_mutex protected.
*
- * PR: wq_pool_mutex protected for writes. RCU protected for reads.
+ * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads.
*
* WQ: wq->mutex protected.
*
- * WR: wq->mutex protected for writes. RCU protected for reads.
+ * WR: wq->mutex protected for writes. Sched-RCU protected for reads.
*
* MD: wq_mayday_lock protected.
*/
@@ -180,7 +178,7 @@ struct worker_pool {
atomic_t nr_running ____cacheline_aligned_in_smp;
/*
- * Destruction of pool is RCU protected to allow dereferences
+ * Destruction of pool is sched-RCU protected to allow dereferences
* from get_work_pool().
*/
struct rcu_head rcu;
@@ -209,7 +207,7 @@ struct pool_workqueue {
/*
* Release of unbound pwq is punted to system_wq. See put_pwq()
* and pwq_unbound_release_workfn() for details. pool_workqueue
- * itself is also RCU protected so that the first pwq can be
+ * itself is also sched-RCU protected so that the first pwq can be
* determined without grabbing wq->mutex.
*/
struct work_struct unbound_release_work;
@@ -325,8 +323,6 @@ EXPORT_SYMBOL_GPL(system_power_efficient_wq);
struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
-static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock);
-
static int worker_thread(void *__worker);
static void copy_workqueue_attrs(struct workqueue_attrs *to,
const struct workqueue_attrs *from);
@@ -335,14 +331,14 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
#include <trace/events/workqueue.h>
#define assert_rcu_or_pool_mutex() \
- rcu_lockdep_assert(rcu_read_lock_held() || \
+ rcu_lockdep_assert(rcu_read_lock_sched_held() || \
lockdep_is_held(&wq_pool_mutex), \
- "RCU or wq_pool_mutex should be held")
+ "sched RCU or wq_pool_mutex should be held")
#define assert_rcu_or_wq_mutex(wq) \
- rcu_lockdep_assert(rcu_read_lock_held() || \
+ rcu_lockdep_assert(rcu_read_lock_sched_held() || \
lockdep_is_held(&wq->mutex), \
- "RCU or wq->mutex should be held")
+ "sched RCU or wq->mutex should be held")
#ifdef CONFIG_LOCKDEP
#define assert_manager_or_pool_lock(pool) \
@@ -364,7 +360,7 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
* @pool: iteration cursor
* @pi: integer used for iteration
*
- * This must be called either with wq_pool_mutex held or RCU read
+ * This must be called either with wq_pool_mutex held or sched RCU read
* locked. If the pool needs to be used beyond the locking in effect, the
* caller is responsible for guaranteeing that the pool stays online.
*
@@ -397,7 +393,7 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
* @pwq: iteration cursor
* @wq: the target workqueue
*
- * This must be called either with wq->mutex held or RCU read locked.
+ * This must be called either with wq->mutex held or sched RCU read locked.
* If the pwq needs to be used beyond the locking in effect, the caller is
* responsible for guaranteeing that the pwq stays online.
*
@@ -545,7 +541,7 @@ static int worker_pool_assign_id(struct worker_pool *pool)
* @wq: the target workqueue
* @node: the node ID
*
- * This must be called either with pwq_lock held or RCU read locked.
+ * This must be called either with pwq_lock held or sched RCU read locked.
* If the pwq needs to be used beyond the locking in effect, the caller is
* responsible for guaranteeing that the pwq stays online.
*
@@ -649,8 +645,8 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work)
* @work: the work item of interest
*
* Pools are created and destroyed under wq_pool_mutex, and allows read
- * access under RCU read lock. As such, this function should be
- * called under wq_pool_mutex or inside of a rcu_read_lock() region.
+ * access under sched-RCU read lock. As such, this function should be
+ * called under wq_pool_mutex or with preemption disabled.
*
* All fields of the returned pool are accessible as long as the above
* mentioned locking is in effect. If the returned pool needs to be used
@@ -808,31 +804,44 @@ static void wake_up_worker(struct worker_pool *pool)
}
/**
- * wq_worker_running - a worker is running again
- * @task: task returning from sleep
+ * wq_worker_waking_up - a worker is waking up
+ * @task: task waking up
+ * @cpu: CPU @task is waking up to
+ *
+ * This function is called during try_to_wake_up() when a worker is
+ * being awoken.
*
- * This function is called when a worker returns from schedule()
+ * CONTEXT:
+ * spin_lock_irq(rq->lock)
*/
-void wq_worker_running(struct task_struct *task)
+void wq_worker_waking_up(struct task_struct *task, int cpu)
{
struct worker *worker = kthread_data(task);
- if (!worker->sleeping)
- return;
- if (!(worker->flags & WORKER_NOT_RUNNING))
+ if (!(worker->flags & WORKER_NOT_RUNNING)) {
+ WARN_ON_ONCE(worker->pool->cpu != cpu);
atomic_inc(&worker->pool->nr_running);
- worker->sleeping = 0;
+ }
}
/**
* wq_worker_sleeping - a worker is going to sleep
* @task: task going to sleep
- * This function is called from schedule() when a busy worker is
- * going to sleep.
+ * @cpu: CPU in question, must be the current CPU number
+ *
+ * This function is called during schedule() when a busy worker is
+ * going to sleep. Worker on the same cpu can be woken up by
+ * returning pointer to its task.
+ *
+ * CONTEXT:
+ * spin_lock_irq(rq->lock)
+ *
+ * Return:
+ * Worker task on @cpu to wake up, %NULL if none.
*/
-void wq_worker_sleeping(struct task_struct *task)
+struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
{
- struct worker *next, *worker = kthread_data(task);
+ struct worker *worker = kthread_data(task), *to_wakeup = NULL;
struct worker_pool *pool;
/*
@@ -841,15 +850,14 @@ void wq_worker_sleeping(struct task_struct *task)
* checking NOT_RUNNING.
*/
if (worker->flags & WORKER_NOT_RUNNING)
- return;
+ return NULL;
pool = worker->pool;
- if (WARN_ON_ONCE(worker->sleeping))
- return;
+ /* this can only happen on the local cpu */
+ if (WARN_ON_ONCE(cpu != raw_smp_processor_id()))
+ return NULL;
- worker->sleeping = 1;
- spin_lock_irq(&pool->lock);
/*
* The counterpart of the following dec_and_test, implied mb,
* worklist not empty test sequence is in insert_work().
@@ -862,12 +870,9 @@ void wq_worker_sleeping(struct task_struct *task)
* lock is safe.
*/
if (atomic_dec_and_test(&pool->nr_running) &&
- !list_empty(&pool->worklist)) {
- next = first_worker(pool);
- if (next)
- wake_up_process(next->task);
- }
- spin_unlock_irq(&pool->lock);
+ !list_empty(&pool->worklist))
+ to_wakeup = first_worker(pool);
+ return to_wakeup ? to_wakeup->task : NULL;
}
/**
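
Caller-side sketch, paraphrased from memory of sched/core.c in this kernel generation (not part of this diff), showing why wq_worker_sleeping() now returns a task: the scheduler wakes the replacement worker itself, under rq->lock, instead of the workqueue code taking pool->lock.

	/* inside __schedule(), after deciding that prev is going to sleep */
	if (prev->flags & PF_WQ_WORKER) {
		struct task_struct *to_wakeup;

		to_wakeup = wq_worker_sleeping(prev, cpu);
		if (to_wakeup)
			try_to_wake_up_local(to_wakeup);	/* sched-internal helper */
	}
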
@@ -1074,12 +1079,12 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq)
{
if (pwq) {
/*
- * As both pwqs and pools are RCU protected, the
+ * As both pwqs and pools are sched-RCU protected, the
* following lock operations are safe.
*/
- local_spin_lock_irq(pendingb_lock, &pwq->pool->lock);
+ spin_lock_irq(&pwq->pool->lock);
put_pwq(pwq);
- local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock);
+ spin_unlock_irq(&pwq->pool->lock);
}
}
@@ -1181,7 +1186,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
struct worker_pool *pool;
struct pool_workqueue *pwq;
- local_lock_irqsave(pendingb_lock, *flags);
+ local_irq_save(*flags);
/* try to steal the timer if it exists */
if (is_dwork) {
@@ -1200,7 +1205,6 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
return 0;
- rcu_read_lock();
/*
* The queueing is in progress, or it is already queued. Try to
* steal it from ->worklist without clearing WORK_STRUCT_PENDING.
@@ -1239,16 +1243,14 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
set_work_pool_and_keep_pending(work, pool->id);
spin_unlock(&pool->lock);
- rcu_read_unlock();
return 1;
}
spin_unlock(&pool->lock);
fail:
- rcu_read_unlock();
- local_unlock_irqrestore(pendingb_lock, *flags);
+ local_irq_restore(*flags);
if (work_is_canceling(work))
return -ENOENT;
- cpu_chill();
+ cpu_relax();
return -EAGAIN;
}
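
A condensed sketch of the retry loop that consumes these return codes (the full version is __cancel_work_timer() further down in this file): with the revert, the -EAGAIN case busy-waits with cpu_relax() rather than cpu_chill().

	static bool grab_pending_retrying(struct work_struct *work, bool is_dwork)
	{
		unsigned long flags;
		int ret;

		do {
			ret = try_to_grab_pending(work, is_dwork, &flags);
			if (unlikely(ret == -ENOENT))
				flush_work(work);	/* someone else is canceling it */
		} while (unlikely(ret < 0));		/* -EAGAIN: try again */

		/* here we own PENDING and interrupts are off, state in flags */
		local_irq_restore(flags);
		return ret;
	}
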
@@ -1317,7 +1319,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
* queued or lose PENDING. Grabbing PENDING and queueing should
* happen with IRQ disabled.
*/
- WARN_ON_ONCE_NONRT(!irqs_disabled());
+ WARN_ON_ONCE(!irqs_disabled());
debug_work_activate(work);
@@ -1325,8 +1327,6 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
if (unlikely(wq->flags & __WQ_DRAINING) &&
WARN_ON_ONCE(!is_chained_work(wq)))
return;
-
- rcu_read_lock();
retry:
if (req_cpu == WORK_CPU_UNBOUND)
cpu = raw_smp_processor_id();
@@ -1383,8 +1383,10 @@ retry:
/* pwq determined, queue */
trace_workqueue_queue_work(req_cpu, pwq, work);
- if (WARN_ON(!list_empty(&work->entry)))
- goto out;
+ if (WARN_ON(!list_empty(&work->entry))) {
+ spin_unlock(&pwq->pool->lock);
+ return;
+ }
pwq->nr_in_flight[pwq->work_color]++;
work_flags = work_color_to_flags(pwq->work_color);
@@ -1400,9 +1402,7 @@ retry:
insert_work(pwq, work, worklist, work_flags);
-out:
spin_unlock(&pwq->pool->lock);
- rcu_read_unlock();
}
/**
@@ -1422,14 +1422,14 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
bool ret = false;
unsigned long flags;
- local_lock_irqsave(pendingb_lock,flags);
+ local_irq_save(flags);
if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
__queue_work(cpu, wq, work);
ret = true;
}
- local_unlock_irqrestore(pendingb_lock, flags);
+ local_irq_restore(flags);
return ret;
}
EXPORT_SYMBOL(queue_work_on);
@@ -1496,14 +1496,14 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
unsigned long flags;
/* read the comment in __queue_work() */
- local_lock_irqsave(pendingb_lock, flags);
+ local_irq_save(flags);
if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
__queue_delayed_work(cpu, wq, dwork, delay);
ret = true;
}
- local_unlock_irqrestore(pendingb_lock, flags);
+ local_irq_restore(flags);
return ret;
}
EXPORT_SYMBOL(queue_delayed_work_on);
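
Typical caller-side usage of the queueing entry points touched above (a generic example, not taken from this patch); callers never disable interrupts themselves, which is why the plain local_irq_save()/local_irq_restore() restored here is all these helpers need.

	static void my_work_fn(struct work_struct *work)
	{
		pr_info("work item ran\n");
	}

	static DECLARE_WORK(my_work, my_work_fn);
	static DECLARE_DELAYED_WORK(my_dwork, my_work_fn);

	static void kick_work(void)
	{
		queue_work_on(0, system_wq, &my_work);			/* run on CPU 0      */
		queue_delayed_work_on(1, system_wq, &my_dwork,
				      msecs_to_jiffies(100));		/* CPU 1, ~100ms out */
	}
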
@@ -1538,7 +1538,7 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
if (likely(ret >= 0)) {
__queue_delayed_work(cpu, wq, dwork, delay);
- local_unlock_irqrestore(pendingb_lock, flags);
+ local_irq_restore(flags);
}
/* -ENOENT from try_to_grab_pending() becomes %true */
@@ -2809,14 +2809,14 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
might_sleep();
- rcu_read_lock();
+ local_irq_disable();
pool = get_work_pool(work);
if (!pool) {
- rcu_read_unlock();
+ local_irq_enable();
return false;
}
- spin_lock_irq(&pool->lock);
+ spin_lock(&pool->lock);
/* see the comment in try_to_grab_pending() with the same code */
pwq = get_work_pwq(work);
if (pwq) {
@@ -2843,11 +2843,10 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
else
lock_map_acquire_read(&pwq->wq->lockdep_map);
lock_map_release(&pwq->wq->lockdep_map);
- rcu_read_unlock();
+
return true;
already_gone:
spin_unlock_irq(&pool->lock);
- rcu_read_unlock();
return false;
}
@@ -2901,7 +2900,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
/* tell other tasks trying to grab @work to back off */
mark_work_canceling(work);
- local_unlock_irqrestore(pendingb_lock, flags);
+ local_irq_restore(flags);
flush_work(work);
clear_work_data(work);
@@ -2946,10 +2945,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync);
*/
bool flush_delayed_work(struct delayed_work *dwork)
{
- local_lock_irq(pendingb_lock);
+ local_irq_disable();
if (del_timer_sync(&dwork->timer))
__queue_work(dwork->cpu, dwork->wq, &dwork->work);
- local_unlock_irq(pendingb_lock);
+ local_irq_enable();
return flush_work(&dwork->work);
}
EXPORT_SYMBOL(flush_delayed_work);
@@ -2984,7 +2983,7 @@ bool cancel_delayed_work(struct delayed_work *dwork)
set_work_pool_and_clear_pending(&dwork->work,
get_work_pool_id(&dwork->work));
- local_unlock_irqrestore(pendingb_lock, flags);
+ local_irq_restore(flags);
return ret;
}
EXPORT_SYMBOL(cancel_delayed_work);
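
Caller-side counterpart to the two helpers above (a generic example, not from this patch): flush_delayed_work() fires a pending timer early and waits for the work to finish, while cancel_delayed_work() merely drops a not-yet-started instance.

	static void stop_my_dwork(struct delayed_work *dwork, bool wait_for_it)
	{
		if (wait_for_it)
			flush_delayed_work(dwork);	/* run it now and wait        */
		else
			cancel_delayed_work(dwork);	/* may still be running after */
	}
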
@@ -3170,8 +3169,7 @@ static ssize_t wq_pool_ids_show(struct device *dev,
const char *delim = "";
int node, written = 0;
- get_online_cpus();
- rcu_read_lock();
+ rcu_read_lock_sched();
for_each_node(node) {
written += scnprintf(buf + written, PAGE_SIZE - written,
"%s%d:%d", delim, node,
@@ -3179,8 +3177,7 @@ static ssize_t wq_pool_ids_show(struct device *dev,
delim = " ";
}
written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
- rcu_read_unlock();
- put_online_cpus();
+ rcu_read_unlock_sched();
return written;
}
@@ -3546,7 +3543,7 @@ static void rcu_free_pool(struct rcu_head *rcu)
* put_unbound_pool - put a worker_pool
* @pool: worker_pool to put
*
- * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU
+ * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU
* safe manner. get_unbound_pool() calls this function on its failure path
* and this function should be able to release pools which went through,
* successfully or not, init_worker_pool().
@@ -3593,8 +3590,8 @@ static void put_unbound_pool(struct worker_pool *pool)
del_timer_sync(&pool->idle_timer);
del_timer_sync(&pool->mayday_timer);
- /* RCU protected to allow dereferences from get_work_pool() */
- call_rcu(&pool->rcu, rcu_free_pool);
+ /* sched-RCU protected to allow dereferences from get_work_pool() */
+ call_rcu_sched(&pool->rcu, rcu_free_pool);
}
/**
@@ -3707,7 +3704,7 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
put_unbound_pool(pool);
mutex_unlock(&wq_pool_mutex);
- call_rcu(&pwq->rcu, rcu_free_pwq);
+ call_rcu_sched(&pwq->rcu, rcu_free_pwq);
/*
* If we're the last pwq going away, @wq is already dead and no one
@@ -4420,8 +4417,7 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
struct pool_workqueue *pwq;
bool ret;
- rcu_read_lock();
- preempt_disable();
+ rcu_read_lock_sched();
if (cpu == WORK_CPU_UNBOUND)
cpu = smp_processor_id();
@@ -4432,8 +4428,7 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
ret = !list_empty(&pwq->delayed_works);
- preempt_enable();
- rcu_read_unlock();
+ rcu_read_unlock_sched();
return ret;
}
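
A minimal sketch (invented data structure, not from the patch) of the sched-RCU discipline the revert restores throughout this file: readers that merely disable preemption via rcu_read_lock_sched() are paired with call_rcu_sched() on the update side, as get_work_pool() and put_unbound_pool() do above.

	struct item {
		int		value;
		struct rcu_head	rcu;
	};

	static struct item __rcu *cur_item;

	static int read_value(void)
	{
		struct item *it;
		int v = 0;

		rcu_read_lock_sched();			/* disables preemption */
		it = rcu_dereference_sched(cur_item);
		if (it)
			v = it->value;
		rcu_read_unlock_sched();
		return v;
	}

	static void free_item_rcu(struct rcu_head *head)
	{
		kfree(container_of(head, struct item, rcu));
	}

	static void replace_item(struct item *new)
	{
		struct item *old = rcu_dereference_protected(cur_item, 1);

		rcu_assign_pointer(cur_item, new);
		if (old)
			call_rcu_sched(&old->rcu, free_item_rcu);	/* waits out sched-RCU readers */
	}
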
@@ -4459,15 +4454,16 @@ unsigned int work_busy(struct work_struct *work)
if (work_pending(work))
ret |= WORK_BUSY_PENDING;
- rcu_read_lock();
+ local_irq_save(flags);
pool = get_work_pool(work);
if (pool) {
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock(&pool->lock);
if (find_worker_executing_work(pool, work))
ret |= WORK_BUSY_RUNNING;
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock(&pool->lock);
}
- rcu_read_unlock();
+ local_irq_restore(flags);
+
return ret;
}
EXPORT_SYMBOL_GPL(work_busy);
@@ -4920,16 +4916,16 @@ bool freeze_workqueues_busy(void)
* nr_active is monotonically decreasing. It's safe
* to peek without lock.
*/
- rcu_read_lock();
+ rcu_read_lock_sched();
for_each_pwq(pwq, wq) {
WARN_ON_ONCE(pwq->nr_active < 0);
if (pwq->nr_active) {
busy = true;
- rcu_read_unlock();
+ rcu_read_unlock_sched();
goto out_unlock;
}
}
- rcu_read_unlock();
+ rcu_read_unlock_sched();
}
out_unlock:
mutex_unlock(&wq_pool_mutex);
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 2bb5b5a..7e2204d 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -41,7 +41,6 @@ struct worker {
unsigned long last_active; /* L: last active timestamp */
unsigned int flags; /* X: flags */
int id; /* I: worker id */
- int sleeping; /* None */
/*
* Opaque string set with work_set_desc(). Printed out with task
@@ -67,7 +66,7 @@ static inline struct worker *current_wq_worker(void)
* Scheduler hooks for concurrency managed workqueue. Only to be used from
* sched/core.c and workqueue.c.
*/
-void wq_worker_running(struct task_struct *task);
-void wq_worker_sleeping(struct task_struct *task);
+void wq_worker_waking_up(struct task_struct *task, int cpu);
+struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu);
#endif /* _KERNEL_WORKQUEUE_INTERNAL_H */