summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/auditsc.c3
-rw-r--r--kernel/cpu.c13
-rw-r--r--kernel/cpuset.c2
-rw-r--r--kernel/events/core.c99
-rw-r--r--kernel/events/internal.h10
-rw-r--r--kernel/events/ring_buffer.c37
-rw-r--r--kernel/fork.c7
-rw-r--r--kernel/irq/chip.c19
-rw-r--r--kernel/irq/internals.h4
-rw-r--r--kernel/irq/resend.c18
-rw-r--r--kernel/kthread.c4
-rw-r--r--kernel/locking/qspinlock_paravirt.h11
-rw-r--r--kernel/module.c9
-rw-r--r--kernel/resource.c6
-rw-r--r--kernel/sched/fair.c2
-rw-r--r--kernel/signal.c13
-rw-r--r--kernel/time/clockevents.c24
-rw-r--r--kernel/time/tick-broadcast.c164
-rw-r--r--kernel/time/tick-common.c22
-rw-r--r--kernel/time/tick-sched.h10
-rw-r--r--kernel/time/timer.c4
-rw-r--r--kernel/trace/ftrace.c52
-rw-r--r--kernel/trace/trace.h1
-rw-r--r--kernel/trace/trace_branch.c17
24 files changed, 388 insertions, 163 deletions
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 09c6564..e85bdfd 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1021,8 +1021,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
* for strings that are too long, we should not have created
* any.
*/
- if (unlikely((len == 0) || len > MAX_ARG_STRLEN - 1)) {
- WARN_ON(1);
+ if (WARN_ON_ONCE(len < 0 || len > MAX_ARG_STRLEN - 1)) {
send_sig(SIGKILL, current, 0);
return -1;
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9c9c9fa..5644ec5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -21,6 +21,7 @@
#include <linux/suspend.h>
#include <linux/lockdep.h>
#include <linux/tick.h>
+#include <linux/irq.h>
#include <trace/events/power.h>
#include "smpboot.h"
@@ -392,13 +393,19 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
smpboot_park_threads(cpu);
/*
- * So now all preempt/rcu users must observe !cpu_active().
+ * Prevent irq alloc/free while the dying cpu reorganizes the
+ * interrupt affinities.
*/
+ irq_lock_sparse();
+ /*
+ * So now all preempt/rcu users must observe !cpu_active().
+ */
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
if (err) {
/* CPU didn't die: tell everyone. Can't complain. */
cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
+ irq_unlock_sparse();
goto out_release;
}
BUG_ON(cpu_online(cpu));
@@ -415,6 +422,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */
per_cpu(cpu_dead_idle, cpu) = false;
+ /* Interrupts are moved away from the dying cpu, reenable alloc/free */
+ irq_unlock_sparse();
+
hotplug_cpu__broadcast_tick_pull(cpu);
/* This actually kills the CPU. */
__cpu_die(cpu);
@@ -519,6 +529,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen)
/* Arch-specific enabling code. */
ret = __cpu_up(cpu, idle);
+
if (ret != 0)
goto out_notify;
BUG_ON(!cpu_online(cpu));
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ee14e3a..f0acff0 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1223,7 +1223,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
spin_unlock_irq(&callback_lock);
/* use trialcs->mems_allowed as a temp variable */
- update_nodemasks_hier(cs, &cs->mems_allowed);
+ update_nodemasks_hier(cs, &trialcs->mems_allowed);
done:
return retval;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e965cfa..e6feb51 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1868,8 +1868,6 @@ event_sched_in(struct perf_event *event,
perf_pmu_disable(event->pmu);
- event->tstamp_running += tstamp - event->tstamp_stopped;
-
perf_set_shadow_time(event, ctx, tstamp);
perf_log_itrace_start(event);
@@ -1881,6 +1879,8 @@ event_sched_in(struct perf_event *event,
goto out;
}
+ event->tstamp_running += tstamp - event->tstamp_stopped;
+
if (!is_software_event(event))
cpuctx->active_oncpu++;
if (!ctx->nr_active++)
@@ -3958,28 +3958,21 @@ static void perf_event_for_each(struct perf_event *event,
perf_event_for_each_child(sibling, func);
}
-static int perf_event_period(struct perf_event *event, u64 __user *arg)
-{
- struct perf_event_context *ctx = event->ctx;
- int ret = 0, active;
+struct period_event {
+ struct perf_event *event;
u64 value;
+};
- if (!is_sampling_event(event))
- return -EINVAL;
-
- if (copy_from_user(&value, arg, sizeof(value)))
- return -EFAULT;
-
- if (!value)
- return -EINVAL;
+static int __perf_event_period(void *info)
+{
+ struct period_event *pe = info;
+ struct perf_event *event = pe->event;
+ struct perf_event_context *ctx = event->ctx;
+ u64 value = pe->value;
+ bool active;
- raw_spin_lock_irq(&ctx->lock);
+ raw_spin_lock(&ctx->lock);
if (event->attr.freq) {
- if (value > sysctl_perf_event_sample_rate) {
- ret = -EINVAL;
- goto unlock;
- }
-
event->attr.sample_freq = value;
} else {
event->attr.sample_period = value;
@@ -3998,11 +3991,53 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
event->pmu->start(event, PERF_EF_RELOAD);
perf_pmu_enable(ctx->pmu);
}
+ raw_spin_unlock(&ctx->lock);
-unlock:
+ return 0;
+}
+
+static int perf_event_period(struct perf_event *event, u64 __user *arg)
+{
+ struct period_event pe = { .event = event, };
+ struct perf_event_context *ctx = event->ctx;
+ struct task_struct *task;
+ u64 value;
+
+ if (!is_sampling_event(event))
+ return -EINVAL;
+
+ if (copy_from_user(&value, arg, sizeof(value)))
+ return -EFAULT;
+
+ if (!value)
+ return -EINVAL;
+
+ if (event->attr.freq && value > sysctl_perf_event_sample_rate)
+ return -EINVAL;
+
+ task = ctx->task;
+ pe.value = value;
+
+ if (!task) {
+ cpu_function_call(event->cpu, __perf_event_period, &pe);
+ return 0;
+ }
+
+retry:
+ if (!task_function_call(task, __perf_event_period, &pe))
+ return 0;
+
+ raw_spin_lock_irq(&ctx->lock);
+ if (ctx->is_active) {
+ raw_spin_unlock_irq(&ctx->lock);
+ task = ctx->task;
+ goto retry;
+ }
+
+ __perf_event_period(&pe);
raw_spin_unlock_irq(&ctx->lock);
- return ret;
+ return 0;
}
static const struct file_operations perf_fops;
@@ -4358,14 +4393,6 @@ static void ring_buffer_wakeup(struct perf_event *event)
rcu_read_unlock();
}
-static void rb_free_rcu(struct rcu_head *rcu_head)
-{
- struct ring_buffer *rb;
-
- rb = container_of(rcu_head, struct ring_buffer, rcu_head);
- rb_free(rb);
-}
-
struct ring_buffer *ring_buffer_get(struct perf_event *event)
{
struct ring_buffer *rb;
@@ -4748,12 +4775,20 @@ static const struct file_operations perf_fops = {
* to user-space before waking everybody up.
*/
+static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
+{
+ /* only the parent has fasync state */
+ if (event->parent)
+ event = event->parent;
+ return &event->fasync;
+}
+
void perf_event_wakeup(struct perf_event *event)
{
ring_buffer_wakeup(event);
if (event->pending_kill) {
- kill_fasync(&event->fasync, SIGIO, event->pending_kill);
+ kill_fasync(perf_event_fasync(event), SIGIO, event->pending_kill);
event->pending_kill = 0;
}
}
@@ -6132,7 +6167,7 @@ static int __perf_event_overflow(struct perf_event *event,
else
perf_event_output(event, data, regs);
- if (event->fasync && event->pending_kill) {
+ if (*perf_event_fasync(event) && event->pending_kill) {
event->pending_wakeup = 1;
irq_work_queue(&event->pending);
}
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 2deb24c..2bbad9c 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -11,6 +11,7 @@
struct ring_buffer {
atomic_t refcount;
struct rcu_head rcu_head;
+ struct irq_work irq_work;
#ifdef CONFIG_PERF_USE_VMALLOC
struct work_struct work;
int page_order; /* allocation order */
@@ -55,6 +56,15 @@ struct ring_buffer {
};
extern void rb_free(struct ring_buffer *rb);
+
+static inline void rb_free_rcu(struct rcu_head *rcu_head)
+{
+ struct ring_buffer *rb;
+
+ rb = container_of(rcu_head, struct ring_buffer, rcu_head);
+ rb_free(rb);
+}
+
extern struct ring_buffer *
rb_alloc(int nr_pages, long watermark, int cpu, int flags);
extern void perf_event_wakeup(struct perf_event *event);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 9647282..c8aa3f7 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -221,6 +221,8 @@ void perf_output_end(struct perf_output_handle *handle)
rcu_read_unlock();
}
+static void rb_irq_work(struct irq_work *work);
+
static void
ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
{
@@ -241,6 +243,16 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
INIT_LIST_HEAD(&rb->event_list);
spin_lock_init(&rb->event_lock);
+ init_irq_work(&rb->irq_work, rb_irq_work);
+}
+
+static void ring_buffer_put_async(struct ring_buffer *rb)
+{
+ if (!atomic_dec_and_test(&rb->refcount))
+ return;
+
+ rb->rcu_head.next = (void *)rb;
+ irq_work_queue(&rb->irq_work);
}
/*
@@ -319,7 +331,7 @@ err_put:
rb_free_aux(rb);
err:
- ring_buffer_put(rb);
+ ring_buffer_put_async(rb);
handle->event = NULL;
return NULL;
@@ -370,7 +382,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
local_set(&rb->aux_nest, 0);
rb_free_aux(rb);
- ring_buffer_put(rb);
+ ring_buffer_put_async(rb);
}
/*
@@ -547,17 +559,30 @@ static void __rb_free_aux(struct ring_buffer *rb)
rb->aux_priv = NULL;
}
- for (pg = 0; pg < rb->aux_nr_pages; pg++)
- rb_free_aux_page(rb, pg);
+ if (rb->aux_nr_pages) {
+ for (pg = 0; pg < rb->aux_nr_pages; pg++)
+ rb_free_aux_page(rb, pg);
- kfree(rb->aux_pages);
- rb->aux_nr_pages = 0;
+ kfree(rb->aux_pages);
+ rb->aux_nr_pages = 0;
+ }
}
void rb_free_aux(struct ring_buffer *rb)
{
if (atomic_dec_and_test(&rb->aux_refcount))
+ irq_work_queue(&rb->irq_work);
+}
+
+static void rb_irq_work(struct irq_work *work)
+{
+ struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work);
+
+ if (!atomic_read(&rb->aux_refcount))
__rb_free_aux(rb);
+
+ if (rb->rcu_head.next == (void *)rb)
+ call_rcu(&rb->rcu_head, rb_free_rcu);
}
#ifndef CONFIG_PERF_USE_VMALLOC
diff --git a/kernel/fork.c b/kernel/fork.c
index 1bfefc6..dbd9b8d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -287,6 +287,11 @@ static void set_max_threads(unsigned int max_threads_suggested)
max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
}
+#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
+/* Initialized by the architecture: */
+int arch_task_struct_size __read_mostly;
+#endif
+
void __init fork_init(void)
{
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
@@ -295,7 +300,7 @@ void __init fork_init(void)
#endif
/* create a slab on which task_structs can be allocated */
task_struct_cachep =
- kmem_cache_create("task_struct", sizeof(struct task_struct),
+ kmem_cache_create("task_struct", arch_task_struct_size,
ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
#endif
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 27f4332..ae21682 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -985,6 +985,23 @@ int irq_chip_set_affinity_parent(struct irq_data *data,
}
/**
+ * irq_chip_set_type_parent - Set IRQ type on the parent interrupt
+ * @data: Pointer to interrupt specific data
+ * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h
+ *
+ * Conditional, as the underlying parent chip might not implement it.
+ */
+int irq_chip_set_type_parent(struct irq_data *data, unsigned int type)
+{
+ data = data->parent_data;
+
+ if (data->chip->irq_set_type)
+ return data->chip->irq_set_type(data, type);
+
+ return -ENOSYS;
+}
+
+/**
* irq_chip_retrigger_hierarchy - Retrigger an interrupt in hardware
* @data: Pointer to interrupt specific data
*
@@ -997,7 +1014,7 @@ int irq_chip_retrigger_hierarchy(struct irq_data *data)
if (data->chip && data->chip->irq_retrigger)
return data->chip->irq_retrigger(data);
- return -ENOSYS;
+ return 0;
}
/**
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 4834ee8..61008b8 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -76,12 +76,8 @@ extern void unmask_threaded_irq(struct irq_desc *desc);
#ifdef CONFIG_SPARSE_IRQ
static inline void irq_mark_irq(unsigned int irq) { }
-extern void irq_lock_sparse(void);
-extern void irq_unlock_sparse(void);
#else
extern void irq_mark_irq(unsigned int irq);
-static inline void irq_lock_sparse(void) { }
-static inline void irq_unlock_sparse(void) { }
#endif
extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index 9065107..7a5237a 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -75,13 +75,21 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq)
!desc->irq_data.chip->irq_retrigger(&desc->irq_data)) {
#ifdef CONFIG_HARDIRQS_SW_RESEND
/*
- * If the interrupt has a parent irq and runs
- * in the thread context of the parent irq,
- * retrigger the parent.
+ * If the interrupt is running in the thread
+ * context of the parent irq we need to be
+ * careful, because we cannot trigger it
+ * directly.
*/
- if (desc->parent_irq &&
- irq_settings_is_nested_thread(desc))
+ if (irq_settings_is_nested_thread(desc)) {
+ /*
+ * If the parent_irq is valid, we
+ * retrigger the parent, otherwise we
+ * do nothing.
+ */
+ if (!desc->parent_irq)
+ return;
irq = desc->parent_irq;
+ }
/* Set it pending and activate the softirq: */
set_bit(irq, irqs_resend);
tasklet_schedule(&resend_tasklet);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 10e489c..fdea0be 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -97,6 +97,7 @@ bool kthread_should_park(void)
{
return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(current)->flags);
}
+EXPORT_SYMBOL_GPL(kthread_should_park);
/**
* kthread_freezable_should_stop - should this freezable kthread return now?
@@ -171,6 +172,7 @@ void kthread_parkme(void)
{
__kthread_parkme(to_kthread(current));
}
+EXPORT_SYMBOL_GPL(kthread_parkme);
static int kthread(void *_create)
{
@@ -411,6 +413,7 @@ void kthread_unpark(struct task_struct *k)
if (kthread)
__kthread_unpark(k, kthread);
}
+EXPORT_SYMBOL_GPL(kthread_unpark);
/**
* kthread_park - park a thread created by kthread_create().
@@ -441,6 +444,7 @@ int kthread_park(struct task_struct *k)
}
return ret;
}
+EXPORT_SYMBOL_GPL(kthread_park);
/**
* kthread_stop - stop a thread created by kthread_create().
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 04ab181..df19ae4 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -4,6 +4,7 @@
#include <linux/hash.h>
#include <linux/bootmem.h>
+#include <linux/debug_locks.h>
/*
* Implement paravirt qspinlocks; the general idea is to halt the vcpus instead
@@ -286,15 +287,23 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
{
struct __qspinlock *l = (void *)lock;
struct pv_node *node;
+ u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
/*
* We must not unlock if SLOW, because in that case we must first
* unhash. Otherwise it would be possible to have multiple @lock
* entries, which would be BAD.
*/
- if (likely(cmpxchg(&l->locked, _Q_LOCKED_VAL, 0) == _Q_LOCKED_VAL))
+ if (likely(lockval == _Q_LOCKED_VAL))
return;
+ if (unlikely(lockval != _Q_SLOW_VAL)) {
+ if (debug_locks_silent)
+ return;
+ WARN(1, "pvqspinlock: lock %p has corrupted value 0x%x!\n", lock, atomic_read(&lock->val));
+ return;
+ }
+
/*
* Since the above failed to release, this must be the SLOW path.
* Therefore start by looking up the blocked node and unhashing it.
diff --git a/kernel/module.c b/kernel/module.c
index 3e0e197..b86b7bf 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -602,13 +602,16 @@ const struct kernel_symbol *find_symbol(const char *name,
}
EXPORT_SYMBOL_GPL(find_symbol);
-/* Search for module by name: must hold module_mutex. */
+/*
+ * Search for module by name: must hold module_mutex (or preempt disabled
+ * for read-only access).
+ */
static struct module *find_module_all(const char *name, size_t len,
bool even_unformed)
{
struct module *mod;
- module_assert_mutex();
+ module_assert_mutex_or_preempt();
list_for_each_entry(mod, &modules, list) {
if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
@@ -621,6 +624,7 @@ static struct module *find_module_all(const char *name, size_t len,
struct module *find_module(const char *name)
{
+ module_assert_mutex();
return find_module_all(name, strlen(name), false);
}
EXPORT_SYMBOL_GPL(find_module);
@@ -3557,6 +3561,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
mutex_lock(&module_mutex);
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
+ mod_tree_remove(mod);
wake_up_all(&module_wq);
/* Wait for RCU-sched synchronizing before releasing mod->list. */
synchronize_sched();
diff --git a/kernel/resource.c b/kernel/resource.c
index 90552aa..fed052a 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -504,13 +504,13 @@ int region_is_ram(resource_size_t start, unsigned long size)
{
struct resource *p;
resource_size_t end = start + size - 1;
- int flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
const char *name = "System RAM";
int ret = -1;
read_lock(&resource_lock);
for (p = iomem_resource.child; p ; p = p->sibling) {
- if (end < p->start)
+ if (p->end < start)
continue;
if (p->start <= start && end <= p->end) {
@@ -521,7 +521,7 @@ int region_is_ram(resource_size_t start, unsigned long size)
ret = 1;
break;
}
- if (p->end < start)
+ if (end < p->start)
break; /* not found */
}
read_unlock(&resource_lock);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 65c8f3e..d113c3b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3683,7 +3683,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
cfs_rq->throttled = 1;
cfs_rq->throttled_clock = rq_clock(rq);
raw_spin_lock(&cfs_b->lock);
- empty = list_empty(&cfs_rq->throttled_list);
+ empty = list_empty(&cfs_b->throttled_cfs_rq);
/*
* Add to the _head_ of the list, so that an already-started
diff --git a/kernel/signal.c b/kernel/signal.c
index 836df8d..0f6bbbe 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2748,12 +2748,15 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
* Other callers might not initialize the si_lsb field,
* so check explicitly for the right codes here.
*/
- if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)
+ if (from->si_signo == SIGBUS &&
+ (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO))
err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
#endif
#ifdef SEGV_BNDERR
- err |= __put_user(from->si_lower, &to->si_lower);
- err |= __put_user(from->si_upper, &to->si_upper);
+ if (from->si_signo == SIGSEGV && from->si_code == SEGV_BNDERR) {
+ err |= __put_user(from->si_lower, &to->si_lower);
+ err |= __put_user(from->si_upper, &to->si_upper);
+ }
#endif
break;
case __SI_CHLD:
@@ -3017,7 +3020,7 @@ COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo,
int, sig,
struct compat_siginfo __user *, uinfo)
{
- siginfo_t info;
+ siginfo_t info = {};
int ret = copy_siginfo_from_user32(&info, uinfo);
if (unlikely(ret))
return ret;
@@ -3061,7 +3064,7 @@ COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo,
int, sig,
struct compat_siginfo __user *, uinfo)
{
- siginfo_t info;
+ siginfo_t info = {};
if (copy_siginfo_from_user32(&info, uinfo))
return -EFAULT;
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 08ccc3d..50eb107 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -120,19 +120,25 @@ static int __clockevents_switch_state(struct clock_event_device *dev,
/* The clockevent device is getting replaced. Shut it down. */
case CLOCK_EVT_STATE_SHUTDOWN:
- return dev->set_state_shutdown(dev);
+ if (dev->set_state_shutdown)
+ return dev->set_state_shutdown(dev);
+ return 0;
case CLOCK_EVT_STATE_PERIODIC:
/* Core internal bug */
if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC))
return -ENOSYS;
- return dev->set_state_periodic(dev);
+ if (dev->set_state_periodic)
+ return dev->set_state_periodic(dev);
+ return 0;
case CLOCK_EVT_STATE_ONESHOT:
/* Core internal bug */
if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
return -ENOSYS;
- return dev->set_state_oneshot(dev);
+ if (dev->set_state_oneshot)
+ return dev->set_state_oneshot(dev);
+ return 0;
case CLOCK_EVT_STATE_ONESHOT_STOPPED:
/* Core internal bug */
@@ -471,18 +477,6 @@ static int clockevents_sanity_check(struct clock_event_device *dev)
if (dev->features & CLOCK_EVT_FEAT_DUMMY)
return 0;
- /* New state-specific callbacks */
- if (!dev->set_state_shutdown)
- return -EINVAL;
-
- if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
- !dev->set_state_periodic)
- return -EINVAL;
-
- if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) &&
- !dev->set_state_oneshot)
- return -EINVAL;
-
return 0;
}
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index d39f32c..f6aae79 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -159,7 +159,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
struct clock_event_device *bc = tick_broadcast_device.evtdev;
unsigned long flags;
- int ret;
+ int ret = 0;
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
@@ -221,13 +221,14 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
* If we kept the cpu in the broadcast mask,
* tell the caller to leave the per cpu device
* in shutdown state. The periodic interrupt
- * is delivered by the broadcast device.
+ * is delivered by the broadcast device, if
+ * the broadcast device exists and is not
+ * hrtimer based.
*/
- ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
+ if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER))
+ ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
break;
default:
- /* Nothing to do */
- ret = 0;
break;
}
}
@@ -265,8 +266,22 @@ static bool tick_do_broadcast(struct cpumask *mask)
* Check, if the current cpu is in the mask
*/
if (cpumask_test_cpu(cpu, mask)) {
+ struct clock_event_device *bc = tick_broadcast_device.evtdev;
+
cpumask_clear_cpu(cpu, mask);
- local = true;
+ /*
+ * We only run the local handler, if the broadcast
+ * device is not hrtimer based. Otherwise we run into
+ * a hrtimer recursion.
+ *
+ * local timer_interrupt()
+ * local_handler()
+ * expire_hrtimers()
+ * bc_handler()
+ * local_handler()
+ * expire_hrtimers()
+ */
+ local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER);
}
if (!cpumask_empty(mask)) {
@@ -301,6 +316,13 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
bool bc_local;
raw_spin_lock(&tick_broadcast_lock);
+
+ /* Handle spurious interrupts gracefully */
+ if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
+ raw_spin_unlock(&tick_broadcast_lock);
+ return;
+ }
+
bc_local = tick_do_periodic_broadcast();
if (clockevent_state_oneshot(dev)) {
@@ -359,8 +381,16 @@ void tick_broadcast_control(enum tick_broadcast_mode mode)
case TICK_BROADCAST_ON:
cpumask_set_cpu(cpu, tick_broadcast_on);
if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
- if (tick_broadcast_device.mode ==
- TICKDEV_MODE_PERIODIC)
+ /*
+ * Only shutdown the cpu local device, if:
+ *
+ * - the broadcast device exists
+ * - the broadcast device is not a hrtimer based one
+ * - the broadcast device is in periodic mode to
+ * avoid a hickup during switch to oneshot mode
+ */
+ if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
+ tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
clockevents_shutdown(dev);
}
break;
@@ -379,14 +409,16 @@ void tick_broadcast_control(enum tick_broadcast_mode mode)
break;
}
- if (cpumask_empty(tick_broadcast_mask)) {
- if (!bc_stopped)
- clockevents_shutdown(bc);
- } else if (bc_stopped) {
- if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
- tick_broadcast_start_periodic(bc);
- else
- tick_broadcast_setup_oneshot(bc);
+ if (bc) {
+ if (cpumask_empty(tick_broadcast_mask)) {
+ if (!bc_stopped)
+ clockevents_shutdown(bc);
+ } else if (bc_stopped) {
+ if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
+ tick_broadcast_start_periodic(bc);
+ else
+ tick_broadcast_setup_oneshot(bc);
+ }
}
raw_spin_unlock(&tick_broadcast_lock);
}
@@ -662,71 +694,82 @@ static void broadcast_shutdown_local(struct clock_event_device *bc,
clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
}
-/**
- * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode
- * @state: The target state (enter/exit)
- *
- * The system enters/leaves a state, where affected devices might stop
- * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
- *
- * Called with interrupts disabled, so clockevents_lock is not
- * required here because the local clock event device cannot go away
- * under us.
- */
-int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
+int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
struct clock_event_device *bc, *dev;
- struct tick_device *td;
int cpu, ret = 0;
ktime_t now;
/*
- * Periodic mode does not care about the enter/exit of power
- * states
+ * If there is no broadcast device, tell the caller not to go
+ * into deep idle.
*/
- if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
- return 0;
+ if (!tick_broadcast_device.evtdev)
+ return -EBUSY;
- /*
- * We are called with preemtion disabled from the depth of the
- * idle code, so we can't be moved away.
- */
- td = this_cpu_ptr(&tick_cpu_device);
- dev = td->evtdev;
-
- if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
- return 0;
+ dev = this_cpu_ptr(&tick_cpu_device)->evtdev;
raw_spin_lock(&tick_broadcast_lock);
bc = tick_broadcast_device.evtdev;
cpu = smp_processor_id();
if (state == TICK_BROADCAST_ENTER) {
+ /*
+ * If the current CPU owns the hrtimer broadcast
+ * mechanism, it cannot go deep idle and we do not add
+ * the CPU to the broadcast mask. We don't have to go
+ * through the EXIT path as the local timer is not
+ * shutdown.
+ */
+ ret = broadcast_needs_cpu(bc, cpu);
+ if (ret)
+ goto out;
+
+ /*
+ * If the broadcast device is in periodic mode, we
+ * return.
+ */
+ if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
+ /* If it is a hrtimer based broadcast, return busy */
+ if (bc->features & CLOCK_EVT_FEAT_HRTIMER)
+ ret = -EBUSY;
+ goto out;
+ }
+
if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
+
+ /* Conditionally shut down the local timer. */
broadcast_shutdown_local(bc, dev);
+
/*
* We only reprogram the broadcast timer if we
* did not mark ourself in the force mask and
* if the cpu local event is earlier than the
* broadcast event. If the current CPU is in
* the force mask, then we are going to be
- * woken by the IPI right away.
+ * woken by the IPI right away; we return
+ * busy, so the CPU does not try to go deep
+ * idle.
*/
- if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
- dev->next_event.tv64 < bc->next_event.tv64)
+ if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
+ ret = -EBUSY;
+ } else if (dev->next_event.tv64 < bc->next_event.tv64) {
tick_broadcast_set_event(bc, cpu, dev->next_event);
+ /*
+ * In case of hrtimer broadcasts the
+ * programming might have moved the
+ * timer to this cpu. If yes, remove
+ * us from the broadcast mask and
+ * return busy.
+ */
+ ret = broadcast_needs_cpu(bc, cpu);
+ if (ret) {
+ cpumask_clear_cpu(cpu,
+ tick_broadcast_oneshot_mask);
+ }
+ }
}
- /*
- * If the current CPU owns the hrtimer broadcast
- * mechanism, it cannot go deep idle and we remove the
- * CPU from the broadcast mask. We don't have to go
- * through the EXIT path as the local timer is not
- * shutdown.
- */
- ret = broadcast_needs_cpu(bc, cpu);
- if (ret)
- cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
} else {
if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
@@ -796,7 +839,6 @@ out:
raw_spin_unlock(&tick_broadcast_lock);
return ret;
}
-EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);
/*
* Reset the one shot broadcast for a cpu
@@ -938,6 +980,16 @@ bool tick_broadcast_oneshot_available(void)
return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}
+#else
+int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
+{
+ struct clock_event_device *bc = tick_broadcast_device.evtdev;
+
+ if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER))
+ return -EBUSY;
+
+ return 0;
+}
#endif
void __init tick_broadcast_init(void)
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 76446cb..f8bf475 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -343,6 +343,28 @@ out_bc:
tick_install_broadcast_device(newdev);
}
+/**
+ * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode
+ * @state: The target state (enter/exit)
+ *
+ * The system enters/leaves a state, where affected devices might stop
+ * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
+ *
+ * Called with interrupts disabled, so clockevents_lock is not
+ * required here because the local clock event device cannot go away
+ * under us.
+ */
+int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
+{
+ struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
+
+ if (!(td->evtdev->features & CLOCK_EVT_FEAT_C3STOP))
+ return 0;
+
+ return __tick_broadcast_oneshot_control(state);
+}
+EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);
+
#ifdef CONFIG_HOTPLUG_CPU
/*
* Transfer the do_timer job away from a dying cpu.
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 42fdf49..a4a8d4e 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -71,4 +71,14 @@ extern void tick_cancel_sched_timer(int cpu);
static inline void tick_cancel_sched_timer(int cpu) { }
#endif
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+extern int __tick_broadcast_oneshot_control(enum tick_broadcast_state state);
+#else
+static inline int
+__tick_broadcast_oneshot_control(enum tick_broadcast_state state)
+{
+ return -EBUSY;
+}
+#endif
+
#endif
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 5e097fa..84190f0 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -807,8 +807,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
spin_unlock(&base->lock);
base = new_base;
spin_lock(&base->lock);
- timer->flags &= ~TIMER_BASEMASK;
- timer->flags |= base->cpu;
+ WRITE_ONCE(timer->flags,
+ (timer->flags & ~TIMER_BASEMASK) | base->cpu);
}
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 02bece4..eb11011 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -98,6 +98,13 @@ struct ftrace_pid {
struct pid *pid;
};
+static bool ftrace_pids_enabled(void)
+{
+ return !list_empty(&ftrace_pids);
+}
+
+static void ftrace_update_trampoline(struct ftrace_ops *ops);
+
/*
* ftrace_disabled is set when an anomaly is discovered.
* ftrace_disabled is much stronger than ftrace_enabled.
@@ -109,7 +116,6 @@ static DEFINE_MUTEX(ftrace_lock);
static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
-ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
static struct ftrace_ops global_ops;
static struct ftrace_ops control_ops;
@@ -183,14 +189,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
if (!test_tsk_trace_trace(current))
return;
- ftrace_pid_function(ip, parent_ip, op, regs);
-}
-
-static void set_ftrace_pid_function(ftrace_func_t func)
-{
- /* do not set ftrace_pid_function to itself! */
- if (func != ftrace_pid_func)
- ftrace_pid_function = func;
+ op->saved_func(ip, parent_ip, op, regs);
}
/**
@@ -202,7 +201,6 @@ static void set_ftrace_pid_function(ftrace_func_t func)
void clear_ftrace_function(void)
{
ftrace_trace_function = ftrace_stub;
- ftrace_pid_function = ftrace_stub;
}
static void control_ops_disable_all(struct ftrace_ops *ops)
@@ -436,6 +434,12 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
} else
add_ftrace_ops(&ftrace_ops_list, ops);
+ /* Always save the function, and reset at unregistering */
+ ops->saved_func = ops->func;
+
+ if (ops->flags & FTRACE_OPS_FL_PID && ftrace_pids_enabled())
+ ops->func = ftrace_pid_func;
+
ftrace_update_trampoline(ops);
if (ftrace_enabled)
@@ -463,15 +467,28 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
if (ftrace_enabled)
update_ftrace_function();
+ ops->func = ops->saved_func;
+
return 0;
}
static void ftrace_update_pid_func(void)
{
+ bool enabled = ftrace_pids_enabled();
+ struct ftrace_ops *op;
+
/* Only do something if we are tracing something */
if (ftrace_trace_function == ftrace_stub)
return;
+ do_for_each_ftrace_op(op, ftrace_ops_list) {
+ if (op->flags & FTRACE_OPS_FL_PID) {
+ op->func = enabled ? ftrace_pid_func :
+ op->saved_func;
+ ftrace_update_trampoline(op);
+ }
+ } while_for_each_ftrace_op(op);
+
update_ftrace_function();
}
@@ -1133,7 +1150,8 @@ static struct ftrace_ops global_ops = {
.local_hash.filter_hash = EMPTY_HASH,
INIT_OPS_HASH(global_ops)
.flags = FTRACE_OPS_FL_RECURSION_SAFE |
- FTRACE_OPS_FL_INITIALIZED,
+ FTRACE_OPS_FL_INITIALIZED |
+ FTRACE_OPS_FL_PID,
};
/*
@@ -5023,7 +5041,9 @@ static void ftrace_update_trampoline(struct ftrace_ops *ops)
static struct ftrace_ops global_ops = {
.func = ftrace_stub,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
+ .flags = FTRACE_OPS_FL_RECURSION_SAFE |
+ FTRACE_OPS_FL_INITIALIZED |
+ FTRACE_OPS_FL_PID,
};
static int __init ftrace_nodyn_init(void)
@@ -5080,11 +5100,6 @@ void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func)
if (WARN_ON(tr->ops->func != ftrace_stub))
printk("ftrace ops had %pS for function\n",
tr->ops->func);
- /* Only the top level instance does pid tracing */
- if (!list_empty(&ftrace_pids)) {
- set_ftrace_pid_function(func);
- func = ftrace_pid_func;
- }
}
tr->ops->func = func;
tr->ops->private = tr;
@@ -5371,7 +5386,7 @@ static void *fpid_start(struct seq_file *m, loff_t *pos)
{
mutex_lock(&ftrace_lock);
- if (list_empty(&ftrace_pids) && (!*pos))
+ if (!ftrace_pids_enabled() && (!*pos))
return (void *) 1;
return seq_list_start(&ftrace_pids, *pos);
@@ -5610,6 +5625,7 @@ static struct ftrace_ops graph_ops = {
.func = ftrace_stub,
.flags = FTRACE_OPS_FL_RECURSION_SAFE |
FTRACE_OPS_FL_INITIALIZED |
+ FTRACE_OPS_FL_PID |
FTRACE_OPS_FL_STUB,
#ifdef FTRACE_GRAPH_TRAMP_ADDR
.trampoline = FTRACE_GRAPH_TRAMP_ADDR,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f060716..74bde81 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -444,6 +444,7 @@ enum {
TRACE_CONTROL_BIT,
+ TRACE_BRANCH_BIT,
/*
* Abuse of the trace_recursion.
* As we need a way to maintain state if we are tracing the function
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index a87b43f..e2e12ad 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -36,9 +36,12 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
struct trace_branch *entry;
struct ring_buffer *buffer;
unsigned long flags;
- int cpu, pc;
+ int pc;
const char *p;
+ if (current->trace_recursion & TRACE_BRANCH_BIT)
+ return;
+
/*
* I would love to save just the ftrace_likely_data pointer, but
* this code can also be used by modules. Ugly things can happen
@@ -49,10 +52,10 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
if (unlikely(!tr))
return;
- local_irq_save(flags);
- cpu = raw_smp_processor_id();
- data = per_cpu_ptr(tr->trace_buffer.data, cpu);
- if (atomic_inc_return(&data->disabled) != 1)
+ raw_local_irq_save(flags);
+ current->trace_recursion |= TRACE_BRANCH_BIT;
+ data = this_cpu_ptr(tr->trace_buffer.data);
+ if (atomic_read(&data->disabled))
goto out;
pc = preempt_count();
@@ -81,8 +84,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
__buffer_unlock_commit(buffer, event);
out:
- atomic_dec(&data->disabled);
- local_irq_restore(flags);
+ current->trace_recursion &= ~TRACE_BRANCH_BIT;
+ raw_local_irq_restore(flags);
}
static inline