diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/audit.c | 4 | ||||
-rw-r--r-- | kernel/hrtimer.c | 22 | ||||
-rw-r--r-- | kernel/irq/irqdesc.c | 7 | ||||
-rw-r--r-- | kernel/irq/manage.c | 17 | ||||
-rw-r--r-- | kernel/module.c | 6 | ||||
-rw-r--r-- | kernel/power/suspend.c | 3 | ||||
-rw-r--r-- | kernel/sched/deadline.c | 11 | ||||
-rw-r--r-- | kernel/sched/fair.c | 16 | ||||
-rw-r--r-- | kernel/sched/rt.c | 7 | ||||
-rw-r--r-- | kernel/sched/sched.h | 9 | ||||
-rw-r--r-- | kernel/seccomp.c | 2 | ||||
-rw-r--r-- | kernel/softirq.c | 5 | ||||
-rw-r--r-- | kernel/timer.c | 2 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 27 | ||||
-rw-r--r-- | kernel/trace/trace_events_trigger.c | 2 | ||||
-rw-r--r-- | kernel/watchdog.c | 6 |
16 files changed, 93 insertions, 53 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index 7c28936..47845c5 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -643,13 +643,13 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) if ((task_active_pid_ns(current) != &init_pid_ns)) return -EPERM; - if (!capable(CAP_AUDIT_CONTROL)) + if (!netlink_capable(skb, CAP_AUDIT_CONTROL)) err = -EPERM; break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: - if (!capable(CAP_AUDIT_WRITE)) + if (!netlink_capable(skb, CAP_AUDIT_WRITE)) err = -EPERM; break; default: /* bad msg */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index d55092c..6b715c0 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -234,6 +234,11 @@ again: goto again; } timer->base = new_base; + } else { + if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { + cpu = this_cpu; + goto again; + } } return new_base; } @@ -569,6 +574,23 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) cpu_base->expires_next.tv64 = expires_next.tv64; + /* + * If a hang was detected in the last timer interrupt then we + * leave the hang delay active in the hardware. We want the + * system to make progress. That also prevents the following + * scenario: + * T1 expires 50ms from now + * T2 expires 5s from now + * + * T1 is removed, so this code is called and would reprogram + * the hardware to 5s from now. Any hrtimer_start after that + * will not reprogram the hardware due to hang_detected being + * set. So we'd effectivly block all timers until the T2 event + * fires. + */ + if (cpu_base->hang_detected) + return; + if (cpu_base->expires_next.tv64 != KTIME_MAX) tick_program_event(cpu_base->expires_next, 1); } diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index a7174617..bb07f29 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -363,6 +363,13 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, if (from > irq) return -EINVAL; from = irq; + } else { + /* + * For interrupts which are freely allocated the + * architecture can force a lower bound to the @from + * argument. x86 uses this to exclude the GSI space. + */ + from = arch_dynirq_lower_bound(from); } mutex_lock(&sparse_irq_lock); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 2486a4c..d34131c 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -180,7 +180,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, struct irq_chip *chip = irq_data_get_irq_chip(data); int ret; - ret = chip->irq_set_affinity(data, mask, false); + ret = chip->irq_set_affinity(data, mask, force); switch (ret) { case IRQ_SET_MASK_OK: cpumask_copy(data->affinity, mask); @@ -192,7 +192,8 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, return ret; } -int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) +int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, + bool force) { struct irq_chip *chip = irq_data_get_irq_chip(data); struct irq_desc *desc = irq_data_to_desc(data); @@ -202,7 +203,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) return -EINVAL; if (irq_can_move_pcntxt(data)) { - ret = irq_do_set_affinity(data, mask, false); + ret = irq_do_set_affinity(data, mask, force); } else { irqd_set_move_pending(data); irq_copy_pending(desc, mask); @@ -217,13 +218,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) return ret; } -/** - * irq_set_affinity - Set the irq affinity of a given irq - * @irq: Interrupt to set affinity - * @mask: cpumask - * - */ -int irq_set_affinity(unsigned int irq, const struct cpumask *mask) +int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -233,7 +228,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *mask) return -EINVAL; raw_spin_lock_irqsave(&desc->lock, flags); - ret = __irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask); + ret = irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force); raw_spin_unlock_irqrestore(&desc->lock, flags); return ret; } diff --git a/kernel/module.c b/kernel/module.c index 1186940..079c461 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -815,9 +815,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, return -EFAULT; name[MODULE_NAME_LEN-1] = '\0'; - if (!(flags & O_NONBLOCK)) - pr_warn("waiting module removal not supported: please upgrade\n"); - if (mutex_lock_interruptible(&module_mutex) != 0) return -EINTR; @@ -3271,6 +3268,9 @@ static int load_module(struct load_info *info, const char __user *uargs, dynamic_debug_setup(info->debug, info->num_debug); + /* Ftrace init must be called in the MODULE_STATE_UNFORMED state */ + ftrace_module_init(mod); + /* Finally it's fully formed, ready to start executing. */ err = complete_formation(mod, info); if (err) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index c3ad9ca..8233cd4 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -14,6 +14,7 @@ #include <linux/init.h> #include <linux/console.h> #include <linux/cpu.h> +#include <linux/cpuidle.h> #include <linux/syscalls.h> #include <linux/gfp.h> #include <linux/io.h> @@ -53,7 +54,9 @@ static void freeze_begin(void) static void freeze_enter(void) { + cpuidle_resume(); wait_event(suspend_freeze_wait_head, suspend_freeze_wake); + cpuidle_pause(); } void freeze_wake(void) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 27ef409..b080957 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1021,8 +1021,17 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) dl_rq = &rq->dl; - if (need_pull_dl_task(rq, prev)) + if (need_pull_dl_task(rq, prev)) { pull_dl_task(rq); + /* + * pull_rt_task() can drop (and re-acquire) rq->lock; this + * means a stop task can slip in, in which case we need to + * re-start task selection. + */ + if (rq->stop && rq->stop->on_rq) + return RETRY_TASK; + } + /* * When prev is DL, we may throttle it in put_prev_task(). * So, we update time before we check for dl_nr_running. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7e9bd0b..7570dd9 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1497,7 +1497,7 @@ static void task_numa_placement(struct task_struct *p) /* If the task is part of a group prevent parallel updates to group stats */ if (p->numa_group) { group_lock = &p->numa_group->lock; - spin_lock(group_lock); + spin_lock_irq(group_lock); } /* Find the node with the highest number of faults */ @@ -1572,7 +1572,7 @@ static void task_numa_placement(struct task_struct *p) } } - spin_unlock(group_lock); + spin_unlock_irq(group_lock); } /* Preferred node as the node with the most faults */ @@ -1677,7 +1677,8 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, if (!join) return; - double_lock(&my_grp->lock, &grp->lock); + BUG_ON(irqs_disabled()); + double_lock_irq(&my_grp->lock, &grp->lock); for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) { my_grp->faults[i] -= p->numa_faults_memory[i]; @@ -1691,7 +1692,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, grp->nr_tasks++; spin_unlock(&my_grp->lock); - spin_unlock(&grp->lock); + spin_unlock_irq(&grp->lock); rcu_assign_pointer(p->numa_group, grp); @@ -1710,14 +1711,14 @@ void task_numa_free(struct task_struct *p) void *numa_faults = p->numa_faults_memory; if (grp) { - spin_lock(&grp->lock); + spin_lock_irq(&grp->lock); for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) grp->faults[i] -= p->numa_faults_memory[i]; grp->total_faults -= p->total_numa_faults; list_del(&p->numa_entry); grp->nr_tasks--; - spin_unlock(&grp->lock); + spin_unlock_irq(&grp->lock); rcu_assign_pointer(p->numa_group, NULL); put_numa_group(grp); } @@ -6727,7 +6728,8 @@ static int idle_balance(struct rq *this_rq) out: /* Is there a task of a high priority class? */ if (this_rq->nr_running != this_rq->cfs.h_nr_running && - (this_rq->dl.dl_nr_running || + ((this_rq->stop && this_rq->stop->on_rq) || + this_rq->dl.dl_nr_running || (this_rq->rt.rt_nr_running && !rt_rq_throttled(&this_rq->rt)))) pulled_task = -1; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index d8cdf16..bd2267a 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1362,10 +1362,11 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev) pull_rt_task(rq); /* * pull_rt_task() can drop (and re-acquire) rq->lock; this - * means a dl task can slip in, in which case we need to - * re-start task selection. + * means a dl or stop task can slip in, in which case we need + * to re-start task selection. */ - if (unlikely(rq->dl.dl_nr_running)) + if (unlikely((rq->stop && rq->stop->on_rq) || + rq->dl.dl_nr_running)) return RETRY_TASK; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c9007f2..456e492 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1385,6 +1385,15 @@ static inline void double_lock(spinlock_t *l1, spinlock_t *l2) spin_lock_nested(l2, SINGLE_DEPTH_NESTING); } +static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2) +{ + if (l1 > l2) + swap(l1, l2); + + spin_lock_irq(l1); + spin_lock_nested(l2, SINGLE_DEPTH_NESTING); +} + static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2) { if (l1 > l2) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 590c379..b35c215 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -255,6 +255,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) goto free_prog; /* Allocate a new seccomp_filter */ + ret = -ENOMEM; filter = kzalloc(sizeof(struct seccomp_filter) + sizeof(struct sock_filter_int) * new_len, GFP_KERNEL|__GFP_NOWARN); @@ -264,6 +265,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len); if (ret) goto free_filter; + kfree(fp); atomic_set(&filter->usage, 1); filter->len = new_len; diff --git a/kernel/softirq.c b/kernel/softirq.c index b50990a..33e4648 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -779,3 +779,8 @@ int __init __weak arch_early_irq_init(void) { return 0; } + +unsigned int __weak arch_dynirq_lower_bound(unsigned int from) +{ + return from; +} diff --git a/kernel/timer.c b/kernel/timer.c index 87bd529..3bb01a3 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -838,7 +838,7 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires) bit = find_last_bit(&mask, BITS_PER_LONG); - mask = (1 << bit) - 1; + mask = (1UL << bit) - 1; expires_limit = expires_limit & ~(mask); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1fd4b94..4a54a25 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4330,16 +4330,11 @@ static void ftrace_init_module(struct module *mod, ftrace_process_locs(mod, start, end); } -static int ftrace_module_notify_enter(struct notifier_block *self, - unsigned long val, void *data) +void ftrace_module_init(struct module *mod) { - struct module *mod = data; - - if (val == MODULE_STATE_COMING) - ftrace_init_module(mod, mod->ftrace_callsites, - mod->ftrace_callsites + - mod->num_ftrace_callsites); - return 0; + ftrace_init_module(mod, mod->ftrace_callsites, + mod->ftrace_callsites + + mod->num_ftrace_callsites); } static int ftrace_module_notify_exit(struct notifier_block *self, @@ -4353,11 +4348,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, return 0; } #else -static int ftrace_module_notify_enter(struct notifier_block *self, - unsigned long val, void *data) -{ - return 0; -} static int ftrace_module_notify_exit(struct notifier_block *self, unsigned long val, void *data) { @@ -4365,11 +4355,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, } #endif /* CONFIG_MODULES */ -struct notifier_block ftrace_module_enter_nb = { - .notifier_call = ftrace_module_notify_enter, - .priority = INT_MAX, /* Run before anything that can use kprobes */ -}; - struct notifier_block ftrace_module_exit_nb = { .notifier_call = ftrace_module_notify_exit, .priority = INT_MIN, /* Run after anything that can remove kprobes */ @@ -4403,10 +4388,6 @@ void __init ftrace_init(void) __start_mcount_loc, __stop_mcount_loc); - ret = register_module_notifier(&ftrace_module_enter_nb); - if (ret) - pr_warning("Failed to register trace ftrace module enter notifier\n"); - ret = register_module_notifier(&ftrace_module_exit_nb); if (ret) pr_warning("Failed to register trace ftrace module exit notifier\n"); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 925f537..4747b47 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -77,7 +77,7 @@ event_triggers_call(struct ftrace_event_file *file, void *rec) data->ops->func(data); continue; } - filter = rcu_dereference(data->filter); + filter = rcu_dereference_sched(data->filter); if (filter && !filter_match_preds(filter, rec)) continue; if (data->cmd_ops->post_trigger) { diff --git a/kernel/watchdog.c b/kernel/watchdog.c index e90089f..516203e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -138,7 +138,11 @@ static void __touch_watchdog(void) void touch_softlockup_watchdog(void) { - __this_cpu_write(watchdog_touch_ts, 0); + /* + * Preemption can be enabled. It doesn't matter which CPU's timestamp + * gets zeroed here, so use the raw_ operation. + */ + raw_cpu_write(watchdog_touch_ts, 0); } EXPORT_SYMBOL(touch_softlockup_watchdog); |