diff options
author | Ingo Molnar <mingo@kernel.org> | 2013-07-23 09:48:17 (GMT) |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2013-07-23 09:48:17 (GMT) |
commit | b59f2b4d30c187160df597bae41cabe497b6acf4 (patch) | |
tree | e29ed9e3cbaf777cc1a10c99c4542985002d2c88 /kernel | |
parent | 1e40c2edef2537f87f94d0baf80aeaeb7d51cc23 (diff) | |
parent | 3b2f64d00c46e1e4e9bd0bb9bb12619adac27a4b (diff) | |
download | linux-fsl-qoriq-b59f2b4d30c187160df597bae41cabe497b6acf4.tar.xz |
Merge tag 'v3.11-rc2' into core/locking
Merge in Linux 3.11-rc2 before moving on with new work.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
41 files changed, 856 insertions, 452 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e5583d1..0e0b20b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -802,7 +802,6 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task, */ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); -static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int); static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, unsigned long subsys_mask); @@ -2642,7 +2641,7 @@ static const struct inode_operations cgroup_file_inode_operations = { }; static const struct inode_operations cgroup_dir_inode_operations = { - .lookup = cgroup_lookup, + .lookup = simple_lookup, .mkdir = cgroup_mkdir, .rmdir = cgroup_rmdir, .rename = cgroup_rename, @@ -2652,14 +2651,6 @@ static const struct inode_operations cgroup_dir_inode_operations = { .removexattr = cgroup_removexattr, }; -static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) -{ - if (dentry->d_name.len > NAME_MAX) - return ERR_PTR(-ENAMETOOLONG); - d_add(dentry, NULL); - return NULL; -} - /* * Check if a file is a control file */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 198a388..b2b227b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -366,7 +366,7 @@ EXPORT_SYMBOL(cpu_down); #endif /*CONFIG_HOTPLUG_CPU*/ /* Requires cpu_add_remove_lock to be held */ -static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) +static int _cpu_up(unsigned int cpu, int tasks_frozen) { int ret, nr_calls = 0; void *hcpu = (void *)(long)cpu; @@ -419,7 +419,7 @@ out: return ret; } -int __cpuinit cpu_up(unsigned int cpu) +int cpu_up(unsigned int cpu) { int err = 0; @@ -618,7 +618,7 @@ core_initcall(cpu_hotplug_pm_sync_init); * It must be called by the arch code on the new cpu, before the new cpu * enables interrupts and before the "boot" cpu returns from __cpu_up(). */ -void __cpuinit notify_cpu_starting(unsigned int cpu) +void notify_cpu_starting(unsigned int cpu) { unsigned long val = CPU_STARTING; diff --git a/kernel/events/core.c b/kernel/events/core.c index 1db3af9..f86599e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -182,7 +182,7 @@ void update_perf_cpu_limits(void) u64 tmp = perf_sample_period_ns; tmp *= sysctl_perf_cpu_time_max_percent; - tmp = do_div(tmp, 100); + do_div(tmp, 100); atomic_set(&perf_sample_allowed_ns, tmp); } @@ -232,7 +232,7 @@ DEFINE_PER_CPU(u64, running_sample_length); void perf_sample_event_took(u64 sample_len_ns) { u64 avg_local_sample_len; - u64 local_samples_len = __get_cpu_var(running_sample_length); + u64 local_samples_len; if (atomic_read(&perf_sample_allowed_ns) == 0) return; @@ -947,8 +947,18 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) { struct perf_event_context *ctx; - rcu_read_lock(); retry: + /* + * One of the few rules of preemptible RCU is that one cannot do + * rcu_read_unlock() while holding a scheduler (or nested) lock when + * part of the read side critical section was preemptible -- see + * rcu_read_unlock_special(). + * + * Since ctx->lock nests under rq->lock we must ensure the entire read + * side critical section is non-preemptible. + */ + preempt_disable(); + rcu_read_lock(); ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); if (ctx) { /* @@ -964,6 +974,8 @@ retry: raw_spin_lock_irqsave(&ctx->lock, *flags); if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { raw_spin_unlock_irqrestore(&ctx->lock, *flags); + rcu_read_unlock(); + preempt_enable(); goto retry; } @@ -973,6 +985,7 @@ retry: } } rcu_read_unlock(); + preempt_enable(); return ctx; } @@ -1950,7 +1963,16 @@ static int __perf_event_enable(void *info) struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); int err; - if (WARN_ON_ONCE(!ctx->is_active)) + /* + * There's a time window between 'ctx->is_active' check + * in perf_event_enable function and this place having: + * - IRQs on + * - ctx->lock unlocked + * + * where the task could be killed and 'ctx' deactivated + * by perf_event_exit_task. + */ + if (!ctx->is_active) return -EINVAL; raw_spin_lock(&ctx->lock); @@ -6212,8 +6234,6 @@ perf_event_mux_interval_ms_store(struct device *dev, return count; } -#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) - static struct device_attribute pmu_dev_attrs[] = { __ATTR_RO(type), __ATTR_RW(perf_event_mux_interval_ms), @@ -7465,7 +7485,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, * child. */ - child_ctx = alloc_perf_context(event->pmu, child); + child_ctx = alloc_perf_context(parent_ctx->pmu, child); if (!child_ctx) return -ENOMEM; @@ -7608,7 +7628,7 @@ static void __init perf_event_init_all_cpus(void) } } -static void __cpuinit perf_event_init_cpu(int cpu) +static void perf_event_init_cpu(int cpu) { struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); @@ -7697,7 +7717,7 @@ static struct notifier_block perf_reboot_notifier = { .priority = INT_MIN, }; -static int __cpuinit +static int perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (long)hcpu; diff --git a/kernel/fork.c b/kernel/fork.c index 6e6a1c1..403d2bb 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -365,8 +365,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) mm->locked_vm = 0; mm->mmap = NULL; mm->mmap_cache = NULL; - mm->free_area_cache = oldmm->mmap_base; - mm->cached_hole_size = ~0UL; mm->map_count = 0; cpumask_clear(mm_cpumask(mm)); mm->mm_rb = RB_ROOT; @@ -540,8 +538,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) mm->nr_ptes = 0; memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); spin_lock_init(&mm->page_table_lock); - mm->free_area_cache = TASK_UNMAPPED_BASE; - mm->cached_hole_size = ~0UL; mm_init_aio(mm); mm_init_owner(mm, p); @@ -1550,7 +1546,7 @@ static inline void init_idle_pids(struct pid_link *links) } } -struct task_struct * __cpuinit fork_idle(int cpu) +struct task_struct *fork_idle(int cpu) { struct task_struct *task; task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index f0f4fe2..383319b 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1659,7 +1659,7 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, /* * Functions related to boot-time initialization: */ -static void __cpuinit init_hrtimers_cpu(int cpu) +static void init_hrtimers_cpu(int cpu) { struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); int i; @@ -1740,7 +1740,7 @@ static void migrate_hrtimers(int scpu) #endif /* CONFIG_HOTPLUG_CPU */ -static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, +static int hrtimer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { int scpu = (long)hcpu; @@ -1773,7 +1773,7 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata hrtimers_nb = { +static struct notifier_block hrtimers_nb = { .notifier_call = hrtimer_cpu_notify, }; diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 10e663a..452d6f2 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -275,7 +275,7 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, if (d->gc) return -EBUSY; - numchips = d->revmap_size / irqs_per_chip; + numchips = DIV_ROUND_UP(d->revmap_size, irqs_per_chip); if (!numchips) return -EINVAL; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 2d7cd34..706724e 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -475,18 +475,6 @@ unsigned int irq_create_of_mapping(struct device_node *controller, domain = controller ? irq_find_host(controller) : irq_default_domain; if (!domain) { -#ifdef CONFIG_MIPS - /* - * Workaround to avoid breaking interrupt controller drivers - * that don't yet register an irq_domain. This is temporary - * code. ~~~gcl, Feb 24, 2012 - * - * Scheduled for removal in Linux v3.6. That should be enough - * time. - */ - if (intsize > 0) - return intspec[0]; -#endif pr_warn("no irq domain found for %s !\n", of_node_full_name(controller)); return 0; diff --git a/kernel/module.c b/kernel/module.c index cab4bce..2069158 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -455,7 +455,7 @@ const struct kernel_symbol *find_symbol(const char *name, EXPORT_SYMBOL_GPL(find_symbol); /* Search for module by name: must hold module_mutex. */ -static struct module *find_module_all(const char *name, +static struct module *find_module_all(const char *name, size_t len, bool even_unformed) { struct module *mod; @@ -463,7 +463,7 @@ static struct module *find_module_all(const char *name, list_for_each_entry(mod, &modules, list) { if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) continue; - if (strcmp(mod->name, name) == 0) + if (strlen(mod->name) == len && !memcmp(mod->name, name, len)) return mod; } return NULL; @@ -471,7 +471,7 @@ static struct module *find_module_all(const char *name, struct module *find_module(const char *name) { - return find_module_all(name, false); + return find_module_all(name, strlen(name), false); } EXPORT_SYMBOL_GPL(find_module); @@ -482,23 +482,28 @@ static inline void __percpu *mod_percpu(struct module *mod) return mod->percpu; } -static int percpu_modalloc(struct module *mod, - unsigned long size, unsigned long align) +static int percpu_modalloc(struct module *mod, struct load_info *info) { + Elf_Shdr *pcpusec = &info->sechdrs[info->index.pcpu]; + unsigned long align = pcpusec->sh_addralign; + + if (!pcpusec->sh_size) + return 0; + if (align > PAGE_SIZE) { printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", mod->name, align, PAGE_SIZE); align = PAGE_SIZE; } - mod->percpu = __alloc_reserved_percpu(size, align); + mod->percpu = __alloc_reserved_percpu(pcpusec->sh_size, align); if (!mod->percpu) { printk(KERN_WARNING "%s: Could not allocate %lu bytes percpu data\n", - mod->name, size); + mod->name, (unsigned long)pcpusec->sh_size); return -ENOMEM; } - mod->percpu_size = size; + mod->percpu_size = pcpusec->sh_size; return 0; } @@ -563,10 +568,12 @@ static inline void __percpu *mod_percpu(struct module *mod) { return NULL; } -static inline int percpu_modalloc(struct module *mod, - unsigned long size, unsigned long align) +static int percpu_modalloc(struct module *mod, struct load_info *info) { - return -ENOMEM; + /* UP modules shouldn't have this section: ENOMEM isn't quite right */ + if (info->sechdrs[info->index.pcpu].sh_size != 0) + return -ENOMEM; + return 0; } static inline void percpu_modfree(struct module *mod) { @@ -2927,7 +2934,6 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) { /* Module within temporary copy. */ struct module *mod; - Elf_Shdr *pcpusec; int err; mod = setup_load_info(info, flags); @@ -2942,17 +2948,10 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) err = module_frob_arch_sections(info->hdr, info->sechdrs, info->secstrings, mod); if (err < 0) - goto out; + return ERR_PTR(err); - pcpusec = &info->sechdrs[info->index.pcpu]; - if (pcpusec->sh_size) { - /* We have a special allocation for this section. */ - err = percpu_modalloc(mod, - pcpusec->sh_size, pcpusec->sh_addralign); - if (err) - goto out; - pcpusec->sh_flags &= ~(unsigned long)SHF_ALLOC; - } + /* We will do a special allocation for per-cpu sections later. */ + info->sechdrs[info->index.pcpu].sh_flags &= ~(unsigned long)SHF_ALLOC; /* Determine total sizes, and put offsets in sh_entsize. For now this is done generically; there doesn't appear to be any @@ -2963,17 +2962,12 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) /* Allocate and move to the final place */ err = move_module(mod, info); if (err) - goto free_percpu; + return ERR_PTR(err); /* Module has been copied to its final place now: return it. */ mod = (void *)info->sechdrs[info->index.mod].sh_addr; kmemleak_load_module(mod, info); return mod; - -free_percpu: - percpu_modfree(mod); -out: - return ERR_PTR(err); } /* mod is no longer valid after this! */ @@ -3014,7 +3008,7 @@ static bool finished_loading(const char *name) bool ret; mutex_lock(&module_mutex); - mod = find_module_all(name, true); + mod = find_module_all(name, strlen(name), true); ret = !mod || mod->state == MODULE_STATE_LIVE || mod->state == MODULE_STATE_GOING; mutex_unlock(&module_mutex); @@ -3152,7 +3146,8 @@ static int add_unformed_module(struct module *mod) again: mutex_lock(&module_mutex); - if ((old = find_module_all(mod->name, true)) != NULL) { + old = find_module_all(mod->name, strlen(mod->name), true); + if (old != NULL) { if (old->state == MODULE_STATE_COMING || old->state == MODULE_STATE_UNFORMED) { /* Wait in case it fails to load. */ @@ -3198,6 +3193,17 @@ out: return err; } +static int unknown_module_param_cb(char *param, char *val, const char *modname) +{ + /* Check for magic 'dyndbg' arg */ + int ret = ddebug_dyndbg_module_param_cb(param, val, modname); + if (ret != 0) { + printk(KERN_WARNING "%s: unknown parameter '%s' ignored\n", + modname, param); + } + return 0; +} + /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ static int load_module(struct load_info *info, const char __user *uargs, @@ -3237,6 +3243,11 @@ static int load_module(struct load_info *info, const char __user *uargs, } #endif + /* To avoid stressing percpu allocator, do this once we're unique. */ + err = percpu_modalloc(mod, info); + if (err) + goto unlink_mod; + /* Now module is in final location, initialize linked lists, etc. */ err = module_unload_init(mod); if (err) @@ -3284,7 +3295,7 @@ static int load_module(struct load_info *info, const char __user *uargs, /* Module is ready to execute: parsing args may do that. */ err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, - -32768, 32767, &ddebug_dyndbg_module_param_cb); + -32768, 32767, unknown_module_param_cb); if (err < 0) goto bug_cleanup; @@ -3563,10 +3574,8 @@ unsigned long module_kallsyms_lookup_name(const char *name) /* Don't lock: we're in enough trouble already. */ preempt_disable(); if ((colon = strchr(name, ':')) != NULL) { - *colon = '\0'; - if ((mod = find_module(name)) != NULL) + if ((mod = find_module_all(name, colon - name, false)) != NULL) ret = mod_find_symname(mod, colon+1); - *colon = ':'; } else { list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) diff --git a/kernel/panic.c b/kernel/panic.c index 9771231..8018646 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -15,6 +15,7 @@ #include <linux/notifier.h> #include <linux/module.h> #include <linux/random.h> +#include <linux/ftrace.h> #include <linux/reboot.h> #include <linux/delay.h> #include <linux/kexec.h> @@ -399,6 +400,8 @@ struct slowpath_args { static void warn_slowpath_common(const char *file, int line, void *caller, unsigned taint, struct slowpath_args *args) { + disable_trace_on_warning(); + pr_warn("------------[ cut here ]------------\n"); pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS()\n", raw_smp_processor_id(), current->pid, file, line, caller); diff --git a/kernel/params.c b/kernel/params.c index 53b958f..440e65d 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -787,7 +787,7 @@ static void __init kernel_add_sysfs_param(const char *name, } /* - * param_sysfs_builtin - add contents in /sys/parameters for built-in modules + * param_sysfs_builtin - add sysfs parameters for built-in modules * * Add module_parameters to sysfs for "modules" built into the kernel. * diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c index c6422ff..9012ecf 100644 --- a/kernel/power/autosleep.c +++ b/kernel/power/autosleep.c @@ -32,7 +32,8 @@ static void try_to_suspend(struct work_struct *work) mutex_lock(&autosleep_lock); - if (!pm_save_wakeup_count(initial_count)) { + if (!pm_save_wakeup_count(initial_count) || + system_state != SYSTEM_RUNNING) { mutex_unlock(&autosleep_lock); goto out; } diff --git a/kernel/printk.c b/kernel/printk.c index 8212c1a..69b0890 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1369,9 +1369,9 @@ static int console_trylock_for_printk(unsigned int cpu) } } logbuf_cpu = UINT_MAX; + raw_spin_unlock(&logbuf_lock); if (wake) up(&console_sem); - raw_spin_unlock(&logbuf_lock); return retval; } @@ -1921,7 +1921,7 @@ void resume_console(void) * called when a new CPU comes online (or fails to come up), and ensures * that any such output gets printed. */ -static int __cpuinit console_cpu_notify(struct notifier_block *self, +static int console_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { switch (action) { diff --git a/kernel/profile.c b/kernel/profile.c index 0bf4007..6631e1e 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -331,7 +331,7 @@ out: put_cpu(); } -static int __cpuinit profile_cpu_callback(struct notifier_block *info, +static int profile_cpu_callback(struct notifier_block *info, unsigned long action, void *__cpu) { int node, cpu = (unsigned long)__cpu; diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index b1fa551..f4871e5 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -1476,7 +1476,7 @@ rcu_torture_shutdown(void *arg) * Execute random CPU-hotplug operations at the interval specified * by the onoff_interval. */ -static int __cpuinit +static int rcu_torture_onoff(void *arg) { int cpu; @@ -1558,7 +1558,7 @@ rcu_torture_onoff(void *arg) return 0; } -static int __cpuinit +static int rcu_torture_onoff_init(void) { int ret; @@ -1601,7 +1601,7 @@ static void rcu_torture_onoff_cleanup(void) * CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then * induces a CPU stall for the time specified by stall_cpu. */ -static int __cpuinit rcu_torture_stall(void *args) +static int rcu_torture_stall(void *args) { unsigned long stop_at; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index e08abb9..068de3a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -2910,7 +2910,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) * can accept some slop in the rsp->completed access due to the fact * that this CPU cannot possibly have any RCU callbacks in flight yet. */ -static void __cpuinit +static void rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) { unsigned long flags; @@ -2962,7 +2962,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) mutex_unlock(&rsp->onoff_mutex); } -static void __cpuinit rcu_prepare_cpu(int cpu) +static void rcu_prepare_cpu(int cpu) { struct rcu_state *rsp; @@ -2974,7 +2974,7 @@ static void __cpuinit rcu_prepare_cpu(int cpu) /* * Handle CPU online/offline notification events. */ -static int __cpuinit rcu_cpu_notify(struct notifier_block *self, +static int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 4a39d36..b383258 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -521,10 +521,10 @@ static void invoke_rcu_callbacks_kthread(void); static bool rcu_is_callbacks_kthread(void); #ifdef CONFIG_RCU_BOOST static void rcu_preempt_do_callbacks(void); -static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, +static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct rcu_node *rnp); #endif /* #ifdef CONFIG_RCU_BOOST */ -static void __cpuinit rcu_prepare_kthreads(int cpu); +static void rcu_prepare_kthreads(int cpu); static void rcu_cleanup_after_idle(int cpu); static void rcu_prepare_for_idle(int cpu); static void rcu_idle_count_callbacks_posted(void); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 63098a5..769e12e 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1352,7 +1352,7 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) * already exist. We only create this kthread for preemptible RCU. * Returns zero if all is well, a negated errno otherwise. */ -static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, +static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct rcu_node *rnp) { int rnp_index = rnp - &rsp->node[0]; @@ -1507,7 +1507,7 @@ static int __init rcu_spawn_kthreads(void) } early_initcall(rcu_spawn_kthreads); -static void __cpuinit rcu_prepare_kthreads(int cpu) +static void rcu_prepare_kthreads(int cpu) { struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); struct rcu_node *rnp = rdp->mynode; @@ -1549,7 +1549,7 @@ static int __init rcu_scheduler_really_started(void) } early_initcall(rcu_scheduler_really_started); -static void __cpuinit rcu_prepare_kthreads(int cpu) +static void rcu_prepare_kthreads(int cpu) { } diff --git a/kernel/relay.c b/kernel/relay.c index b91488b..5001c98 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -516,7 +516,7 @@ static void setup_callbacks(struct rchan *chan, * * Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD) */ -static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, +static int relay_hotcpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9b1f2e5..b7c32cb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -370,13 +370,6 @@ static struct rq *this_rq_lock(void) #ifdef CONFIG_SCHED_HRTICK /* * Use HR-timers to deliver accurate preemption points. - * - * Its all a bit involved since we cannot program an hrt while holding the - * rq->lock. So what we do is store a state in in rq->hrtick_* and ask for a - * reschedule event. - * - * When we get rescheduled we reprogram the hrtick_timer outside of the - * rq->lock. */ static void hrtick_clear(struct rq *rq) @@ -404,6 +397,15 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) } #ifdef CONFIG_SMP + +static int __hrtick_restart(struct rq *rq) +{ + struct hrtimer *timer = &rq->hrtick_timer; + ktime_t time = hrtimer_get_softexpires(timer); + + return __hrtimer_start_range_ns(timer, time, 0, HRTIMER_MODE_ABS_PINNED, 0); +} + /* * called from hardirq (IPI) context */ @@ -412,7 +414,7 @@ static void __hrtick_start(void *arg) struct rq *rq = arg; raw_spin_lock(&rq->lock); - hrtimer_restart(&rq->hrtick_timer); + __hrtick_restart(rq); rq->hrtick_csd_pending = 0; raw_spin_unlock(&rq->lock); } @@ -430,7 +432,7 @@ void hrtick_start(struct rq *rq, u64 delay) hrtimer_set_expires(timer, time); if (rq == this_rq()) { - hrtimer_restart(timer); + __hrtick_restart(rq); } else if (!rq->hrtick_csd_pending) { __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0); rq->hrtick_csd_pending = 1; @@ -4131,7 +4133,7 @@ void show_state_filter(unsigned long state_filter) debug_show_all_locks(); } -void __cpuinit init_idle_bootup_task(struct task_struct *idle) +void init_idle_bootup_task(struct task_struct *idle) { idle->sched_class = &idle_sched_class; } @@ -4144,7 +4146,7 @@ void __cpuinit init_idle_bootup_task(struct task_struct *idle) * NOTE: this function does not set the idle thread's NEED_RESCHED * flag, to make booting more robust. */ -void __cpuinit init_idle(struct task_struct *idle, int cpu) +void init_idle(struct task_struct *idle, int cpu) { struct rq *rq = cpu_rq(cpu); unsigned long flags; @@ -4628,7 +4630,7 @@ static void set_rq_offline(struct rq *rq) * migration_call - callback that gets triggered when a CPU is added. * Here we can start up the necessary migration thread for the new CPU. */ -static int __cpuinit +static int migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) { int cpu = (long)hcpu; @@ -4682,12 +4684,12 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) * happens before everything else. This has to be lower priority than * the notifier in the perf_event subsystem, though. */ -static struct notifier_block __cpuinitdata migration_notifier = { +static struct notifier_block migration_notifier = { .notifier_call = migration_call, .priority = CPU_PRI_MIGRATION, }; -static int __cpuinit sched_cpu_active(struct notifier_block *nfb, +static int sched_cpu_active(struct notifier_block *nfb, unsigned long action, void *hcpu) { switch (action & ~CPU_TASKS_FROZEN) { @@ -4700,7 +4702,7 @@ static int __cpuinit sched_cpu_active(struct notifier_block *nfb, } } -static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb, +static int sched_cpu_inactive(struct notifier_block *nfb, unsigned long action, void *hcpu) { switch (action & ~CPU_TASKS_FROZEN) { diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f77f9c5..bb456f4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5506,7 +5506,7 @@ void nohz_balance_enter_idle(int cpu) set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); } -static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb, +static int sched_ilb_notifier(struct notifier_block *nfb, unsigned long action, void *hcpu) { switch (action & ~CPU_TASKS_FROZEN) { diff --git a/kernel/smp.c b/kernel/smp.c index 4dba0f7..fe9f773 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -73,7 +73,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) return NOTIFY_OK; } -static struct notifier_block __cpuinitdata hotplug_cfd_notifier = { +static struct notifier_block hotplug_cfd_notifier = { .notifier_call = hotplug_cfd, }; diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 02fc5c9..eb89e18 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -24,7 +24,7 @@ */ static DEFINE_PER_CPU(struct task_struct *, idle_threads); -struct task_struct * __cpuinit idle_thread_get(unsigned int cpu) +struct task_struct *idle_thread_get(unsigned int cpu) { struct task_struct *tsk = per_cpu(idle_threads, cpu); diff --git a/kernel/softirq.c b/kernel/softirq.c index ca25e6e..be3d351 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -699,7 +699,7 @@ void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq) } EXPORT_SYMBOL(send_remote_softirq); -static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self, +static int remote_softirq_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { /* @@ -728,7 +728,7 @@ static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = { +static struct notifier_block remote_softirq_cpu_notifier = { .notifier_call = remote_softirq_cpu_notify, }; @@ -830,7 +830,7 @@ static void takeover_tasklets(unsigned int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static int __cpuinit cpu_callback(struct notifier_block *nfb, +static int cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -845,7 +845,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata cpu_nfb = { +static struct notifier_block cpu_nfb = { .notifier_call = cpu_callback }; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4ce13c3..ac09d98 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -599,6 +599,13 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "traceoff_on_warning", + .data = &__disable_trace_on_warning, + .maxlen = sizeof(__disable_trace_on_warning), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #endif #ifdef CONFIG_MODULES { @@ -800,7 +807,7 @@ static struct ctl_table kern_table[] = { #if defined(CONFIG_LOCKUP_DETECTOR) { .procname = "watchdog", - .data = &watchdog_enabled, + .data = &watchdog_user_enabled, .maxlen = sizeof (int), .mode = 0644, .proc_handler = proc_dowatchdog, @@ -827,7 +834,7 @@ static struct ctl_table kern_table[] = { }, { .procname = "nmi_watchdog", - .data = &watchdog_enabled, + .data = &watchdog_user_enabled, .maxlen = sizeof (int), .mode = 0644, .proc_handler = proc_dowatchdog, diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index aea4a9e..b609213 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -3,7 +3,6 @@ #include "../fs/xfs/xfs_sysctl.h" #include <linux/sunrpc/debug.h> #include <linux/string.h> -#include <net/ip_vs.h> #include <linux/syscalls.h> #include <linux/namei.h> #include <linux/mount.h> diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 6d3f916..218bcb5 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -157,7 +157,10 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) dev->event_handler = tick_handle_periodic; tick_device_setup_broadcast_func(dev); cpumask_set_cpu(cpu, tick_broadcast_mask); - tick_broadcast_start_periodic(bc); + if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) + tick_broadcast_start_periodic(bc); + else + tick_broadcast_setup_oneshot(bc); ret = 1; } else { /* diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 0cf1c14..e80183f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -178,6 +178,11 @@ static bool can_stop_full_tick(void) */ if (!sched_clock_stable) { trace_tick_stop(0, "unstable sched clock\n"); + /* + * Don't allow the user to think they can get + * full NO_HZ with this machine. + */ + WARN_ONCE(1, "NO_HZ FULL will not work with unstable sched clock"); return false; } #endif @@ -293,7 +298,7 @@ static int __init tick_nohz_full_setup(char *str) } __setup("nohz_full=", tick_nohz_full_setup); -static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, +static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -346,16 +351,6 @@ void __init tick_nohz_init(void) } cpu_notifier(tick_nohz_cpu_down_callback, 0); - - /* Make sure full dynticks CPU are also RCU nocbs */ - for_each_cpu(cpu, nohz_full_mask) { - if (!rcu_is_nocb_cpu(cpu)) { - pr_warning("NO_HZ: CPU %d is not RCU nocb: " - "cleared from nohz_full range", cpu); - cpumask_clear_cpu(cpu, nohz_full_mask); - } - } - cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); } diff --git a/kernel/timer.c b/kernel/timer.c index 15bc1b4..4296d13 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1505,11 +1505,11 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) } EXPORT_SYMBOL(schedule_timeout_uninterruptible); -static int __cpuinit init_timers_cpu(int cpu) +static int init_timers_cpu(int cpu) { int j; struct tvec_base *base; - static char __cpuinitdata tvec_base_done[NR_CPUS]; + static char tvec_base_done[NR_CPUS]; if (!tvec_base_done[cpu]) { static char boot_done; @@ -1577,7 +1577,7 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea } } -static void __cpuinit migrate_timers(int cpu) +static void migrate_timers(int cpu) { struct tvec_base *old_base; struct tvec_base *new_base; @@ -1610,7 +1610,7 @@ static void __cpuinit migrate_timers(int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static int __cpuinit timer_cpu_notify(struct notifier_block *self, +static int timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; @@ -1635,7 +1635,7 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata timers_nb = { +static struct notifier_block timers_nb = { .notifier_call = timer_cpu_notify, }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6c508ff..67708f4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -413,6 +413,17 @@ static int __register_ftrace_function(struct ftrace_ops *ops) return 0; } +static void ftrace_sync(struct work_struct *work) +{ + /* + * This function is just a stub to implement a hard force + * of synchronize_sched(). This requires synchronizing + * tasks even in userspace and idle. + * + * Yes, function tracing is rude. + */ +} + static int __unregister_ftrace_function(struct ftrace_ops *ops) { int ret; @@ -440,8 +451,12 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) * so there'll be no new users. We must ensure * all current users are done before we free * the control data. + * Note synchronize_sched() is not enough, as we + * use preempt_disable() to do RCU, but the function + * tracer can be called where RCU is not active + * (before user_exit()). */ - synchronize_sched(); + schedule_on_each_cpu(ftrace_sync); control_ops_free(ops); } } else @@ -456,9 +471,13 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) /* * Dynamic ops may be freed, we must make sure that all * callers are done before leaving this function. + * + * Again, normal synchronize_sched() is not good enough. + * We need to do a hard force of sched synchronization. */ if (ops->flags & FTRACE_OPS_FL_DYNAMIC) - synchronize_sched(); + schedule_on_each_cpu(ftrace_sync); + return 0; } @@ -622,12 +641,18 @@ static int function_stat_show(struct seq_file *m, void *v) if (rec->counter <= 1) stddev = 0; else { - stddev = rec->time_squared - rec->counter * avg * avg; + /* + * Apply Welford's method: + * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) + */ + stddev = rec->counter * rec->time_squared - + rec->time * rec->time; + /* * Divide only 1000 for ns^2 -> us^2 conversion. * trace_print_graph_duration will divide 1000 again. */ - do_div(stddev, (rec->counter - 1) * 1000); + do_div(stddev, rec->counter * (rec->counter - 1) * 1000); } trace_seq_init(&s); @@ -3512,8 +3537,12 @@ EXPORT_SYMBOL_GPL(ftrace_set_global_notrace); static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata; static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; +/* Used by function selftest to not test if filter is set */ +bool ftrace_filter_param __initdata; + static int __init set_ftrace_notrace(char *str) { + ftrace_filter_param = true; strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); return 1; } @@ -3521,6 +3550,7 @@ __setup("ftrace_notrace=", set_ftrace_notrace); static int __init set_ftrace_filter(char *str) { + ftrace_filter_param = true; strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); return 1; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e71a8be..0cd500b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -115,6 +115,9 @@ cpumask_var_t __read_mostly tracing_buffer_mask; enum ftrace_dump_mode ftrace_dump_on_oops; +/* When set, tracing will stop when a WARN*() is hit */ +int __disable_trace_on_warning; + static int tracing_set_tracer(const char *buf); #define MAX_TRACER_SIZE 100 @@ -149,6 +152,13 @@ static int __init set_ftrace_dump_on_oops(char *str) } __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); +static int __init stop_trace_on_warning(char *str) +{ + __disable_trace_on_warning = 1; + return 1; +} +__setup("traceoff_on_warning=", stop_trace_on_warning); + static int __init boot_alloc_snapshot(char *str) { allocate_snapshot = true; @@ -170,6 +180,7 @@ static int __init set_trace_boot_options(char *str) } __setup("trace_options=", set_trace_boot_options); + unsigned long long ns2usecs(cycle_t nsec) { nsec += 500; @@ -193,6 +204,37 @@ static struct trace_array global_trace; LIST_HEAD(ftrace_trace_arrays); +int trace_array_get(struct trace_array *this_tr) +{ + struct trace_array *tr; + int ret = -ENODEV; + + mutex_lock(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (tr == this_tr) { + tr->ref++; + ret = 0; + break; + } + } + mutex_unlock(&trace_types_lock); + + return ret; +} + +static void __trace_array_put(struct trace_array *this_tr) +{ + WARN_ON(!this_tr->ref); + this_tr->ref--; +} + +void trace_array_put(struct trace_array *this_tr) +{ + mutex_lock(&trace_types_lock); + __trace_array_put(this_tr); + mutex_unlock(&trace_types_lock); +} + int filter_current_check_discard(struct ring_buffer *buffer, struct ftrace_event_call *call, void *rec, struct ring_buffer_event *event) @@ -215,9 +257,24 @@ cycle_t ftrace_now(int cpu) return ts; } +/** + * tracing_is_enabled - Show if global_trace has been disabled + * + * Shows if the global trace has been enabled or not. It uses the + * mirror flag "buffer_disabled" to be used in fast paths such as for + * the irqsoff tracer. But it may be inaccurate due to races. If you + * need to know the accurate state, use tracing_is_on() which is a little + * slower, but accurate. + */ int tracing_is_enabled(void) { - return tracing_is_on(); + /* + * For quick access (irqsoff uses this in fast path), just + * return the mirror variable of the state of the ring buffer. + * It's a little racy, but we don't really care. + */ + smp_rmb(); + return !global_trace.buffer_disabled; } /* @@ -240,7 +297,7 @@ static struct tracer *trace_types __read_mostly; /* * trace_types_lock is used to protect the trace_types list. */ -static DEFINE_MUTEX(trace_types_lock); +DEFINE_MUTEX(trace_types_lock); /* * serialize the access of the ring buffer @@ -330,6 +387,23 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION; +static void tracer_tracing_on(struct trace_array *tr) +{ + if (tr->trace_buffer.buffer) + ring_buffer_record_on(tr->trace_buffer.buffer); + /* + * This flag is looked at when buffers haven't been allocated + * yet, or by some tracers (like irqsoff), that just want to + * know if the ring buffer has been disabled, but it can handle + * races of where it gets disabled but we still do a record. + * As the check is in the fast path of the tracers, it is more + * important to be fast than accurate. + */ + tr->buffer_disabled = 0; + /* Make the flag seen by readers */ + smp_wmb(); +} + /** * tracing_on - enable tracing buffers * @@ -338,15 +412,7 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | */ void tracing_on(void) { - if (global_trace.trace_buffer.buffer) - ring_buffer_record_on(global_trace.trace_buffer.buffer); - /* - * This flag is only looked at when buffers haven't been - * allocated yet. We don't really care about the race - * between setting this flag and actually turning - * on the buffer. - */ - global_trace.buffer_disabled = 0; + tracer_tracing_on(&global_trace); } EXPORT_SYMBOL_GPL(tracing_on); @@ -540,6 +606,23 @@ void tracing_snapshot_alloc(void) EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); #endif /* CONFIG_TRACER_SNAPSHOT */ +static void tracer_tracing_off(struct trace_array *tr) +{ + if (tr->trace_buffer.buffer) + ring_buffer_record_off(tr->trace_buffer.buffer); + /* + * This flag is looked at when buffers haven't been allocated + * yet, or by some tracers (like irqsoff), that just want to + * know if the ring buffer has been disabled, but it can handle + * races of where it gets disabled but we still do a record. + * As the check is in the fast path of the tracers, it is more + * important to be fast than accurate. + */ + tr->buffer_disabled = 1; + /* Make the flag seen by readers */ + smp_wmb(); +} + /** * tracing_off - turn off tracing buffers * @@ -550,26 +633,35 @@ EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); */ void tracing_off(void) { - if (global_trace.trace_buffer.buffer) - ring_buffer_record_off(global_trace.trace_buffer.buffer); - /* - * This flag is only looked at when buffers haven't been - * allocated yet. We don't really care about the race - * between setting this flag and actually turning - * on the buffer. - */ - global_trace.buffer_disabled = 1; + tracer_tracing_off(&global_trace); } EXPORT_SYMBOL_GPL(tracing_off); +void disable_trace_on_warning(void) +{ + if (__disable_trace_on_warning) + tracing_off(); +} + +/** + * tracer_tracing_is_on - show real state of ring buffer enabled + * @tr : the trace array to know if ring buffer is enabled + * + * Shows real state of the ring buffer if it is enabled or not. + */ +static int tracer_tracing_is_on(struct trace_array *tr) +{ + if (tr->trace_buffer.buffer) + return ring_buffer_record_is_on(tr->trace_buffer.buffer); + return !tr->buffer_disabled; +} + /** * tracing_is_on - show state of ring buffers enabled */ int tracing_is_on(void) { - if (global_trace.trace_buffer.buffer) - return ring_buffer_record_is_on(global_trace.trace_buffer.buffer); - return !global_trace.buffer_disabled; + return tracer_tracing_is_on(&global_trace); } EXPORT_SYMBOL_GPL(tracing_is_on); @@ -1543,15 +1635,6 @@ trace_function(struct trace_array *tr, __buffer_unlock_commit(buffer, event); } -void -ftrace(struct trace_array *tr, struct trace_array_cpu *data, - unsigned long ip, unsigned long parent_ip, unsigned long flags, - int pc) -{ - if (likely(!atomic_read(&data->disabled))) - trace_function(tr, ip, parent_ip, flags, pc); -} - #ifdef CONFIG_STACKTRACE #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long)) @@ -2768,10 +2851,9 @@ static const struct seq_operations tracer_seq_ops = { }; static struct trace_iterator * -__tracing_open(struct inode *inode, struct file *file, bool snapshot) +__tracing_open(struct trace_array *tr, struct trace_cpu *tc, + struct inode *inode, struct file *file, bool snapshot) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; struct trace_iterator *iter; int cpu; @@ -2850,8 +2932,6 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) tracing_iter_reset(iter, cpu); } - tr->ref++; - mutex_unlock(&trace_types_lock); return iter; @@ -2874,6 +2954,43 @@ int tracing_open_generic(struct inode *inode, struct file *filp) return 0; } +/* + * Open and update trace_array ref count. + * Must have the current trace_array passed to it. + */ +static int tracing_open_generic_tr(struct inode *inode, struct file *filp) +{ + struct trace_array *tr = inode->i_private; + + if (tracing_disabled) + return -ENODEV; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + filp->private_data = inode->i_private; + + return 0; + +} + +static int tracing_open_generic_tc(struct inode *inode, struct file *filp) +{ + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; + + if (tracing_disabled) + return -ENODEV; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + filp->private_data = inode->i_private; + + return 0; + +} + static int tracing_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; @@ -2881,17 +2998,20 @@ static int tracing_release(struct inode *inode, struct file *file) struct trace_array *tr; int cpu; - if (!(file->f_mode & FMODE_READ)) + /* Writes do not use seq_file, need to grab tr from inode */ + if (!(file->f_mode & FMODE_READ)) { + struct trace_cpu *tc = inode->i_private; + + trace_array_put(tc->tr); return 0; + } iter = m->private; tr = iter->tr; + trace_array_put(tr); mutex_lock(&trace_types_lock); - WARN_ON(!tr->ref); - tr->ref--; - for_each_tracing_cpu(cpu) { if (iter->buffer_iter[cpu]) ring_buffer_read_finish(iter->buffer_iter[cpu]); @@ -2910,20 +3030,49 @@ static int tracing_release(struct inode *inode, struct file *file) kfree(iter->trace); kfree(iter->buffer_iter); seq_release_private(inode, file); + + return 0; +} + +static int tracing_release_generic_tr(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); return 0; } +static int tracing_release_generic_tc(struct inode *inode, struct file *file) +{ + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; + + trace_array_put(tr); + return 0; +} + +static int tracing_single_release_tr(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + + return single_release(inode, file); +} + static int tracing_open(struct inode *inode, struct file *file) { + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; struct trace_iterator *iter; int ret = 0; + if (trace_array_get(tr) < 0) + return -ENODEV; + /* If this file was open for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; - if (tc->cpu == RING_BUFFER_ALL_CPUS) tracing_reset_online_cpus(&tr->trace_buffer); else @@ -2931,12 +3080,16 @@ static int tracing_open(struct inode *inode, struct file *file) } if (file->f_mode & FMODE_READ) { - iter = __tracing_open(inode, file, false); + iter = __tracing_open(tr, tc, inode, file, false); if (IS_ERR(iter)) ret = PTR_ERR(iter); else if (trace_flags & TRACE_ITER_LATENCY_FMT) iter->iter_flags |= TRACE_FILE_LAT_FMT; } + + if (ret < 0) + trace_array_put(tr); + return ret; } @@ -3293,9 +3446,14 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, static int tracing_trace_options_open(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; + if (tracing_disabled) return -ENODEV; + if (trace_array_get(tr) < 0) + return -ENODEV; + return single_open(file, tracing_trace_options_show, inode->i_private); } @@ -3303,7 +3461,7 @@ static const struct file_operations tracing_iter_fops = { .open = tracing_trace_options_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = tracing_single_release_tr, .write = tracing_trace_options_write, }; @@ -3791,6 +3949,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) if (tracing_disabled) return -ENODEV; + if (trace_array_get(tr) < 0) + return -ENODEV; + mutex_lock(&trace_types_lock); /* create a buffer to store the information to pass to userspace */ @@ -3843,6 +4004,7 @@ out: fail: kfree(iter->trace); kfree(iter); + __trace_array_put(tr); mutex_unlock(&trace_types_lock); return ret; } @@ -3850,6 +4012,8 @@ fail: static int tracing_release_pipe(struct inode *inode, struct file *file) { struct trace_iterator *iter = file->private_data; + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; mutex_lock(&trace_types_lock); @@ -3863,6 +4027,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) kfree(iter->trace); kfree(iter); + trace_array_put(tr); + return 0; } @@ -3939,7 +4105,7 @@ static int tracing_wait_pipe(struct file *filp) * * iter->pos will be 0 if we haven't read anything. */ - if (!tracing_is_enabled() && iter->pos) + if (!tracing_is_on() && iter->pos) break; } @@ -4320,6 +4486,8 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp) /* resize the ring buffer to 0 */ tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); + trace_array_put(tr); + return 0; } @@ -4328,6 +4496,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) { unsigned long addr = (unsigned long)ubuf; + struct trace_array *tr = filp->private_data; struct ring_buffer_event *event; struct ring_buffer *buffer; struct print_entry *entry; @@ -4387,7 +4556,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, local_save_flags(irq_flags); size = sizeof(*entry) + cnt + 2; /* possible \n added */ - buffer = global_trace.trace_buffer.buffer; + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, irq_flags, preempt_count()); if (!event) { @@ -4495,10 +4664,20 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, static int tracing_clock_open(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; + int ret; + if (tracing_disabled) return -ENODEV; - return single_open(file, tracing_clock_show, inode->i_private); + if (trace_array_get(tr)) + return -ENODEV; + + ret = single_open(file, tracing_clock_show, inode->i_private); + if (ret < 0) + trace_array_put(tr); + + return ret; } struct ftrace_buffer_info { @@ -4511,12 +4690,16 @@ struct ftrace_buffer_info { static int tracing_snapshot_open(struct inode *inode, struct file *file) { struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; struct trace_iterator *iter; struct seq_file *m; int ret = 0; + if (trace_array_get(tr) < 0) + return -ENODEV; + if (file->f_mode & FMODE_READ) { - iter = __tracing_open(inode, file, true); + iter = __tracing_open(tr, tc, inode, file, true); if (IS_ERR(iter)) ret = PTR_ERR(iter); } else { @@ -4529,13 +4712,16 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file) kfree(m); return -ENOMEM; } - iter->tr = tc->tr; + iter->tr = tr; iter->trace_buffer = &tc->tr->max_buffer; iter->cpu_file = tc->cpu; m->private = iter; file->private_data = m; } + if (ret < 0) + trace_array_put(tr); + return ret; } @@ -4616,9 +4802,12 @@ out: static int tracing_snapshot_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; + int ret; + + ret = tracing_release(inode, file); if (file->f_mode & FMODE_READ) - return tracing_release(inode, file); + return ret; /* If write only, the seq_file is just a stub */ if (m) @@ -4684,34 +4873,38 @@ static const struct file_operations tracing_pipe_fops = { }; static const struct file_operations tracing_entries_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tc, .read = tracing_entries_read, .write = tracing_entries_write, .llseek = generic_file_llseek, + .release = tracing_release_generic_tc, }; static const struct file_operations tracing_total_entries_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = tracing_total_entries_read, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; static const struct file_operations tracing_free_buffer_fops = { + .open = tracing_open_generic_tr, .write = tracing_free_buffer_write, .release = tracing_free_buffer_release, }; static const struct file_operations tracing_mark_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .write = tracing_mark_write, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; static const struct file_operations trace_clock_fops = { .open = tracing_clock_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = tracing_single_release_tr, .write = tracing_clock_write, }; @@ -4739,13 +4932,19 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) struct trace_cpu *tc = inode->i_private; struct trace_array *tr = tc->tr; struct ftrace_buffer_info *info; + int ret; if (tracing_disabled) return -ENODEV; + if (trace_array_get(tr) < 0) + return -ENODEV; + info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) + if (!info) { + trace_array_put(tr); return -ENOMEM; + } mutex_lock(&trace_types_lock); @@ -4763,7 +4962,11 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) mutex_unlock(&trace_types_lock); - return nonseekable_open(inode, filp); + ret = nonseekable_open(inode, filp); + if (ret < 0) + trace_array_put(tr); + + return ret; } static unsigned int @@ -4863,8 +5066,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) mutex_lock(&trace_types_lock); - WARN_ON(!iter->tr->ref); - iter->tr->ref--; + __trace_array_put(iter->tr); if (info->spare) ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare); @@ -5612,15 +5814,10 @@ rb_simple_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; - struct ring_buffer *buffer = tr->trace_buffer.buffer; char buf[64]; int r; - if (buffer) - r = ring_buffer_record_is_on(buffer); - else - r = 0; - + r = tracer_tracing_is_on(tr); r = sprintf(buf, "%d\n", r); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); @@ -5642,11 +5839,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf, if (buffer) { mutex_lock(&trace_types_lock); if (val) { - ring_buffer_record_on(buffer); + tracer_tracing_on(tr); if (tr->current_trace->start) tr->current_trace->start(tr); } else { - ring_buffer_record_off(buffer); + tracer_tracing_off(tr); if (tr->current_trace->stop) tr->current_trace->stop(tr); } @@ -5659,9 +5856,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf, } static const struct file_operations rb_simple_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = rb_simple_read, .write = rb_simple_write, + .release = tracing_release_generic_tr, .llseek = default_llseek, }; @@ -5933,7 +6131,7 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) trace_create_file("buffer_total_size_kb", 0444, d_tracer, tr, &tracing_total_entries_fops); - trace_create_file("free_buffer", 0644, d_tracer, + trace_create_file("free_buffer", 0200, d_tracer, tr, &tracing_free_buffer_fops); trace_create_file("trace_marker", 0220, d_tracer, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 20572ed..4a4f6e1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -224,6 +224,11 @@ enum { extern struct list_head ftrace_trace_arrays; +extern struct mutex trace_types_lock; + +extern int trace_array_get(struct trace_array *tr); +extern void trace_array_put(struct trace_array *tr); + /* * The global tracer (top) should be the first trace array added, * but we check the flag anyway. @@ -554,11 +559,6 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu); void poll_wait_pipe(struct trace_iterator *iter); -void ftrace(struct trace_array *tr, - struct trace_array_cpu *data, - unsigned long ip, - unsigned long parent_ip, - unsigned long flags, int pc); void tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *prev, struct task_struct *next, @@ -774,6 +774,7 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) extern struct list_head ftrace_pids; #ifdef CONFIG_FUNCTION_TRACER +extern bool ftrace_filter_param __initdata; static inline int ftrace_trace_task(struct task_struct *task) { if (list_empty(&ftrace_pids)) @@ -899,12 +900,6 @@ static inline void trace_branch_disable(void) /* set ring buffers to default size if not already done so */ int tracing_update_buffers(void); -/* trace event type bit fields, not numeric */ -enum { - TRACE_EVENT_TYPE_PRINTF = 1, - TRACE_EVENT_TYPE_RAW = 2, -}; - struct ftrace_event_field { struct list_head link; const char *name; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 27963e2..7d85429 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -41,6 +41,23 @@ static LIST_HEAD(ftrace_common_fields); static struct kmem_cache *field_cachep; static struct kmem_cache *file_cachep; +#define SYSTEM_FL_FREE_NAME (1 << 31) + +static inline int system_refcount(struct event_subsystem *system) +{ + return system->ref_count & ~SYSTEM_FL_FREE_NAME; +} + +static int system_refcount_inc(struct event_subsystem *system) +{ + return (system->ref_count++) & ~SYSTEM_FL_FREE_NAME; +} + +static int system_refcount_dec(struct event_subsystem *system) +{ + return (--system->ref_count) & ~SYSTEM_FL_FREE_NAME; +} + /* Double loops, do not use break, only goto's work */ #define do_for_each_event_file(tr, file) \ list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ @@ -97,7 +114,7 @@ static int __trace_define_field(struct list_head *head, const char *type, field = kmem_cache_alloc(field_cachep, GFP_TRACE); if (!field) - goto err; + return -ENOMEM; field->name = name; field->type = type; @@ -114,11 +131,6 @@ static int __trace_define_field(struct list_head *head, const char *type, list_add(&field->link, head); return 0; - -err: - kmem_cache_free(field_cachep, field); - - return -ENOMEM; } int trace_define_field(struct ftrace_event_call *call, const char *type, @@ -279,9 +291,11 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, } call->class->reg(call, TRACE_REG_UNREGISTER, file); } - /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT */ + /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */ if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); + else + clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); break; case 1: /* @@ -349,8 +363,8 @@ static void __put_system(struct event_subsystem *system) { struct event_filter *filter = system->filter; - WARN_ON_ONCE(system->ref_count == 0); - if (--system->ref_count) + WARN_ON_ONCE(system_refcount(system) == 0); + if (system_refcount_dec(system)) return; list_del(&system->list); @@ -359,13 +373,15 @@ static void __put_system(struct event_subsystem *system) kfree(filter->filter_string); kfree(filter); } + if (system->ref_count & SYSTEM_FL_FREE_NAME) + kfree(system->name); kfree(system); } static void __get_system(struct event_subsystem *system) { - WARN_ON_ONCE(system->ref_count == 0); - system->ref_count++; + WARN_ON_ONCE(system_refcount(system) == 0); + system_refcount_inc(system); } static void __get_system_dir(struct ftrace_subsystem_dir *dir) @@ -379,7 +395,7 @@ static void __put_system_dir(struct ftrace_subsystem_dir *dir) { WARN_ON_ONCE(dir->ref_count == 0); /* If the subsystem is about to be freed, the dir must be too */ - WARN_ON_ONCE(dir->subsystem->ref_count == 1 && dir->ref_count != 1); + WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1); __put_system(dir->subsystem); if (!--dir->ref_count) @@ -394,16 +410,45 @@ static void put_system(struct ftrace_subsystem_dir *dir) } /* + * Open and update trace_array ref count. + * Must have the current trace_array passed to it. + */ +static int tracing_open_generic_file(struct inode *inode, struct file *filp) +{ + struct ftrace_event_file *file = inode->i_private; + struct trace_array *tr = file->tr; + int ret; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + ret = tracing_open_generic(inode, filp); + if (ret < 0) + trace_array_put(tr); + return ret; +} + +static int tracing_release_generic_file(struct inode *inode, struct file *filp) +{ + struct ftrace_event_file *file = inode->i_private; + struct trace_array *tr = file->tr; + + trace_array_put(tr); + + return 0; +} + +/* * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. */ -static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, - const char *sub, const char *event, int set) +static int +__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, + const char *sub, const char *event, int set) { struct ftrace_event_file *file; struct ftrace_event_call *call; int ret = -EINVAL; - mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { call = file->event_call; @@ -429,6 +474,17 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, ret = 0; } + + return ret; +} + +static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, + const char *sub, const char *event, int set) +{ + int ret; + + mutex_lock(&event_mutex); + ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set); mutex_unlock(&event_mutex); return ret; @@ -624,17 +680,17 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct ftrace_event_file *file = filp->private_data; - char *buf; + char buf[4] = "0"; - if (file->flags & FTRACE_EVENT_FL_ENABLED) { - if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED) - buf = "0*\n"; - else if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) - buf = "1*\n"; - else - buf = "1\n"; - } else - buf = "0\n"; + if (file->flags & FTRACE_EVENT_FL_ENABLED && + !(file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)) + strcpy(buf, "1"); + + if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED || + file->flags & FTRACE_EVENT_FL_SOFT_MODE) + strcat(buf, "*"); + + strcat(buf, "\n"); return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf)); } @@ -992,6 +1048,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) int ret; /* Make sure the system still exists */ + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); list_for_each_entry(tr, &ftrace_trace_arrays, list) { list_for_each_entry(dir, &tr->systems, list) { @@ -1007,6 +1064,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) } exit_loop: mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); if (!system) return -ENODEV; @@ -1014,9 +1072,17 @@ static int subsystem_open(struct inode *inode, struct file *filp) /* Some versions of gcc think dir can be uninitialized here */ WARN_ON(!dir); + /* Still need to increment the ref count of the system */ + if (trace_array_get(tr) < 0) { + put_system(dir); + return -ENODEV; + } + ret = tracing_open_generic(inode, filp); - if (ret < 0) + if (ret < 0) { + trace_array_put(tr); put_system(dir); + } return ret; } @@ -1027,16 +1093,23 @@ static int system_tr_open(struct inode *inode, struct file *filp) struct trace_array *tr = inode->i_private; int ret; + if (trace_array_get(tr) < 0) + return -ENODEV; + /* Make a temporary dir that has no system but points to tr */ dir = kzalloc(sizeof(*dir), GFP_KERNEL); - if (!dir) + if (!dir) { + trace_array_put(tr); return -ENOMEM; + } dir->tr = tr; ret = tracing_open_generic(inode, filp); - if (ret < 0) + if (ret < 0) { + trace_array_put(tr); kfree(dir); + } filp->private_data = dir; @@ -1047,6 +1120,8 @@ static int subsystem_release(struct inode *inode, struct file *file) { struct ftrace_subsystem_dir *dir = file->private_data; + trace_array_put(dir->tr); + /* * If dir->subsystem is NULL, then this is a temporary * descriptor that was made for a trace_array to enable @@ -1174,9 +1249,10 @@ static const struct file_operations ftrace_set_event_fops = { }; static const struct file_operations ftrace_enable_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_file, .read = event_enable_read, .write = event_enable_write, + .release = tracing_release_generic_file, .llseek = default_llseek, }; @@ -1279,7 +1355,15 @@ create_new_subsystem(const char *name) return NULL; system->ref_count = 1; - system->name = name; + + /* Only allocate if dynamic (kprobes and modules) */ + if (!core_kernel_data((unsigned long)name)) { + system->ref_count |= SYSTEM_FL_FREE_NAME; + system->name = kstrdup(name, GFP_KERNEL); + if (!system->name) + goto out_free; + } else + system->name = name; system->filter = NULL; @@ -1292,6 +1376,8 @@ create_new_subsystem(const char *name) return system; out_free: + if (system->ref_count & SYSTEM_FL_FREE_NAME) + kfree(system->name); kfree(system); return NULL; } @@ -1591,6 +1677,7 @@ static void __add_event_to_tracers(struct ftrace_event_call *call, int trace_add_event_call(struct ftrace_event_call *call) { int ret; + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); ret = __register_event(call, NULL); @@ -1598,11 +1685,13 @@ int trace_add_event_call(struct ftrace_event_call *call) __add_event_to_tracers(call, NULL); mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); return ret; } /* - * Must be called under locking both of event_mutex and trace_event_sem. + * Must be called under locking of trace_types_lock, event_mutex and + * trace_event_sem. */ static void __trace_remove_event_call(struct ftrace_event_call *call) { @@ -1614,11 +1703,13 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) /* Remove an event_call */ void trace_remove_event_call(struct ftrace_event_call *call) { + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); down_write(&trace_event_sem); __trace_remove_event_call(call); up_write(&trace_event_sem); mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); } #define for_each_event(event, start, end) \ @@ -1762,6 +1853,7 @@ static int trace_module_notify(struct notifier_block *self, { struct module *mod = data; + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); switch (val) { case MODULE_STATE_COMING: @@ -1772,6 +1864,7 @@ static int trace_module_notify(struct notifier_block *self, break; } mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); return 0; } @@ -2011,10 +2104,7 @@ event_enable_func(struct ftrace_hash *hash, int ret; /* hash funcs only work with set_ftrace_filter */ - if (!enabled) - return -EINVAL; - - if (!param) + if (!enabled || !param) return -EINVAL; system = strsep(¶m, ":"); @@ -2329,11 +2419,11 @@ early_event_add_tracer(struct dentry *parent, struct trace_array *tr) int event_trace_del_tracer(struct trace_array *tr) { - /* Disable any running events */ - __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0); - mutex_lock(&event_mutex); + /* Disable any running events */ + __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); + down_write(&trace_event_sem); __trace_remove_event_dirs(tr); debugfs_remove_recursive(tr->event_dir); diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e1b653f..0d883dc 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -44,6 +44,7 @@ enum filter_op_ids OP_LE, OP_GT, OP_GE, + OP_BAND, OP_NONE, OP_OPEN_PAREN, }; @@ -54,6 +55,7 @@ struct filter_op { int precedence; }; +/* Order must be the same as enum filter_op_ids above */ static struct filter_op filter_ops[] = { { OP_OR, "||", 1 }, { OP_AND, "&&", 2 }, @@ -64,6 +66,7 @@ static struct filter_op filter_ops[] = { { OP_LE, "<=", 5 }, { OP_GT, ">", 5 }, { OP_GE, ">=", 5 }, + { OP_BAND, "&", 6 }, { OP_NONE, "OP_NONE", 0 }, { OP_OPEN_PAREN, "(", 0 }, }; @@ -156,6 +159,9 @@ static int filter_pred_##type(struct filter_pred *pred, void *event) \ case OP_GE: \ match = (*addr >= val); \ break; \ + case OP_BAND: \ + match = (*addr & val); \ + break; \ default: \ break; \ } \ diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index c4d6d71..b863f93 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -290,6 +290,21 @@ ftrace_stacktrace_count(unsigned long ip, unsigned long parent_ip, void **data) trace_dump_stack(STACK_SKIP); } +static void +ftrace_dump_probe(unsigned long ip, unsigned long parent_ip, void **data) +{ + if (update_count(data)) + ftrace_dump(DUMP_ALL); +} + +/* Only dump the current CPU buffer. */ +static void +ftrace_cpudump_probe(unsigned long ip, unsigned long parent_ip, void **data) +{ + if (update_count(data)) + ftrace_dump(DUMP_ORIG); +} + static int ftrace_probe_print(const char *name, struct seq_file *m, unsigned long ip, void *data) @@ -327,6 +342,20 @@ ftrace_stacktrace_print(struct seq_file *m, unsigned long ip, return ftrace_probe_print("stacktrace", m, ip, data); } +static int +ftrace_dump_print(struct seq_file *m, unsigned long ip, + struct ftrace_probe_ops *ops, void *data) +{ + return ftrace_probe_print("dump", m, ip, data); +} + +static int +ftrace_cpudump_print(struct seq_file *m, unsigned long ip, + struct ftrace_probe_ops *ops, void *data) +{ + return ftrace_probe_print("cpudump", m, ip, data); +} + static struct ftrace_probe_ops traceon_count_probe_ops = { .func = ftrace_traceon_count, .print = ftrace_traceon_print, @@ -342,6 +371,16 @@ static struct ftrace_probe_ops stacktrace_count_probe_ops = { .print = ftrace_stacktrace_print, }; +static struct ftrace_probe_ops dump_probe_ops = { + .func = ftrace_dump_probe, + .print = ftrace_dump_print, +}; + +static struct ftrace_probe_ops cpudump_probe_ops = { + .func = ftrace_cpudump_probe, + .print = ftrace_cpudump_print, +}; + static struct ftrace_probe_ops traceon_probe_ops = { .func = ftrace_traceon, .print = ftrace_traceon_print, @@ -425,6 +464,32 @@ ftrace_stacktrace_callback(struct ftrace_hash *hash, param, enable); } +static int +ftrace_dump_callback(struct ftrace_hash *hash, + char *glob, char *cmd, char *param, int enable) +{ + struct ftrace_probe_ops *ops; + + ops = &dump_probe_ops; + + /* Only dump once. */ + return ftrace_trace_probe_callback(ops, hash, glob, cmd, + "1", enable); +} + +static int +ftrace_cpudump_callback(struct ftrace_hash *hash, + char *glob, char *cmd, char *param, int enable) +{ + struct ftrace_probe_ops *ops; + + ops = &cpudump_probe_ops; + + /* Only dump once. */ + return ftrace_trace_probe_callback(ops, hash, glob, cmd, + "1", enable); +} + static struct ftrace_func_command ftrace_traceon_cmd = { .name = "traceon", .func = ftrace_trace_onoff_callback, @@ -440,6 +505,16 @@ static struct ftrace_func_command ftrace_stacktrace_cmd = { .func = ftrace_stacktrace_callback, }; +static struct ftrace_func_command ftrace_dump_cmd = { + .name = "dump", + .func = ftrace_dump_callback, +}; + +static struct ftrace_func_command ftrace_cpudump_cmd = { + .name = "cpudump", + .func = ftrace_cpudump_callback, +}; + static int __init init_func_cmd_traceon(void) { int ret; @@ -450,13 +525,31 @@ static int __init init_func_cmd_traceon(void) ret = register_ftrace_command(&ftrace_traceon_cmd); if (ret) - unregister_ftrace_command(&ftrace_traceoff_cmd); + goto out_free_traceoff; ret = register_ftrace_command(&ftrace_stacktrace_cmd); - if (ret) { - unregister_ftrace_command(&ftrace_traceoff_cmd); - unregister_ftrace_command(&ftrace_traceon_cmd); - } + if (ret) + goto out_free_traceon; + + ret = register_ftrace_command(&ftrace_dump_cmd); + if (ret) + goto out_free_stacktrace; + + ret = register_ftrace_command(&ftrace_cpudump_cmd); + if (ret) + goto out_free_dump; + + return 0; + + out_free_dump: + unregister_ftrace_command(&ftrace_dump_cmd); + out_free_stacktrace: + unregister_ftrace_command(&ftrace_stacktrace_cmd); + out_free_traceon: + unregister_ftrace_command(&ftrace_traceon_cmd); + out_free_traceoff: + unregister_ftrace_command(&ftrace_traceoff_cmd); + return ret; } #else diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index b19d065..2aefbee 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -373,7 +373,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) struct trace_array_cpu *data; unsigned long flags; - if (likely(!tracer_enabled)) + if (!tracer_enabled || !tracing_is_enabled()) return; cpu = raw_smp_processor_id(); @@ -416,7 +416,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) else return; - if (!tracer_enabled) + if (!tracer_enabled || !tracing_is_enabled()) return; data = per_cpu_ptr(tr->trace_buffer.data, cpu); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9f46e98..7ed6976 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -35,12 +35,17 @@ struct trace_probe { const char *symbol; /* symbol name */ struct ftrace_event_class class; struct ftrace_event_call call; - struct ftrace_event_file * __rcu *files; + struct list_head files; ssize_t size; /* trace entry size */ unsigned int nr_args; struct probe_arg args[]; }; +struct event_file_link { + struct ftrace_event_file *file; + struct list_head list; +}; + #define SIZEOF_TRACE_PROBE(n) \ (offsetof(struct trace_probe, args) + \ (sizeof(struct probe_arg) * (n))) @@ -150,6 +155,7 @@ static struct trace_probe *alloc_trace_probe(const char *group, goto error; INIT_LIST_HEAD(&tp->list); + INIT_LIST_HEAD(&tp->files); return tp; error: kfree(tp->call.name); @@ -183,25 +189,6 @@ static struct trace_probe *find_trace_probe(const char *event, return NULL; } -static int trace_probe_nr_files(struct trace_probe *tp) -{ - struct ftrace_event_file **file; - int ret = 0; - - /* - * Since all tp->files updater is protected by probe_enable_lock, - * we don't need to lock an rcu_read_lock. - */ - file = rcu_dereference_raw(tp->files); - if (file) - while (*(file++)) - ret++; - - return ret; -} - -static DEFINE_MUTEX(probe_enable_lock); - /* * Enable trace_probe * if the file is NULL, enable "perf" handler, or enable "trace" handler. @@ -211,67 +198,42 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) { int ret = 0; - mutex_lock(&probe_enable_lock); - if (file) { - struct ftrace_event_file **new, **old; - int n = trace_probe_nr_files(tp); - - old = rcu_dereference_raw(tp->files); - /* 1 is for new one and 1 is for stopper */ - new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *), - GFP_KERNEL); - if (!new) { + struct event_file_link *link; + + link = kmalloc(sizeof(*link), GFP_KERNEL); + if (!link) { ret = -ENOMEM; - goto out_unlock; + goto out; } - memcpy(new, old, n * sizeof(struct ftrace_event_file *)); - new[n] = file; - /* The last one keeps a NULL */ - rcu_assign_pointer(tp->files, new); - tp->flags |= TP_FLAG_TRACE; + link->file = file; + list_add_tail_rcu(&link->list, &tp->files); - if (old) { - /* Make sure the probe is done with old files */ - synchronize_sched(); - kfree(old); - } + tp->flags |= TP_FLAG_TRACE; } else tp->flags |= TP_FLAG_PROFILE; - if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) && - !trace_probe_has_gone(tp)) { + if (trace_probe_is_registered(tp) && !trace_probe_has_gone(tp)) { if (trace_probe_is_return(tp)) ret = enable_kretprobe(&tp->rp); else ret = enable_kprobe(&tp->rp.kp); } - - out_unlock: - mutex_unlock(&probe_enable_lock); - + out: return ret; } -static int -trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file) +static struct event_file_link * +find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) { - struct ftrace_event_file **files; - int i; + struct event_file_link *link; - /* - * Since all tp->files updater is protected by probe_enable_lock, - * we don't need to lock an rcu_read_lock. - */ - files = rcu_dereference_raw(tp->files); - if (files) { - for (i = 0; files[i]; i++) - if (files[i] == file) - return i; - } + list_for_each_entry(link, &tp->files, list) + if (link->file == file) + return link; - return -1; + return NULL; } /* @@ -283,41 +245,24 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) { int ret = 0; - mutex_lock(&probe_enable_lock); - if (file) { - struct ftrace_event_file **new, **old; - int n = trace_probe_nr_files(tp); - int i, j; + struct event_file_link *link; - old = rcu_dereference_raw(tp->files); - if (n == 0 || trace_probe_file_index(tp, file) < 0) { + link = find_event_file_link(tp, file); + if (!link) { ret = -EINVAL; - goto out_unlock; + goto out; } - if (n == 1) { /* Remove the last file */ - tp->flags &= ~TP_FLAG_TRACE; - new = NULL; - } else { - new = kzalloc(n * sizeof(struct ftrace_event_file *), - GFP_KERNEL); - if (!new) { - ret = -ENOMEM; - goto out_unlock; - } - - /* This copy & check loop copies the NULL stopper too */ - for (i = 0, j = 0; j < n && i < n + 1; i++) - if (old[i] != file) - new[j++] = old[i]; - } + list_del_rcu(&link->list); + /* synchronize with kprobe_trace_func/kretprobe_trace_func */ + synchronize_sched(); + kfree(link); - rcu_assign_pointer(tp->files, new); + if (!list_empty(&tp->files)) + goto out; - /* Make sure the probe is done with old files */ - synchronize_sched(); - kfree(old); + tp->flags &= ~TP_FLAG_TRACE; } else tp->flags &= ~TP_FLAG_PROFILE; @@ -327,10 +272,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) else disable_kprobe(&tp->rp.kp); } - - out_unlock: - mutex_unlock(&probe_enable_lock); - + out: return ret; } @@ -885,20 +827,10 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs, static __kprobes void kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs) { - /* - * Note: preempt is already disabled around the kprobe handler. - * However, we still need an smp_read_barrier_depends() corresponding - * to smp_wmb() in rcu_assign_pointer() to access the pointer. - */ - struct ftrace_event_file **file = rcu_dereference_raw(tp->files); - - if (unlikely(!file)) - return; + struct event_file_link *link; - while (*file) { - __kprobe_trace_func(tp, regs, *file); - file++; - } + list_for_each_entry_rcu(link, &tp->files, list) + __kprobe_trace_func(tp, regs, link->file); } /* Kretprobe handler */ @@ -945,20 +877,10 @@ static __kprobes void kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, struct pt_regs *regs) { - /* - * Note: preempt is already disabled around the kprobe handler. - * However, we still need an smp_read_barrier_depends() corresponding - * to smp_wmb() in rcu_assign_pointer() to access the pointer. - */ - struct ftrace_event_file **file = rcu_dereference_raw(tp->files); + struct event_file_link *link; - if (unlikely(!file)) - return; - - while (*file) { - __kretprobe_trace_func(tp, ri, regs, *file); - file++; - } + list_for_each_entry_rcu(link, &tp->files, list) + __kretprobe_trace_func(tp, ri, regs, link->file); } /* Event entry printers */ @@ -1157,6 +1079,10 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) int size, __size, dsize; int rctx; + head = this_cpu_ptr(call->perf_events); + if (hlist_empty(head)) + return; + dsize = __get_data_size(tp, regs); __size = sizeof(*entry) + tp->size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); @@ -1172,10 +1098,7 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) entry->ip = (unsigned long)tp->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); - - head = this_cpu_ptr(call->perf_events); - perf_trace_buf_submit(entry, size, rctx, - entry->ip, 1, regs, head, NULL); + perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); } /* Kretprobe profile handler */ @@ -1189,6 +1112,10 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, int size, __size, dsize; int rctx; + head = this_cpu_ptr(call->perf_events); + if (hlist_empty(head)) + return; + dsize = __get_data_size(tp, regs); __size = sizeof(*entry) + tp->size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); @@ -1204,13 +1131,16 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, entry->func = (unsigned long)tp->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); - - head = this_cpu_ptr(call->perf_events); - perf_trace_buf_submit(entry, size, rctx, - entry->ret_ip, 1, regs, head, NULL); + perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); } #endif /* CONFIG_PERF_EVENTS */ +/* + * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex. + * + * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe + * lockless, but we can't race with this __init function. + */ static __kprobes int kprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data) @@ -1376,6 +1306,10 @@ find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr) return NULL; } +/* + * Nobody but us can call enable_trace_probe/disable_trace_probe at this + * stage, we can do this lockless. + */ static __init int kprobe_trace_self_tests_init(void) { int ret, warn = 0; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 2901e3b..a7329b7 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -640,13 +640,20 @@ out: * Enable ftrace, sleep 1/10 second, and then read the trace * buffer to see if all is in order. */ -int +__init int trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) { int save_ftrace_enabled = ftrace_enabled; unsigned long count; int ret; +#ifdef CONFIG_DYNAMIC_FTRACE + if (ftrace_filter_param) { + printk(KERN_CONT " ... kernel command line filter set: force PASS ... "); + return 0; + } +#endif + /* make sure msleep has been recorded */ msleep(1); @@ -727,13 +734,20 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace) * Pretty much the same than for the function tracer from which the selftest * has been borrowed. */ -int +__init int trace_selftest_startup_function_graph(struct tracer *trace, struct trace_array *tr) { int ret; unsigned long count; +#ifdef CONFIG_DYNAMIC_FTRACE + if (ftrace_filter_param) { + printk(KERN_CONT " ... kernel command line filter set: force PASS ... "); + return 0; + } +#endif + /* * Simulate the init() callback but we attach a watchdog callback * to detect and recover from possible hangs diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8f2ac73..322e164 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -306,6 +306,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct ring_buffer *buffer; + unsigned long irq_flags; + int pc; int syscall_nr; int size; @@ -321,9 +323,12 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; + local_save_flags(irq_flags); + pc = preempt_count(); + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, - sys_data->enter_event->event.type, size, 0, 0); + sys_data->enter_event->event.type, size, irq_flags, pc); if (!event) return; @@ -333,7 +338,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) if (!filter_current_check_discard(buffer, sys_data->enter_event, entry, event)) - trace_current_buffer_unlock_commit(buffer, event, 0, 0); + trace_current_buffer_unlock_commit(buffer, event, + irq_flags, pc); } static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) @@ -343,6 +349,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct ring_buffer *buffer; + unsigned long irq_flags; + int pc; int syscall_nr; syscall_nr = trace_get_syscall_nr(current, regs); @@ -355,9 +363,13 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!sys_data) return; + local_save_flags(irq_flags); + pc = preempt_count(); + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, - sys_data->exit_event->event.type, sizeof(*entry), 0, 0); + sys_data->exit_event->event.type, sizeof(*entry), + irq_flags, pc); if (!event) return; @@ -367,7 +379,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!filter_current_check_discard(buffer, sys_data->exit_event, entry, event)) - trace_current_buffer_unlock_commit(buffer, event, 0, 0); + trace_current_buffer_unlock_commit(buffer, event, + irq_flags, pc); } static int reg_event_syscall_enter(struct ftrace_event_file *file, diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 32494fb0..d5d0cd3 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -283,8 +283,10 @@ static int create_trace_uprobe(int argc, char **argv) return -EINVAL; } arg = strchr(argv[1], ':'); - if (!arg) + if (!arg) { + ret = -EINVAL; goto fail_address_parse; + } *arg++ = '\0'; filename = argv[1]; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 05039e3..1241d8c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -29,9 +29,9 @@ #include <linux/kvm_para.h> #include <linux/perf_event.h> -int watchdog_enabled = 1; +int watchdog_user_enabled = 1; int __read_mostly watchdog_thresh = 10; -static int __read_mostly watchdog_disabled; +static int __read_mostly watchdog_running; static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); @@ -63,7 +63,7 @@ static int __init hardlockup_panic_setup(char *str) else if (!strncmp(str, "nopanic", 7)) hardlockup_panic = 0; else if (!strncmp(str, "0", 1)) - watchdog_enabled = 0; + watchdog_user_enabled = 0; return 1; } __setup("nmi_watchdog=", hardlockup_panic_setup); @@ -82,7 +82,7 @@ __setup("softlockup_panic=", softlockup_panic_setup); static int __init nowatchdog_setup(char *str) { - watchdog_enabled = 0; + watchdog_user_enabled = 0; return 1; } __setup("nowatchdog", nowatchdog_setup); @@ -90,7 +90,7 @@ __setup("nowatchdog", nowatchdog_setup); /* deprecated */ static int __init nosoftlockup_setup(char *str) { - watchdog_enabled = 0; + watchdog_user_enabled = 0; return 1; } __setup("nosoftlockup", nosoftlockup_setup); @@ -158,7 +158,7 @@ void touch_all_softlockup_watchdogs(void) #ifdef CONFIG_HARDLOCKUP_DETECTOR void touch_nmi_watchdog(void) { - if (watchdog_enabled) { + if (watchdog_user_enabled) { unsigned cpu; for_each_present_cpu(cpu) { @@ -347,11 +347,6 @@ static void watchdog_enable(unsigned int cpu) hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = watchdog_timer_fn; - if (!watchdog_enabled) { - kthread_park(current); - return; - } - /* Enable the perf event */ watchdog_nmi_enable(cpu); @@ -374,6 +369,11 @@ static void watchdog_disable(unsigned int cpu) watchdog_nmi_disable(cpu); } +static void watchdog_cleanup(unsigned int cpu, bool online) +{ + watchdog_disable(cpu); +} + static int watchdog_should_run(unsigned int cpu) { return __this_cpu_read(hrtimer_interrupts) != @@ -475,28 +475,40 @@ static int watchdog_nmi_enable(unsigned int cpu) { return 0; } static void watchdog_nmi_disable(unsigned int cpu) { return; } #endif /* CONFIG_HARDLOCKUP_DETECTOR */ -/* prepare/enable/disable routines */ -/* sysctl functions */ -#ifdef CONFIG_SYSCTL -static void watchdog_enable_all_cpus(void) +static struct smp_hotplug_thread watchdog_threads = { + .store = &softlockup_watchdog, + .thread_should_run = watchdog_should_run, + .thread_fn = watchdog, + .thread_comm = "watchdog/%u", + .setup = watchdog_enable, + .cleanup = watchdog_cleanup, + .park = watchdog_disable, + .unpark = watchdog_enable, +}; + +static int watchdog_enable_all_cpus(void) { - unsigned int cpu; + int err = 0; - if (watchdog_disabled) { - watchdog_disabled = 0; - for_each_online_cpu(cpu) - kthread_unpark(per_cpu(softlockup_watchdog, cpu)); + if (!watchdog_running) { + err = smpboot_register_percpu_thread(&watchdog_threads); + if (err) + pr_err("Failed to create watchdog threads, disabled\n"); + else + watchdog_running = 1; } + + return err; } +/* prepare/enable/disable routines */ +/* sysctl functions */ +#ifdef CONFIG_SYSCTL static void watchdog_disable_all_cpus(void) { - unsigned int cpu; - - if (!watchdog_disabled) { - watchdog_disabled = 1; - for_each_online_cpu(cpu) - kthread_park(per_cpu(softlockup_watchdog, cpu)); + if (watchdog_running) { + watchdog_running = 0; + smpboot_unregister_percpu_thread(&watchdog_threads); } } @@ -507,45 +519,48 @@ static void watchdog_disable_all_cpus(void) int proc_dowatchdog(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret; + int err, old_thresh, old_enabled; - if (watchdog_disabled < 0) - return -ENODEV; + old_thresh = ACCESS_ONCE(watchdog_thresh); + old_enabled = ACCESS_ONCE(watchdog_user_enabled); - ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (ret || !write) - return ret; + err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (err || !write) + return err; set_sample_period(); /* * Watchdog threads shouldn't be enabled if they are - * disabled. The 'watchdog_disabled' variable check in + * disabled. The 'watchdog_running' variable check in * watchdog_*_all_cpus() function takes care of this. */ - if (watchdog_enabled && watchdog_thresh) - watchdog_enable_all_cpus(); + if (watchdog_user_enabled && watchdog_thresh) + err = watchdog_enable_all_cpus(); else watchdog_disable_all_cpus(); - return ret; + /* Restore old values on failure */ + if (err) { + watchdog_thresh = old_thresh; + watchdog_user_enabled = old_enabled; + } + + return err; } #endif /* CONFIG_SYSCTL */ -static struct smp_hotplug_thread watchdog_threads = { - .store = &softlockup_watchdog, - .thread_should_run = watchdog_should_run, - .thread_fn = watchdog, - .thread_comm = "watchdog/%u", - .setup = watchdog_enable, - .park = watchdog_disable, - .unpark = watchdog_enable, -}; - void __init lockup_detector_init(void) { set_sample_period(); - if (smpboot_register_percpu_thread(&watchdog_threads)) { - pr_err("Failed to create watchdog threads, disabled\n"); - watchdog_disabled = -ENODEV; + +#ifdef CONFIG_NO_HZ_FULL + if (watchdog_user_enabled) { + watchdog_user_enabled = 0; + pr_warning("Disabled lockup detectors by default for full dynticks\n"); + pr_warning("You can reactivate it with 'sysctl -w kernel.watchdog=1'\n"); } +#endif + + if (watchdog_user_enabled) + watchdog_enable_all_cpus(); } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f02c4a4..0b72e81 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4644,7 +4644,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu) * Workqueues should be brought up before normal priority CPU notifiers. * This will be registered high priority CPU notifier. */ -static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, +static int workqueue_cpu_up_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -4697,7 +4697,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, * Workqueues should be brought down after normal priority CPU notifiers. * This will be registered as low priority CPU notifier. */ -static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb, +static int workqueue_cpu_down_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { |