From e12d0271774fea9fddf1e2a7952a0bffb2ee8e8b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 10 May 2013 17:12:28 -0400 Subject: nohz: Warn if the machine can not perform nohz_full If the user configures NO_HZ_FULL and defines nohz_full=XXX on the kernel command line, or enables NO_HZ_FULL_ALL, but nohz fails due to the machine having a unstable clock, warn about it. We do not want users thinking that they are getting the benefit of nohz when their machine can not support it. Signed-off-by: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Andrew Morton Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Signed-off-by: Frederic Weisbecker diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f420813..d87d22c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -178,6 +178,11 @@ static bool can_stop_full_tick(void) */ if (!sched_clock_stable) { trace_tick_stop(0, "unstable sched clock\n"); + /* + * Don't allow the user to think they can get + * full NO_HZ with this machine. + */ + WARN_ONCE(1, "NO_HZ FULL will not work with unstable sched clock"); return false; } #endif -- cgit v0.10.2 From b8900bc0217fac8e68085997bee2f05e6db931a2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 6 Jun 2013 15:42:53 +0200 Subject: watchdog: Register / unregister watchdog kthreads on sysctl control The user activation/deactivation of the watchdog through boot parameters or systcl is currently implemented with a dance involving kthreads parking and unparking methods: the threads are unconditionally registered on boot and they park as soon as the user want the watchdog to be disabled. This method involves a few noisy details to handle though: the watchdog kthreads may be unparked anytime due to hotplug operations, after which the watchdog internals have to decide to park again if it is user-disabled. As a result the setup() and unpark() methods need to be able to request a reparking. This is not currently supported in the kthread infrastructure so this piece of the watchdog code only works halfway. Besides, unparking/reparking the watchdog kthreads consume unnecessary cputime on hotplug operations when those could be simply ignored in the first place. As suggested by Srivatsa, let's instead only register the watchdog threads when they are needed. This way we don't need to think about hotplug operations and we don't burden the CPU onlining when the watchdog is simply disabled. Suggested-by: Srivatsa S. Bhat Signed-off-by: Frederic Weisbecker Cc: Srivatsa S. Bhat Cc: Anish Singh Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Don Zickus diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 05039e3..52c9a9b 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -31,7 +31,7 @@ int watchdog_enabled = 1; int __read_mostly watchdog_thresh = 10; -static int __read_mostly watchdog_disabled; +static int __read_mostly watchdog_disabled = 1; static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); @@ -347,11 +347,6 @@ static void watchdog_enable(unsigned int cpu) hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = watchdog_timer_fn; - if (!watchdog_enabled) { - kthread_park(current); - return; - } - /* Enable the perf event */ watchdog_nmi_enable(cpu); @@ -374,6 +369,11 @@ static void watchdog_disable(unsigned int cpu) watchdog_nmi_disable(cpu); } +static void watchdog_cleanup(unsigned int cpu, bool online) +{ + watchdog_disable(cpu); +} + static int watchdog_should_run(unsigned int cpu) { return __this_cpu_read(hrtimer_interrupts) != @@ -475,28 +475,40 @@ static int watchdog_nmi_enable(unsigned int cpu) { return 0; } static void watchdog_nmi_disable(unsigned int cpu) { return; } #endif /* CONFIG_HARDLOCKUP_DETECTOR */ -/* prepare/enable/disable routines */ -/* sysctl functions */ -#ifdef CONFIG_SYSCTL -static void watchdog_enable_all_cpus(void) +static struct smp_hotplug_thread watchdog_threads = { + .store = &softlockup_watchdog, + .thread_should_run = watchdog_should_run, + .thread_fn = watchdog, + .thread_comm = "watchdog/%u", + .setup = watchdog_enable, + .cleanup = watchdog_cleanup, + .park = watchdog_disable, + .unpark = watchdog_enable, +}; + +static int watchdog_enable_all_cpus(void) { - unsigned int cpu; + int err = 0; if (watchdog_disabled) { - watchdog_disabled = 0; - for_each_online_cpu(cpu) - kthread_unpark(per_cpu(softlockup_watchdog, cpu)); + err = smpboot_register_percpu_thread(&watchdog_threads); + if (err) + pr_err("Failed to create watchdog threads, disabled\n"); + else + watchdog_disabled = 0; } + + return err; } +/* prepare/enable/disable routines */ +/* sysctl functions */ +#ifdef CONFIG_SYSCTL static void watchdog_disable_all_cpus(void) { - unsigned int cpu; - if (!watchdog_disabled) { watchdog_disabled = 1; - for_each_online_cpu(cpu) - kthread_park(per_cpu(softlockup_watchdog, cpu)); + smpboot_unregister_percpu_thread(&watchdog_threads); } } @@ -507,14 +519,14 @@ static void watchdog_disable_all_cpus(void) int proc_dowatchdog(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret; + int err, old_thresh, old_enabled; - if (watchdog_disabled < 0) - return -ENODEV; + old_thresh = ACCESS_ONCE(watchdog_thresh); + old_enabled = ACCESS_ONCE(watchdog_enabled); - ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (ret || !write) - return ret; + err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (err || !write) + return err; set_sample_period(); /* @@ -523,29 +535,24 @@ int proc_dowatchdog(struct ctl_table *table, int write, * watchdog_*_all_cpus() function takes care of this. */ if (watchdog_enabled && watchdog_thresh) - watchdog_enable_all_cpus(); + err = watchdog_enable_all_cpus(); else watchdog_disable_all_cpus(); - return ret; + /* Restore old values on failure */ + if (err) { + watchdog_thresh = old_thresh; + watchdog_enabled = old_enabled; + } + + return err; } #endif /* CONFIG_SYSCTL */ -static struct smp_hotplug_thread watchdog_threads = { - .store = &softlockup_watchdog, - .thread_should_run = watchdog_should_run, - .thread_fn = watchdog, - .thread_comm = "watchdog/%u", - .setup = watchdog_enable, - .park = watchdog_disable, - .unpark = watchdog_enable, -}; - void __init lockup_detector_init(void) { set_sample_period(); - if (smpboot_register_percpu_thread(&watchdog_threads)) { - pr_err("Failed to create watchdog threads, disabled\n"); - watchdog_disabled = -ENODEV; - } + + if (watchdog_enabled) + watchdog_enable_all_cpus(); } -- cgit v0.10.2 From 3c00ea82c724fab0b98f15428a804cb45eb9ad38 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 19 May 2013 20:45:15 +0200 Subject: watchdog: Rename confusing state variable We have two very conflicting state variable names in the watchdog: * watchdog_enabled: This one reflects the user interface. It's set to 1 by default and can be overriden with boot options or sysctl/procfs interface. * watchdog_disabled: This is the internal toggle state that tells if watchdog threads, timers and NMI events are currently running or not. This state mostly depends on the user settings. It's a convenient state latch. Now we really need to find clearer names because those are just too confusing to encourage deep review. watchdog_enabled now becomes watchdog_user_enabled to reflect its purpose as an interface. watchdog_disabled becomes watchdog_running to suggest its role as a pure internal state. Signed-off-by: Frederic Weisbecker Cc: Srivatsa S. Bhat Cc: Anish Singh Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Don Zickus diff --git a/include/linux/nmi.h b/include/linux/nmi.h index db50840..6a45fb5 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -46,7 +46,7 @@ static inline bool trigger_all_cpu_backtrace(void) #ifdef CONFIG_LOCKUP_DETECTOR int hw_nmi_is_cpu_stuck(struct pt_regs *); u64 hw_nmi_get_sample_period(int watchdog_thresh); -extern int watchdog_enabled; +extern int watchdog_user_enabled; extern int watchdog_thresh; struct ctl_table; extern int proc_dowatchdog(struct ctl_table *, int , diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9edcf45..b080565 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -801,7 +801,7 @@ static struct ctl_table kern_table[] = { #if defined(CONFIG_LOCKUP_DETECTOR) { .procname = "watchdog", - .data = &watchdog_enabled, + .data = &watchdog_user_enabled, .maxlen = sizeof (int), .mode = 0644, .proc_handler = proc_dowatchdog, @@ -828,7 +828,7 @@ static struct ctl_table kern_table[] = { }, { .procname = "nmi_watchdog", - .data = &watchdog_enabled, + .data = &watchdog_user_enabled, .maxlen = sizeof (int), .mode = 0644, .proc_handler = proc_dowatchdog, diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 52c9a9b..51c4f34 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -29,9 +29,9 @@ #include #include -int watchdog_enabled = 1; +int watchdog_user_enabled = 1; int __read_mostly watchdog_thresh = 10; -static int __read_mostly watchdog_disabled = 1; +static int __read_mostly watchdog_running; static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); @@ -63,7 +63,7 @@ static int __init hardlockup_panic_setup(char *str) else if (!strncmp(str, "nopanic", 7)) hardlockup_panic = 0; else if (!strncmp(str, "0", 1)) - watchdog_enabled = 0; + watchdog_user_enabled = 0; return 1; } __setup("nmi_watchdog=", hardlockup_panic_setup); @@ -82,7 +82,7 @@ __setup("softlockup_panic=", softlockup_panic_setup); static int __init nowatchdog_setup(char *str) { - watchdog_enabled = 0; + watchdog_user_enabled = 0; return 1; } __setup("nowatchdog", nowatchdog_setup); @@ -90,7 +90,7 @@ __setup("nowatchdog", nowatchdog_setup); /* deprecated */ static int __init nosoftlockup_setup(char *str) { - watchdog_enabled = 0; + watchdog_user_enabled = 0; return 1; } __setup("nosoftlockup", nosoftlockup_setup); @@ -158,7 +158,7 @@ void touch_all_softlockup_watchdogs(void) #ifdef CONFIG_HARDLOCKUP_DETECTOR void touch_nmi_watchdog(void) { - if (watchdog_enabled) { + if (watchdog_user_enabled) { unsigned cpu; for_each_present_cpu(cpu) { @@ -490,12 +490,12 @@ static int watchdog_enable_all_cpus(void) { int err = 0; - if (watchdog_disabled) { + if (!watchdog_running) { err = smpboot_register_percpu_thread(&watchdog_threads); if (err) pr_err("Failed to create watchdog threads, disabled\n"); else - watchdog_disabled = 0; + watchdog_running = 1; } return err; @@ -506,8 +506,8 @@ static int watchdog_enable_all_cpus(void) #ifdef CONFIG_SYSCTL static void watchdog_disable_all_cpus(void) { - if (!watchdog_disabled) { - watchdog_disabled = 1; + if (watchdog_running) { + watchdog_running = 0; smpboot_unregister_percpu_thread(&watchdog_threads); } } @@ -522,7 +522,7 @@ int proc_dowatchdog(struct ctl_table *table, int write, int err, old_thresh, old_enabled; old_thresh = ACCESS_ONCE(watchdog_thresh); - old_enabled = ACCESS_ONCE(watchdog_enabled); + old_enabled = ACCESS_ONCE(watchdog_user_enabled); err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (err || !write) @@ -531,10 +531,10 @@ int proc_dowatchdog(struct ctl_table *table, int write, set_sample_period(); /* * Watchdog threads shouldn't be enabled if they are - * disabled. The 'watchdog_disabled' variable check in + * disabled. The 'watchdog_running' variable check in * watchdog_*_all_cpus() function takes care of this. */ - if (watchdog_enabled && watchdog_thresh) + if (watchdog_user_enabled && watchdog_thresh) err = watchdog_enable_all_cpus(); else watchdog_disable_all_cpus(); @@ -542,7 +542,7 @@ int proc_dowatchdog(struct ctl_table *table, int write, /* Restore old values on failure */ if (err) { watchdog_thresh = old_thresh; - watchdog_enabled = old_enabled; + watchdog_user_enabled = old_enabled; } return err; @@ -553,6 +553,6 @@ void __init lockup_detector_init(void) { set_sample_period(); - if (watchdog_enabled) + if (watchdog_user_enabled) watchdog_enable_all_cpus(); } -- cgit v0.10.2 From 940be35ac0139530d7554aa2352a8388e3d4adca Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Jun 2013 13:35:42 +0200 Subject: watchdog: Boot-disable by default on full dynticks When the watchdog runs, it prevents the full dynticks CPUs from stopping their tick because the hard lockup detector uses perf events internally, which in turn rely on the periodic tick. Since this is a rather confusing behaviour that is not easy to track down and identify for those who want to test CONFIG_NO_HZ_FULL, let's default disable the watchdog on boot time when full dynticks is enabled. The user can still enable it later on runtime using proc or sysctl. Reported-by: Steven Rostedt Suggested-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Andrew Morton Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Li Zhong Cc: Don Zickus Cc: Srivatsa S. Bhat Cc: Anish Singh diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 51c4f34..1241d8c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -553,6 +553,14 @@ void __init lockup_detector_init(void) { set_sample_period(); +#ifdef CONFIG_NO_HZ_FULL + if (watchdog_user_enabled) { + watchdog_user_enabled = 0; + pr_warning("Disabled lockup detectors by default for full dynticks\n"); + pr_warning("You can reactivate it with 'sysctl -w kernel.watchdog=1'\n"); + } +#endif + if (watchdog_user_enabled) watchdog_enable_all_cpus(); } -- cgit v0.10.2 From 5b8621a68fdcd2baf1d3b413726f913a5254d46a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 8 Jun 2013 13:47:31 +0200 Subject: nohz: Remove obsolete check for full dynticks CPUs to be RCU nocbs Building full dynticks now implies that all CPUs are forced into RCU nocb mode through CONFIG_RCU_NOCB_CPU_ALL. The dynamic check has become useless. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Andrew Morton Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Li Zhong Cc: Borislav Petkov diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d87d22c..b157501 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -351,16 +351,6 @@ void __init tick_nohz_init(void) } cpu_notifier(tick_nohz_cpu_down_callback, 0); - - /* Make sure full dynticks CPU are also RCU nocbs */ - for_each_cpu(cpu, nohz_full_mask) { - if (!rcu_is_nocb_cpu(cpu)) { - pr_warning("NO_HZ: CPU %d is not RCU nocb: " - "cleared from nohz_full range", cpu); - cpumask_clear_cpu(cpu, nohz_full_mask); - } - } - cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); } -- cgit v0.10.2