From 9c4b2867ed7c8c8784dd417ffd16e705e81eb145 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 14 Jan 2016 23:24:22 +0100 Subject: cpuidle: menu: Fix menu_select() for CPUIDLE_DRIVER_STATE_START == 0 Commit a9ceb78bc75c (cpuidle,menu: use interactivity_req to disable polling) exposed a bug in menu_select() causing it to return -1 on systems with CPUIDLE_DRIVER_STATE_START equal to zero, although it should have returned 0. As a result, idle states are not entered by CPUs on those systems. Namely, on the systems in question data->last_state_idx is initially equal to -1 and the above commit modified the condition that would have caused it to be changed to 0 to be less likely to trigger which exposed the problem. However, setting data->last_state_idx initially to -1 doesn't make sense at all and on the affected systems it should always be set to CPUIDLE_DRIVER_STATE_START (ie. 0) unconditionally, so make that happen. Fixes: a9ceb78bc75c (cpuidle,menu: use interactivity_req to disable polling) Reported-and-tested-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 7b0971d..be0bae0 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -294,8 +294,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->needs_update = 0; } - data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1; - /* Special case when user has set very strict latency requirement */ if (unlikely(latency_req == 0)) return 0; @@ -326,14 +324,19 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (latency_req > interactivity_req) latency_req = interactivity_req; - /* - * We want to default to C1 (hlt), not to busy polling - * unless the timer is happening really really soon. - */ - if (interactivity_req > 20 && - !drv->states[CPUIDLE_DRIVER_STATE_START].disabled && - dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable == 0) + if (CPUIDLE_DRIVER_STATE_START > 0) { + data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1; + /* + * We want to default to C1 (hlt), not to busy polling + * unless the timer is happening really really soon. + */ + if (interactivity_req > 20 && + !drv->states[CPUIDLE_DRIVER_STATE_START].disabled && + dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable == 0) + data->last_state_idx = CPUIDLE_DRIVER_STATE_START; + } else { data->last_state_idx = CPUIDLE_DRIVER_STATE_START; + } /* * Find the idle state with the lowest power while satisfying -- cgit v0.10.2 From 46373a15f65fe862f31c19a484acdf551f2b442f Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 11 Jan 2016 17:40:31 +0100 Subject: time: nohz: Expose tick_nohz_enabled The cpuidle subsystem needs it. Signed-off-by: Jean Delvare Reviewed-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki diff --git a/include/linux/tick.h b/include/linux/tick.h index e312219..97fd4e5 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -98,6 +98,7 @@ static inline void tick_broadcast_exit(void) } #ifdef CONFIG_NO_HZ_COMMON +extern int tick_nohz_enabled; extern int tick_nohz_tick_stopped(void); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); @@ -106,6 +107,7 @@ extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); #else /* !CONFIG_NO_HZ_COMMON */ +#define tick_nohz_enabled (0) static inline int tick_nohz_tick_stopped(void) { return 0; } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 7c7ec45..edfea95 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -387,7 +387,7 @@ void __init tick_nohz_init(void) /* * NO HZ enabled ? */ -static int tick_nohz_enabled __read_mostly = 1; +int tick_nohz_enabled __read_mostly = 1; unsigned long tick_nohz_active __read_mostly; /* * Enable / Disable tickless mode -- cgit v0.10.2 From 66a5f6b63996d8aa7cbe8841b38297bf3b338194 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 11 Jan 2016 17:41:53 +0100 Subject: cpuidle: Default to ladder governor on ticking systems The menu governor is currently the default on all systems. However the documentation claims that the ladder governor is preferred on ticking systems. So bump the rating of the ladder governor when NO_HZ is disabled, or when booting with nohz=off. This fixes the first half of kernel BZ #65531. Link: https://bugzilla.kernel.org/show_bug.cgi?id=65531 Signed-off-by: Jean Delvare Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index 401c010..63bd5a4 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -184,6 +185,14 @@ static struct cpuidle_governor ladder_governor = { */ static int __init init_ladder(void) { + /* + * When NO_HZ is disabled, or when booting with nohz=off, the ladder + * governor is better so give it a higher rating than the menu + * governor. + */ + if (!tick_nohz_enabled) + ladder_governor.rating = 25; + return cpuidle_register_governor(&ladder_governor); } -- cgit v0.10.2 From 10475b34f4d71cf71cfe7c5d1f27d8ff3a4eb9bc Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 11 Jan 2016 17:43:02 +0100 Subject: cpuidle: Don't enable all governors by default Since commit d6f346f2d2 (cpuidle: improve governor Kconfig options) the best cpuidle governor is selected automatically. There is little point in additionally selecting the other one as it will not be used, so don't select both governors by default. Being able to select more than one governor is still good for developers booting with cpuidle_sysfs_switch, though. This fixes the second half of kernel BZ #65531. Link: https://bugzilla.kernel.org/show_bug.cgi?id=65531 Signed-off-by: Jean Delvare Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index 8c7930b..7e48eb5 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -19,11 +19,9 @@ config CPU_IDLE_MULTIPLE_DRIVERS config CPU_IDLE_GOV_LADDER bool "Ladder governor (for periodic timer tick)" - default y config CPU_IDLE_GOV_MENU bool "Menu governor (for tickless system)" - default y config DT_IDLE_STATES bool -- cgit v0.10.2 From 51164251f5c35e6596130ef0de94ffe65fe441e0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 16 Jan 2016 00:54:53 +0100 Subject: sched / idle: Drop default_idle_call() fallback from call_cpuidle() After commit 9c4b2867ed7c (cpuidle: menu: Fix menu_select() for CPUIDLE_DRIVER_STATE_START == 0) it is clear that menu_select() cannot return negative values. Moreover, ladder_select_state() will never return a negative value too, so make find_deepest_state() return non-negative values too and drop the default_idle_call() fallback from call_cpuidle(). This eliminates one branch from the idle loop and makes the governors and find_deepest_state() handle the case when all states have been disabled from sysfs consistently. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Acked-by: Ingo Molnar Tested-by: Sudeep Holla diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 17a6dc0..046423b 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -79,9 +79,9 @@ static int find_deepest_state(struct cpuidle_driver *drv, bool freeze) { unsigned int latency_req = 0; - int i, ret = -ENXIO; + int i, ret = 0; - for (i = 0; i < drv->state_count; i++) { + for (i = 1; i < drv->state_count; i++) { struct cpuidle_state *s = &drv->states[i]; struct cpuidle_state_usage *su = &dev->states_usage[i]; @@ -243,7 +243,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, * @drv: the cpuidle driver * @dev: the cpuidle device * - * Returns the index of the idle state. + * Returns the index of the idle state. The return value must not be negative. */ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) { diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 4a2ef5a..34852ee 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -97,12 +97,6 @@ void default_idle_call(void) static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, int next_state) { - /* Fall back to the default arch idle method on errors. */ - if (next_state < 0) { - default_idle_call(); - return next_state; - } - /* * The idle task must be scheduled, it is pointless to go to idle, just * update no idle residency and return. -- cgit v0.10.2 From 5bb1729cbdfbe974ad6385be94b14afbac97e19f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 16 Jan 2016 00:56:34 +0100 Subject: cpuidle: menu: Avoid pointless checks in menu_select() If menu_select() cannot find a suitable state to return, it will return the state index stored in data->last_state_idx. This means that it is pointless to look at the states whose indices are less than or equal to data->last_state_idx in the main loop, so don't do that. Given that those checks are done on every idle state selection, this change can save quite a bit of completely unnecessary overhead. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Tested-by: Sudeep Holla diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index be0bae0..0742b32 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -342,7 +342,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) * Find the idle state with the lowest power while satisfying * our constraints. */ - for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) { + for (i = data->last_state_idx + 1; i < drv->state_count; i++) { struct cpuidle_state *s = &drv->states[i]; struct cpuidle_state_usage *su = &dev->states_usage[i]; -- cgit v0.10.2