From 82f663277d0db854e8978e5f89fd88f6df75a4a4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 4 May 2015 22:53:22 +0200 Subject: sched / idle: Move the default idle call code to a separate function Move the code under the "use_default" label in cpuidle_idle_call() into a separate (new) function. This just allows the subsequent changes to be more stratightforward. Signed-off-by: Rafael J. Wysocki Reviewed-by: Daniel Lezcano Acked-by: Peter Zijlstra (Intel) diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index fefcb1f..ae7c0be 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -67,6 +67,18 @@ void __weak arch_cpu_idle(void) local_irq_enable(); } +static void default_idle_call(void) +{ + /* + * We can't use the cpuidle framework, let's use the default idle + * routine. + */ + if (current_clr_polling_and_test()) + local_irq_enable(); + else + arch_cpu_idle(); +} + /** * cpuidle_idle_call - the main idle function * @@ -105,8 +117,10 @@ static void cpuidle_idle_call(void) */ rcu_idle_enter(); - if (cpuidle_not_available(drv, dev)) - goto use_default; + if (cpuidle_not_available(drv, dev)) { + default_idle_call(); + goto exit_idle; + } /* * Suspend-to-idle ("freeze") is a system state in which all user space @@ -134,8 +148,10 @@ static void cpuidle_idle_call(void) next_state = cpuidle_select(drv, dev); } /* Fall back to the default arch idle method on errors. */ - if (next_state < 0) - goto use_default; + if (next_state < 0) { + default_idle_call(); + goto exit_idle; + } /* * The idle task must be scheduled, it is pointless to @@ -162,8 +178,10 @@ static void cpuidle_idle_call(void) /* The cpu is no longer idle or about to enter idle. */ idle_set_state(this_rq(), NULL); - if (entered_state == -EBUSY) - goto use_default; + if (entered_state == -EBUSY) { + default_idle_call(); + goto exit_idle; + } /* * Give the governor an opportunity to reflect on the outcome @@ -182,19 +200,6 @@ exit_idle: rcu_idle_exit(); start_critical_timings(); - return; - -use_default: - /* - * We can't use the cpuidle framework, let's use the default - * idle routine. - */ - if (current_clr_polling_and_test()) - local_irq_enable(); - else - arch_cpu_idle(); - - goto exit_idle; } DEFINE_PER_CPU(bool, cpu_dead_idle); -- cgit v0.10.2 From a802ea96454570f3c526dd9d7ad8c706e570444d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 4 May 2015 22:53:28 +0200 Subject: cpuidle: Check the sign of index in cpuidle_reflect() Avoid calling the governor's ->reflect method if the state index passed to cpuidle_reflect() is negative. This allows the analogous check to be dropped from menu_reflect(), so do that too, and ensures that arbitrary error codes can be passed to cpuidle_reflect() as the index with no adverse consequences. Signed-off-by: Rafael J. Wysocki Reviewed-by: Daniel Lezcano Acked-by: Peter Zijlstra (Intel) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 61c417b..3b80b77 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -249,7 +249,7 @@ int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, */ void cpuidle_reflect(struct cpuidle_device *dev, int index) { - if (cpuidle_curr_governor->reflect) + if (cpuidle_curr_governor->reflect && index >= 0) cpuidle_curr_governor->reflect(dev, index); } diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index b8a5fa1..22e4463 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -367,9 +367,9 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) static void menu_reflect(struct cpuidle_device *dev, int index) { struct menu_device *data = this_cpu_ptr(&menu_devices); + data->last_state_idx = index; - if (index >= 0) - data->needs_update = 1; + data->needs_update = 1; } /** -- cgit v0.10.2 From bcf6ad8a4a3d002e8bc8f6639cdc119168f4e87b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 4 May 2015 22:53:35 +0200 Subject: sched / idle: Eliminate the "reflect" check from cpuidle_idle_call() Since cpuidle_reflect() should only be called if the idle state to enter was selected by cpuidle_select(), there is the "reflect" variable in cpuidle_idle_call() whose value is used to determine whether or not that is the case. However, if the entire code run between the conditional setting "reflect" and the call to cpuidle_reflect() is moved to a separate function, it will be possible to call that new function in both branches of the conditional, in which case cpuidle_reflect() will only need to be called from one of them too and the "reflect" variable won't be necessary any more. This eliminates one check made by cpuidle_idle_call() on the majority of its invocations, so change the code as described. Signed-off-by: Rafael J. Wysocki Reviewed-by: Daniel Lezcano Acked-by: Peter Zijlstra (Intel) diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index ae7c0be..9c919b4 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -79,6 +79,46 @@ static void default_idle_call(void) arch_cpu_idle(); } +static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, + int next_state) +{ + int entered_state; + + /* Fall back to the default arch idle method on errors. */ + if (next_state < 0) { + default_idle_call(); + return next_state; + } + + /* + * The idle task must be scheduled, it is pointless to go to idle, just + * update no idle residency and return. + */ + if (current_clr_polling_and_test()) { + dev->last_residency = 0; + local_irq_enable(); + return -EBUSY; + } + + /* Take note of the planned idle state. */ + idle_set_state(this_rq(), &drv->states[next_state]); + + /* + * Enter the idle state previously returned by the governor decision. + * This function will block until an interrupt occurs and will take + * care of re-enabling the local interrupts + */ + entered_state = cpuidle_enter(drv, dev, next_state); + + /* The cpu is no longer idle or about to enter idle. */ + idle_set_state(this_rq(), NULL); + + if (entered_state == -EBUSY) + default_idle_call(); + + return entered_state; +} + /** * cpuidle_idle_call - the main idle function * @@ -93,7 +133,6 @@ static void cpuidle_idle_call(void) struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int next_state, entered_state; - bool reflect; /* * Check if the idle task must be rescheduled. If it is the @@ -138,56 +177,19 @@ static void cpuidle_idle_call(void) goto exit_idle; } - reflect = false; next_state = cpuidle_find_deepest_state(drv, dev); + call_cpuidle(drv, dev, next_state); } else { - reflect = true; /* * Ask the cpuidle framework to choose a convenient idle state. */ next_state = cpuidle_select(drv, dev); - } - /* Fall back to the default arch idle method on errors. */ - if (next_state < 0) { - default_idle_call(); - goto exit_idle; - } - - /* - * The idle task must be scheduled, it is pointless to - * go to idle, just update no idle residency and get - * out of this function - */ - if (current_clr_polling_and_test()) { - dev->last_residency = 0; - entered_state = next_state; - local_irq_enable(); - goto exit_idle; - } - - /* Take note of the planned idle state. */ - idle_set_state(this_rq(), &drv->states[next_state]); - - /* - * Enter the idle state previously returned by the governor decision. - * This function will block until an interrupt occurs and will take - * care of re-enabling the local interrupts - */ - entered_state = cpuidle_enter(drv, dev, next_state); - - /* The cpu is no longer idle or about to enter idle. */ - idle_set_state(this_rq(), NULL); - - if (entered_state == -EBUSY) { - default_idle_call(); - goto exit_idle; - } - - /* - * Give the governor an opportunity to reflect on the outcome - */ - if (reflect) + entered_state = call_cpuidle(drv, dev, next_state); + /* + * Give the governor an opportunity to reflect on the outcome + */ cpuidle_reflect(dev, entered_state); + } exit_idle: __current_set_polling(); -- cgit v0.10.2 From 7312280bd2ad9df1bcca236c5614091a0bd1504c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 9 May 2015 21:50:32 +0200 Subject: cpuidle: Fix the kerneldoc comment for cpuidle_enter_state() The kerneldoc comment for cpuidle_enter_state() doesn't match the function's header any more, so fix it. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 3b80b77..597f884 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -150,7 +150,7 @@ int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev) * cpuidle_enter_state - enter the state and update stats * @dev: cpuidle device for this cpu * @drv: cpuidle driver for this cpu - * @next_state: index into drv->states of the state to enter + * @index: index into the states table in @drv of the state to enter */ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) -- cgit v0.10.2 From faad38492814112e3e7ce94d90123bbe301fff33 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 10 May 2015 01:18:03 +0200 Subject: sched / idle: Call idle_set_state() from cpuidle_enter_state() Introduce a wrapper function around idle_set_state() called sched_idle_set_state() that will pass this_rq() to it as the first argument and make cpuidle_enter_state() call the new function before and after entering the target state. At the same time, remove direct invocations of idle_set_state() from call_cpuidle(). This will allow the invocation of default_idle_call() to be moved from call_cpuidle() to cpuidle_enter_state() safely and call_cpuidle() to be simplified a bit as a result. Signed-off-by: Rafael J. Wysocki Reviewed-by: Preeti U Murthy Tested-by: Preeti U Murthy Tested-by: Sudeep Holla Acked-by: Kevin Hilman diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 597f884..9306dd5 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -170,6 +170,9 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, if (broadcast && tick_broadcast_enter()) return -EBUSY; + /* Take note of the planned idle state. */ + sched_idle_set_state(target_state); + trace_cpu_idle_rcuidle(index, dev->cpu); time_start = ktime_get(); @@ -178,6 +181,9 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, time_end = ktime_get(); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); + /* The cpu is no longer idle or about to enter idle. */ + sched_idle_set_state(NULL); + if (broadcast) { if (WARN_ON_ONCE(!irqs_disabled())) local_irq_disable(); diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 9c5e892..301eaaa 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -200,6 +200,9 @@ static inline struct cpuidle_driver *cpuidle_get_cpu_driver( struct cpuidle_device *dev) {return NULL; } #endif +/* kernel/sched/idle.c */ +extern void sched_idle_set_state(struct cpuidle_state *idle_state); + #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a); #else diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 9c919b4..5d9f549 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -15,6 +15,15 @@ #include "sched.h" +/** + * sched_idle_set_state - Record idle state for the current CPU. + * @idle_state: State to record. + */ +void sched_idle_set_state(struct cpuidle_state *idle_state) +{ + idle_set_state(this_rq(), idle_state); +} + static int __read_mostly cpu_idle_force_poll; void cpu_idle_poll_ctrl(bool enable) @@ -100,9 +109,6 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, return -EBUSY; } - /* Take note of the planned idle state. */ - idle_set_state(this_rq(), &drv->states[next_state]); - /* * Enter the idle state previously returned by the governor decision. * This function will block until an interrupt occurs and will take @@ -110,9 +116,6 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, */ entered_state = cpuidle_enter(drv, dev, next_state); - /* The cpu is no longer idle or about to enter idle. */ - idle_set_state(this_rq(), NULL); - if (entered_state == -EBUSY) default_idle_call(); -- cgit v0.10.2 From 827a5aefc542b8fb17c00de06118e5cd0e3800f2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 10 May 2015 01:18:46 +0200 Subject: sched / idle: Call default_idle_call() from cpuidle_enter_state() The check of the cpuidle_enter() return value against -EBUSY made in call_cpuidle() will not be necessary any more if cpuidle_enter_state() calls default_idle_call() directly when it is about to return -EBUSY, so make that happen and eliminate the check. Signed-off-by: Rafael J. Wysocki Reviewed-by: Preeti U Murthy Tested-by: Preeti U Murthy Tested-by: Sudeep Holla Acked-by: Kevin Hilman diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 9306dd5..a7b9e67 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -167,8 +167,10 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, * local timer will be shut down. If a local timer is used from another * CPU as a broadcast timer, this call may fail if it is not available. */ - if (broadcast && tick_broadcast_enter()) + if (broadcast && tick_broadcast_enter()) { + default_idle_call(); return -EBUSY; + } /* Take note of the planned idle state. */ sched_idle_set_state(target_state); diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 301eaaa..c7a6364 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -202,6 +202,7 @@ static inline struct cpuidle_driver *cpuidle_get_cpu_driver( /* kernel/sched/idle.c */ extern void sched_idle_set_state(struct cpuidle_state *idle_state); +extern void default_idle_call(void); #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a); diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 5d9f549..594275e 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -76,12 +76,13 @@ void __weak arch_cpu_idle(void) local_irq_enable(); } -static void default_idle_call(void) +/** + * default_idle_call - Default CPU idle routine. + * + * To use when the cpuidle framework cannot be used. + */ +void default_idle_call(void) { - /* - * We can't use the cpuidle framework, let's use the default idle - * routine. - */ if (current_clr_polling_and_test()) local_irq_enable(); else @@ -91,8 +92,6 @@ static void default_idle_call(void) static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, int next_state) { - int entered_state; - /* Fall back to the default arch idle method on errors. */ if (next_state < 0) { default_idle_call(); @@ -114,12 +113,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, * This function will block until an interrupt occurs and will take * care of re-enabling the local interrupts */ - entered_state = cpuidle_enter(drv, dev, next_state); - - if (entered_state == -EBUSY) - default_idle_call(); - - return entered_state; + return cpuidle_enter(drv, dev, next_state); } /** -- cgit v0.10.2 From 0d94039fabccaa81d87eafdac509d0dda4df2f7b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 10 May 2015 01:19:52 +0200 Subject: cpuidle: Select a different state on tick_broadcast_enter() failures If tick_broadcast_enter() fails in cpuidle_enter_state(), try to find another idle state to enter instead of invoking default_idle_call() immediately and returning -EBUSY which should increase the chances of saving some energy in those cases. Signed-off-by: Rafael J. Wysocki Reviewed-by: Preeti U Murthy Tested-by: Preeti U Murthy Tested-by: Sudeep Holla Acked-by: Kevin Hilman diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index a7b9e67..af6dd59 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -73,7 +73,10 @@ int cpuidle_play_dead(void) } static int find_deepest_state(struct cpuidle_driver *drv, - struct cpuidle_device *dev, bool freeze) + struct cpuidle_device *dev, + unsigned int max_latency, + unsigned int forbidden_flags, + bool freeze) { unsigned int latency_req = 0; int i, ret = freeze ? -1 : CPUIDLE_DRIVER_STATE_START - 1; @@ -83,6 +86,8 @@ static int find_deepest_state(struct cpuidle_driver *drv, struct cpuidle_state_usage *su = &dev->states_usage[i]; if (s->disabled || su->disable || s->exit_latency <= latency_req + || s->exit_latency > max_latency + || (s->flags & forbidden_flags) || (freeze && !s->enter_freeze)) continue; @@ -100,7 +105,7 @@ static int find_deepest_state(struct cpuidle_driver *drv, int cpuidle_find_deepest_state(struct cpuidle_driver *drv, struct cpuidle_device *dev) { - return find_deepest_state(drv, dev, false); + return find_deepest_state(drv, dev, UINT_MAX, 0, false); } static void enter_freeze_proper(struct cpuidle_driver *drv, @@ -139,7 +144,7 @@ int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev) * that interrupts won't be enabled when it exits and allows the tick to * be frozen safely. */ - index = find_deepest_state(drv, dev, true); + index = find_deepest_state(drv, dev, UINT_MAX, 0, true); if (index >= 0) enter_freeze_proper(drv, dev, index); @@ -168,8 +173,13 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, * CPU as a broadcast timer, this call may fail if it is not available. */ if (broadcast && tick_broadcast_enter()) { - default_idle_call(); - return -EBUSY; + index = find_deepest_state(drv, dev, target_state->exit_latency, + CPUIDLE_FLAG_TIMER_STOP, false); + if (index < 0) { + default_idle_call(); + return -EBUSY; + } + target_state = &drv->states[index]; } /* Take note of the planned idle state. */ -- cgit v0.10.2 From 7d51d97925e6cbfa2f7f14e3e3aa363b35ee5c24 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 28 May 2015 04:09:24 +0200 Subject: cpuidle: Do not use CPUIDLE_DRIVER_STATE_START in cpuidle.c The CPUIDLE_DRIVER_STATE_START symbol is defined as 1 only if CONFIG_ARCH_HAS_CPU_RELAX is set, otherwise it is defined as 0. However, if CONFIG_ARCH_HAS_CPU_RELAX is set, the first (index 0) entry in the cpuidle driver's table of states is overwritten with the default "poll" entry by the core. The "state" defined by the "poll" entry doesn't provide ->enter_dead and ->enter_freeze callbacks and its exit_latency is 0. For this reason, it is not necessary to use CPUIDLE_DRIVER_STATE_START in cpuidle_play_dead() (->enter_dead is NULL, so the "poll state" will be skipped by the loop). It also is arguably unuseful to return states with exit_latency equal to 0 from find_deepest_state(), so the function can be modified to start the loop from index 0 and the "poll state" will be skipped by it as a result of the check against latency_req. Signed-off-by: Rafael J. Wysocki Reviewed-by: Preeti U Murthy diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index af6dd59..7f1b8f5 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -65,7 +65,7 @@ int cpuidle_play_dead(void) return -ENODEV; /* Find lowest-power state that supports long-term idle */ - for (i = drv->state_count - 1; i >= CPUIDLE_DRIVER_STATE_START; i--) + for (i = drv->state_count - 1; i >= 0; i--) if (drv->states[i].enter_dead) return drv->states[i].enter_dead(dev, i); @@ -79,9 +79,9 @@ static int find_deepest_state(struct cpuidle_driver *drv, bool freeze) { unsigned int latency_req = 0; - int i, ret = freeze ? -1 : CPUIDLE_DRIVER_STATE_START - 1; + int i, ret = -ENXIO; - for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) { + for (i = 0; i < drv->state_count; i++) { struct cpuidle_state *s = &drv->states[i]; struct cpuidle_state_usage *su = &dev->states_usage[i]; -- cgit v0.10.2