From 1aef40e288acfb3cc28ff77528b34ef66683bed6 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 26 Oct 2012 12:26:24 +0200 Subject: cpuidle / sysfs: change function parameter The function needs the cpuidle_device which is initially passed to the caller. The current code gets the struct device from the struct cpuidle_device, pass it the cpuidle_add_sysfs function. This function calls per_cpu(cpuidle_devices, cpu) to get the cpuidle_device. This patch pass the cpuidle_device instead and simplify the code. Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 7f15b85..b511ac3 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -394,7 +394,6 @@ EXPORT_SYMBOL_GPL(cpuidle_disable_device); static int __cpuidle_register_device(struct cpuidle_device *dev) { int ret; - struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver(); if (!try_module_get(cpuidle_driver->owner)) @@ -404,7 +403,7 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) per_cpu(cpuidle_devices, dev->cpu) = dev; list_add(&dev->device_list, &cpuidle_detected_devices); - ret = cpuidle_add_sysfs(cpu_dev); + ret = cpuidle_add_sysfs(dev); if (ret) goto err_sysfs; @@ -416,7 +415,7 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) return 0; err_coupled: - cpuidle_remove_sysfs(cpu_dev); + cpuidle_remove_sysfs(dev); wait_for_completion(&dev->kobj_unregister); err_sysfs: list_del(&dev->device_list); @@ -460,7 +459,6 @@ EXPORT_SYMBOL_GPL(cpuidle_register_device); */ void cpuidle_unregister_device(struct cpuidle_device *dev) { - struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver(); if (dev->registered == 0) @@ -470,7 +468,7 @@ void cpuidle_unregister_device(struct cpuidle_device *dev) cpuidle_disable_device(dev); - 
cpuidle_remove_sysfs(cpu_dev); + cpuidle_remove_sysfs(dev); list_del(&dev->device_list); wait_for_completion(&dev->kobj_unregister); per_cpu(cpuidle_devices, dev->cpu) = NULL; diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h index 76e7f69..2120d9e 100644 --- a/drivers/cpuidle/cpuidle.h +++ b/drivers/cpuidle/cpuidle.h @@ -29,8 +29,8 @@ extern int cpuidle_add_interface(struct device *dev); extern void cpuidle_remove_interface(struct device *dev); extern int cpuidle_add_state_sysfs(struct cpuidle_device *device); extern void cpuidle_remove_state_sysfs(struct cpuidle_device *device); -extern int cpuidle_add_sysfs(struct device *dev); -extern void cpuidle_remove_sysfs(struct device *dev); +extern int cpuidle_add_sysfs(struct cpuidle_device *dev); +extern void cpuidle_remove_sysfs(struct cpuidle_device *dev); #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED bool cpuidle_state_is_coupled(struct cpuidle_device *dev, diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 5f809e3..84e6285 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -408,13 +408,11 @@ void cpuidle_remove_state_sysfs(struct cpuidle_device *device) * cpuidle_add_sysfs - creates a sysfs instance for the target device * @dev: the target device */ -int cpuidle_add_sysfs(struct device *cpu_dev) +int cpuidle_add_sysfs(struct cpuidle_device *dev) { - int cpu = cpu_dev->id; - struct cpuidle_device *dev; + struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); int error; - dev = per_cpu(cpuidle_devices, cpu); error = kobject_init_and_add(&dev->kobj, &ktype_cpuidle, &cpu_dev->kobj, "cpuidle"); if (!error) @@ -426,11 +424,7 @@ int cpuidle_add_sysfs(struct device *cpu_dev) * cpuidle_remove_sysfs - deletes a sysfs instance on the target device * @dev: the target device */ -void cpuidle_remove_sysfs(struct device *cpu_dev) +void cpuidle_remove_sysfs(struct cpuidle_device *dev) { - int cpu = cpu_dev->id; - struct cpuidle_device *dev; - - dev = 
per_cpu(cpuidle_devices, cpu); kobject_put(&dev->kobj); } -- cgit v0.10.2 From e45a00d679a788217f35ee4214a32d6d1924160b Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 26 Oct 2012 12:26:32 +0200 Subject: cpuidle / sysfs: move kobj initialization in the syfs file Move the kobj initialization and completion in the sysfs.c and encapsulate the code more. Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index b511ac3..f4b8fc5 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -399,8 +399,6 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) if (!try_module_get(cpuidle_driver->owner)) return -EINVAL; - init_completion(&dev->kobj_unregister); - per_cpu(cpuidle_devices, dev->cpu) = dev; list_add(&dev->device_list, &cpuidle_detected_devices); ret = cpuidle_add_sysfs(dev); @@ -416,7 +414,6 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) err_coupled: cpuidle_remove_sysfs(dev); - wait_for_completion(&dev->kobj_unregister); err_sysfs: list_del(&dev->device_list); per_cpu(cpuidle_devices, dev->cpu) = NULL; @@ -470,7 +467,6 @@ void cpuidle_unregister_device(struct cpuidle_device *dev) cpuidle_remove_sysfs(dev); list_del(&dev->device_list); - wait_for_completion(&dev->kobj_unregister); per_cpu(cpuidle_devices, dev->cpu) = NULL; cpuidle_coupled_unregister_device(dev); diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 84e6285..ed87399 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -374,8 +374,8 @@ int cpuidle_add_state_sysfs(struct cpuidle_device *device) kobj->state_usage = &device->states_usage[i]; init_completion(&kobj->kobj_unregister); - ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle, &device->kobj, - "state%d", i); + ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle, + &device->kobj, "state%d", i); if (ret) { kfree(kobj); goto error_state; @@ -413,6 +413,8 @@ int 
cpuidle_add_sysfs(struct cpuidle_device *dev) struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); int error; + init_completion(&dev->kobj_unregister); + error = kobject_init_and_add(&dev->kobj, &ktype_cpuidle, &cpu_dev->kobj, "cpuidle"); if (!error) @@ -427,4 +429,5 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev) void cpuidle_remove_sysfs(struct cpuidle_device *dev) { kobject_put(&dev->kobj); + wait_for_completion(&dev->kobj_unregister); } -- cgit v0.10.2 From 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Fri, 26 Oct 2012 12:26:41 +0200 Subject: cpuidle: Quickly notice prediction failure for repeat mode Predicting the future is difficult, and when the cpuidle governor's prediction fails, the governor may choose a shallower C-state than it should. Quickly noticing such a failure is therefore important for power saving. The cpuidle menu governor detects a repeat pattern when the last 8 C-state residencies are consecutive and identical or very close to each other; it then predicts that the next C-state residency will be the same. A real-world case is the turbostat utility (tools/power/x86/turbostat) in kernel 3.3 or earlier. On Sandybridge, turbostat reads 10 registers one by one, so it generates 10 IPIs that wake up idle CPUs. The cpuidle menu governor therefore predicts repeat mode, i.e. that another IPI will wake the idle CPU soon, and keeps the idle CPU in the C1 state even though the CPU is totally idle. However, after reading the 10 registers turbostat sleeps for 5 seconds by default, so the idle CPU stays in C1 for a long time, until a break event occurs, although it is idle. On an idle Sandybridge system running "./turbostat -v", deep C-state residency fluctuates between 70% and 99%. With the patched kernel, deep C-state residency stays above 99.98%. In the patch, a timer is added when the menu governor detects repeat mode and chooses a shallow C-state. 
The timer is set to a time out value that greater than predicted time, and we conclude repeat mode prediction failure if timer is triggered. When repeat mode happens as expected, the timer is not triggered and CPU waken up from C-states and it will cancel the timer initiatively. When repeat mode does not happen, the timer will be time out and menu governor will quickly notice that the repeat mode prediction fails and then re-evaluates deeper C-states possibility. Below is another case which will clearly show the patch much benefit: #include #include #include #include #include #include #include volatile int * shutdown; volatile long * count; int delay = 20; int loop = 8; void usage(void) { fprintf(stderr, "Usage: idle_predict [options]\n" " --help -h Print this help\n" " --thread -n Thread number\n" " --loop -l Loop times in shallow Cstate\n" " --delay -t Sleep time (uS)in shallow Cstate\n"); } void *simple_loop() { int idle_num = 1; while (!(*shutdown)) { *count = *count + 1; if (idle_num % loop) usleep(delay); else { /* sleep 1 second */ usleep(1000000); idle_num = 0; } idle_num++; } } static void sighand(int sig) { *shutdown = 1; } int main(int argc, char *argv[]) { sigset_t sigset; int signum = SIGALRM; int i, c, er = 0, thread_num = 8; pthread_t pt[1024]; static char optstr[] = "n:l:t:h:"; while ((c = getopt(argc, argv, optstr)) != EOF) switch (c) { case 'n': thread_num = atoi(optarg); break; case 'l': loop = atoi(optarg); break; case 't': delay = atoi(optarg); break; case 'h': default: usage(); exit(1); } printf("thread=%d,loop=%d,delay=%d\n",thread_num,loop,delay); count = malloc(sizeof(long)); shutdown = malloc(sizeof(int)); *count = 0; *shutdown = 0; sigemptyset(&sigset); sigaddset(&sigset, signum); sigprocmask (SIG_BLOCK, &sigset, NULL); signal(SIGINT, sighand); signal(SIGTERM, sighand); for(i = 0; i < thread_num ; i++) pthread_create(&pt[i], NULL, simple_loop, NULL); for (i = 0; i < thread_num; i++) pthread_join(pt[i], NULL); exit(0); } Get powertop V2 
from git://github.com/fenrus75/powertop, build powertop. After build the above test application, then run it. Test plaform can be Intel Sandybridge or other recent platforms. #./idle_predict -l 10 & #./powertop We will find that deep C-state will dangle between 40%~100% and much time spent on C1 state. It is because menu governor wrongly predict that repeat mode is kept, so it will choose the C1 shallow C-state even though it has chance to sleep 1 second in deep C-state. While after patched the kernel, we find that deep C-state will keep >99.6%. Signed-off-by: Rik van Riel Signed-off-by: Youquan Song Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 5b1f2c3..37c0ff6 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -28,6 +28,13 @@ #define MAX_INTERESTING 50000 #define STDDEV_THRESH 400 +/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */ +#define MAX_DEVIATION 60 + +static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer); +static DEFINE_PER_CPU(int, hrtimer_status); +/* menu hrtimer mode */ +enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT}; /* * Concepts and ideas behind the menu governor @@ -191,17 +198,42 @@ static u64 div_round64(u64 dividend, u32 divisor) return div_u64(dividend + (divisor / 2), divisor); } +/* Cancel the hrtimer if it is not triggered yet */ +void menu_hrtimer_cancel(void) +{ + int cpu = smp_processor_id(); + struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); + + /* The timer is still not time out*/ + if (per_cpu(hrtimer_status, cpu)) { + hrtimer_cancel(hrtmr); + per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; + } +} +EXPORT_SYMBOL_GPL(menu_hrtimer_cancel); + +/* Call back for hrtimer is triggered */ +static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer) +{ + int cpu = smp_processor_id(); + + per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; + + return HRTIMER_NORESTART; +} + /* * Try detecting repeating patterns by 
keeping track of the last 8 * intervals, and checking if the standard deviation of that set * of points is below a threshold. If it is... then use the * average of these 8 points as the estimated value. */ -static void detect_repeating_patterns(struct menu_device *data) +static int detect_repeating_patterns(struct menu_device *data) { int i; uint64_t avg = 0; uint64_t stddev = 0; /* contains the square of the std deviation */ + int ret = 0; /* first calculate average and standard deviation of the past */ for (i = 0; i < INTERVALS; i++) @@ -210,7 +242,7 @@ static void detect_repeating_patterns(struct menu_device *data) /* if the avg is beyond the known next tick, it's worthless */ if (avg > data->expected_us) - return; + return 0; for (i = 0; i < INTERVALS; i++) stddev += (data->intervals[i] - avg) * @@ -223,8 +255,12 @@ static void detect_repeating_patterns(struct menu_device *data) * repeating pattern and predict we keep doing this. */ - if (avg && stddev < STDDEV_THRESH) + if (avg && stddev < STDDEV_THRESH) { data->predicted_us = avg; + ret = 1; + } + + return ret; } /** @@ -240,6 +276,9 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) int i; int multiplier; struct timespec t; + int repeat = 0, low_predicted = 0; + int cpu = smp_processor_id(); + struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); if (data->needs_update) { menu_update(drv, dev); @@ -274,7 +313,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket], RESOLUTION * DECAY); - detect_repeating_patterns(data); + repeat = detect_repeating_patterns(data); /* * We want to default to C1 (hlt), not to busy polling @@ -295,8 +334,10 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (s->disabled || su->disable) continue; - if (s->target_residency > data->predicted_us) + if (s->target_residency > data->predicted_us) { + 
low_predicted = 1; continue; + } if (s->exit_latency > latency_req) continue; if (s->exit_latency * multiplier > data->predicted_us) @@ -309,6 +350,27 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) } } + /* not deepest C-state chosen for low predicted residency */ + if (low_predicted) { + unsigned int timer_us = 0; + + /* + * Set a timer to detect whether this sleep is much + * longer than repeat mode predicted. If the timer + * triggers, the code will evaluate whether to put + * the CPU into a deeper C-state. + * The timer is cancelled on CPU wakeup. + */ + timer_us = 2 * (data->predicted_us + MAX_DEVIATION); + + if (repeat && (4 * timer_us < data->expected_us)) { + hrtimer_start(hrtmr, ns_to_ktime(1000 * timer_us), + HRTIMER_MODE_REL_PINNED); + /* In repeat case, menu hrtimer is started */ + per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT; + } + } + return data->last_state_idx; } @@ -399,6 +461,9 @@ static int menu_enable_device(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = &per_cpu(menu_devices, dev->cpu); + struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu); + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + t->function = menu_hrtimer_notify; memset(data, 0, sizeof(struct menu_device)); diff --git a/include/linux/tick.h b/include/linux/tick.h index f37fceb..1a6567b 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -142,4 +142,10 @@ static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } # endif /* !NO_HZ */ +# ifdef CONFIG_CPU_IDLE_GOV_MENU +extern void menu_hrtimer_cancel(void); +# else +static inline void menu_hrtimer_cancel(void) {} +# endif /* CONFIG_CPU_IDLE_GOV_MENU */ + #endif diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a402608..6f33706 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -526,6 +526,8 @@ void 
tick_nohz_irq_exit(void) if (!ts->inidle) return; + /* Cancel the timer because CPU already waken up from the C-states*/ + menu_hrtimer_cancel(); __tick_nohz_idle_enter(ts); } @@ -621,6 +623,8 @@ void tick_nohz_idle_exit(void) ts->inidle = 0; + /* Cancel the timer because CPU already waken up from the C-states*/ + menu_hrtimer_cancel(); if (ts->idle_active || ts->tick_stopped) now = ktime_get(); -- cgit v0.10.2 From e11538d1f03914eb92af5a1a378375c05ae8520c Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Fri, 26 Oct 2012 12:26:50 +0200 Subject: cpuidle: Quickly notice prediction failure in general case Predicting the future is difficult, and when the cpuidle governor's prediction fails, the governor may choose a shallower C-state than it should. Quickly noticing such a failure is therefore important for power saving. This patch extends the detection to the general case in which the prediction logic produces a small predicted residency and therefore chooses a shallow C-state even though the expected residency is large. If that prediction is wrong, the CPU stays in the shallow C-state for a long time although it actually had the chance to enter a deep C-state. So when the expected residency is long enough but the governor chooses a shallow C-state, a timer is armed to detect a prediction failure. If the CPU wakes up from the C-state before the timer expires, the timer is cancelled. If the timer fires, the menu governor quickly notices the prediction failure and re-evaluates whether a deeper C-state is possible. Signed-off-by: Rik van Riel Signed-off-by: Youquan Song Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 37c0ff6..43a54fd 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -34,7 +34,7 @@ static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer); static DEFINE_PER_CPU(int, hrtimer_status); /* menu hrtimer mode */ -enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT}; +enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT, MENU_HRTIMER_GENERAL}; /* * Concepts and ideas behind the menu governor @@ -116,6 +116,13 @@ enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT}; * */ +/* + * The C-state residency is so long that is is worthwhile to exit + * from the shallow C-state and re-enter into a deeper C-state. + */ +static unsigned int perfect_cstate_ms __read_mostly = 30; +module_param(perfect_cstate_ms, uint, 0000); + struct menu_device { int last_state_idx; int needs_update; @@ -216,6 +223,16 @@ EXPORT_SYMBOL_GPL(menu_hrtimer_cancel); static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer) { int cpu = smp_processor_id(); + struct menu_device *data = &per_cpu(menu_devices, cpu); + + /* In general case, the expected residency is much larger than + * deepest C-state target residency, but prediction logic still + * predicts a small predicted residency, so the prediction + * history is totally broken if the timer is triggered. + * So reset the correction factor. 
+ */ + if (per_cpu(hrtimer_status, cpu) == MENU_HRTIMER_GENERAL) + data->correction_factor[data->bucket] = RESOLUTION * DECAY; per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; @@ -353,6 +370,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) /* not deepest C-state chosen for low predicted residency */ if (low_predicted) { unsigned int timer_us = 0; + unsigned int perfect_us = 0; /* * Set a timer to detect whether this sleep is much @@ -363,12 +381,26 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) */ timer_us = 2 * (data->predicted_us + MAX_DEVIATION); + perfect_us = perfect_cstate_ms * 1000; + if (repeat && (4 * timer_us < data->expected_us)) { hrtimer_start(hrtmr, ns_to_ktime(1000 * timer_us), HRTIMER_MODE_REL_PINNED); /* In repeat case, menu hrtimer is started */ per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT; + } else if (perfect_us < data->expected_us) { + /* + * The next timer is long. This could be because + * we did not make a useful prediction. + * In that case, it makes sense to re-enter + * into a deeper C-state after some time. + */ + hrtimer_start(hrtmr, ns_to_ktime(1000 * timer_us), + HRTIMER_MODE_REL_PINNED); + /* In general case, menu hrtimer is started */ + per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_GENERAL; } + } return data->last_state_idx; -- cgit v0.10.2 From d73d68dc49e09143e8e3bef10670a021c26ec4a5 Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Fri, 26 Oct 2012 12:26:59 +0200 Subject: cpuidle: Set residency to 0 if target Cstate not enter When the cpuidle governor has chosen a C-state for an idle CPU but then notices that there are tasks waiting to be executed, the idle CPU does not actually enter the target C-state and goes on to run the task instead. In this situation, the governor uses the residency of the previously entered C-state, which is obviously not reasonable. This patch fixes it by setting the target C-state residency to 0. 
Signed-off-by: Rik van Riel Signed-off-by: Youquan Song Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index f4b8fc5..ce4cac7 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -144,6 +144,10 @@ int cpuidle_idle_call(void) /* ask the governor for the next state */ next_state = cpuidle_curr_governor->select(drv, dev); if (need_resched()) { + dev->last_residency = 0; + /* give the governor an opportunity to reflect on the outcome */ + if (cpuidle_curr_governor->reflect) + cpuidle_curr_governor->reflect(dev, next_state); local_irq_enable(); return 0; } -- cgit v0.10.2 From c96ca4fb76b711279be063da083f09b8d65af5c5 Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Fri, 26 Oct 2012 12:27:07 +0200 Subject: cpuidle: Get typical recent sleep interval The function detect_repeating_patterns was not very useful for workloads with alternating long and short pauses, for example virtual machines handling network requests for each other (say a web and database server). Instead, try to find a recent sleep interval that is somewhere between the median and the mode sleep time, by discarding outliers to the up side and recalculating the average and standard deviation until that is no longer required. This should do something sane with a sleep interval series like: 200 180 210 10000 30 1000 170 200 The current code would simply discard such a series, while the new code will guess a typical sleep interval just shy of 200. The original patch come from Rik van Riel . Signed-off-by: Rik van Riel Signed-off-by: Youquan Song Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 43a54fd..2efee27 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -245,36 +245,59 @@ static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer) * of points is below a threshold. If it is... 
then use the * average of these 8 points as the estimated value. */ -static int detect_repeating_patterns(struct menu_device *data) +static u32 get_typical_interval(struct menu_device *data) { - int i; - uint64_t avg = 0; - uint64_t stddev = 0; /* contains the square of the std deviation */ - int ret = 0; - - /* first calculate average and standard deviation of the past */ - for (i = 0; i < INTERVALS; i++) - avg += data->intervals[i]; - avg = avg / INTERVALS; + int i = 0, divisor = 0; + uint64_t max = 0, avg = 0, stddev = 0; + int64_t thresh = LLONG_MAX; /* Discard outliers above this value. */ + unsigned int ret = 0; - /* if the avg is beyond the known next tick, it's worthless */ - if (avg > data->expected_us) - return 0; - - for (i = 0; i < INTERVALS; i++) - stddev += (data->intervals[i] - avg) * - (data->intervals[i] - avg); +again: - stddev = stddev / INTERVALS; + /* first calculate average and standard deviation of the past */ + max = avg = divisor = stddev = 0; + for (i = 0; i < INTERVALS; i++) { + int64_t value = data->intervals[i]; + if (value <= thresh) { + avg += value; + divisor++; + if (value > max) + max = value; + } + } + do_div(avg, divisor); + for (i = 0; i < INTERVALS; i++) { + int64_t value = data->intervals[i]; + if (value <= thresh) { + int64_t diff = value - avg; + stddev += diff * diff; + } + } + do_div(stddev, divisor); + stddev = int_sqrt(stddev); /* - * now.. if stddev is small.. then assume we have a - * repeating pattern and predict we keep doing this. + * If we have outliers to the upside in our distribution, discard + * those by setting the threshold to exclude these outliers, then + * calculate the average and standard deviation again. Once we get + * down to the bottom 3/4 of our samples, stop excluding samples. + * + * This can deal with workloads that have long pauses interspersed + * with sporadic activity with a bunch of short pauses. 
+ * + * The typical interval is obtained when standard deviation is small + * or standard deviation is small compared to the average interval. */ - - if (avg && stddev < STDDEV_THRESH) { + if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) + || stddev <= 20) { data->predicted_us = avg; ret = 1; + return ret; + + } else if ((divisor * 4) > INTERVALS * 3) { + /* Exclude the max interval */ + thresh = max - 1; + goto again; } return ret; @@ -330,7 +353,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket], RESOLUTION * DECAY); - repeat = detect_repeating_patterns(data); + repeat = get_typical_interval(data); /* * We want to default to C1 (hlt), not to busy polling -- cgit v0.10.2 From 349631e0e411fefa2fed7e0a30b97704562dbd6b Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 31 Oct 2012 01:05:16 +0100 Subject: cpuidle / sysfs: move structure declaration into the sysfs.c file The structure cpuidle_state_kobj is not used anywhere except in the sysfs.c file. The definition of this structure is not needed in the cpuidle header file. This patch moves it to the sysfs.c file in order to encapsulate the code a bit more. Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index ed87399..f15c1e5 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -297,6 +297,13 @@ static struct attribute *cpuidle_state_default_attrs[] = { NULL }; +struct cpuidle_state_kobj { + struct cpuidle_state *state; + struct cpuidle_state_usage *state_usage; + struct completion kobj_unregister; + struct kobject kobj; +}; + #define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj) #define kobj_to_state(k) (kobj_to_state_obj(k)->state) #define kobj_to_state_usage(k) (kobj_to_state_obj(k)->state_usage) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 279b1ea..7daf0e3 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -82,13 +82,6 @@ cpuidle_set_statedata(struct cpuidle_state_usage *st_usage, void *data) st_usage->driver_data = data; } -struct cpuidle_state_kobj { - struct cpuidle_state *state; - struct cpuidle_state_usage *state_usage; - struct completion kobj_unregister; - struct kobject kobj; -}; - struct cpuidle_device { unsigned int registered:1; unsigned int enabled:1; -- cgit v0.10.2 From 8f3e9953e1e4ae5c11e2e880e7d85c03c0180613 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 31 Oct 2012 01:09:02 +0100 Subject: cpuidle: fixup device.h header in cpuidle.h The "struct device" is only used in sysfs.c. The other .c files including the private header "cpuidle.h" do not need to pull the entire headers tree from there as they don't manipulate the "struct device". This patch fixes this by moving the header inclusion to sysfs.c and adding a forward declaration for the struct device. The number of lines generated by the preprocessor: Without this patch : 17269 loc With this patch : 16446 loc Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h index 2120d9e..f6b0923 100644 --- a/drivers/cpuidle/cpuidle.h +++ b/drivers/cpuidle/cpuidle.h @@ -5,8 +5,6 @@ #ifndef __DRIVER_CPUIDLE_H #define __DRIVER_CPUIDLE_H -#include - /* For internal use only */ extern struct cpuidle_governor *cpuidle_curr_governor; extern struct list_head cpuidle_governors; @@ -25,6 +23,9 @@ extern void cpuidle_uninstall_idle_handler(void); extern int cpuidle_switch_governor(struct cpuidle_governor *gov); /* sysfs */ + +struct device; + extern int cpuidle_add_interface(struct device *dev); extern void cpuidle_remove_interface(struct device *dev); extern int cpuidle_add_state_sysfs(struct cpuidle_device *device); diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index f15c1e5..49b1f4bc 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "cpuidle.h" -- cgit v0.10.2 From 42f67f2acab2b7179c0d1ab234869e391448dfa6 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 31 Oct 2012 16:44:45 +0000 Subject: cpuidle: move driver's refcount to cpuidle We want to support different cpuidle drivers co-existing together. In this case we should move the refcount to the cpuidle_driver structure to handle several drivers at a time. Signed-off-by: Daniel Lezcano Acked-by: Peter De Schrijver Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 87db387..39ba8e1 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -16,7 +16,6 @@ static struct cpuidle_driver *cpuidle_curr_driver; DEFINE_SPINLOCK(cpuidle_driver_lock); -int cpuidle_driver_refcount; static void set_power_states(struct cpuidle_driver *drv) { @@ -61,6 +60,8 @@ int cpuidle_register_driver(struct cpuidle_driver *drv) if (!drv->power_specified) set_power_states(drv); + drv->refcnt = 0; + cpuidle_curr_driver = drv; spin_unlock(&cpuidle_driver_lock); @@ -92,7 +93,7 @@ void cpuidle_unregister_driver(struct cpuidle_driver *drv) spin_lock(&cpuidle_driver_lock); - if (!WARN_ON(cpuidle_driver_refcount > 0)) + if (!WARN_ON(drv->refcnt > 0)) cpuidle_curr_driver = NULL; spin_unlock(&cpuidle_driver_lock); @@ -106,7 +107,7 @@ struct cpuidle_driver *cpuidle_driver_ref(void) spin_lock(&cpuidle_driver_lock); drv = cpuidle_curr_driver; - cpuidle_driver_refcount++; + drv->refcnt++; spin_unlock(&cpuidle_driver_lock); return drv; @@ -114,10 +115,12 @@ struct cpuidle_driver *cpuidle_driver_ref(void) void cpuidle_driver_unref(void) { + struct cpuidle_driver *drv = cpuidle_curr_driver; + spin_lock(&cpuidle_driver_lock); - if (!WARN_ON(cpuidle_driver_refcount <= 0)) - cpuidle_driver_refcount--; + if (drv && !WARN_ON(drv->refcnt <= 0)) + drv->refcnt--; spin_unlock(&cpuidle_driver_lock); } diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 7daf0e3..d08e1af 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -124,6 +124,7 @@ static inline int cpuidle_get_last_residency(struct cpuidle_device *dev) struct cpuidle_driver { const char *name; struct module *owner; + int refcnt; unsigned int power_specified:1; /* set to 1 to use the core cpuidle time keeping (for all states). 
*/ -- cgit v0.10.2 From 41682032715c2c969357c81391a442a24dd1c2c2 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 31 Oct 2012 16:44:46 +0000 Subject: cpuidle: move driver checking within the lock section The code is racy and the check with cpuidle_curr_driver should be done under the lock. I don't find a path in the different drivers where that could happen because the arch specific drivers are written in such way it is not possible to register a driver while it is unregistered, except maybe in a very improbable case when "intel_idle" and "processor_idle" are competing. One could unregister a driver, while the other one is registering. Signed-off-by: Daniel Lezcano Acked-by: Peter De Schrijver Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 39ba8e1..3e59075 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -85,17 +85,9 @@ EXPORT_SYMBOL_GPL(cpuidle_get_driver); */ void cpuidle_unregister_driver(struct cpuidle_driver *drv) { - if (drv != cpuidle_curr_driver) { - WARN(1, "invalid cpuidle_unregister_driver(%s)\n", - drv->name); - return; - } - spin_lock(&cpuidle_driver_lock); - - if (!WARN_ON(drv->refcnt > 0)) + if (drv == cpuidle_curr_driver && !WARN_ON(drv->refcnt > 0)) cpuidle_curr_driver = NULL; - spin_unlock(&cpuidle_driver_lock); } EXPORT_SYMBOL_GPL(cpuidle_unregister_driver); -- cgit v0.10.2 From 13dd52f11a04e616900f565d6a1e5138e58d579f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 31 Oct 2012 16:44:47 +0000 Subject: cpuidle: prepare the cpuidle core to handle multiple drivers This patch is a preparation for the multiple cpuidle drivers support. As the next patch will introduce the multiple drivers with the Kconfig option and we want to keep the code clean and understandable, this patch defines a set of functions for encapsulating some common parts and splits what should be done under a lock from the rest. [rjw: Modified the subject and changelog slightly.] 
Signed-off-by: Daniel Lezcano Acked-by: Peter De Schrijver Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 3e59075..8246662 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -39,11 +39,20 @@ static void set_power_states(struct cpuidle_driver *drv) drv->states[i].power_usage = -1 - i; } -/** - * cpuidle_register_driver - registers a driver - * @drv: the driver - */ -int cpuidle_register_driver(struct cpuidle_driver *drv) +static void __cpuidle_driver_init(struct cpuidle_driver *drv) +{ + drv->refcnt = 0; + + if (!drv->power_specified) + set_power_states(drv); +} + +static void cpuidle_set_driver(struct cpuidle_driver *drv) +{ + cpuidle_curr_driver = drv; +} + +static int __cpuidle_register_driver(struct cpuidle_driver *drv) { if (!drv || !drv->state_count) return -EINVAL; @@ -51,22 +60,38 @@ int cpuidle_register_driver(struct cpuidle_driver *drv) if (cpuidle_disabled()) return -ENODEV; - spin_lock(&cpuidle_driver_lock); - if (cpuidle_curr_driver) { - spin_unlock(&cpuidle_driver_lock); + if (cpuidle_get_driver()) return -EBUSY; - } - if (!drv->power_specified) - set_power_states(drv); + __cpuidle_driver_init(drv); - drv->refcnt = 0; + cpuidle_set_driver(drv); - cpuidle_curr_driver = drv; + return 0; +} + +static void __cpuidle_unregister_driver(struct cpuidle_driver *drv) +{ + if (drv != cpuidle_get_driver()) + return; + + if (!WARN_ON(drv->refcnt > 0)) + cpuidle_set_driver(NULL); +} +/** + * cpuidle_register_driver - registers a driver + * @drv: the driver + */ +int cpuidle_register_driver(struct cpuidle_driver *drv) +{ + int ret; + + spin_lock(&cpuidle_driver_lock); + ret = __cpuidle_register_driver(drv); spin_unlock(&cpuidle_driver_lock); - return 0; + return ret; } EXPORT_SYMBOL_GPL(cpuidle_register_driver); @@ -86,8 +111,7 @@ EXPORT_SYMBOL_GPL(cpuidle_get_driver); void cpuidle_unregister_driver(struct cpuidle_driver *drv) { spin_lock(&cpuidle_driver_lock); - if (drv == 
cpuidle_curr_driver && !WARN_ON(drv->refcnt > 0)) - cpuidle_curr_driver = NULL; + __cpuidle_unregister_driver(drv); spin_unlock(&cpuidle_driver_lock); } EXPORT_SYMBOL_GPL(cpuidle_unregister_driver); @@ -98,7 +122,7 @@ struct cpuidle_driver *cpuidle_driver_ref(void) spin_lock(&cpuidle_driver_lock); - drv = cpuidle_curr_driver; + drv = cpuidle_get_driver(); drv->refcnt++; spin_unlock(&cpuidle_driver_lock); @@ -107,7 +131,7 @@ struct cpuidle_driver *cpuidle_driver_ref(void) void cpuidle_driver_unref(void) { - struct cpuidle_driver *drv = cpuidle_curr_driver; + struct cpuidle_driver *drv = cpuidle_get_driver(); spin_lock(&cpuidle_driver_lock); -- cgit v0.10.2 From bf4d1b5ddb78f86078ac6ae0415802d5f0c68f92 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 31 Oct 2012 16:44:48 +0000 Subject: cpuidle: support multiple drivers With the new tegra3 and big.LITTLE [1] architectures, several cpus with different characteristics (latencies and states) can coexist on the system. The cpuidle framework has the limitation of handling only identical cpus. This patch removes this limitation by introducing the multiple driver support for cpuidle. This option is configurable at compile time and should be enabled for the architectures mentioned above. So there is no impact for the other platforms if the option is disabled. The option defaults to 'n'. Note that the multiple driver support is also compatible with the existing drivers: even if just one driver is needed, all the cpus will be tied to this driver, using a small extra chunk of processor memory. The multiple driver support uses a per-cpu driver pointer instead of a global variable, and accesses to this variable are done from a cpu context. In order to keep the compatibility with the existing drivers, the functions 'cpuidle_register_driver' and 'cpuidle_unregister_driver' will register and unregister, respectively, the specified driver for all the cpus.
The semantics of the output of /sys/devices/system/cpu/cpuidle/current_driver remain the same, except that the driver name will be that of the driver tied to the current cpu. The /sys/devices/system/cpu/cpu[0-9]/cpuidle/driver/name files are added, allowing the per-cpu driver name to be read. [1] http://lwn.net/Articles/481055/ Signed-off-by: Daniel Lezcano Acked-by: Peter De Schrijver Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index a76b689..234ae65 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -9,6 +9,15 @@ config CPU_IDLE If you're using an ACPI-enabled platform, you should say Y here. +config CPU_IDLE_MULTIPLE_DRIVERS + bool "Support multiple cpuidle drivers" + depends on CPU_IDLE + default n + help + Allows the cpuidle framework to use different drivers for each CPU. + This is useful if you have a system with different CPU latencies and + states. If unsure say N. + config CPU_IDLE_GOV_LADDER bool depends on CPU_IDLE diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index ce4cac7..711dd83 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -68,7 +68,7 @@ static cpuidle_enter_t cpuidle_enter_ops; int cpuidle_play_dead(void) { struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); - struct cpuidle_driver *drv = cpuidle_get_driver(); + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int i, dead_state = -1; int power_usage = -1; @@ -128,7 +128,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, int cpuidle_idle_call(void) { struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); - struct cpuidle_driver *drv = cpuidle_get_driver(); + struct cpuidle_driver *drv; int next_state, entered_state; if (off) @@ -141,6 +141,8 @@ int cpuidle_idle_call(void) if (!dev || !dev->enabled) return -EBUSY; + drv = cpuidle_get_cpu_driver(dev); + /* ask the governor for the next state */ next_state = cpuidle_curr_governor->select(drv,
dev); if (need_resched()) { @@ -312,15 +314,19 @@ static void poll_idle_init(struct cpuidle_driver *drv) {} int cpuidle_enable_device(struct cpuidle_device *dev) { int ret, i; - struct cpuidle_driver *drv = cpuidle_get_driver(); + struct cpuidle_driver *drv; if (!dev) return -EINVAL; if (dev->enabled) return 0; + + drv = cpuidle_get_cpu_driver(dev); + if (!drv || !cpuidle_curr_governor) return -EIO; + if (!dev->state_count) dev->state_count = drv->state_count; @@ -335,7 +341,8 @@ int cpuidle_enable_device(struct cpuidle_device *dev) poll_idle_init(drv); - if ((ret = cpuidle_add_state_sysfs(dev))) + ret = cpuidle_add_device_sysfs(dev); + if (ret) return ret; if (cpuidle_curr_governor->enable && @@ -356,7 +363,7 @@ int cpuidle_enable_device(struct cpuidle_device *dev) return 0; fail_sysfs: - cpuidle_remove_state_sysfs(dev); + cpuidle_remove_device_sysfs(dev); return ret; } @@ -372,17 +379,20 @@ EXPORT_SYMBOL_GPL(cpuidle_enable_device); */ void cpuidle_disable_device(struct cpuidle_device *dev) { + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + if (!dev || !dev->enabled) return; - if (!cpuidle_get_driver() || !cpuidle_curr_governor) + + if (!drv || !cpuidle_curr_governor) return; dev->enabled = 0; if (cpuidle_curr_governor->disable) - cpuidle_curr_governor->disable(cpuidle_get_driver(), dev); + cpuidle_curr_governor->disable(drv, dev); - cpuidle_remove_state_sysfs(dev); + cpuidle_remove_device_sysfs(dev); enabled_devices--; } @@ -398,9 +408,9 @@ EXPORT_SYMBOL_GPL(cpuidle_disable_device); static int __cpuidle_register_device(struct cpuidle_device *dev) { int ret; - struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver(); + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); - if (!try_module_get(cpuidle_driver->owner)) + if (!try_module_get(drv->owner)) return -EINVAL; per_cpu(cpuidle_devices, dev->cpu) = dev; @@ -421,7 +431,7 @@ err_coupled: err_sysfs: list_del(&dev->device_list); per_cpu(cpuidle_devices, dev->cpu) = NULL; - 
module_put(cpuidle_driver->owner); + module_put(drv->owner); return ret; } @@ -460,7 +470,7 @@ EXPORT_SYMBOL_GPL(cpuidle_register_device); */ void cpuidle_unregister_device(struct cpuidle_device *dev) { - struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver(); + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); if (dev->registered == 0) return; @@ -477,7 +487,7 @@ void cpuidle_unregister_device(struct cpuidle_device *dev) cpuidle_resume_and_unlock(); - module_put(cpuidle_driver->owner); + module_put(drv->owner); } EXPORT_SYMBOL_GPL(cpuidle_unregister_device); diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h index f6b0923..ee97e96 100644 --- a/drivers/cpuidle/cpuidle.h +++ b/drivers/cpuidle/cpuidle.h @@ -28,8 +28,8 @@ struct device; extern int cpuidle_add_interface(struct device *dev); extern void cpuidle_remove_interface(struct device *dev); -extern int cpuidle_add_state_sysfs(struct cpuidle_device *device); -extern void cpuidle_remove_state_sysfs(struct cpuidle_device *device); +extern int cpuidle_add_device_sysfs(struct cpuidle_device *device); +extern void cpuidle_remove_device_sysfs(struct cpuidle_device *device); extern int cpuidle_add_sysfs(struct cpuidle_device *dev); extern void cpuidle_remove_sysfs(struct cpuidle_device *dev); diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 8246662..3af841f 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -14,9 +14,11 @@ #include "cpuidle.h" -static struct cpuidle_driver *cpuidle_curr_driver; DEFINE_SPINLOCK(cpuidle_driver_lock); +static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu); +static struct cpuidle_driver * __cpuidle_get_cpu_driver(int cpu); + static void set_power_states(struct cpuidle_driver *drv) { int i; @@ -47,12 +49,7 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv) set_power_states(drv); } -static void cpuidle_set_driver(struct cpuidle_driver *drv) -{ - cpuidle_curr_driver = drv; -} - -static 
int __cpuidle_register_driver(struct cpuidle_driver *drv) +static int __cpuidle_register_driver(struct cpuidle_driver *drv, int cpu) { if (!drv || !drv->state_count) return -EINVAL; @@ -60,23 +57,84 @@ static int __cpuidle_register_driver(struct cpuidle_driver *drv) if (cpuidle_disabled()) return -ENODEV; - if (cpuidle_get_driver()) + if (__cpuidle_get_cpu_driver(cpu)) return -EBUSY; __cpuidle_driver_init(drv); - cpuidle_set_driver(drv); + __cpuidle_set_cpu_driver(drv, cpu); return 0; } -static void __cpuidle_unregister_driver(struct cpuidle_driver *drv) +static void __cpuidle_unregister_driver(struct cpuidle_driver *drv, int cpu) { - if (drv != cpuidle_get_driver()) + if (drv != __cpuidle_get_cpu_driver(cpu)) return; if (!WARN_ON(drv->refcnt > 0)) - cpuidle_set_driver(NULL); + __cpuidle_set_cpu_driver(NULL, cpu); +} + +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS + +static DEFINE_PER_CPU(struct cpuidle_driver *, cpuidle_drivers); + +static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu) +{ + per_cpu(cpuidle_drivers, cpu) = drv; +} + +static struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu) +{ + return per_cpu(cpuidle_drivers, cpu); +} + +static void __cpuidle_unregister_all_cpu_driver(struct cpuidle_driver *drv) +{ + int cpu; + for_each_present_cpu(cpu) + __cpuidle_unregister_driver(drv, cpu); +} + +static int __cpuidle_register_all_cpu_driver(struct cpuidle_driver *drv) +{ + int ret = 0; + int i, cpu; + + for_each_present_cpu(cpu) { + ret = __cpuidle_register_driver(drv, cpu); + if (ret) + break; + } + + if (ret) + for_each_present_cpu(i) { + if (i == cpu) + break; + __cpuidle_unregister_driver(drv, i); + } + + + return ret; +} + +int cpuidle_register_cpu_driver(struct cpuidle_driver *drv, int cpu) +{ + int ret; + + spin_lock(&cpuidle_driver_lock); + ret = __cpuidle_register_driver(drv, cpu); + spin_unlock(&cpuidle_driver_lock); + + return ret; +} + +void cpuidle_unregister_cpu_driver(struct cpuidle_driver *drv, int cpu) +{ + 
spin_lock(&cpuidle_driver_lock); + __cpuidle_unregister_driver(drv, cpu); + spin_unlock(&cpuidle_driver_lock); } /** @@ -88,7 +146,7 @@ int cpuidle_register_driver(struct cpuidle_driver *drv) int ret; spin_lock(&cpuidle_driver_lock); - ret = __cpuidle_register_driver(drv); + ret = __cpuidle_register_all_cpu_driver(drv); spin_unlock(&cpuidle_driver_lock); return ret; @@ -96,13 +154,48 @@ int cpuidle_register_driver(struct cpuidle_driver *drv) EXPORT_SYMBOL_GPL(cpuidle_register_driver); /** - * cpuidle_get_driver - return the current driver + * cpuidle_unregister_driver - unregisters a driver + * @drv: the driver */ -struct cpuidle_driver *cpuidle_get_driver(void) +void cpuidle_unregister_driver(struct cpuidle_driver *drv) +{ + spin_lock(&cpuidle_driver_lock); + __cpuidle_unregister_all_cpu_driver(drv); + spin_unlock(&cpuidle_driver_lock); +} +EXPORT_SYMBOL_GPL(cpuidle_unregister_driver); + +#else + +static struct cpuidle_driver *cpuidle_curr_driver; + +static inline void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu) +{ + cpuidle_curr_driver = drv; +} + +static inline struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu) { return cpuidle_curr_driver; } -EXPORT_SYMBOL_GPL(cpuidle_get_driver); + +/** + * cpuidle_register_driver - registers a driver + * @drv: the driver + */ +int cpuidle_register_driver(struct cpuidle_driver *drv) +{ + int ret, cpu; + + cpu = get_cpu(); + spin_lock(&cpuidle_driver_lock); + ret = __cpuidle_register_driver(drv, cpu); + spin_unlock(&cpuidle_driver_lock); + put_cpu(); + + return ret; +} +EXPORT_SYMBOL_GPL(cpuidle_register_driver); /** * cpuidle_unregister_driver - unregisters a driver @@ -110,11 +203,50 @@ EXPORT_SYMBOL_GPL(cpuidle_get_driver); */ void cpuidle_unregister_driver(struct cpuidle_driver *drv) { + int cpu; + + cpu = get_cpu(); spin_lock(&cpuidle_driver_lock); - __cpuidle_unregister_driver(drv); + __cpuidle_unregister_driver(drv, cpu); spin_unlock(&cpuidle_driver_lock); + put_cpu(); } 
EXPORT_SYMBOL_GPL(cpuidle_unregister_driver); +#endif + +/** + * cpuidle_get_driver - return the current driver + */ +struct cpuidle_driver *cpuidle_get_driver(void) +{ + struct cpuidle_driver *drv; + int cpu; + + cpu = get_cpu(); + drv = __cpuidle_get_cpu_driver(cpu); + put_cpu(); + + return drv; +} +EXPORT_SYMBOL_GPL(cpuidle_get_driver); + +/** + * cpuidle_get_cpu_driver - return the driver tied with a cpu + */ +struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev) +{ + struct cpuidle_driver *drv; + + if (!dev) + return NULL; + + spin_lock(&cpuidle_driver_lock); + drv = __cpuidle_get_cpu_driver(dev->cpu); + spin_unlock(&cpuidle_driver_lock); + + return drv; +} +EXPORT_SYMBOL_GPL(cpuidle_get_cpu_driver); struct cpuidle_driver *cpuidle_driver_ref(void) { diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 49b1f4bc..3409429 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -364,17 +364,17 @@ static inline void cpuidle_free_state_kobj(struct cpuidle_device *device, int i) } /** - * cpuidle_add_driver_sysfs - adds driver-specific sysfs attributes + * cpuidle_add_state_sysfs - adds cpuidle states sysfs attributes * @device: the target device */ -int cpuidle_add_state_sysfs(struct cpuidle_device *device) +static int cpuidle_add_state_sysfs(struct cpuidle_device *device) { int i, ret = -ENOMEM; struct cpuidle_state_kobj *kobj; - struct cpuidle_driver *drv = cpuidle_get_driver(); + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device); /* state statistics */ - for (i = 0; i < device->state_count; i++) { + for (i = 0; i < drv->state_count; i++) { kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL); if (!kobj) goto error_state; @@ -401,10 +401,10 @@ error_state: } /** - * cpuidle_remove_driver_sysfs - removes driver-specific sysfs attributes + * cpuidle_remove_state_sysfs - removes the cpuidle states sysfs attributes * @device: the target device */ -void cpuidle_remove_state_sysfs(struct
cpuidle_device *device) +static void cpuidle_remove_state_sysfs(struct cpuidle_device *device) { int i; @@ -412,6 +412,168 @@ void cpuidle_remove_state_sysfs(struct cpuidle_device *device) cpuidle_free_state_kobj(device, i); } +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS +#define kobj_to_driver_kobj(k) container_of(k, struct cpuidle_driver_kobj, kobj) +#define attr_to_driver_attr(a) container_of(a, struct cpuidle_driver_attr, attr) + +#define define_one_driver_ro(_name, show) \ + static struct cpuidle_driver_attr attr_driver_##_name = \ + __ATTR(_name, 0644, show, NULL) + +struct cpuidle_driver_kobj { + struct cpuidle_driver *drv; + struct completion kobj_unregister; + struct kobject kobj; +}; + +struct cpuidle_driver_attr { + struct attribute attr; + ssize_t (*show)(struct cpuidle_driver *, char *); + ssize_t (*store)(struct cpuidle_driver *, const char *, size_t); +}; + +static ssize_t show_driver_name(struct cpuidle_driver *drv, char *buf) +{ + ssize_t ret; + + spin_lock(&cpuidle_driver_lock); + ret = sprintf(buf, "%s\n", drv ? 
drv->name : "none"); + spin_unlock(&cpuidle_driver_lock); + + return ret; +} + +static void cpuidle_driver_sysfs_release(struct kobject *kobj) +{ + struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj); + complete(&driver_kobj->kobj_unregister); +} + +static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute * attr, + char * buf) +{ + int ret = -EIO; + struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj); + struct cpuidle_driver_attr *dattr = attr_to_driver_attr(attr); + + if (dattr->show) + ret = dattr->show(driver_kobj->drv, buf); + + return ret; +} + +static ssize_t cpuidle_driver_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t size) +{ + int ret = -EIO; + struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj); + struct cpuidle_driver_attr *dattr = attr_to_driver_attr(attr); + + if (dattr->store) + ret = dattr->store(driver_kobj->drv, buf, size); + + return ret; +} + +define_one_driver_ro(name, show_driver_name); + +static const struct sysfs_ops cpuidle_driver_sysfs_ops = { + .show = cpuidle_driver_show, + .store = cpuidle_driver_store, +}; + +static struct attribute *cpuidle_driver_default_attrs[] = { + &attr_driver_name.attr, + NULL +}; + +static struct kobj_type ktype_driver_cpuidle = { + .sysfs_ops = &cpuidle_driver_sysfs_ops, + .default_attrs = cpuidle_driver_default_attrs, + .release = cpuidle_driver_sysfs_release, +}; + +/** + * cpuidle_add_driver_sysfs - adds the driver name sysfs attribute + * @device: the target device + */ +static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) +{ + struct cpuidle_driver_kobj *kdrv; + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + int ret; + + kdrv = kzalloc(sizeof(*kdrv), GFP_KERNEL); + if (!kdrv) + return -ENOMEM; + + kdrv->drv = drv; + init_completion(&kdrv->kobj_unregister); + + ret = kobject_init_and_add(&kdrv->kobj, &ktype_driver_cpuidle, + &dev->kobj, "driver"); + if (ret) { + kfree(kdrv); + return ret; 
+ } + + kobject_uevent(&kdrv->kobj, KOBJ_ADD); + dev->kobj_driver = kdrv; + + return ret; +} + +/** + * cpuidle_remove_driver_sysfs - removes the driver name sysfs attribute + * @device: the target device + */ +static void cpuidle_remove_driver_sysfs(struct cpuidle_device *dev) +{ + struct cpuidle_driver_kobj *kdrv = dev->kobj_driver; + kobject_put(&kdrv->kobj); + wait_for_completion(&kdrv->kobj_unregister); + kfree(kdrv); +} +#else +static inline int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) +{ + return 0; +} + +static inline void cpuidle_remove_driver_sysfs(struct cpuidle_device *dev) +{ + ; +} +#endif + +/** + * cpuidle_add_device_sysfs - adds device specific sysfs attributes + * @device: the target device + */ +int cpuidle_add_device_sysfs(struct cpuidle_device *device) +{ + int ret; + + ret = cpuidle_add_state_sysfs(device); + if (ret) + return ret; + + ret = cpuidle_add_driver_sysfs(device); + if (ret) + cpuidle_remove_state_sysfs(device); + return ret; +} + +/** + * cpuidle_remove_device_sysfs : removes device specific sysfs attributes + * @device : the target device + */ +void cpuidle_remove_device_sysfs(struct cpuidle_device *device) +{ + cpuidle_remove_driver_sysfs(device); + cpuidle_remove_state_sysfs(device); +} + /** * cpuidle_add_sysfs - creates a sysfs instance for the target device * @dev: the target device diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index d08e1af..3711b34 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -91,7 +91,7 @@ struct cpuidle_device { int state_count; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; - + struct cpuidle_driver_kobj *kobj_driver; struct list_head device_list; struct kobject kobj; struct completion kobj_unregister; @@ -157,6 +157,10 @@ extern int cpuidle_wrap_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index)); extern int cpuidle_play_dead(void); +extern struct cpuidle_driver 
*cpuidle_get_cpu_driver(struct cpuidle_device *dev); +extern int cpuidle_register_cpu_driver(struct cpuidle_driver *drv, int cpu); +extern void cpuidle_unregister_cpu_driver(struct cpuidle_driver *drv, int cpu); + #else static inline void disable_cpuidle(void) { } static inline int cpuidle_idle_call(void) { return -ENODEV; } @@ -183,7 +187,6 @@ static inline int cpuidle_wrap_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index)) { return -ENODEV; } static inline int cpuidle_play_dead(void) {return -ENODEV; } - #endif #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED -- cgit v0.10.2 From a093b93ee0e08cd73a07848752bc09ecea68cb13 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Fri, 23 Nov 2012 00:05:03 +0100 Subject: cpuidle: fix a suspicious RCU usage in menu governor I saw this suspicious RCU usage on the next tree of 11/15 [ 67.123404] =============================== [ 67.123413] [ INFO: suspicious RCU usage. ] [ 67.123423] 3.7.0-rc5-next-20121115-dirty #1 Not tainted [ 67.123434] ------------------------------- [ 67.123444] include/trace/events/timer.h:186 suspicious rcu_dereference_check() usage! [ 67.123458] [ 67.123458] other info that might help us debug this: [ 67.123458] [ 67.123474] [ 67.123474] RCU used illegally from idle CPU! [ 67.123474] rcu_scheduler_active = 1, debug_locks = 0 [ 67.123493] RCU used illegally from extended quiescent state! 
[ 67.123507] 1 lock held by swapper/1/0: [ 67.123516] #0: (&cpu_base->lock){-.-...}, at: [] .__hrtimer_start_range_ns+0x28c/0x524 [ 67.123555] [ 67.123555] stack backtrace: [ 67.123566] Call Trace: [ 67.123576] [c0000001e2ccb920] [c00000000001275c] .show_stack+0x78/0x184 (unreliable) [ 67.123599] [c0000001e2ccb9d0] [c0000000000c15a0] .lockdep_rcu_suspicious+0x120/0x148 [ 67.123619] [c0000001e2ccba70] [c00000000009601c] .enqueue_hrtimer+0x1c0/0x1c8 [ 67.123639] [c0000001e2ccbb00] [c000000000097aa0] .__hrtimer_start_range_ns+0x37c/0x524 [ 67.123660] [c0000001e2ccbc20] [c0000000005c9698] .menu_select+0x508/0x5bc [ 67.123678] [c0000001e2ccbd20] [c0000000005c740c] .cpuidle_idle_call+0xa8/0x6e4 [ 67.123699] [c0000001e2ccbdd0] [c0000000000459a0] .pSeries_idle+0x10/0x34 [ 67.123717] [c0000001e2ccbe40] [c000000000014dc8] .cpu_idle+0x130/0x280 [ 67.123738] [c0000001e2ccbee0] [c0000000006ffa8c] .start_secondary+0x378/0x384 [ 67.123758] [c0000001e2ccbf90] [c00000000000936c] .start_secondary_prolog+0x10/0x14 hrtimer_start was added in 198fd638 and ae515197. The patch below tries to use RCU_NONIDLE around it to avoid the above report. Signed-off-by: Li Zhong Acked-by: Paul E. McKenney Reviewed-by: Rik van Riel Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 2efee27..bd40b94 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -407,8 +407,9 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) perfect_us = perfect_cstate_ms * 1000; if (repeat && (4 * timer_us < data->expected_us)) { - hrtimer_start(hrtmr, ns_to_ktime(1000 * timer_us), - HRTIMER_MODE_REL_PINNED); + RCU_NONIDLE(hrtimer_start(hrtmr, + ns_to_ktime(1000 * timer_us), + HRTIMER_MODE_REL_PINNED)); /* In repeat case, menu hrtimer is started */ per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT; } else if (perfect_us < data->expected_us) { @@ -418,8 +419,9 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) * In that case, it makes sense to re-enter * into a deeper C-state after some time. */ - hrtimer_start(hrtmr, ns_to_ktime(1000 * timer_us), - HRTIMER_MODE_REL_PINNED); + RCU_NONIDLE(hrtimer_start(hrtmr, + ns_to_ktime(1000 * timer_us), + HRTIMER_MODE_REL_PINNED)); /* In general case, menu hrtimer is started */ per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_GENERAL; } -- cgit v0.10.2 From a474a515497ef3566cfc17a2cab3d54d6d50ff1c Mon Sep 17 00:00:00 2001 From: Julius Werner Date: Tue, 27 Nov 2012 14:17:58 +0100 Subject: cpuidle: Measure idle state durations with monotonic clock Many cpuidle drivers measure their time spent in an idle state by reading the wallclock time before and after idling and calculating the difference. This leads to erroneous results when the wallclock time gets updated by another processor in the meantime, adding that clock adjustment to the idle state's time counter. If the clock adjustment was negative, the result is even worse due to an erroneous cast from int to unsigned long long of the last_residency variable. 
The negative 32 bit integer will zero-extend and result in a forward time jump of roughly four billion microseconds (about 1.2 hours) on the idle state residency counter. This patch changes all affected cpuidle drivers to either use the monotonic clock for their measurements or make use of the generic time measurement wrapper in cpuidle.c, which was already working correctly. Some superfluous CLIs/STIs in the ACPI code are removed (interrupts should always already be disabled before entering the idle function, and not get reenabled until the generic wrapper has performed its second measurement). It also removes the erroneous cast, making sure that negative residency values are applied correctly even though they should not appear anymore. Signed-off-by: Julius Werner Reviewed-by: Preeti U Murthy Tested-by: Daniel Lezcano Acked-by: Daniel Lezcano Acked-by: Len Brown Signed-off-by: Rafael J. Wysocki diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c index 45d00e5..4d806b4 100644 --- a/arch/powerpc/platforms/pseries/processor_idle.c +++ b/arch/powerpc/platforms/pseries/processor_idle.c @@ -36,7 +36,7 @@ static struct cpuidle_state *cpuidle_state_table; static inline void idle_loop_prolog(unsigned long *in_purr, ktime_t *kt_before) { - *kt_before = ktime_get_real(); + *kt_before = ktime_get(); *in_purr = mfspr(SPRN_PURR); /* * Indicate to the HV that we are idle.
Now would be @@ -50,7 +50,7 @@ static inline s64 idle_loop_epilog(unsigned long in_purr, ktime_t kt_before) get_lppaca()->wait_state_cycles += mfspr(SPRN_PURR) - in_purr; get_lppaca()->idle = 0; - return ktime_to_us(ktime_sub(ktime_get_real(), kt_before)); + return ktime_to_us(ktime_sub(ktime_get(), kt_before)); } static int snooze_loop(struct cpuidle_device *dev, diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index e8086c7..f1a5da4 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -735,31 +735,18 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) static int acpi_idle_enter_c1(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - ktime_t kt1, kt2; - s64 idle_time; struct acpi_processor *pr; struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); pr = __this_cpu_read(processors); - dev->last_residency = 0; if (unlikely(!pr)) return -EINVAL; - local_irq_disable(); - - lapic_timer_state_broadcast(pr, cx, 1); - kt1 = ktime_get_real(); acpi_idle_do_entry(cx); - kt2 = ktime_get_real(); - idle_time = ktime_to_us(ktime_sub(kt2, kt1)); - - /* Update device last_residency*/ - dev->last_residency = (int)idle_time; - local_irq_enable(); lapic_timer_state_broadcast(pr, cx, 0); return index; @@ -806,19 +793,12 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, struct acpi_processor *pr; struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); - ktime_t kt1, kt2; - s64 idle_time_ns; - s64 idle_time; pr = __this_cpu_read(processors); - dev->last_residency = 0; if (unlikely(!pr)) return -EINVAL; - local_irq_disable(); - - if (cx->entry_method != ACPI_CSTATE_FFH) { current_thread_info()->status &= ~TS_POLLING; /* @@ -829,7 +809,6 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, if 
(unlikely(need_resched())) { current_thread_info()->status |= TS_POLLING; - local_irq_enable(); return -EINVAL; } } @@ -843,22 +822,12 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, if (cx->type == ACPI_STATE_C3) ACPI_FLUSH_CPU_CACHE(); - kt1 = ktime_get_real(); /* Tell the scheduler that we are going deep-idle: */ sched_clock_idle_sleep_event(); acpi_idle_do_entry(cx); - kt2 = ktime_get_real(); - idle_time_ns = ktime_to_ns(ktime_sub(kt2, kt1)); - idle_time = idle_time_ns; - do_div(idle_time, NSEC_PER_USEC); - /* Update device last_residency*/ - dev->last_residency = (int)idle_time; + sched_clock_idle_wakeup_event(0); - /* Tell the scheduler how much we idled: */ - sched_clock_idle_wakeup_event(idle_time_ns); - - local_irq_enable(); if (cx->entry_method != ACPI_CSTATE_FFH) current_thread_info()->status |= TS_POLLING; @@ -883,13 +852,8 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, struct acpi_processor *pr; struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); - ktime_t kt1, kt2; - s64 idle_time_ns; - s64 idle_time; - pr = __this_cpu_read(processors); - dev->last_residency = 0; if (unlikely(!pr)) return -EINVAL; @@ -899,16 +863,11 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, return drv->states[drv->safe_state_index].enter(dev, drv, drv->safe_state_index); } else { - local_irq_disable(); acpi_safe_halt(); - local_irq_enable(); return -EBUSY; } } - local_irq_disable(); - - if (cx->entry_method != ACPI_CSTATE_FFH) { current_thread_info()->status &= ~TS_POLLING; /* @@ -919,7 +878,6 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, if (unlikely(need_resched())) { current_thread_info()->status |= TS_POLLING; - local_irq_enable(); return -EINVAL; } } @@ -934,7 +892,6 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, */ lapic_timer_state_broadcast(pr, cx, 1); - kt1 = ktime_get_real(); /* * disable bus master * bm_check 
implies we need ARB_DIS @@ -965,18 +922,9 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, c3_cpu_count--; raw_spin_unlock(&c3_lock); } - kt2 = ktime_get_real(); - idle_time_ns = ktime_to_ns(ktime_sub(kt2, kt1)); - idle_time = idle_time_ns; - do_div(idle_time, NSEC_PER_USEC); - - /* Update device last_residency*/ - dev->last_residency = (int)idle_time; - /* Tell the scheduler how much we idled: */ - sched_clock_idle_wakeup_event(idle_time_ns); + sched_clock_idle_wakeup_event(0); - local_irq_enable(); if (cx->entry_method != ACPI_CSTATE_FFH) current_thread_info()->status |= TS_POLLING; @@ -987,6 +935,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, struct cpuidle_driver acpi_idle_driver = { .name = "acpi_idle", .owner = THIS_MODULE, + .en_core_tk_irqen = 1, }; /** diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 711dd83..8df53dd 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -109,8 +109,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, /* This can be moved to within driver enter routine * but that results in multiple copies of same code. 
*/ - dev->states_usage[entered_state].time += - (unsigned long long)dev->last_residency; + dev->states_usage[entered_state].time += dev->last_residency; dev->states_usage[entered_state].usage++; } else { dev->last_residency = 0; diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index b0f6b4c..c49c04d 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -56,7 +56,6 @@ #include #include #include -#include /* ktime_get_real() */ #include #include #include @@ -72,6 +71,7 @@ static struct cpuidle_driver intel_idle_driver = { .name = "intel_idle", .owner = THIS_MODULE, + .en_core_tk_irqen = 1, }; /* intel_idle.max_cstate=0 disables driver */ static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1; @@ -281,8 +281,6 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; unsigned long eax = (unsigned long)cpuidle_get_statedata(state_usage); unsigned int cstate; - ktime_t kt_before, kt_after; - s64 usec_delta; int cpu = smp_processor_id(); cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1; @@ -297,8 +295,6 @@ static int intel_idle(struct cpuidle_device *dev, if (!(lapic_timer_reliable_states & (1 << (cstate)))) clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); - kt_before = ktime_get_real(); - stop_critical_timings(); if (!need_resched()) { @@ -310,17 +306,9 @@ static int intel_idle(struct cpuidle_device *dev, start_critical_timings(); - kt_after = ktime_get_real(); - usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before)); - - local_irq_enable(); - if (!(lapic_timer_reliable_states & (1 << (cstate)))) clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); - /* Update cpuidle counters */ - dev->last_residency = (int)usec_delta; - return index; } -- cgit v0.10.2