From ddbd550d503c9cdefcd6674a0ef168d57d3f0917 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 13 Dec 2010 18:28:22 -0500 Subject: intel_idle: update Sandy Bridge core C-state residency targets Signed-off-by: Len Brown diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index c131d58..94a6526 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -122,7 +122,7 @@ static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = { .driver_data = (void *) 0x00, .flags = CPUIDLE_FLAG_TIME_VALID, .exit_latency = 1, - .target_residency = 4, + .target_residency = 1, .enter = &intel_idle }, { /* MWAIT C2 */ .name = "SNB-C3", @@ -130,7 +130,7 @@ static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = { .driver_data = (void *) 0x10, .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, - .target_residency = 160, + .target_residency = 211, .enter = &intel_idle }, { /* MWAIT C3 */ .name = "SNB-C6", @@ -138,7 +138,7 @@ static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = { .driver_data = (void *) 0x20, .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 104, - .target_residency = 208, + .target_residency = 345, .enter = &intel_idle }, { /* MWAIT C4 */ .name = "SNB-C7", @@ -146,7 +146,7 @@ static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = { .driver_data = (void *) 0x30, .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 109, - .target_residency = 300, + .target_residency = 345, .enter = &intel_idle }, }; -- cgit v0.10.2 From d8c216cfa57e8a579f41729cbb88c97835d9ac8d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 8 Jan 2011 00:29:20 +0100 Subject: cpuidle: Make cpuidle_enable_device() call poll_idle_init() The following scenario is possible with the current cpuidle code and the ACPI cpuidle driver: (1) acpi_processor_cst_has_changed() is called, (2) cpuidle_disable_device() is called, (3) cpuidle_remove_state_sysfs() is called to remove the (presumably outdated) states info from sysfs, (3) acpi_processor_get_power_info() is called, the first entry in the pr->power.states[] table is filled with zeros, (4) acpi_processor_setup_cpuidle() is called and it doesn't fill the first entry in pr->power.states[], (5) cpuidle_enable_device() is called, (6) __cpuidle_register_device() is _not_ called, since the device has already been registered, (7) Consequently, poll_idle_init() is _not_ called either, (8) cpuidle_add_state_sysfs() is called to create the sysfs attributes for the new states and it uses the bogus first table entry from acpi_processor_get_power_info() for creating state0. This problem is avoided if cpuidle_enable_device() unconditionally calls poll_idle_init(). Reported-by: Len Brown Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown cc: stable@kernel.org diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index a507108..97df791 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -154,6 +154,45 @@ void cpuidle_resume_and_unlock(void) EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock); +#ifdef CONFIG_ARCH_HAS_CPU_RELAX +static int poll_idle(struct cpuidle_device *dev, struct cpuidle_state *st) +{ + ktime_t t1, t2; + s64 diff; + int ret; + + t1 = ktime_get(); + local_irq_enable(); + while (!need_resched()) + cpu_relax(); + + t2 = ktime_get(); + diff = ktime_to_us(ktime_sub(t2, t1)); + if (diff > INT_MAX) + diff = INT_MAX; + + ret = (int) diff; + return ret; +} + +static void poll_idle_init(struct cpuidle_device *dev) +{ + struct cpuidle_state *state = &dev->states[0]; + + cpuidle_set_statedata(state, NULL); + + snprintf(state->name, CPUIDLE_NAME_LEN, "C0"); + snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE"); + state->exit_latency = 0; + state->target_residency = 0; + state->power_usage = -1; + state->flags = CPUIDLE_FLAG_POLL; + state->enter = poll_idle; +} +#else +static void poll_idle_init(struct cpuidle_device *dev) {} +#endif /* CONFIG_ARCH_HAS_CPU_RELAX */ + /** * cpuidle_enable_device - enables idle PM for a CPU * @dev: the CPU @@ -178,6 +217,8 @@ int cpuidle_enable_device(struct cpuidle_device *dev) return ret; } + poll_idle_init(dev); + if ((ret = cpuidle_add_state_sysfs(dev))) return ret; @@ -232,45 +273,6 @@ void cpuidle_disable_device(struct cpuidle_device *dev) EXPORT_SYMBOL_GPL(cpuidle_disable_device); -#ifdef CONFIG_ARCH_HAS_CPU_RELAX -static int poll_idle(struct cpuidle_device *dev, struct cpuidle_state *st) -{ - ktime_t t1, t2; - s64 diff; - int ret; - - t1 = ktime_get(); - local_irq_enable(); - while (!need_resched()) - cpu_relax(); - - t2 = ktime_get(); - diff = ktime_to_us(ktime_sub(t2, t1)); - if (diff > INT_MAX) - diff = INT_MAX; - - ret = (int) diff; - return ret; -} - -static void poll_idle_init(struct cpuidle_device *dev) -{ - struct cpuidle_state *state = &dev->states[0]; - - cpuidle_set_statedata(state, NULL); - - snprintf(state->name, CPUIDLE_NAME_LEN, "C0"); - snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE"); - state->exit_latency = 0; - state->target_residency = 0; - state->power_usage = -1; - state->flags = CPUIDLE_FLAG_POLL; - state->enter = poll_idle; -} -#else -static void poll_idle_init(struct cpuidle_device *dev) {} -#endif /* CONFIG_ARCH_HAS_CPU_RELAX */ - /** * __cpuidle_register_device - internal register function called before register * and enable routines @@ -291,8 +293,6 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) init_completion(&dev->kobj_unregister); - poll_idle_init(dev); - /* * cpuidle driver should set the dev->power_specified bit * before registering the device if the driver provides -- cgit v0.10.2 From d18960494f65ca4fa0d67c865aaca99452070d15 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Wed, 3 Nov 2010 17:06:14 +0100 Subject: ACPI, intel_idle: Cleanup idle= internal variables Having four variables for the same thing: idle_halt, idle_nomwait, force_mwait and boot_option_idle_overrides is rather confusing and unnecessary complex. if idle= boot param is passed, only set up one variable: boot_option_idle_overrides Introduces following functional changes/fixes: - intel_idle driver does not register if any idle=xy boot param is passed. - processor_idle.c will also not register a cpuidle driver and get active if idle=halt is passed. Before a cpuidle driver with one (C1, halt) state got registered Now the default_idle function will be used which finally uses the same idle call to enter sleep state (safe_halt()), but without registering a whole cpuidle driver. That means idle= param will always avoid cpuidle drivers to register with one exception (same behavior as before): idle=nomwait may still register acpi_idle cpuidle driver, but C1 will not use mwait, but hlt. This can be a workaround for IO based deeper sleep states where C1 mwait causes problems. Signed-off-by: Thomas Renninger cc: x86@kernel.org Signed-off-by: Len Brown diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index 348e44d..03afe79 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -717,8 +717,9 @@ prefetchw (const void *x) #define spin_lock_prefetch(x) prefetchw(x) extern unsigned long boot_option_idle_override; -extern unsigned long idle_halt; -extern unsigned long idle_nomwait; + +enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_FORCE_MWAIT, + IDLE_NOMWAIT, IDLE_POLL}; #endif /* !__ASSEMBLY__ */ diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 16f1c7b..6d33c5c 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -53,12 +53,8 @@ void (*ia64_mark_idle)(int); -unsigned long boot_option_idle_override = 0; +unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; EXPORT_SYMBOL(boot_option_idle_override); -unsigned long idle_halt; -EXPORT_SYMBOL(idle_halt); -unsigned long idle_nomwait; -EXPORT_SYMBOL(idle_nomwait); void (*pm_idle) (void); EXPORT_SYMBOL(pm_idle); void (*pm_power_off) (void); diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index cae9c3c..b79bd98 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -762,10 +762,11 @@ extern void select_idle_routine(const struct cpuinfo_x86 *c); extern void init_c1e_mask(void); extern unsigned long boot_option_idle_override; -extern unsigned long idle_halt; -extern unsigned long idle_nomwait; extern bool c1e_detected; +enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, + IDLE_POLL, IDLE_FORCE_MWAIT}; + extern void enable_sep_cpu(void); extern int sysenter_setup(void); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 57d1868..b647215 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -22,11 +22,6 @@ #include #include -unsigned long idle_halt; -EXPORT_SYMBOL(idle_halt); -unsigned long idle_nomwait; -EXPORT_SYMBOL(idle_nomwait); - struct kmem_cache *task_xstate_cachep; EXPORT_SYMBOL_GPL(task_xstate_cachep); @@ -328,7 +323,7 @@ long sys_execve(const char __user *name, /* * Idle related variables and functions */ -unsigned long boot_option_idle_override = 0; +unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; EXPORT_SYMBOL(boot_option_idle_override); /* @@ -499,7 +494,6 @@ static void poll_idle(void) * * idle=mwait overrides this decision and forces the usage of mwait. */ -static int __cpuinitdata force_mwait; #define MWAIT_INFO 0x05 #define MWAIT_ECX_EXTENDED_INFO 0x01 @@ -509,7 +503,7 @@ static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; - if (force_mwait) + if (boot_option_idle_override == IDLE_FORCE_MWAIT) return 1; if (c->cpuid_level < MWAIT_INFO) @@ -629,9 +623,10 @@ static int __init idle_setup(char *str) if (!strcmp(str, "poll")) { printk("using polling idle threads.\n"); pm_idle = poll_idle; - } else if (!strcmp(str, "mwait")) - force_mwait = 1; - else if (!strcmp(str, "halt")) { + boot_option_idle_override = IDLE_POLL; + } else if (!strcmp(str, "mwait")) { + boot_option_idle_override = IDLE_FORCE_MWAIT; + } else if (!strcmp(str, "halt")) { /* * When the boot option of idle=halt is added, halt is * forced to be used for CPU idle. In such case CPU C2/C3 @@ -640,8 +635,7 @@ static int __init idle_setup(char *str) * the boot_option_idle_override. */ pm_idle = default_idle; - idle_halt = 1; - return 0; + boot_option_idle_override = IDLE_HALT; } else if (!strcmp(str, "nomwait")) { /* * If the boot option of "idle=nomwait" is added, @@ -649,12 +643,10 @@ static int __init idle_setup(char *str) * states. In such case it won't touch the variable * of boot_option_idle_override. */ - idle_nomwait = 1; - return 0; + boot_option_idle_override = IDLE_NOMWAIT; } else return -1; - boot_option_idle_override = 1; return 0; } early_param("idle", idle_setup); diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index bec561c..3c1a2fe 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -23,7 +23,7 @@ static int set_no_mwait(const struct dmi_system_id *id) { printk(KERN_NOTICE PREFIX "%s detected - " "disabling mwait for CPU C-states\n", id->ident); - idle_nomwait = 1; + boot_option_idle_override = IDLE_NOMWAIT; return 0; } @@ -283,7 +283,7 @@ acpi_processor_eval_pdc(acpi_handle handle, struct acpi_object_list *pdc_in) { acpi_status status = AE_OK; - if (idle_nomwait) { + if (boot_option_idle_override == IDLE_NOMWAIT) { /* * If mwait is disabled for CPU C-states, the C2C3_FFH access * mode will be disabled in the parameter of _PDC object. diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index dcb38f8..eefd4aa 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -79,6 +79,13 @@ module_param(bm_check_disable, uint, 0000); static unsigned int latency_factor __read_mostly = 2; module_param(latency_factor, uint, 0644); +static int disabled_by_idle_boot_param(void) +{ + return boot_option_idle_override == IDLE_POLL || + boot_option_idle_override == IDLE_FORCE_MWAIT || + boot_option_idle_override == IDLE_HALT; +} + /* * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3. * For now disable this. Probably a bug somewhere else. @@ -455,7 +462,7 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) continue; } if (cx.type == ACPI_STATE_C1 && - (idle_halt || idle_nomwait)) { + (boot_option_idle_override == IDLE_NOMWAIT)) { /* * In most cases the C1 space_id obtained from * _CST object is FIXED_HARDWARE access mode. @@ -1058,7 +1065,7 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr) { int ret = 0; - if (boot_option_idle_override) + if (disabled_by_idle_boot_param()) return 0; if (!pr) @@ -1089,19 +1096,10 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr, acpi_status status = 0; static int first_run; - if (boot_option_idle_override) + if (disabled_by_idle_boot_param()) return 0; if (!first_run) { - if (idle_halt) { - /* - * When the boot option of "idle=halt" is added, halt - * is used for CPU IDLE. - * In such case C2/C3 is meaningless. So the max_cstate - * is set to one. - */ - max_cstate = 1; - } dmi_check_system(processor_power_dmi_table); max_cstate = acpi_processor_cstate_check(max_cstate); if (max_cstate < ACPI_C_STATES_MAX) @@ -1142,7 +1140,7 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr, int acpi_processor_power_exit(struct acpi_processor *pr, struct acpi_device *device) { - if (boot_option_idle_override) + if (disabled_by_idle_boot_param()) return 0; cpuidle_unregister_device(&pr->power.dev); diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 94a6526..21d3871 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -404,6 +404,10 @@ static int __init intel_idle_init(void) { int retval; + /* Do not load intel_idle at all for now if idle= is passed */ + if (boot_option_idle_override != IDLE_NO_OVERRIDE) + return -ENODEV; + retval = intel_idle_probe(); if (retval) return retval; -- cgit v0.10.2 From 720f1c3010db6a411358b962a2007969117840bc Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 7 Jan 2011 11:29:43 +0100 Subject: cpuidle: Rename X86 specific idle poll state[0] from C0 to POLL C0 means and is well know as "not idle". All documentation out there uses this term as "running"/"not idle" state. Also Linux userspace tools (e.g. cpufreq-aperf and turbostat) show C0 residency which there is correct, but means something totally else than cpuidle "POLL" state. Signed-off-by: Thomas Renninger Signed-off-by: Len Brown diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 97df791..37e4460 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -181,7 +181,7 @@ static void poll_idle_init(struct cpuidle_device *dev) cpuidle_set_statedata(state, NULL); - snprintf(state->name, CPUIDLE_NAME_LEN, "C0"); + snprintf(state->name, CPUIDLE_NAME_LEN, "POLL"); snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE"); state->exit_latency = 0; state->target_residency = 0; -- cgit v0.10.2 From 0aae9f923bcc476a8e4725dd3ac37547b9816ee5 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 12 Jan 2011 02:22:56 -0500 Subject: ACPI: processor_idle: delete use of NOP CPUIDLE_FLAGs CPUIDLE_FLAG_SHALLOW CPUIDLE_FLAG_BALANCED CPUIDLE_FLAG_DEEP CPUIDLE_FLAG_CHECK_BM were set by acpi_processor_setup_cpuidle(), but never used by cpuidle or by acpi_idle. So stop setting them. Signed-off-by: Len Brown diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index eefd4aa..70599d2 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1023,7 +1023,6 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr) state->flags = 0; switch (cx->type) { case ACPI_STATE_C1: - state->flags |= CPUIDLE_FLAG_SHALLOW; if (cx->entry_method == ACPI_CSTATE_FFH) state->flags |= CPUIDLE_FLAG_TIME_VALID; @@ -1032,16 +1031,13 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr) break; case ACPI_STATE_C2: - state->flags |= CPUIDLE_FLAG_BALANCED; state->flags |= CPUIDLE_FLAG_TIME_VALID; state->enter = acpi_idle_enter_simple; dev->safe_state = state; break; case ACPI_STATE_C3: - state->flags |= CPUIDLE_FLAG_DEEP; state->flags |= CPUIDLE_FLAG_TIME_VALID; - state->flags |= CPUIDLE_FLAG_CHECK_BM; state->enter = pr->flags.bm_check ? acpi_idle_enter_bm : acpi_idle_enter_simple; -- cgit v0.10.2 From d247632c08c674864d438733280422ddb7130ff8 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 12 Jan 2011 02:34:59 -0500 Subject: cpuidle: delete NOP CPUIDLE_FLAG_POLL it serves no purpose Signed-off-by: Len Brown diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 37e4460..dd5f1ea 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -186,7 +186,7 @@ static void poll_idle_init(struct cpuidle_device *dev) state->exit_latency = 0; state->target_residency = 0; state->power_usage = -1; - state->flags = CPUIDLE_FLAG_POLL; + state->flags = 0; state->enter = poll_idle; } #else diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 1be416b..dee3285 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -48,7 +48,6 @@ struct cpuidle_state { /* Idle State Flags */ #define CPUIDLE_FLAG_TIME_VALID (0x01) /* is residency time measurable? */ #define CPUIDLE_FLAG_CHECK_BM (0x02) /* BM activity will exit state */ -#define CPUIDLE_FLAG_POLL (0x10) /* no latency, no savings */ #define CPUIDLE_FLAG_SHALLOW (0x20) /* low latency, minimal savings */ #define CPUIDLE_FLAG_BALANCED (0x40) /* medium latency, moderate savings */ #define CPUIDLE_FLAG_DEEP (0x80) /* high latency, large savings */ -- cgit v0.10.2 From 03d8b083511a7be2c69b750b8a4e412a21189291 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 12 Jan 2011 02:43:26 -0500 Subject: SH, cpuidle: delete use of NOP CPUIDLE_FLAGS_SHALLOW set but not checked. Signed-off-by: Len Brown diff --git a/arch/sh/kernel/cpu/shmobile/cpuidle.c b/arch/sh/kernel/cpu/shmobile/cpuidle.c index 83972aa..c19e2a9 100644 --- a/arch/sh/kernel/cpu/shmobile/cpuidle.c +++ b/arch/sh/kernel/cpu/shmobile/cpuidle.c @@ -81,7 +81,6 @@ void sh_mobile_setup_cpuidle(void) state->target_residency = 1 * 2; state->power_usage = 3; state->flags = 0; - state->flags |= CPUIDLE_FLAG_SHALLOW; state->flags |= CPUIDLE_FLAG_TIME_VALID; state->enter = cpuidle_sleep_enter; -- cgit v0.10.2 From 642f11c53f17aee991d97d14e6922172849ef227 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 12 Jan 2011 02:45:00 -0500 Subject: cpuidle: delete unused CPUIDLE_FLAG_SHALLOW, BALANCED, DEEP definitions Signed-off-by: Len Brown diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index dee3285..c252953 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -48,9 +48,6 @@ struct cpuidle_state { /* Idle State Flags */ #define CPUIDLE_FLAG_TIME_VALID (0x01) /* is residency time measurable? */ #define CPUIDLE_FLAG_CHECK_BM (0x02) /* BM activity will exit state */ -#define CPUIDLE_FLAG_SHALLOW (0x20) /* low latency, minimal savings */ -#define CPUIDLE_FLAG_BALANCED (0x40) /* medium latency, moderate savings */ -#define CPUIDLE_FLAG_DEEP (0x80) /* high latency, large savings */ #define CPUIDLE_FLAG_IGNORE (0x100) /* ignore during this idle period */ #define CPUIDLE_FLAG_TLB_FLUSHED (0x200) /* tlb will be flushed */ -- cgit v0.10.2 From 956d033fb2eb3f8818260cdf01644bf4dc1a9e33 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 12 Jan 2011 02:51:20 -0500 Subject: cpuidle: CPUIDLE_FLAG_TLB_FLUSHED is specific to intel_idle Signed-off-by: Len Brown diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 21d3871..8256309 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -82,6 +82,14 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state); static struct cpuidle_state *cpuidle_state_table; /* + * Set this flag for states where the HW flushes the TLB for us + * and so we don't need cross-calls to keep it consistent. + * If this flag is set, SW flushes the TLB, so even if the + * HW doesn't do the flushing, this flag is safe to use. + */ +#define CPUIDLE_FLAG_TLB_FLUSHED 0x10000 + +/* * States are indexed by the cstate number, * which is also the index into the MWAIT hint array. * Thus C0 is a dummy. diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index c252953..6be722c 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -49,7 +49,6 @@ struct cpuidle_state { #define CPUIDLE_FLAG_TIME_VALID (0x01) /* is residency time measurable? */ #define CPUIDLE_FLAG_CHECK_BM (0x02) /* BM activity will exit state */ #define CPUIDLE_FLAG_IGNORE (0x100) /* ignore during this idle period */ -#define CPUIDLE_FLAG_TLB_FLUSHED (0x200) /* tlb will be flushed */ #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) -- cgit v0.10.2 From 5392083748a340f68052c0b83a7ce28b10324972 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 12 Jan 2011 02:56:15 -0500 Subject: cpuidle: CPUIDLE_FLAG_CHECK_BM is omap3_idle specific Signed-off-by: Len Brown diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c index 0d50b45..f9be4e3 100644 --- a/arch/arm/mach-omap2/cpuidle34xx.c +++ b/arch/arm/mach-omap2/cpuidle34xx.c @@ -47,6 +47,8 @@ #define OMAP3_STATE_MAX OMAP3_STATE_C7 +#define CPUIDLE_FLAG_CHECK_BM 0x10000 /* use omap3_enter_idle_bm() */ + struct omap3_processor_cx { u8 valid; u8 type; diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 6be722c..36719ea 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -47,7 +47,6 @@ struct cpuidle_state { /* Idle State Flags */ #define CPUIDLE_FLAG_TIME_VALID (0x01) /* is residency time measurable? */ -#define CPUIDLE_FLAG_CHECK_BM (0x02) /* BM activity will exit state */ #define CPUIDLE_FLAG_IGNORE (0x100) /* ignore during this idle period */ #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) -- cgit v0.10.2 From 2a2d31c8dc6f1ebcf5eab1d93a0cb0fb4ed57c7c Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 10 Jan 2011 09:38:12 +0800 Subject: intel_idle: open broadcast clock event Intel_idle driver uses CLOCK_EVT_NOTIFY_BROADCAST_ENTER CLOCK_EVT_NOTIFY_BROADCAST_EXIT for broadcast clock events. The _ENTER/_EXIT doesn't really open broadcast clock events, please see processor_idle.c for an example. In some situation, this will cause boot hang, because some CPUs enters idle but local APIC timer stalls. Reported-and-tested-by: Yan Zheng Signed-off-by: Shaohua Li cc: stable@kernel.org Signed-off-by: Len Brown diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 8256309..fc39358 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -59,6 +59,8 @@ #include /* ktime_get_real() */ #include #include +#include +#include #include #define INTEL_IDLE_VERSION "0.4" @@ -73,6 +75,7 @@ static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1; static unsigned int mwait_substates; +#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF /* Reliable LAPIC Timer States, bit 1 for C1 etc. */ static unsigned int lapic_timer_reliable_states = (1 << 1); /* Default to only C1 */ @@ -252,6 +255,39 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state) return usec_delta; } +static void __setup_broadcast_timer(void *arg) +{ + unsigned long reason = (unsigned long)arg; + int cpu = smp_processor_id(); + + reason = reason ? + CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF; + + clockevents_notify(reason, &cpu); +} + +static int __cpuinit setup_broadcast_cpuhp_notify(struct notifier_block *n, + unsigned long action, void *hcpu) +{ + int hotcpu = (unsigned long)hcpu; + + switch (action & 0xf) { + case CPU_ONLINE: + smp_call_function_single(hotcpu, __setup_broadcast_timer, + (void *)true, 1); + break; + case CPU_DOWN_PREPARE: + smp_call_function_single(hotcpu, __setup_broadcast_timer, + (void *)false, 1); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata setup_broadcast_notifier = { + .notifier_call = setup_broadcast_cpuhp_notify, +}; + /* * intel_idle_probe() */ @@ -314,7 +350,11 @@ static int intel_idle_probe(void) } if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ - lapic_timer_reliable_states = 0xFFFFFFFF; + lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE; + else { + smp_call_function(__setup_broadcast_timer, (void *)true, 1); + register_cpu_notifier(&setup_broadcast_notifier); + } pr_debug(PREFIX "v" INTEL_IDLE_VERSION " model 0x%X\n", boot_cpu_data.x86_model); @@ -441,6 +481,11 @@ static void __exit intel_idle_exit(void) intel_idle_cpuidle_devices_uninit(); cpuidle_unregister_driver(&intel_idle_driver); + if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) { + smp_call_function(__setup_broadcast_timer, (void *)false, 1); + unregister_cpu_notifier(&setup_broadcast_notifier); + } + return; } -- cgit v0.10.2 From f77cfe4ea21760268c0277fa3e4b02dfd2a2c2f4 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 7 Jan 2011 11:29:44 +0100 Subject: cpuidle/x86/perf: fix power:cpu_idle double end events and throw cpu_idle events from the cpuidle layer Currently intel_idle and acpi_idle driver show double cpu_idle "exit idle" events -> this patch fixes it and makes cpu_idle events throwing less complex. It also introduces cpu_idle events for all architectures which use the cpuidle subsystem, namely: - arch/arm/mach-at91/cpuidle.c - arch/arm/mach-davinci/cpuidle.c - arch/arm/mach-kirkwood/cpuidle.c - arch/arm/mach-omap2/cpuidle34xx.c - arch/drivers/acpi/processor_idle.c (for all cases, not only mwait) - arch/x86/kernel/process.c (did throw events before, but was a mess) - drivers/idle/intel_idle.c (did throw events before) Convention should be: Fire cpu_idle events inside the current pm_idle function (not somewhere down the the callee tree) to keep things easy. Current possible pm_idle functions in X86: c1e_idle, poll_idle, cpuidle_idle_call, mwait_idle, default_idle -> this is really easy is now. This affects userspace: The type field of the cpu_idle power event can now direclty get mapped to: /sys/devices/system/cpu/cpuX/cpuidle/stateX/{name,desc,usage,time,...} instead of throwing very CPU/mwait specific values. This change is not visible for the intel_idle driver. For the acpi_idle driver it should only be visible if the vendor misses out C-states in his BIOS. Another (perf timechart) patch reads out cpuidle info of cpu_idle events from: /sys/.../cpuidle/stateX/*, then the cpuidle events are mapped to the correct C-/cpuidle state again, even if e.g. vendors miss out C-states in their BIOS and for example only export C1 and C3. -> everything is fine. Signed-off-by: Thomas Renninger CC: Robert Schoene CC: Jean Pihet CC: Arjan van de Ven CC: Ingo Molnar CC: Frederic Weisbecker CC: linux-pm@lists.linux-foundation.org CC: linux-acpi@vger.kernel.org CC: linux-kernel@vger.kernel.org CC: linux-perf-users@vger.kernel.org CC: linux-omap@vger.kernel.org Signed-off-by: Len Brown diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 09c08a1..67e96e6 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -386,6 +386,8 @@ void default_idle(void) else local_irq_enable(); current_thread_info()->status |= TS_POLLING; + trace_power_end(smp_processor_id()); + trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } else { local_irq_enable(); /* loop is done by the caller */ @@ -443,8 +445,6 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); */ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { - trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); - trace_cpu_idle((ax>>4)+1, smp_processor_id()); if (!need_resched()) { if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -471,6 +471,8 @@ static void mwait_idle(void) __sti_mwait(0, 0); else local_irq_enable(); + trace_power_end(smp_processor_id()); + trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } else local_irq_enable(); } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4b9befa..8d12878 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -57,8 +57,6 @@ #include #include -#include - asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); /* @@ -113,8 +111,6 @@ void cpu_idle(void) stop_critical_timings(); pm_idle(); start_critical_timings(); - trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4c818a7..bd387e8 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -51,8 +51,6 @@ #include #include -#include - asmlinkage extern void ret_from_fork(void); DEFINE_PER_CPU(unsigned long, old_rsp); @@ -141,10 +139,6 @@ void cpu_idle(void) pm_idle(); start_critical_timings(); - trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, - smp_processor_id()); - /* In many cases the interrupt that ended idle has already called exit_idle. But some idle loops can be woken up without interrupt. */ diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 386888f..e4855c3 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -96,7 +96,15 @@ static void cpuidle_idle_call(void) /* enter the state and update stats */ dev->last_state = target_state; + + trace_power_start(POWER_CSTATE, next_state, dev->cpu); + trace_cpu_idle(next_state, dev->cpu); + dev->last_residency = target_state->enter(dev, target_state); + + trace_power_end(dev->cpu); + trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu); + if (dev->last_state) target_state = dev->last_state; @@ -106,8 +114,6 @@ static void cpuidle_idle_call(void) /* give the governor an opportunity to reflect on the outcome */ if (cpuidle_curr_governor->reflect) cpuidle_curr_governor->reflect(dev); - trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } /** diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 56ac09d..60fa6ec 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -220,8 +220,6 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state) kt_before = ktime_get_real(); stop_critical_timings(); - trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu); - trace_cpu_idle((eax >> 4) + 1, cpu); if (!need_resched()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); -- cgit v0.10.2