From 4881f603d7b82df2bc15efd2a272f973a3bf8df1 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Fri, 25 Apr 2014 08:44:59 +0800 Subject: PM / hibernate: use unsigned local variables in swsusp_show_speed() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit do_div() needs 'u64' type, or it reports warning. And negative number is meaningless for "speed", so change all signed to unsigned within swsusp_show_speed(). The related warning (with allmodconfig for unicore32): CC kernel/power/hibernate.o kernel/power/hibernate.c: In function ‘swsusp_show_speed’: kernel/power/hibernate.c:237: warning: comparison of distinct pointer types lacks a cast Signed-off-by: Chen Gang [rjw: Subject] Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index f4f2073..de4b989 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -228,19 +228,23 @@ static void platform_recover(int platform_mode) void swsusp_show_speed(struct timeval *start, struct timeval *stop, unsigned nr_pages, char *msg) { - s64 elapsed_centisecs64; - int centisecs; - int k; - int kps; + u64 elapsed_centisecs64; + unsigned int centisecs; + unsigned int k; + unsigned int kps; elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start); + /* + * If "(s64)elapsed_centisecs64 < 0", it will print long elapsed time, + * it is obvious enough for what went wrong. + */ do_div(elapsed_centisecs64, NSEC_PER_SEC / 100); centisecs = elapsed_centisecs64; if (centisecs == 0) centisecs = 1; /* avoid div-by-zero */ k = nr_pages * (PAGE_SIZE / 1024); kps = (k * 100) / centisecs; - printk(KERN_INFO "PM: %s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", + printk(KERN_INFO "PM: %s %u kbytes in %u.%02u seconds (%u.%02u MB/s)\n", msg, k, centisecs / 100, centisecs % 100, kps / 1000, (kps % 1000) / 10); -- cgit v0.10.2 From 2c730785d9532d2a9c46e059bd6a6c9a764c539f Mon Sep 17 00:00:00 2001 From: Sebastian Capella Date: Mon, 21 Apr 2014 17:30:46 -0700 Subject: PM / hibernate: no kernel_power_off when pm_power_off NULL Reboot logic in kernel/reboot will avoid calling kernel_power_off when pm_power_off is null, and instead uses kernel_halt. Change hibernate's power_down to follow the behavior in the reboot call. Calling the notifier twice (once for SYS_POWER_OFF and again for SYS_HALT) causes a panic during hibernation on Kirkwood Openblocks A6 board. Signed-off-by: Sebastian Capella Reported-by: Ezequiel Garcia Reviewed-by: Pavel Machek Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index de4b989..1f08ac7 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -599,7 +599,8 @@ static void power_down(void) case HIBERNATION_PLATFORM: hibernation_platform_enter(); case HIBERNATION_SHUTDOWN: - kernel_power_off(); + if (pm_power_off) + kernel_power_off(); break; #ifdef CONFIG_SUSPEND case HIBERNATION_SUSPEND: @@ -627,7 +628,8 @@ static void power_down(void) * corruption after resume. */ printk(KERN_CRIT "PM: Please power down manually\n"); - while(1); + while (1) + cpu_relax(); } /** -- cgit v0.10.2 From 8a54cd5bd6ebf009b96ec79510b593f7ba5c0ff3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 6 May 2014 13:01:56 +0200 Subject: PM / hibernate: Documentation: Fix script for unswapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit System can have mmaped also character devices (e.g dri devices by X) or deleted files. Running cat on character devices is really bad idea (system can hang) so run cat only on regular files. Also mmaped files can have spaces in filenames. Signed-off-by: Pali Rohár [rjw: Subject] Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt index 079160e..f732a83 100644 --- a/Documentation/power/swsusp.txt +++ b/Documentation/power/swsusp.txt @@ -220,7 +220,10 @@ Q: After resuming, system is paging heavily, leading to very bad interactivity. A: Try running -cat `cat /proc/[0-9]*/maps | grep / | sed 's:.* /:/:' | sort -u` > /dev/null +cat /proc/[0-9]*/maps | grep / | sed 's:.* /:/:' | sort -u | while read file +do + test -f "$file" && cat "$file" > /dev/null +done after resume. swapoff -a; swapon -a may also be useful. -- cgit v0.10.2 From 317cf7e5e85e3ef9f23fc6dd8b2945ab4a258140 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Fri, 9 May 2014 23:32:08 +0200 Subject: PM / hibernate: convert simple_strtoul to kstrtoul Replace obsolete function. Signed-off-by: Fabian Frederick Signed-off-by: Andrew Morton Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 1f08ac7..2377ff7 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1115,7 +1115,10 @@ static int __init resumewait_setup(char *str) static int __init resumedelay_setup(char *str) { - resume_delay = simple_strtoul(str, NULL, 0); + int rc = kstrtoul(str, 0, (unsigned long *)&resume_delay); + + if (rc) + return rc; return 1; } -- cgit v0.10.2 From f6514be5fe7fe796041b673bad769510414ff2b9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 14 May 2014 19:08:46 +0300 Subject: PM / hibernate: Fix memory corruption in resumedelay_setup() In the original code "resume_delay" is an int so on 64 bits, the call to kstrtoul() will cause memory corruption. We may as well fix a style issue here as well and make "resume_delay" unsigned int, since that's what we pass to ssleep(). Fixes: 317cf7e5e85e (PM / hibernate: convert simple_strtoul to kstrtoul) Signed-off-by: Dan Carpenter Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 2377ff7..df88d55 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -35,7 +35,7 @@ static int nocompress; static int noresume; static int resume_wait; -static int resume_delay; +static unsigned int resume_delay; static char resume_file[256] = CONFIG_PM_STD_PARTITION; dev_t swsusp_resume_device; sector_t swsusp_resume_block; @@ -1115,7 +1115,7 @@ static int __init resumewait_setup(char *str) static int __init resumedelay_setup(char *str) { - int rc = kstrtoul(str, 0, (unsigned long *)&resume_delay); + int rc = kstrtouint(str, 0, &resume_delay); if (rc) return rc; -- cgit v0.10.2 From aae4518b3124b29f8dc81c829c704fd2df72e98b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 16 May 2014 02:46:50 +0200 Subject: PM / sleep: Mechanism to avoid resuming runtime-suspended devices unnecessarily Currently, some subsystems (e.g. PCI and the ACPI PM domain) have to resume all runtime-suspended devices during system suspend, mostly because those devices may need to be reprogrammed due to different wakeup settings for system sleep and for runtime PM. For some devices, though, it's OK to remain in runtime suspend throughout a complete system suspend/resume cycle (if the device was in runtime suspend at the start of the cycle). We would like to do this whenever possible, to avoid the overhead of extra power-up and power-down events. However, problems may arise because the device's descendants may require it to be at full power at various points during the cycle. Therefore the most straightforward way to do this safely is if the device and all its descendants can remain runtime suspended until the complete stage of system resume. To this end, introduce a new device PM flag, power.direct_complete and modify the PM core to use that flag as follows. If the ->prepare() callback of a device returns a positive number, the PM core will regard that as an indication that it may leave the device runtime-suspended. It will then check if the system power transition in progress is a suspend (and not hibernation in particular) and if the device is, indeed, runtime-suspended. In that case, the PM core will set the device's power.direct_complete flag. Otherwise it will clear power.direct_complete for the device and it also will later clear it for the device's parent (if there's one). Next, the PM core will not invoke the ->suspend() ->suspend_late(), ->suspend_irq(), ->resume_irq(), ->resume_early(), or ->resume() callbacks for all devices having power.direct_complete set. It will invoke their ->complete() callbacks, however, and those callbacks are then responsible for resuming the devices as appropriate, if necessary. For example, in some cases they may need to queue up runtime resume requests for the devices using pm_request_resume(). Changelog partly based on an Alan Stern's description of the idea (http://marc.info/?l=linux-pm&m=139940466625569&w=2). Signed-off-by: Rafael J. Wysocki Acked-by: Alan Stern diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 86d5e4f..343ffad 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -479,7 +479,7 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn TRACE_DEVICE(dev); TRACE_RESUME(0); - if (dev->power.syscore) + if (dev->power.syscore || dev->power.direct_complete) goto Out; if (!dev->power.is_noirq_suspended) @@ -605,7 +605,7 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn TRACE_DEVICE(dev); TRACE_RESUME(0); - if (dev->power.syscore) + if (dev->power.syscore || dev->power.direct_complete) goto Out; if (!dev->power.is_late_suspended) @@ -735,6 +735,12 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) if (dev->power.syscore) goto Complete; + if (dev->power.direct_complete) { + /* Match the pm_runtime_disable() in __device_suspend(). */ + pm_runtime_enable(dev); + goto Complete; + } + dpm_wait(dev->parent, async); dpm_watchdog_set(&wd, dev); device_lock(dev); @@ -1007,7 +1013,7 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a goto Complete; } - if (dev->power.syscore) + if (dev->power.syscore || dev->power.direct_complete) goto Complete; dpm_wait_for_children(dev, async); @@ -1146,7 +1152,7 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as goto Complete; } - if (dev->power.syscore) + if (dev->power.syscore || dev->power.direct_complete) goto Complete; dpm_wait_for_children(dev, async); @@ -1332,6 +1338,17 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) if (dev->power.syscore) goto Complete; + if (dev->power.direct_complete) { + if (pm_runtime_status_suspended(dev)) { + pm_runtime_disable(dev); + if (pm_runtime_suspended_if_enabled(dev)) + goto Complete; + + pm_runtime_enable(dev); + } + dev->power.direct_complete = false; + } + dpm_watchdog_set(&wd, dev); device_lock(dev); @@ -1382,10 +1399,19 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) End: if (!error) { + struct device *parent = dev->parent; + dev->power.is_suspended = true; - if (dev->power.wakeup_path - && dev->parent && !dev->parent->power.ignore_children) - dev->parent->power.wakeup_path = true; + if (parent) { + spin_lock_irq(&parent->power.lock); + + dev->parent->power.direct_complete = false; + if (dev->power.wakeup_path + && !dev->parent->power.ignore_children) + dev->parent->power.wakeup_path = true; + + spin_unlock_irq(&parent->power.lock); + } } device_unlock(dev); @@ -1487,7 +1513,7 @@ static int device_prepare(struct device *dev, pm_message_t state) { int (*callback)(struct device *) = NULL; char *info = NULL; - int error = 0; + int ret = 0; if (dev->power.syscore) return 0; @@ -1523,17 +1549,27 @@ static int device_prepare(struct device *dev, pm_message_t state) callback = dev->driver->pm->prepare; } - if (callback) { - error = callback(dev); - suspend_report_result(callback, error); - } + if (callback) + ret = callback(dev); device_unlock(dev); - if (error) + if (ret < 0) { + suspend_report_result(callback, ret); pm_runtime_put(dev); - - return error; + return ret; + } + /* + * A positive return value from ->prepare() means "this device appears + * to be runtime-suspended and its state is fine, so if it really is + * runtime-suspended, you can leave it in that state provided that you + * will do the same thing with all of its descendants". This only + * applies to suspend transitions, however. + */ + spin_lock_irq(&dev->power.lock); + dev->power.direct_complete = ret > 0 && state.event == PM_EVENT_SUSPEND; + spin_unlock_irq(&dev->power.lock); + return 0; } /** diff --git a/include/linux/pm.h b/include/linux/pm.h index d915d03..72c0fe0 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -93,13 +93,23 @@ typedef struct pm_message { * been registered) to recover from the race condition. * This method is executed for all kinds of suspend transitions and is * followed by one of the suspend callbacks: @suspend(), @freeze(), or - * @poweroff(). The PM core executes subsystem-level @prepare() for all - * devices before starting to invoke suspend callbacks for any of them, so - * generally devices may be assumed to be functional or to respond to - * runtime resume requests while @prepare() is being executed. However, - * device drivers may NOT assume anything about the availability of user - * space at that time and it is NOT valid to request firmware from within - * @prepare() (it's too late to do that). It also is NOT valid to allocate + * @poweroff(). If the transition is a suspend to memory or standby (that + * is, not related to hibernation), the return value of @prepare() may be + * used to indicate to the PM core to leave the device in runtime suspend + * if applicable. Namely, if @prepare() returns a positive number, the PM + * core will understand that as a declaration that the device appears to be + * runtime-suspended and it may be left in that state during the entire + * transition and during the subsequent resume if all of its descendants + * are left in runtime suspend too. If that happens, @complete() will be + * executed directly after @prepare() and it must ensure the proper + * functioning of the device after the system resume. + * The PM core executes subsystem-level @prepare() for all devices before + * starting to invoke suspend callbacks for any of them, so generally + * devices may be assumed to be functional or to respond to runtime resume + * requests while @prepare() is being executed. However, device drivers + * may NOT assume anything about the availability of user space at that + * time and it is NOT valid to request firmware from within @prepare() + * (it's too late to do that). It also is NOT valid to allocate * substantial amounts of memory from @prepare() in the GFP_KERNEL mode. * [To work around these limitations, drivers may register suspend and * hibernation notifiers to be executed before the freezing of tasks.] @@ -112,7 +122,16 @@ typedef struct pm_message { * of the other devices that the PM core has unsuccessfully attempted to * suspend earlier). * The PM core executes subsystem-level @complete() after it has executed - * the appropriate resume callbacks for all devices. + * the appropriate resume callbacks for all devices. If the corresponding + * @prepare() at the beginning of the suspend transition returned a + * positive number and the device was left in runtime suspend (without + * executing any suspend and resume callbacks for it), @complete() will be + * the only callback executed for the device during resume. In that case, + * @complete() must be prepared to do whatever is necessary to ensure the + * proper functioning of the device after the system resume. To this end, + * @complete() can check the power.direct_complete flag of the device to + * learn whether (unset) or not (set) the previous suspend and resume + * callbacks have been executed for it. * * @suspend: Executed before putting the system into a sleep state in which the * contents of main memory are preserved. The exact action to perform @@ -546,6 +565,7 @@ struct dev_pm_info { bool is_late_suspended:1; bool ignore_children:1; bool early_init:1; /* Owned by the PM core */ + bool direct_complete:1; /* Owned by the PM core */ spinlock_t lock; #ifdef CONFIG_PM_SLEEP struct list_head entry; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 2a5897a..43fd671 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -101,6 +101,11 @@ static inline bool pm_runtime_status_suspended(struct device *dev) return dev->power.runtime_status == RPM_SUSPENDED; } +static inline bool pm_runtime_suspended_if_enabled(struct device *dev) +{ + return pm_runtime_status_suspended(dev) && dev->power.disable_depth == 1; +} + static inline bool pm_runtime_enabled(struct device *dev) { return !dev->power.disable_depth; @@ -150,6 +155,7 @@ static inline void device_set_run_wake(struct device *dev, bool enable) {} static inline bool pm_runtime_suspended(struct device *dev) { return false; } static inline bool pm_runtime_active(struct device *dev) { return true; } static inline bool pm_runtime_status_suspended(struct device *dev) { return false; } +static inline bool pm_runtime_suspended_if_enabled(struct device *dev) { return false; } static inline bool pm_runtime_enabled(struct device *dev) { return false; } static inline void pm_runtime_no_callbacks(struct device *dev) {} -- cgit v0.10.2 From f71495f3f0c5f0801823d1235b271a4a415d3df8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 16 May 2014 02:47:37 +0200 Subject: PM / sleep: Update device PM documentation to cover direct_complete Update the device PM documentation in devices.txt and runtime_pm.txt to reflect the changes in the system suspend and resume handling related to the introduction of the new power.direct_complete flag. Signed-off-by: Rafael J. Wysocki Acked-by: Alan Stern diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 47d46df..d172bce 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -2,6 +2,7 @@ Device Power Management Copyright (c) 2010-2011 Rafael J. Wysocki , Novell Inc. Copyright (c) 2010 Alan Stern +Copyright (c) 2014 Intel Corp., Rafael J. Wysocki Most of the code in Linux is device drivers, so most of the Linux power @@ -326,6 +327,20 @@ the phases are: driver in some way for the upcoming system power transition, but it should not put the device into a low-power state. + For devices supporting runtime power management, the return value of the + prepare callback can be used to indicate to the PM core that it may + safely leave the device in runtime suspend (if runtime-suspended + already), provided that all of the device's descendants are also left in + runtime suspend. Namely, if the prepare callback returns a positive + number and that happens for all of the descendants of the device too, + and all of them (including the device itself) are runtime-suspended, the + PM core will skip the suspend, suspend_late and suspend_noirq suspend + phases as well as the resume_noirq, resume_early and resume phases of + the following system resume for all of these devices. In that case, + the complete callback will be called directly after the prepare callback + and is entirely responsible for bringing the device back to the + functional state as appropriate. + 2. The suspend methods should quiesce the device to stop it from performing I/O. They also may save the device registers and put it into the appropriate low-power state, depending on the bus type the device is on, @@ -400,12 +415,23 @@ When resuming from freeze, standby or memory sleep, the phases are: the resume callbacks occur; it's not necessary to wait until the complete phase. + Moreover, if the preceding prepare callback returned a positive number, + the device may have been left in runtime suspend throughout the whole + system suspend and resume (the suspend, suspend_late, suspend_noirq + phases of system suspend and the resume_noirq, resume_early, resume + phases of system resume may have been skipped for it). In that case, + the complete callback is entirely responsible for bringing the device + back to the functional state after system suspend if necessary. [For + example, it may need to queue up a runtime resume request for the device + for this purpose.] To check if that is the case, the complete callback + can consult the device's power.direct_complete flag. Namely, if that + flag is set when the complete callback is being run, it has been called + directly after the preceding prepare and special action may be required + to make the device work correctly afterward. + At the end of these phases, drivers should be as functional as they were before suspending: I/O can be performed using DMA and IRQs, and the relevant clocks are -gated on. Even if the device was in a low-power state before the system sleep -because of runtime power management, afterwards it should be back in its -full-power state. There are multiple reasons why it's best to do this; they are -discussed in more detail in Documentation/power/runtime_pm.txt. +gated on. However, the details here may again be platform-specific. For example, some systems support multiple "run" states, and the mode in effect at diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 5f96daf..e1bee8a 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -2,6 +2,7 @@ Runtime Power Management Framework for I/O Devices (C) 2009-2011 Rafael J. Wysocki , Novell Inc. (C) 2010 Alan Stern +(C) 2014 Intel Corp., Rafael J. Wysocki 1. Introduction @@ -444,6 +445,10 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: bool pm_runtime_status_suspended(struct device *dev); - return true if the device's runtime PM status is 'suspended' + bool pm_runtime_suspended_if_enabled(struct device *dev); + - return true if the device's runtime PM status is 'suspended' and its + 'power.disable_depth' field is equal to 1 + void pm_runtime_allow(struct device *dev); - set the power.runtime_auto flag for the device and decrease its usage counter (used by the /sys/devices/.../power/control interface to @@ -644,6 +649,18 @@ place (in particular, if the system is not waking up from hibernation), it may be more efficient to leave the devices that had been suspended before the system suspend began in the suspended state. +To this end, the PM core provides a mechanism allowing some coordination between +different levels of device hierarchy. Namely, if a system suspend .prepare() +callback returns a positive number for a device, that indicates to the PM core +that the device appears to be runtime-suspended and its state is fine, so it +may be left in runtime suspend provided that all of its descendants are also +left in runtime suspend. If that happens, the PM core will not execute any +system suspend and resume callbacks for all of those devices, except for the +complete callback, which is then entirely responsible for handling the device +as appropriate. This only applies to system suspend transitions that are not +related to hibernation (see Documentation/power/devices.txt for more +information). + The PM core does its best to reduce the probability of race conditions between the runtime PM and system suspend/resume (and hibernation) callbacks by carrying out the following operations: -- cgit v0.10.2 From 27ddcc6596e50cb8f03d2e83248897667811d8f6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 26 May 2014 13:40:47 +0200 Subject: PM / sleep: Add state field to pm_states[] entries To allow sleep states corresponding to the "mem", "standby" and "freeze" lables to be different from the pm_states[] indexes of those strings, introduce struct pm_sleep_state, consisting of a string label and a state number, and turn pm_states[] into an array of objects of that type. This modification should not lead to any functional changes. Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/main.c b/kernel/power/main.c index 6271bc4..8e81843 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -293,12 +293,12 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, { char *s = buf; #ifdef CONFIG_SUSPEND - int i; + suspend_state_t i; + + for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) + if (valid_state(i)) + s += sprintf(s,"%s ", pm_states[i].label); - for (i = 0; i < PM_SUSPEND_MAX; i++) { - if (pm_states[i] && valid_state(i)) - s += sprintf(s,"%s ", pm_states[i]); - } #endif #ifdef CONFIG_HIBERNATION s += sprintf(s, "%s\n", "disk"); @@ -314,7 +314,7 @@ static suspend_state_t decode_state(const char *buf, size_t n) { #ifdef CONFIG_SUSPEND suspend_state_t state = PM_SUSPEND_MIN; - const char * const *s; + struct pm_sleep_state *s; #endif char *p; int len; @@ -328,7 +328,7 @@ static suspend_state_t decode_state(const char *buf, size_t n) #ifdef CONFIG_SUSPEND for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) - if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) + if (len == strlen(s->label) && !strncmp(buf, s->label, len)) return state; #endif @@ -448,7 +448,7 @@ static ssize_t autosleep_show(struct kobject *kobj, #ifdef CONFIG_SUSPEND if (state < PM_SUSPEND_MAX) return sprintf(buf, "%s\n", valid_state(state) ? - pm_states[state] : "error"); + pm_states[state].label : "error"); #endif #ifdef CONFIG_HIBERNATION return sprintf(buf, "disk\n"); diff --git a/kernel/power/power.h b/kernel/power/power.h index 15f37ea..99539c5 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -178,8 +178,13 @@ extern void swsusp_show_speed(struct timeval *, struct timeval *, unsigned int, char *); #ifdef CONFIG_SUSPEND +struct pm_sleep_state { + const char *label; + suspend_state_t state; +}; + /* kernel/power/suspend.c */ -extern const char *const pm_states[]; +extern struct pm_sleep_state pm_states[]; extern bool valid_state(suspend_state_t state); extern int suspend_devices_and_enter(suspend_state_t state); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 155721f..5d93b138 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -31,10 +31,10 @@ #include "power.h" -const char *const pm_states[PM_SUSPEND_MAX] = { - [PM_SUSPEND_FREEZE] = "freeze", - [PM_SUSPEND_STANDBY] = "standby", - [PM_SUSPEND_MEM] = "mem", +struct pm_sleep_state pm_states[PM_SUSPEND_MAX] = { + [PM_SUSPEND_FREEZE] = { "freeze", PM_SUSPEND_FREEZE }, + [PM_SUSPEND_STANDBY] = { "standby", PM_SUSPEND_STANDBY }, + [PM_SUSPEND_MEM] = { "mem", PM_SUSPEND_MEM }, }; static const struct platform_suspend_ops *suspend_ops; @@ -343,7 +343,7 @@ static int enter_state(suspend_state_t state) sys_sync(); printk("done.\n"); - pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); + pr_debug("PM: Preparing system for %s sleep\n", pm_states[state].label); error = suspend_prepare(state); if (error) goto Unlock; @@ -351,7 +351,7 @@ static int enter_state(suspend_state_t state) if (suspend_test(TEST_FREEZER)) goto Finish; - pr_debug("PM: Entering %s sleep\n", pm_states[state]); + pr_debug("PM: Entering %s sleep\n", pm_states[state].label); pm_restrict_gfp_mask(); error = suspend_devices_and_enter(state); pm_restore_gfp_mask(); diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c index 9b2a1d5..d4e3ab1 100644 --- a/kernel/power/suspend_test.c +++ b/kernel/power/suspend_test.c @@ -92,13 +92,13 @@ static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state) } if (state == PM_SUSPEND_MEM) { - printk(info_test, pm_states[state]); + printk(info_test, pm_states[state].label); status = pm_suspend(state); if (status == -ENODEV) state = PM_SUSPEND_STANDBY; } if (state == PM_SUSPEND_STANDBY) { - printk(info_test, pm_states[state]); + printk(info_test, pm_states[state].label); status = pm_suspend(state); } if (status < 0) @@ -136,18 +136,16 @@ static char warn_bad_state[] __initdata = static int __init setup_test_suspend(char *value) { - unsigned i; + suspend_state_t i; /* "=mem" ==> "mem" */ value++; - for (i = 0; i < PM_SUSPEND_MAX; i++) { - if (!pm_states[i]) - continue; - if (strcmp(pm_states[i], value) != 0) - continue; - test_state = (__force suspend_state_t) i; - return 0; - } + for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) + if (!strcmp(pm_states[i].label, value)) { + test_state = pm_states[i].state; + return 0; + } + printk(warn_bad_state, value); return 0; } @@ -165,7 +163,7 @@ static int __init test_suspend(void) if (test_state == PM_SUSPEND_ON) goto done; if (!valid_state(test_state)) { - printk(warn_bad_state, pm_states[test_state]); + printk(warn_bad_state, pm_states[test_state].label); goto done; } -- cgit v0.10.2 From 43e8317b0bba1d6eb85f38a4a233d82d7c20d732 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 26 May 2014 13:40:53 +0200 Subject: PM / sleep: Use valid_state() for platform-dependent sleep states only Use the observation that, for platform-dependent sleep states (PM_SUSPEND_STANDBY, PM_SUSPEND_MEM), a given state is either always supported or always unsupported and store that information in pm_states[] instead of calling valid_state() every time we need to check it. Also do not use valid_state() for PM_SUSPEND_FREEZE, which is always valid, and move the pm_test_level validity check for PM_SUSPEND_FREEZE directly into enter_state(). Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/main.c b/kernel/power/main.c index 8e81843..9f51f0a 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -296,7 +296,7 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, suspend_state_t i; for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) - if (valid_state(i)) + if (pm_states[i].state) s += sprintf(s,"%s ", pm_states[i].label); #endif @@ -328,8 +328,9 @@ static suspend_state_t decode_state(const char *buf, size_t n) #ifdef CONFIG_SUSPEND for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) - if (len == strlen(s->label) && !strncmp(buf, s->label, len)) - return state; + if (s->state && len == strlen(s->label) + && !strncmp(buf, s->label, len)) + return s->state; #endif return PM_SUSPEND_ON; @@ -447,7 +448,7 @@ static ssize_t autosleep_show(struct kobject *kobj, #ifdef CONFIG_SUSPEND if (state < PM_SUSPEND_MAX) - return sprintf(buf, "%s\n", valid_state(state) ? + return sprintf(buf, "%s\n", pm_states[state].state ? pm_states[state].label : "error"); #endif #ifdef CONFIG_HIBERNATION diff --git a/kernel/power/power.h b/kernel/power/power.h index 99539c5..c60f13b 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -186,14 +186,12 @@ struct pm_sleep_state { /* kernel/power/suspend.c */ extern struct pm_sleep_state pm_states[]; -extern bool valid_state(suspend_state_t state); extern int suspend_devices_and_enter(suspend_state_t state); #else /* !CONFIG_SUSPEND */ static inline int suspend_devices_and_enter(suspend_state_t state) { return -ENOSYS; } -static inline bool valid_state(suspend_state_t state) { return false; } #endif /* !CONFIG_SUSPEND */ #ifdef CONFIG_PM_TEST_SUSPEND diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 5d93b138..00aca60 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -32,9 +32,9 @@ #include "power.h" struct pm_sleep_state pm_states[PM_SUSPEND_MAX] = { - [PM_SUSPEND_FREEZE] = { "freeze", PM_SUSPEND_FREEZE }, - [PM_SUSPEND_STANDBY] = { "standby", PM_SUSPEND_STANDBY }, - [PM_SUSPEND_MEM] = { "mem", PM_SUSPEND_MEM }, + [PM_SUSPEND_FREEZE] = { .label = "freeze", .state = PM_SUSPEND_FREEZE }, + [PM_SUSPEND_STANDBY] = { .label = "standby", }, + [PM_SUSPEND_MEM] = { .label = "mem", }, }; static const struct platform_suspend_ops *suspend_ops; @@ -68,42 +68,34 @@ void freeze_wake(void) } EXPORT_SYMBOL_GPL(freeze_wake); +static bool valid_state(suspend_state_t state) +{ + /* + * PM_SUSPEND_STANDBY and PM_SUSPEND_MEM states need low level + * support and need to be valid to the low level + * implementation, no valid callback implies that none are valid. + */ + return suspend_ops && suspend_ops->valid && suspend_ops->valid(state); +} + /** * suspend_set_ops - Set the global suspend method table. * @ops: Suspend operations to use. */ void suspend_set_ops(const struct platform_suspend_ops *ops) { + suspend_state_t i; + lock_system_sleep(); + suspend_ops = ops; + for (i = PM_SUSPEND_STANDBY; i <= PM_SUSPEND_MEM; i++) + pm_states[i].state = valid_state(i) ? i : 0; + unlock_system_sleep(); } EXPORT_SYMBOL_GPL(suspend_set_ops); -bool valid_state(suspend_state_t state) -{ - if (state == PM_SUSPEND_FREEZE) { -#ifdef CONFIG_PM_DEBUG - if (pm_test_level != TEST_NONE && - pm_test_level != TEST_FREEZER && - pm_test_level != TEST_DEVICES && - pm_test_level != TEST_PLATFORM) { - printk(KERN_WARNING "Unsupported pm_test mode for " - "freeze state, please choose " - "none/freezer/devices/platform.\n"); - return false; - } -#endif - return true; - } - /* - * PM_SUSPEND_STANDBY and PM_SUSPEND_MEMORY states need lowlevel - * support and need to be valid to the lowlevel - * implementation, no valid callback implies that none are valid. - */ - return suspend_ops && suspend_ops->valid && suspend_ops->valid(state); -} - /** * suspend_valid_only_mem - Generic memory-only valid callback. * @@ -330,9 +322,17 @@ static int enter_state(suspend_state_t state) { int error; - if (!valid_state(state)) - return -ENODEV; - + if (state == PM_SUSPEND_FREEZE) { +#ifdef CONFIG_PM_DEBUG + if (pm_test_level != TEST_NONE && pm_test_level <= TEST_CPUS) { + pr_warning("PM: Unsupported test mode for freeze state," + "please choose none/freezer/devices/platform.\n"); + return -EAGAIN; + } +#endif + } else if (!valid_state(state)) { + return -EINVAL; + } if (!mutex_trylock(&pm_mutex)) return -EBUSY; diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c index d4e3ab1..269b097 100644 --- a/kernel/power/suspend_test.c +++ b/kernel/power/suspend_test.c @@ -162,7 +162,7 @@ static int __init test_suspend(void) /* PM is initialized by now; is that state testable? */ if (test_state == PM_SUSPEND_ON) goto done; - if (!valid_state(test_state)) { + if (!pm_states[test_state].state) { printk(warn_bad_state, pm_states[test_state].label); goto done; } -- cgit v0.10.2 From 0399d4db3edf5c58b6ec7f672f089f5085e49ed5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 26 May 2014 13:40:59 +0200 Subject: PM / sleep: Introduce command line argument for sleep state enumeration On some systems the platform doesn't support neither PM_SUSPEND_MEM nor PM_SUSPEND_STANDBY, so PM_SUSPEND_FREEZE is the only available system sleep state. However, some user space frameworks only use the "mem" and (sometimes) "standby" sleep state labels, so the users of those systems need to modify user space in order to be able to use system suspend at all and that is not always possible. For this reason, add a new kernel command line argument, relative_sleep_states, allowing the users of those systems to change the way in which the kernel assigns labels to system sleep states. Namely, for relative_sleep_states=1, the "mem", "standby" and "freeze" labels will enumerate the available system sleem states from the deepest to the shallowest, respectively, so that "mem" is always present in /sys/power/state and the other state strings may or may not be presend depending on what is supported by the platform. Update system sleep states documentation to reflect this change. Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index 64c9276..f455181 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -7,19 +7,30 @@ Description: subsystem. What: /sys/power/state -Date: August 2006 +Date: May 2014 Contact: Rafael J. Wysocki Description: - The /sys/power/state file controls the system power state. - Reading from this file returns what states are supported, - which is hard-coded to 'freeze' (Low-Power Idle), 'standby' - (Power-On Suspend), 'mem' (Suspend-to-RAM), and 'disk' - (Suspend-to-Disk). + The /sys/power/state file controls system sleep states. + Reading from this file returns the available sleep state + labels, which may be "mem", "standby", "freeze" and "disk" + (hibernation). The meanings of the first three labels depend on + the relative_sleep_states command line argument as follows: + 1) relative_sleep_states = 1 + "mem", "standby", "freeze" represent non-hibernation sleep + states from the deepest ("mem", always present) to the + shallowest ("freeze"). "standby" and "freeze" may or may + not be present depending on the capabilities of the + platform. "freeze" can only be present if "standby" is + present. + 2) relative_sleep_states = 0 (default) + "mem" - "suspend-to-RAM", present if supported. + "standby" - "power-on suspend", present if supported. + "freeze" - "suspend-to-idle", always present. Writing to this file one of these strings causes the system to - transition into that state. Please see the file - Documentation/power/states.txt for a description of each of - these states. + transition into the corresponding state, if available. See + Documentation/power/states.txt for a description of what + "suspend-to-RAM", "power-on suspend" and "suspend-to-idle" mean. What: /sys/power/disk Date: September 2006 diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4384217..e19a88b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2889,6 +2889,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. [KNL, SMP] Set scheduler's default relax_domain_level. See Documentation/cgroups/cpusets.txt. + relative_sleep_states= + [SUSPEND] Use sleep state labeling where the deepest + state available other than hibernation is always "mem". + Format: { "0" | "1" } + 0 -- Traditional sleep state labels. + 1 -- Relative sleep state labels. + reserve= [KNL,BUGS] Force the kernel to ignore some iomem area reservetop= [X86-32] diff --git a/Documentation/power/states.txt b/Documentation/power/states.txt index 442d43d..50f3ef9 100644 --- a/Documentation/power/states.txt +++ b/Documentation/power/states.txt @@ -1,62 +1,87 @@ +System Power Management Sleep States -System Power Management States +(C) 2014 Intel Corp., Rafael J. Wysocki +The kernel supports up to four system sleep states generically, although three +of them depend on the platform support code to implement the low-level details +for each state. -The kernel supports four power management states generically, though -one is generic and the other three are dependent on platform support -code to implement the low-level details for each state. -This file describes each state, what they are -commonly called, what ACPI state they map to, and what string to write -to /sys/power/state to enter that state +The states are represented by strings that can be read or written to the +/sys/power/state file. Those strings may be "mem", "standby", "freeze" and +"disk", where the last one always represents hibernation (Suspend-To-Disk) and +the meaning of the remaining ones depends on the relative_sleep_states command +line argument. -state: Freeze / Low-Power Idle +For relative_sleep_states=1, the strings "mem", "standby" and "freeze" label the +available non-hibernation sleep states from the deepest to the shallowest, +respectively. In that case, "mem" is always present in /sys/power/state, +because there is at least one non-hibernation sleep state in every system. If +the given system supports two non-hibernation sleep states, "standby" is present +in /sys/power/state in addition to "mem". If the system supports three +non-hibernation sleep states, "freeze" will be present in /sys/power/state in +addition to "mem" and "standby". + +For relative_sleep_states=0, which is the default, the following descriptions +apply. + +state: Suspend-To-Idle ACPI state: S0 -String: "freeze" +Label: "freeze" -This state is a generic, pure software, light-weight, low-power state. -It allows more energy to be saved relative to idle by freezing user +This state is a generic, pure software, light-weight, system sleep state. +It allows more energy to be saved relative to runtime idle by freezing user space and putting all I/O devices into low-power states (possibly lower-power than available at run time), such that the processors can spend more time in their idle states. -This state can be used for platforms without Standby/Suspend-to-RAM + +This state can be used for platforms without Power-On Suspend/Suspend-to-RAM support, or it can be used in addition to Suspend-to-RAM (memory sleep) -to provide reduced resume latency. +to provide reduced resume latency. It is always supported. State: Standby / Power-On Suspend ACPI State: S1 -String: "standby" +Label: "standby" -This state offers minimal, though real, power savings, while providing -a very low-latency transition back to a working system. No operating -state is lost (the CPU retains power), so the system easily starts up +This state, if supported, offers moderate, though real, power savings, while +providing a relatively low-latency transition back to a working system. No +operating state is lost (the CPU retains power), so the system easily starts up again where it left off. -We try to put devices in a low-power state equivalent to D1, which -also offers low power savings, but low resume latency. Not all devices -support D1, and those that don't are left on. +In addition to freezing user space and putting all I/O devices into low-power +states, which is done for Suspend-To-Idle too, nonboot CPUs are taken offline +and all low-level system functions are suspended during transitions into this +state. For this reason, it should allow more energy to be saved relative to +Suspend-To-Idle, but the resume latency will generally be greater than for that +state. State: Suspend-to-RAM ACPI State: S3 -String: "mem" +Label: "mem" -This state offers significant power savings as everything in the -system is put into a low-power state, except for memory, which is -placed in self-refresh mode to retain its contents. +This state, if supported, offers significant power savings as everything in the +system is put into a low-power state, except for memory, which should be placed +into the self-refresh mode to retain its contents. All of the steps carried out +when entering Power-On Suspend are also carried out during transitions to STR. +Additional operations may take place depending on the platform capabilities. In +particular, on ACPI systems the kernel passes control to the BIOS (platform +firmware) as the last step during STR transitions and that usually results in +powering down some more low-level components that aren't directly controlled by +the kernel. -System and device state is saved and kept in memory. All devices are -suspended and put into D3. In many cases, all peripheral buses lose -power when entering STR, so devices must be able to handle the -transition back to the On state. +System and device state is saved and kept in memory. All devices are suspended +and put into low-power states. In many cases, all peripheral buses lose power +when entering STR, so devices must be able to handle the transition back to the +"on" state. -For at least ACPI, STR requires some minimal boot-strapping code to -resume the system from STR. This may be true on other platforms. +For at least ACPI, STR requires some minimal boot-strapping code to resume the +system from it. This may be the case on other platforms too. State: Suspend-to-disk ACPI State: S4 -String: "disk" +Label: "disk" This state offers the greatest power savings, and can be used even in the absence of low-level platform support for power management. This diff --git a/kernel/power/main.c b/kernel/power/main.c index 9f51f0a..573410d 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -279,14 +279,14 @@ static inline void pm_print_times_init(void) {} struct kobject *power_kobj; /** - * state - control system power state. + * state - control system sleep states. * - * show() returns what states are supported, which is hard-coded to - * 'freeze' (Low-Power Idle), 'standby' (Power-On Suspend), - * 'mem' (Suspend-to-RAM), and 'disk' (Suspend-to-Disk). + * show() returns available sleep state labels, which may be "mem", "standby", + * "freeze" and "disk" (hibernation). See Documentation/power/states.txt for a + * description of what they mean. * - * store() accepts one of those strings, translates it into the - * proper enumerated value, and initiates a suspend transition. + * store() accepts one of those strings, translates it into the proper + * enumerated value, and initiates a suspend transition. */ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 00aca60..338a6f1 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -78,6 +78,26 @@ static bool valid_state(suspend_state_t state) return suspend_ops && suspend_ops->valid && suspend_ops->valid(state); } +/* + * If this is set, the "mem" label always corresponds to the deepest sleep state + * available, the "standby" label corresponds to the second deepest sleep state + * available (if any), and the "freeze" label corresponds to the remaining + * available sleep state (if there is one). + */ +static bool relative_states; + +static int __init sleep_states_setup(char *str) +{ + relative_states = !strncmp(str, "1", 1); + if (relative_states) { + pm_states[PM_SUSPEND_MEM].state = PM_SUSPEND_FREEZE; + pm_states[PM_SUSPEND_FREEZE].state = 0; + } + return 1; +} + +__setup("relative_sleep_states=", sleep_states_setup); + /** * suspend_set_ops - Set the global suspend method table. * @ops: Suspend operations to use. @@ -85,12 +105,20 @@ static bool valid_state(suspend_state_t state) void suspend_set_ops(const struct platform_suspend_ops *ops) { suspend_state_t i; + int j = PM_SUSPEND_MAX - 1; lock_system_sleep(); suspend_ops = ops; - for (i = PM_SUSPEND_STANDBY; i <= PM_SUSPEND_MEM; i++) - pm_states[i].state = valid_state(i) ? i : 0; + for (i = PM_SUSPEND_MEM; i >= PM_SUSPEND_STANDBY; i--) + if (valid_state(i)) + pm_states[j--].state = i; + else if (!relative_states) + pm_states[j--].state = 0; + + pm_states[j--].state = PM_SUSPEND_FREEZE; + while (j >= PM_SUSPEND_MIN) + pm_states[j--].state = 0; unlock_system_sleep(); } -- cgit v0.10.2 From 0c5ff0ef80c2561ef20721299ecfc39c5a42f694 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 28 May 2014 15:23:35 +0800 Subject: PM / sleep: unregister wakeup source when disabling device wakeup When enabling a device' wakeup capability, a wakeup source is created for the device automatically. But the wakeup source is not unregistered when disabling the device' wakeup capability. This results in zombie wakeup sources, after devices/drivers are unregistered. Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 2d56f41..eb1bd2e 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -318,10 +318,16 @@ int device_init_wakeup(struct device *dev, bool enable) { int ret = 0; + if (!dev) + return -EINVAL; + if (enable) { device_set_wakeup_capable(dev, true); ret = device_wakeup_enable(dev); } else { + if (dev->power.can_wakeup) + device_wakeup_disable(dev); + device_set_wakeup_capable(dev, false); } -- cgit v0.10.2 From 057b0a7518e4b8fca26201715996d6d928a62300 Mon Sep 17 00:00:00 2001 From: Niv Yehezkel Date: Sat, 31 May 2014 06:26:01 -0400 Subject: PM / hibernate: fixed typo in comment Fix a trivial comment typo (s/mam/map) in kernel/power/swap.c. Signed-off-by: Niv Yehezkel Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 8c9a481..aaa3261 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -567,7 +567,7 @@ static int lzo_compress_threadfn(void *data) /** * save_image_lzo - Save the suspend image data compressed with LZO. - * @handle: Swap mam handle to use for saving the image. + * @handle: Swap map handle to use for saving the image. * @snapshot: Image to read data from. * @nr_to_write: Number of pages to save. */ -- cgit v0.10.2