From 013c074f8642d8e815ad670601f8e27155a74b57 Mon Sep 17 00:00:00 2001 From: "Strashko, Grygorii" Date: Tue, 10 Nov 2015 11:42:34 +0200 Subject: PM / sleep: prohibit devices probing during suspend/hibernation It is unsafe [1] if probing of devices will happen during suspend or hibernation and system behavior will be unpredictable in this case. So, let's prohibit device's probing in dpm_prepare() and defer their probing instead. The normal behavior will be restored in dpm_complete(). This patch introduces new DD core APIs: device_block_probing() It will disable probing of devices and defer their probes instead. device_unblock_probing() It will restore normal behavior and trigger re-probing of deferred devices. [1] https://lkml.org/lkml/2015/9/11/554 Signed-off-by: Grygorii Strashko Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/base.h b/drivers/base/base.h index 1782f3a..e05db38 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -131,6 +131,8 @@ extern void device_remove_groups(struct device *dev, extern char *make_class_name(const char *name, struct kobject *kobj); extern int devres_release_all(struct device *dev); +extern void device_block_probing(void); +extern void device_unblock_probing(void); /* /sys/devices directory */ extern struct kset *devices_kset; diff --git a/drivers/base/dd.c b/drivers/base/dd.c index a641cf3..b605f73 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -55,6 +55,13 @@ static struct workqueue_struct *deferred_wq; static atomic_t deferred_trigger_count = ATOMIC_INIT(0); /* + * In some cases, like suspend to RAM or hibernation, It might be reasonable + * to prohibit probing of devices as it could be unsafe. + * Once defer_all_probes is true all drivers probes will be forcibly deferred. + */ +static bool defer_all_probes; + +/* * deferred_probe_work_func() - Retry probing devices in the active list. 
*/ static void deferred_probe_work_func(struct work_struct *work) @@ -172,6 +179,30 @@ static void driver_deferred_probe_trigger(void) } /** + * device_block_probing() - Block/defer device's probes + * + * It will disable probing of devices and defer their probes instead. + */ +void device_block_probing(void) +{ + defer_all_probes = true; + /* sync with probes to avoid races. */ + wait_for_device_probe(); +} + +/** + * device_unblock_probing() - Unblock/enable device's probes + * + * It will restore normal behavior and trigger re-probing of deferred + * devices. + */ +void device_unblock_probing(void) +{ + defer_all_probes = false; + driver_deferred_probe_trigger(); +} + +/** * deferred_probe_initcall() - Enable probing of deferred devices * * We don't want to get in the way when the bulk of drivers are getting probed. @@ -277,9 +308,20 @@ static DECLARE_WAIT_QUEUE_HEAD(probe_waitqueue); static int really_probe(struct device *dev, struct device_driver *drv) { - int ret = 0; + int ret = -EPROBE_DEFER; int local_trigger_count = atomic_read(&deferred_trigger_count); + if (defer_all_probes) { + /* + * Value of defer_all_probes can be set only by + * device_block_probing() which, in turn, will call + * wait_for_device_probe() right after that to avoid any races.
+ */ + dev_dbg(dev, "Driver %s force probe deferral\n", drv->name); + driver_deferred_probe_add(dev); + return ret; + } + atomic_inc(&probe_count); pr_debug("bus: '%s': %s: probing driver %s with device %s\n", drv->bus->name, __func__, drv->name, dev_name(dev)); @@ -393,6 +435,10 @@ int driver_probe_done(void) */ void wait_for_device_probe(void) { + /* wait for the deferred probe workqueue to finish */ + if (driver_deferred_probe_enable) + flush_workqueue(deferred_wq); + /* wait for the known devices to complete their probing */ wait_event(probe_waitqueue, atomic_read(&probe_count) == 0); async_synchronize_full(); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 1710c26..9d626ac 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -963,6 +963,9 @@ void dpm_complete(pm_message_t state) } list_splice(&list, &dpm_list); mutex_unlock(&dpm_list_mtx); + + /* Allow device probing and trigger re-probing of deferred devices */ + device_unblock_probing(); trace_suspend_resume(TPS("dpm_complete"), state.event, false); } @@ -1624,6 +1627,20 @@ int dpm_prepare(pm_message_t state) trace_suspend_resume(TPS("dpm_prepare"), state.event, true); might_sleep(); + /* + * Give a chance for the known devices to complete their probes, before + * disabling probing of devices. This sync point is important at least + * at boot time + hibernation restore. + */ + wait_for_device_probe(); + /* + * It is unsafe if probing of devices will happen during suspend or + * hibernation and system behavior will be unpredictable in this case. + * So, let's prohibit device's probing here and defer their probes + * instead. The normal behavior will be restored in dpm_complete().
+ */ + device_block_probing(); + mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_list)) { struct device *dev = to_device(dpm_list.next); -- cgit v0.10.2 From 5de85b9d57aba3ed2e04759e6db3b9e826dd0b06 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 18 Nov 2015 11:48:39 +0100 Subject: PM / runtime: Re-init runtime PM states at probe error and driver unbind There are two common expectations among several subsystems/drivers that deploy runtime PM support, but which aren't met by the driver core. Expectation 1) At ->probe() the subsystem/driver expects the runtime PM status of the device to be RPM_SUSPENDED, which is the initial status being assigned at device registration. This expectation is especially common among some of those subsystems/ drivers that manage devices with an attached PM domain, as those require the ->runtime_resume() callback at the PM domain level to be invoked during ->probe(). Moreover these subsystems/drivers entirely rely on runtime PM resources being managed at the PM domain level, thus don't implement their own set of runtime PM callbacks. These are two scenarios that suffer from this unmet expectation. i) A failed ->probe() sequence requests probe deferral: ->probe() ... pm_runtime_enable() pm_runtime_get_sync() ... err: pm_runtime_put() pm_runtime_disable() ... As there are no guarantees that such a sequence turns the runtime PM status of the device into RPM_SUSPENDED, the re-trying ->probe() may start with the status in RPM_ACTIVE. In such a case the runtime PM core won't invoke the ->runtime_resume() callback because of a pm_runtime_get_sync(), as it considers the device to be already runtime resumed. ii) A driver re-bind sequence: At driver unbind, the subsystem/driver's ->remove() callback invokes a sequence of runtime PM APIs, to undo actions during ->probe() and to put the device into low power state. ->remove() ... pm_runtime_put() pm_runtime_disable() ...
As in the failing ->probe() case, this sequence doesn't guarantee the runtime PM status of the device to turn into RPM_SUSPENDED. Trying to re-bind the driver thus causes the same issue as when re-trying ->probe(), in the probe deferral scenario. Expectation 2) Drivers that invoke the pm_runtime_irq_safe() API during ->probe(), trigger the runtime PM core to increase the usage count for the device's parent and permanently make it runtime resumed. The usage count is only dropped at device removal, which also allows it to be runtime suspended again. A re-trying ->probe() repeats the call to pm_runtime_irq_safe() and thus once more triggers the usage count of the device's parent to be increased. This leads not only to an imbalance issue of the usage count of the device's parent, but also keeps it runtime resumed permanently even if ->probe() fails. To address these issues, let's change the policy of the driver core to meet these expectations. More precisely, at ->probe() failures and driver unbind, restore the initial states of runtime PM. Although to still allow subsystems to control PM for devices that don't ->probe() successfully, don't restore the initial states unless runtime PM is disabled. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J.
Wysocki diff --git a/drivers/base/dd.c b/drivers/base/dd.c index a641cf3..cd2d79b 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -340,6 +340,7 @@ probe_failed: dev_set_drvdata(dev, NULL); if (dev->pm_domain && dev->pm_domain->dismiss) dev->pm_domain->dismiss(dev); + pm_runtime_reinit(dev); switch (ret) { case -EPROBE_DEFER: @@ -695,6 +696,7 @@ static void __device_release_driver(struct device *dev) dev_set_drvdata(dev, NULL); if (dev->pm_domain && dev->pm_domain->dismiss) dev->pm_domain->dismiss(dev); + pm_runtime_reinit(dev); klist_remove(&dev->p->knode_driver); if (dev->bus) diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 998fa6b..8b06193 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -18,6 +18,7 @@ static inline void pm_runtime_early_init(struct device *dev) } extern void pm_runtime_init(struct device *dev); +extern void pm_runtime_reinit(struct device *dev); extern void pm_runtime_remove(struct device *dev); struct wake_irq { @@ -84,6 +85,7 @@ static inline void pm_runtime_early_init(struct device *dev) } static inline void pm_runtime_init(struct device *dev) {} +static inline void pm_runtime_reinit(struct device *dev) {} static inline void pm_runtime_remove(struct device *dev) {} static inline int dpm_sysfs_add(struct device *dev) { return 0; } diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index e1a10a0..ab3fcd9 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1390,18 +1390,32 @@ void pm_runtime_init(struct device *dev) } /** + * pm_runtime_reinit - Re-initialize runtime PM fields in given device object. + * @dev: Device object to re-initialize. 
+ */ +void pm_runtime_reinit(struct device *dev) +{ + if (!pm_runtime_enabled(dev)) { + if (dev->power.runtime_status == RPM_ACTIVE) + pm_runtime_set_suspended(dev); + if (dev->power.irq_safe) { + spin_lock_irq(&dev->power.lock); + dev->power.irq_safe = 0; + spin_unlock_irq(&dev->power.lock); + if (dev->parent) + pm_runtime_put(dev->parent); + } + } +} + +/** * pm_runtime_remove - Prepare for removing a device from device hierarchy. * @dev: Device object being removed from device hierarchy. */ void pm_runtime_remove(struct device *dev) { __pm_runtime_disable(dev, false); - - /* Change the status back to 'suspended' to match the initial status. */ - if (dev->power.runtime_status == RPM_ACTIVE) - pm_runtime_set_suspended(dev); - if (dev->power.irq_safe && dev->parent) - pm_runtime_put(dev->parent); + pm_runtime_reinit(dev); } /** -- cgit v0.10.2 From 7b06a6d7bff563d82ddf8769617632f26793a83e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 5 Dec 2015 01:54:47 +0100 Subject: MAINTAINERS: Add an entry for the PM core Add a MAINTAINERS entry for the PM core with myself as the maintainer and linux-pm as the mailing list. This actually documents the current state of things. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman diff --git a/MAINTAINERS b/MAINTAINERS index 69c8a9c..8e4019c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8438,6 +8438,17 @@ F: fs/timerfd.c F: include/linux/timer* F: kernel/time/*timer* +POWER MANAGEMENT CORE +M: "Rafael J. 
Wysocki" +L: linux-pm@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm +S: Supported +F: drivers/base/power/ +F: include/linux/pm.h +F: include/linux/pm_* +F: include/linux/powercap.h +F: drivers/powercap/ + POWER SUPPLY CLASS/SUBSYSTEM and DRIVERS M: Sebastian Reichel M: Dmitry Eremin-Solenikov -- cgit v0.10.2 From 299f2ffed329c1a2ea8d6e90f0df26b885d16e08 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 6 Dec 2015 17:33:45 +0100 Subject: PCI / PM: constify pci_platform_pm_ops structure The pci_platform_pm_ops structure is never modified, so declare it as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index a32ba75..8400f80 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -529,7 +529,7 @@ static bool acpi_pci_need_resume(struct pci_dev *dev) return !!adev->power.flags.dsw_present; } -static struct pci_platform_pm_ops acpi_pci_platform_pm = { +static const struct pci_platform_pm_ops acpi_pci_platform_pm = { .is_manageable = acpi_pci_power_manageable, .set_state = acpi_pci_set_power_state, .choose_state = acpi_pci_choose_state, diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 314db8c..d1a7105 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -527,9 +527,9 @@ static void pci_restore_bars(struct pci_dev *dev) pci_update_resource(dev, i); } -static struct pci_platform_pm_ops *pci_platform_pm; +static const struct pci_platform_pm_ops *pci_platform_pm; -int pci_set_platform_pm(struct pci_platform_pm_ops *ops) +int pci_set_platform_pm(const struct pci_platform_pm_ops *ops) { if (!ops->is_manageable || !ops->set_state || !ops->choose_state || !ops->sleep_wake) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index d390fc1..f6f151a 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -68,7 +68,7 @@ struct pci_platform_pm_ops { bool (*need_resume)(struct 
pci_dev *dev); }; -int pci_set_platform_pm(struct pci_platform_pm_ops *ops); +int pci_set_platform_pm(const struct pci_platform_pm_ops *ops); void pci_update_current_state(struct pci_dev *dev, pci_power_t state); void pci_power_up(struct pci_dev *dev); void pci_disable_enabled_device(struct pci_dev *dev); -- cgit v0.10.2 From 76fc35ddf8c075aa0e3f52384591d613b906ebb6 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Tue, 8 Dec 2015 16:17:25 +0200 Subject: PCI / PM: Fix small typo in documentation cuased -> caused Signed-off-by: Jarkko Nikula Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt index b0e911e..4455888 100644 --- a/Documentation/power/pci.txt +++ b/Documentation/power/pci.txt @@ -999,7 +999,7 @@ from its probe routine to make runtime PM work for the device. It is important to remember that the driver's runtime_suspend() callback may be executed right after the usage counter has been decremented, because -user space may already have cuased the pm_runtime_allow() helper function +user space may already have caused the pm_runtime_allow() helper function unblocking the runtime PM of the device to run via sysfs, so the driver must be prepared to cope with that. -- cgit v0.10.2 From a436b6a19f57656a6557439523923d89eb4a880d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 17 Dec 2015 02:54:26 +0100 Subject: PM / runtime: Add new helper for conditional usage count incrementation Introduce a new runtime PM function, pm_runtime_get_if_in_use(), that will increment the device's runtime PM usage counter and return 1 if its status is RPM_ACTIVE and its usage counter is greater than 0 at the same time (0 will be returned otherwise). This is useful for things that should only be done if the device is active (from the runtime PM perspective) and used by somebody (as indicated by the usage counter) already and they are not worth bothering otherwise. 
Requested-by: Imre Deak Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 0784bc3..7328cf8 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -371,6 +371,12 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: - increment the device's usage counter, run pm_runtime_resume(dev) and return its result + int pm_runtime_get_if_in_use(struct device *dev); + - return -EINVAL if 'power.disable_depth' is nonzero; otherwise, if the + runtime PM status is RPM_ACTIVE and the runtime PM usage counter is + nonzero, increment the counter and return 1; otherwise return 0 without + changing the counter + void pm_runtime_put_noidle(struct device *dev); - decrement the device's usage counter diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index ab3fcd9..4c70550 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -966,6 +966,30 @@ int __pm_runtime_resume(struct device *dev, int rpmflags) EXPORT_SYMBOL_GPL(__pm_runtime_resume); /** + * pm_runtime_get_if_in_use - Conditionally bump up the device's usage counter. + * @dev: Device to handle. + * + * Return -EINVAL if runtime PM is disabled for the device. + * + * If that's not the case and if the device's runtime PM status is RPM_ACTIVE + * and the runtime PM usage counter is nonzero, increment the counter and + * return 1. Otherwise return 0 without changing the counter. + */ +int pm_runtime_get_if_in_use(struct device *dev) +{ + unsigned long flags; + int retval; + + spin_lock_irqsave(&dev->power.lock, flags); + retval = dev->power.disable_depth > 0 ? -EINVAL : + dev->power.runtime_status == RPM_ACTIVE + && atomic_inc_not_zero(&dev->power.usage_count); + spin_unlock_irqrestore(&dev->power.lock, flags); + return retval; +} +EXPORT_SYMBOL_GPL(pm_runtime_get_if_in_use); + +/** * __pm_runtime_set_status - Set runtime PM status of a device. 
* @dev: Device to handle. * @status: New runtime PM status of the device. diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 3bdbb41..7af093d 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -39,6 +39,7 @@ extern int pm_runtime_force_resume(struct device *dev); extern int __pm_runtime_idle(struct device *dev, int rpmflags); extern int __pm_runtime_suspend(struct device *dev, int rpmflags); extern int __pm_runtime_resume(struct device *dev, int rpmflags); +extern int pm_runtime_get_if_in_use(struct device *dev); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); @@ -143,6 +144,10 @@ static inline int pm_schedule_suspend(struct device *dev, unsigned int delay) { return -ENOSYS; } +static inline int pm_runtime_get_if_in_use(struct device *dev) +{ + return -EINVAL; +} static inline int __pm_runtime_set_status(struct device *dev, unsigned int status) { return 0; } static inline int pm_runtime_barrier(struct device *dev) { return 0; } -- cgit v0.10.2 From 4295733eee4e69eda432d95765b7762dc6013271 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Tue, 29 Dec 2015 11:03:21 +0100 Subject: PM / core: fix typo in documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The documentation for detach() said attach. Signed-off-by: Manuel Pégourié-Gonnard Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c index f32b802..f48e333 100644 --- a/drivers/base/power/common.c +++ b/drivers/base/power/common.c @@ -112,7 +112,7 @@ EXPORT_SYMBOL_GPL(dev_pm_domain_attach); /** * dev_pm_domain_detach - Detach a device from its PM domain. - * @dev: Device to attach. + * @dev: Device to detach. 
* @power_off: Used to indicate whether we should power off the device. * * This functions will reverse the actions from dev_pm_domain_attach() and thus -- cgit v0.10.2