From 311aab73d273eb22be976055f6cab224f7279d5e Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 8 Aug 2011 23:39:36 +0200 Subject: PM / Runtime: Add might_sleep() to runtime PM functions Some of the entry points to pm runtime are not safe to call in atomic context unless pm_runtime_irq_safe() has been called. Inspecting the code, it is not immediately obvious that the functions sleep at all, as they run inside a spin_lock_irqsave, but under some conditions they can drop the lock and turn on irqs. If a driver incorrectly calls the pm_runtime apis, it can cause sleeping and irq processing when it expects to stay in atomic context. Add might_sleep_if to the majority of the __pm_runtime_* entry points to enforce correct usage. Add pm_runtime_put_sync_autosuspend to the list of functions that can be called in atomic context. Signed-off-by: Colin Cross Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 4ce5450..62f37bc 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -484,6 +484,7 @@ pm_runtime_resume() pm_runtime_get_sync() pm_runtime_put_sync() pm_runtime_put_sync_suspend() +pm_runtime_put_sync_autosuspend() 5. Runtime PM Initialization, Device Probing and Removal diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index acb3f83..04e18ab 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -732,13 +732,16 @@ EXPORT_SYMBOL_GPL(pm_schedule_suspend); * return immediately if it is larger than zero. Then carry out an idle * notification, either synchronous or asynchronous. * - * This routine may be called in atomic context if the RPM_ASYNC flag is set. + * This routine may be called in atomic context if the RPM_ASYNC flag is set, + * or if pm_runtime_irq_safe() has been called. */ int __pm_runtime_idle(struct device *dev, int rpmflags) { unsigned long flags; int retval; + might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe); + if (rpmflags & RPM_GET_PUT) { if (!atomic_dec_and_test(&dev->power.usage_count)) return 0; @@ -761,13 +764,16 @@ EXPORT_SYMBOL_GPL(__pm_runtime_idle); * return immediately if it is larger than zero. Then carry out a suspend, * either synchronous or asynchronous. * - * This routine may be called in atomic context if the RPM_ASYNC flag is set. + * This routine may be called in atomic context if the RPM_ASYNC flag is set, + * or if pm_runtime_irq_safe() has been called. */ int __pm_runtime_suspend(struct device *dev, int rpmflags) { unsigned long flags; int retval; + might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe); + if (rpmflags & RPM_GET_PUT) { if (!atomic_dec_and_test(&dev->power.usage_count)) return 0; @@ -789,13 +795,16 @@ EXPORT_SYMBOL_GPL(__pm_runtime_suspend); * If the RPM_GET_PUT flag is set, increment the device's usage count. Then * carry out a resume, either synchronous or asynchronous. * - * This routine may be called in atomic context if the RPM_ASYNC flag is set. + * This routine may be called in atomic context if the RPM_ASYNC flag is set, + * or if pm_runtime_irq_safe() has been called. */ int __pm_runtime_resume(struct device *dev, int rpmflags) { unsigned long flags; int retval; + might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe); + if (rpmflags & RPM_GET_PUT) atomic_inc(&dev->power.usage_count); -- cgit v0.10.2 From 5b1b0b812a7b1a5b968c5d06d90d1cb88621b941 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 19 Aug 2011 23:49:48 +0200 Subject: PM / Runtime: Add macro to test for runtime PM events This patch (as1482) adds a macro for testing whether or not a pm_message value represents an autosuspend or autoresume (i.e., a runtime PM) event. Encapsulating this notion seems preferable to open-coding the test all over the place. Signed-off-by: Alan Stern Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt index c9ffa9c..e8662a5 100644 --- a/Documentation/usb/power-management.txt +++ b/Documentation/usb/power-management.txt @@ -439,10 +439,10 @@ cause autosuspends to fail with -EBUSY if the driver needs to use the device. External suspend calls should never be allowed to fail in this way, -only autosuspend calls. The driver can tell them apart by checking -the PM_EVENT_AUTO bit in the message.event argument to the suspend -method; this bit will be set for internal PM events (autosuspend) and -clear for external PM events. +only autosuspend calls. The driver can tell them apart by applying +the PMSG_IS_AUTO() macro to the message argument to the suspend +method; it will return True for internal PM events (autosuspend) and +False for external PM events. Mutual exclusion diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 91d13a9..91b190c 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1103,7 +1103,7 @@ static int btusb_suspend(struct usb_interface *intf, pm_message_t message) return 0; spin_lock_irq(&data->txlock); - if (!((message.event & PM_EVENT_AUTO) && data->tx_in_flight)) { + if (!(PMSG_IS_AUTO(message) && data->tx_in_flight)) { set_bit(BTUSB_SUSPENDING, &data->flags); spin_unlock_irq(&data->txlock); } else { diff --git a/drivers/hid/hid-picolcd.c b/drivers/hid/hid-picolcd.c index 9d8710f..1782693 100644 --- a/drivers/hid/hid-picolcd.c +++ b/drivers/hid/hid-picolcd.c @@ -2409,7 +2409,7 @@ static int picolcd_raw_event(struct hid_device *hdev, #ifdef CONFIG_PM static int picolcd_suspend(struct hid_device *hdev, pm_message_t message) { - if (message.event & PM_EVENT_AUTO) + if (PMSG_IS_AUTO(message)) return 0; picolcd_suspend_backlight(hid_get_drvdata(hdev)); diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index ad978f5..a9fa294 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -1332,7 +1332,7 @@ static int hid_suspend(struct usb_interface *intf, pm_message_t message) struct usbhid_device *usbhid = hid->driver_data; int status; - if (message.event & PM_EVENT_AUTO) { + if (PMSG_IS_AUTO(message)) { spin_lock_irq(&usbhid->lock); /* Sync with error handler */ if (!test_bit(HID_RESET_PENDING, &usbhid->iofl) && !test_bit(HID_CLEAR_HALT, &usbhid->iofl) @@ -1367,7 +1367,7 @@ static int hid_suspend(struct usb_interface *intf, pm_message_t message) return -EIO; } - if (!ignoreled && (message.event & PM_EVENT_AUTO)) { + if (!ignoreled && PMSG_IS_AUTO(message)) { spin_lock_irq(&usbhid->lock); if (test_bit(HID_LED_ON, &usbhid->iofl)) { spin_unlock_irq(&usbhid->lock); @@ -1380,8 +1380,7 @@ static int hid_suspend(struct usb_interface *intf, pm_message_t message) hid_cancel_delayed_stuff(usbhid); hid_cease_io(usbhid); - if ((message.event & PM_EVENT_AUTO) && - test_bit(HID_KEYS_PRESSED, &usbhid->iofl)) { + if (PMSG_IS_AUTO(message) && test_bit(HID_KEYS_PRESSED, &usbhid->iofl)) { /* lost race against keypresses */ status = hid_start_in(hid); if (status < 0) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index ce395fe..f1c435b 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1470,7 +1470,7 @@ int usbnet_suspend (struct usb_interface *intf, pm_message_t message) if (!dev->suspend_count++) { spin_lock_irq(&dev->txq.lock); /* don't autosuspend while transmitting */ - if (dev->txq.qlen && (message.event & PM_EVENT_AUTO)) { + if (dev->txq.qlen && PMSG_IS_AUTO(message)) { spin_unlock_irq(&dev->txq.lock); return -EBUSY; } else { diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c index 298f2b0..9a644d0 100644 --- a/drivers/net/wimax/i2400m/usb.c +++ b/drivers/net/wimax/i2400m/usb.c @@ -599,7 +599,7 @@ void i2400mu_disconnect(struct usb_interface *iface) * * As well, the device might refuse going to sleep for whichever * reason. In this case we just fail. For system suspend/hibernate, - * we *can't* fail. We check PM_EVENT_AUTO to see if the + * we *can't* fail. We check PMSG_IS_AUTO to see if the * suspend call comes from the USB stack or from the system and act * in consequence. * @@ -615,7 +615,7 @@ int i2400mu_suspend(struct usb_interface *iface, pm_message_t pm_msg) struct i2400m *i2400m = &i2400mu->i2400m; #ifdef CONFIG_PM - if (pm_msg.event & PM_EVENT_AUTO) + if (PMSG_IS_AUTO(pm_msg)) is_autosuspend = 1; #endif diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index dac7676..94e6c5c 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1305,7 +1305,7 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message) struct acm *acm = usb_get_intfdata(intf); int cnt; - if (message.event & PM_EVENT_AUTO) { + if (PMSG_IS_AUTO(message)) { int b; spin_lock_irq(&acm->write_lock); diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 2b9ff51..42f180a 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -798,11 +798,11 @@ static int wdm_suspend(struct usb_interface *intf, pm_message_t message) dev_dbg(&desc->intf->dev, "wdm%d_suspend\n", intf->minor); /* if this is an autosuspend the caller does the locking */ - if (!(message.event & PM_EVENT_AUTO)) + if (!PMSG_IS_AUTO(message)) mutex_lock(&desc->lock); spin_lock_irq(&desc->iuspin); - if ((message.event & PM_EVENT_AUTO) && + if (PMSG_IS_AUTO(message) && (test_bit(WDM_IN_USE, &desc->flags) || test_bit(WDM_RESPONDING, &desc->flags))) { spin_unlock_irq(&desc->iuspin); @@ -815,7 +815,7 @@ static int wdm_suspend(struct usb_interface *intf, pm_message_t message) kill_urbs(desc); cancel_work_sync(&desc->rxwork); } - if (!(message.event & PM_EVENT_AUTO)) + if (!PMSG_IS_AUTO(message)) mutex_unlock(&desc->lock); return rv; diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 34e3da5..e030428 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -1046,8 +1046,7 @@ static int usb_resume_device(struct usb_device *udev, pm_message_t msg) /* Non-root devices on a full/low-speed bus must wait for their * companion high-speed root hub, in case a handoff is needed. */ - if (!(msg.event & PM_EVENT_AUTO) && udev->parent && - udev->bus->hs_companion) + if (!PMSG_IS_AUTO(msg) && udev->parent && udev->bus->hs_companion) device_pm_wait_for_dev(&udev->dev, &udev->bus->hs_companion->root_hub->dev); @@ -1075,7 +1074,7 @@ static int usb_suspend_interface(struct usb_device *udev, if (driver->suspend) { status = driver->suspend(intf, msg); - if (status && !(msg.event & PM_EVENT_AUTO)) + if (status && !PMSG_IS_AUTO(msg)) dev_err(&intf->dev, "%s error %d\n", "suspend", status); } else { @@ -1189,7 +1188,7 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) status = usb_suspend_interface(udev, intf, msg); /* Ignore errors during system sleep transitions */ - if (!(msg.event & PM_EVENT_AUTO)) + if (!PMSG_IS_AUTO(msg)) status = 0; if (status != 0) break; @@ -1199,7 +1198,7 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) status = usb_suspend_device(udev, msg); /* Again, ignore errors during system sleep transitions */ - if (!(msg.event & PM_EVENT_AUTO)) + if (!PMSG_IS_AUTO(msg)) status = 0; } diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 8669ba3..da582f4 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1960,7 +1960,7 @@ int hcd_bus_suspend(struct usb_device *rhdev, pm_message_t msg) int old_state = hcd->state; dev_dbg(&rhdev->dev, "bus %s%s\n", - (msg.event & PM_EVENT_AUTO ? "auto-" : ""), "suspend"); + (PMSG_IS_AUTO(msg) ? "auto-" : ""), "suspend"); if (HCD_DEAD(hcd)) { dev_dbg(&rhdev->dev, "skipped %s of dead bus\n", "suspend"); return 0; @@ -1996,7 +1996,7 @@ int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg) int old_state = hcd->state; dev_dbg(&rhdev->dev, "usb %s%s\n", - (msg.event & PM_EVENT_AUTO ? "auto-" : ""), "resume"); + (PMSG_IS_AUTO(msg) ? "auto-" : ""), "resume"); if (HCD_DEAD(hcd)) { dev_dbg(&rhdev->dev, "skipped %s of dead bus\n", "resume"); return 0; diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index a428aa0..ee50e0b 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2342,7 +2342,7 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) dev_dbg(&udev->dev, "won't remote wakeup, status %d\n", status); /* bail if autosuspend is requested */ - if (msg.event & PM_EVENT_AUTO) + if (PMSG_IS_AUTO(msg)) return status; } } @@ -2367,12 +2367,12 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) USB_CTRL_SET_TIMEOUT); /* System sleep transitions should never fail */ - if (!(msg.event & PM_EVENT_AUTO)) + if (!PMSG_IS_AUTO(msg)) status = 0; } else { /* device has up to 10 msec to fully suspend */ dev_dbg(&udev->dev, "usb %ssuspend\n", - (msg.event & PM_EVENT_AUTO ? "auto-" : "")); + (PMSG_IS_AUTO(msg) ? "auto-" : "")); usb_set_device_state(udev, USB_STATE_SUSPENDED); msleep(10); } @@ -2523,7 +2523,7 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg) } else { /* drive resume for at least 20 msec */ dev_dbg(&udev->dev, "usb %sresume\n", - (msg.event & PM_EVENT_AUTO ? "auto-" : "")); + (PMSG_IS_AUTO(msg) ? "auto-" : "")); msleep(25); /* Virtual root hubs can trigger on GET_PORT_STATUS to @@ -2625,7 +2625,7 @@ static int hub_suspend(struct usb_interface *intf, pm_message_t msg) udev = hdev->children [port1-1]; if (udev && udev->can_submit) { dev_warn(&intf->dev, "port %d nyet suspended\n", port1); - if (msg.event & PM_EVENT_AUTO) + if (PMSG_IS_AUTO(msg)) return -EBUSY; } } diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index d5d136a..b18179b 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -1009,7 +1009,7 @@ static int sierra_suspend(struct usb_serial *serial, pm_message_t message) struct sierra_intf_private *intfdata; int b; - if (message.event & PM_EVENT_AUTO) { + if (PMSG_IS_AUTO(message)) { intfdata = serial->private; spin_lock_irq(&intfdata->susp_lock); b = intfdata->in_flight; diff --git a/drivers/usb/serial/usb_wwan.c b/drivers/usb/serial/usb_wwan.c index e4fad5e..d555ca9 100644 --- a/drivers/usb/serial/usb_wwan.c +++ b/drivers/usb/serial/usb_wwan.c @@ -651,7 +651,7 @@ int usb_wwan_suspend(struct usb_serial *serial, pm_message_t message) dbg("%s entered", __func__); - if (message.event & PM_EVENT_AUTO) { + if (PMSG_IS_AUTO(message)) { spin_lock_irq(&intfdata->susp_lock); b = intfdata->in_flight; spin_unlock_irq(&intfdata->susp_lock); diff --git a/include/linux/pm.h b/include/linux/pm.h index f7c84c9..18de9f8 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -366,6 +366,8 @@ extern struct dev_pm_ops generic_subsys_pm_ops; #define PMSG_AUTO_RESUME ((struct pm_message) \ { .event = PM_EVENT_AUTO_RESUME, }) +#define PMSG_IS_AUTO(msg) (((msg).event & PM_EVENT_AUTO) != 0) + /** * Device run-time power management status. * diff --git a/sound/usb/card.c b/sound/usb/card.c index 781d9e6..d5754fa 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -628,7 +628,7 @@ static int usb_audio_suspend(struct usb_interface *intf, pm_message_t message) if (chip == (void *)-1L) return 0; - if (!(message.event & PM_EVENT_AUTO)) { + if (!PMSG_IS_AUTO(message)) { snd_power_change_state(chip->card, SNDRV_CTL_POWER_D3hot); if (!chip->num_suspended_intf++) { list_for_each(p, &chip->pcm_list) { -- cgit v0.10.2 From c4bb3160c8823d3a1e581d7e05fb8b343097e7c8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 8 Aug 2011 23:43:04 +0200 Subject: PM / Domains: Implement subdomain counters as atomic fields Currently, pm_genpd_poweron() and pm_genpd_poweroff() need to take the parent PM domain's lock in order to modify the parent's counter of active subdomains in a nonracy way. This causes the locking to be considerably complex and in fact is not necessary, because the subdomain counters may be implemented as atomic fields and they won't have to be modified under a lock. Replace the unsigned in sd_count field in struct generic_pm_domain by an atomic_t one and modify the code in drivers/base/power/domain.c to take this change into account. This patch doesn't change the locking yet, that is going to be done in a separate subsequent patch. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 1c37457..20e2b52 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -29,10 +29,20 @@ static struct generic_pm_domain *dev_to_genpd(struct device *dev) return pd_to_genpd(dev->pm_domain); } -static void genpd_sd_counter_dec(struct generic_pm_domain *genpd) +static bool genpd_sd_counter_dec(struct generic_pm_domain *genpd) { - if (!WARN_ON(genpd->sd_count == 0)) - genpd->sd_count--; + bool ret = false; + + if (!WARN_ON(atomic_read(&genpd->sd_count) == 0)) + ret = !!atomic_dec_and_test(&genpd->sd_count); + + return ret; +} + +static void genpd_sd_counter_inc(struct generic_pm_domain *genpd) +{ + atomic_inc(&genpd->sd_count); + smp_mb__after_atomic_inc(); } static void genpd_acquire_lock(struct generic_pm_domain *genpd) @@ -118,7 +128,7 @@ int pm_genpd_poweron(struct generic_pm_domain *genpd) genpd_set_active(genpd); if (parent) - parent->sd_count++; + genpd_sd_counter_inc(parent); out: mutex_unlock(&genpd->lock); @@ -254,7 +264,7 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) || genpd->resume_count > 0) return 0; - if (genpd->sd_count > 0) + if (atomic_read(&genpd->sd_count) > 0) return -EBUSY; not_suspended = 0; @@ -325,8 +335,7 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) genpd->status = GPD_STATE_POWER_OFF; if (parent) { - genpd_sd_counter_dec(parent); - if (parent->sd_count == 0) + if (genpd_sd_counter_dec(parent)) genpd_queue_power_off_work(parent); genpd_release_lock(parent); @@ -506,7 +515,8 @@ static void pm_genpd_sync_poweroff(struct generic_pm_domain *genpd) if (genpd->status == GPD_STATE_POWER_OFF) return; - if (genpd->suspended_count != genpd->device_count || genpd->sd_count > 0) + if (genpd->suspended_count != genpd->device_count + || atomic_read(&genpd->sd_count) > 0) return; if (genpd->power_off) @@ -1167,7 +1177,7 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, list_add_tail(&new_subdomain->sd_node, &genpd->sd_list); new_subdomain->parent = genpd; if (subdomain->status != GPD_STATE_POWER_OFF) - genpd->sd_count++; + genpd_sd_counter_inc(genpd); out: mutex_unlock(&new_subdomain->lock); @@ -1242,7 +1252,7 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->gov = gov; INIT_WORK(&genpd->power_off_work, genpd_power_off_work_fn); genpd->in_progress = 0; - genpd->sd_count = 0; + atomic_set(&genpd->sd_count, 0); genpd->status = is_off ? GPD_STATE_POWER_OFF : GPD_STATE_ACTIVE; init_waitqueue_head(&genpd->status_wait_queue); genpd->poweroff_task = NULL; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f9ec173..81c5782 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -33,7 +33,7 @@ struct generic_pm_domain { struct dev_power_governor *gov; struct work_struct power_off_work; unsigned int in_progress; /* Number of devices being suspended now */ - unsigned int sd_count; /* Number of subdomains with power "on" */ + atomic_t sd_count; /* Number of subdomains with power "on" */ enum gpd_status status; /* Current state of the domain */ wait_queue_head_t status_wait_queue; struct task_struct *poweroff_task; /* Powering off task */ -- cgit v0.10.2 From 3c07cbc488bfd1ad1abf64d09cc692339b5f8a83 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 8 Aug 2011 23:43:14 +0200 Subject: PM / Domains: Do not take parent locks to modify subdomain counters After the subdomain counter in struct generic_pm_domain has been changed into an atomic_t field, it is possible to modify pm_genpd_poweron() and pm_genpd_poweroff() so that they don't take the parents locks. This requires pm_genpd_poweron() to increment the parent's subdomain counter before calling itself recursively for the parent and to decrement it if an error is to be returned. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 20e2b52..ef25b6f 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -93,12 +93,7 @@ int pm_genpd_poweron(struct generic_pm_domain *genpd) int ret = 0; start: - if (parent) { - genpd_acquire_lock(parent); - mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING); - } else { - mutex_lock(&genpd->lock); - } + mutex_lock(&genpd->lock); if (genpd->status == GPD_STATE_ACTIVE || (genpd->prepared_count > 0 && genpd->suspend_power_off)) @@ -109,31 +104,33 @@ int pm_genpd_poweron(struct generic_pm_domain *genpd) goto out; } - if (parent && parent->status != GPD_STATE_ACTIVE) { + if (parent) { + genpd_sd_counter_inc(parent); + mutex_unlock(&genpd->lock); - genpd_release_lock(parent); ret = pm_genpd_poweron(parent); - if (ret) + if (ret) { + genpd_sd_counter_dec(parent); return ret; + } + parent = NULL; goto start; } - if (genpd->power_on) { + if (genpd->power_on) ret = genpd->power_on(genpd); - if (ret) - goto out; - } - genpd_set_active(genpd); - if (parent) - genpd_sd_counter_inc(parent); + if (ret) { + if (genpd->parent) + genpd_sd_counter_dec(genpd->parent); + } else { + genpd_set_active(genpd); + } out: mutex_unlock(&genpd->lock); - if (parent) - genpd_release_lock(parent); return ret; } @@ -293,7 +290,8 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) genpd->poweroff_task = current; list_for_each_entry_reverse(dle, &genpd->dev_list, node) { - ret = __pm_genpd_save_device(dle, genpd); + ret = atomic_read(&genpd->sd_count) == 0 ? + __pm_genpd_save_device(dle, genpd) : -EBUSY; if (ret) { genpd_set_active(genpd); goto out; @@ -308,38 +306,32 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) } } - parent = genpd->parent; - if (parent) { - mutex_unlock(&genpd->lock); - - genpd_acquire_lock(parent); - mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING); - - if (genpd_abort_poweroff(genpd)) { - genpd_release_lock(parent); + if (genpd->power_off) { + if (atomic_read(&genpd->sd_count) > 0) { + ret = -EBUSY; goto out; } - } - if (genpd->power_off) { + /* + * If sd_count > 0 at this point, one of the children hasn't + * managed to call pm_genpd_poweron() for the parent yet after + * incrementing it. In that case pm_genpd_poweron() will wait + * for us to drop the lock, so we can call .power_off() and let + * the pm_genpd_poweron() restore power for us (this shouldn't + * happen very often). + */ ret = genpd->power_off(genpd); if (ret == -EBUSY) { genpd_set_active(genpd); - if (parent) - genpd_release_lock(parent); - goto out; } } genpd->status = GPD_STATE_POWER_OFF; - if (parent) { - if (genpd_sd_counter_dec(parent)) - genpd_queue_power_off_work(parent); - - genpd_release_lock(parent); - } + parent = genpd->parent; + if (parent && genpd_sd_counter_dec(parent)) + genpd_queue_power_off_work(parent); out: genpd->poweroff_task = NULL; -- cgit v0.10.2 From 9e08cf429697090d0fac57d493dc7b6de17a5eee Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 8 Aug 2011 23:43:22 +0200 Subject: PM / Domains: Make pm_genpd_poweron() always survive parent removal If pm_genpd_remove_subdomain() is called to remove a PM domain's subdomain and pm_genpd_poweron() is called for that subdomain at the same time, and the pm_genpd_poweron() called by it recursively for the parent returns an error, the first pm_genpd_poweron()'s error code path will attempt to decrement the subdomain counter of a PM domain that it's not a subdomain of any more. Rearrange the code in pm_genpd_poweron() to prevent this from happening. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index ef25b6f..dc423a9 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -89,12 +89,14 @@ static void genpd_set_active(struct generic_pm_domain *genpd) */ int pm_genpd_poweron(struct generic_pm_domain *genpd) { - struct generic_pm_domain *parent = genpd->parent; + struct generic_pm_domain *parent; int ret = 0; - start: mutex_lock(&genpd->lock); + parent = genpd->parent; + + start: if (genpd->status == GPD_STATE_ACTIVE || (genpd->prepared_count > 0 && genpd->suspend_power_off)) goto out; @@ -110,29 +112,34 @@ int pm_genpd_poweron(struct generic_pm_domain *genpd) mutex_unlock(&genpd->lock); ret = pm_genpd_poweron(parent); - if (ret) { - genpd_sd_counter_dec(parent); - return ret; - } + + mutex_lock(&genpd->lock); + + if (ret) + goto err; parent = NULL; goto start; } - if (genpd->power_on) + if (genpd->power_on) { ret = genpd->power_on(genpd); - - if (ret) { - if (genpd->parent) - genpd_sd_counter_dec(genpd->parent); - } else { - genpd_set_active(genpd); + if (ret) + goto err; } + genpd_set_active(genpd); + out: mutex_unlock(&genpd->lock); return ret; + + err: + if (genpd->parent) + genpd_sd_counter_dec(genpd->parent); + + goto out; } #endif /* CONFIG_PM */ -- cgit v0.10.2 From 3f241775c30365c33a0d2f6d40f4cf12470f48c6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 8 Aug 2011 23:43:29 +0200 Subject: PM / Domains: Add "wait for parent" status for generic PM domains The next patch will make it possible for a generic PM domain to have multiple parents (i.e. multiple PM domains it depends on). To prepare for that change it is necessary to change pm_genpd_poweron() so that it doesn't jump to the start label after running itself recursively for the parent domain. For this purpose, introduce a new PM domain status value GPD_STATE_WAIT_PARENT that will be set by pm_genpd_poweron() before calling itself recursively for the parent domain and modify the code in drivers/base/power/domain.c so that the GPD_STATE_WAIT_PARENT status is guaranteed to be preserved during the execution of pm_genpd_poweron() for the parent. This change also causes pm_genpd_add_subdomain() and pm_genpd_remove_subdomain() to wait for started pm_genpd_poweron() to complete and allows pm_genpd_runtime_resume() to avoid dropping the lock after powering on the PM domain. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index dc423a9..1f4b132 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -81,45 +81,59 @@ static void genpd_set_active(struct generic_pm_domain *genpd) } /** - * pm_genpd_poweron - Restore power to a given PM domain and its parents. + * __pm_genpd_poweron - Restore power to a given PM domain and its parents. * @genpd: PM domain to power up. * * Restore power to @genpd and all of its parents so that it is possible to * resume a device belonging to it. */ -int pm_genpd_poweron(struct generic_pm_domain *genpd) +int __pm_genpd_poweron(struct generic_pm_domain *genpd) + __releases(&genpd->lock) __acquires(&genpd->lock) { - struct generic_pm_domain *parent; + DEFINE_WAIT(wait); int ret = 0; - mutex_lock(&genpd->lock); + /* If the domain's parent is being waited for, we have to wait too. */ + for (;;) { + prepare_to_wait(&genpd->status_wait_queue, &wait, + TASK_UNINTERRUPTIBLE); + if (genpd->status != GPD_STATE_WAIT_PARENT) + break; + mutex_unlock(&genpd->lock); - parent = genpd->parent; + schedule(); + + mutex_lock(&genpd->lock); + } + finish_wait(&genpd->status_wait_queue, &wait); - start: if (genpd->status == GPD_STATE_ACTIVE || (genpd->prepared_count > 0 && genpd->suspend_power_off)) - goto out; + return 0; if (genpd->status != GPD_STATE_POWER_OFF) { genpd_set_active(genpd); - goto out; + return 0; } - if (parent) { - genpd_sd_counter_inc(parent); + if (genpd->parent) { + genpd_sd_counter_inc(genpd->parent); + genpd->status = GPD_STATE_WAIT_PARENT; mutex_unlock(&genpd->lock); - ret = pm_genpd_poweron(parent); + ret = pm_genpd_poweron(genpd->parent); mutex_lock(&genpd->lock); + /* + * The "wait for parent" status is guaranteed not to change + * while the parent is powering on. + */ + genpd->status = GPD_STATE_POWER_OFF; + wake_up_all(&genpd->status_wait_queue); if (ret) goto err; - - parent = NULL; - goto start; } if (genpd->power_on) { @@ -130,16 +144,27 @@ int pm_genpd_poweron(struct generic_pm_domain *genpd) genpd_set_active(genpd); - out: - mutex_unlock(&genpd->lock); - - return ret; + return 0; err: if (genpd->parent) genpd_sd_counter_dec(genpd->parent); - goto out; + return ret; +} + +/** + * pm_genpd_poweron - Restore power to a given PM domain and its parents. + * @genpd: PM domain to power up. + */ +int pm_genpd_poweron(struct generic_pm_domain *genpd) +{ + int ret; + + mutex_lock(&genpd->lock); + ret = __pm_genpd_poweron(genpd); + mutex_unlock(&genpd->lock); + return ret; } #endif /* CONFIG_PM */ @@ -225,7 +250,8 @@ static void __pm_genpd_restore_device(struct dev_list_entry *dle, */ static bool genpd_abort_poweroff(struct generic_pm_domain *genpd) { - return genpd->status == GPD_STATE_ACTIVE || genpd->resume_count > 0; + return genpd->status == GPD_STATE_WAIT_PARENT + || genpd->status == GPD_STATE_ACTIVE || genpd->resume_count > 0; } /** @@ -261,11 +287,13 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) /* * Do not try to power off the domain in the following situations: * (1) The domain is already in the "power off" state. - * (2) System suspend is in progress. + * (2) The domain is waiting for its parent to power up. * (3) One of the domain's devices is being resumed right now. + * (4) System suspend is in progress. */ - if (genpd->status == GPD_STATE_POWER_OFF || genpd->prepared_count > 0 - || genpd->resume_count > 0) + if (genpd->status == GPD_STATE_POWER_OFF + || genpd->status == GPD_STATE_WAIT_PARENT + || genpd->resume_count > 0 || genpd->prepared_count > 0) return 0; if (atomic_read(&genpd->sd_count) > 0) @@ -299,14 +327,15 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) list_for_each_entry_reverse(dle, &genpd->dev_list, node) { ret = atomic_read(&genpd->sd_count) == 0 ? __pm_genpd_save_device(dle, genpd) : -EBUSY; + + if (genpd_abort_poweroff(genpd)) + goto out; + if (ret) { genpd_set_active(genpd); goto out; } - if (genpd_abort_poweroff(genpd)) - goto out; - if (genpd->status == GPD_STATE_REPEAT) { genpd->poweroff_task = NULL; goto start; @@ -432,11 +461,12 @@ static int pm_genpd_runtime_resume(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - ret = pm_genpd_poweron(genpd); - if (ret) - return ret; - mutex_lock(&genpd->lock); + ret = __pm_genpd_poweron(genpd); + if (ret) { + mutex_unlock(&genpd->lock); + return ret; + } genpd->status = GPD_STATE_BUSY; genpd->resume_count++; for (;;) { diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 81c5782..97e3f8e 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -13,6 +13,7 @@ enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ + GPD_STATE_WAIT_PARENT, /* PM domain's parent is being waited for */ GPD_STATE_BUSY, /* Something is happening to the PM domain */ GPD_STATE_REPEAT, /* Power off in progress, to be repeated */ GPD_STATE_POWER_OFF, /* PM domain is off */ -- cgit v0.10.2 From 5063ce1571b73865cbdcd92db002e85809750c97 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 8 Aug 2011 23:43:40 +0200 Subject: PM / Domains: Allow generic PM domains to have multiple masters Currently, for a given generic PM domain there may be only one parent domain (i.e. a PM domain it depends on). However, there is at least one real-life case in which there should be two parents (masters) for one PM domain (the A3RV domain on SH7372 turns out to depend on the A4LC domain and it depends on the A4R domain and the same time). For this reason, allow a PM domain to have multiple parents (masters) by introducing objects representing links between PM domains. The (logical) links between PM domains represent relationships in which one domain is a master (i.e. it is depended on) and another domain is a slave (i.e. it depends on the master) with the rule that the slave cannot be powered on if the master is not powered on and the master cannot be powered off if the slave is not powered off. Each struct generic_pm_domain object representing a PM domain has two lists of links, a list of links in which it is a master and a list of links in which it is a slave. The first of these lists replaces the list of subdomains and the second one is used in place of the parent pointer. Each link is represented by struct gpd_link object containing pointers to the master and the slave and two struct list_head members allowing it to hook into two lists (the master's list of "master" links and the slave's list of "slave" links). This allows the code to get to the link from each side (either from the master or from the slave) and follow it in each direction. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 1f4b132..8fc538d 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -81,19 +81,20 @@ static void genpd_set_active(struct generic_pm_domain *genpd) } /** - * __pm_genpd_poweron - Restore power to a given PM domain and its parents. + * __pm_genpd_poweron - Restore power to a given PM domain and its masters. * @genpd: PM domain to power up. * - * Restore power to @genpd and all of its parents so that it is possible to + * Restore power to @genpd and all of its masters so that it is possible to * resume a device belonging to it. */ int __pm_genpd_poweron(struct generic_pm_domain *genpd) __releases(&genpd->lock) __acquires(&genpd->lock) { + struct gpd_link *link; DEFINE_WAIT(wait); int ret = 0; - /* If the domain's parent is being waited for, we have to wait too. */ + /* If the domain's master is being waited for, we have to wait too. */ for (;;) { prepare_to_wait(&genpd->status_wait_queue, &wait, TASK_UNINTERRUPTIBLE); @@ -116,24 +117,31 @@ int __pm_genpd_poweron(struct generic_pm_domain *genpd) return 0; } - if (genpd->parent) { - genpd_sd_counter_inc(genpd->parent); + /* + * The list is guaranteed not to change while the loop below is being + * executed, unless one of the masters' .power_on() callbacks fiddles + * with it. + */ + list_for_each_entry(link, &genpd->slave_links, slave_node) { + genpd_sd_counter_inc(link->master); genpd->status = GPD_STATE_WAIT_PARENT; mutex_unlock(&genpd->lock); - ret = pm_genpd_poweron(genpd->parent); + ret = pm_genpd_poweron(link->master); mutex_lock(&genpd->lock); /* * The "wait for parent" status is guaranteed not to change - * while the parent is powering on. + * while the master is powering on. */ genpd->status = GPD_STATE_POWER_OFF; wake_up_all(&genpd->status_wait_queue); - if (ret) + if (ret) { + genpd_sd_counter_dec(link->master); goto err; + } } if (genpd->power_on) { @@ -147,14 +155,14 @@ int __pm_genpd_poweron(struct generic_pm_domain *genpd) return 0; err: - if (genpd->parent) - genpd_sd_counter_dec(genpd->parent); + list_for_each_entry_continue_reverse(link, &genpd->slave_links, slave_node) + genpd_sd_counter_dec(link->master); return ret; } /** - * pm_genpd_poweron - Restore power to a given PM domain and its parents. + * pm_genpd_poweron - Restore power to a given PM domain and its masters. * @genpd: PM domain to power up. */ int pm_genpd_poweron(struct generic_pm_domain *genpd) @@ -278,8 +286,8 @@ void genpd_queue_power_off_work(struct generic_pm_domain *genpd) static int pm_genpd_poweroff(struct generic_pm_domain *genpd) __releases(&genpd->lock) __acquires(&genpd->lock) { - struct generic_pm_domain *parent; struct dev_list_entry *dle; + struct gpd_link *link; unsigned int not_suspended; int ret = 0; @@ -287,7 +295,7 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) /* * Do not try to power off the domain in the following situations: * (1) The domain is already in the "power off" state. - * (2) The domain is waiting for its parent to power up. + * (2) The domain is waiting for its master to power up. * (3) One of the domain's devices is being resumed right now. * (4) System suspend is in progress. */ @@ -349,8 +357,8 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) } /* - * If sd_count > 0 at this point, one of the children hasn't - * managed to call pm_genpd_poweron() for the parent yet after + * If sd_count > 0 at this point, one of the subdomains hasn't + * managed to call pm_genpd_poweron() for the master yet after * incrementing it. In that case pm_genpd_poweron() will wait * for us to drop the lock, so we can call .power_off() and let * the pm_genpd_poweron() restore power for us (this shouldn't @@ -365,9 +373,10 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) genpd->status = GPD_STATE_POWER_OFF; - parent = genpd->parent; - if (parent && genpd_sd_counter_dec(parent)) - genpd_queue_power_off_work(parent); + list_for_each_entry(link, &genpd->slave_links, slave_node) { + genpd_sd_counter_dec(link->master); + genpd_queue_power_off_work(link->master); + } out: genpd->poweroff_task = NULL; @@ -527,11 +536,11 @@ static inline void __pm_genpd_runtime_resume(struct device *dev, #ifdef CONFIG_PM_SLEEP /** - * pm_genpd_sync_poweroff - Synchronously power off a PM domain and its parents. + * pm_genpd_sync_poweroff - Synchronously power off a PM domain and its masters. * @genpd: PM domain to power off, if possible. * * Check if the given PM domain can be powered off (during system suspend or - * hibernation) and do that if so. Also, in that case propagate to its parent. + * hibernation) and do that if so. Also, in that case propagate to its masters. * * This function is only called in "noirq" stages of system power transitions, * so it need not acquire locks (all of the "noirq" callbacks are executed @@ -539,7 +548,7 @@ static inline void __pm_genpd_runtime_resume(struct device *dev, */ static void pm_genpd_sync_poweroff(struct generic_pm_domain *genpd) { - struct generic_pm_domain *parent = genpd->parent; + struct gpd_link *link; if (genpd->status == GPD_STATE_POWER_OFF) return; @@ -552,9 +561,10 @@ static void pm_genpd_sync_poweroff(struct generic_pm_domain *genpd) genpd->power_off(genpd); genpd->status = GPD_STATE_POWER_OFF; - if (parent) { - genpd_sd_counter_dec(parent); - pm_genpd_sync_poweroff(parent); + + list_for_each_entry(link, &genpd->slave_links, slave_node) { + genpd_sd_counter_dec(link->master); + pm_genpd_sync_poweroff(link->master); } } @@ -1173,7 +1183,7 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_subdomain) { - struct generic_pm_domain *subdomain; + struct gpd_link *link; int ret = 0; if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(new_subdomain)) @@ -1196,16 +1206,23 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(subdomain, &genpd->sd_list, sd_node) { - if (subdomain == new_subdomain) { + list_for_each_entry(link, &genpd->slave_links, slave_node) { + if (link->slave == new_subdomain && link->master == genpd) { ret = -EINVAL; goto out; } } - list_add_tail(&new_subdomain->sd_node, &genpd->sd_list); - new_subdomain->parent = genpd; - if (subdomain->status != GPD_STATE_POWER_OFF) + link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) { + ret = -ENOMEM; + goto out; + } + link->master = genpd; + list_add_tail(&link->master_node, &genpd->master_links); + link->slave = new_subdomain; + list_add_tail(&link->slave_node, &new_subdomain->slave_links); + if (new_subdomain->status != GPD_STATE_POWER_OFF) genpd_sd_counter_inc(genpd); out: @@ -1218,22 +1235,22 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, /** * pm_genpd_remove_subdomain - Remove a subdomain from an I/O PM domain. * @genpd: Master PM domain to remove the subdomain from. - * @target: Subdomain to be removed. + * @subdomain: Subdomain to be removed. */ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, - struct generic_pm_domain *target) + struct generic_pm_domain *subdomain) { - struct generic_pm_domain *subdomain; + struct gpd_link *link; int ret = -EINVAL; - if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(target)) + if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain)) return -EINVAL; start: genpd_acquire_lock(genpd); - list_for_each_entry(subdomain, &genpd->sd_list, sd_node) { - if (subdomain != target) + list_for_each_entry(link, &genpd->master_links, master_node) { + if (link->slave != subdomain) continue; mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING); @@ -1245,8 +1262,9 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, goto start; } - list_del(&subdomain->sd_node); - subdomain->parent = NULL; + list_del(&link->master_node); + list_del(&link->slave_node); + kfree(link); if (subdomain->status != GPD_STATE_POWER_OFF) genpd_sd_counter_dec(genpd); @@ -1273,10 +1291,9 @@ void pm_genpd_init(struct generic_pm_domain *genpd, if (IS_ERR_OR_NULL(genpd)) return; - INIT_LIST_HEAD(&genpd->sd_node); - genpd->parent = NULL; + INIT_LIST_HEAD(&genpd->master_links); + INIT_LIST_HEAD(&genpd->slave_links); INIT_LIST_HEAD(&genpd->dev_list); - INIT_LIST_HEAD(&genpd->sd_list); mutex_init(&genpd->lock); genpd->gov = gov; INIT_WORK(&genpd->power_off_work, genpd_power_off_work_fn); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 97e3f8e..5f5154d 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -26,9 +26,8 @@ struct dev_power_governor { struct generic_pm_domain { struct dev_pm_domain domain; /* PM domain operations */ struct list_head gpd_list_node; /* Node in the global PM domains list */ - struct list_head sd_node; /* Node in the parent's subdomain list */ - struct generic_pm_domain *parent; /* Parent PM domain */ - struct list_head sd_list; /* List of dubdomains */ + struct list_head master_links; /* Links with PM domain as a master */ + struct list_head slave_links; /* Links with PM domain as a slave */ struct list_head dev_list; /* List of devices */ struct mutex lock; struct dev_power_governor *gov; @@ -55,6 +54,13 @@ static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd) return container_of(pd, struct generic_pm_domain, domain); } +struct gpd_link { + struct generic_pm_domain *master; + struct list_head master_node; + struct generic_pm_domain *slave; + struct list_head slave_node; +}; + struct dev_list_entry { struct list_head node; struct device *dev; -- cgit v0.10.2 From 17877eb5a900f32bb5827a7b2109b6c9adff5fc3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 8 Aug 2011 23:43:50 +0200 Subject: PM / Domains: Rename GPD_STATE_WAIT_PARENT to GPD_STATE_WAIT_MASTER Since it is now possible for a PM domain to have multiple masters instead of one parent, rename the "wait for parent" status to reflect the new situation. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 8fc538d..c06f8f8 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -98,7 +98,7 @@ int __pm_genpd_poweron(struct generic_pm_domain *genpd) for (;;) { prepare_to_wait(&genpd->status_wait_queue, &wait, TASK_UNINTERRUPTIBLE); - if (genpd->status != GPD_STATE_WAIT_PARENT) + if (genpd->status != GPD_STATE_WAIT_MASTER) break; mutex_unlock(&genpd->lock); @@ -124,7 +124,7 @@ int __pm_genpd_poweron(struct generic_pm_domain *genpd) */ list_for_each_entry(link, &genpd->slave_links, slave_node) { genpd_sd_counter_inc(link->master); - genpd->status = GPD_STATE_WAIT_PARENT; + genpd->status = GPD_STATE_WAIT_MASTER; mutex_unlock(&genpd->lock); @@ -258,7 +258,7 @@ static void __pm_genpd_restore_device(struct dev_list_entry *dle, */ static bool genpd_abort_poweroff(struct generic_pm_domain *genpd) { - return genpd->status == GPD_STATE_WAIT_PARENT + return genpd->status == GPD_STATE_WAIT_MASTER || genpd->status == GPD_STATE_ACTIVE || genpd->resume_count > 0; } @@ -300,7 +300,7 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) * (4) System suspend is in progress. */ if (genpd->status == GPD_STATE_POWER_OFF - || genpd->status == GPD_STATE_WAIT_PARENT + || genpd->status == GPD_STATE_WAIT_MASTER || genpd->resume_count > 0 || genpd->prepared_count > 0) return 0; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 5f5154d..bf679f5 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -13,7 +13,7 @@ enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ - GPD_STATE_WAIT_PARENT, /* PM domain's parent is being waited for */ + GPD_STATE_WAIT_MASTER, /* PM domain's master is being waited for */ GPD_STATE_BUSY, /* Something is happening to the PM domain */ GPD_STATE_REPEAT, /* Power off in progress, to be repeated */ GPD_STATE_POWER_OFF, /* PM domain is off */ -- cgit v0.10.2 From bc0403ff16e5305c3a14c2b0826616ceaabbf058 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 8 Aug 2011 23:43:59 +0200 Subject: PM / Domains: Rename argument of pm_genpd_add_subdomain() Change the name of the second argument of pm_genpd_add_subdomain() so that it is (a) shorter and (b) in agreement with the name of the second argument of pm_genpd_add_subdomain(). Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index c06f8f8..1fc6cc9 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1178,36 +1178,36 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, /** * pm_genpd_add_subdomain - Add a subdomain to an I/O PM domain. * @genpd: Master PM domain to add the subdomain to. - * @new_subdomain: Subdomain to be added. + * @subdomain: Subdomain to be added. */ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, - struct generic_pm_domain *new_subdomain) + struct generic_pm_domain *subdomain) { struct gpd_link *link; int ret = 0; - if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(new_subdomain)) + if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain)) return -EINVAL; start: genpd_acquire_lock(genpd); - mutex_lock_nested(&new_subdomain->lock, SINGLE_DEPTH_NESTING); + mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING); - if (new_subdomain->status != GPD_STATE_POWER_OFF - && new_subdomain->status != GPD_STATE_ACTIVE) { - mutex_unlock(&new_subdomain->lock); + if (subdomain->status != GPD_STATE_POWER_OFF + && subdomain->status != GPD_STATE_ACTIVE) { + mutex_unlock(&subdomain->lock); genpd_release_lock(genpd); goto start; } if (genpd->status == GPD_STATE_POWER_OFF - && new_subdomain->status != GPD_STATE_POWER_OFF) { + && subdomain->status != GPD_STATE_POWER_OFF) { ret = -EINVAL; goto out; } list_for_each_entry(link, &genpd->slave_links, slave_node) { - if (link->slave == new_subdomain && link->master == genpd) { + if (link->slave == subdomain && link->master == genpd) { ret = -EINVAL; goto out; } @@ -1220,13 +1220,13 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, } link->master = genpd; list_add_tail(&link->master_node, &genpd->master_links); - link->slave = new_subdomain; - list_add_tail(&link->slave_node, &new_subdomain->slave_links); - if (new_subdomain->status != GPD_STATE_POWER_OFF) + link->slave = subdomain; + list_add_tail(&link->slave_node, &subdomain->slave_links); + if (subdomain->status != GPD_STATE_POWER_OFF) genpd_sd_counter_inc(genpd); out: - mutex_unlock(&new_subdomain->lock); + mutex_unlock(&subdomain->lock); genpd_release_lock(genpd); return ret; -- cgit v0.10.2 From 111058c3ff29a6a25216b31789046c2a330baa7d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 14 Aug 2011 13:35:39 +0200 Subject: ARM / shmobile: Make A3RV be a subdomain of A4LC on SH7372 Instead of coding the undocumented dependencies between power domains A3RV and A4LC on SH7372 directly into the low-level power up/down routines, make A3RV be a subdomain of A4LC, which will cause the same dependecies to hold. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm diff --git a/arch/arm/mach-shmobile/include/mach/sh7372.h b/arch/arm/mach-shmobile/include/mach/sh7372.h index ce595ce..713cd21 100644 --- a/arch/arm/mach-shmobile/include/mach/sh7372.h +++ b/arch/arm/mach-shmobile/include/mach/sh7372.h @@ -494,9 +494,12 @@ extern struct sh7372_pm_domain sh7372_a3sg; extern void sh7372_init_pm_domain(struct sh7372_pm_domain *sh7372_pd); extern void sh7372_add_device_to_domain(struct sh7372_pm_domain *sh7372_pd, struct platform_device *pdev); +extern void sh7372_pm_add_subdomain(struct sh7372_pm_domain *sh7372_pd, + struct sh7372_pm_domain *sh7372_sd); #else #define sh7372_init_pm_domain(pd) do { } while(0) #define sh7372_add_device_to_domain(pd, pdev) do { } while(0) +#define sh7372_pm_add_subdomain(pd, sd) do { } while(0) #endif /* CONFIG_PM */ #endif /* __ASM_SH7372_H__ */ diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index 933fb41..b471b79 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -91,35 +91,6 @@ static int pd_power_up(struct generic_pm_domain *genpd) return ret; } -static int pd_power_up_a3rv(struct generic_pm_domain *genpd) -{ - int ret = pd_power_up(genpd); - - /* force A4LC on after A3RV has been requested on */ - pm_genpd_poweron(&sh7372_a4lc.genpd); - - return ret; -} - -static int pd_power_down_a3rv(struct generic_pm_domain *genpd) -{ - int ret = pd_power_down(genpd); - - /* try to power down A4LC after A3RV is requested off */ - genpd_queue_power_off_work(&sh7372_a4lc.genpd); - - return ret; -} - -static int pd_power_down_a4lc(struct generic_pm_domain *genpd) -{ - /* only power down A4LC if A3RV is off */ - if (!(__raw_readl(PSTR) & (1 << sh7372_a3rv.bit_shift))) - return pd_power_down(genpd); - - return -EBUSY; -} - static bool pd_active_wakeup(struct device *dev) { return true; @@ -133,17 +104,8 @@ void sh7372_init_pm_domain(struct sh7372_pm_domain *sh7372_pd) genpd->stop_device = pm_clk_suspend; genpd->start_device = pm_clk_resume; genpd->active_wakeup = pd_active_wakeup; - - if (sh7372_pd == &sh7372_a4lc) { - genpd->power_off = pd_power_down_a4lc; - genpd->power_on = pd_power_up; - } else if (sh7372_pd == &sh7372_a3rv) { - genpd->power_off = pd_power_down_a3rv; - genpd->power_on = pd_power_up_a3rv; - } else { - genpd->power_off = pd_power_down; - genpd->power_on = pd_power_up; - } + genpd->power_off = pd_power_down; + genpd->power_on = pd_power_up; genpd->power_on(&sh7372_pd->genpd); } @@ -159,6 +121,12 @@ void sh7372_add_device_to_domain(struct sh7372_pm_domain *sh7372_pd, pm_genpd_add_device(&sh7372_pd->genpd, dev); } +void sh7372_pm_add_subdomain(struct sh7372_pm_domain *sh7372_pd, + struct sh7372_pm_domain *sh7372_sd) +{ + pm_genpd_add_subdomain(&sh7372_pd->genpd, &sh7372_sd->genpd); +} + struct sh7372_pm_domain sh7372_a4lc = { .bit_shift = 1, }; diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c index 79f0413..a12ee41 100644 --- a/arch/arm/mach-shmobile/setup-sh7372.c +++ b/arch/arm/mach-shmobile/setup-sh7372.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -848,6 +849,8 @@ void __init sh7372_add_standard_devices(void) sh7372_init_pm_domain(&sh7372_a3ri); sh7372_init_pm_domain(&sh7372_a3sg); + sh7372_pm_add_subdomain(&sh7372_a4lc, &sh7372_a3rv); + platform_add_devices(sh7372_early_devices, ARRAY_SIZE(sh7372_early_devices)); -- cgit v0.10.2 From 5c095a0e0d600d5a5a4207eaadabd18db46395ce Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 25 Aug 2011 15:33:50 +0200 Subject: PM: Introduce struct pm_subsys_data Introduce struct pm_subsys_data that may be subclassed by subsystems to store subsystem-specific information related to the device. Move the clock management fields accessed through the power.subsys_data pointer in struct device to the new strucutre. Signed-off-by: Rafael J. Wysocki diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index b471b79..54d4d86 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -115,7 +115,7 @@ void sh7372_add_device_to_domain(struct sh7372_pm_domain *sh7372_pd, struct device *dev = &pdev->dev; if (!dev->power.subsys_data) { - pm_clk_init(dev); + pm_clk_create(dev); pm_clk_add(dev, NULL); } pm_genpd_add_device(&sh7372_pd->genpd, dev); diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index 2c18d58..b7f1db4 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -17,11 +17,6 @@ #ifdef CONFIG_PM -struct pm_clk_data { - struct list_head clock_list; - spinlock_t lock; -}; - enum pce_status { PCE_STATUS_NONE = 0, PCE_STATUS_ACQUIRED, @@ -36,11 +31,6 @@ struct pm_clock_entry { enum pce_status status; }; -static struct pm_clk_data *__to_pcd(struct device *dev) -{ - return dev ? dev->power.subsys_data : NULL; -} - /** * pm_clk_add - Start using a device clock for power management. * @dev: Device whose clock is going to be used for power management. @@ -51,10 +41,10 @@ static struct pm_clk_data *__to_pcd(struct device *dev) */ int pm_clk_add(struct device *dev, const char *con_id) { - struct pm_clk_data *pcd = __to_pcd(dev); + struct pm_subsys_data *psd = dev_to_psd(dev); struct pm_clock_entry *ce; - if (!pcd) + if (!psd) return -EINVAL; ce = kzalloc(sizeof(*ce), GFP_KERNEL); @@ -73,9 +63,9 @@ int pm_clk_add(struct device *dev, const char *con_id) } } - spin_lock_irq(&pcd->lock); - list_add_tail(&ce->node, &pcd->clock_list); - spin_unlock_irq(&pcd->lock); + spin_lock_irq(&psd->lock); + list_add_tail(&ce->node, &psd->clock_list); + spin_unlock_irq(&psd->lock); return 0; } @@ -117,15 +107,15 @@ static void __pm_clk_remove(struct pm_clock_entry *ce) */ void pm_clk_remove(struct device *dev, const char *con_id) { - struct pm_clk_data *pcd = __to_pcd(dev); + struct pm_subsys_data *psd = dev_to_psd(dev); struct pm_clock_entry *ce; - if (!pcd) + if (!psd) return; - spin_lock_irq(&pcd->lock); + spin_lock_irq(&psd->lock); - list_for_each_entry(ce, &pcd->clock_list, node) { + list_for_each_entry(ce, &psd->clock_list, node) { if (!con_id && !ce->con_id) { __pm_clk_remove(ce); break; @@ -137,29 +127,45 @@ void pm_clk_remove(struct device *dev, const char *con_id) } } - spin_unlock_irq(&pcd->lock); + spin_unlock_irq(&psd->lock); } /** * pm_clk_init - Initialize a device's list of power management clocks. * @dev: Device to initialize the list of PM clocks for. * - * Allocate a struct pm_clk_data object, initialize its lock member and - * make the @dev's power.subsys_data field point to it. + * Initialize the lock and clock_list members of the device's pm_subsys_data + * object. */ -int pm_clk_init(struct device *dev) +void pm_clk_init(struct device *dev) { - struct pm_clk_data *pcd; + struct pm_subsys_data *psd = dev_to_psd(dev); + + if (!psd) + return; - pcd = kzalloc(sizeof(*pcd), GFP_KERNEL); - if (!pcd) { + INIT_LIST_HEAD(&psd->clock_list); + spin_lock_init(&psd->lock); +} + +/** + * pm_clk_create - Create and initialize a device's list of PM clocks. + * @dev: Device to create and initialize the list of PM clocks for. + * + * Allocate a struct pm_subsys_data object, initialize its lock and clock_list + * members and make the @dev's power.subsys_data field point to it. + */ +int pm_clk_create(struct device *dev) +{ + struct pm_subsys_data *psd; + + psd = kzalloc(sizeof(*psd), GFP_KERNEL); + if (!psd) { dev_err(dev, "Not enough memory for PM clock data.\n"); return -ENOMEM; } - - INIT_LIST_HEAD(&pcd->clock_list); - spin_lock_init(&pcd->lock); - dev->power.subsys_data = pcd; + dev->power.subsys_data = psd; + pm_clk_init(dev); return 0; } @@ -168,27 +174,27 @@ int pm_clk_init(struct device *dev) * @dev: Device to destroy the list of PM clocks for. * * Clear the @dev's power.subsys_data field, remove the list of clock entries - * from the struct pm_clk_data object pointed to by it before and free + * from the struct pm_subsys_data object pointed to by it before and free * that object. */ void pm_clk_destroy(struct device *dev) { - struct pm_clk_data *pcd = __to_pcd(dev); + struct pm_subsys_data *psd = dev_to_psd(dev); struct pm_clock_entry *ce, *c; - if (!pcd) + if (!psd) return; dev->power.subsys_data = NULL; - spin_lock_irq(&pcd->lock); + spin_lock_irq(&psd->lock); - list_for_each_entry_safe_reverse(ce, c, &pcd->clock_list, node) + list_for_each_entry_safe_reverse(ce, c, &psd->clock_list, node) __pm_clk_remove(ce); - spin_unlock_irq(&pcd->lock); + spin_unlock_irq(&psd->lock); - kfree(pcd); + kfree(psd); } #endif /* CONFIG_PM */ @@ -218,18 +224,18 @@ static void pm_clk_acquire(struct device *dev, */ int pm_clk_suspend(struct device *dev) { - struct pm_clk_data *pcd = __to_pcd(dev); + struct pm_subsys_data *psd = dev_to_psd(dev); struct pm_clock_entry *ce; unsigned long flags; dev_dbg(dev, "%s()\n", __func__); - if (!pcd) + if (!psd) return 0; - spin_lock_irqsave(&pcd->lock, flags); + spin_lock_irqsave(&psd->lock, flags); - list_for_each_entry_reverse(ce, &pcd->clock_list, node) { + list_for_each_entry_reverse(ce, &psd->clock_list, node) { if (ce->status == PCE_STATUS_NONE) pm_clk_acquire(dev, ce); @@ -239,7 +245,7 @@ int pm_clk_suspend(struct device *dev) } } - spin_unlock_irqrestore(&pcd->lock, flags); + spin_unlock_irqrestore(&psd->lock, flags); return 0; } @@ -250,18 +256,18 @@ int pm_clk_suspend(struct device *dev) */ int pm_clk_resume(struct device *dev) { - struct pm_clk_data *pcd = __to_pcd(dev); + struct pm_subsys_data *psd = dev_to_psd(dev); struct pm_clock_entry *ce; unsigned long flags; dev_dbg(dev, "%s()\n", __func__); - if (!pcd) + if (!psd) return 0; - spin_lock_irqsave(&pcd->lock, flags); + spin_lock_irqsave(&psd->lock, flags); - list_for_each_entry(ce, &pcd->clock_list, node) { + list_for_each_entry(ce, &psd->clock_list, node) { if (ce->status == PCE_STATUS_NONE) pm_clk_acquire(dev, ce); @@ -271,7 +277,7 @@ int pm_clk_resume(struct device *dev) } } - spin_unlock_irqrestore(&pcd->lock, flags); + spin_unlock_irqrestore(&psd->lock, flags); return 0; } @@ -309,7 +315,7 @@ static int pm_clk_notify(struct notifier_block *nb, if (dev->pm_domain) break; - error = pm_clk_init(dev); + error = pm_clk_create(dev); if (error) break; @@ -344,22 +350,22 @@ static int pm_clk_notify(struct notifier_block *nb, */ int pm_clk_suspend(struct device *dev) { - struct pm_clk_data *pcd = __to_pcd(dev); + struct pm_subsys_data *psd = dev_to_psd(dev); struct pm_clock_entry *ce; unsigned long flags; dev_dbg(dev, "%s()\n", __func__); /* If there is no driver, the clocks are already disabled. */ - if (!pcd || !dev->driver) + if (!psd || !dev->driver) return 0; - spin_lock_irqsave(&pcd->lock, flags); + spin_lock_irqsave(&psd->lock, flags); - list_for_each_entry_reverse(ce, &pcd->clock_list, node) + list_for_each_entry_reverse(ce, &psd->clock_list, node) clk_disable(ce->clk); - spin_unlock_irqrestore(&pcd->lock, flags); + spin_unlock_irqrestore(&psd->lock, flags); return 0; } @@ -370,22 +376,22 @@ int pm_clk_suspend(struct device *dev) */ int pm_clk_resume(struct device *dev) { - struct pm_clk_data *pcd = __to_pcd(dev); + struct pm_subsys_data *psd = dev_to_psd(dev); struct pm_clock_entry *ce; unsigned long flags; dev_dbg(dev, "%s()\n", __func__); /* If there is no driver, the clocks should remain disabled. */ - if (!pcd || !dev->driver) + if (!psd || !dev->driver) return 0; - spin_lock_irqsave(&pcd->lock, flags); + spin_lock_irqsave(&psd->lock, flags); - list_for_each_entry(ce, &pcd->clock_list, node) + list_for_each_entry(ce, &psd->clock_list, node) clk_enable(ce->clk); - spin_unlock_irqrestore(&pcd->lock, flags); + spin_unlock_irqrestore(&psd->lock, flags); return 0; } diff --git a/include/linux/device.h b/include/linux/device.h index c20dfbf..5d200ed 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -636,6 +636,11 @@ static inline void set_dev_node(struct device *dev, int node) } #endif +static inline struct pm_subsys_data *dev_to_psd(struct device *dev) +{ + return dev ? dev->power.subsys_data : NULL; +} + static inline unsigned int dev_get_uevent_suppress(const struct device *dev) { return dev->kobj.uevent_suppress; diff --git a/include/linux/pm.h b/include/linux/pm.h index f7c84c9..bf5ee37 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -421,6 +421,13 @@ enum rpm_request { struct wakeup_source; +struct pm_subsys_data { + spinlock_t lock; +#ifdef CONFIG_PM_CLK + struct list_head clock_list; +#endif +}; + struct dev_pm_info { pm_message_t power_state; unsigned int can_wakeup:1; @@ -462,7 +469,7 @@ struct dev_pm_info { unsigned long suspended_jiffies; unsigned long accounting_timestamp; #endif - void *subsys_data; /* Owned by the subsystem. */ + struct pm_subsys_data *subsys_data; /* Owned by the subsystem. */ }; extern void update_pm_runtime_accounting(struct device *dev); diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index daac05d..6b90630e 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -258,14 +258,18 @@ struct pm_clk_notifier_block { }; #ifdef CONFIG_PM_CLK -extern int pm_clk_init(struct device *dev); +extern void pm_clk_init(struct device *dev); +extern int pm_clk_create(struct device *dev); extern void pm_clk_destroy(struct device *dev); extern int pm_clk_add(struct device *dev, const char *con_id); extern void pm_clk_remove(struct device *dev, const char *con_id); extern int pm_clk_suspend(struct device *dev); extern int pm_clk_resume(struct device *dev); #else -static inline int pm_clk_init(struct device *dev) +static inline void pm_clk_init(struct device *dev) +{ +} +static inline int pm_clk_create(struct device *dev) { return -EINVAL; } -- cgit v0.10.2 From ef27bed1870dbd5fd363ff5ec51eebd5a695e277 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 25 Aug 2011 15:34:01 +0200 Subject: PM: Reference counting of power.subsys_data Since the power.subsys_data device field will be used by multiple filesystems, introduce a reference counting mechanism for it to avoid freeing it prematurely or changing its value at a wrong time. Make the PM clocks management code that currently is the only user of power.subsys_data use the new reference counting. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 2639ae7..6488ce1 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_PM) += sysfs.o generic_ops.o +obj-$(CONFIG_PM) += sysfs.o generic_ops.o common.o obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o obj-$(CONFIG_PM_RUNTIME) += runtime.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index b7f1db4..8383e24 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -140,12 +140,8 @@ void pm_clk_remove(struct device *dev, const char *con_id) void pm_clk_init(struct device *dev) { struct pm_subsys_data *psd = dev_to_psd(dev); - - if (!psd) - return; - - INIT_LIST_HEAD(&psd->clock_list); - spin_lock_init(&psd->lock); + if (psd) + INIT_LIST_HEAD(&psd->clock_list); } /** @@ -157,16 +153,8 @@ void pm_clk_init(struct device *dev) */ int pm_clk_create(struct device *dev) { - struct pm_subsys_data *psd; - - psd = kzalloc(sizeof(*psd), GFP_KERNEL); - if (!psd) { - dev_err(dev, "Not enough memory for PM clock data.\n"); - return -ENOMEM; - } - dev->power.subsys_data = psd; - pm_clk_init(dev); - return 0; + int ret = dev_pm_get_subsys_data(dev); + return ret < 0 ? ret : 0; } /** @@ -185,8 +173,6 @@ void pm_clk_destroy(struct device *dev) if (!psd) return; - dev->power.subsys_data = NULL; - spin_lock_irq(&psd->lock); list_for_each_entry_safe_reverse(ce, c, &psd->clock_list, node) @@ -194,7 +180,7 @@ void pm_clk_destroy(struct device *dev) spin_unlock_irq(&psd->lock); - kfree(psd); + dev_pm_put_subsys_data(dev); } #endif /* CONFIG_PM */ diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c new file mode 100644 index 0000000..d398cf0 --- /dev/null +++ b/drivers/base/power/common.c @@ -0,0 +1,87 @@ +/* + * drivers/base/power/common.c - Common device power management code. + * + * Copyright (C) 2011 Rafael J. Wysocki , Renesas Electronics Corp. + * + * This file is released under the GPLv2. + */ + +#include +#include +#include +#include +#include +#include + +/** + * dev_pm_get_subsys_data - Create or refcount power.subsys_data for device. + * @dev: Device to handle. + * + * If power.subsys_data is NULL, point it to a new object, otherwise increment + * its reference counter. Return 1 if a new object has been created, otherwise + * return 0 or error code. + */ +int dev_pm_get_subsys_data(struct device *dev) +{ + struct pm_subsys_data *psd; + int ret = 0; + + psd = kzalloc(sizeof(*psd), GFP_KERNEL); + if (!psd) + return -ENOMEM; + + spin_lock_irq(&dev->power.lock); + + if (dev->power.subsys_data) { + dev->power.subsys_data->refcount++; + } else { + spin_lock_init(&psd->lock); + psd->refcount = 1; + dev->power.subsys_data = psd; + pm_clk_init(dev); + psd = NULL; + ret = 1; + } + + spin_unlock_irq(&dev->power.lock); + + /* kfree() verifies that its argument is nonzero. */ + kfree(psd); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_get_subsys_data); + +/** + * dev_pm_put_subsys_data - Drop reference to power.subsys_data. + * @dev: Device to handle. + * + * If the reference counter of power.subsys_data is zero after dropping the + * reference, power.subsys_data is removed. Return 1 if that happens or 0 + * otherwise. + */ +int dev_pm_put_subsys_data(struct device *dev) +{ + struct pm_subsys_data *psd; + int ret = 0; + + spin_lock_irq(&dev->power.lock); + + psd = dev_to_psd(dev); + if (!psd) { + ret = -EINVAL; + goto out; + } + + if (--psd->refcount == 0) { + dev->power.subsys_data = NULL; + kfree(psd); + ret = 1; + } + + out: + spin_unlock_irq(&dev->power.lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_put_subsys_data); diff --git a/include/linux/pm.h b/include/linux/pm.h index bf5ee37..c6b5a0a 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -423,6 +423,7 @@ struct wakeup_source; struct pm_subsys_data { spinlock_t lock; + unsigned int refcount; #ifdef CONFIG_PM_CLK struct list_head clock_list; #endif @@ -473,6 +474,8 @@ struct dev_pm_info { }; extern void update_pm_runtime_accounting(struct device *dev); +extern int dev_pm_get_subsys_data(struct device *dev); +extern int dev_pm_put_subsys_data(struct device *dev); /* * Power domains provide callbacks that are executed during system suspend, -- cgit v0.10.2 From 4605ab653c1f9d7cc2dda8033de215c9cee325f4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 25 Aug 2011 15:34:12 +0200 Subject: PM / Domains: Use power.sybsys_data to reduce overhead Currently pm_genpd_runtime_resume() has to walk the list of devices from the device's PM domain to find the corresponding device list object containing the need_restore field to check if the driver's .runtime_resume() callback should be executed for the device. This is suboptimal and can be simplified by using power.sybsys_data to store device information used by the generic PM domains code. Signed-off-by: Rafael J. Wysocki diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index 54d4d86..4aeca0a 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -114,11 +114,9 @@ void sh7372_add_device_to_domain(struct sh7372_pm_domain *sh7372_pd, { struct device *dev = &pdev->dev; - if (!dev->power.subsys_data) { - pm_clk_create(dev); - pm_clk_add(dev, NULL); - } pm_genpd_add_device(&sh7372_pd->genpd, dev); + if (pm_clk_no_clocks(dev)) + pm_clk_add(dev, NULL); } void sh7372_pm_add_subdomain(struct sh7372_pm_domain *sh7372_pd, diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 1fc6cc9..339eb2d 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -181,18 +181,18 @@ int pm_genpd_poweron(struct generic_pm_domain *genpd) /** * __pm_genpd_save_device - Save the pre-suspend state of a device. - * @dle: Device list entry of the device to save the state of. + * @pdd: Domain data of the device to save the state of. * @genpd: PM domain the device belongs to. */ -static int __pm_genpd_save_device(struct dev_list_entry *dle, +static int __pm_genpd_save_device(struct pm_domain_data *pdd, struct generic_pm_domain *genpd) __releases(&genpd->lock) __acquires(&genpd->lock) { - struct device *dev = dle->dev; + struct device *dev = pdd->dev; struct device_driver *drv = dev->driver; int ret = 0; - if (dle->need_restore) + if (pdd->need_restore) return 0; mutex_unlock(&genpd->lock); @@ -210,24 +210,24 @@ static int __pm_genpd_save_device(struct dev_list_entry *dle, mutex_lock(&genpd->lock); if (!ret) - dle->need_restore = true; + pdd->need_restore = true; return ret; } /** * __pm_genpd_restore_device - Restore the pre-suspend state of a device. - * @dle: Device list entry of the device to restore the state of. + * @pdd: Domain data of the device to restore the state of. * @genpd: PM domain the device belongs to. */ -static void __pm_genpd_restore_device(struct dev_list_entry *dle, +static void __pm_genpd_restore_device(struct pm_domain_data *pdd, struct generic_pm_domain *genpd) __releases(&genpd->lock) __acquires(&genpd->lock) { - struct device *dev = dle->dev; + struct device *dev = pdd->dev; struct device_driver *drv = dev->driver; - if (!dle->need_restore) + if (!pdd->need_restore) return; mutex_unlock(&genpd->lock); @@ -244,7 +244,7 @@ static void __pm_genpd_restore_device(struct dev_list_entry *dle, mutex_lock(&genpd->lock); - dle->need_restore = false; + pdd->need_restore = false; } /** @@ -286,7 +286,7 @@ void genpd_queue_power_off_work(struct generic_pm_domain *genpd) static int pm_genpd_poweroff(struct generic_pm_domain *genpd) __releases(&genpd->lock) __acquires(&genpd->lock) { - struct dev_list_entry *dle; + struct pm_domain_data *pdd; struct gpd_link *link; unsigned int not_suspended; int ret = 0; @@ -308,8 +308,8 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) return -EBUSY; not_suspended = 0; - list_for_each_entry(dle, &genpd->dev_list, node) - if (dle->dev->driver && !pm_runtime_suspended(dle->dev)) + list_for_each_entry(pdd, &genpd->dev_list, list_node) + if (pdd->dev->driver && !pm_runtime_suspended(pdd->dev)) not_suspended++; if (not_suspended > genpd->in_progress) @@ -332,9 +332,9 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) genpd->status = GPD_STATE_BUSY; genpd->poweroff_task = current; - list_for_each_entry_reverse(dle, &genpd->dev_list, node) { + list_for_each_entry_reverse(pdd, &genpd->dev_list, list_node) { ret = atomic_read(&genpd->sd_count) == 0 ? - __pm_genpd_save_device(dle, genpd) : -EBUSY; + __pm_genpd_save_device(pdd, genpd) : -EBUSY; if (genpd_abort_poweroff(genpd)) goto out; @@ -433,24 +433,6 @@ static int pm_genpd_runtime_suspend(struct device *dev) } /** - * __pm_genpd_runtime_resume - Resume a device belonging to I/O PM domain. - * @dev: Device to resume. - * @genpd: PM domain the device belongs to. - */ -static void __pm_genpd_runtime_resume(struct device *dev, - struct generic_pm_domain *genpd) -{ - struct dev_list_entry *dle; - - list_for_each_entry(dle, &genpd->dev_list, node) { - if (dle->dev == dev) { - __pm_genpd_restore_device(dle, genpd); - break; - } - } -} - -/** * pm_genpd_runtime_resume - Resume a device belonging to I/O PM domain. * @dev: Device to resume. * @@ -495,7 +477,7 @@ static int pm_genpd_runtime_resume(struct device *dev) mutex_lock(&genpd->lock); } finish_wait(&genpd->status_wait_queue, &wait); - __pm_genpd_runtime_resume(dev, genpd); + __pm_genpd_restore_device(&dev->power.subsys_data->domain_data, genpd); genpd->resume_count--; genpd_set_active(genpd); wake_up_all(&genpd->status_wait_queue); @@ -525,8 +507,6 @@ void pm_genpd_poweroff_unused(void) #else static inline void genpd_power_off_work_fn(struct work_struct *work) {} -static inline void __pm_genpd_runtime_resume(struct device *dev, - struct generic_pm_domain *genpd) {} #define pm_genpd_runtime_suspend NULL #define pm_genpd_runtime_resume NULL @@ -1083,7 +1063,7 @@ static void pm_genpd_complete(struct device *dev) */ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) { - struct dev_list_entry *dle; + struct pm_domain_data *pdd; int ret = 0; dev_dbg(dev, "%s()\n", __func__); @@ -1103,26 +1083,20 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) goto out; } - list_for_each_entry(dle, &genpd->dev_list, node) - if (dle->dev == dev) { + list_for_each_entry(pdd, &genpd->dev_list, list_node) + if (pdd->dev == dev) { ret = -EINVAL; goto out; } - dle = kzalloc(sizeof(*dle), GFP_KERNEL); - if (!dle) { - ret = -ENOMEM; - goto out; - } - - dle->dev = dev; - dle->need_restore = false; - list_add_tail(&dle->node, &genpd->dev_list); genpd->device_count++; - spin_lock_irq(&dev->power.lock); dev->pm_domain = &genpd->domain; - spin_unlock_irq(&dev->power.lock); + dev_pm_get_subsys_data(dev); + pdd = &dev->power.subsys_data->domain_data; + pdd->dev = dev; + pdd->need_restore = false; + list_add_tail(&pdd->list_node, &genpd->dev_list); out: genpd_release_lock(genpd); @@ -1138,7 +1112,7 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) int pm_genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev) { - struct dev_list_entry *dle; + struct pm_domain_data *pdd; int ret = -EINVAL; dev_dbg(dev, "%s()\n", __func__); @@ -1153,17 +1127,16 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(dle, &genpd->dev_list, node) { - if (dle->dev != dev) + list_for_each_entry(pdd, &genpd->dev_list, list_node) { + if (pdd->dev != dev) continue; - spin_lock_irq(&dev->power.lock); + list_del_init(&pdd->list_node); + pdd->dev = NULL; + dev_pm_put_subsys_data(dev); dev->pm_domain = NULL; - spin_unlock_irq(&dev->power.lock); genpd->device_count--; - list_del(&dle->node); - kfree(dle); ret = 0; break; diff --git a/include/linux/pm.h b/include/linux/pm.h index c6b5a0a..ed10f24 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -421,12 +421,21 @@ enum rpm_request { struct wakeup_source; +struct pm_domain_data { + struct list_head list_node; + struct device *dev; + bool need_restore; +}; + struct pm_subsys_data { spinlock_t lock; unsigned int refcount; #ifdef CONFIG_PM_CLK struct list_head clock_list; #endif +#ifdef CONFIG_PM_GENERIC_DOMAINS + struct pm_domain_data domain_data; +#endif }; struct dev_pm_info { diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index bf679f5..5cce46c 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -61,12 +61,6 @@ struct gpd_link { struct list_head slave_node; }; -struct dev_list_entry { - struct list_head node; - struct device *dev; - bool need_restore; -}; - #ifdef CONFIG_PM_GENERIC_DOMAINS extern int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev); diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 6b90630e..a5a41a8 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -258,6 +258,12 @@ struct pm_clk_notifier_block { }; #ifdef CONFIG_PM_CLK +static inline bool pm_clk_no_clocks(struct device *dev) +{ + return dev && dev->power.subsys_data + && list_empty(&dev->power.subsys_data->clock_list); +} + extern void pm_clk_init(struct device *dev); extern int pm_clk_create(struct device *dev); extern void pm_clk_destroy(struct device *dev); @@ -266,6 +272,10 @@ extern void pm_clk_remove(struct device *dev, const char *con_id); extern int pm_clk_suspend(struct device *dev); extern int pm_clk_resume(struct device *dev); #else +static inline bool pm_clk_no_clocks(struct device *dev) +{ + return true; +} static inline void pm_clk_init(struct device *dev) { } -- cgit v0.10.2 From b5e8d269d814763d597ccc0108d1fa6639ad35a1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 25 Aug 2011 15:34:19 +0200 Subject: PM: Move clock-related definitions and headers to separate file Since the PM clock management code in drivers/base/power/clock_ops.c is used for both runtime PM and system suspend/hibernation, the definitions of data structures and headers related to it should not be located in include/linux/pm_rumtime.h. Move them to a separate header file. Signed-off-by: Rafael J. Wysocki diff --git a/arch/arm/mach-omap1/pm_bus.c b/arch/arm/mach-omap1/pm_bus.c index 943072d..7868e75 100644 --- a/arch/arm/mach-omap1/pm_bus.c +++ b/arch/arm/mach-omap1/pm_bus.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index fadbe5b..074884c 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index 0ea71f8..4978e81 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index 4aeca0a..5d35831 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-shmobile/pm_runtime.c b/arch/arm/mach-shmobile/pm_runtime.c index 6ec454e..bd5c6a3 100644 --- a/arch/arm/mach-shmobile/pm_runtime.c +++ b/arch/arm/mach-shmobile/pm_runtime.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index 8383e24..cb44b58 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c index d398cf0..29820c3 100644 --- a/drivers/base/power/common.c +++ b/drivers/base/power/common.c @@ -10,8 +10,7 @@ #include #include #include -#include -#include +#include /** * dev_pm_get_subsys_data - Create or refcount power.subsys_data for device. diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h new file mode 100644 index 0000000..8348866 --- /dev/null +++ b/include/linux/pm_clock.h @@ -0,0 +1,71 @@ +/* + * pm_clock.h - Definitions and headers related to device clocks. + * + * Copyright (C) 2011 Rafael J. Wysocki , Renesas Electronics Corp. + * + * This file is released under the GPLv2. + */ + +#ifndef _LINUX_PM_CLOCK_H +#define _LINUX_PM_CLOCK_H + +#include +#include + +struct pm_clk_notifier_block { + struct notifier_block nb; + struct dev_pm_domain *pm_domain; + char *con_ids[]; +}; + +#ifdef CONFIG_PM_CLK +static inline bool pm_clk_no_clocks(struct device *dev) +{ + return dev && dev->power.subsys_data + && list_empty(&dev->power.subsys_data->clock_list); +} + +extern void pm_clk_init(struct device *dev); +extern int pm_clk_create(struct device *dev); +extern void pm_clk_destroy(struct device *dev); +extern int pm_clk_add(struct device *dev, const char *con_id); +extern void pm_clk_remove(struct device *dev, const char *con_id); +extern int pm_clk_suspend(struct device *dev); +extern int pm_clk_resume(struct device *dev); +#else +static inline bool pm_clk_no_clocks(struct device *dev) +{ + return true; +} +static inline void pm_clk_init(struct device *dev) +{ +} +static inline int pm_clk_create(struct device *dev) +{ + return -EINVAL; +} +static inline void pm_clk_destroy(struct device *dev) +{ +} +static inline int pm_clk_add(struct device *dev, const char *con_id) +{ + return -EINVAL; +} +static inline void pm_clk_remove(struct device *dev, const char *con_id) +{ +} +#define pm_clk_suspend NULL +#define pm_clk_resume NULL +#endif + +#ifdef CONFIG_HAVE_CLK +extern void pm_clk_add_notifier(struct bus_type *bus, + struct pm_clk_notifier_block *clknb); +#else +static inline void pm_clk_add_notifier(struct bus_type *bus, + struct pm_clk_notifier_block *clknb) +{ +} +#endif + +#endif diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index a5a41a8..70b2840 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -251,60 +251,4 @@ static inline void pm_runtime_dont_use_autosuspend(struct device *dev) __pm_runtime_use_autosuspend(dev, false); } -struct pm_clk_notifier_block { - struct notifier_block nb; - struct dev_pm_domain *pm_domain; - char *con_ids[]; -}; - -#ifdef CONFIG_PM_CLK -static inline bool pm_clk_no_clocks(struct device *dev) -{ - return dev && dev->power.subsys_data - && list_empty(&dev->power.subsys_data->clock_list); -} - -extern void pm_clk_init(struct device *dev); -extern int pm_clk_create(struct device *dev); -extern void pm_clk_destroy(struct device *dev); -extern int pm_clk_add(struct device *dev, const char *con_id); -extern void pm_clk_remove(struct device *dev, const char *con_id); -extern int pm_clk_suspend(struct device *dev); -extern int pm_clk_resume(struct device *dev); -#else -static inline bool pm_clk_no_clocks(struct device *dev) -{ - return true; -} -static inline void pm_clk_init(struct device *dev) -{ -} -static inline int pm_clk_create(struct device *dev) -{ - return -EINVAL; -} -static inline void pm_clk_destroy(struct device *dev) -{ -} -static inline int pm_clk_add(struct device *dev, const char *con_id) -{ - return -EINVAL; -} -static inline void pm_clk_remove(struct device *dev, const char *con_id) -{ -} -#define pm_clk_suspend NULL -#define pm_clk_resume NULL -#endif - -#ifdef CONFIG_HAVE_CLK -extern void pm_clk_add_notifier(struct bus_type *bus, - struct pm_clk_notifier_block *clknb); -#else -static inline void pm_clk_add_notifier(struct bus_type *bus, - struct pm_clk_notifier_block *clknb) -{ -} -#endif - #endif -- cgit v0.10.2 From e8db0be1245de16a6cc6365506abc392c3c212d4 Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Thu, 25 Aug 2011 15:35:03 +0200 Subject: PM QoS: Move and rename the implementation files The PM QoS implementation files are better named kernel/power/qos.c and include/linux/pm_qos.h. The PM QoS support is compiled under the CONFIG_PM option. Signed-off-by: Jean Pihet Acked-by: markgross Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki diff --git a/arch/arm/mach-msm/clock.c b/arch/arm/mach-msm/clock.c index 22a5376..d9145df 100644 --- a/arch/arm/mach-msm/clock.c +++ b/arch/arm/mach-msm/clock.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 431ab11..2e69e09 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -37,7 +37,7 @@ #include #include #include /* need_resched() */ -#include +#include #include #include #include diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index d4c5423..0df0141 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index 12c9890..f62fde2 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index c47f3d0..3600f19 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/media/video/via-camera.c b/drivers/media/video/via-camera.c index 85d3048..b3ca389 100644 --- a/drivers/media/video/via-camera.c +++ b/drivers/media/video/via-camera.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 2198e61..07031af 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -47,7 +47,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c index 3774dd0..aaab76c 100644 --- a/drivers/net/wireless/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/ipw2x00/ipw2100.c @@ -161,7 +161,7 @@ that only one external action is invoked at a time. #include #include #include -#include +#include #include diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ddee79b..f72ac6b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -31,7 +31,7 @@ #include #ifdef __KERNEL__ -#include +#include #include #include #include diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h new file mode 100644 index 0000000..7ba67541 --- /dev/null +++ b/include/linux/pm_qos.h @@ -0,0 +1,61 @@ +#ifndef _LINUX_PM_QOS_H +#define _LINUX_PM_QOS_H +/* interface for the pm_qos_power infrastructure of the linux kernel. + * + * Mark Gross + */ +#include +#include +#include + +#define PM_QOS_RESERVED 0 +#define PM_QOS_CPU_DMA_LATENCY 1 +#define PM_QOS_NETWORK_LATENCY 2 +#define PM_QOS_NETWORK_THROUGHPUT 3 + +#define PM_QOS_NUM_CLASSES 4 +#define PM_QOS_DEFAULT_VALUE -1 + +#define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) +#define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) +#define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 + +struct pm_qos_request_list { + struct plist_node list; + int pm_qos_class; +}; + +#ifdef CONFIG_PM +void pm_qos_add_request(struct pm_qos_request_list *l, + int pm_qos_class, s32 value); +void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req, + s32 new_value); +void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req); + +int pm_qos_request(int pm_qos_class); +int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier); +int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier); +int pm_qos_request_active(struct pm_qos_request_list *req); +#else +static inline void pm_qos_add_request(struct pm_qos_request_list *l, + int pm_qos_class, s32 value) + { return; } +static inline void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req, + s32 new_value) + { return; } +static inline void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req) + { return; } + +static inline int pm_qos_request(int pm_qos_class) + { return 0; } +static inline int pm_qos_add_notifier(int pm_qos_class, + struct notifier_block *notifier) + { return 0; } +static inline int pm_qos_remove_notifier(int pm_qos_class, + struct notifier_block *notifier) + { return 0; } +static inline int pm_qos_request_active(struct pm_qos_request_list *req) + { return 0; } +#endif + +#endif diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h deleted file mode 100644 index a7d87f9..0000000 --- a/include/linux/pm_qos_params.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef _LINUX_PM_QOS_PARAMS_H -#define _LINUX_PM_QOS_PARAMS_H -/* interface for the pm_qos_power infrastructure of the linux kernel. - * - * Mark Gross - */ -#include -#include -#include - -#define PM_QOS_RESERVED 0 -#define PM_QOS_CPU_DMA_LATENCY 1 -#define PM_QOS_NETWORK_LATENCY 2 -#define PM_QOS_NETWORK_THROUGHPUT 3 - -#define PM_QOS_NUM_CLASSES 4 -#define PM_QOS_DEFAULT_VALUE -1 - -#define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) -#define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) -#define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 - -struct pm_qos_request_list { - struct plist_node list; - int pm_qos_class; -}; - -void pm_qos_add_request(struct pm_qos_request_list *l, int pm_qos_class, s32 value); -void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req, - s32 new_value); -void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req); - -int pm_qos_request(int pm_qos_class); -int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier); -int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier); -int pm_qos_request_active(struct pm_qos_request_list *req); - -#endif diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 57e71fa..666ee91 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -29,7 +29,7 @@ #include #include #include -#include +#include #define snd_pcm_substream_chip(substream) ((substream)->private_data) #define snd_pcm_chip(pcm) ((pcm)->private_data) diff --git a/kernel/Makefile b/kernel/Makefile index eca595e..2da48d3 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ - notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ + notifier.o ksysfs.o sched_clock.o cred.o \ async.o range.o obj-y += groups.o diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c deleted file mode 100644 index 37f05d0..0000000 --- a/kernel/pm_qos_params.c +++ /dev/null @@ -1,481 +0,0 @@ -/* - * This module exposes the interface to kernel space for specifying - * QoS dependencies. It provides infrastructure for registration of: - * - * Dependents on a QoS value : register requests - * Watchers of QoS value : get notified when target QoS value changes - * - * This QoS design is best effort based. Dependents register their QoS needs. - * Watchers register to keep track of the current QoS needs of the system. - * - * There are 3 basic classes of QoS parameter: latency, timeout, throughput - * each have defined units: - * latency: usec - * timeout: usec <-- currently not used. - * throughput: kbs (kilo byte / sec) - * - * There are lists of pm_qos_objects each one wrapping requests, notifiers - * - * User mode requests on a QOS parameter register themselves to the - * subsystem by opening the device node /dev/... and writing there request to - * the node. As long as the process holds a file handle open to the node the - * client continues to be accounted for. Upon file release the usermode - * request is removed and a new qos target is computed. This way when the - * request that the application has is cleaned up when closes the file - * pointer or exits the pm_qos_object will get an opportunity to clean up. - * - * Mark Gross - */ - -/*#define DEBUG*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* - * locking rule: all changes to requests or notifiers lists - * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock - * held, taken with _irqsave. One lock to rule them all - */ -enum pm_qos_type { - PM_QOS_MAX, /* return the largest value */ - PM_QOS_MIN /* return the smallest value */ -}; - -/* - * Note: The lockless read path depends on the CPU accessing - * target_value atomically. Atomic access is only guaranteed on all CPU - * types linux supports for 32 bit quantites - */ -struct pm_qos_object { - struct plist_head requests; - struct blocking_notifier_head *notifiers; - struct miscdevice pm_qos_power_miscdev; - char *name; - s32 target_value; /* Do not change to 64 bit */ - s32 default_value; - enum pm_qos_type type; -}; - -static DEFINE_SPINLOCK(pm_qos_lock); - -static struct pm_qos_object null_pm_qos; -static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); -static struct pm_qos_object cpu_dma_pm_qos = { - .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests), - .notifiers = &cpu_dma_lat_notifier, - .name = "cpu_dma_latency", - .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, - .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, - .type = PM_QOS_MIN, -}; - -static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); -static struct pm_qos_object network_lat_pm_qos = { - .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests), - .notifiers = &network_lat_notifier, - .name = "network_latency", - .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, - .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, - .type = PM_QOS_MIN -}; - - -static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); -static struct pm_qos_object network_throughput_pm_qos = { - .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests), - .notifiers = &network_throughput_notifier, - .name = "network_throughput", - .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, - .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, - .type = PM_QOS_MAX, -}; - - -static struct pm_qos_object *pm_qos_array[] = { - &null_pm_qos, - &cpu_dma_pm_qos, - &network_lat_pm_qos, - &network_throughput_pm_qos -}; - -static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, - size_t count, loff_t *f_pos); -static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, - size_t count, loff_t *f_pos); -static int pm_qos_power_open(struct inode *inode, struct file *filp); -static int pm_qos_power_release(struct inode *inode, struct file *filp); - -static const struct file_operations pm_qos_power_fops = { - .write = pm_qos_power_write, - .read = pm_qos_power_read, - .open = pm_qos_power_open, - .release = pm_qos_power_release, - .llseek = noop_llseek, -}; - -/* unlocked internal variant */ -static inline int pm_qos_get_value(struct pm_qos_object *o) -{ - if (plist_head_empty(&o->requests)) - return o->default_value; - - switch (o->type) { - case PM_QOS_MIN: - return plist_first(&o->requests)->prio; - - case PM_QOS_MAX: - return plist_last(&o->requests)->prio; - - default: - /* runtime check for not using enum */ - BUG(); - } -} - -static inline s32 pm_qos_read_value(struct pm_qos_object *o) -{ - return o->target_value; -} - -static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value) -{ - o->target_value = value; -} - -static void update_target(struct pm_qos_object *o, struct plist_node *node, - int del, int value) -{ - unsigned long flags; - int prev_value, curr_value; - - spin_lock_irqsave(&pm_qos_lock, flags); - prev_value = pm_qos_get_value(o); - /* PM_QOS_DEFAULT_VALUE is a signal that the value is unchanged */ - if (value != PM_QOS_DEFAULT_VALUE) { - /* - * to change the list, we atomically remove, reinit - * with new value and add, then see if the extremal - * changed - */ - plist_del(node, &o->requests); - plist_node_init(node, value); - plist_add(node, &o->requests); - } else if (del) { - plist_del(node, &o->requests); - } else { - plist_add(node, &o->requests); - } - curr_value = pm_qos_get_value(o); - pm_qos_set_value(o, curr_value); - spin_unlock_irqrestore(&pm_qos_lock, flags); - - if (prev_value != curr_value) - blocking_notifier_call_chain(o->notifiers, - (unsigned long)curr_value, - NULL); -} - -static int register_pm_qos_misc(struct pm_qos_object *qos) -{ - qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR; - qos->pm_qos_power_miscdev.name = qos->name; - qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops; - - return misc_register(&qos->pm_qos_power_miscdev); -} - -static int find_pm_qos_object_by_minor(int minor) -{ - int pm_qos_class; - - for (pm_qos_class = 0; - pm_qos_class < PM_QOS_NUM_CLASSES; pm_qos_class++) { - if (minor == - pm_qos_array[pm_qos_class]->pm_qos_power_miscdev.minor) - return pm_qos_class; - } - return -1; -} - -/** - * pm_qos_request - returns current system wide qos expectation - * @pm_qos_class: identification of which qos value is requested - * - * This function returns the current target value. - */ -int pm_qos_request(int pm_qos_class) -{ - return pm_qos_read_value(pm_qos_array[pm_qos_class]); -} -EXPORT_SYMBOL_GPL(pm_qos_request); - -int pm_qos_request_active(struct pm_qos_request_list *req) -{ - return req->pm_qos_class != 0; -} -EXPORT_SYMBOL_GPL(pm_qos_request_active); - -/** - * pm_qos_add_request - inserts new qos request into the list - * @dep: pointer to a preallocated handle - * @pm_qos_class: identifies which list of qos request to use - * @value: defines the qos request - * - * This function inserts a new entry in the pm_qos_class list of requested qos - * performance characteristics. It recomputes the aggregate QoS expectations - * for the pm_qos_class of parameters and initializes the pm_qos_request_list - * handle. Caller needs to save this handle for later use in updates and - * removal. - */ - -void pm_qos_add_request(struct pm_qos_request_list *dep, - int pm_qos_class, s32 value) -{ - struct pm_qos_object *o = pm_qos_array[pm_qos_class]; - int new_value; - - if (pm_qos_request_active(dep)) { - WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n"); - return; - } - if (value == PM_QOS_DEFAULT_VALUE) - new_value = o->default_value; - else - new_value = value; - plist_node_init(&dep->list, new_value); - dep->pm_qos_class = pm_qos_class; - update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE); -} -EXPORT_SYMBOL_GPL(pm_qos_add_request); - -/** - * pm_qos_update_request - modifies an existing qos request - * @pm_qos_req : handle to list element holding a pm_qos request to use - * @value: defines the qos request - * - * Updates an existing qos request for the pm_qos_class of parameters along - * with updating the target pm_qos_class value. - * - * Attempts are made to make this code callable on hot code paths. - */ -void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req, - s32 new_value) -{ - s32 temp; - struct pm_qos_object *o; - - if (!pm_qos_req) /*guard against callers passing in null */ - return; - - if (!pm_qos_request_active(pm_qos_req)) { - WARN(1, KERN_ERR "pm_qos_update_request() called for unknown object\n"); - return; - } - - o = pm_qos_array[pm_qos_req->pm_qos_class]; - - if (new_value == PM_QOS_DEFAULT_VALUE) - temp = o->default_value; - else - temp = new_value; - - if (temp != pm_qos_req->list.prio) - update_target(o, &pm_qos_req->list, 0, temp); -} -EXPORT_SYMBOL_GPL(pm_qos_update_request); - -/** - * pm_qos_remove_request - modifies an existing qos request - * @pm_qos_req: handle to request list element - * - * Will remove pm qos request from the list of requests and - * recompute the current target value for the pm_qos_class. Call this - * on slow code paths. - */ -void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req) -{ - struct pm_qos_object *o; - - if (pm_qos_req == NULL) - return; - /* silent return to keep pcm code cleaner */ - - if (!pm_qos_request_active(pm_qos_req)) { - WARN(1, KERN_ERR "pm_qos_remove_request() called for unknown object\n"); - return; - } - - o = pm_qos_array[pm_qos_req->pm_qos_class]; - update_target(o, &pm_qos_req->list, 1, PM_QOS_DEFAULT_VALUE); - memset(pm_qos_req, 0, sizeof(*pm_qos_req)); -} -EXPORT_SYMBOL_GPL(pm_qos_remove_request); - -/** - * pm_qos_add_notifier - sets notification entry for changes to target value - * @pm_qos_class: identifies which qos target changes should be notified. - * @notifier: notifier block managed by caller. - * - * will register the notifier into a notification chain that gets called - * upon changes to the pm_qos_class target value. - */ -int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) -{ - int retval; - - retval = blocking_notifier_chain_register( - pm_qos_array[pm_qos_class]->notifiers, notifier); - - return retval; -} -EXPORT_SYMBOL_GPL(pm_qos_add_notifier); - -/** - * pm_qos_remove_notifier - deletes notification entry from chain. - * @pm_qos_class: identifies which qos target changes are notified. - * @notifier: notifier block to be removed. - * - * will remove the notifier from the notification chain that gets called - * upon changes to the pm_qos_class target value. - */ -int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) -{ - int retval; - - retval = blocking_notifier_chain_unregister( - pm_qos_array[pm_qos_class]->notifiers, notifier); - - return retval; -} -EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); - -static int pm_qos_power_open(struct inode *inode, struct file *filp) -{ - long pm_qos_class; - - pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); - if (pm_qos_class >= 0) { - struct pm_qos_request_list *req = kzalloc(sizeof(*req), GFP_KERNEL); - if (!req) - return -ENOMEM; - - pm_qos_add_request(req, pm_qos_class, PM_QOS_DEFAULT_VALUE); - filp->private_data = req; - - if (filp->private_data) - return 0; - } - return -EPERM; -} - -static int pm_qos_power_release(struct inode *inode, struct file *filp) -{ - struct pm_qos_request_list *req; - - req = filp->private_data; - pm_qos_remove_request(req); - kfree(req); - - return 0; -} - - -static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, - size_t count, loff_t *f_pos) -{ - s32 value; - unsigned long flags; - struct pm_qos_object *o; - struct pm_qos_request_list *pm_qos_req = filp->private_data; - - if (!pm_qos_req) - return -EINVAL; - if (!pm_qos_request_active(pm_qos_req)) - return -EINVAL; - - o = pm_qos_array[pm_qos_req->pm_qos_class]; - spin_lock_irqsave(&pm_qos_lock, flags); - value = pm_qos_get_value(o); - spin_unlock_irqrestore(&pm_qos_lock, flags); - - return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32)); -} - -static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, - size_t count, loff_t *f_pos) -{ - s32 value; - struct pm_qos_request_list *pm_qos_req; - - if (count == sizeof(s32)) { - if (copy_from_user(&value, buf, sizeof(s32))) - return -EFAULT; - } else if (count <= 11) { /* ASCII perhaps? */ - char ascii_value[11]; - unsigned long int ulval; - int ret; - - if (copy_from_user(ascii_value, buf, count)) - return -EFAULT; - - if (count > 10) { - if (ascii_value[10] == '\n') - ascii_value[10] = '\0'; - else - return -EINVAL; - } else { - ascii_value[count] = '\0'; - } - ret = strict_strtoul(ascii_value, 16, &ulval); - if (ret) { - pr_debug("%s, 0x%lx, 0x%x\n", ascii_value, ulval, ret); - return -EINVAL; - } - value = (s32)lower_32_bits(ulval); - } else { - return -EINVAL; - } - - pm_qos_req = filp->private_data; - pm_qos_update_request(pm_qos_req, value); - - return count; -} - - -static int __init pm_qos_power_init(void) -{ - int ret = 0; - - ret = register_pm_qos_misc(&cpu_dma_pm_qos); - if (ret < 0) { - printk(KERN_ERR "pm_qos_param: cpu_dma_latency setup failed\n"); - return ret; - } - ret = register_pm_qos_misc(&network_lat_pm_qos); - if (ret < 0) { - printk(KERN_ERR "pm_qos_param: network_latency setup failed\n"); - return ret; - } - ret = register_pm_qos_misc(&network_throughput_pm_qos); - if (ret < 0) - printk(KERN_ERR - "pm_qos_param: network_throughput setup failed\n"); - - return ret; -} - -late_initcall(pm_qos_power_init); diff --git a/kernel/power/Makefile b/kernel/power/Makefile index c5ebc6a..ad6bdd8 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -1,7 +1,7 @@ ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG -obj-$(CONFIG_PM) += main.o +obj-$(CONFIG_PM) += main.o qos.o obj-$(CONFIG_PM_SLEEP) += console.o obj-$(CONFIG_FREEZER) += process.o obj-$(CONFIG_SUSPEND) += suspend.o diff --git a/kernel/power/qos.c b/kernel/power/qos.c new file mode 100644 index 0000000..61b4738 --- /dev/null +++ b/kernel/power/qos.c @@ -0,0 +1,481 @@ +/* + * This module exposes the interface to kernel space for specifying + * QoS dependencies. It provides infrastructure for registration of: + * + * Dependents on a QoS value : register requests + * Watchers of QoS value : get notified when target QoS value changes + * + * This QoS design is best effort based. Dependents register their QoS needs. + * Watchers register to keep track of the current QoS needs of the system. + * + * There are 3 basic classes of QoS parameter: latency, timeout, throughput + * each have defined units: + * latency: usec + * timeout: usec <-- currently not used. + * throughput: kbs (kilo byte / sec) + * + * There are lists of pm_qos_objects each one wrapping requests, notifiers + * + * User mode requests on a QOS parameter register themselves to the + * subsystem by opening the device node /dev/... and writing there request to + * the node. As long as the process holds a file handle open to the node the + * client continues to be accounted for. Upon file release the usermode + * request is removed and a new qos target is computed. This way when the + * request that the application has is cleaned up when closes the file + * pointer or exits the pm_qos_object will get an opportunity to clean up. + * + * Mark Gross + */ + +/*#define DEBUG*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * locking rule: all changes to requests or notifiers lists + * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock + * held, taken with _irqsave. One lock to rule them all + */ +enum pm_qos_type { + PM_QOS_MAX, /* return the largest value */ + PM_QOS_MIN /* return the smallest value */ +}; + +/* + * Note: The lockless read path depends on the CPU accessing + * target_value atomically. Atomic access is only guaranteed on all CPU + * types linux supports for 32 bit quantites + */ +struct pm_qos_object { + struct plist_head requests; + struct blocking_notifier_head *notifiers; + struct miscdevice pm_qos_power_miscdev; + char *name; + s32 target_value; /* Do not change to 64 bit */ + s32 default_value; + enum pm_qos_type type; +}; + +static DEFINE_SPINLOCK(pm_qos_lock); + +static struct pm_qos_object null_pm_qos; +static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); +static struct pm_qos_object cpu_dma_pm_qos = { + .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests), + .notifiers = &cpu_dma_lat_notifier, + .name = "cpu_dma_latency", + .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, + .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, + .type = PM_QOS_MIN, +}; + +static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); +static struct pm_qos_object network_lat_pm_qos = { + .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests), + .notifiers = &network_lat_notifier, + .name = "network_latency", + .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, + .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, + .type = PM_QOS_MIN +}; + + +static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); +static struct pm_qos_object network_throughput_pm_qos = { + .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests), + .notifiers = &network_throughput_notifier, + .name = "network_throughput", + .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, + .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, + .type = PM_QOS_MAX, +}; + + +static struct pm_qos_object *pm_qos_array[] = { + &null_pm_qos, + &cpu_dma_pm_qos, + &network_lat_pm_qos, + &network_throughput_pm_qos +}; + +static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, + size_t count, loff_t *f_pos); +static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, + size_t count, loff_t *f_pos); +static int pm_qos_power_open(struct inode *inode, struct file *filp); +static int pm_qos_power_release(struct inode *inode, struct file *filp); + +static const struct file_operations pm_qos_power_fops = { + .write = pm_qos_power_write, + .read = pm_qos_power_read, + .open = pm_qos_power_open, + .release = pm_qos_power_release, + .llseek = noop_llseek, +}; + +/* unlocked internal variant */ +static inline int pm_qos_get_value(struct pm_qos_object *o) +{ + if (plist_head_empty(&o->requests)) + return o->default_value; + + switch (o->type) { + case PM_QOS_MIN: + return plist_first(&o->requests)->prio; + + case PM_QOS_MAX: + return plist_last(&o->requests)->prio; + + default: + /* runtime check for not using enum */ + BUG(); + } +} + +static inline s32 pm_qos_read_value(struct pm_qos_object *o) +{ + return o->target_value; +} + +static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value) +{ + o->target_value = value; +} + +static void update_target(struct pm_qos_object *o, struct plist_node *node, + int del, int value) +{ + unsigned long flags; + int prev_value, curr_value; + + spin_lock_irqsave(&pm_qos_lock, flags); + prev_value = pm_qos_get_value(o); + /* PM_QOS_DEFAULT_VALUE is a signal that the value is unchanged */ + if (value != PM_QOS_DEFAULT_VALUE) { + /* + * to change the list, we atomically remove, reinit + * with new value and add, then see if the extremal + * changed + */ + plist_del(node, &o->requests); + plist_node_init(node, value); + plist_add(node, &o->requests); + } else if (del) { + plist_del(node, &o->requests); + } else { + plist_add(node, &o->requests); + } + curr_value = pm_qos_get_value(o); + pm_qos_set_value(o, curr_value); + spin_unlock_irqrestore(&pm_qos_lock, flags); + + if (prev_value != curr_value) + blocking_notifier_call_chain(o->notifiers, + (unsigned long)curr_value, + NULL); +} + +static int register_pm_qos_misc(struct pm_qos_object *qos) +{ + qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR; + qos->pm_qos_power_miscdev.name = qos->name; + qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops; + + return misc_register(&qos->pm_qos_power_miscdev); +} + +static int find_pm_qos_object_by_minor(int minor) +{ + int pm_qos_class; + + for (pm_qos_class = 0; + pm_qos_class < PM_QOS_NUM_CLASSES; pm_qos_class++) { + if (minor == + pm_qos_array[pm_qos_class]->pm_qos_power_miscdev.minor) + return pm_qos_class; + } + return -1; +} + +/** + * pm_qos_request - returns current system wide qos expectation + * @pm_qos_class: identification of which qos value is requested + * + * This function returns the current target value. + */ +int pm_qos_request(int pm_qos_class) +{ + return pm_qos_read_value(pm_qos_array[pm_qos_class]); +} +EXPORT_SYMBOL_GPL(pm_qos_request); + +int pm_qos_request_active(struct pm_qos_request_list *req) +{ + return req->pm_qos_class != 0; +} +EXPORT_SYMBOL_GPL(pm_qos_request_active); + +/** + * pm_qos_add_request - inserts new qos request into the list + * @dep: pointer to a preallocated handle + * @pm_qos_class: identifies which list of qos request to use + * @value: defines the qos request + * + * This function inserts a new entry in the pm_qos_class list of requested qos + * performance characteristics. It recomputes the aggregate QoS expectations + * for the pm_qos_class of parameters and initializes the pm_qos_request_list + * handle. Caller needs to save this handle for later use in updates and + * removal. + */ + +void pm_qos_add_request(struct pm_qos_request_list *dep, + int pm_qos_class, s32 value) +{ + struct pm_qos_object *o = pm_qos_array[pm_qos_class]; + int new_value; + + if (pm_qos_request_active(dep)) { + WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n"); + return; + } + if (value == PM_QOS_DEFAULT_VALUE) + new_value = o->default_value; + else + new_value = value; + plist_node_init(&dep->list, new_value); + dep->pm_qos_class = pm_qos_class; + update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE); +} +EXPORT_SYMBOL_GPL(pm_qos_add_request); + +/** + * pm_qos_update_request - modifies an existing qos request + * @pm_qos_req : handle to list element holding a pm_qos request to use + * @value: defines the qos request + * + * Updates an existing qos request for the pm_qos_class of parameters along + * with updating the target pm_qos_class value. + * + * Attempts are made to make this code callable on hot code paths. + */ +void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req, + s32 new_value) +{ + s32 temp; + struct pm_qos_object *o; + + if (!pm_qos_req) /*guard against callers passing in null */ + return; + + if (!pm_qos_request_active(pm_qos_req)) { + WARN(1, KERN_ERR "pm_qos_update_request() called for unknown object\n"); + return; + } + + o = pm_qos_array[pm_qos_req->pm_qos_class]; + + if (new_value == PM_QOS_DEFAULT_VALUE) + temp = o->default_value; + else + temp = new_value; + + if (temp != pm_qos_req->list.prio) + update_target(o, &pm_qos_req->list, 0, temp); +} +EXPORT_SYMBOL_GPL(pm_qos_update_request); + +/** + * pm_qos_remove_request - modifies an existing qos request + * @pm_qos_req: handle to request list element + * + * Will remove pm qos request from the list of requests and + * recompute the current target value for the pm_qos_class. Call this + * on slow code paths. + */ +void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req) +{ + struct pm_qos_object *o; + + if (pm_qos_req == NULL) + return; + /* silent return to keep pcm code cleaner */ + + if (!pm_qos_request_active(pm_qos_req)) { + WARN(1, KERN_ERR "pm_qos_remove_request() called for unknown object\n"); + return; + } + + o = pm_qos_array[pm_qos_req->pm_qos_class]; + update_target(o, &pm_qos_req->list, 1, PM_QOS_DEFAULT_VALUE); + memset(pm_qos_req, 0, sizeof(*pm_qos_req)); +} +EXPORT_SYMBOL_GPL(pm_qos_remove_request); + +/** + * pm_qos_add_notifier - sets notification entry for changes to target value + * @pm_qos_class: identifies which qos target changes should be notified. + * @notifier: notifier block managed by caller. + * + * will register the notifier into a notification chain that gets called + * upon changes to the pm_qos_class target value. + */ +int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) +{ + int retval; + + retval = blocking_notifier_chain_register( + pm_qos_array[pm_qos_class]->notifiers, notifier); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_qos_add_notifier); + +/** + * pm_qos_remove_notifier - deletes notification entry from chain. + * @pm_qos_class: identifies which qos target changes are notified. + * @notifier: notifier block to be removed. + * + * will remove the notifier from the notification chain that gets called + * upon changes to the pm_qos_class target value. + */ +int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) +{ + int retval; + + retval = blocking_notifier_chain_unregister( + pm_qos_array[pm_qos_class]->notifiers, notifier); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); + +static int pm_qos_power_open(struct inode *inode, struct file *filp) +{ + long pm_qos_class; + + pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); + if (pm_qos_class >= 0) { + struct pm_qos_request_list *req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + + pm_qos_add_request(req, pm_qos_class, PM_QOS_DEFAULT_VALUE); + filp->private_data = req; + + if (filp->private_data) + return 0; + } + return -EPERM; +} + +static int pm_qos_power_release(struct inode *inode, struct file *filp) +{ + struct pm_qos_request_list *req; + + req = filp->private_data; + pm_qos_remove_request(req); + kfree(req); + + return 0; +} + + +static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, + size_t count, loff_t *f_pos) +{ + s32 value; + unsigned long flags; + struct pm_qos_object *o; + struct pm_qos_request_list *pm_qos_req = filp->private_data; + + if (!pm_qos_req) + return -EINVAL; + if (!pm_qos_request_active(pm_qos_req)) + return -EINVAL; + + o = pm_qos_array[pm_qos_req->pm_qos_class]; + spin_lock_irqsave(&pm_qos_lock, flags); + value = pm_qos_get_value(o); + spin_unlock_irqrestore(&pm_qos_lock, flags); + + return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32)); +} + +static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, + size_t count, loff_t *f_pos) +{ + s32 value; + struct pm_qos_request_list *pm_qos_req; + + if (count == sizeof(s32)) { + if (copy_from_user(&value, buf, sizeof(s32))) + return -EFAULT; + } else if (count <= 11) { /* ASCII perhaps? */ + char ascii_value[11]; + unsigned long int ulval; + int ret; + + if (copy_from_user(ascii_value, buf, count)) + return -EFAULT; + + if (count > 10) { + if (ascii_value[10] == '\n') + ascii_value[10] = '\0'; + else + return -EINVAL; + } else { + ascii_value[count] = '\0'; + } + ret = strict_strtoul(ascii_value, 16, &ulval); + if (ret) { + pr_debug("%s, 0x%lx, 0x%x\n", ascii_value, ulval, ret); + return -EINVAL; + } + value = (s32)lower_32_bits(ulval); + } else { + return -EINVAL; + } + + pm_qos_req = filp->private_data; + pm_qos_update_request(pm_qos_req, value); + + return count; +} + + +static int __init pm_qos_power_init(void) +{ + int ret = 0; + + ret = register_pm_qos_misc(&cpu_dma_pm_qos); + if (ret < 0) { + printk(KERN_ERR "pm_qos_param: cpu_dma_latency setup failed\n"); + return ret; + } + ret = register_pm_qos_misc(&network_lat_pm_qos); + if (ret < 0) { + printk(KERN_ERR "pm_qos_param: network_latency setup failed\n"); + return ret; + } + ret = register_pm_qos_misc(&network_throughput_pm_qos); + if (ret < 0) + printk(KERN_ERR + "pm_qos_param: network_throughput setup failed\n"); + + return ret; +} + +late_initcall(pm_qos_power_init); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 866f269..bb771e9 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d6470c7..9604abc 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 6f09eca..beefb0a 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 1c6be91..c74e228 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v0.10.2 From cc74998618a66d34651c784dd02412614c3e81cc Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Thu, 25 Aug 2011 15:35:12 +0200 Subject: PM QoS: Minor clean-ups - Misc fixes to improve code readability: * rename struct pm_qos_request_list to struct pm_qos_request, * rename pm_qos_req parameter to req in internal code, consistenly use req in the API parameters, * update the in-kernel API callers to the new parameters names, * rename of fields names (requests, list, node, constraints) Signed-off-by: Jean Pihet Acked-by: markgross Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki diff --git a/drivers/media/video/via-camera.c b/drivers/media/video/via-camera.c index b3ca389..fba6c64 100644 --- a/drivers/media/video/via-camera.c +++ b/drivers/media/video/via-camera.c @@ -69,7 +69,7 @@ struct via_camera { struct mutex lock; enum viacam_opstate opstate; unsigned long flags; - struct pm_qos_request_list qos_request; + struct pm_qos_request qos_request; /* * GPIO info for power/reset management */ diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c index aaab76c..db35f99 100644 --- a/drivers/net/wireless/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/ipw2x00/ipw2100.c @@ -174,7 +174,7 @@ that only one external action is invoked at a time. #define DRV_DESCRIPTION "Intel(R) PRO/Wireless 2100 Network Driver" #define DRV_COPYRIGHT "Copyright(c) 2003-2006 Intel Corporation" -static struct pm_qos_request_list ipw2100_pm_qos_req; +static struct pm_qos_request ipw2100_pm_qos_req; /* Debugging stuff */ #ifdef CONFIG_IPW2100_DEBUG diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f72ac6b..f38ab5b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -964,7 +964,7 @@ struct net_device { */ char name[IFNAMSIZ]; - struct pm_qos_request_list pm_qos_req; + struct pm_qos_request pm_qos_req; /* device name hash chain */ struct hlist_node name_hlist; diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 7ba67541..6b0968f 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -20,30 +20,30 @@ #define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 -struct pm_qos_request_list { - struct plist_node list; +struct pm_qos_request { + struct plist_node node; int pm_qos_class; }; #ifdef CONFIG_PM -void pm_qos_add_request(struct pm_qos_request_list *l, - int pm_qos_class, s32 value); -void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req, +void pm_qos_add_request(struct pm_qos_request *req, int pm_qos_class, + s32 value); +void pm_qos_update_request(struct pm_qos_request *req, s32 new_value); -void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req); +void pm_qos_remove_request(struct pm_qos_request *req); int pm_qos_request(int pm_qos_class); int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier); int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier); -int pm_qos_request_active(struct pm_qos_request_list *req); +int pm_qos_request_active(struct pm_qos_request *req); #else -static inline void pm_qos_add_request(struct pm_qos_request_list *l, +static inline void pm_qos_add_request(struct pm_qos_request *req, int pm_qos_class, s32 value) { return; } -static inline void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req, +static inline void pm_qos_update_request(struct pm_qos_request *req, s32 new_value) { return; } -static inline void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req) +static inline void pm_qos_remove_request(struct pm_qos_request *req) { return; } static inline int pm_qos_request(int pm_qos_class) @@ -54,7 +54,7 @@ static inline int pm_qos_add_notifier(int pm_qos_class, static inline int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) { return 0; } -static inline int pm_qos_request_active(struct pm_qos_request_list *req) +static inline int pm_qos_request_active(struct pm_qos_request *req) { return 0; } #endif diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 666ee91..54cb079 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -373,7 +373,7 @@ struct snd_pcm_substream { int number; char name[32]; /* substream name */ int stream; /* stream (direction) */ - struct pm_qos_request_list latency_pm_qos_req; /* pm_qos request */ + struct pm_qos_request latency_pm_qos_req; /* pm_qos request */ size_t buffer_bytes_max; /* limit ring buffer size */ struct snd_dma_buffer dma_buffer; unsigned int dma_buf_id; diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 61b4738..aa52c44 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -45,7 +45,7 @@ #include /* - * locking rule: all changes to requests or notifiers lists + * locking rule: all changes to constraints or notifiers lists * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock * held, taken with _irqsave. One lock to rule them all */ @@ -60,7 +60,7 @@ enum pm_qos_type { * types linux supports for 32 bit quantites */ struct pm_qos_object { - struct plist_head requests; + struct plist_head constraints; struct blocking_notifier_head *notifiers; struct miscdevice pm_qos_power_miscdev; char *name; @@ -74,7 +74,7 @@ static DEFINE_SPINLOCK(pm_qos_lock); static struct pm_qos_object null_pm_qos; static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); static struct pm_qos_object cpu_dma_pm_qos = { - .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests), + .constraints = PLIST_HEAD_INIT(cpu_dma_pm_qos.constraints), .notifiers = &cpu_dma_lat_notifier, .name = "cpu_dma_latency", .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, @@ -84,7 +84,7 @@ static struct pm_qos_object cpu_dma_pm_qos = { static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); static struct pm_qos_object network_lat_pm_qos = { - .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests), + .constraints = PLIST_HEAD_INIT(network_lat_pm_qos.constraints), .notifiers = &network_lat_notifier, .name = "network_latency", .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, @@ -95,7 +95,7 @@ static struct pm_qos_object network_lat_pm_qos = { static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); static struct pm_qos_object network_throughput_pm_qos = { - .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests), + .constraints = PLIST_HEAD_INIT(network_throughput_pm_qos.constraints), .notifiers = &network_throughput_notifier, .name = "network_throughput", .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, @@ -129,15 +129,15 @@ static const struct file_operations pm_qos_power_fops = { /* unlocked internal variant */ static inline int pm_qos_get_value(struct pm_qos_object *o) { - if (plist_head_empty(&o->requests)) + if (plist_head_empty(&o->constraints)) return o->default_value; switch (o->type) { case PM_QOS_MIN: - return plist_first(&o->requests)->prio; + return plist_first(&o->constraints)->prio; case PM_QOS_MAX: - return plist_last(&o->requests)->prio; + return plist_last(&o->constraints)->prio; default: /* runtime check for not using enum */ @@ -170,13 +170,13 @@ static void update_target(struct pm_qos_object *o, struct plist_node *node, * with new value and add, then see if the extremal * changed */ - plist_del(node, &o->requests); + plist_del(node, &o->constraints); plist_node_init(node, value); - plist_add(node, &o->requests); + plist_add(node, &o->constraints); } else if (del) { - plist_del(node, &o->requests); + plist_del(node, &o->constraints); } else { - plist_add(node, &o->requests); + plist_add(node, &o->constraints); } curr_value = pm_qos_get_value(o); pm_qos_set_value(o, curr_value); @@ -222,7 +222,7 @@ int pm_qos_request(int pm_qos_class) } EXPORT_SYMBOL_GPL(pm_qos_request); -int pm_qos_request_active(struct pm_qos_request_list *req) +int pm_qos_request_active(struct pm_qos_request *req) { return req->pm_qos_class != 0; } @@ -230,24 +230,24 @@ EXPORT_SYMBOL_GPL(pm_qos_request_active); /** * pm_qos_add_request - inserts new qos request into the list - * @dep: pointer to a preallocated handle + * @req: pointer to a preallocated handle * @pm_qos_class: identifies which list of qos request to use * @value: defines the qos request * * This function inserts a new entry in the pm_qos_class list of requested qos * performance characteristics. It recomputes the aggregate QoS expectations - * for the pm_qos_class of parameters and initializes the pm_qos_request_list + * for the pm_qos_class of parameters and initializes the pm_qos_request * handle. Caller needs to save this handle for later use in updates and * removal. */ -void pm_qos_add_request(struct pm_qos_request_list *dep, +void pm_qos_add_request(struct pm_qos_request *req, int pm_qos_class, s32 value) { struct pm_qos_object *o = pm_qos_array[pm_qos_class]; int new_value; - if (pm_qos_request_active(dep)) { + if (pm_qos_request_active(req)) { WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n"); return; } @@ -255,15 +255,15 @@ void pm_qos_add_request(struct pm_qos_request_list *dep, new_value = o->default_value; else new_value = value; - plist_node_init(&dep->list, new_value); - dep->pm_qos_class = pm_qos_class; - update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE); + plist_node_init(&req->node, new_value); + req->pm_qos_class = pm_qos_class; + update_target(o, &req->node, 0, PM_QOS_DEFAULT_VALUE); } EXPORT_SYMBOL_GPL(pm_qos_add_request); /** * pm_qos_update_request - modifies an existing qos request - * @pm_qos_req : handle to list element holding a pm_qos request to use + * @req : handle to list element holding a pm_qos request to use * @value: defines the qos request * * Updates an existing qos request for the pm_qos_class of parameters along @@ -271,56 +271,56 @@ EXPORT_SYMBOL_GPL(pm_qos_add_request); * * Attempts are made to make this code callable on hot code paths. */ -void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req, +void pm_qos_update_request(struct pm_qos_request *req, s32 new_value) { s32 temp; struct pm_qos_object *o; - if (!pm_qos_req) /*guard against callers passing in null */ + if (!req) /*guard against callers passing in null */ return; - if (!pm_qos_request_active(pm_qos_req)) { + if (!pm_qos_request_active(req)) { WARN(1, KERN_ERR "pm_qos_update_request() called for unknown object\n"); return; } - o = pm_qos_array[pm_qos_req->pm_qos_class]; + o = pm_qos_array[req->pm_qos_class]; if (new_value == PM_QOS_DEFAULT_VALUE) temp = o->default_value; else temp = new_value; - if (temp != pm_qos_req->list.prio) - update_target(o, &pm_qos_req->list, 0, temp); + if (temp != req->node.prio) + update_target(o, &req->node, 0, temp); } EXPORT_SYMBOL_GPL(pm_qos_update_request); /** * pm_qos_remove_request - modifies an existing qos request - * @pm_qos_req: handle to request list element + * @req: handle to request list element * - * Will remove pm qos request from the list of requests and + * Will remove pm qos request from the list of constraints and * recompute the current target value for the pm_qos_class. Call this * on slow code paths. */ -void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req) +void pm_qos_remove_request(struct pm_qos_request *req) { struct pm_qos_object *o; - if (pm_qos_req == NULL) + if (req == NULL) return; /* silent return to keep pcm code cleaner */ - if (!pm_qos_request_active(pm_qos_req)) { + if (!pm_qos_request_active(req)) { WARN(1, KERN_ERR "pm_qos_remove_request() called for unknown object\n"); return; } - o = pm_qos_array[pm_qos_req->pm_qos_class]; - update_target(o, &pm_qos_req->list, 1, PM_QOS_DEFAULT_VALUE); - memset(pm_qos_req, 0, sizeof(*pm_qos_req)); + o = pm_qos_array[req->pm_qos_class]; + update_target(o, &req->node, 1, PM_QOS_DEFAULT_VALUE); + memset(req, 0, sizeof(*req)); } EXPORT_SYMBOL_GPL(pm_qos_remove_request); @@ -368,7 +368,7 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp) pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); if (pm_qos_class >= 0) { - struct pm_qos_request_list *req = kzalloc(sizeof(*req), GFP_KERNEL); + struct pm_qos_request *req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; @@ -383,7 +383,7 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp) static int pm_qos_power_release(struct inode *inode, struct file *filp) { - struct pm_qos_request_list *req; + struct pm_qos_request *req; req = filp->private_data; pm_qos_remove_request(req); @@ -399,14 +399,14 @@ static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, s32 value; unsigned long flags; struct pm_qos_object *o; - struct pm_qos_request_list *pm_qos_req = filp->private_data; + struct pm_qos_request *req = filp->private_data; - if (!pm_qos_req) + if (!req) return -EINVAL; - if (!pm_qos_request_active(pm_qos_req)) + if (!pm_qos_request_active(req)) return -EINVAL; - o = pm_qos_array[pm_qos_req->pm_qos_class]; + o = pm_qos_array[req->pm_qos_class]; spin_lock_irqsave(&pm_qos_lock, flags); value = pm_qos_get_value(o); spin_unlock_irqrestore(&pm_qos_lock, flags); @@ -418,7 +418,7 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos) { s32 value; - struct pm_qos_request_list *pm_qos_req; + struct pm_qos_request *req; if (count == sizeof(s32)) { if (copy_from_user(&value, buf, sizeof(s32))) @@ -449,8 +449,8 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, return -EINVAL; } - pm_qos_req = filp->private_data; - pm_qos_update_request(pm_qos_req, value); + req = filp->private_data; + pm_qos_update_request(req, value); return count; } -- cgit v0.10.2 From 4a31a33425a1eb92f6a0b9846f081842268361c8 Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Thu, 25 Aug 2011 15:35:20 +0200 Subject: PM QoS: Code reorganization Move around the PM QoS misc devices management code for better readability. Signed-off-by: Jean Pihet Acked-by: markgross Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/qos.c b/kernel/power/qos.c index aa52c44..788c4cf 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -188,28 +188,6 @@ static void update_target(struct pm_qos_object *o, struct plist_node *node, NULL); } -static int register_pm_qos_misc(struct pm_qos_object *qos) -{ - qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR; - qos->pm_qos_power_miscdev.name = qos->name; - qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops; - - return misc_register(&qos->pm_qos_power_miscdev); -} - -static int find_pm_qos_object_by_minor(int minor) -{ - int pm_qos_class; - - for (pm_qos_class = 0; - pm_qos_class < PM_QOS_NUM_CLASSES; pm_qos_class++) { - if (minor == - pm_qos_array[pm_qos_class]->pm_qos_power_miscdev.minor) - return pm_qos_class; - } - return -1; -} - /** * pm_qos_request - returns current system wide qos expectation * @pm_qos_class: identification of which qos value is requested @@ -362,6 +340,29 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) } EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); +/* User space interface to PM QoS classes via misc devices */ +static int register_pm_qos_misc(struct pm_qos_object *qos) +{ + qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR; + qos->pm_qos_power_miscdev.name = qos->name; + qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops; + + return misc_register(&qos->pm_qos_power_miscdev); +} + +static int find_pm_qos_object_by_minor(int minor) +{ + int pm_qos_class; + + for (pm_qos_class = 0; + pm_qos_class < PM_QOS_NUM_CLASSES; pm_qos_class++) { + if (minor == + pm_qos_array[pm_qos_class]->pm_qos_power_miscdev.minor) + return pm_qos_class; + } + return -1; +} + static int pm_qos_power_open(struct inode *inode, struct file *filp) { long pm_qos_class; -- cgit v0.10.2 From 4e1779baaa542c83b459b0a56585e0c1a04c7782 Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Thu, 25 Aug 2011 15:35:27 +0200 Subject: PM QoS: Reorganize data structs In preparation for the per-device constratins support, re-organize the data strctures: - add a struct pm_qos_constraints which contains the constraints related data - update struct pm_qos_object contents to the PM QoS internal object data. Add a pointer to struct pm_qos_constraints - update the internal code to use the new data structs. Signed-off-by: Jean Pihet Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 6b0968f..9772311 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -25,6 +25,25 @@ struct pm_qos_request { int pm_qos_class; }; +enum pm_qos_type { + PM_QOS_UNITIALIZED, + PM_QOS_MAX, /* return the largest value */ + PM_QOS_MIN /* return the smallest value */ +}; + +/* + * Note: The lockless read path depends on the CPU accessing + * target_value atomically. Atomic access is only guaranteed on all CPU + * types linux supports for 32 bit quantites + */ +struct pm_qos_constraints { + struct plist_head list; + s32 target_value; /* Do not change to 64 bit */ + s32 default_value; + enum pm_qos_type type; + struct blocking_notifier_head *notifiers; +}; + #ifdef CONFIG_PM void pm_qos_add_request(struct pm_qos_request *req, int pm_qos_class, s32 value); diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 788c4cf..4a35fe5 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -49,58 +49,53 @@ * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock * held, taken with _irqsave. One lock to rule them all */ -enum pm_qos_type { - PM_QOS_MAX, /* return the largest value */ - PM_QOS_MIN /* return the smallest value */ -}; - -/* - * Note: The lockless read path depends on the CPU accessing - * target_value atomically. Atomic access is only guaranteed on all CPU - * types linux supports for 32 bit quantites - */ struct pm_qos_object { - struct plist_head constraints; - struct blocking_notifier_head *notifiers; + struct pm_qos_constraints *constraints; struct miscdevice pm_qos_power_miscdev; char *name; - s32 target_value; /* Do not change to 64 bit */ - s32 default_value; - enum pm_qos_type type; }; static DEFINE_SPINLOCK(pm_qos_lock); static struct pm_qos_object null_pm_qos; + static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); -static struct pm_qos_object cpu_dma_pm_qos = { - .constraints = PLIST_HEAD_INIT(cpu_dma_pm_qos.constraints), - .notifiers = &cpu_dma_lat_notifier, - .name = "cpu_dma_latency", +static struct pm_qos_constraints cpu_dma_constraints = { + .list = PLIST_HEAD_INIT(cpu_dma_constraints.list), .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, .type = PM_QOS_MIN, + .notifiers = &cpu_dma_lat_notifier, +}; +static struct pm_qos_object cpu_dma_pm_qos = { + .constraints = &cpu_dma_constraints, }; static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); -static struct pm_qos_object network_lat_pm_qos = { - .constraints = PLIST_HEAD_INIT(network_lat_pm_qos.constraints), - .notifiers = &network_lat_notifier, - .name = "network_latency", +static struct pm_qos_constraints network_lat_constraints = { + .list = PLIST_HEAD_INIT(network_lat_constraints.list), .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, - .type = PM_QOS_MIN + .type = PM_QOS_MIN, + .notifiers = &network_lat_notifier, +}; +static struct pm_qos_object network_lat_pm_qos = { + .constraints = &network_lat_constraints, + .name = "network_latency", }; static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); -static struct pm_qos_object network_throughput_pm_qos = { - .constraints = PLIST_HEAD_INIT(network_throughput_pm_qos.constraints), - .notifiers = &network_throughput_notifier, - .name = "network_throughput", +static struct pm_qos_constraints network_tput_constraints = { + .list = PLIST_HEAD_INIT(network_tput_constraints.list), .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, .type = PM_QOS_MAX, + .notifiers = &network_throughput_notifier, +}; +static struct pm_qos_object network_throughput_pm_qos = { + .constraints = &network_tput_constraints, + .name = "network_throughput", }; @@ -129,15 +124,15 @@ static const struct file_operations pm_qos_power_fops = { /* unlocked internal variant */ static inline int pm_qos_get_value(struct pm_qos_object *o) { - if (plist_head_empty(&o->constraints)) - return o->default_value; + if (plist_head_empty(&o->constraints->list)) + return o->constraints->default_value; - switch (o->type) { + switch (o->constraints->type) { case PM_QOS_MIN: - return plist_first(&o->constraints)->prio; + return plist_first(&o->constraints->list)->prio; case PM_QOS_MAX: - return plist_last(&o->constraints)->prio; + return plist_last(&o->constraints->list)->prio; default: /* runtime check for not using enum */ @@ -147,12 +142,12 @@ static inline int pm_qos_get_value(struct pm_qos_object *o) static inline s32 pm_qos_read_value(struct pm_qos_object *o) { - return o->target_value; + return o->constraints->target_value; } static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value) { - o->target_value = value; + o->constraints->target_value = value; } static void update_target(struct pm_qos_object *o, struct plist_node *node, @@ -170,20 +165,20 @@ static void update_target(struct pm_qos_object *o, struct plist_node *node, * with new value and add, then see if the extremal * changed */ - plist_del(node, &o->constraints); + plist_del(node, &o->constraints->list); plist_node_init(node, value); - plist_add(node, &o->constraints); + plist_add(node, &o->constraints->list); } else if (del) { - plist_del(node, &o->constraints); + plist_del(node, &o->constraints->list); } else { - plist_add(node, &o->constraints); + plist_add(node, &o->constraints->list); } curr_value = pm_qos_get_value(o); pm_qos_set_value(o, curr_value); spin_unlock_irqrestore(&pm_qos_lock, flags); if (prev_value != curr_value) - blocking_notifier_call_chain(o->notifiers, + blocking_notifier_call_chain(o->constraints->notifiers, (unsigned long)curr_value, NULL); } @@ -230,7 +225,7 @@ void pm_qos_add_request(struct pm_qos_request *req, return; } if (value == PM_QOS_DEFAULT_VALUE) - new_value = o->default_value; + new_value = o->constraints->default_value; else new_value = value; plist_node_init(&req->node, new_value); @@ -266,7 +261,7 @@ void pm_qos_update_request(struct pm_qos_request *req, o = pm_qos_array[req->pm_qos_class]; if (new_value == PM_QOS_DEFAULT_VALUE) - temp = o->default_value; + temp = o->constraints->default_value; else temp = new_value; @@ -315,7 +310,8 @@ int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) int retval; retval = blocking_notifier_chain_register( - pm_qos_array[pm_qos_class]->notifiers, notifier); + pm_qos_array[pm_qos_class]->constraints->notifiers, + notifier); return retval; } @@ -334,7 +330,8 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) int retval; retval = blocking_notifier_chain_unregister( - pm_qos_array[pm_qos_class]->notifiers, notifier); + pm_qos_array[pm_qos_class]->constraints->notifiers, + notifier); return retval; } -- cgit v0.10.2 From abe98ec2d86279fe821c9051003a0abc43444f15 Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Thu, 25 Aug 2011 15:35:34 +0200 Subject: PM QoS: Generalize and export constraints management code In preparation for the per-device constratins support: - rename update_target to pm_qos_update_target - generalize and export pm_qos_update_target for usage by the upcoming per-device latency constraints framework: * operate on struct pm_qos_constraints for constraints management, * introduce an 'action' parameter for constraints add/update/remove, * the return value indicates if the aggregated constraint value has changed, - update the internal code to operate on struct pm_qos_constraints - add a NULL pointer check in the API functions Signed-off-by: Jean Pihet Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 9772311..84aa150 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -44,7 +44,16 @@ struct pm_qos_constraints { struct blocking_notifier_head *notifiers; }; +/* Action requested to pm_qos_update_target */ +enum pm_qos_req_action { + PM_QOS_ADD_REQ, /* Add a new request */ + PM_QOS_UPDATE_REQ, /* Update an existing request */ + PM_QOS_REMOVE_REQ /* Remove an existing request */ +}; + #ifdef CONFIG_PM +int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, + enum pm_qos_req_action action, int value); void pm_qos_add_request(struct pm_qos_request *req, int pm_qos_class, s32 value); void pm_qos_update_request(struct pm_qos_request *req, @@ -56,6 +65,11 @@ int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier); int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier); int pm_qos_request_active(struct pm_qos_request *req); #else +static inline int pm_qos_update_target(struct pm_qos_constraints *c, + struct plist_node *node, + enum pm_qos_req_action action, + int value) + { return 0; } static inline void pm_qos_add_request(struct pm_qos_request *req, int pm_qos_class, s32 value) { return; } diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 4a35fe5..7c7cd18 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -122,17 +122,17 @@ static const struct file_operations pm_qos_power_fops = { }; /* unlocked internal variant */ -static inline int pm_qos_get_value(struct pm_qos_object *o) +static inline int pm_qos_get_value(struct pm_qos_constraints *c) { - if (plist_head_empty(&o->constraints->list)) - return o->constraints->default_value; + if (plist_head_empty(&c->list)) + return c->default_value; - switch (o->constraints->type) { + switch (c->type) { case PM_QOS_MIN: - return plist_first(&o->constraints->list)->prio; + return plist_first(&c->list)->prio; case PM_QOS_MAX: - return plist_last(&o->constraints->list)->prio; + return plist_last(&c->list)->prio; default: /* runtime check for not using enum */ @@ -140,47 +140,73 @@ static inline int pm_qos_get_value(struct pm_qos_object *o) } } -static inline s32 pm_qos_read_value(struct pm_qos_object *o) +static inline s32 pm_qos_read_value(struct pm_qos_constraints *c) { - return o->constraints->target_value; + return c->target_value; } -static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value) +static inline void pm_qos_set_value(struct pm_qos_constraints *c, s32 value) { - o->constraints->target_value = value; + c->target_value = value; } -static void update_target(struct pm_qos_object *o, struct plist_node *node, - int del, int value) +/** + * pm_qos_update_target - manages the constraints list and calls the notifiers + * if needed + * @c: constraints data struct + * @node: request to add to the list, to update or to remove + * @action: action to take on the constraints list + * @value: value of the request to add or update + * + * This function returns 1 if the aggregated constraint value has changed, 0 + * otherwise. + */ +int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, + enum pm_qos_req_action action, int value) { unsigned long flags; - int prev_value, curr_value; + int prev_value, curr_value, new_value; spin_lock_irqsave(&pm_qos_lock, flags); - prev_value = pm_qos_get_value(o); - /* PM_QOS_DEFAULT_VALUE is a signal that the value is unchanged */ - if (value != PM_QOS_DEFAULT_VALUE) { + prev_value = pm_qos_get_value(c); + if (value == PM_QOS_DEFAULT_VALUE) + new_value = c->default_value; + else + new_value = value; + + switch (action) { + case PM_QOS_REMOVE_REQ: + plist_del(node, &c->list); + break; + case PM_QOS_UPDATE_REQ: /* * to change the list, we atomically remove, reinit * with new value and add, then see if the extremal * changed */ - plist_del(node, &o->constraints->list); - plist_node_init(node, value); - plist_add(node, &o->constraints->list); - } else if (del) { - plist_del(node, &o->constraints->list); - } else { - plist_add(node, &o->constraints->list); + plist_del(node, &c->list); + case PM_QOS_ADD_REQ: + plist_node_init(node, new_value); + plist_add(node, &c->list); + break; + default: + /* no action */ + ; } - curr_value = pm_qos_get_value(o); - pm_qos_set_value(o, curr_value); + + curr_value = pm_qos_get_value(c); + pm_qos_set_value(c, curr_value); + spin_unlock_irqrestore(&pm_qos_lock, flags); - if (prev_value != curr_value) - blocking_notifier_call_chain(o->constraints->notifiers, + if (prev_value != curr_value) { + blocking_notifier_call_chain(c->notifiers, (unsigned long)curr_value, NULL); + return 1; + } else { + return 0; + } } /** @@ -191,7 +217,7 @@ static void update_target(struct pm_qos_object *o, struct plist_node *node, */ int pm_qos_request(int pm_qos_class) { - return pm_qos_read_value(pm_qos_array[pm_qos_class]); + return pm_qos_read_value(pm_qos_array[pm_qos_class]->constraints); } EXPORT_SYMBOL_GPL(pm_qos_request); @@ -217,20 +243,16 @@ EXPORT_SYMBOL_GPL(pm_qos_request_active); void pm_qos_add_request(struct pm_qos_request *req, int pm_qos_class, s32 value) { - struct pm_qos_object *o = pm_qos_array[pm_qos_class]; - int new_value; + if (!req) /*guard against callers passing in null */ + return; if (pm_qos_request_active(req)) { WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n"); return; } - if (value == PM_QOS_DEFAULT_VALUE) - new_value = o->constraints->default_value; - else - new_value = value; - plist_node_init(&req->node, new_value); req->pm_qos_class = pm_qos_class; - update_target(o, &req->node, 0, PM_QOS_DEFAULT_VALUE); + pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints, + &req->node, PM_QOS_ADD_REQ, value); } EXPORT_SYMBOL_GPL(pm_qos_add_request); @@ -247,9 +269,6 @@ EXPORT_SYMBOL_GPL(pm_qos_add_request); void pm_qos_update_request(struct pm_qos_request *req, s32 new_value) { - s32 temp; - struct pm_qos_object *o; - if (!req) /*guard against callers passing in null */ return; @@ -258,15 +277,10 @@ void pm_qos_update_request(struct pm_qos_request *req, return; } - o = pm_qos_array[req->pm_qos_class]; - - if (new_value == PM_QOS_DEFAULT_VALUE) - temp = o->constraints->default_value; - else - temp = new_value; - - if (temp != req->node.prio) - update_target(o, &req->node, 0, temp); + if (new_value != req->node.prio) + pm_qos_update_target( + pm_qos_array[req->pm_qos_class]->constraints, + &req->node, PM_QOS_UPDATE_REQ, new_value); } EXPORT_SYMBOL_GPL(pm_qos_update_request); @@ -280,9 +294,7 @@ EXPORT_SYMBOL_GPL(pm_qos_update_request); */ void pm_qos_remove_request(struct pm_qos_request *req) { - struct pm_qos_object *o; - - if (req == NULL) + if (!req) /*guard against callers passing in null */ return; /* silent return to keep pcm code cleaner */ @@ -291,8 +303,9 @@ void pm_qos_remove_request(struct pm_qos_request *req) return; } - o = pm_qos_array[req->pm_qos_class]; - update_target(o, &req->node, 1, PM_QOS_DEFAULT_VALUE); + pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints, + &req->node, PM_QOS_REMOVE_REQ, + PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } EXPORT_SYMBOL_GPL(pm_qos_remove_request); @@ -396,7 +409,6 @@ static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, { s32 value; unsigned long flags; - struct pm_qos_object *o; struct pm_qos_request *req = filp->private_data; if (!req) @@ -404,9 +416,8 @@ static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, if (!pm_qos_request_active(req)) return -EINVAL; - o = pm_qos_array[req->pm_qos_class]; spin_lock_irqsave(&pm_qos_lock, flags); - value = pm_qos_get_value(o); + value = pm_qos_get_value(pm_qos_array[req->pm_qos_class]->constraints); spin_unlock_irqrestore(&pm_qos_lock, flags); return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32)); -- cgit v0.10.2 From 91ff4cb803df6de9114351b9f2f0f39f397ee03e Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Thu, 25 Aug 2011 15:35:41 +0200 Subject: PM QoS: Implement per-device PM QoS constraints Implement the per-device PM QoS constraints by creating a device PM QoS API, which calls the PM QoS constraints management core code. The per-device latency constraints data strctures are stored in the device dev_pm_info struct. The device PM code calls the init and destroy of the per-device constraints data struct in order to support the dynamic insertion and removal of the devices in the system. To minimize the data usage by the per-device constraints, the data struct is only allocated at the first call to dev_pm_qos_add_request. The data is later free'd when the device is removed from the system. A global mutex protects the constraints users from the data being allocated and free'd. Signed-off-by: Jean Pihet Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 6488ce1..81676dd 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_PM) += sysfs.o generic_ops.o common.o +obj-$(CONFIG_PM) += sysfs.o generic_ops.o common.o qos.o obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o obj-$(CONFIG_PM_RUNTIME) += runtime.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o @@ -6,4 +6,4 @@ obj-$(CONFIG_PM_OPP) += opp.o obj-$(CONFIG_PM_GENERIC_DOMAINS) += domain.o obj-$(CONFIG_HAVE_CLK) += clock_ops.o -ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG \ No newline at end of file +ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index a854591..956443f8 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -97,6 +98,7 @@ void device_pm_add(struct device *dev) dev_name(dev->parent)); list_add_tail(&dev->power.entry, &dpm_list); mutex_unlock(&dpm_list_mtx); + dev_pm_qos_constraints_init(dev); } /** @@ -107,6 +109,7 @@ void device_pm_remove(struct device *dev) { pr_debug("PM: Removing info for %s:%s\n", dev->bus ? dev->bus->name : "No Bus", dev_name(dev)); + dev_pm_qos_constraints_destroy(dev); complete_all(&dev->power.completion); mutex_lock(&dpm_list_mtx); list_del_init(&dev->power.entry); diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c new file mode 100644 index 0000000..cc4c541 --- /dev/null +++ b/drivers/base/power/qos.c @@ -0,0 +1,338 @@ +/* + * Devices PM QoS constraints management + * + * Copyright (C) 2011 Texas Instruments, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * + * This module exposes the interface to kernel space for specifying + * per-device PM QoS dependencies. It provides infrastructure for registration + * of: + * + * Dependents on a QoS value : register requests + * Watchers of QoS value : get notified when target QoS value changes + * + * This QoS design is best effort based. Dependents register their QoS needs. + * Watchers register to keep track of the current QoS needs of the system. + * + * Note about the per-device constraint data struct allocation: + * . The per-device constraints data struct ptr is tored into the device + * dev_pm_info. + * . To minimize the data usage by the per-device constraints, the data struct + * is only allocated at the first call to dev_pm_qos_add_request. + * . The data is later free'd when the device is removed from the system. + * . The constraints_state variable from dev_pm_info tracks the data struct + * allocation state: + * DEV_PM_QOS_NO_DEVICE: No device present or device removed, no data + * allocated, + * DEV_PM_QOS_DEVICE_PRESENT: Device present, data not allocated and will be + * allocated at the first call to dev_pm_qos_add_request, + * DEV_PM_QOS_ALLOCATED: Device present, data allocated. The per-device + * PM QoS constraints framework is operational and constraints can be + * added, updated or removed using the dev_pm_qos_* API. + * . A global mutex protects the constraints users from the data being + * allocated and free'd. + */ + +#include +#include +#include +#include +#include + + +static DEFINE_MUTEX(dev_pm_qos_mtx); + +/* + * dev_pm_qos_constraints_allocate + * @dev: device to allocate data for + * + * Called at the first call to add_request, for constraint data allocation + * Must be called with the dev_pm_qos_mtx mutex held + */ +static int dev_pm_qos_constraints_allocate(struct device *dev) +{ + struct pm_qos_constraints *c; + struct blocking_notifier_head *n; + + c = kzalloc(sizeof(*c), GFP_KERNEL); + if (!c) + return -ENOMEM; + + n = kzalloc(sizeof(*n), GFP_KERNEL); + if (!n) { + kfree(c); + return -ENOMEM; + } + BLOCKING_INIT_NOTIFIER_HEAD(n); + + dev->power.constraints = c; + plist_head_init(&dev->power.constraints->list); + dev->power.constraints->target_value = PM_QOS_DEV_LAT_DEFAULT_VALUE; + dev->power.constraints->default_value = PM_QOS_DEV_LAT_DEFAULT_VALUE; + dev->power.constraints->type = PM_QOS_MIN; + dev->power.constraints->notifiers = n; + dev->power.constraints_state = DEV_PM_QOS_ALLOCATED; + + return 0; +} + +/** + * dev_pm_qos_constraints_init + * @dev: target device + * + * Called from the device PM subsystem at device insertion + */ +void dev_pm_qos_constraints_init(struct device *dev) +{ + mutex_lock(&dev_pm_qos_mtx); + dev->power.constraints_state = DEV_PM_QOS_DEVICE_PRESENT; + mutex_unlock(&dev_pm_qos_mtx); +} + +/** + * dev_pm_qos_constraints_destroy + * @dev: target device + * + * Called from the device PM subsystem at device removal + */ +void dev_pm_qos_constraints_destroy(struct device *dev) +{ + struct dev_pm_qos_request *req, *tmp; + + mutex_lock(&dev_pm_qos_mtx); + + if (dev->power.constraints_state == DEV_PM_QOS_ALLOCATED) { + /* Flush the constraints list for the device */ + plist_for_each_entry_safe(req, tmp, + &dev->power.constraints->list, + node) { + /* + * Update constraints list and call the per-device + * callbacks if needed + */ + pm_qos_update_target(req->dev->power.constraints, + &req->node, PM_QOS_REMOVE_REQ, + PM_QOS_DEFAULT_VALUE); + memset(req, 0, sizeof(*req)); + } + + kfree(dev->power.constraints->notifiers); + kfree(dev->power.constraints); + dev->power.constraints = NULL; + } + dev->power.constraints_state = DEV_PM_QOS_NO_DEVICE; + + mutex_unlock(&dev_pm_qos_mtx); +} + +/** + * dev_pm_qos_add_request - inserts new qos request into the list + * @dev: target device for the constraint + * @req: pointer to a preallocated handle + * @value: defines the qos request + * + * This function inserts a new entry in the device constraints list of + * requested qos performance characteristics. It recomputes the aggregate + * QoS expectations of parameters and initializes the dev_pm_qos_request + * handle. Caller needs to save this handle for later use in updates and + * removal. + * + * Returns 1 if the aggregated constraint value has changed, + * 0 if the aggregated constraint value has not changed, + * -EINVAL in case of wrong parameters, -ENODEV if the device has been + * removed from the system + */ +int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, + s32 value) +{ + int ret = 0; + + if (!dev || !req) /*guard against callers passing in null */ + return -EINVAL; + + if (dev_pm_qos_request_active(req)) { + WARN(1, KERN_ERR "dev_pm_qos_add_request() called for already " + "added request\n"); + return -EINVAL; + } + + mutex_lock(&dev_pm_qos_mtx); + req->dev = dev; + + /* Return if the device has been removed */ + if (req->dev->power.constraints_state == DEV_PM_QOS_NO_DEVICE) { + ret = -ENODEV; + goto out; + } + + /* + * Allocate the constraints data on the first call to add_request, + * i.e. only if the data is not already allocated and if the device has + * not been removed + */ + if (dev->power.constraints_state == DEV_PM_QOS_DEVICE_PRESENT) + ret = dev_pm_qos_constraints_allocate(dev); + + if (!ret) + ret = pm_qos_update_target(dev->power.constraints, &req->node, + PM_QOS_ADD_REQ, value); + +out: + mutex_unlock(&dev_pm_qos_mtx); + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_qos_add_request); + +/** + * dev_pm_qos_update_request - modifies an existing qos request + * @req : handle to list element holding a dev_pm_qos request to use + * @new_value: defines the qos request + * + * Updates an existing dev PM qos request along with updating the + * target value. + * + * Attempts are made to make this code callable on hot code paths. + * + * Returns 1 if the aggregated constraint value has changed, + * 0 if the aggregated constraint value has not changed, + * -EINVAL in case of wrong parameters, -ENODEV if the device has been + * removed from the system + */ +int dev_pm_qos_update_request(struct dev_pm_qos_request *req, + s32 new_value) +{ + int ret = 0; + + if (!req) /*guard against callers passing in null */ + return -EINVAL; + + if (!dev_pm_qos_request_active(req)) { + WARN(1, KERN_ERR "dev_pm_qos_update_request() called for " + "unknown object\n"); + return -EINVAL; + } + + mutex_lock(&dev_pm_qos_mtx); + + if (req->dev->power.constraints_state == DEV_PM_QOS_ALLOCATED) { + if (new_value != req->node.prio) + ret = pm_qos_update_target(req->dev->power.constraints, + &req->node, + PM_QOS_UPDATE_REQ, + new_value); + } else { + /* Return if the device has been removed */ + ret = -ENODEV; + } + + mutex_unlock(&dev_pm_qos_mtx); + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_qos_update_request); + +/** + * dev_pm_qos_remove_request - modifies an existing qos request + * @req: handle to request list element + * + * Will remove pm qos request from the list of constraints and + * recompute the current target value. Call this on slow code paths. + * + * Returns 1 if the aggregated constraint value has changed, + * 0 if the aggregated constraint value has not changed, + * -EINVAL in case of wrong parameters, -ENODEV if the device has been + * removed from the system + */ +int dev_pm_qos_remove_request(struct dev_pm_qos_request *req) +{ + int ret = 0; + + if (!req) /*guard against callers passing in null */ + return -EINVAL; + + if (!dev_pm_qos_request_active(req)) { + WARN(1, KERN_ERR "dev_pm_qos_remove_request() called for " + "unknown object\n"); + return -EINVAL; + } + + mutex_lock(&dev_pm_qos_mtx); + + if (req->dev->power.constraints_state == DEV_PM_QOS_ALLOCATED) { + ret = pm_qos_update_target(req->dev->power.constraints, + &req->node, PM_QOS_REMOVE_REQ, + PM_QOS_DEFAULT_VALUE); + memset(req, 0, sizeof(*req)); + } else { + /* Return if the device has been removed */ + ret = -ENODEV; + } + + mutex_unlock(&dev_pm_qos_mtx); + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_qos_remove_request); + +/** + * dev_pm_qos_add_notifier - sets notification entry for changes to target value + * of per-device PM QoS constraints + * + * @dev: target device for the constraint + * @notifier: notifier block managed by caller. + * + * Will register the notifier into a notification chain that gets called + * upon changes to the target value for the device. + */ +int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier) +{ + int retval = 0; + + mutex_lock(&dev_pm_qos_mtx); + + /* Silently return if the device has been removed */ + if (dev->power.constraints_state != DEV_PM_QOS_ALLOCATED) + goto out; + + retval = blocking_notifier_chain_register( + dev->power.constraints->notifiers, + notifier); + +out: + mutex_unlock(&dev_pm_qos_mtx); + return retval; +} +EXPORT_SYMBOL_GPL(dev_pm_qos_add_notifier); + +/** + * dev_pm_qos_remove_notifier - deletes notification for changes to target value + * of per-device PM QoS constraints + * + * @dev: target device for the constraint + * @notifier: notifier block to be removed. + * + * Will remove the notifier from the notification chain that gets called + * upon changes to the target value. + */ +int dev_pm_qos_remove_notifier(struct device *dev, + struct notifier_block *notifier) +{ + int retval = 0; + + mutex_lock(&dev_pm_qos_mtx); + + /* Silently return if the device has been removed */ + if (dev->power.constraints_state != DEV_PM_QOS_ALLOCATED) + goto out; + + retval = blocking_notifier_chain_unregister( + dev->power.constraints->notifiers, + notifier); + +out: + mutex_unlock(&dev_pm_qos_mtx); + return retval; +} +EXPORT_SYMBOL_GPL(dev_pm_qos_remove_notifier); diff --git a/include/linux/pm.h b/include/linux/pm.h index ed10f24..d78187e 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -419,6 +419,13 @@ enum rpm_request { RPM_REQ_RESUME, }; +/* Per-device PM QoS constraints data struct state */ +enum dev_pm_qos_state { + DEV_PM_QOS_NO_DEVICE, /* No device present */ + DEV_PM_QOS_DEVICE_PRESENT, /* Device present, data not allocated */ + DEV_PM_QOS_ALLOCATED, /* Device present, data allocated */ +}; + struct wakeup_source; struct pm_domain_data { @@ -480,6 +487,8 @@ struct dev_pm_info { unsigned long accounting_timestamp; #endif struct pm_subsys_data *subsys_data; /* Owned by the subsystem. */ + struct pm_qos_constraints *constraints; + enum dev_pm_qos_state constraints_state; }; extern void update_pm_runtime_accounting(struct device *dev); diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 84aa150..f75f74d 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -19,12 +19,18 @@ #define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 +#define PM_QOS_DEV_LAT_DEFAULT_VALUE 0 struct pm_qos_request { struct plist_node node; int pm_qos_class; }; +struct dev_pm_qos_request { + struct plist_node node; + struct device *dev; +}; + enum pm_qos_type { PM_QOS_UNITIALIZED, PM_QOS_MAX, /* return the largest value */ @@ -51,6 +57,11 @@ enum pm_qos_req_action { PM_QOS_REMOVE_REQ /* Remove an existing request */ }; +static inline int dev_pm_qos_request_active(struct dev_pm_qos_request *req) +{ + return req->dev != 0; +} + #ifdef CONFIG_PM int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, enum pm_qos_req_action action, int value); @@ -64,6 +75,17 @@ int pm_qos_request(int pm_qos_class); int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier); int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier); int pm_qos_request_active(struct pm_qos_request *req); + +int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, + s32 value); +int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value); +int dev_pm_qos_remove_request(struct dev_pm_qos_request *req); +int dev_pm_qos_add_notifier(struct device *dev, + struct notifier_block *notifier); +int dev_pm_qos_remove_notifier(struct device *dev, + struct notifier_block *notifier); +void dev_pm_qos_constraints_init(struct device *dev); +void dev_pm_qos_constraints_destroy(struct device *dev); #else static inline int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, @@ -89,6 +111,26 @@ static inline int pm_qos_remove_notifier(int pm_qos_class, { return 0; } static inline int pm_qos_request_active(struct pm_qos_request *req) { return 0; } + +static inline int dev_pm_qos_add_request(struct device *dev, + struct dev_pm_qos_request *req, + s32 value) + { return 0; } +static inline int dev_pm_qos_update_request(struct dev_pm_qos_request *req, + s32 new_value) + { return 0; } +static inline int dev_pm_qos_remove_request(struct dev_pm_qos_request *req) + { return 0; } +static inline int dev_pm_qos_add_notifier(struct device *dev, + struct notifier_block *notifier) + { return 0; } +static inline int dev_pm_qos_remove_notifier(struct device *dev, + struct notifier_block *notifier) + { return 0; } +static inline void dev_pm_qos_constraints_init(struct device *dev) + { return; } +static inline void dev_pm_qos_constraints_destroy(struct device *dev) + { return; } #endif #endif -- cgit v0.10.2 From b66213cdb002b08b29603d488c451dfe25e2ca20 Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Thu, 25 Aug 2011 15:35:47 +0200 Subject: PM QoS: Add global notification mechanism for device constraints Add a global notification chain that gets called upon changes to the aggregated constraint value for any device. The notification callbacks are passing the full constraint request data in order for the callees to have access to it. The current use is for the platform low-level code to access the target device of the constraint. Signed-off-by: Jean Pihet Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index cc4c541..8d0b811 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -17,6 +17,12 @@ * * This QoS design is best effort based. Dependents register their QoS needs. * Watchers register to keep track of the current QoS needs of the system. + * Watchers can register different types of notification callbacks: + * . a per-device notification callback using the dev_pm_qos_*_notifier API. + * The notification chain data is stored in the per-device constraint + * data struct. + * . a system-wide notification callback using the dev_pm_qos_*_global_notifier + * API. The notification chain data is stored in a static variable. * * Note about the per-device constraint data struct allocation: * . The per-device constraints data struct ptr is tored into the device @@ -45,6 +51,36 @@ static DEFINE_MUTEX(dev_pm_qos_mtx); +static BLOCKING_NOTIFIER_HEAD(dev_pm_notifiers); + +/* + * apply_constraint + * @req: constraint request to apply + * @action: action to perform add/update/remove, of type enum pm_qos_req_action + * @value: defines the qos request + * + * Internal function to update the constraints list using the PM QoS core + * code and if needed call the per-device and the global notification + * callbacks + */ +static int apply_constraint(struct dev_pm_qos_request *req, + enum pm_qos_req_action action, int value) +{ + int ret, curr_value; + + ret = pm_qos_update_target(req->dev->power.constraints, + &req->node, action, value); + + if (ret) { + /* Call the global callbacks if needed */ + curr_value = pm_qos_read_value(req->dev->power.constraints); + blocking_notifier_call_chain(&dev_pm_notifiers, + (unsigned long)curr_value, + req); + } + + return ret; +} /* * dev_pm_qos_constraints_allocate @@ -111,12 +147,11 @@ void dev_pm_qos_constraints_destroy(struct device *dev) &dev->power.constraints->list, node) { /* - * Update constraints list and call the per-device + * Update constraints list and call the notification * callbacks if needed */ - pm_qos_update_target(req->dev->power.constraints, - &req->node, PM_QOS_REMOVE_REQ, - PM_QOS_DEFAULT_VALUE); + apply_constraint(req, PM_QOS_REMOVE_REQ, + PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } @@ -147,7 +182,7 @@ void dev_pm_qos_constraints_destroy(struct device *dev) * removed from the system */ int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, - s32 value) + s32 value) { int ret = 0; @@ -178,8 +213,7 @@ int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, ret = dev_pm_qos_constraints_allocate(dev); if (!ret) - ret = pm_qos_update_target(dev->power.constraints, &req->node, - PM_QOS_ADD_REQ, value); + ret = apply_constraint(req, PM_QOS_ADD_REQ, value); out: mutex_unlock(&dev_pm_qos_mtx); @@ -220,10 +254,8 @@ int dev_pm_qos_update_request(struct dev_pm_qos_request *req, if (req->dev->power.constraints_state == DEV_PM_QOS_ALLOCATED) { if (new_value != req->node.prio) - ret = pm_qos_update_target(req->dev->power.constraints, - &req->node, - PM_QOS_UPDATE_REQ, - new_value); + ret = apply_constraint(req, PM_QOS_UPDATE_REQ, + new_value); } else { /* Return if the device has been removed */ ret = -ENODEV; @@ -262,9 +294,8 @@ int dev_pm_qos_remove_request(struct dev_pm_qos_request *req) mutex_lock(&dev_pm_qos_mtx); if (req->dev->power.constraints_state == DEV_PM_QOS_ALLOCATED) { - ret = pm_qos_update_target(req->dev->power.constraints, - &req->node, PM_QOS_REMOVE_REQ, - PM_QOS_DEFAULT_VALUE); + ret = apply_constraint(req, PM_QOS_REMOVE_REQ, + PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } else { /* Return if the device has been removed */ @@ -336,3 +367,33 @@ out: return retval; } EXPORT_SYMBOL_GPL(dev_pm_qos_remove_notifier); + +/** + * dev_pm_qos_add_global_notifier - sets notification entry for changes to + * target value of the PM QoS constraints for any device + * + * @notifier: notifier block managed by caller. + * + * Will register the notifier into a notification chain that gets called + * upon changes to the target value for any device. + */ +int dev_pm_qos_add_global_notifier(struct notifier_block *notifier) +{ + return blocking_notifier_chain_register(&dev_pm_notifiers, notifier); +} +EXPORT_SYMBOL_GPL(dev_pm_qos_add_global_notifier); + +/** + * dev_pm_qos_remove_global_notifier - deletes notification for changes to + * target value of PM QoS constraints for any device + * + * @notifier: notifier block to be removed. + * + * Will remove the notifier from the notification chain that gets called + * upon changes to the target value for any device. + */ +int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier) +{ + return blocking_notifier_chain_unregister(&dev_pm_notifiers, notifier); +} +EXPORT_SYMBOL_GPL(dev_pm_qos_remove_global_notifier); diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index f75f74d..ca7bd3f 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -75,6 +75,7 @@ int pm_qos_request(int pm_qos_class); int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier); int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier); int pm_qos_request_active(struct pm_qos_request *req); +s32 pm_qos_read_value(struct pm_qos_constraints *c); int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, s32 value); @@ -84,6 +85,8 @@ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier); int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier); +int dev_pm_qos_add_global_notifier(struct notifier_block *notifier); +int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier); void dev_pm_qos_constraints_init(struct device *dev); void dev_pm_qos_constraints_destroy(struct device *dev); #else @@ -111,6 +114,8 @@ static inline int pm_qos_remove_notifier(int pm_qos_class, { return 0; } static inline int pm_qos_request_active(struct pm_qos_request *req) { return 0; } +static inline s32 pm_qos_read_value(struct pm_qos_constraints *c) + { return 0; } static inline int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, @@ -127,6 +132,12 @@ static inline int dev_pm_qos_add_notifier(struct device *dev, static inline int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier) { return 0; } +static inline int dev_pm_qos_add_global_notifier( + struct notifier_block *notifier) + { return 0; } +static inline int dev_pm_qos_remove_global_notifier( + struct notifier_block *notifier) + { return 0; } static inline void dev_pm_qos_constraints_init(struct device *dev) { return; } static inline void dev_pm_qos_constraints_destroy(struct device *dev) diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 7c7cd18..1c1797d 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -140,7 +140,7 @@ static inline int pm_qos_get_value(struct pm_qos_constraints *c) } } -static inline s32 pm_qos_read_value(struct pm_qos_constraints *c) +s32 pm_qos_read_value(struct pm_qos_constraints *c) { return c->target_value; } -- cgit v0.10.2 From 0aa2a221696cc8ea20a4cdca01315d3b6b4ecc4d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 25 Aug 2011 15:37:04 +0200 Subject: PM / Domains: Preliminary support for devices with power.irq_safe set The generic PM domains framework currently doesn't work with devices whose power.irq_safe flag is set, because runtime PM callbacks for such devices are run with interrupts disabled and the callbacks provided by the generic PM domains framework use domain mutexes and may sleep. However, such devices very well may belong to power domains on some systems, so the generic PM domains framework should take them into account. For this reason, modify the generic PM domains framework so that the domain .power_off() and .power_on() callbacks are never executed for a domain containing devices with power.irq_safe set, although the .stop_device() and .start_device() callbacks are still run for them. Additionally, introduce a flag allowing the creator of a struct generic_pm_domain object to indicate that its .stop_device() and .start_device() callbacks may be run in interrupt context (might_sleep_if() triggers if that flag is not set and one of those callbacks is run in interrupt context). Signed-off-by: Rafael J. Wysocki diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index 5d35831..ac47bfc 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -103,6 +103,7 @@ void sh7372_init_pm_domain(struct sh7372_pm_domain *sh7372_pd) pm_genpd_init(genpd, NULL, false); genpd->stop_device = pm_clk_suspend; genpd->start_device = pm_clk_resume; + genpd->dev_irq_safe = true; genpd->active_wakeup = pd_active_wakeup; genpd->power_off = pd_power_down; genpd->power_on = pd_power_up; diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 339eb2d..c2468a7 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -309,7 +309,8 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) not_suspended = 0; list_for_each_entry(pdd, &genpd->dev_list, list_node) - if (pdd->dev->driver && !pm_runtime_suspended(pdd->dev)) + if (pdd->dev->driver && (!pm_runtime_suspended(pdd->dev) + || pdd->dev->power.irq_safe)) not_suspended++; if (not_suspended > genpd->in_progress) @@ -417,12 +418,21 @@ static int pm_genpd_runtime_suspend(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; + might_sleep_if(!genpd->dev_irq_safe); + if (genpd->stop_device) { int ret = genpd->stop_device(dev); if (ret) return ret; } + /* + * If power.irq_safe is set, this routine will be run with interrupts + * off, so it can't use mutexes. + */ + if (dev->power.irq_safe) + return 0; + mutex_lock(&genpd->lock); genpd->in_progress++; pm_genpd_poweroff(genpd); @@ -452,6 +462,12 @@ static int pm_genpd_runtime_resume(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; + might_sleep_if(!genpd->dev_irq_safe); + + /* If power.irq_safe, the PM domain is never powered off. */ + if (dev->power.irq_safe) + goto out; + mutex_lock(&genpd->lock); ret = __pm_genpd_poweron(genpd); if (ret) { @@ -483,6 +499,7 @@ static int pm_genpd_runtime_resume(struct device *dev) wake_up_all(&genpd->status_wait_queue); mutex_unlock(&genpd->lock); + out: if (genpd->start_device) genpd->start_device(dev); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 5cce46c..2538d90 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -42,6 +42,7 @@ struct generic_pm_domain { unsigned int suspended_count; /* System suspend device counter */ unsigned int prepared_count; /* Suspend counter of prepared devices */ bool suspend_power_off; /* Power status before system suspend */ + bool dev_irq_safe; /* Device callbacks are IRQ-safe */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); int (*start_device)(struct device *dev); -- cgit v0.10.2 From 2e6ba515f50ef7ddf35b2703d014d3216c9b8b24 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 21 Sep 2011 22:31:33 +0200 Subject: PM / Runtime: pm_runtime_idle() can be called in atomic context Add to pm_runtime_idle the list of functions that can be called in atomic context if pm_runtime_irq_safe() has been called for the device. Signed-off-by: Ming Lei Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 62f37bc..1750740 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -478,6 +478,7 @@ pm_runtime_autosuspend_expiration() If pm_runtime_irq_safe() has been called for a device then the following helper functions may also be used in interrupt context: +pm_runtime_idle() pm_runtime_suspend() pm_runtime_autosuspend() pm_runtime_resume() -- cgit v0.10.2 From 06b841666a5a47918d31472dd77837906f999a9a Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Sun, 25 Sep 2011 23:18:42 +0200 Subject: ARM: mach-shmobile: sh7372 generic suspend/resume support Convert the sh7372 Core Standby code to make use of the new generic ARM cpu suspend/resume code. Signed-off-by: Magnus Damm Signed-off-by: Rafael J. Wysocki diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h index 06aecb3..7b6f6f9 100644 --- a/arch/arm/mach-shmobile/include/mach/common.h +++ b/arch/arm/mach-shmobile/include/mach/common.h @@ -35,8 +35,7 @@ extern void sh7372_add_standard_devices(void); extern void sh7372_clock_init(void); extern void sh7372_pinmux_init(void); extern void sh7372_pm_init(void); -extern void sh7372_cpu_suspend(void); -extern void sh7372_cpu_resume(void); +extern void sh7372_resume_core_standby(void); extern struct clk sh7372_extal1_clk; extern struct clk sh7372_extal2_clk; diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index ac47bfc..aa7d352 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -152,30 +153,25 @@ struct sh7372_pm_domain sh7372_a3sg = { #endif /* CONFIG_PM */ -static void sh7372_enter_core_standby(void) +static int sh7372_do_idle_core_standby(unsigned long unused) { - void __iomem *smfram = (void __iomem *)SMFRAM; - - __raw_writel(0, APARMBAREA); /* translate 4k */ - __raw_writel(__pa(sh7372_cpu_resume), SBAR); /* set reset vector */ - __raw_writel(0x10, SYSTBCR); /* enable core standby */ - - __raw_writel(0, smfram + 0x3c); /* clear page table address */ - - sh7372_cpu_suspend(); - cpu_init(); + cpu_do_idle(); /* WFI when SYSTBCR == 0x10 -> Core Standby */ + return 0; +} - /* if page table address is non-NULL then we have been powered down */ - if (__raw_readl(smfram + 0x3c)) { - __raw_writel(__raw_readl(smfram + 0x40), - __va(__raw_readl(smfram + 0x3c))); +static void sh7372_enter_core_standby(void) +{ + /* set reset vector, translate 4k */ + __raw_writel(__pa(sh7372_resume_core_standby), SBAR); + __raw_writel(0, APARMBAREA); - flush_tlb_all(); - set_cr(__raw_readl(smfram + 0x38)); - } + /* enter sleep mode with SYSTBCR to 0x10 */ + __raw_writel(0x10, SYSTBCR); + cpu_suspend(0, sh7372_do_idle_core_standby); + __raw_writel(0, SYSTBCR); - __raw_writel(0, SYSTBCR); /* disable core standby */ - __raw_writel(0, SBAR); /* disable reset vector translation */ + /* disable reset vector translation */ + __raw_writel(0, SBAR); } #ifdef CONFIG_CPU_IDLE diff --git a/arch/arm/mach-shmobile/sleep-sh7372.S b/arch/arm/mach-shmobile/sleep-sh7372.S index d37d3ca..dedf612 100644 --- a/arch/arm/mach-shmobile/sleep-sh7372.S +++ b/arch/arm/mach-shmobile/sleep-sh7372.S @@ -30,231 +30,13 @@ */ #include +#include +#include #include -#define SMFRAM 0xe6a70000 - - .align -kernel_flush: - .word v7_flush_dcache_all - - .align 3 -ENTRY(sh7372_cpu_suspend) - stmfd sp!, {r0-r12, lr} @ save registers on stack - - ldr r8, =SMFRAM - - mov r4, sp @ Store sp - mrs r5, spsr @ Store spsr - mov r6, lr @ Store lr - stmia r8!, {r4-r6} - - mrc p15, 0, r4, c1, c0, 2 @ Coprocessor access control register - mrc p15, 0, r5, c2, c0, 0 @ TTBR0 - mrc p15, 0, r6, c2, c0, 1 @ TTBR1 - mrc p15, 0, r7, c2, c0, 2 @ TTBCR - stmia r8!, {r4-r7} - - mrc p15, 0, r4, c3, c0, 0 @ Domain access Control Register - mrc p15, 0, r5, c10, c2, 0 @ PRRR - mrc p15, 0, r6, c10, c2, 1 @ NMRR - stmia r8!,{r4-r6} - - mrc p15, 0, r4, c13, c0, 1 @ Context ID - mrc p15, 0, r5, c13, c0, 2 @ User r/w thread and process ID - mrc p15, 0, r6, c12, c0, 0 @ Secure or NS vector base address - mrs r7, cpsr @ Store current cpsr - stmia r8!, {r4-r7} - - mrc p15, 0, r4, c1, c0, 0 @ save control register - stmia r8!, {r4} - - /* - * jump out to kernel flush routine - * - reuse that code is better - * - it executes in a cached space so is faster than refetch per-block - * - should be faster and will change with kernel - * - 'might' have to copy address, load and jump to it - * Flush all data from the L1 data cache before disabling - * SCTLR.C bit. - */ - ldr r1, kernel_flush - mov lr, pc - bx r1 - - /* - * Clear the SCTLR.C bit to prevent further data cache - * allocation. Clearing SCTLR.C would make all the data accesses - * strongly ordered and would not hit the cache. - */ - mrc p15, 0, r0, c1, c0, 0 - bic r0, r0, #(1 << 2) @ Disable the C bit - mcr p15, 0, r0, c1, c0, 0 - isb - - /* - * Invalidate L1 data cache. Even though only invalidate is - * necessary exported flush API is used here. Doing clean - * on already clean cache would be almost NOP. - */ - ldr r1, kernel_flush - blx r1 - /* - * The kernel doesn't interwork: v7_flush_dcache_all in particluar will - * always return in Thumb state when CONFIG_THUMB2_KERNEL is enabled. - * This sequence switches back to ARM. Note that .align may insert a - * nop: bx pc needs to be word-aligned in order to work. - */ - THUMB( .thumb ) - THUMB( .align ) - THUMB( bx pc ) - THUMB( nop ) - .arm - - /* Data memory barrier and Data sync barrier */ - dsb - dmb - -/* - * =================================== - * == WFI instruction => Enter idle == - * =================================== - */ - wfi @ wait for interrupt - -/* - * =================================== - * == Resume path for non-OFF modes == - * =================================== - */ - mrc p15, 0, r0, c1, c0, 0 - tst r0, #(1 << 2) @ Check C bit enabled? - orreq r0, r0, #(1 << 2) @ Enable the C bit if cleared - mcreq p15, 0, r0, c1, c0, 0 - isb - -/* - * =================================== - * == Exit point from non-OFF modes == - * =================================== - */ - ldmfd sp!, {r0-r12, pc} @ restore regs and return - - .pool - .align 12 .text - .global sh7372_cpu_resume -sh7372_cpu_resume: - - mov r1, #0 - /* - * Invalidate all instruction caches to PoU - * and flush branch target cache - */ - mcr p15, 0, r1, c7, c5, 0 - - ldr r3, =SMFRAM - - ldmia r3!, {r4-r6} - mov sp, r4 @ Restore sp - msr spsr_cxsf, r5 @ Restore spsr - mov lr, r6 @ Restore lr - - ldmia r3!, {r4-r7} - mcr p15, 0, r4, c1, c0, 2 @ Coprocessor access Control Register - mcr p15, 0, r5, c2, c0, 0 @ TTBR0 - mcr p15, 0, r6, c2, c0, 1 @ TTBR1 - mcr p15, 0, r7, c2, c0, 2 @ TTBCR - - ldmia r3!,{r4-r6} - mcr p15, 0, r4, c3, c0, 0 @ Domain access Control Register - mcr p15, 0, r5, c10, c2, 0 @ PRRR - mcr p15, 0, r6, c10, c2, 1 @ NMRR - - ldmia r3!,{r4-r7} - mcr p15, 0, r4, c13, c0, 1 @ Context ID - mcr p15, 0, r5, c13, c0, 2 @ User r/w thread and process ID - mrc p15, 0, r6, c12, c0, 0 @ Secure or NS vector base address - msr cpsr, r7 @ store cpsr - - /* Starting to enable MMU here */ - mrc p15, 0, r7, c2, c0, 2 @ Read TTBRControl - /* Extract N (0:2) bits and decide whether to use TTBR0 or TTBR1 */ - and r7, #0x7 - cmp r7, #0x0 - beq usettbr0 -ttbr_error: - /* - * More work needs to be done to support N[0:2] value other than 0 - * So looping here so that the error can be detected - */ - b ttbr_error - - .align -cache_pred_disable_mask: - .word 0xFFFFE7FB -ttbrbit_mask: - .word 0xFFFFC000 -table_index_mask: - .word 0xFFF00000 -table_entry: - .word 0x00000C02 -usettbr0: - - mrc p15, 0, r2, c2, c0, 0 - ldr r5, ttbrbit_mask - and r2, r5 - mov r4, pc - ldr r5, table_index_mask - and r4, r5 @ r4 = 31 to 20 bits of pc - /* Extract the value to be written to table entry */ - ldr r6, table_entry - /* r6 has the value to be written to table entry */ - add r6, r6, r4 - /* Getting the address of table entry to modify */ - lsr r4, #18 - /* r2 has the location which needs to be modified */ - add r2, r4 - ldr r4, [r2] - str r6, [r2] /* modify the table entry */ - - mov r7, r6 - mov r5, r2 - mov r6, r4 - /* r5 = original page table address */ - /* r6 = original page table data */ - - mov r0, #0 - mcr p15, 0, r0, c7, c5, 4 @ Flush prefetch buffer - mcr p15, 0, r0, c7, c5, 6 @ Invalidate branch predictor array - mcr p15, 0, r0, c8, c5, 0 @ Invalidate instruction TLB - mcr p15, 0, r0, c8, c6, 0 @ Invalidate data TLB - - /* - * Restore control register. This enables the MMU. - * The caches and prediction are not enabled here, they - * will be enabled after restoring the MMU table entry. - */ - ldmia r3!, {r4} - stmia r3!, {r5} /* save original page table address */ - stmia r3!, {r6} /* save original page table data */ - stmia r3!, {r7} /* save modified page table data */ - - ldr r2, cache_pred_disable_mask - and r4, r2 - mcr p15, 0, r4, c1, c0, 0 - dsb - isb - - ldr r0, =restoremmu_on - bx r0 - -/* - * ============================== - * == Exit point from OFF mode == - * ============================== - */ -restoremmu_on: - - ldmfd sp!, {r0-r12, pc} @ restore regs and return + .global sh7372_resume_core_standby +sh7372_resume_core_standby: + ldr pc, 1f +1: .long cpu_resume - PAGE_OFFSET + PLAT_PHYS_OFFSET -- cgit v0.10.2 From cf33835c5fc528cacd4f98bc38f246022dad340d Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Sun, 25 Sep 2011 23:20:49 +0200 Subject: ARM: mach-shmobile: sh7372 A3SM support This patch adds sh7372 A3SM power domain support. The sh7372 A3SM hardware power domain contains the ARM Cortex-A8 CPU Core including L2 cache. This sleep mode can be seen as a one step deeper sleep mode from the already existing Core Standby mode. To wake up from A3SM sleep only a few wakeup sources are supported - so the regular INTC controller will not be able to help us unfortunately. The code in this patch will enter A3SM sleep via the regular Suspend-to-RAM interface in the case of only wakeups supported by A3SM are enabled. If unsupported wakeups are enabled then Core Standby will be used instead. Signed-off-by: Magnus Damm Signed-off-by: Rafael J. Wysocki diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h index 7b6f6f9..c0cdbf9 100644 --- a/arch/arm/mach-shmobile/include/mach/common.h +++ b/arch/arm/mach-shmobile/include/mach/common.h @@ -35,7 +35,8 @@ extern void sh7372_add_standard_devices(void); extern void sh7372_clock_init(void); extern void sh7372_pinmux_init(void); extern void sh7372_pm_init(void); -extern void sh7372_resume_core_standby(void); +extern void sh7372_resume_core_standby_a3sm(void); +extern int sh7372_do_idle_a3sm(unsigned long unused); extern struct clk sh7372_extal1_clk; extern struct clk sh7372_extal2_clk; diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index aa7d352..444f42f 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include #include @@ -25,14 +27,48 @@ #include #include -#define SMFRAM 0xe6a70000 -#define SYSTBCR 0xe6150024 -#define SBAR 0xe6180020 -#define APARMBAREA 0xe6f10020 +/* DBG */ +#define DBGREG1 0xe6100020 +#define DBGREG9 0xe6100040 +/* CPGA */ +#define SYSTBCR 0xe6150024 +#define MSTPSR0 0xe6150030 +#define MSTPSR1 0xe6150038 +#define MSTPSR2 0xe6150040 +#define MSTPSR3 0xe6150048 +#define MSTPSR4 0xe615004c +#define PLLC01STPCR 0xe61500c8 + +/* SYSC */ #define SPDCR 0xe6180008 #define SWUCR 0xe6180014 +#define SBAR 0xe6180020 +#define WUPSMSK 0xe618002c +#define WUPSMSK2 0xe6180048 #define PSTR 0xe6180080 +#define WUPSFAC 0xe6180098 +#define IRQCR 0xe618022c +#define IRQCR2 0xe6180238 +#define IRQCR3 0xe6180244 +#define IRQCR4 0xe6180248 +#define PDNSEL 0xe6180254 + +/* INTC */ +#define ICR1A 0xe6900000 +#define ICR2A 0xe6900004 +#define ICR3A 0xe6900008 +#define ICR4A 0xe690000c +#define INTMSK00A 0xe6900040 +#define INTMSK10A 0xe6900044 +#define INTMSK20A 0xe6900048 +#define INTMSK30A 0xe690004c + +/* MFIS */ +#define SMFRAM 0xe6a70000 + +/* AP-System Core */ +#define APARMBAREA 0xe6f10020 #define PSTR_RETRIES 100 #define PSTR_DELAY_US 10 @@ -162,7 +198,7 @@ static int sh7372_do_idle_core_standby(unsigned long unused) static void sh7372_enter_core_standby(void) { /* set reset vector, translate 4k */ - __raw_writel(__pa(sh7372_resume_core_standby), SBAR); + __raw_writel(__pa(sh7372_resume_core_standby_a3sm), SBAR); __raw_writel(0, APARMBAREA); /* enter sleep mode with SYSTBCR to 0x10 */ @@ -174,7 +210,151 @@ static void sh7372_enter_core_standby(void) __raw_writel(0, SBAR); } +static void sh7372_enter_a3sm_common(int pllc0_on) +{ + /* set reset vector, translate 4k */ + __raw_writel(__pa(sh7372_resume_core_standby_a3sm), SBAR); + __raw_writel(0, APARMBAREA); + + if (pllc0_on) + __raw_writel(0, PLLC01STPCR); + else + __raw_writel(1 << 28, PLLC01STPCR); + + __raw_writel(0, PDNSEL); /* power-down A3SM only, not A4S */ + __raw_readl(WUPSFAC); /* read wakeup int. factor before sleep */ + cpu_suspend(0, sh7372_do_idle_a3sm); + __raw_readl(WUPSFAC); /* read wakeup int. factor after wakeup */ + + /* disable reset vector translation */ + __raw_writel(0, SBAR); +} + +static int sh7372_a3sm_valid(unsigned long *mskp, unsigned long *msk2p) +{ + unsigned long mstpsr0, mstpsr1, mstpsr2, mstpsr3, mstpsr4; + unsigned long msk, msk2; + + /* check active clocks to determine potential wakeup sources */ + + mstpsr0 = __raw_readl(MSTPSR0); + if ((mstpsr0 & 0x00000003) != 0x00000003) { + pr_debug("sh7372 mstpsr0 0x%08lx\n", mstpsr0); + return 0; + } + + mstpsr1 = __raw_readl(MSTPSR1); + if ((mstpsr1 & 0xff079b7f) != 0xff079b7f) { + pr_debug("sh7372 mstpsr1 0x%08lx\n", mstpsr1); + return 0; + } + + mstpsr2 = __raw_readl(MSTPSR2); + if ((mstpsr2 & 0x000741ff) != 0x000741ff) { + pr_debug("sh7372 mstpsr2 0x%08lx\n", mstpsr2); + return 0; + } + + mstpsr3 = __raw_readl(MSTPSR3); + if ((mstpsr3 & 0x1a60f010) != 0x1a60f010) { + pr_debug("sh7372 mstpsr3 0x%08lx\n", mstpsr3); + return 0; + } + + mstpsr4 = __raw_readl(MSTPSR4); + if ((mstpsr4 & 0x00008cf0) != 0x00008cf0) { + pr_debug("sh7372 mstpsr4 0x%08lx\n", mstpsr4); + return 0; + } + + msk = 0; + msk2 = 0; + + /* make bitmaps of limited number of wakeup sources */ + + if ((mstpsr2 & (1 << 23)) == 0) /* SPU2 */ + msk |= 1 << 31; + + if ((mstpsr2 & (1 << 12)) == 0) /* MFI_MFIM */ + msk |= 1 << 21; + + if ((mstpsr4 & (1 << 3)) == 0) /* KEYSC */ + msk |= 1 << 2; + + if ((mstpsr1 & (1 << 24)) == 0) /* CMT0 */ + msk |= 1 << 1; + + if ((mstpsr3 & (1 << 29)) == 0) /* CMT1 */ + msk |= 1 << 1; + + if ((mstpsr4 & (1 << 0)) == 0) /* CMT2 */ + msk |= 1 << 1; + + if ((mstpsr2 & (1 << 13)) == 0) /* MFI_MFIS */ + msk2 |= 1 << 17; + + *mskp = msk; + *msk2p = msk2; + + return 1; +} + +static void sh7372_icr_to_irqcr(unsigned long icr, u16 *irqcr1p, u16 *irqcr2p) +{ + u16 tmp, irqcr1, irqcr2; + int k; + + irqcr1 = 0; + irqcr2 = 0; + + /* convert INTCA ICR register layout to SYSC IRQCR+IRQCR2 */ + for (k = 0; k <= 7; k++) { + tmp = (icr >> ((7 - k) * 4)) & 0xf; + irqcr1 |= (tmp & 0x03) << (k * 2); + irqcr2 |= (tmp >> 2) << (k * 2); + } + + *irqcr1p = irqcr1; + *irqcr2p = irqcr2; +} + +static void sh7372_setup_a3sm(unsigned long msk, unsigned long msk2) +{ + u16 irqcrx_low, irqcrx_high, irqcry_low, irqcry_high; + unsigned long tmp; + + /* read IRQ0A -> IRQ15A mask */ + tmp = bitrev8(__raw_readb(INTMSK00A)); + tmp |= bitrev8(__raw_readb(INTMSK10A)) << 8; + + /* setup WUPSMSK from clocks and external IRQ mask */ + msk = (~msk & 0xc030000f) | (tmp << 4); + __raw_writel(msk, WUPSMSK); + + /* propage level/edge trigger for external IRQ 0->15 */ + sh7372_icr_to_irqcr(__raw_readl(ICR1A), &irqcrx_low, &irqcry_low); + sh7372_icr_to_irqcr(__raw_readl(ICR2A), &irqcrx_high, &irqcry_high); + __raw_writel((irqcrx_high << 16) | irqcrx_low, IRQCR); + __raw_writel((irqcry_high << 16) | irqcry_low, IRQCR2); + + /* read IRQ16A -> IRQ31A mask */ + tmp = bitrev8(__raw_readb(INTMSK20A)); + tmp |= bitrev8(__raw_readb(INTMSK30A)) << 8; + + /* setup WUPSMSK2 from clocks and external IRQ mask */ + msk2 = (~msk2 & 0x00030000) | tmp; + __raw_writel(msk2, WUPSMSK2); + + /* propage level/edge trigger for external IRQ 16->31 */ + sh7372_icr_to_irqcr(__raw_readl(ICR3A), &irqcrx_low, &irqcry_low); + sh7372_icr_to_irqcr(__raw_readl(ICR4A), &irqcrx_high, &irqcry_high); + __raw_writel((irqcrx_high << 16) | irqcrx_low, IRQCR3); + __raw_writel((irqcry_high << 16) | irqcry_low, IRQCR4); +} + + #ifdef CONFIG_CPU_IDLE + static void sh7372_cpuidle_setup(struct cpuidle_device *dev) { struct cpuidle_state *state; @@ -202,9 +382,25 @@ static void sh7372_cpuidle_init(void) {} #endif #ifdef CONFIG_SUSPEND + static int sh7372_enter_suspend(suspend_state_t suspend_state) { - sh7372_enter_core_standby(); + unsigned long msk, msk2; + + /* check active clocks to determine potential wakeup sources */ + if (sh7372_a3sm_valid(&msk, &msk2)) { + + /* convert INTC mask and sense to SYSC mask and sense */ + sh7372_setup_a3sm(msk, msk2); + + /* enter A3SM sleep with PLLC0 off */ + pr_debug("entering A3SM\n"); + sh7372_enter_a3sm_common(0); + } else { + /* default to Core Standby that supports all wakeup sources */ + pr_debug("entering Core Standby\n"); + sh7372_enter_core_standby(); + } return 0; } @@ -216,9 +412,6 @@ static void sh7372_suspend_init(void) static void sh7372_suspend_init(void) {} #endif -#define DBGREG1 0xe6100020 -#define DBGREG9 0xe6100040 - void __init sh7372_pm_init(void) { /* enable DBG hardware block to kick SYSC */ diff --git a/arch/arm/mach-shmobile/sleep-sh7372.S b/arch/arm/mach-shmobile/sleep-sh7372.S index dedf612..d365842 100644 --- a/arch/arm/mach-shmobile/sleep-sh7372.S +++ b/arch/arm/mach-shmobile/sleep-sh7372.S @@ -36,7 +36,58 @@ .align 12 .text - .global sh7372_resume_core_standby -sh7372_resume_core_standby: + .global sh7372_resume_core_standby_a3sm +sh7372_resume_core_standby_a3sm: ldr pc, 1f 1: .long cpu_resume - PAGE_OFFSET + PLAT_PHYS_OFFSET + + .global sh7372_do_idle_a3sm +sh7372_do_idle_a3sm: + /* + * Clear the SCTLR.C bit to prevent further data cache + * allocation. Clearing SCTLR.C would make all the data accesses + * strongly ordered and would not hit the cache. + */ + mrc p15, 0, r0, c1, c0, 0 + bic r0, r0, #(1 << 2) @ Disable the C bit + mcr p15, 0, r0, c1, c0, 0 + isb + + /* disable L2 cache in the aux control register */ + mrc p15, 0, r10, c1, c0, 1 + bic r10, r10, #2 + mcr p15, 0, r10, c1, c0, 1 + + /* + * Invalidate data cache again. + */ + ldr r1, kernel_flush + blx r1 + /* + * The kernel doesn't interwork: v7_flush_dcache_all in particluar will + * always return in Thumb state when CONFIG_THUMB2_KERNEL is enabled. + * This sequence switches back to ARM. Note that .align may insert a + * nop: bx pc needs to be word-aligned in order to work. + */ + THUMB( .thumb ) + THUMB( .align ) + THUMB( bx pc ) + THUMB( nop ) + .arm + + /* Data memory barrier and Data sync barrier */ + dsb + dmb + +#define SPDCR 0xe6180008 +#define A3SM (1 << 12) + + /* A3SM power down */ + ldr r0, =SPDCR + ldr r1, =A3SM + str r1, [r0] +1: + b 1b + +kernel_flush: + .word v7_flush_dcache_all -- cgit v0.10.2 From a0089bd617adea27ebc352e1e0871649ab1dbaa6 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Sun, 25 Sep 2011 23:21:02 +0200 Subject: ARM: mach-shmobile: sh7372 sleep warning fixes Update the sh7372 sleep code to build parts of the code only when SUSPEND and/or CPU_IDLE are set. Signed-off-by: Magnus Damm Signed-off-by: Rafael J. Wysocki diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index 444f42f..8e0944f 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -189,6 +189,7 @@ struct sh7372_pm_domain sh7372_a3sg = { #endif /* CONFIG_PM */ +#if defined(CONFIG_SUSPEND) || defined(CONFIG_CPU_IDLE) static int sh7372_do_idle_core_standby(unsigned long unused) { cpu_do_idle(); /* WFI when SYSTBCR == 0x10 -> Core Standby */ @@ -209,7 +210,9 @@ static void sh7372_enter_core_standby(void) /* disable reset vector translation */ __raw_writel(0, SBAR); } +#endif +#ifdef CONFIG_SUSPEND static void sh7372_enter_a3sm_common(int pllc0_on) { /* set reset vector, translate 4k */ @@ -351,7 +354,7 @@ static void sh7372_setup_a3sm(unsigned long msk, unsigned long msk2) __raw_writel((irqcrx_high << 16) | irqcrx_low, IRQCR3); __raw_writel((irqcry_high << 16) | irqcry_low, IRQCR4); } - +#endif #ifdef CONFIG_CPU_IDLE diff --git a/arch/arm/mach-shmobile/sleep-sh7372.S b/arch/arm/mach-shmobile/sleep-sh7372.S index d365842..f3ab3c5 100644 --- a/arch/arm/mach-shmobile/sleep-sh7372.S +++ b/arch/arm/mach-shmobile/sleep-sh7372.S @@ -34,6 +34,7 @@ #include #include +#if defined(CONFIG_SUSPEND) || defined(CONFIG_CPU_IDLE) .align 12 .text .global sh7372_resume_core_standby_a3sm @@ -91,3 +92,4 @@ sh7372_do_idle_a3sm: kernel_flush: .word v7_flush_dcache_all +#endif -- cgit v0.10.2 From cd0ea672f58d5cfdea271c45cec0c897f2b792aa Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 26 Sep 2011 20:22:02 +0200 Subject: PM / Domains: Split device PM domain data into base and need_restore The struct pm_domain_data data type is defined in such a way that adding new fields specific to the generic PM domains code will require include/linux/pm.h to be modified. As a result, data types used only by the generic PM domains code will be defined in two headers, although they all should be defined in pm_domain.h and pm.h will need to include more headers, which won't be very nice. For this reason change the definition of struct pm_subsys_data so that its domain_data member is a pointer, which will allow struct pm_domain_data to be subclassed by various PM domains implementations. Remove the need_restore member from struct pm_domain_data and make the generic PM domains code subclass it by adding the need_restore member to the new data type. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index c2468a7..22fe029 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -188,11 +188,12 @@ static int __pm_genpd_save_device(struct pm_domain_data *pdd, struct generic_pm_domain *genpd) __releases(&genpd->lock) __acquires(&genpd->lock) { + struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); struct device *dev = pdd->dev; struct device_driver *drv = dev->driver; int ret = 0; - if (pdd->need_restore) + if (gpd_data->need_restore) return 0; mutex_unlock(&genpd->lock); @@ -210,7 +211,7 @@ static int __pm_genpd_save_device(struct pm_domain_data *pdd, mutex_lock(&genpd->lock); if (!ret) - pdd->need_restore = true; + gpd_data->need_restore = true; return ret; } @@ -224,10 +225,11 @@ static void __pm_genpd_restore_device(struct pm_domain_data *pdd, struct generic_pm_domain *genpd) __releases(&genpd->lock) __acquires(&genpd->lock) { + struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); struct device *dev = pdd->dev; struct device_driver *drv = dev->driver; - if (!pdd->need_restore) + if (!gpd_data->need_restore) return; mutex_unlock(&genpd->lock); @@ -244,7 +246,7 @@ static void __pm_genpd_restore_device(struct pm_domain_data *pdd, mutex_lock(&genpd->lock); - pdd->need_restore = false; + gpd_data->need_restore = false; } /** @@ -493,7 +495,7 @@ static int pm_genpd_runtime_resume(struct device *dev) mutex_lock(&genpd->lock); } finish_wait(&genpd->status_wait_queue, &wait); - __pm_genpd_restore_device(&dev->power.subsys_data->domain_data, genpd); + __pm_genpd_restore_device(dev->power.subsys_data->domain_data, genpd); genpd->resume_count--; genpd_set_active(genpd); wake_up_all(&genpd->status_wait_queue); @@ -1080,6 +1082,7 @@ static void pm_genpd_complete(struct device *dev) */ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) { + struct generic_pm_domain_data *gpd_data; struct pm_domain_data *pdd; int ret = 0; @@ -1106,14 +1109,20 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) goto out; } + gpd_data = kzalloc(sizeof(*gpd_data), GFP_KERNEL); + if (!gpd_data) { + ret = -ENOMEM; + goto out; + } + genpd->device_count++; dev->pm_domain = &genpd->domain; dev_pm_get_subsys_data(dev); - pdd = &dev->power.subsys_data->domain_data; - pdd->dev = dev; - pdd->need_restore = false; - list_add_tail(&pdd->list_node, &genpd->dev_list); + dev->power.subsys_data->domain_data = &gpd_data->base; + gpd_data->base.dev = dev; + gpd_data->need_restore = false; + list_add_tail(&gpd_data->base.list_node, &genpd->dev_list); out: genpd_release_lock(genpd); @@ -1152,6 +1161,7 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, pdd->dev = NULL; dev_pm_put_subsys_data(dev); dev->pm_domain = NULL; + kfree(to_gpd_data(pdd)); genpd->device_count--; diff --git a/include/linux/pm.h b/include/linux/pm.h index ed10f24..f256824 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -424,7 +424,6 @@ struct wakeup_source; struct pm_domain_data { struct list_head list_node; struct device *dev; - bool need_restore; }; struct pm_subsys_data { @@ -434,7 +433,7 @@ struct pm_subsys_data { struct list_head clock_list; #endif #ifdef CONFIG_PM_GENERIC_DOMAINS - struct pm_domain_data domain_data; + struct pm_domain_data *domain_data; #endif }; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 2538d90..65633e5 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -62,6 +62,16 @@ struct gpd_link { struct list_head slave_node; }; +struct generic_pm_domain_data { + struct pm_domain_data base; + bool need_restore; +}; + +static inline struct generic_pm_domain_data *to_gpd_data(struct pm_domain_data *pdd) +{ + return container_of(pdd, struct generic_pm_domain_data, base); +} + #ifdef CONFIG_PM_GENERIC_DOMAINS extern int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev); -- cgit v0.10.2 From 30b1a7a32ca48fd8758f8ca44d60deebc0aa3d72 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 27 Sep 2011 21:54:22 +0200 Subject: USB: Add wakeup info to debugging messages This patch (as1487) improves the usbcore debugging output for port suspend and bus suspend, by stating whether or not remote wakeup is enabled. Signed-off-by: Alan Stern Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index da582f4..877e0e2 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1959,8 +1959,9 @@ int hcd_bus_suspend(struct usb_device *rhdev, pm_message_t msg) int status; int old_state = hcd->state; - dev_dbg(&rhdev->dev, "bus %s%s\n", - (PMSG_IS_AUTO(msg) ? "auto-" : ""), "suspend"); + dev_dbg(&rhdev->dev, "bus %ssuspend, wakeup %d\n", + (PMSG_IS_AUTO(msg) ? "auto-" : ""), + rhdev->do_remote_wakeup); if (HCD_DEAD(hcd)) { dev_dbg(&rhdev->dev, "skipped %s of dead bus\n", "suspend"); return 0; @@ -1995,8 +1996,8 @@ int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg) int status; int old_state = hcd->state; - dev_dbg(&rhdev->dev, "usb %s%s\n", - (PMSG_IS_AUTO(msg) ? "auto-" : ""), "resume"); + dev_dbg(&rhdev->dev, "usb %sresume\n", + (PMSG_IS_AUTO(msg) ? "auto-" : "")); if (HCD_DEAD(hcd)) { dev_dbg(&rhdev->dev, "skipped %s of dead bus\n", "resume"); return 0; diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index ee50e0b..13bc832 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2324,8 +2324,6 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) int port1 = udev->portnum; int status; - // dev_dbg(hub->intfdev, "suspend port %d\n", port1); - /* enable remote wakeup when appropriate; this lets the device * wake up the upstream hub (including maybe the root hub). * @@ -2371,8 +2369,9 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) status = 0; } else { /* device has up to 10 msec to fully suspend */ - dev_dbg(&udev->dev, "usb %ssuspend\n", - (PMSG_IS_AUTO(msg) ? "auto-" : "")); + dev_dbg(&udev->dev, "usb %ssuspend, wakeup %d\n", + (PMSG_IS_AUTO(msg) ? "auto-" : ""), + udev->do_remote_wakeup); usb_set_device_state(udev, USB_STATE_SUSPENDED); msleep(10); } -- cgit v0.10.2 From ad3c36a534bc7b945d7bffdda1c62e13bf93489a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 27 Sep 2011 21:54:52 +0200 Subject: PM / Runtime: Don't run callbacks under lock for power.irq_safe set The rpm_suspend() and rpm_resume() routines execute subsystem or PM domain callbacks under power.lock if power.irq_safe is set for the given device. This is inconsistent with that rpm_idle() does after commit 02b2677 (PM / Runtime: Allow _put_sync() from interrupts-disabled context) and is problematic for subsystems and PM domains wanting to use power.lock for synchronization in their runtime PM callbacks. This change requires the code checking if the device's runtime PM status is RPM_SUSPENDING or RPM_RESUMING to be modified too, to take the power.irq_safe set case into account (that code wasn't reachable before with power.irq_safe set, because it's executed with the device's power.lock held). Signed-off-by: Rafael J. Wysocki Reviewed-by: Ming Lei Reviewed-by: Kevin Hilman diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 04e18ab..aecb2a8 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -155,6 +155,31 @@ static int rpm_check_suspend_allowed(struct device *dev) } /** + * __rpm_callback - Run a given runtime PM callback for a given device. + * @cb: Runtime PM callback to run. + * @dev: Device to run the callback for. + */ +static int __rpm_callback(int (*cb)(struct device *), struct device *dev) + __releases(&dev->power.lock) __acquires(&dev->power.lock) +{ + int retval; + + if (dev->power.irq_safe) + spin_unlock(&dev->power.lock); + else + spin_unlock_irq(&dev->power.lock); + + retval = cb(dev); + + if (dev->power.irq_safe) + spin_lock(&dev->power.lock); + else + spin_lock_irq(&dev->power.lock); + + return retval; +} + +/** * rpm_idle - Notify device bus type if the device can be suspended. * @dev: Device to notify the bus type about. * @rpmflags: Flag bits. @@ -225,19 +250,8 @@ static int rpm_idle(struct device *dev, int rpmflags) else callback = NULL; - if (callback) { - if (dev->power.irq_safe) - spin_unlock(&dev->power.lock); - else - spin_unlock_irq(&dev->power.lock); - - callback(dev); - - if (dev->power.irq_safe) - spin_lock(&dev->power.lock); - else - spin_lock_irq(&dev->power.lock); - } + if (callback) + __rpm_callback(callback, dev); dev->power.idle_notification = false; wake_up_all(&dev->power.wait_queue); @@ -252,22 +266,14 @@ static int rpm_idle(struct device *dev, int rpmflags) * @dev: Device to run the callback for. */ static int rpm_callback(int (*cb)(struct device *), struct device *dev) - __releases(&dev->power.lock) __acquires(&dev->power.lock) { int retval; if (!cb) return -ENOSYS; - if (dev->power.irq_safe) { - retval = cb(dev); - } else { - spin_unlock_irq(&dev->power.lock); - - retval = cb(dev); + retval = __rpm_callback(cb, dev); - spin_lock_irq(&dev->power.lock); - } dev->power.runtime_error = retval; return retval != -EACCES ? retval : -EIO; } @@ -347,6 +353,15 @@ static int rpm_suspend(struct device *dev, int rpmflags) goto out; } + if (dev->power.irq_safe) { + spin_unlock(&dev->power.lock); + + cpu_relax(); + + spin_lock(&dev->power.lock); + goto repeat; + } + /* Wait for the other suspend running in parallel with us. */ for (;;) { prepare_to_wait(&dev->power.wait_queue, &wait, @@ -496,6 +511,15 @@ static int rpm_resume(struct device *dev, int rpmflags) goto out; } + if (dev->power.irq_safe) { + spin_unlock(&dev->power.lock); + + cpu_relax(); + + spin_lock(&dev->power.lock); + goto repeat; + } + /* Wait for the operation carried out in parallel with us. */ for (;;) { prepare_to_wait(&dev->power.wait_queue, &wait, -- cgit v0.10.2 From 53b615ccca567ada1931eb04ad0614ac150c14a3 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 27 Sep 2011 22:53:27 +0200 Subject: PM / Runtime: Introduce trace points for tracing rpm_* functions This patch introduces 3 trace points to prepare for tracing rpm_idle/rpm_suspend/rpm_resume functions, so we can use these trace points to replace the current dev_dbg(). Signed-off-by: Ming Lei Acked-by: Steven Rostedt Signed-off-by: Rafael J. Wysocki diff --git a/include/trace/events/rpm.h b/include/trace/events/rpm.h new file mode 100644 index 0000000..d62c558 --- /dev/null +++ b/include/trace/events/rpm.h @@ -0,0 +1,99 @@ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rpm + +#if !defined(_TRACE_RUNTIME_POWER_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RUNTIME_POWER_H + +#include +#include +#include + +/* + * The rpm_internal events are used for tracing some important + * runtime pm internal functions. + */ +DECLARE_EVENT_CLASS(rpm_internal, + + TP_PROTO(struct device *dev, int flags), + + TP_ARGS(dev, flags), + + TP_STRUCT__entry( + __string( name, dev_name(dev) ) + __field( int, flags ) + __field( int , usage_count ) + __field( int , disable_depth ) + __field( int , runtime_auto ) + __field( int , request_pending ) + __field( int , irq_safe ) + __field( int , child_count ) + ), + + TP_fast_assign( + __assign_str(name, dev_name(dev)); + __entry->flags = flags; + __entry->usage_count = atomic_read( + &dev->power.usage_count); + __entry->disable_depth = dev->power.disable_depth; + __entry->runtime_auto = dev->power.runtime_auto; + __entry->request_pending = dev->power.request_pending; + __entry->irq_safe = dev->power.irq_safe; + __entry->child_count = atomic_read( + &dev->power.child_count); + ), + + TP_printk("%s flags-%x cnt-%-2d dep-%-2d auto-%-1d p-%-1d" + " irq-%-1d child-%d", + __get_str(name), __entry->flags, + __entry->usage_count, + __entry->disable_depth, + __entry->runtime_auto, + __entry->request_pending, + __entry->irq_safe, + __entry->child_count + ) +); +DEFINE_EVENT(rpm_internal, rpm_suspend, + + TP_PROTO(struct device *dev, int flags), + + TP_ARGS(dev, flags) +); +DEFINE_EVENT(rpm_internal, rpm_resume, + + TP_PROTO(struct device *dev, int flags), + + TP_ARGS(dev, flags) +); +DEFINE_EVENT(rpm_internal, rpm_idle, + + TP_PROTO(struct device *dev, int flags), + + TP_ARGS(dev, flags) +); + +TRACE_EVENT(rpm_return_int, + TP_PROTO(struct device *dev, unsigned long ip, int ret), + TP_ARGS(dev, ip, ret), + + TP_STRUCT__entry( + __string( name, dev_name(dev)) + __field( unsigned long, ip ) + __field( int, ret ) + ), + + TP_fast_assign( + __assign_str(name, dev_name(dev)); + __entry->ip = ip; + __entry->ret = ret; + ), + + TP_printk("%pS:%s ret=%d", (void *)__entry->ip, __get_str(name), + __entry->ret) +); + +#endif /* _TRACE_RUNTIME_POWER_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 761c510..56bdab5 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -53,6 +53,7 @@ endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o obj-$(CONFIG_TRACEPOINTS) += power-traces.o +obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o ifeq ($(CONFIG_TRACING),y) obj-$(CONFIG_KGDB_KDB) += trace_kdb.o endif diff --git a/kernel/trace/rpm-traces.c b/kernel/trace/rpm-traces.c new file mode 100644 index 0000000..4b3b5ea --- /dev/null +++ b/kernel/trace/rpm-traces.c @@ -0,0 +1,20 @@ +/* + * Power trace points + * + * Copyright (C) 2009 Ming Lei + */ + +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_return_int); +EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_idle); +EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_suspend); +EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_resume); -- cgit v0.10.2 From c3dc2f14622a06488f11452b6efd1e02c5a8548b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 27 Sep 2011 22:54:41 +0200 Subject: PM / Runtime: Replace dev_dbg() with trace_rpm_*() This patch replaces dev_dbg with trace_rpm_* inside the three important functions: rpm_idle rpm_suspend rpm_resume Trace points have the below advantages compared with dev_dbg: - trace points include much runtime information(such as running cpu, current task, ...) - most of linux distributions may disable "verbose debug" driver debug compile switch, so it is very difficult to report/debug runtime pm related problems from distribution users without this kind of debug information. - for upstream kernel users, enableing the debug switch will produce many useless "rpm_resume" output, and it is very noise. - dev_dbg inside rpm_suspend/rpm_resume may have some effects on runtime pm behaviour of console devicer Signed-off-by: Ming Lei Acked-by: Steven Rostedt Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index aecb2a8..7a6fb5e 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -9,6 +9,7 @@ #include #include +#include #include "power.h" static int rpm_resume(struct device *dev, int rpmflags); @@ -196,6 +197,7 @@ static int rpm_idle(struct device *dev, int rpmflags) int (*callback)(struct device *); int retval; + trace_rpm_idle(dev, rpmflags); retval = rpm_check_suspend_allowed(dev); if (retval < 0) ; /* Conditions are wrong. */ @@ -257,6 +259,7 @@ static int rpm_idle(struct device *dev, int rpmflags) wake_up_all(&dev->power.wait_queue); out: + trace_rpm_return_int(dev, _THIS_IP_, retval); return retval; } @@ -301,7 +304,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) struct device *parent = NULL; int retval; - dev_dbg(dev, "%s flags 0x%x\n", __func__, rpmflags); + trace_rpm_suspend(dev, rpmflags); repeat: retval = rpm_check_suspend_allowed(dev); @@ -445,7 +448,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) } out: - dev_dbg(dev, "%s returns %d\n", __func__, retval); + trace_rpm_return_int(dev, _THIS_IP_, retval); return retval; } @@ -474,7 +477,7 @@ static int rpm_resume(struct device *dev, int rpmflags) struct device *parent = NULL; int retval = 0; - dev_dbg(dev, "%s flags 0x%x\n", __func__, rpmflags); + trace_rpm_resume(dev, rpmflags); repeat: if (dev->power.runtime_error) @@ -639,7 +642,7 @@ static int rpm_resume(struct device *dev, int rpmflags) spin_lock_irq(&dev->power.lock); } - dev_dbg(dev, "%s returns %d\n", __func__, retval); + trace_rpm_return_int(dev, _THIS_IP_, retval); return retval; } -- cgit v0.10.2 From 2a5306cc5f383b0e7414c75e458111afd4a563a4 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 29 Sep 2011 22:07:23 +0200 Subject: PM / Tracing: build rpm-traces.c only if CONFIG_PM_RUNTIME is set Do not build kernel/trace/rpm-traces.c if CONFIG_PM_RUNTIME is not set, which avoids a build failure. [rjw: Added the changelog and modified the subject slightly.] Signed-off-by: Ming Lei Signed-off-by: Rafael J. Wysocki diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 56bdab5..f49405f 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -53,7 +53,9 @@ endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o obj-$(CONFIG_TRACEPOINTS) += power-traces.o +ifeq ($(CONFIG_PM_RUNTIME),y) obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o +endif ifeq ($(CONFIG_TRACING),y) obj-$(CONFIG_KGDB_KDB) += trace_kdb.o endif -- cgit v0.10.2 From 03ca370fbf7b76d6d002380dbdc2cdc2319f9c80 Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Fri, 30 Sep 2011 22:35:12 +0200 Subject: PM / OPP: Add OPP availability change notifier. The patch enables to register notifier_block for an OPP-device in order to get notified for any changes in the availability of OPPs of the device. For example, if a new OPP is inserted or enable/disable status of an OPP is changed, the notifier is executed. This enables the usage of opp_add, opp_enable, and opp_disable to directly take effect with any connected entities such as cpufreq or devfreq. Signed-off-by: MyungJoo Ham Signed-off-by: Kyungmin Park Reviewed-by: Mike Turquette Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index b23de18..434a6c0 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -73,6 +73,7 @@ struct opp { * RCU usage: nodes are not modified in the list of device_opp, * however addition is possible and is secured by dev_opp_list_lock * @dev: device pointer + * @head: notifier head to notify the OPP availability changes. * @opp_list: list of opps * * This is an internal data structure maintaining the link to opps attached to @@ -83,6 +84,7 @@ struct device_opp { struct list_head node; struct device *dev; + struct srcu_notifier_head head; struct list_head opp_list; }; @@ -404,6 +406,7 @@ int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) } dev_opp->dev = dev; + srcu_init_notifier_head(&dev_opp->head); INIT_LIST_HEAD(&dev_opp->opp_list); /* Secure the device list modification */ @@ -428,6 +431,11 @@ int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) list_add_rcu(&new_opp->node, head); mutex_unlock(&dev_opp_list_lock); + /* + * Notify the changes in the availability of the operable + * frequency/voltage list. + */ + srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_ADD, new_opp); return 0; } @@ -504,6 +512,14 @@ static int opp_set_availability(struct device *dev, unsigned long freq, mutex_unlock(&dev_opp_list_lock); synchronize_rcu(); + /* Notify the change of the OPP availability */ + if (availability_req) + srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_ENABLE, + new_opp); + else + srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_DISABLE, + new_opp); + /* clean up old opp */ new_opp = opp; goto out; @@ -643,3 +659,17 @@ void opp_free_cpufreq_table(struct device *dev, *table = NULL; } #endif /* CONFIG_CPU_FREQ */ + +/** + * opp_get_notifier() - find notifier_head of the device with opp + * @dev: device pointer used to lookup device OPPs. + */ +struct srcu_notifier_head *opp_get_notifier(struct device *dev) +{ + struct device_opp *dev_opp = find_device_opp(dev); + + if (IS_ERR(dev_opp)) + return ERR_PTR(PTR_ERR(dev_opp)); /* matching type */ + + return &dev_opp->head; +} diff --git a/include/linux/opp.h b/include/linux/opp.h index 7020e97..87a9208 100644 --- a/include/linux/opp.h +++ b/include/linux/opp.h @@ -16,9 +16,14 @@ #include #include +#include struct opp; +enum opp_event { + OPP_EVENT_ADD, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE, +}; + #if defined(CONFIG_PM_OPP) unsigned long opp_get_voltage(struct opp *opp); @@ -40,6 +45,8 @@ int opp_enable(struct device *dev, unsigned long freq); int opp_disable(struct device *dev, unsigned long freq); +struct srcu_notifier_head *opp_get_notifier(struct device *dev); + #else static inline unsigned long opp_get_voltage(struct opp *opp) { @@ -89,6 +96,11 @@ static inline int opp_disable(struct device *dev, unsigned long freq) { return 0; } + +struct srcu_notifier_head *opp_get_notifier(struct device *dev) +{ + return ERR_PTR(-EINVAL); +} #endif /* CONFIG_PM */ #if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP) -- cgit v0.10.2 From a3c98b8b2ede1f4230f49f9af7135cd902e71e83 Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Sun, 2 Oct 2011 00:19:15 +0200 Subject: PM: Introduce devfreq: generic DVFS framework with device-specific OPPs With OPPs, a device may have multiple operable frequency and voltage sets. However, there can be multiple possible operable sets and a system will need to choose one from them. In order to reduce the power consumption (by reducing frequency and voltage) without affecting the performance too much, a Dynamic Voltage and Frequency Scaling (DVFS) scheme may be used. This patch introduces the DVFS capability to non-CPU devices with OPPs. DVFS is a techique whereby the frequency and supplied voltage of a device is adjusted on-the-fly. DVFS usually sets the frequency as low as possible with given conditions (such as QoS assurance) and adjusts voltage according to the chosen frequency in order to reduce power consumption and heat dissipation. The generic DVFS for devices, devfreq, may appear quite similar with /drivers/cpufreq. However, cpufreq does not allow to have multiple devices registered and is not suitable to have multiple heterogenous devices with different (but simple) governors. Normally, DVFS mechanism controls frequency based on the demand for the device, and then, chooses voltage based on the chosen frequency. devfreq also controls the frequency based on the governor's frequency recommendation and let OPP pick up the pair of frequency and voltage based on the recommended frequency. Then, the chosen OPP is passed to device driver's "target" callback. When PM QoS is going to be used with the devfreq device, the device driver should enable OPPs that are appropriate with the current PM QoS requests. In order to do so, the device driver may call opp_enable and opp_disable at the notifier callback of PM QoS so that PM QoS's update_target() call enables the appropriate OPPs. Note that at least one of OPPs should be enabled at any time; be careful when there is a transition. Signed-off-by: MyungJoo Ham Signed-off-by: Kyungmin Park Reviewed-by: Mike Turquette Acked-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki diff --git a/drivers/Kconfig b/drivers/Kconfig index 95b9e7e..a1efd75 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -130,4 +130,6 @@ source "drivers/iommu/Kconfig" source "drivers/virt/Kconfig" +source "drivers/devfreq/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 7fa433a..97c957b5 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -127,3 +127,5 @@ obj-$(CONFIG_IOMMU_SUPPORT) += iommu/ # Virtualization drivers obj-$(CONFIG_VIRT_DRIVERS) += virt/ + +obj-$(CONFIG_PM_DEVFREQ) += devfreq/ diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig new file mode 100644 index 0000000..1fb42de --- /dev/null +++ b/drivers/devfreq/Kconfig @@ -0,0 +1,39 @@ +config ARCH_HAS_DEVFREQ + bool + depends on ARCH_HAS_OPP + help + Denotes that the architecture supports DEVFREQ. If the architecture + supports multiple OPP entries per device and the frequency of the + devices with OPPs may be altered dynamically, the architecture + supports DEVFREQ. + +menuconfig PM_DEVFREQ + bool "Generic Dynamic Voltage and Frequency Scaling (DVFS) support" + depends on PM_OPP && ARCH_HAS_DEVFREQ + help + With OPP support, a device may have a list of frequencies and + voltages available. DEVFREQ, a generic DVFS framework can be + registered for a device with OPP support in order to let the + governor provided to DEVFREQ choose an operating frequency + based on the OPP's list and the policy given with DEVFREQ. + + Each device may have its own governor and policy. DEVFREQ can + reevaluate the device state periodically and/or based on the + OPP list changes (each frequency/voltage pair in OPP may be + disabled or enabled). + + Like some CPUs with CPUFREQ, a device may have multiple clocks. + However, because the clock frequencies of a single device are + determined by the single device's state, an instance of DEVFREQ + is attached to a single device and returns a "representative" + clock frequency from the OPP of the device, which is also attached + to a device by 1-to-1. The device registering DEVFREQ takes the + responsiblity to "interpret" the frequency listed in OPP and + to set its every clock accordingly with the "target" callback + given to DEVFREQ. + +if PM_DEVFREQ + +comment "DEVFREQ Drivers" + +endif # PM_DEVFREQ diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile new file mode 100644 index 0000000..168934a --- /dev/null +++ b/drivers/devfreq/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_PM_DEVFREQ) += devfreq.o diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c new file mode 100644 index 0000000..f3100b1 --- /dev/null +++ b/drivers/devfreq/devfreq.c @@ -0,0 +1,532 @@ +/* + * devfreq: Generic Dynamic Voltage and Frequency Scaling (DVFS) Framework + * for Non-CPU Devices. + * + * Copyright (C) 2011 Samsung Electronics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "governor.h" + +struct class *devfreq_class; + +/* + * devfreq_work periodically monitors every registered device. + * The minimum polling interval is one jiffy. The polling interval is + * determined by the minimum polling period among all polling devfreq + * devices. The resolution of polling interval is one jiffy. + */ +static bool polling; +static struct workqueue_struct *devfreq_wq; +static struct delayed_work devfreq_work; + +/* wait removing if this is to be removed */ +static struct devfreq *wait_remove_device; + +/* The list of all device-devfreq */ +static LIST_HEAD(devfreq_list); +static DEFINE_MUTEX(devfreq_list_lock); + +/** + * find_device_devfreq() - find devfreq struct using device pointer + * @dev: device pointer used to lookup device devfreq. + * + * Search the list of device devfreqs and return the matched device's + * devfreq info. devfreq_list_lock should be held by the caller. + */ +static struct devfreq *find_device_devfreq(struct device *dev) +{ + struct devfreq *tmp_devfreq; + + if (unlikely(IS_ERR_OR_NULL(dev))) { + pr_err("DEVFREQ: %s: Invalid parameters\n", __func__); + return ERR_PTR(-EINVAL); + } + WARN(!mutex_is_locked(&devfreq_list_lock), + "devfreq_list_lock must be locked."); + + list_for_each_entry(tmp_devfreq, &devfreq_list, node) { + if (tmp_devfreq->dev.parent == dev) + return tmp_devfreq; + } + + return ERR_PTR(-ENODEV); +} + +/** + * update_devfreq() - Reevaluate the device and configure frequency. + * @devfreq: the devfreq instance. + * + * Note: Lock devfreq->lock before calling update_devfreq + * This function is exported for governors. + */ +int update_devfreq(struct devfreq *devfreq) +{ + unsigned long freq; + int err = 0; + + if (!mutex_is_locked(&devfreq->lock)) { + WARN(true, "devfreq->lock must be locked by the caller.\n"); + return -EINVAL; + } + + /* Reevaluate the proper frequency */ + err = devfreq->governor->get_target_freq(devfreq, &freq); + if (err) + return err; + + err = devfreq->profile->target(devfreq->dev.parent, &freq); + if (err) + return err; + + devfreq->previous_freq = freq; + return err; +} + +/** + * devfreq_notifier_call() - Notify that the device frequency requirements + * has been changed out of devfreq framework. + * @nb the notifier_block (supposed to be devfreq->nb) + * @type not used + * @devp not used + * + * Called by a notifier that uses devfreq->nb. + */ +static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type, + void *devp) +{ + struct devfreq *devfreq = container_of(nb, struct devfreq, nb); + int ret; + + mutex_lock(&devfreq->lock); + ret = update_devfreq(devfreq); + mutex_unlock(&devfreq->lock); + + return ret; +} + +/** + * _remove_devfreq() - Remove devfreq from the device. + * @devfreq: the devfreq struct + * @skip: skip calling device_unregister(). + * + * Note that the caller should lock devfreq->lock before calling + * this. _remove_devfreq() will unlock it and free devfreq + * internally. devfreq_list_lock should be locked by the caller + * as well (not relased at return) + * + * Lock usage: + * devfreq->lock: locked before call. + * unlocked at return (and freed) + * devfreq_list_lock: locked before call. + * kept locked at return. + * if devfreq is centrally polled. + * + * Freed memory: + * devfreq + */ +static void _remove_devfreq(struct devfreq *devfreq, bool skip) +{ + if (!mutex_is_locked(&devfreq->lock)) { + WARN(true, "devfreq->lock must be locked by the caller.\n"); + return; + } + if (!devfreq->governor->no_central_polling && + !mutex_is_locked(&devfreq_list_lock)) { + WARN(true, "devfreq_list_lock must be locked by the caller.\n"); + return; + } + + if (devfreq->being_removed) + return; + + devfreq->being_removed = true; + + if (devfreq->profile->exit) + devfreq->profile->exit(devfreq->dev.parent); + + if (devfreq->governor->exit) + devfreq->governor->exit(devfreq); + + if (!skip && get_device(&devfreq->dev)) { + device_unregister(&devfreq->dev); + put_device(&devfreq->dev); + } + + if (!devfreq->governor->no_central_polling) + list_del(&devfreq->node); + + mutex_unlock(&devfreq->lock); + mutex_destroy(&devfreq->lock); + + kfree(devfreq); +} + +/** + * devfreq_dev_release() - Callback for struct device to release the device. + * @dev: the devfreq device + * + * This calls _remove_devfreq() if _remove_devfreq() is not called. + * Note that devfreq_dev_release() could be called by _remove_devfreq() as + * well as by others unregistering the device. + */ +static void devfreq_dev_release(struct device *dev) +{ + struct devfreq *devfreq = to_devfreq(dev); + bool central_polling = !devfreq->governor->no_central_polling; + + /* + * If devfreq_dev_release() was called by device_unregister() of + * _remove_devfreq(), we cannot mutex_lock(&devfreq->lock) and + * being_removed is already set. This also partially checks the case + * where devfreq_dev_release() is called from a thread other than + * the one called _remove_devfreq(); however, this case is + * dealt completely with another following being_removed check. + * + * Because being_removed is never being + * unset, we do not need to worry about race conditions on + * being_removed. + */ + if (devfreq->being_removed) + return; + + if (central_polling) + mutex_lock(&devfreq_list_lock); + + mutex_lock(&devfreq->lock); + + /* + * Check being_removed flag again for the case where + * devfreq_dev_release() was called in a thread other than the one + * possibly called _remove_devfreq(). + */ + if (devfreq->being_removed) { + mutex_unlock(&devfreq->lock); + goto out; + } + + /* devfreq->lock is unlocked and removed in _removed_devfreq() */ + _remove_devfreq(devfreq, true); + +out: + if (central_polling) + mutex_unlock(&devfreq_list_lock); +} + +/** + * devfreq_monitor() - Periodically poll devfreq objects. + * @work: the work struct used to run devfreq_monitor periodically. + * + */ +static void devfreq_monitor(struct work_struct *work) +{ + static unsigned long last_polled_at; + struct devfreq *devfreq, *tmp; + int error; + unsigned long jiffies_passed; + unsigned long next_jiffies = ULONG_MAX, now = jiffies; + struct device *dev; + + /* Initially last_polled_at = 0, polling every device at bootup */ + jiffies_passed = now - last_polled_at; + last_polled_at = now; + if (jiffies_passed == 0) + jiffies_passed = 1; + + mutex_lock(&devfreq_list_lock); + list_for_each_entry_safe(devfreq, tmp, &devfreq_list, node) { + mutex_lock(&devfreq->lock); + dev = devfreq->dev.parent; + + /* Do not remove tmp for a while */ + wait_remove_device = tmp; + + if (devfreq->governor->no_central_polling || + devfreq->next_polling == 0) { + mutex_unlock(&devfreq->lock); + continue; + } + mutex_unlock(&devfreq_list_lock); + + /* + * Reduce more next_polling if devfreq_wq took an extra + * delay. (i.e., CPU has been idled.) + */ + if (devfreq->next_polling <= jiffies_passed) { + error = update_devfreq(devfreq); + + /* Remove a devfreq with an error. */ + if (error && error != -EAGAIN) { + + dev_err(dev, "Due to update_devfreq error(%d), devfreq(%s) is removed from the device\n", + error, devfreq->governor->name); + + /* + * Unlock devfreq before locking the list + * in order to avoid deadlock with + * find_device_devfreq or others + */ + mutex_unlock(&devfreq->lock); + mutex_lock(&devfreq_list_lock); + /* Check if devfreq is already removed */ + if (IS_ERR(find_device_devfreq(dev))) + continue; + mutex_lock(&devfreq->lock); + /* This unlocks devfreq->lock and free it */ + _remove_devfreq(devfreq, false); + continue; + } + devfreq->next_polling = devfreq->polling_jiffies; + } else { + devfreq->next_polling -= jiffies_passed; + } + + if (devfreq->next_polling) + next_jiffies = (next_jiffies > devfreq->next_polling) ? + devfreq->next_polling : next_jiffies; + + mutex_unlock(&devfreq->lock); + mutex_lock(&devfreq_list_lock); + } + wait_remove_device = NULL; + mutex_unlock(&devfreq_list_lock); + + if (next_jiffies > 0 && next_jiffies < ULONG_MAX) { + polling = true; + queue_delayed_work(devfreq_wq, &devfreq_work, next_jiffies); + } else { + polling = false; + } +} + +/** + * devfreq_add_device() - Add devfreq feature to the device + * @dev: the device to add devfreq feature. + * @profile: device-specific profile to run devfreq. + * @governor: the policy to choose frequency. + * @data: private data for the governor. The devfreq framework does not + * touch this value. + */ +struct devfreq *devfreq_add_device(struct device *dev, + struct devfreq_dev_profile *profile, + const struct devfreq_governor *governor, + void *data) +{ + struct devfreq *devfreq; + int err = 0; + + if (!dev || !profile || !governor) { + dev_err(dev, "%s: Invalid parameters.\n", __func__); + return ERR_PTR(-EINVAL); + } + + + if (!governor->no_central_polling) { + mutex_lock(&devfreq_list_lock); + devfreq = find_device_devfreq(dev); + mutex_unlock(&devfreq_list_lock); + if (!IS_ERR(devfreq)) { + dev_err(dev, "%s: Unable to create devfreq for the device. It already has one.\n", __func__); + err = -EINVAL; + goto out; + } + } + + devfreq = kzalloc(sizeof(struct devfreq), GFP_KERNEL); + if (!devfreq) { + dev_err(dev, "%s: Unable to create devfreq for the device\n", + __func__); + err = -ENOMEM; + goto out; + } + + mutex_init(&devfreq->lock); + mutex_lock(&devfreq->lock); + devfreq->dev.parent = dev; + devfreq->dev.class = devfreq_class; + devfreq->dev.release = devfreq_dev_release; + devfreq->profile = profile; + devfreq->governor = governor; + devfreq->previous_freq = profile->initial_freq; + devfreq->data = data; + devfreq->next_polling = devfreq->polling_jiffies + = msecs_to_jiffies(devfreq->profile->polling_ms); + devfreq->nb.notifier_call = devfreq_notifier_call; + + dev_set_name(&devfreq->dev, dev_name(dev)); + err = device_register(&devfreq->dev); + if (err) { + put_device(&devfreq->dev); + goto err_dev; + } + + if (governor->init) + err = governor->init(devfreq); + if (err) + goto err_init; + + mutex_unlock(&devfreq->lock); + + if (governor->no_central_polling) + goto out; + + mutex_lock(&devfreq_list_lock); + + list_add(&devfreq->node, &devfreq_list); + + if (devfreq_wq && devfreq->next_polling && !polling) { + polling = true; + queue_delayed_work(devfreq_wq, &devfreq_work, + devfreq->next_polling); + } + mutex_unlock(&devfreq_list_lock); + goto out; +err_init: + device_unregister(&devfreq->dev); +err_dev: + mutex_unlock(&devfreq->lock); + kfree(devfreq); +out: + if (err) + return ERR_PTR(err); + else + return devfreq; +} + +/** + * devfreq_remove_device() - Remove devfreq feature from a device. + * @devfreq the devfreq instance to be removed + */ +int devfreq_remove_device(struct devfreq *devfreq) +{ + if (!devfreq) + return -EINVAL; + + if (!devfreq->governor->no_central_polling) { + mutex_lock(&devfreq_list_lock); + while (wait_remove_device == devfreq) { + mutex_unlock(&devfreq_list_lock); + schedule(); + mutex_lock(&devfreq_list_lock); + } + } + + mutex_lock(&devfreq->lock); + _remove_devfreq(devfreq, false); /* it unlocks devfreq->lock */ + + if (!devfreq->governor->no_central_polling) + mutex_unlock(&devfreq_list_lock); + + return 0; +} + +/** + * devfreq_start_polling() - Initialize data structure for devfreq framework and + * start polling registered devfreq devices. + */ +static int __init devfreq_start_polling(void) +{ + mutex_lock(&devfreq_list_lock); + polling = false; + devfreq_wq = create_freezable_workqueue("devfreq_wq"); + INIT_DELAYED_WORK_DEFERRABLE(&devfreq_work, devfreq_monitor); + mutex_unlock(&devfreq_list_lock); + + devfreq_monitor(&devfreq_work.work); + return 0; +} +late_initcall(devfreq_start_polling); + +static int __init devfreq_init(void) +{ + devfreq_class = class_create(THIS_MODULE, "devfreq"); + if (IS_ERR(devfreq_class)) { + pr_err("%s: couldn't create class\n", __FILE__); + return PTR_ERR(devfreq_class); + } + return 0; +} +subsys_initcall(devfreq_init); + +static void __exit devfreq_exit(void) +{ + class_destroy(devfreq_class); +} +module_exit(devfreq_exit); + +/* + * The followings are helper functions for devfreq user device drivers with + * OPP framework. + */ + +/** + * devfreq_recommended_opp() - Helper function to get proper OPP for the + * freq value given to target callback. + * @dev The devfreq user device. (parent of devfreq) + * @freq The frequency given to target function + * + */ +struct opp *devfreq_recommended_opp(struct device *dev, unsigned long *freq) +{ + struct opp *opp = opp_find_freq_ceil(dev, freq); + + if (opp == ERR_PTR(-ENODEV)) + opp = opp_find_freq_floor(dev, freq); + return opp; +} + +/** + * devfreq_register_opp_notifier() - Helper function to get devfreq notified + * for any changes in the OPP availability + * changes + * @dev The devfreq user device. (parent of devfreq) + * @devfreq The devfreq object. + */ +int devfreq_register_opp_notifier(struct device *dev, struct devfreq *devfreq) +{ + struct srcu_notifier_head *nh = opp_get_notifier(dev); + + if (IS_ERR(nh)) + return PTR_ERR(nh); + return srcu_notifier_chain_register(nh, &devfreq->nb); +} + +/** + * devfreq_unregister_opp_notifier() - Helper function to stop getting devfreq + * notified for any changes in the OPP + * availability changes anymore. + * @dev The devfreq user device. (parent of devfreq) + * @devfreq The devfreq object. + * + * At exit() callback of devfreq_dev_profile, this must be included if + * devfreq_recommended_opp is used. + */ +int devfreq_unregister_opp_notifier(struct device *dev, struct devfreq *devfreq) +{ + struct srcu_notifier_head *nh = opp_get_notifier(dev); + + if (IS_ERR(nh)) + return PTR_ERR(nh); + return srcu_notifier_chain_unregister(nh, &devfreq->nb); +} + +MODULE_AUTHOR("MyungJoo Ham "); +MODULE_DESCRIPTION("devfreq class support"); +MODULE_LICENSE("GPL"); diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h new file mode 100644 index 0000000..ea7f13c --- /dev/null +++ b/drivers/devfreq/governor.h @@ -0,0 +1,24 @@ +/* + * governor.h - internal header for devfreq governors. + * + * Copyright (C) 2011 Samsung Electronics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This header is for devfreq governors in drivers/devfreq/ + */ + +#ifndef _GOVERNOR_H +#define _GOVERNOR_H + +#include + +#define to_devfreq(DEV) container_of((DEV), struct devfreq, dev) + +/* Caution: devfreq->lock must be locked before calling update_devfreq */ +extern int update_devfreq(struct devfreq *devfreq); + +#endif /* _GOVERNOR_H */ diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h new file mode 100644 index 0000000..b3be3d3 --- /dev/null +++ b/include/linux/devfreq.h @@ -0,0 +1,203 @@ +/* + * devfreq: Generic Dynamic Voltage and Frequency Scaling (DVFS) Framework + * for Non-CPU Devices. + * + * Copyright (C) 2011 Samsung Electronics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_DEVFREQ_H__ +#define __LINUX_DEVFREQ_H__ + +#include +#include +#include + +#define DEVFREQ_NAME_LEN 16 + +struct devfreq; + +/** + * struct devfreq_dev_status - Data given from devfreq user device to + * governors. Represents the performance + * statistics. + * @total_time The total time represented by this instance of + * devfreq_dev_status + * @busy_time The time that the device was working among the + * total_time. + * @current_frequency The operating frequency. + * @private_data An entry not specified by the devfreq framework. + * A device and a specific governor may have their + * own protocol with private_data. However, because + * this is governor-specific, a governor using this + * will be only compatible with devices aware of it. + */ +struct devfreq_dev_status { + /* both since the last measure */ + unsigned long total_time; + unsigned long busy_time; + unsigned long current_frequency; + void *private_date; +}; + +/** + * struct devfreq_dev_profile - Devfreq's user device profile + * @initial_freq The operating frequency when devfreq_add_device() is + * called. + * @polling_ms The polling interval in ms. 0 disables polling. + * @target The device should set its operating frequency at + * freq or lowest-upper-than-freq value. If freq is + * higher than any operable frequency, set maximum. + * Before returning, target function should set + * freq at the current frequency. + * @get_dev_status The device should provide the current performance + * status to devfreq, which is used by governors. + * @exit An optional callback that is called when devfreq + * is removing the devfreq object due to error or + * from devfreq_remove_device() call. If the user + * has registered devfreq->nb at a notifier-head, + * this is the time to unregister it. + */ +struct devfreq_dev_profile { + unsigned long initial_freq; + unsigned int polling_ms; + + int (*target)(struct device *dev, unsigned long *freq); + int (*get_dev_status)(struct device *dev, + struct devfreq_dev_status *stat); + void (*exit)(struct device *dev); +}; + +/** + * struct devfreq_governor - Devfreq policy governor + * @name Governor's name + * @get_target_freq Returns desired operating frequency for the device. + * Basically, get_target_freq will run + * devfreq_dev_profile.get_dev_status() to get the + * status of the device (load = busy_time / total_time). + * If no_central_polling is set, this callback is called + * only with update_devfreq() notified by OPP. + * @init Called when the devfreq is being attached to a device + * @exit Called when the devfreq is being removed from a + * device. Governor should stop any internal routines + * before return because related data may be + * freed after exit(). + * @no_central_polling Do not use devfreq's central polling mechanism. + * When this is set, devfreq will not call + * get_target_freq with devfreq_monitor(). However, + * devfreq will call get_target_freq with + * devfreq_update() notified by OPP framework. + * + * Note that the callbacks are called with devfreq->lock locked by devfreq. + */ +struct devfreq_governor { + const char name[DEVFREQ_NAME_LEN]; + int (*get_target_freq)(struct devfreq *this, unsigned long *freq); + int (*init)(struct devfreq *this); + void (*exit)(struct devfreq *this); + const bool no_central_polling; +}; + +/** + * struct devfreq - Device devfreq structure + * @node list node - contains the devices with devfreq that have been + * registered. + * @lock a mutex to protect accessing devfreq. + * @dev device registered by devfreq class. dev.parent is the device + * using devfreq. + * @profile device-specific devfreq profile + * @governor method how to choose frequency based on the usage. + * @nb notifier block used to notify devfreq object that it should + * reevaluate operable frequencies. Devfreq users may use + * devfreq.nb to the corresponding register notifier call chain. + * @polling_jiffies interval in jiffies. + * @previous_freq previously configured frequency value. + * @next_polling the number of remaining jiffies to poll with + * "devfreq_monitor" executions to reevaluate + * frequency/voltage of the device. Set by + * profile's polling_ms interval. + * @data Private data of the governor. The devfreq framework does not + * touch this. + * @being_removed a flag to mark that this object is being removed in + * order to prevent trying to remove the object multiple times. + * + * This structure stores the devfreq information for a give device. + * + * Note that when a governor accesses entries in struct devfreq in its + * functions except for the context of callbacks defined in struct + * devfreq_governor, the governor should protect its access with the + * struct mutex lock in struct devfreq. A governor may use this mutex + * to protect its own private data in void *data as well. + */ +struct devfreq { + struct list_head node; + + struct mutex lock; + struct device dev; + struct devfreq_dev_profile *profile; + const struct devfreq_governor *governor; + struct notifier_block nb; + + unsigned long polling_jiffies; + unsigned long previous_freq; + unsigned int next_polling; + + void *data; /* private data for governors */ + + bool being_removed; +}; + +#if defined(CONFIG_PM_DEVFREQ) +extern struct devfreq *devfreq_add_device(struct device *dev, + struct devfreq_dev_profile *profile, + const struct devfreq_governor *governor, + void *data); +extern int devfreq_remove_device(struct devfreq *devfreq); + +/* Helper functions for devfreq user device driver with OPP. */ +extern struct opp *devfreq_recommended_opp(struct device *dev, + unsigned long *freq); +extern int devfreq_register_opp_notifier(struct device *dev, + struct devfreq *devfreq); +extern int devfreq_unregister_opp_notifier(struct device *dev, + struct devfreq *devfreq); + +#else /* !CONFIG_PM_DEVFREQ */ +static struct devfreq *devfreq_add_device(struct device *dev, + struct devfreq_dev_profile *profile, + struct devfreq_governor *governor, + void *data); +{ + return NULL; +} + +static int devfreq_remove_device(struct devfreq *devfreq); +{ + return 0; +} + +static struct opp *devfreq_recommended_opp(struct device *dev, + unsigned long *freq) +{ + return -EINVAL; +} + +static int devfreq_register_opp_notifier(struct device *dev, + struct devfreq *devfreq) +{ + return -EINVAL; +} + +static int devfreq_unregister_opp_notifier(struct device *dev, + struct devfreq *devfreq) +{ + return -EINVAL; +} + +#endif /* CONFIG_PM_DEVFREQ */ + +#endif /* __LINUX_DEVFREQ_H__ */ -- cgit v0.10.2 From 9005b65099ee4f14b6be691c4574612fe947531a Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Sun, 2 Oct 2011 00:19:28 +0200 Subject: PM / devfreq: Add common sysfs interfaces Device specific sysfs interface /sys/devices/.../power/devfreq_* - governor R: name of governor - cur_freq R: current frequency - polling_interval R: polling interval in ms given with devfreq profile W: update polling interval. - central_polling R: 1 if polling is managed by devfreq framework Signed-off-by: MyungJoo Ham Signed-off-by: Kyungmin Park Reviewed-by: Mike Turquette Acked-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki -- Documentation/ABI/testing/sysfs-class-devfreq | 44 ++++++++++++++++ drivers/devfreq/devfreq.c | 69 ++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-class-devfreq diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq new file mode 100644 index 0000000..0ec855f --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -0,0 +1,44 @@ +What: /sys/class/devfreq/.../ +Date: September 2011 +Contact: MyungJoo Ham +Description: + Provide a place in sysfs for the devfreq objects. + This allows accessing various devfreq specific variables. + The name of devfreq object denoted as ... is same as the + name of device using devfreq. + +What: /sys/class/devfreq/.../governor +Date: September 2011 +Contact: MyungJoo Ham +Description: + The /sys/class/devfreq/.../governor shows the name of the + governor used by the corresponding devfreq object. + +What: /sys/class/devfreq/.../cur_freq +Date: September 2011 +Contact: MyungJoo Ham +Description: + The /sys/class/devfreq/.../cur_freq shows the current + frequency of the corresponding devfreq object. + +What: /sys/class/devfreq/.../central_polling +Date: September 2011 +Contact: MyungJoo Ham +Description: + The /sys/class/devfreq/.../central_polling shows whether + the devfreq ojbect is using devfreq-provided central + polling mechanism or not. + +What: /sys/class/devfreq/.../polling_interval +Date: September 2011 +Contact: MyungJoo Ham +Description: + The /sys/class/devfreq/.../polling_interval shows and sets + the requested polling interval of the corresponding devfreq + object. The values are represented in ms. If the value is + less than 1 jiffy, it is considered to be 0, which means + no polling. This value is meaningless if the governor is + not polling; thus. If the governor is not using + devfreq-provided central polling + (/sys/class/devfreq/.../central_polling is 0), this value + may be useless. diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index f3100b1..5d15b81 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -437,6 +437,74 @@ int devfreq_remove_device(struct devfreq *devfreq) return 0; } +static ssize_t show_governor(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", to_devfreq(dev)->governor->name); +} + +static ssize_t show_freq(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", to_devfreq(dev)->previous_freq); +} + +static ssize_t show_polling_interval(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", to_devfreq(dev)->profile->polling_ms); +} + +static ssize_t store_polling_interval(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct devfreq *df = to_devfreq(dev); + unsigned int value; + int ret; + + ret = sscanf(buf, "%u", &value); + if (ret != 1) + goto out; + + mutex_lock(&df->lock); + df->profile->polling_ms = value; + df->next_polling = df->polling_jiffies + = msecs_to_jiffies(value); + mutex_unlock(&df->lock); + + ret = count; + + if (df->governor->no_central_polling) + goto out; + + mutex_lock(&devfreq_list_lock); + if (df->next_polling > 0 && !polling) { + polling = true; + queue_delayed_work(devfreq_wq, &devfreq_work, + df->next_polling); + } + mutex_unlock(&devfreq_list_lock); +out: + return ret; +} + +static ssize_t show_central_polling(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", + !to_devfreq(dev)->governor->no_central_polling); +} + +static struct device_attribute devfreq_attrs[] = { + __ATTR(governor, S_IRUGO, show_governor, NULL), + __ATTR(cur_freq, S_IRUGO, show_freq, NULL), + __ATTR(central_polling, S_IRUGO, show_central_polling, NULL), + __ATTR(polling_interval, S_IRUGO | S_IWUSR, show_polling_interval, + store_polling_interval), + { }, +}; + /** * devfreq_start_polling() - Initialize data structure for devfreq framework and * start polling registered devfreq devices. @@ -461,6 +529,7 @@ static int __init devfreq_init(void) pr_err("%s: couldn't create class\n", __FILE__); return PTR_ERR(devfreq_class); } + devfreq_class->dev_attrs = devfreq_attrs; return 0; } subsys_initcall(devfreq_init); -- cgit v0.10.2 From ce26c5bb9569d8b826f01b8620fc16d8da6821e9 Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Sun, 2 Oct 2011 00:19:34 +0200 Subject: PM / devfreq: Add basic governors Four cpufreq-like governors are provided as examples. powersave: use the lowest frequency possible. The user (device) should set the polling_ms as 0 because polling is useless for this governor. performance: use the highest freqeuncy possible. The user (device) should set the polling_ms as 0 because polling is useless for this governor. userspace: use the user specified frequency stored at devfreq.user_set_freq. With sysfs support in the following patch, a user may set the value with the sysfs interface. simple_ondemand: simplified version of cpufreq's ondemand governor. When a user updates OPP entries (enable/disable/add), OPP framework automatically notifies devfreq to update operating frequency accordingly. Thus, devfreq users (device drivers) do not need to update devfreq manually with OPP entry updates or set polling_ms for powersave , performance, userspace, or any other "static" governors. Note that these are given only as basic examples for governors and any devices with devfreq may implement their own governors with the drivers and use them. Signed-off-by: MyungJoo Ham Signed-off-by: Kyungmin Park Reviewed-by: Mike Turquette Acked-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index 0ec855f..23d78b5 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -42,3 +42,11 @@ Description: devfreq-provided central polling (/sys/class/devfreq/.../central_polling is 0), this value may be useless. + +What: /sys/class/devfreq/.../userspace/set_freq +Date: September 2011 +Contact: MyungJoo Ham +Description: + The /sys/class/devfreq/.../userspace/set_freq shows and + sets the requested frequency for the devfreq object if + userspace governor is in effect. diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 1fb42de..643b055 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -34,6 +34,42 @@ menuconfig PM_DEVFREQ if PM_DEVFREQ +comment "DEVFREQ Governors" + +config DEVFREQ_GOV_SIMPLE_ONDEMAND + bool "Simple Ondemand" + help + Chooses frequency based on the recent load on the device. Works + similar as ONDEMAND governor of CPUFREQ does. A device with + Simple-Ondemand should be able to provide busy/total counter + values that imply the usage rate. A device may provide tuned + values to the governor with data field at devfreq_add_device(). + +config DEVFREQ_GOV_PERFORMANCE + bool "Performance" + help + Sets the frequency at the maximum available frequency. + This governor always returns UINT_MAX as frequency so that + the DEVFREQ framework returns the highest frequency available + at any time. + +config DEVFREQ_GOV_POWERSAVE + bool "Powersave" + help + Sets the frequency at the minimum available frequency. + This governor always returns 0 as frequency so that + the DEVFREQ framework returns the lowest frequency available + at any time. + +config DEVFREQ_GOV_USERSPACE + bool "Userspace" + help + Sets the frequency at the user specified one. + This governor returns the user configured frequency if there + has been an input to /sys/devices/.../power/devfreq_set_freq. + Otherwise, the governor does not change the frequnecy + given at the initialization. + comment "DEVFREQ Drivers" endif # PM_DEVFREQ diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile index 168934a..4564a89 100644 --- a/drivers/devfreq/Makefile +++ b/drivers/devfreq/Makefile @@ -1 +1,5 @@ obj-$(CONFIG_PM_DEVFREQ) += devfreq.o +obj-$(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) += governor_simpleondemand.o +obj-$(CONFIG_DEVFREQ_GOV_PERFORMANCE) += governor_performance.o +obj-$(CONFIG_DEVFREQ_GOV_POWERSAVE) += governor_powersave.o +obj-$(CONFIG_DEVFREQ_GOV_USERSPACE) += governor_userspace.o diff --git a/drivers/devfreq/governor_performance.c b/drivers/devfreq/governor_performance.c new file mode 100644 index 0000000..c0596b2 --- /dev/null +++ b/drivers/devfreq/governor_performance.c @@ -0,0 +1,29 @@ +/* + * linux/drivers/devfreq/governor_performance.c + * + * Copyright (C) 2011 Samsung Electronics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +static int devfreq_performance_func(struct devfreq *df, + unsigned long *freq) +{ + /* + * target callback should be able to get floor value as + * said in devfreq.h + */ + *freq = UINT_MAX; + return 0; +} + +const struct devfreq_governor devfreq_performance = { + .name = "performance", + .get_target_freq = devfreq_performance_func, + .no_central_polling = true, +}; diff --git a/drivers/devfreq/governor_powersave.c b/drivers/devfreq/governor_powersave.c new file mode 100644 index 0000000..2483a85 --- /dev/null +++ b/drivers/devfreq/governor_powersave.c @@ -0,0 +1,29 @@ +/* + * linux/drivers/devfreq/governor_powersave.c + * + * Copyright (C) 2011 Samsung Electronics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +static int devfreq_powersave_func(struct devfreq *df, + unsigned long *freq) +{ + /* + * target callback should be able to get ceiling value as + * said in devfreq.h + */ + *freq = 0; + return 0; +} + +const struct devfreq_governor devfreq_powersave = { + .name = "powersave", + .get_target_freq = devfreq_powersave_func, + .no_central_polling = true, +}; diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c new file mode 100644 index 0000000..efad8dc --- /dev/null +++ b/drivers/devfreq/governor_simpleondemand.c @@ -0,0 +1,88 @@ +/* + * linux/drivers/devfreq/governor_simpleondemand.c + * + * Copyright (C) 2011 Samsung Electronics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +/* Default constants for DevFreq-Simple-Ondemand (DFSO) */ +#define DFSO_UPTHRESHOLD (90) +#define DFSO_DOWNDIFFERENCTIAL (5) +static int devfreq_simple_ondemand_func(struct devfreq *df, + unsigned long *freq) +{ + struct devfreq_dev_status stat; + int err = df->profile->get_dev_status(df->dev.parent, &stat); + unsigned long long a, b; + unsigned int dfso_upthreshold = DFSO_UPTHRESHOLD; + unsigned int dfso_downdifferential = DFSO_DOWNDIFFERENCTIAL; + struct devfreq_simple_ondemand_data *data = df->data; + + if (err) + return err; + + if (data) { + if (data->upthreshold) + dfso_upthreshold = data->upthreshold; + if (data->downdifferential) + dfso_downdifferential = data->downdifferential; + } + if (dfso_upthreshold > 100 || + dfso_upthreshold < dfso_downdifferential) + return -EINVAL; + + /* Assume MAX if it is going to be divided by zero */ + if (stat.total_time == 0) { + *freq = UINT_MAX; + return 0; + } + + /* Prevent overflow */ + if (stat.busy_time >= (1 << 24) || stat.total_time >= (1 << 24)) { + stat.busy_time >>= 7; + stat.total_time >>= 7; + } + + /* Set MAX if it's busy enough */ + if (stat.busy_time * 100 > + stat.total_time * dfso_upthreshold) { + *freq = UINT_MAX; + return 0; + } + + /* Set MAX if we do not know the initial frequency */ + if (stat.current_frequency == 0) { + *freq = UINT_MAX; + return 0; + } + + /* Keep the current frequency */ + if (stat.busy_time * 100 > + stat.total_time * (dfso_upthreshold - dfso_downdifferential)) { + *freq = stat.current_frequency; + return 0; + } + + /* Set the desired frequency based on the load */ + a = stat.busy_time; + a *= stat.current_frequency; + b = div_u64(a, stat.total_time); + b *= 100; + b = div_u64(b, (dfso_upthreshold - dfso_downdifferential / 2)); + *freq = (unsigned long) b; + + return 0; +} + +const struct devfreq_governor devfreq_simple_ondemand = { + .name = "simple_ondemand", + .get_target_freq = devfreq_simple_ondemand_func, +}; diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c new file mode 100644 index 0000000..4f8b563 --- /dev/null +++ b/drivers/devfreq/governor_userspace.c @@ -0,0 +1,116 @@ +/* + * linux/drivers/devfreq/governor_simpleondemand.c + * + * Copyright (C) 2011 Samsung Electronics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include "governor.h" + +struct userspace_data { + unsigned long user_frequency; + bool valid; +}; + +static int devfreq_userspace_func(struct devfreq *df, unsigned long *freq) +{ + struct userspace_data *data = df->data; + + if (!data->valid) + *freq = df->previous_freq; /* No user freq specified yet */ + else + *freq = data->user_frequency; + return 0; +} + +static ssize_t store_freq(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct devfreq *devfreq = to_devfreq(dev); + struct userspace_data *data; + unsigned long wanted; + int err = 0; + + + mutex_lock(&devfreq->lock); + data = devfreq->data; + + sscanf(buf, "%lu", &wanted); + data->user_frequency = wanted; + data->valid = true; + err = update_devfreq(devfreq); + if (err == 0) + err = count; + mutex_unlock(&devfreq->lock); + return err; +} + +static ssize_t show_freq(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct devfreq *devfreq = to_devfreq(dev); + struct userspace_data *data; + int err = 0; + + mutex_lock(&devfreq->lock); + data = devfreq->data; + + if (data->valid) + err = sprintf(buf, "%lu\n", data->user_frequency); + else + err = sprintf(buf, "undefined\n"); + mutex_unlock(&devfreq->lock); + return err; +} + +static DEVICE_ATTR(set_freq, 0644, show_freq, store_freq); +static struct attribute *dev_entries[] = { + &dev_attr_set_freq.attr, + NULL, +}; +static struct attribute_group dev_attr_group = { + .name = "userspace", + .attrs = dev_entries, +}; + +static int userspace_init(struct devfreq *devfreq) +{ + int err = 0; + struct userspace_data *data = kzalloc(sizeof(struct userspace_data), + GFP_KERNEL); + + if (!data) { + err = -ENOMEM; + goto out; + } + data->valid = false; + devfreq->data = data; + + err = sysfs_create_group(&devfreq->dev.kobj, &dev_attr_group); +out: + return err; +} + +static void userspace_exit(struct devfreq *devfreq) +{ + sysfs_remove_group(&devfreq->dev.kobj, &dev_attr_group); + kfree(devfreq->data); + devfreq->data = NULL; +} + +const struct devfreq_governor devfreq_userspace = { + .name = "userspace", + .get_target_freq = devfreq_userspace_func, + .init = userspace_init, + .exit = userspace_exit, + .no_central_polling = true, +}; diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index b3be3d3..afb9458 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -166,6 +166,36 @@ extern int devfreq_register_opp_notifier(struct device *dev, extern int devfreq_unregister_opp_notifier(struct device *dev, struct devfreq *devfreq); +#ifdef CONFIG_DEVFREQ_GOV_POWERSAVE +extern const struct devfreq_governor devfreq_powersave; +#endif +#ifdef CONFIG_DEVFREQ_GOV_PERFORMANCE +extern const struct devfreq_governor devfreq_performance; +#endif +#ifdef CONFIG_DEVFREQ_GOV_USERSPACE +extern const struct devfreq_governor devfreq_userspace; +#endif +#ifdef CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND +extern const struct devfreq_governor devfreq_simple_ondemand; +/** + * struct devfreq_simple_ondemand_data - void *data fed to struct devfreq + * and devfreq_add_device + * @ upthreshold If the load is over this value, the frequency jumps. + * Specify 0 to use the default. Valid value = 0 to 100. + * @ downdifferential If the load is under upthreshold - downdifferential, + * the governor may consider slowing the frequency down. + * Specify 0 to use the default. Valid value = 0 to 100. + * downdifferential < upthreshold must hold. + * + * If the fed devfreq_simple_ondemand_data pointer is NULL to the governor, + * the governor uses the default values. + */ +struct devfreq_simple_ondemand_data { + unsigned int upthreshold; + unsigned int downdifferential; +}; +#endif + #else /* !CONFIG_PM_DEVFREQ */ static struct devfreq *devfreq_add_device(struct device *dev, struct devfreq_dev_profile *profile, @@ -198,6 +228,11 @@ static int devfreq_unregister_opp_notifier(struct device *dev, return -EINVAL; } +#define devfreq_powersave NULL +#define devfreq_performance NULL +#define devfreq_userspace NULL +#define devfreq_simple_ondemand NULL + #endif /* CONFIG_PM_DEVFREQ */ #endif /* __LINUX_DEVFREQ_H__ */ -- cgit v0.10.2 From 1a9a91525d806f2b3bd8b57b963755a96fd36ce2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 29 Sep 2011 22:29:44 +0200 Subject: PM / QoS: Add function dev_pm_qos_read_value() (v3) To read the current PM QoS value for a given device we need to make sure that the device's power.constraints object won't be removed while we're doing that. For this reason, put the operation under dev->power.lock and acquire the lock around the initialization and removal of power.constraints. Moreover, since we're using the value of power.constraints to determine whether or not the object is present, the power.constraints_state field isn't necessary any more and may be removed. However, dev_pm_qos_add_request() needs to check if the device is being removed from the system before allocating a new PM QoS constraints object for it, so make it use the power.power_state field of struct device for this purpose. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 956443f8..c6291ab 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -66,6 +65,7 @@ void device_pm_init(struct device *dev) spin_lock_init(&dev->power.lock); pm_runtime_init(dev); INIT_LIST_HEAD(&dev->power.entry); + dev->power.power_state = PMSG_INVALID; } /** @@ -97,8 +97,8 @@ void device_pm_add(struct device *dev) dev_warn(dev, "parent %s should not be sleeping\n", dev_name(dev->parent)); list_add_tail(&dev->power.entry, &dpm_list); - mutex_unlock(&dpm_list_mtx); dev_pm_qos_constraints_init(dev); + mutex_unlock(&dpm_list_mtx); } /** @@ -109,9 +109,9 @@ void device_pm_remove(struct device *dev) { pr_debug("PM: Removing info for %s:%s\n", dev->bus ? dev->bus->name : "No Bus", dev_name(dev)); - dev_pm_qos_constraints_destroy(dev); complete_all(&dev->power.completion); mutex_lock(&dpm_list_mtx); + dev_pm_qos_constraints_destroy(dev); list_del_init(&dev->power.entry); mutex_unlock(&dpm_list_mtx); device_wakeup_disable(dev); diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index f2a25f1..9bf6232 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -1,3 +1,5 @@ +#include + #ifdef CONFIG_PM_RUNTIME extern void pm_runtime_init(struct device *dev); @@ -35,15 +37,21 @@ extern void device_pm_move_last(struct device *); static inline void device_pm_init(struct device *dev) { spin_lock_init(&dev->power.lock); + dev->power.power_state = PMSG_INVALID; pm_runtime_init(dev); } +static inline void device_pm_add(struct device *dev) +{ + dev_pm_qos_constraints_init(dev); +} + static inline void device_pm_remove(struct device *dev) { + dev_pm_qos_constraints_destroy(dev); pm_runtime_remove(dev); } -static inline void device_pm_add(struct device *dev) {} static inline void device_pm_move_before(struct device *deva, struct device *devb) {} static inline void device_pm_move_after(struct device *deva, diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 8d0b811..91e0614 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -30,15 +30,6 @@ * . To minimize the data usage by the per-device constraints, the data struct * is only allocated at the first call to dev_pm_qos_add_request. * . The data is later free'd when the device is removed from the system. - * . The constraints_state variable from dev_pm_info tracks the data struct - * allocation state: - * DEV_PM_QOS_NO_DEVICE: No device present or device removed, no data - * allocated, - * DEV_PM_QOS_DEVICE_PRESENT: Device present, data not allocated and will be - * allocated at the first call to dev_pm_qos_add_request, - * DEV_PM_QOS_ALLOCATED: Device present, data allocated. The per-device - * PM QoS constraints framework is operational and constraints can be - * added, updated or removed using the dev_pm_qos_* API. * . A global mutex protects the constraints users from the data being * allocated and free'd. */ @@ -51,8 +42,30 @@ static DEFINE_MUTEX(dev_pm_qos_mtx); + static BLOCKING_NOTIFIER_HEAD(dev_pm_notifiers); +/** + * dev_pm_qos_read_value - Get PM QoS constraint for a given device. + * @dev: Device to get the PM QoS constraint value for. + */ +s32 dev_pm_qos_read_value(struct device *dev) +{ + struct pm_qos_constraints *c; + unsigned long flags; + s32 ret = 0; + + spin_lock_irqsave(&dev->power.lock, flags); + + c = dev->power.constraints; + if (c) + ret = pm_qos_read_value(c); + + spin_unlock_irqrestore(&dev->power.lock, flags); + + return ret; +} + /* * apply_constraint * @req: constraint request to apply @@ -105,27 +118,31 @@ static int dev_pm_qos_constraints_allocate(struct device *dev) } BLOCKING_INIT_NOTIFIER_HEAD(n); + plist_head_init(&c->list); + c->target_value = PM_QOS_DEV_LAT_DEFAULT_VALUE; + c->default_value = PM_QOS_DEV_LAT_DEFAULT_VALUE; + c->type = PM_QOS_MIN; + c->notifiers = n; + + spin_lock_irq(&dev->power.lock); dev->power.constraints = c; - plist_head_init(&dev->power.constraints->list); - dev->power.constraints->target_value = PM_QOS_DEV_LAT_DEFAULT_VALUE; - dev->power.constraints->default_value = PM_QOS_DEV_LAT_DEFAULT_VALUE; - dev->power.constraints->type = PM_QOS_MIN; - dev->power.constraints->notifiers = n; - dev->power.constraints_state = DEV_PM_QOS_ALLOCATED; + spin_unlock_irq(&dev->power.lock); return 0; } /** - * dev_pm_qos_constraints_init + * dev_pm_qos_constraints_init - Initalize device's PM QoS constraints pointer. * @dev: target device * - * Called from the device PM subsystem at device insertion + * Called from the device PM subsystem during device insertion under + * device_pm_lock(). */ void dev_pm_qos_constraints_init(struct device *dev) { mutex_lock(&dev_pm_qos_mtx); - dev->power.constraints_state = DEV_PM_QOS_DEVICE_PRESENT; + dev->power.constraints = NULL; + dev->power.power_state = PMSG_ON; mutex_unlock(&dev_pm_qos_mtx); } @@ -133,34 +150,38 @@ void dev_pm_qos_constraints_init(struct device *dev) * dev_pm_qos_constraints_destroy * @dev: target device * - * Called from the device PM subsystem at device removal + * Called from the device PM subsystem on device removal under device_pm_lock(). */ void dev_pm_qos_constraints_destroy(struct device *dev) { struct dev_pm_qos_request *req, *tmp; + struct pm_qos_constraints *c; mutex_lock(&dev_pm_qos_mtx); - if (dev->power.constraints_state == DEV_PM_QOS_ALLOCATED) { - /* Flush the constraints list for the device */ - plist_for_each_entry_safe(req, tmp, - &dev->power.constraints->list, - node) { - /* - * Update constraints list and call the notification - * callbacks if needed - */ - apply_constraint(req, PM_QOS_REMOVE_REQ, - PM_QOS_DEFAULT_VALUE); - memset(req, 0, sizeof(*req)); - } + dev->power.power_state = PMSG_INVALID; + c = dev->power.constraints; + if (!c) + goto out; - kfree(dev->power.constraints->notifiers); - kfree(dev->power.constraints); - dev->power.constraints = NULL; + /* Flush the constraints list for the device */ + plist_for_each_entry_safe(req, tmp, &c->list, node) { + /* + * Update constraints list and call the notification + * callbacks if needed + */ + apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); + memset(req, 0, sizeof(*req)); } - dev->power.constraints_state = DEV_PM_QOS_NO_DEVICE; + spin_lock_irq(&dev->power.lock); + dev->power.constraints = NULL; + spin_unlock_irq(&dev->power.lock); + + kfree(c->notifiers); + kfree(c); + + out: mutex_unlock(&dev_pm_qos_mtx); } @@ -178,8 +199,9 @@ void dev_pm_qos_constraints_destroy(struct device *dev) * * Returns 1 if the aggregated constraint value has changed, * 0 if the aggregated constraint value has not changed, - * -EINVAL in case of wrong parameters, -ENODEV if the device has been - * removed from the system + * -EINVAL in case of wrong parameters, -ENOMEM if there's not enough memory + * to allocate for data structures, -ENODEV if the device has just been removed + * from the system. */ int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, s32 value) @@ -195,28 +217,32 @@ int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, return -EINVAL; } - mutex_lock(&dev_pm_qos_mtx); req->dev = dev; - /* Return if the device has been removed */ - if (req->dev->power.constraints_state == DEV_PM_QOS_NO_DEVICE) { - ret = -ENODEV; - goto out; - } + mutex_lock(&dev_pm_qos_mtx); - /* - * Allocate the constraints data on the first call to add_request, - * i.e. only if the data is not already allocated and if the device has - * not been removed - */ - if (dev->power.constraints_state == DEV_PM_QOS_DEVICE_PRESENT) - ret = dev_pm_qos_constraints_allocate(dev); + if (!dev->power.constraints) { + if (dev->power.power_state.event == PM_EVENT_INVALID) { + /* The device has been removed from the system. */ + req->dev = NULL; + ret = -ENODEV; + goto out; + } else { + /* + * Allocate the constraints data on the first call to + * add_request, i.e. only if the data is not already + * allocated and if the device has not been removed. + */ + ret = dev_pm_qos_constraints_allocate(dev); + } + } if (!ret) ret = apply_constraint(req, PM_QOS_ADD_REQ, value); -out: + out: mutex_unlock(&dev_pm_qos_mtx); + return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_add_request); @@ -252,7 +278,7 @@ int dev_pm_qos_update_request(struct dev_pm_qos_request *req, mutex_lock(&dev_pm_qos_mtx); - if (req->dev->power.constraints_state == DEV_PM_QOS_ALLOCATED) { + if (req->dev->power.constraints) { if (new_value != req->node.prio) ret = apply_constraint(req, PM_QOS_UPDATE_REQ, new_value); @@ -293,7 +319,7 @@ int dev_pm_qos_remove_request(struct dev_pm_qos_request *req) mutex_lock(&dev_pm_qos_mtx); - if (req->dev->power.constraints_state == DEV_PM_QOS_ALLOCATED) { + if (req->dev->power.constraints) { ret = apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); @@ -323,15 +349,12 @@ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier) mutex_lock(&dev_pm_qos_mtx); - /* Silently return if the device has been removed */ - if (dev->power.constraints_state != DEV_PM_QOS_ALLOCATED) - goto out; - - retval = blocking_notifier_chain_register( - dev->power.constraints->notifiers, - notifier); + /* Silently return if the constraints object is not present. */ + if (dev->power.constraints) + retval = blocking_notifier_chain_register( + dev->power.constraints->notifiers, + notifier); -out: mutex_unlock(&dev_pm_qos_mtx); return retval; } @@ -354,15 +377,12 @@ int dev_pm_qos_remove_notifier(struct device *dev, mutex_lock(&dev_pm_qos_mtx); - /* Silently return if the device has been removed */ - if (dev->power.constraints_state != DEV_PM_QOS_ALLOCATED) - goto out; - - retval = blocking_notifier_chain_unregister( - dev->power.constraints->notifiers, - notifier); + /* Silently return if the constraints object is not present. */ + if (dev->power.constraints) + retval = blocking_notifier_chain_unregister( + dev->power.constraints->notifiers, + notifier); -out: mutex_unlock(&dev_pm_qos_mtx); return retval; } diff --git a/include/linux/pm.h b/include/linux/pm.h index d78187e..62a876e 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -326,6 +326,7 @@ extern struct dev_pm_ops generic_subsys_pm_ops; * requested by a driver. */ +#define PM_EVENT_INVALID (-1) #define PM_EVENT_ON 0x0000 #define PM_EVENT_FREEZE 0x0001 #define PM_EVENT_SUSPEND 0x0002 @@ -346,6 +347,7 @@ extern struct dev_pm_ops generic_subsys_pm_ops; #define PM_EVENT_AUTO_SUSPEND (PM_EVENT_AUTO | PM_EVENT_SUSPEND) #define PM_EVENT_AUTO_RESUME (PM_EVENT_AUTO | PM_EVENT_RESUME) +#define PMSG_INVALID ((struct pm_message){ .event = PM_EVENT_INVALID, }) #define PMSG_ON ((struct pm_message){ .event = PM_EVENT_ON, }) #define PMSG_FREEZE ((struct pm_message){ .event = PM_EVENT_FREEZE, }) #define PMSG_QUIESCE ((struct pm_message){ .event = PM_EVENT_QUIESCE, }) @@ -419,13 +421,6 @@ enum rpm_request { RPM_REQ_RESUME, }; -/* Per-device PM QoS constraints data struct state */ -enum dev_pm_qos_state { - DEV_PM_QOS_NO_DEVICE, /* No device present */ - DEV_PM_QOS_DEVICE_PRESENT, /* Device present, data not allocated */ - DEV_PM_QOS_ALLOCATED, /* Device present, data allocated */ -}; - struct wakeup_source; struct pm_domain_data { @@ -488,7 +483,6 @@ struct dev_pm_info { #endif struct pm_subsys_data *subsys_data; /* Owned by the subsystem. */ struct pm_qos_constraints *constraints; - enum dev_pm_qos_state constraints_state; }; extern void update_pm_runtime_accounting(struct device *dev); diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index ca7bd3f..83b0ea3 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -7,6 +7,7 @@ #include #include #include +#include #define PM_QOS_RESERVED 0 #define PM_QOS_CPU_DMA_LATENCY 1 @@ -77,6 +78,7 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier); int pm_qos_request_active(struct pm_qos_request *req); s32 pm_qos_read_value(struct pm_qos_constraints *c); +s32 dev_pm_qos_read_value(struct device *dev); int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, s32 value); int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value); @@ -117,6 +119,8 @@ static inline int pm_qos_request_active(struct pm_qos_request *req) static inline s32 pm_qos_read_value(struct pm_qos_constraints *c) { return 0; } +static inline s32 dev_pm_qos_read_value(struct device *dev) + { return 0; } static inline int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, s32 value) @@ -139,9 +143,13 @@ static inline int dev_pm_qos_remove_global_notifier( struct notifier_block *notifier) { return 0; } static inline void dev_pm_qos_constraints_init(struct device *dev) - { return; } +{ + dev->power.power_state = PMSG_ON; +} static inline void dev_pm_qos_constraints_destroy(struct device *dev) - { return; } +{ + dev->power.power_state = PMSG_INVALID; +} #endif #endif -- cgit v0.10.2 From e3cba3243eb853a052613c804dea033bc4c9cf2d Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Tue, 4 Oct 2011 21:54:45 +0200 Subject: PM / QoS: Update Documentation for the pm_qos and dev_pm_qos frameworks Update the documentation for the recently updated pm_qos API, kernel and user space. Add documentation for the per-device PM QoS (dev_pm_qos) framework API. Signed-off-by: Jean Pihet Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt index bfed898..17e130a 100644 --- a/Documentation/power/pm_qos_interface.txt +++ b/Documentation/power/pm_qos_interface.txt @@ -4,14 +4,19 @@ This interface provides a kernel and user mode interface for registering performance expectations by drivers, subsystems and user space applications on one of the parameters. -Currently we have {cpu_dma_latency, network_latency, network_throughput} as the -initial set of pm_qos parameters. +Two different PM QoS frameworks are available: +1. PM QoS classes for cpu_dma_latency, network_latency, network_throughput. +2. the per-device PM QoS framework provides the API to manage the per-device latency +constraints. Each parameters have defined units: * latency: usec * timeout: usec * throughput: kbs (kilo bit / sec) + +1. PM QoS framework + The infrastructure exposes multiple misc device nodes one per implemented parameter. The set of parameters implement is defined by pm_qos_power_init() and pm_qos_params.h. This is done because having the available parameters @@ -23,14 +28,18 @@ an aggregated target value. The aggregated target value is updated with changes to the request list or elements of the list. Typically the aggregated target value is simply the max or min of the request values held in the parameter list elements. +Note: the aggregated target value is implemented as an atomic variable so that +reading the aggregated value does not require any locking mechanism. + From kernel mode the use of this interface is simple: -handle = pm_qos_add_request(param_class, target_value): -Will insert an element into the list for that identified PM_QOS class with the +void pm_qos_add_request(handle, param_class, target_value): +Will insert an element into the list for that identified PM QoS class with the target value. Upon change to this list the new target is recomputed and any registered notifiers are called only if the target value is now different. -Clients of pm_qos need to save the returned handle. +Clients of pm_qos need to save the returned handle for future use in other +pm_qos API functions. void pm_qos_update_request(handle, new_target_value): Will update the list element pointed to by the handle with the new target value @@ -42,6 +51,20 @@ Will remove the element. After removal it will update the aggregate target and call the notification tree if the target was changed as a result of removing the request. +int pm_qos_request(param_class): +Returns the aggregated value for a given PM QoS class. + +int pm_qos_request_active(handle): +Returns if the request is still active, i.e. it has not been removed from a +PM QoS class constraints list. + +int pm_qos_add_notifier(param_class, notifier): +Adds a notification callback function to the PM QoS class. The callback is +called when the aggregated value for the PM QoS class is changed. + +int pm_qos_remove_notifier(int param_class, notifier): +Removes the notification callback function for the PM QoS class. + From user mode: Only processes can register a pm_qos request. To provide for automatic @@ -63,4 +86,63 @@ To remove the user mode request for a target value simply close the device node. +2. PM QoS per-device latency framework + +For each device a list of performance requests is maintained along with +an aggregated target value. The aggregated target value is updated with +changes to the request list or elements of the list. Typically the +aggregated target value is simply the max or min of the request values held +in the parameter list elements. +Note: the aggregated target value is implemented as an atomic variable so that +reading the aggregated value does not require any locking mechanism. + + +From kernel mode the use of this interface is the following: + +int dev_pm_qos_add_request(device, handle, value): +Will insert an element into the list for that identified device with the +target value. Upon change to this list the new target is recomputed and any +registered notifiers are called only if the target value is now different. +Clients of dev_pm_qos need to save the handle for future use in other +dev_pm_qos API functions. + +int dev_pm_qos_update_request(handle, new_value): +Will update the list element pointed to by the handle with the new target value +and recompute the new aggregated target, calling the notification trees if the +target is changed. + +int dev_pm_qos_remove_request(handle): +Will remove the element. After removal it will update the aggregate target and +call the notification trees if the target was changed as a result of removing +the request. + +s32 dev_pm_qos_read_value(device): +Returns the aggregated value for a given device's constraints list. + + +Notification mechanisms: +The per-device PM QoS framework has 2 different and distinct notification trees: +a per-device notification tree and a global notification tree. + +int dev_pm_qos_add_notifier(device, notifier): +Adds a notification callback function for the device. +The callback is called when the aggregated value of the device constraints list +is changed. + +int dev_pm_qos_remove_notifier(device, notifier): +Removes the notification callback function for the device. + +int dev_pm_qos_add_global_notifier(notifier): +Adds a notification callback function in the global notification tree of the +framework. +The callback is called when the aggregated value for any device is changed. + +int dev_pm_qos_remove_global_notifier(notifier): +Removes the notification callback function from the global notification tree +of the framework. + + +From user mode: +No API for user space access to the per-device latency constraints is provided +yet - still under discussion. -- cgit v0.10.2 From 2fb242adcaab5defa2f208775ac4f181ac998fdd Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 9 Oct 2011 11:40:25 +0800 Subject: PM / Runtime: Update document about callbacks Support for device power domains has been introduced in commit 9659cc0678b954f187290c6e8b247a673c5d37e1 (PM: Make system-wide PM and runtime PM treat subsystems consistently), also power domain callbacks will take precedence over subsystem ones from commit 4d27e9dcff00a6425d779b065ec8892e4f391661(PM: Make power domain callbacks take precedence over subsystem ones). So update part of "Device Runtime PM Callbacks" in Documentation/power/runtime_pm.txt. Signed-off-by: Ming Lei Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 1750740..f670836 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -43,13 +43,18 @@ struct dev_pm_ops { ... }; -The ->runtime_suspend(), ->runtime_resume() and ->runtime_idle() callbacks are -executed by the PM core for either the device type, or the class (if the device -type's struct dev_pm_ops object does not exist), or the bus type (if the -device type's and class' struct dev_pm_ops objects do not exist) of the given -device (this allows device types to override callbacks provided by bus types or -classes if necessary). The bus type, device type and class callbacks are -referred to as subsystem-level callbacks in what follows. +The ->runtime_suspend(), ->runtime_resume() and ->runtime_idle() callbacks +are executed by the PM core for either the power domain, or the device type +(if the device power domain's struct dev_pm_ops does not exist), or the class +(if the device power domain's and type's struct dev_pm_ops object does not +exist), or the bus type (if the device power domain's, type's and class' +struct dev_pm_ops objects do not exist) of the given device, so the priority +order of callbacks from high to low is that power domain callbacks, device +type callbacks, class callbacks and bus type callbacks, and the high priority +one will take precedence over low priority one. The bus type, device type and +class callbacks are referred to as subsystem-level callbacks in what follows, +and generally speaking, the power domain callbacks are used for representing +power domains within a SoC. By default, the callbacks are always invoked in process context with interrupts enabled. However, subsystems can use the pm_runtime_irq_safe() helper function -- cgit v0.10.2 From 47d8f0bac0fda4c15a030f92cd6da6c6bed87459 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 12 Oct 2011 11:53:32 +0800 Subject: PM / Runtime: Fix kerneldoc comment for rpm_suspend() This patch fix kerneldoc comments for rpm_suspend(): - 'Cancel a pending idle notification' should be put before, also should be changed to 'Cancel a pending idle notification, autosuspend or suspend'. - idle notification for the device after succeeding suspend has been removed, so update the comment accordingly. [rjw: Modified the subject and changelog slightly.] Signed-off-by: Ming Lei Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 7a6fb5e..aa23a64 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -286,14 +286,16 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev) * @dev: Device to suspend. * @rpmflags: Flag bits. * - * Check if the device's runtime PM status allows it to be suspended. If - * another suspend has been started earlier, either return immediately or wait - * for it to finish, depending on the RPM_NOWAIT and RPM_ASYNC flags. Cancel a - * pending idle notification. If the RPM_ASYNC flag is set then queue a - * suspend request; otherwise run the ->runtime_suspend() callback directly. - * If a deferred resume was requested while the callback was running then carry - * it out; otherwise send an idle notification for the device (if the suspend - * failed) or for its parent (if the suspend succeeded). + * Check if the device's runtime PM status allows it to be suspended. + * Cancel a pending idle notification, autosuspend or suspend. If + * another suspend has been started earlier, either return immediately + * or wait for it to finish, depending on the RPM_NOWAIT and RPM_ASYNC + * flags. If the RPM_ASYNC flag is set then queue a suspend request; + * otherwise run the ->runtime_suspend() callback directly. If a deferred + * resume was requested while the callback was running then carry it out; + * otherwise send an idle notification for its parent (if the suspend + * succeeded and both ignore_children of parent->power and irq_safe of + * dev->power are not set). * * This function must be called under dev->power.lock with interrupts disabled. */ -- cgit v0.10.2 From 857b36c7b038ac56a882ee914df93e5985443074 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 12 Oct 2011 22:59:33 +0200 Subject: PM / Runtime: Handle .runtime_suspend() failure correctly If .runtime_suspend() returns -EAGAIN or -EBUSY, the device should still be in ACTIVE state, so it is not necessary to send an idle notification to its parent. If .runtime_suspend() returns other fatal failure, it doesn't make sense to send idle notification to its parent. Skip parent idle notification when failure is returned from .runtime_suspend() and update comments in rpm_suspend() to reflect that change. [rjw: Modified the subject and changelog slightly.] Signed-off-by: Ming Lei Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index aa23a64..6bb3aaf 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -291,11 +291,11 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev) * another suspend has been started earlier, either return immediately * or wait for it to finish, depending on the RPM_NOWAIT and RPM_ASYNC * flags. If the RPM_ASYNC flag is set then queue a suspend request; - * otherwise run the ->runtime_suspend() callback directly. If a deferred - * resume was requested while the callback was running then carry it out; - * otherwise send an idle notification for its parent (if the suspend - * succeeded and both ignore_children of parent->power and irq_safe of - * dev->power are not set). + * otherwise run the ->runtime_suspend() callback directly. When + * ->runtime_suspend succeeded, if a deferred resume was requested while + * the callback was running then carry it out, otherwise send an idle + * notification for its parent (if the suspend succeeded and both + * ignore_children of parent->power and irq_safe of dev->power are not set). * * This function must be called under dev->power.lock with interrupts disabled. */ @@ -420,15 +420,16 @@ static int rpm_suspend(struct device *dev, int rpmflags) dev->power.runtime_error = 0; else pm_runtime_cancel_pending(dev); - } else { + wake_up_all(&dev->power.wait_queue); + goto out; + } no_callback: - __update_runtime_status(dev, RPM_SUSPENDED); - pm_runtime_deactivate_timer(dev); + __update_runtime_status(dev, RPM_SUSPENDED); + pm_runtime_deactivate_timer(dev); - if (dev->parent) { - parent = dev->parent; - atomic_add_unless(&parent->power.child_count, -1, 0); - } + if (dev->parent) { + parent = dev->parent; + atomic_add_unless(&parent->power.child_count, -1, 0); } wake_up_all(&dev->power.wait_queue); -- cgit v0.10.2 From 2a77c46de1e3dace73745015635ebbc648eca69c Mon Sep 17 00:00:00 2001 From: ShuoX Liu Date: Wed, 10 Aug 2011 23:01:26 +0200 Subject: PM / Suspend: Add statistics debugfs file for suspend to RAM Record S3 failure time about each reason and the latest two failed devices' names in S3 progress. We can check it through 'suspend_stats' entry in debugfs. The motivation of the patch: We are enabling power features on Medfield. Comparing with PC/notebook, a mobile enters/exits suspend-2-ram (we call it s3 on Medfield) far more frequently. If it can't enter suspend-2-ram in time, the power might be used up soon. We often find sometimes, a device suspend fails. Then, system retries s3 over and over again. As display is off, testers and developers don't know what happens. Some testers and developers complain they don't know if system tries suspend-2-ram, and what device fails to suspend. They need such info for a quick check. The patch adds suspend_stats under debugfs for users to check suspend to RAM statistics quickly. If not using this patch, we have other methods to get info about what device fails. One is to turn on CONFIG_PM_DEBUG, but users would get too much info and testers need recompile the system. In addition, dynamic debug is another good tool to dump debug info. But it still doesn't match our utilization scenario closely. 1) user need write a user space parser to process the syslog output; 2) Our testing scenario is we leave the mobile for at least hours. Then, check its status. No serial console available during the testing. One is because console would be suspended, and the other is serial console connecting with spi or HSU devices would consume power. These devices are powered off at suspend-2-ram. Signed-off-by: ShuoX Liu Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/power/basic-pm-debugging.txt b/Documentation/power/basic-pm-debugging.txt index ddd78172..62eca08 100644 --- a/Documentation/power/basic-pm-debugging.txt +++ b/Documentation/power/basic-pm-debugging.txt @@ -201,3 +201,27 @@ case, you may be able to search for failing drivers by following the procedure analogous to the one described in section 1. If you find some failing drivers, you will have to unload them every time before an STR transition (ie. before you run s2ram), and please report the problems with them. + +There is a debugfs entry which shows the suspend to RAM statistics. Here is an +example of its output. + # mount -t debugfs none /sys/kernel/debug + # cat /sys/kernel/debug/suspend_stats + success: 20 + fail: 5 + failed_freeze: 0 + failed_prepare: 0 + failed_suspend: 5 + failed_suspend_noirq: 0 + failed_resume: 0 + failed_resume_noirq: 0 + failures: + last_failed_dev: alarm + adc + last_failed_errno: -16 + -16 + last_failed_step: suspend + suspend +Field success means the success number of suspend to RAM, and field fail means +the failure number. Others are the failure number of different steps of suspend +to RAM. suspend_stats just lists the last 2 failed devices, error number and +failed step of suspend. diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index c6291ab..b1b5826 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -46,6 +46,7 @@ LIST_HEAD(dpm_prepared_list); LIST_HEAD(dpm_suspended_list); LIST_HEAD(dpm_noirq_list); +struct suspend_stats suspend_stats; static DEFINE_MUTEX(dpm_list_mtx); static pm_message_t pm_transition; @@ -467,8 +468,12 @@ void dpm_resume_noirq(pm_message_t state) mutex_unlock(&dpm_list_mtx); error = device_resume_noirq(dev, state); - if (error) + if (error) { + suspend_stats.failed_resume_noirq++; + dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); + dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, " early", error); + } mutex_lock(&dpm_list_mtx); put_device(dev); @@ -629,8 +634,12 @@ void dpm_resume(pm_message_t state) mutex_unlock(&dpm_list_mtx); error = device_resume(dev, state, false); - if (error) + if (error) { + suspend_stats.failed_resume++; + dpm_save_failed_step(SUSPEND_RESUME); + dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, "", error); + } mutex_lock(&dpm_list_mtx); } @@ -805,6 +814,9 @@ int dpm_suspend_noirq(pm_message_t state) mutex_lock(&dpm_list_mtx); if (error) { pm_dev_err(dev, state, " late", error); + suspend_stats.failed_suspend_noirq++; + dpm_save_failed_step(SUSPEND_SUSPEND_NOIRQ); + dpm_save_failed_dev(dev_name(dev)); put_device(dev); break; } @@ -926,8 +938,10 @@ static void async_suspend(void *data, async_cookie_t cookie) int error; error = __device_suspend(dev, pm_transition, true); - if (error) + if (error) { + dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, pm_transition, " async", error); + } put_device(dev); } @@ -970,6 +984,7 @@ int dpm_suspend(pm_message_t state) mutex_lock(&dpm_list_mtx); if (error) { pm_dev_err(dev, state, "", error); + dpm_save_failed_dev(dev_name(dev)); put_device(dev); break; } @@ -983,7 +998,10 @@ int dpm_suspend(pm_message_t state) async_synchronize_full(); if (!error) error = async_error; - if (!error) + if (error) { + suspend_stats.failed_suspend++; + dpm_save_failed_step(SUSPEND_SUSPEND); + } else dpm_show_time(starttime, state, NULL); return error; } @@ -1091,7 +1109,10 @@ int dpm_suspend_start(pm_message_t state) int error; error = dpm_prepare(state); - if (!error) + if (error) { + suspend_stats.failed_prepare++; + dpm_save_failed_step(SUSPEND_PREPARE); + } else error = dpm_suspend(state); return error; } diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 6bbcef2..76f42e4 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -34,6 +34,58 @@ typedef int __bitwise suspend_state_t; #define PM_SUSPEND_MEM ((__force suspend_state_t) 3) #define PM_SUSPEND_MAX ((__force suspend_state_t) 4) +enum suspend_stat_step { + SUSPEND_FREEZE = 1, + SUSPEND_PREPARE, + SUSPEND_SUSPEND, + SUSPEND_SUSPEND_NOIRQ, + SUSPEND_RESUME_NOIRQ, + SUSPEND_RESUME +}; + +struct suspend_stats { + int success; + int fail; + int failed_freeze; + int failed_prepare; + int failed_suspend; + int failed_suspend_noirq; + int failed_resume; + int failed_resume_noirq; +#define REC_FAILED_NUM 2 + int last_failed_dev; + char failed_devs[REC_FAILED_NUM][40]; + int last_failed_errno; + int errno[REC_FAILED_NUM]; + int last_failed_step; + enum suspend_stat_step failed_steps[REC_FAILED_NUM]; +}; + +extern struct suspend_stats suspend_stats; + +static inline void dpm_save_failed_dev(const char *name) +{ + strlcpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev], + name, + sizeof(suspend_stats.failed_devs[0])); + suspend_stats.last_failed_dev++; + suspend_stats.last_failed_dev %= REC_FAILED_NUM; +} + +static inline void dpm_save_failed_errno(int err) +{ + suspend_stats.errno[suspend_stats.last_failed_errno] = err; + suspend_stats.last_failed_errno++; + suspend_stats.last_failed_errno %= REC_FAILED_NUM; +} + +static inline void dpm_save_failed_step(enum suspend_stat_step step) +{ + suspend_stats.failed_steps[suspend_stats.last_failed_step] = step; + suspend_stats.last_failed_step++; + suspend_stats.last_failed_step %= REC_FAILED_NUM; +} + /** * struct platform_suspend_ops - Callbacks for managing platform dependent * system sleep states. diff --git a/kernel/power/main.c b/kernel/power/main.c index 6c601f8..2757acb 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include "power.h" @@ -133,6 +135,101 @@ power_attr(pm_test); #endif /* CONFIG_PM_SLEEP */ +#ifdef CONFIG_DEBUG_FS +static char *suspend_step_name(enum suspend_stat_step step) +{ + switch (step) { + case SUSPEND_FREEZE: + return "freeze"; + case SUSPEND_PREPARE: + return "prepare"; + case SUSPEND_SUSPEND: + return "suspend"; + case SUSPEND_SUSPEND_NOIRQ: + return "suspend_noirq"; + case SUSPEND_RESUME_NOIRQ: + return "resume_noirq"; + case SUSPEND_RESUME: + return "resume"; + default: + return ""; + } +} + +static int suspend_stats_show(struct seq_file *s, void *unused) +{ + int i, index, last_dev, last_errno, last_step; + + last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; + last_dev %= REC_FAILED_NUM; + last_errno = suspend_stats.last_failed_errno + REC_FAILED_NUM - 1; + last_errno %= REC_FAILED_NUM; + last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; + last_step %= REC_FAILED_NUM; + seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n" + "%s: %d\n%s: %d\n%s: %d\n%s: %d\n", + "success", suspend_stats.success, + "fail", suspend_stats.fail, + "failed_freeze", suspend_stats.failed_freeze, + "failed_prepare", suspend_stats.failed_prepare, + "failed_suspend", suspend_stats.failed_suspend, + "failed_suspend_noirq", + suspend_stats.failed_suspend_noirq, + "failed_resume", suspend_stats.failed_resume, + "failed_resume_noirq", + suspend_stats.failed_resume_noirq); + seq_printf(s, "failures:\n last_failed_dev:\t%-s\n", + suspend_stats.failed_devs[last_dev]); + for (i = 1; i < REC_FAILED_NUM; i++) { + index = last_dev + REC_FAILED_NUM - i; + index %= REC_FAILED_NUM; + seq_printf(s, "\t\t\t%-s\n", + suspend_stats.failed_devs[index]); + } + seq_printf(s, " last_failed_errno:\t%-d\n", + suspend_stats.errno[last_errno]); + for (i = 1; i < REC_FAILED_NUM; i++) { + index = last_errno + REC_FAILED_NUM - i; + index %= REC_FAILED_NUM; + seq_printf(s, "\t\t\t%-d\n", + suspend_stats.errno[index]); + } + seq_printf(s, " last_failed_step:\t%-s\n", + suspend_step_name( + suspend_stats.failed_steps[last_step])); + for (i = 1; i < REC_FAILED_NUM; i++) { + index = last_step + REC_FAILED_NUM - i; + index %= REC_FAILED_NUM; + seq_printf(s, "\t\t\t%-s\n", + suspend_step_name( + suspend_stats.failed_steps[index])); + } + + return 0; +} + +static int suspend_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, suspend_stats_show, NULL); +} + +static const struct file_operations suspend_stats_operations = { + .open = suspend_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init pm_debugfs_init(void) +{ + debugfs_create_file("suspend_stats", S_IFREG | S_IRUGO, + NULL, NULL, &suspend_stats_operations); + return 0; +} + +late_initcall(pm_debugfs_init); +#endif /* CONFIG_DEBUG_FS */ + struct kobject *power_kobj; /** @@ -194,6 +291,11 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, } if (state < PM_SUSPEND_MAX && *s) error = enter_state(state); + if (error) { + suspend_stats.fail++; + dpm_save_failed_errno(error); + } else + suspend_stats.success++; #endif Exit: diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index b6b71ad..595a3dd 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -104,7 +104,10 @@ static int suspend_prepare(void) goto Finish; error = suspend_freeze_processes(); - if (!error) + if (error) { + suspend_stats.failed_freeze++; + dpm_save_failed_step(SUSPEND_FREEZE); + } else return 0; suspend_thaw_processes(); @@ -315,8 +318,16 @@ int enter_state(suspend_state_t state) */ int pm_suspend(suspend_state_t state) { - if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX) - return enter_state(state); + int ret; + if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX) { + ret = enter_state(state); + if (ret) { + suspend_stats.fail++; + dpm_save_failed_errno(ret); + } else + suspend_stats.success++; + return ret; + } return -EINVAL; } EXPORT_SYMBOL(pm_suspend); -- cgit v0.10.2 From ca123102f69fb260221502ade9bbc069290fae84 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 11 Aug 2011 22:38:12 +0200 Subject: PM: Fix build issue in main.c for CONFIG_PM_SLEEP unset Suspend statistics should depend on CONFIG_PM_SLEEP, so make that happen. Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/main.c b/kernel/power/main.c index 2757acb..a52e884 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -133,8 +133,6 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, power_attr(pm_test); #endif /* CONFIG_PM_DEBUG */ -#endif /* CONFIG_PM_SLEEP */ - #ifdef CONFIG_DEBUG_FS static char *suspend_step_name(enum suspend_stat_step step) { @@ -230,6 +228,8 @@ static int __init pm_debugfs_init(void) late_initcall(pm_debugfs_init); #endif /* CONFIG_DEBUG_FS */ +#endif /* CONFIG_PM_SLEEP */ + struct kobject *power_kobj; /** -- cgit v0.10.2 From 85055dd805f0822f13f736bee2a521e222c38293 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 17 Aug 2011 20:42:24 +0200 Subject: PM / Hibernate: Include storage keys in hibernation image on s390 For s390 there is one additional byte associated with each page, the storage key. This byte contains the referenced and changed bits and needs to be included into the hibernation image. If the storage keys are not restored to their previous state all original pages would appear to be dirty. This can cause inconsistencies e.g. with read-only filesystems. Signed-off-by: Martin Schwidefsky Signed-off-by: Rafael J. Wysocki diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ed5cb5a..6b99fc3 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -91,6 +91,7 @@ config S390 select HAVE_ARCH_MUTEX_CPU_RELAX select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 select HAVE_RCU_TABLE_FREE if SMP + select ARCH_SAVE_PAGE_KEYS if HIBERNATION select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index cf9e5c6..b6f9afe 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -7,6 +7,7 @@ */ #include +#include #include /* @@ -14,6 +15,123 @@ */ extern const void __nosave_begin, __nosave_end; +/* + * The restore of the saved pages in an hibernation image will set + * the change and referenced bits in the storage key for each page. + * Overindication of the referenced bits after an hibernation cycle + * does not cause any harm but the overindication of the change bits + * would cause trouble. + * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each + * page to the most significant byte of the associated page frame + * number in the hibernation image. + */ + +/* + * Key storage is allocated as a linked list of pages. + * The size of the keys array is (PAGE_SIZE - sizeof(long)) + */ +struct page_key_data { + struct page_key_data *next; + unsigned char data[]; +}; + +#define PAGE_KEY_DATA_SIZE (PAGE_SIZE - sizeof(struct page_key_data *)) + +static struct page_key_data *page_key_data; +static struct page_key_data *page_key_rp, *page_key_wp; +static unsigned long page_key_rx, page_key_wx; + +/* + * For each page in the hibernation image one additional byte is + * stored in the most significant byte of the page frame number. + * On suspend no additional memory is required but on resume the + * keys need to be memorized until the page data has been restored. + * Only then can the storage keys be set to their old state. + */ +unsigned long page_key_additional_pages(unsigned long pages) +{ + return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); +} + +/* + * Free page_key_data list of arrays. + */ +void page_key_free(void) +{ + struct page_key_data *pkd; + + while (page_key_data) { + pkd = page_key_data; + page_key_data = pkd->next; + free_page((unsigned long) pkd); + } +} + +/* + * Allocate page_key_data list of arrays with enough room to store + * one byte for each page in the hibernation image. + */ +int page_key_alloc(unsigned long pages) +{ + struct page_key_data *pk; + unsigned long size; + + size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); + while (size--) { + pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL); + if (!pk) { + page_key_free(); + return -ENOMEM; + } + pk->next = page_key_data; + page_key_data = pk; + } + page_key_rp = page_key_wp = page_key_data; + page_key_rx = page_key_wx = 0; + return 0; +} + +/* + * Save the storage key into the upper 8 bits of the page frame number. + */ +void page_key_read(unsigned long *pfn) +{ + unsigned long addr; + + addr = (unsigned long) page_address(pfn_to_page(*pfn)); + *(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr); +} + +/* + * Extract the storage key from the upper 8 bits of the page frame number + * and store it in the page_key_data list of arrays. + */ +void page_key_memorize(unsigned long *pfn) +{ + page_key_wp->data[page_key_wx] = *(unsigned char *) pfn; + *(unsigned char *) pfn = 0; + if (++page_key_wx < PAGE_KEY_DATA_SIZE) + return; + page_key_wp = page_key_wp->next; + page_key_wx = 0; +} + +/* + * Get the next key from the page_key_data list of arrays and set the + * storage key of the page referred by @address. If @address refers to + * a "safe" page the swsusp_arch_resume code will transfer the storage + * key from the buffer page to the original page. + */ +void page_key_write(void *address) +{ + page_set_storage_key((unsigned long) address, + page_key_rp->data[page_key_rx], 0); + if (++page_key_rx >= PAGE_KEY_DATA_SIZE) + return; + page_key_rp = page_key_rp->next; + page_key_rx = 0; +} + int pfn_is_nosave(unsigned long pfn) { unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index 51bcdb5..acb78cd 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -136,11 +136,14 @@ ENTRY(swsusp_arch_resume) 0: lg %r2,8(%r1) lg %r4,0(%r1) + iske %r0,%r4 lghi %r3,PAGE_SIZE lghi %r5,PAGE_SIZE 1: mvcle %r2,%r4,0 jo 1b + lg %r2,8(%r1) + sske %r0,%r2 lg %r1,16(%r1) ltgr %r1,%r1 jnz 0b diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 76f42e4..46f3548 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -386,4 +386,38 @@ static inline void unlock_system_sleep(void) } #endif +#ifdef CONFIG_ARCH_SAVE_PAGE_KEYS +/* + * The ARCH_SAVE_PAGE_KEYS functions can be used by an architecture + * to save/restore additional information to/from the array of page + * frame numbers in the hibernation image. For s390 this is used to + * save and restore the storage key for each page that is included + * in the hibernation image. + */ +unsigned long page_key_additional_pages(unsigned long pages); +int page_key_alloc(unsigned long pages); +void page_key_free(void); +void page_key_read(unsigned long *pfn); +void page_key_memorize(unsigned long *pfn); +void page_key_write(void *address); + +#else /* !CONFIG_ARCH_SAVE_PAGE_KEYS */ + +static inline unsigned long page_key_additional_pages(unsigned long pages) +{ + return 0; +} + +static inline int page_key_alloc(unsigned long pages) +{ + return 0; +} + +static inline void page_key_free(void) {} +static inline void page_key_read(unsigned long *pfn) {} +static inline void page_key_memorize(unsigned long *pfn) {} +static inline void page_key_write(void *address) {} + +#endif /* !CONFIG_ARCH_SAVE_PAGE_KEYS */ + #endif /* _LINUX_SUSPEND_H */ diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 3744c59..e01e689 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -65,6 +65,9 @@ config HIBERNATION For more information take a look at . +config ARCH_SAVE_PAGE_KEYS + bool + config PM_STD_PARTITION string "Default resume partition" depends on HIBERNATION diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 06efa54..cbe2c14 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1339,6 +1339,9 @@ int hibernate_preallocate_memory(void) count += highmem; count -= totalreserve_pages; + /* Add number of pages required for page keys (s390 only). */ + size += page_key_additional_pages(saveable); + /* Compute the maximum number of saveable pages to leave in memory. */ max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); @@ -1662,6 +1665,8 @@ pack_pfns(unsigned long *buf, struct memory_bitmap *bm) buf[j] = memory_bm_next_pfn(bm); if (unlikely(buf[j] == BM_END_OF_MAP)) break; + /* Save page key for data page (s390 only). */ + page_key_read(buf + j); } } @@ -1821,6 +1826,9 @@ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) if (unlikely(buf[j] == BM_END_OF_MAP)) break; + /* Extract and buffer page key for data page (s390 only). */ + page_key_memorize(buf + j); + if (memory_bm_pfn_present(bm, buf[j])) memory_bm_set_bit(bm, buf[j]); else @@ -2223,6 +2231,11 @@ int snapshot_write_next(struct snapshot_handle *handle) if (error) return error; + /* Allocate buffer for page keys. */ + error = page_key_alloc(nr_copy_pages); + if (error) + return error; + } else if (handle->cur <= nr_meta_pages + 1) { error = unpack_orig_pfns(buffer, ©_bm); if (error) @@ -2243,6 +2256,8 @@ int snapshot_write_next(struct snapshot_handle *handle) } } else { copy_last_highmem_page(); + /* Restore page key for data page (s390 only). */ + page_key_write(handle->buffer); handle->buffer = get_buffer(&orig_bm, &ca); if (IS_ERR(handle->buffer)) return PTR_ERR(handle->buffer); @@ -2264,6 +2279,9 @@ int snapshot_write_next(struct snapshot_handle *handle) void snapshot_write_finalize(struct snapshot_handle *handle) { copy_last_highmem_page(); + /* Restore page key for data page (s390 only). */ + page_key_write(handle->buffer); + page_key_free(); /* Free only if we have loaded the image entirely */ if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); -- cgit v0.10.2 From 528f7ce6e439edeac38f6b3f8561f1be129b5e91 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 21 Sep 2011 20:55:04 +0200 Subject: PM / Suspend: Off by one in pm_suspend() In enter_state() we use "state" as an offset for the pm_states[] array. The pm_states[] array only has PM_SUSPEND_MAX elements so this test is off by one. Signed-off-by: Dan Carpenter Signed-off-by: Rafael J. Wysocki Cc: stable@kernel.org diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 595a3dd..fdd4263 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -319,7 +319,7 @@ int enter_state(suspend_state_t state) int pm_suspend(suspend_state_t state) { int ret; - if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX) { + if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX) { ret = enter_state(state); if (ret) { suspend_stats.fail++; -- cgit v0.10.2 From 37cce26b32142f09a8967f6d238178af654b20de Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Wed, 21 Sep 2011 22:47:55 +0200 Subject: PM / VT: Cleanup #if defined uglyness and fix compile error Introduce the config option CONFIG_VT_CONSOLE_SLEEP in order to cleanup the #if defined ugliness for the vt suspend support functions. Note that CONFIG_VT_CONSOLE is already dependant on CONFIG_VT. The function pm_set_vt_switch is actually dependant on CONFIG_VT and not CONFIG_PM_SLEEP. This fixes a compile error when CONFIG_PM_SLEEP is not set: drivers/tty/vt/vt_ioctl.c:1794: error: redefinition of 'pm_set_vt_switch' include/linux/suspend.h:17: error: previous definition of 'pm_set_vt_switch' was here Also, remove the incorrect path from the comment in console.c. [rjw: Replaced #if defined() with #ifdef in suspend.h.] Signed-off-by: H Hartley Sweeten Acked-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index bd7cc05..a7188a0 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -60,6 +60,10 @@ config VT_CONSOLE If unsure, say Y. +config VT_CONSOLE_SLEEP + def_bool y + depends on VT_CONSOLE && PM_SLEEP + config HW_CONSOLE bool depends on VT && !S390 && !UML diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 46f3548..57a6924 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -8,15 +8,18 @@ #include #include -#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) +#ifdef CONFIG_VT extern void pm_set_vt_switch(int); -extern int pm_prepare_console(void); -extern void pm_restore_console(void); #else static inline void pm_set_vt_switch(int do_switch) { } +#endif +#ifdef CONFIG_VT_CONSOLE_SLEEP +extern int pm_prepare_console(void); +extern void pm_restore_console(void); +#else static inline int pm_prepare_console(void) { return 0; diff --git a/kernel/power/Makefile b/kernel/power/Makefile index ad6bdd8..07e0e28 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -2,7 +2,7 @@ ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG obj-$(CONFIG_PM) += main.o qos.o -obj-$(CONFIG_PM_SLEEP) += console.o +obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o obj-$(CONFIG_FREEZER) += process.o obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o diff --git a/kernel/power/console.c b/kernel/power/console.c index 218e5af..b1dc456 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c @@ -1,5 +1,5 @@ /* - * drivers/power/process.c - Functions for saving/restoring console. + * Functions for saving/restoring console. * * Originally from swsusp. */ @@ -10,7 +10,6 @@ #include #include "power.h" -#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) #define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) static int orig_fgconsole, orig_kmsg; @@ -32,4 +31,3 @@ void pm_restore_console(void) vt_kmsg_redirect(orig_kmsg); } } -#endif -- cgit v0.10.2 From 8f88893c05f2f677f18f2ce5591b4bed5d4a7535 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 26 Sep 2011 17:38:50 +0200 Subject: PM: Update the policy on default wakeup settings This patch (as1485) documents a change to the kernel's default wakeup policy. Devices that forward wakeup requests between buses should be enabled for wakeup by default. Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 3384d59..29b7a98 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -152,7 +152,9 @@ try to use its wakeup mechanism. device_set_wakeup_enable() affects this flag; for the most part drivers should not change its value. The initial value of should_wakeup is supposed to be false for the majority of devices; the major exceptions are power buttons, keyboards, and Ethernet adapters whose WoL -(wake-on-LAN) feature has been set up with ethtool. +(wake-on-LAN) feature has been set up with ethtool. It should also default +to true for devices that don't generate wakeup requests on their own but merely +forward wakeup requests from one bus to another (like PCI bridges). Whether or not a device is capable of issuing wakeup events is a hardware matter, and the kernel is responsible for keeping track of it. By contrast, diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 84f7c7d..14ee07e 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -276,7 +276,9 @@ EXPORT_SYMBOL_GPL(device_set_wakeup_capable); * * By default, most devices should leave wakeup disabled. The exceptions are * devices that everyone expects to be wakeup sources: keyboards, power buttons, - * possibly network interfaces, etc. + * possibly network interfaces, etc. Also, devices that don't generate their + * own wakeup requests but merely forward requests from one bus to another + * (like PCI bridges) should have wakeup enabled by default. */ int device_init_wakeup(struct device *dev, bool enable) { -- cgit v0.10.2 From 2aede851ddf08666f68ffc17be446420e9d2a056 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 26 Sep 2011 20:32:27 +0200 Subject: PM / Hibernate: Freeze kernel threads after preallocating memory There is a problem with the current ordering of hibernate code which leads to deadlocks in some filesystems' memory shrinkers. Namely, some filesystems use freezable kernel threads that are inactive when the hibernate memory preallocation is carried out. Those same filesystems use memory shrinkers that may be triggered by the hibernate memory preallocation. If those memory shrinkers wait for the frozen kernel threads, the hibernate process deadlocks (this happens with XFS, for one example). Apparently, it is not technically viable to redesign the filesystems in question to avoid the situation described above, so the only possible solution of this issue is to defer the freezing of kernel threads until the hibernate memory preallocation is done, which is implemented by this change. Unfortunately, this requires the memory preallocation to be done before the "prepare" stage of device freeze, so after this change the only way drivers can allocate additional memory for their freeze routines in a clean way is to use PM notifiers. Reported-by: Christoph Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 29b7a98..646a89e 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -281,10 +281,6 @@ When the system goes into the standby or memory sleep state, the phases are: time.) Unlike the other suspend-related phases, during the prepare phase the device tree is traversed top-down. - In addition to that, if device drivers need to allocate additional - memory to be able to hadle device suspend correctly, that should be - done in the prepare phase. - After the prepare callback method returns, no new children may be registered below the device. The method may also prepare the device or driver in some way for the upcoming system power transition (for diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 1effc8b..aa56cf3 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -49,6 +49,7 @@ extern int thaw_process(struct task_struct *p); extern void refrigerator(void); extern int freeze_processes(void); +extern int freeze_kernel_threads(void); extern void thaw_processes(void); static inline int try_to_freeze(void) @@ -171,7 +172,8 @@ static inline void clear_freeze_flag(struct task_struct *p) {} static inline int thaw_process(struct task_struct *p) { return 1; } static inline void refrigerator(void) {} -static inline int freeze_processes(void) { BUG(); return 0; } +static inline int freeze_processes(void) { return -ENOSYS; } +static inline int freeze_kernel_threads(void) { return -ENOSYS; } static inline void thaw_processes(void) {} static inline int try_to_freeze(void) { return 0; } diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 8f7b1db..3a20466 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -334,13 +334,17 @@ int hibernation_snapshot(int platform_mode) if (error) goto Close; - error = dpm_prepare(PMSG_FREEZE); - if (error) - goto Complete_devices; - /* Preallocate image memory before shutting down devices. */ error = hibernate_preallocate_memory(); if (error) + goto Close; + + error = freeze_kernel_threads(); + if (error) + goto Close; + + error = dpm_prepare(PMSG_FREEZE); + if (error) goto Complete_devices; suspend_console(); diff --git a/kernel/power/power.h b/kernel/power/power.h index 9a00a0a..e620639 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -228,7 +228,8 @@ extern int pm_test_level; #ifdef CONFIG_SUSPEND_FREEZER static inline int suspend_freeze_processes(void) { - return freeze_processes(); + int error = freeze_processes(); + return error ? : freeze_kernel_threads(); } static inline void suspend_thaw_processes(void) diff --git a/kernel/power/process.c b/kernel/power/process.c index 0cf3a27..addbbe5 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -135,7 +135,7 @@ static int try_to_freeze_tasks(bool sig_only) } /** - * freeze_processes - tell processes to enter the refrigerator + * freeze_processes - Signal user space processes to enter the refrigerator. */ int freeze_processes(void) { @@ -143,20 +143,30 @@ int freeze_processes(void) printk("Freezing user space processes ... "); error = try_to_freeze_tasks(true); - if (error) - goto Exit; - printk("done.\n"); + if (!error) { + printk("done."); + oom_killer_disable(); + } + printk("\n"); + BUG_ON(in_atomic()); + + return error; +} + +/** + * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator. + */ +int freeze_kernel_threads(void) +{ + int error; printk("Freezing remaining freezable tasks ... "); error = try_to_freeze_tasks(false); - if (error) - goto Exit; - printk("done."); + if (!error) + printk("done."); - oom_killer_disable(); - Exit: - BUG_ON(in_atomic()); printk("\n"); + BUG_ON(in_atomic()); return error; } -- cgit v0.10.2 From 21e82808fc465b66fedaac0f4e885cafb304e843 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Tue, 27 Sep 2011 22:05:44 +0200 Subject: PM / Hibernate: Fix typo in a kerneldoc comment Fix a typo in a function name in the kerneldoc comment next to resume_target_kernel(). [rjw: Changed the subject slightly, added the changelog.] Signed-off-by: Barry Song Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 3a20466..7f44e5c 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -467,7 +467,7 @@ static int resume_target_kernel(bool platform_mode) * @platform_mode: If set, use platform driver to prepare for the transition. * * This routine must be called with pm_mutex held. If it is successful, control - * reappears in the restored target kernel in hibernation_snaphot(). + * reappears in the restored target kernel in hibernation_snapshot(). */ int hibernation_restore(int platform_mode) { -- cgit v0.10.2 From 6f8d7022a842809aeb24db1d15669198ef02c131 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Thu, 6 Oct 2011 20:34:46 +0200 Subject: PM / Hibernate: Add resumewait param to support MMC-like devices as resume file Some devices like MMC are async detected very slow. For example, drivers/mmc/host/sdhci.c launches a 200ms delayed work to detect MMC partitions then add disk. We have wait_for_device_probe() and scsi_complete_async_scans() before calling swsusp_check(), but it is not enough to wait for MMC. This patch adds resumewait kernel param just like rootwait so that we have enough time to wait until MMC is ready. The difference is that we wait for resume partition whereas rootwait waits for rootfs partition (which may be on a different device). This patch will make hibernation support many embedded products without SCSI devices, but with devices like MMC. [rjw: Modified the changelog slightly.] Signed-off-by: Barry Song Reviewed-by: Valdis Kletnieks Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 854ed5ca..88a7b19 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2240,6 +2240,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. in units (needed only for swap files). See Documentation/power/swsusp-and-swap-files.txt + resumewait [HIBERNATION] Wait (indefinitely) for resume device to show up. + Useful for devices that are detected asynchronously + (e.g. USB and MMC devices). + hibernate= [HIBERNATION] noresume Don't check if there's a hibernation image present during boot. diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 7f44e5c..0f87850 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,7 @@ static int nocompress = 0; static int noresume = 0; +static int resume_wait = 0; static char resume_file[256] = CONFIG_PM_STD_PARTITION; dev_t swsusp_resume_device; sector_t swsusp_resume_block; @@ -736,6 +738,13 @@ static int software_resume(void) * to wait for this to finish. */ wait_for_device_probe(); + + if (resume_wait) { + while ((swsusp_resume_device = name_to_dev_t(resume_file)) == 0) + msleep(10); + async_synchronize_full(); + } + /* * We can't depend on SCSI devices being available after loading * one of their modules until scsi_complete_async_scans() is @@ -1064,7 +1073,14 @@ static int __init noresume_setup(char *str) return 1; } +static int __init resumewait_setup(char *str) +{ + resume_wait = 1; + return 1; +} + __setup("noresume", noresume_setup); __setup("resume_offset=", resume_offset_setup); __setup("resume=", resume_setup); __setup("hibernate=", hibernate_setup); +__setup("resumewait", resumewait_setup); -- cgit v0.10.2 From 89e8ea1278fb3b237159a1ca193002ef5c8652d8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 6 Oct 2011 20:35:03 +0200 Subject: PM / ACPI: Blacklist Sony Vaio known to require acpi_sleep=nonvs Apparently, Sony Vaio VGN-SR26GN_P does not resume correctly without acpi_sleep=nonvs, so add it to the ACPI sleep blacklist. References: https://bugzilla.kernel.org/show_bug.cgi?id=16396#c91 Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 3ed80b2..8c3e514 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -444,6 +444,14 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI Premium"), }, }, + { + .callback = init_nvs_nosave, + .ident = "Sony Vaio VGN-SR26GN_P", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "VGN-SR26GN_P"), + }, + }, {}, }; #endif /* CONFIG_SUSPEND */ -- cgit v0.10.2 From 731b25a4ad3c27b44f3447382da18b59167eb7a1 Mon Sep 17 00:00:00 2001 From: Bogdan Radulescu Date: Thu, 6 Oct 2011 20:35:12 +0200 Subject: PM / ACPI: Blacklist Vaio VGN-FW520F machine known to require acpi_sleep=nonvs Sony Vaio VGN-FW520F does not resume correctly without acpi_sleep=nonvs, so add it to the ACPI sleep blacklist. References: https://bugzilla.kernel.org/show_bug.cgi?id=16396#c86 Signed-off-by: Bogdan Radulescu Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 8c3e514..28a3e96 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -452,6 +452,14 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "VGN-SR26GN_P"), }, }, + { + .callback = init_nvs_nosave, + .ident = "Sony Vaio VGN-FW520F", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "VGN-FW520F"), + }, + }, {}, }; #endif /* CONFIG_SUSPEND */ -- cgit v0.10.2 From bf1c138e350155edb06709c7fbf0946f252b257c Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sat, 8 Oct 2011 20:57:50 +0200 Subject: MAINTAINERS: Update linux-pm list address Change the linux-pm list address in MAINTAINERS, as it has been moved to vger.kernel.org now. Signed-off-by: WANG Cong Signed-off-by: Rafael J. Wysocki diff --git a/MAINTAINERS b/MAINTAINERS index ace8f9c..1774b68 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2733,7 +2733,7 @@ F: fs/freevxfs/ FREEZER M: Pavel Machek M: "Rafael J. Wysocki" -L: linux-pm@lists.linux-foundation.org +L: linux-pm@vger.kernel.org S: Supported F: Documentation/power/freezing-of-tasks.txt F: include/linux/freezer.h @@ -2995,7 +2995,7 @@ F: drivers/video/hgafb.c HIBERNATION (aka Software Suspend, aka swsusp) M: Pavel Machek M: "Rafael J. Wysocki" -L: linux-pm@lists.linux-foundation.org +L: linux-pm@vger.kernel.org S: Supported F: arch/x86/power/ F: drivers/base/power/ @@ -3189,7 +3189,7 @@ F: drivers/ide/ide-cd* IDLE-I7300 M: Andy Henroid -L: linux-pm@lists.linux-foundation.org +L: linux-pm@vger.kernel.org S: Supported F: drivers/idle/i7300_idle.c @@ -3272,7 +3272,7 @@ F: firmware/isci/ INTEL IDLE DRIVER M: Len Brown -L: linux-pm@lists.linux-foundation.org +L: linux-pm@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux-idle-2.6.git S: Supported F: drivers/idle/intel_idle.c @@ -3376,7 +3376,7 @@ F: drivers/net/ixgbevf/ INTEL MRST PMU DRIVER M: Len Brown -L: linux-pm@lists.linux-foundation.org +L: linux-pm@vger.kernel.org S: Supported F: arch/x86/platform/mrst/pmu.* @@ -6306,7 +6306,7 @@ SUSPEND TO RAM M: Len Brown M: Pavel Machek M: "Rafael J. Wysocki" -L: linux-pm@lists.linux-foundation.org +L: linux-pm@vger.kernel.org S: Supported F: Documentation/power/ F: arch/x86/kernel/acpi/ -- cgit v0.10.2 From f126f7334f72e2fd1b7a62bba20c488b86e6e7c4 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 10 Oct 2011 23:38:41 +0200 Subject: PM / Hibernate: Add resumedelay kernel param in addition to resumewait Patch "PM / Hibernate: Add resumewait param to support MMC-like devices as resume file" added the resumewait kernel command line option. The present patch adds resumedelay so that resumewait/delay were analogous to rootwait/delay. [rjw: Modified the subject and changelog slightly.] Signed-off-by: Barry Song Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 88a7b19..831bde2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2240,6 +2240,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. in units (needed only for swap files). See Documentation/power/swsusp-and-swap-files.txt + resumedelay= [HIBERNATION] Delay (in seconds) to pause before attempting to + read the resume files + resumewait [HIBERNATION] Wait (indefinitely) for resume device to show up. Useful for devices that are detected asynchronously (e.g. USB and MMC devices). diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 0f87850..50f5379 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -33,6 +33,7 @@ static int nocompress = 0; static int noresume = 0; static int resume_wait = 0; +static int resume_delay = 0; static char resume_file[256] = CONFIG_PM_STD_PARTITION; dev_t swsusp_resume_device; sector_t swsusp_resume_block; @@ -730,6 +731,12 @@ static int software_resume(void) pr_debug("PM: Checking hibernation image partition %s\n", resume_file); + if (resume_delay) { + printk(KERN_INFO "Waiting %dsec before reading resume device...\n", + resume_delay); + ssleep(resume_delay); + } + /* Check if the device is there */ swsusp_resume_device = name_to_dev_t(resume_file); if (!swsusp_resume_device) { @@ -1079,8 +1086,15 @@ static int __init resumewait_setup(char *str) return 1; } +static int __init resumedelay_setup(char *str) +{ + resume_delay = simple_strtoul(str, NULL, 0); + return 1; +} + __setup("noresume", noresume_setup); __setup("resume_offset=", resume_offset_setup); __setup("resume=", resume_setup); __setup("hibernate=", hibernate_setup); __setup("resumewait", resumewait_setup); +__setup("resumedelay=", resumedelay_setup); -- cgit v0.10.2 From 27920651fe0d16a25632dc238e81b54f3a504869 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Oct 2011 21:20:55 +0200 Subject: PM / Freezer: Make fake_signal_wake_up() wake TASK_KILLABLE tasks too TASK_KILLABLE is often used to put tasks to sleep for quite some time. One of the most common uses is to put tasks to sleep while waiting for replies from a server on a networked filesystem (such as CIFS or NFS). Unfortunately, fake_signal_wake_up does not currently wake up tasks that are sleeping in TASK_KILLABLE state. This means that even if the code were in place to allow them to freeze while in this sleep, it wouldn't work anyway. This patch changes this function to wake tasks in this state as well. This should be harmless -- if the code doing the sleeping doesn't have handling to deal with freezer events, it should just go back to sleep. If it does, then this will allow that code to do the right thing. Signed-off-by: Jeff Layton Signed-off-by: Rafael J. Wysocki diff --git a/kernel/freezer.c b/kernel/freezer.c index 7b01de9..66a594e 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -67,7 +67,7 @@ static void fake_signal_wake_up(struct task_struct *p) unsigned long flags; spin_lock_irqsave(&p->sighand->siglock, flags); - signal_wake_up(p, 0); + signal_wake_up(p, 1); spin_unlock_irqrestore(&p->sighand->siglock, flags); } -- cgit v0.10.2 From d231ff1af70a2df43d809173cf8c94e9c3beb853 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Tue, 11 Oct 2011 23:29:18 -0700 Subject: PM / Hibernate: Do not initialize static and extern variables to 0 Static and extern variables in kernel/power/hibernate.c need not be initialized to 0 explicitly, so remove those initializations. [rjw: Modified subject, added changelog.] Signed-off-by: Barry Song Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 50f5379..ea12c8f 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -30,14 +30,14 @@ #include "power.h" -static int nocompress = 0; -static int noresume = 0; -static int resume_wait = 0; -static int resume_delay = 0; +static int nocompress; +static int noresume; +static int resume_wait; +static int resume_delay; static char resume_file[256] = CONFIG_PM_STD_PARTITION; dev_t swsusp_resume_device; sector_t swsusp_resume_block; -int in_suspend __nosavedata = 0; +int in_suspend __nosavedata; enum { HIBERNATION_INVALID, -- cgit v0.10.2 From 081a9d043c983f161b78fdc4671324d1342b86bc Mon Sep 17 00:00:00 2001 From: Bojan Smojver Date: Thu, 13 Oct 2011 23:58:07 +0200 Subject: PM / Hibernate: Improve performance of LZO/plain hibernation, checksum image Use threads for LZO compression/decompression on hibernate/thaw. Improve buffering on hibernate/thaw. Calculate/verify CRC32 of the image pages on hibernate/thaw. In my testing, this improved write/read speed by a factor of about two. Signed-off-by: Bojan Smojver Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index e01e689..cedd998 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -27,6 +27,7 @@ config HIBERNATION select HIBERNATE_CALLBACKS select LZO_COMPRESS select LZO_DECOMPRESS + select CRC32 ---help--- Enable the suspend to disk (STD) functionality, which is usually called "hibernation" in user interfaces. STD checkpoints the diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index ea12c8f..1c53f7f 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -657,6 +657,9 @@ int hibernate(void) flags |= SF_PLATFORM_MODE; if (nocompress) flags |= SF_NOCOMPRESS_MODE; + else + flags |= SF_CRC32_MODE; + pr_debug("PM: writing image.\n"); error = swsusp_write(flags); swsusp_free(); diff --git a/kernel/power/power.h b/kernel/power/power.h index e620639..23a2db1 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -146,6 +146,7 @@ extern int swsusp_swap_in_use(void); */ #define SF_PLATFORM_MODE 1 #define SF_NOCOMPRESS_MODE 2 +#define SF_CRC32_MODE 4 /* kernel/power/hibernate.c */ extern int swsusp_check(void); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 7c97c3a..11a594c 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -27,6 +27,10 @@ #include #include #include +#include +#include +#include +#include #include "power.h" @@ -43,8 +47,7 @@ * allocated and populated one at a time, so we only need one memory * page to set up the entire structure. * - * During resume we also only need to use one swap_map_page structure - * at a time. + * During resume we pick up all swap_map_page structures into a list. */ #define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1) @@ -54,6 +57,11 @@ struct swap_map_page { sector_t next_swap; }; +struct swap_map_page_list { + struct swap_map_page *map; + struct swap_map_page_list *next; +}; + /** * The swap_map_handle structure is used for handling swap in * a file-alike way @@ -61,13 +69,18 @@ struct swap_map_page { struct swap_map_handle { struct swap_map_page *cur; + struct swap_map_page_list *maps; sector_t cur_swap; sector_t first_sector; unsigned int k; + unsigned long nr_free_pages, written; + u32 crc32; }; struct swsusp_header { - char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)]; + char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) - + sizeof(u32)]; + u32 crc32; sector_t image; unsigned int flags; /* Flags to pass to the "boot" kernel */ char orig_sig[10]; @@ -199,6 +212,8 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) memcpy(swsusp_header->sig, HIBERNATE_SIG, 10); swsusp_header->image = handle->first_sector; swsusp_header->flags = flags; + if (flags & SF_CRC32_MODE) + swsusp_header->crc32 = handle->crc32; error = hib_bio_write_page(swsusp_resume_block, swsusp_header, NULL); } else { @@ -245,6 +260,7 @@ static int swsusp_swap_check(void) static int write_page(void *buf, sector_t offset, struct bio **bio_chain) { void *src; + int ret; if (!offset) return -ENOSPC; @@ -254,9 +270,17 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) if (src) { copy_page(src, buf); } else { - WARN_ON_ONCE(1); - bio_chain = NULL; /* Go synchronous */ - src = buf; + ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */ + if (ret) + return ret; + src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + if (src) { + copy_page(src, buf); + } else { + WARN_ON_ONCE(1); + bio_chain = NULL; /* Go synchronous */ + src = buf; + } } } else { src = buf; @@ -293,6 +317,8 @@ static int get_swap_writer(struct swap_map_handle *handle) goto err_rel; } handle->k = 0; + handle->nr_free_pages = nr_free_pages() >> 1; + handle->written = 0; handle->first_sector = handle->cur_swap; return 0; err_rel: @@ -316,20 +342,23 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, return error; handle->cur->entries[handle->k++] = offset; if (handle->k >= MAP_PAGE_ENTRIES) { - error = hib_wait_on_bio_chain(bio_chain); - if (error) - goto out; offset = alloc_swapdev_block(root_swap); if (!offset) return -ENOSPC; handle->cur->next_swap = offset; - error = write_page(handle->cur, handle->cur_swap, NULL); + error = write_page(handle->cur, handle->cur_swap, bio_chain); if (error) goto out; clear_page(handle->cur); handle->cur_swap = offset; handle->k = 0; } + if (bio_chain && ++handle->written > handle->nr_free_pages) { + error = hib_wait_on_bio_chain(bio_chain); + if (error) + goto out; + handle->written = 0; + } out: return error; } @@ -372,6 +401,13 @@ static int swap_writer_finish(struct swap_map_handle *handle, LZO_HEADER, PAGE_SIZE) #define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE) +/* Maximum number of threads for compression/decompression. */ +#define LZO_THREADS 3 + +/* Maximum number of pages for read buffering. */ +#define LZO_READ_PAGES (MAP_PAGE_ENTRIES * 8) + + /** * save_image - save the suspend image data */ @@ -419,6 +455,92 @@ static int save_image(struct swap_map_handle *handle, return ret; } +/** + * Structure used for CRC32. + */ +struct crc_data { + struct task_struct *thr; /* thread */ + atomic_t ready; /* ready to start flag */ + atomic_t stop; /* ready to stop flag */ + unsigned run_threads; /* nr current threads */ + wait_queue_head_t go; /* start crc update */ + wait_queue_head_t done; /* crc update done */ + u32 *crc32; /* points to handle's crc32 */ + size_t *unc_len[LZO_THREADS]; /* uncompressed lengths */ + unsigned char *unc[LZO_THREADS]; /* uncompressed data */ +}; + +/** + * CRC32 update function that runs in its own thread. + */ +static int crc32_threadfn(void *data) +{ + struct crc_data *d = data; + unsigned i; + + while (1) { + wait_event(d->go, atomic_read(&d->ready) || + kthread_should_stop()); + if (kthread_should_stop()) { + d->thr = NULL; + atomic_set(&d->stop, 1); + wake_up(&d->done); + break; + } + atomic_set(&d->ready, 0); + + for (i = 0; i < d->run_threads; i++) + *d->crc32 = crc32_le(*d->crc32, + d->unc[i], *d->unc_len[i]); + atomic_set(&d->stop, 1); + wake_up(&d->done); + } + return 0; +} +/** + * Structure used for LZO data compression. + */ +struct cmp_data { + struct task_struct *thr; /* thread */ + atomic_t ready; /* ready to start flag */ + atomic_t stop; /* ready to stop flag */ + int ret; /* return code */ + wait_queue_head_t go; /* start compression */ + wait_queue_head_t done; /* compression done */ + size_t unc_len; /* uncompressed length */ + size_t cmp_len; /* compressed length */ + unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */ + unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */ + unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */ +}; + +/** + * Compression function that runs in its own thread. + */ +static int lzo_compress_threadfn(void *data) +{ + struct cmp_data *d = data; + + while (1) { + wait_event(d->go, atomic_read(&d->ready) || + kthread_should_stop()); + if (kthread_should_stop()) { + d->thr = NULL; + d->ret = -1; + atomic_set(&d->stop, 1); + wake_up(&d->done); + break; + } + atomic_set(&d->ready, 0); + + d->ret = lzo1x_1_compress(d->unc, d->unc_len, + d->cmp + LZO_HEADER, &d->cmp_len, + d->wrk); + atomic_set(&d->stop, 1); + wake_up(&d->done); + } + return 0; +} /** * save_image_lzo - Save the suspend image data compressed with LZO. @@ -437,42 +559,93 @@ static int save_image_lzo(struct swap_map_handle *handle, struct bio *bio; struct timeval start; struct timeval stop; - size_t off, unc_len, cmp_len; - unsigned char *unc, *cmp, *wrk, *page; + size_t off; + unsigned thr, run_threads, nr_threads; + unsigned char *page = NULL; + struct cmp_data *data = NULL; + struct crc_data *crc = NULL; + + /* + * We'll limit the number of threads for compression to limit memory + * footprint. + */ + nr_threads = num_online_cpus() - 1; + nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); if (!page) { printk(KERN_ERR "PM: Failed to allocate LZO page\n"); - return -ENOMEM; + ret = -ENOMEM; + goto out_clean; } - wrk = vmalloc(LZO1X_1_MEM_COMPRESS); - if (!wrk) { - printk(KERN_ERR "PM: Failed to allocate LZO workspace\n"); - free_page((unsigned long)page); - return -ENOMEM; + data = vmalloc(sizeof(*data) * nr_threads); + if (!data) { + printk(KERN_ERR "PM: Failed to allocate LZO data\n"); + ret = -ENOMEM; + goto out_clean; } + for (thr = 0; thr < nr_threads; thr++) + memset(&data[thr], 0, offsetof(struct cmp_data, go)); - unc = vmalloc(LZO_UNC_SIZE); - if (!unc) { - printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); - vfree(wrk); - free_page((unsigned long)page); - return -ENOMEM; + crc = kmalloc(sizeof(*crc), GFP_KERNEL); + if (!crc) { + printk(KERN_ERR "PM: Failed to allocate crc\n"); + ret = -ENOMEM; + goto out_clean; + } + memset(crc, 0, offsetof(struct crc_data, go)); + + /* + * Start the compression threads. + */ + for (thr = 0; thr < nr_threads; thr++) { + init_waitqueue_head(&data[thr].go); + init_waitqueue_head(&data[thr].done); + + data[thr].thr = kthread_run(lzo_compress_threadfn, + &data[thr], + "image_compress/%u", thr); + if (IS_ERR(data[thr].thr)) { + data[thr].thr = NULL; + printk(KERN_ERR + "PM: Cannot start compression threads\n"); + ret = -ENOMEM; + goto out_clean; + } } - cmp = vmalloc(LZO_CMP_SIZE); - if (!cmp) { - printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); - vfree(unc); - vfree(wrk); - free_page((unsigned long)page); - return -ENOMEM; + /* + * Adjust number of free pages after all allocations have been done. + * We don't want to run out of pages when writing. + */ + handle->nr_free_pages = nr_free_pages() >> 1; + + /* + * Start the CRC32 thread. + */ + init_waitqueue_head(&crc->go); + init_waitqueue_head(&crc->done); + + handle->crc32 = 0; + crc->crc32 = &handle->crc32; + for (thr = 0; thr < nr_threads; thr++) { + crc->unc[thr] = data[thr].unc; + crc->unc_len[thr] = &data[thr].unc_len; + } + + crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32"); + if (IS_ERR(crc->thr)) { + crc->thr = NULL; + printk(KERN_ERR "PM: Cannot start CRC32 thread\n"); + ret = -ENOMEM; + goto out_clean; } printk(KERN_INFO + "PM: Using %u thread(s) for compression.\n" "PM: Compressing and saving image data (%u pages) ... ", - nr_to_write); + nr_threads, nr_to_write); m = nr_to_write / 100; if (!m) m = 1; @@ -480,55 +653,83 @@ static int save_image_lzo(struct swap_map_handle *handle, bio = NULL; do_gettimeofday(&start); for (;;) { - for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) { - ret = snapshot_read_next(snapshot); - if (ret < 0) - goto out_finish; - - if (!ret) + for (thr = 0; thr < nr_threads; thr++) { + for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) { + ret = snapshot_read_next(snapshot); + if (ret < 0) + goto out_finish; + + if (!ret) + break; + + memcpy(data[thr].unc + off, + data_of(*snapshot), PAGE_SIZE); + + if (!(nr_pages % m)) + printk(KERN_CONT "\b\b\b\b%3d%%", + nr_pages / m); + nr_pages++; + } + if (!off) break; - memcpy(unc + off, data_of(*snapshot), PAGE_SIZE); + data[thr].unc_len = off; - if (!(nr_pages % m)) - printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m); - nr_pages++; + atomic_set(&data[thr].ready, 1); + wake_up(&data[thr].go); } - if (!off) + if (!thr) break; - unc_len = off; - ret = lzo1x_1_compress(unc, unc_len, - cmp + LZO_HEADER, &cmp_len, wrk); - if (ret < 0) { - printk(KERN_ERR "PM: LZO compression failed\n"); - break; - } + crc->run_threads = thr; + atomic_set(&crc->ready, 1); + wake_up(&crc->go); - if (unlikely(!cmp_len || - cmp_len > lzo1x_worst_compress(unc_len))) { - printk(KERN_ERR "PM: Invalid LZO compressed length\n"); - ret = -1; - break; - } + for (run_threads = thr, thr = 0; thr < run_threads; thr++) { + wait_event(data[thr].done, + atomic_read(&data[thr].stop)); + atomic_set(&data[thr].stop, 0); - *(size_t *)cmp = cmp_len; + ret = data[thr].ret; - /* - * Given we are writing one page at a time to disk, we copy - * that much from the buffer, although the last bit will likely - * be smaller than full page. This is OK - we saved the length - * of the compressed data, so any garbage at the end will be - * discarded when we read it. - */ - for (off = 0; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) { - memcpy(page, cmp + off, PAGE_SIZE); + if (ret < 0) { + printk(KERN_ERR "PM: LZO compression failed\n"); + goto out_finish; + } - ret = swap_write_page(handle, page, &bio); - if (ret) + if (unlikely(!data[thr].cmp_len || + data[thr].cmp_len > + lzo1x_worst_compress(data[thr].unc_len))) { + printk(KERN_ERR + "PM: Invalid LZO compressed length\n"); + ret = -1; goto out_finish; + } + + *(size_t *)data[thr].cmp = data[thr].cmp_len; + + /* + * Given we are writing one page at a time to disk, we + * copy that much from the buffer, although the last + * bit will likely be smaller than full page. This is + * OK - we saved the length of the compressed data, so + * any garbage at the end will be discarded when we + * read it. + */ + for (off = 0; + off < LZO_HEADER + data[thr].cmp_len; + off += PAGE_SIZE) { + memcpy(page, data[thr].cmp + off, PAGE_SIZE); + + ret = swap_write_page(handle, page, &bio); + if (ret) + goto out_finish; + } } + + wait_event(crc->done, atomic_read(&crc->stop)); + atomic_set(&crc->stop, 0); } out_finish: @@ -536,16 +737,25 @@ out_finish: do_gettimeofday(&stop); if (!ret) ret = err2; - if (!ret) + if (!ret) { printk(KERN_CONT "\b\b\b\bdone\n"); - else + } else { printk(KERN_CONT "\n"); + } swsusp_show_speed(&start, &stop, nr_to_write, "Wrote"); - - vfree(cmp); - vfree(unc); - vfree(wrk); - free_page((unsigned long)page); +out_clean: + if (crc) { + if (crc->thr) + kthread_stop(crc->thr); + kfree(crc); + } + if (data) { + for (thr = 0; thr < nr_threads; thr++) + if (data[thr].thr) + kthread_stop(data[thr].thr); + vfree(data); + } + if (page) free_page((unsigned long)page); return ret; } @@ -625,8 +835,15 @@ out_finish: static void release_swap_reader(struct swap_map_handle *handle) { - if (handle->cur) - free_page((unsigned long)handle->cur); + struct swap_map_page_list *tmp; + + while (handle->maps) { + if (handle->maps->map) + free_page((unsigned long)handle->maps->map); + tmp = handle->maps; + handle->maps = handle->maps->next; + kfree(tmp); + } handle->cur = NULL; } @@ -634,22 +851,46 @@ static int get_swap_reader(struct swap_map_handle *handle, unsigned int *flags_p) { int error; + struct swap_map_page_list *tmp, *last; + sector_t offset; *flags_p = swsusp_header->flags; if (!swsusp_header->image) /* how can this happen? */ return -EINVAL; - handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH); - if (!handle->cur) - return -ENOMEM; + handle->cur = NULL; + last = handle->maps = NULL; + offset = swsusp_header->image; + while (offset) { + tmp = kmalloc(sizeof(*handle->maps), GFP_KERNEL); + if (!tmp) { + release_swap_reader(handle); + return -ENOMEM; + } + memset(tmp, 0, sizeof(*tmp)); + if (!handle->maps) + handle->maps = tmp; + if (last) + last->next = tmp; + last = tmp; + + tmp->map = (struct swap_map_page *) + __get_free_page(__GFP_WAIT | __GFP_HIGH); + if (!tmp->map) { + release_swap_reader(handle); + return -ENOMEM; + } - error = hib_bio_read_page(swsusp_header->image, handle->cur, NULL); - if (error) { - release_swap_reader(handle); - return error; + error = hib_bio_read_page(offset, tmp->map, NULL); + if (error) { + release_swap_reader(handle); + return error; + } + offset = tmp->map->next_swap; } handle->k = 0; + handle->cur = handle->maps->map; return 0; } @@ -658,6 +899,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf, { sector_t offset; int error; + struct swap_map_page_list *tmp; if (!handle->cur) return -EINVAL; @@ -668,13 +910,15 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf, if (error) return error; if (++handle->k >= MAP_PAGE_ENTRIES) { - error = hib_wait_on_bio_chain(bio_chain); handle->k = 0; - offset = handle->cur->next_swap; - if (!offset) + free_page((unsigned long)handle->maps->map); + tmp = handle->maps; + handle->maps = handle->maps->next; + kfree(tmp); + if (!handle->maps) release_swap_reader(handle); - else if (!error) - error = hib_bio_read_page(offset, handle->cur, NULL); + else + handle->cur = handle->maps->map; } return error; } @@ -697,7 +941,7 @@ static int load_image(struct swap_map_handle *handle, unsigned int nr_to_read) { unsigned int m; - int error = 0; + int ret = 0; struct timeval start; struct timeval stop; struct bio *bio; @@ -713,15 +957,15 @@ static int load_image(struct swap_map_handle *handle, bio = NULL; do_gettimeofday(&start); for ( ; ; ) { - error = snapshot_write_next(snapshot); - if (error <= 0) + ret = snapshot_write_next(snapshot); + if (ret <= 0) break; - error = swap_read_page(handle, data_of(*snapshot), &bio); - if (error) + ret = swap_read_page(handle, data_of(*snapshot), &bio); + if (ret) break; if (snapshot->sync_read) - error = hib_wait_on_bio_chain(&bio); - if (error) + ret = hib_wait_on_bio_chain(&bio); + if (ret) break; if (!(nr_pages % m)) printk("\b\b\b\b%3d%%", nr_pages / m); @@ -729,17 +973,61 @@ static int load_image(struct swap_map_handle *handle, } err2 = hib_wait_on_bio_chain(&bio); do_gettimeofday(&stop); - if (!error) - error = err2; - if (!error) { + if (!ret) + ret = err2; + if (!ret) { printk("\b\b\b\bdone\n"); snapshot_write_finalize(snapshot); if (!snapshot_image_loaded(snapshot)) - error = -ENODATA; + ret = -ENODATA; } else printk("\n"); swsusp_show_speed(&start, &stop, nr_to_read, "Read"); - return error; + return ret; +} + +/** + * Structure used for LZO data decompression. + */ +struct dec_data { + struct task_struct *thr; /* thread */ + atomic_t ready; /* ready to start flag */ + atomic_t stop; /* ready to stop flag */ + int ret; /* return code */ + wait_queue_head_t go; /* start decompression */ + wait_queue_head_t done; /* decompression done */ + size_t unc_len; /* uncompressed length */ + size_t cmp_len; /* compressed length */ + unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */ + unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */ +}; + +/** + * Deompression function that runs in its own thread. + */ +static int lzo_decompress_threadfn(void *data) +{ + struct dec_data *d = data; + + while (1) { + wait_event(d->go, atomic_read(&d->ready) || + kthread_should_stop()); + if (kthread_should_stop()) { + d->thr = NULL; + d->ret = -1; + atomic_set(&d->stop, 1); + wake_up(&d->done); + break; + } + atomic_set(&d->ready, 0); + + d->unc_len = LZO_UNC_SIZE; + d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len, + d->unc, &d->unc_len); + atomic_set(&d->stop, 1); + wake_up(&d->done); + } + return 0; } /** @@ -753,50 +1041,120 @@ static int load_image_lzo(struct swap_map_handle *handle, unsigned int nr_to_read) { unsigned int m; - int error = 0; + int ret = 0; + int eof = 0; struct bio *bio; struct timeval start; struct timeval stop; unsigned nr_pages; - size_t i, off, unc_len, cmp_len; - unsigned char *unc, *cmp, *page[LZO_CMP_PAGES]; - - for (i = 0; i < LZO_CMP_PAGES; i++) { - page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); - if (!page[i]) { - printk(KERN_ERR "PM: Failed to allocate LZO page\n"); + size_t off; + unsigned i, thr, run_threads, nr_threads; + unsigned ring = 0, pg = 0, ring_size = 0, + have = 0, want, need, asked = 0; + unsigned long read_pages; + unsigned char **page = NULL; + struct dec_data *data = NULL; + struct crc_data *crc = NULL; + + /* + * We'll limit the number of threads for decompression to limit memory + * footprint. + */ + nr_threads = num_online_cpus() - 1; + nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); + + page = vmalloc(sizeof(*page) * LZO_READ_PAGES); + if (!page) { + printk(KERN_ERR "PM: Failed to allocate LZO page\n"); + ret = -ENOMEM; + goto out_clean; + } - while (i) - free_page((unsigned long)page[--i]); + data = vmalloc(sizeof(*data) * nr_threads); + if (!data) { + printk(KERN_ERR "PM: Failed to allocate LZO data\n"); + ret = -ENOMEM; + goto out_clean; + } + for (thr = 0; thr < nr_threads; thr++) + memset(&data[thr], 0, offsetof(struct dec_data, go)); - return -ENOMEM; + crc = kmalloc(sizeof(*crc), GFP_KERNEL); + if (!crc) { + printk(KERN_ERR "PM: Failed to allocate crc\n"); + ret = -ENOMEM; + goto out_clean; + } + memset(crc, 0, offsetof(struct crc_data, go)); + + /* + * Start the decompression threads. + */ + for (thr = 0; thr < nr_threads; thr++) { + init_waitqueue_head(&data[thr].go); + init_waitqueue_head(&data[thr].done); + + data[thr].thr = kthread_run(lzo_decompress_threadfn, + &data[thr], + "image_decompress/%u", thr); + if (IS_ERR(data[thr].thr)) { + data[thr].thr = NULL; + printk(KERN_ERR + "PM: Cannot start decompression threads\n"); + ret = -ENOMEM; + goto out_clean; } } - unc = vmalloc(LZO_UNC_SIZE); - if (!unc) { - printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); - - for (i = 0; i < LZO_CMP_PAGES; i++) - free_page((unsigned long)page[i]); - - return -ENOMEM; + /* + * Start the CRC32 thread. + */ + init_waitqueue_head(&crc->go); + init_waitqueue_head(&crc->done); + + handle->crc32 = 0; + crc->crc32 = &handle->crc32; + for (thr = 0; thr < nr_threads; thr++) { + crc->unc[thr] = data[thr].unc; + crc->unc_len[thr] = &data[thr].unc_len; } - cmp = vmalloc(LZO_CMP_SIZE); - if (!cmp) { - printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); + crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32"); + if (IS_ERR(crc->thr)) { + crc->thr = NULL; + printk(KERN_ERR "PM: Cannot start CRC32 thread\n"); + ret = -ENOMEM; + goto out_clean; + } - vfree(unc); - for (i = 0; i < LZO_CMP_PAGES; i++) - free_page((unsigned long)page[i]); + /* + * Adjust number of pages for read buffering, in case we are short. + */ + read_pages = (nr_free_pages() - snapshot_get_image_size()) >> 1; + read_pages = clamp_val(read_pages, LZO_CMP_PAGES, LZO_READ_PAGES); - return -ENOMEM; + for (i = 0; i < read_pages; i++) { + page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ? + __GFP_WAIT | __GFP_HIGH : + __GFP_WAIT); + if (!page[i]) { + if (i < LZO_CMP_PAGES) { + ring_size = i; + printk(KERN_ERR + "PM: Failed to allocate LZO pages\n"); + ret = -ENOMEM; + goto out_clean; + } else { + break; + } + } } + want = ring_size = i; printk(KERN_INFO + "PM: Using %u thread(s) for decompression.\n" "PM: Loading and decompressing image data (%u pages) ... ", - nr_to_read); + nr_threads, nr_to_read); m = nr_to_read / 100; if (!m) m = 1; @@ -804,85 +1162,189 @@ static int load_image_lzo(struct swap_map_handle *handle, bio = NULL; do_gettimeofday(&start); - error = snapshot_write_next(snapshot); - if (error <= 0) + ret = snapshot_write_next(snapshot); + if (ret <= 0) goto out_finish; - for (;;) { - error = swap_read_page(handle, page[0], NULL); /* sync */ - if (error) - break; - - cmp_len = *(size_t *)page[0]; - if (unlikely(!cmp_len || - cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) { - printk(KERN_ERR "PM: Invalid LZO compressed length\n"); - error = -1; - break; + for(;;) { + for (i = 0; !eof && i < want; i++) { + ret = swap_read_page(handle, page[ring], &bio); + if (ret) { + /* + * On real read error, finish. On end of data, + * set EOF flag and just exit the read loop. + */ + if (handle->cur && + handle->cur->entries[handle->k]) { + goto out_finish; + } else { + eof = 1; + break; + } + } + if (++ring >= ring_size) + ring = 0; } + asked += i; + want -= i; - for (off = PAGE_SIZE, i = 1; - off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) { - error = swap_read_page(handle, page[i], &bio); - if (error) + /* + * We are out of data, wait for some more. + */ + if (!have) { + if (!asked) + break; + + ret = hib_wait_on_bio_chain(&bio); + if (ret) goto out_finish; + have += asked; + asked = 0; + if (eof) + eof = 2; } - error = hib_wait_on_bio_chain(&bio); /* need all data now */ - if (error) - goto out_finish; - - for (off = 0, i = 0; - off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) { - memcpy(cmp + off, page[i], PAGE_SIZE); + if (crc->run_threads) { + wait_event(crc->done, atomic_read(&crc->stop)); + atomic_set(&crc->stop, 0); + crc->run_threads = 0; } - unc_len = LZO_UNC_SIZE; - error = lzo1x_decompress_safe(cmp + LZO_HEADER, cmp_len, - unc, &unc_len); - if (error < 0) { - printk(KERN_ERR "PM: LZO decompression failed\n"); - break; + for (thr = 0; have && thr < nr_threads; thr++) { + data[thr].cmp_len = *(size_t *)page[pg]; + if (unlikely(!data[thr].cmp_len || + data[thr].cmp_len > + lzo1x_worst_compress(LZO_UNC_SIZE))) { + printk(KERN_ERR + "PM: Invalid LZO compressed length\n"); + ret = -1; + goto out_finish; + } + + need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER, + PAGE_SIZE); + if (need > have) { + if (eof > 1) { + ret = -1; + goto out_finish; + } + break; + } + + for (off = 0; + off < LZO_HEADER + data[thr].cmp_len; + off += PAGE_SIZE) { + memcpy(data[thr].cmp + off, + page[pg], PAGE_SIZE); + have--; + want++; + if (++pg >= ring_size) + pg = 0; + } + + atomic_set(&data[thr].ready, 1); + wake_up(&data[thr].go); } - if (unlikely(!unc_len || - unc_len > LZO_UNC_SIZE || - unc_len & (PAGE_SIZE - 1))) { - printk(KERN_ERR "PM: Invalid LZO uncompressed length\n"); - error = -1; - break; + /* + * Wait for more data while we are decompressing. + */ + if (have < LZO_CMP_PAGES && asked) { + ret = hib_wait_on_bio_chain(&bio); + if (ret) + goto out_finish; + have += asked; + asked = 0; + if (eof) + eof = 2; } - for (off = 0; off < unc_len; off += PAGE_SIZE) { - memcpy(data_of(*snapshot), unc + off, PAGE_SIZE); + for (run_threads = thr, thr = 0; thr < run_threads; thr++) { + wait_event(data[thr].done, + atomic_read(&data[thr].stop)); + atomic_set(&data[thr].stop, 0); + + ret = data[thr].ret; - if (!(nr_pages % m)) - printk("\b\b\b\b%3d%%", nr_pages / m); - nr_pages++; + if (ret < 0) { + printk(KERN_ERR + "PM: LZO decompression failed\n"); + goto out_finish; + } - error = snapshot_write_next(snapshot); - if (error <= 0) + if (unlikely(!data[thr].unc_len || + data[thr].unc_len > LZO_UNC_SIZE || + data[thr].unc_len & (PAGE_SIZE - 1))) { + printk(KERN_ERR + "PM: Invalid LZO uncompressed length\n"); + ret = -1; goto out_finish; + } + + for (off = 0; + off < data[thr].unc_len; off += PAGE_SIZE) { + memcpy(data_of(*snapshot), + data[thr].unc + off, PAGE_SIZE); + + if (!(nr_pages % m)) + printk("\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; + + ret = snapshot_write_next(snapshot); + if (ret <= 0) { + crc->run_threads = thr + 1; + atomic_set(&crc->ready, 1); + wake_up(&crc->go); + goto out_finish; + } + } } + + crc->run_threads = thr; + atomic_set(&crc->ready, 1); + wake_up(&crc->go); } out_finish: + if (crc->run_threads) { + wait_event(crc->done, atomic_read(&crc->stop)); + atomic_set(&crc->stop, 0); + } do_gettimeofday(&stop); - if (!error) { + if (!ret) { printk("\b\b\b\bdone\n"); snapshot_write_finalize(snapshot); if (!snapshot_image_loaded(snapshot)) - error = -ENODATA; + ret = -ENODATA; + if (!ret) { + if (swsusp_header->flags & SF_CRC32_MODE) { + if(handle->crc32 != swsusp_header->crc32) { + printk(KERN_ERR + "PM: Invalid image CRC32!\n"); + ret = -ENODATA; + } + } + } } else printk("\n"); swsusp_show_speed(&start, &stop, nr_to_read, "Read"); - - vfree(cmp); - vfree(unc); - for (i = 0; i < LZO_CMP_PAGES; i++) +out_clean: + for (i = 0; i < ring_size; i++) free_page((unsigned long)page[i]); + if (crc) { + if (crc->thr) + kthread_stop(crc->thr); + kfree(crc); + } + if (data) { + for (thr = 0; thr < nr_threads; thr++) + if (data[thr].thr) + kthread_stop(data[thr].thr); + vfree(data); + } + if (page) vfree(page); - return error; + return ret; } /** -- cgit v0.10.2 From 4ca46ff3e0d8c234cb40ebb6457653b59584426c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 16 Oct 2011 23:34:36 +0200 Subject: PM / Sleep: Mark devices involved in wakeup signaling during suspend The generic PM domains code in drivers/base/power/domain.c has to avoid powering off domains that provide power to wakeup devices during system suspend. Currently, however, this only works for wakeup devices directly belonging to the given domain and not for their children (or the children of their children and so on). Thus, if there's a wakeup device whose parent belongs to a power domain handled by the generic PM domains code, the domain will be powered off during system suspend preventing the device from signaling wakeup. To address this problem introduce a device flag, power.wakeup_path, that will be set during system suspend for all wakeup devices, their parents, the parents of their parents and so on. This way, all wakeup paths in the device hierarchy will be marked and the generic PM domains code will only need to avoid powering off domains containing devices whose power.wakeup_path is set. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 22fe029..6790cf7 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -714,7 +714,7 @@ static int pm_genpd_suspend_noirq(struct device *dev) if (ret) return ret; - if (device_may_wakeup(dev) + if (dev->power.wakeup_path && genpd->active_wakeup && genpd->active_wakeup(dev)) return 0; @@ -938,7 +938,7 @@ static int pm_genpd_dev_poweroff_noirq(struct device *dev) if (ret) return ret; - if (device_may_wakeup(dev) + if (dev->power.wakeup_path && genpd->active_wakeup && genpd->active_wakeup(dev)) return 0; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index a854591..1e15732 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -902,7 +902,11 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) } End: - dev->power.is_suspended = !error; + if (!error) { + dev->power.is_suspended = true; + if (dev->power.wakeup_path && dev->parent) + dev->parent->power.wakeup_path = true; + } device_unlock(dev); complete_all(&dev->power.completion); @@ -999,6 +1003,8 @@ static int device_prepare(struct device *dev, pm_message_t state) device_lock(dev); + dev->power.wakeup_path = device_may_wakeup(dev); + if (dev->pm_domain) { pm_dev_dbg(dev, state, "preparing power domain "); if (dev->pm_domain->ops.prepare) diff --git a/include/linux/pm.h b/include/linux/pm.h index f256824..74711a9 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -448,6 +448,7 @@ struct dev_pm_info { struct list_head entry; struct completion completion; struct wakeup_source *wakeup; + bool wakeup_path:1; #else unsigned int should_wakeup:1; #endif -- cgit v0.10.2 From d93f5cdea968284f05aa9905ee9752874885a6fa Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 19 Oct 2011 23:52:41 +0200 Subject: ARM: mach-shmobile: sh7372 A3SP support (v4) This change adds support for the sh7372 A3SP power domain. The sh7372 A3SP hardware power domain contains a wide range of I/O devices. The list of I/O devices include SCIF serial ports, DMA Engine hardware, SD and MMC controller hardware, USB controllers and I2C master controllers. This patch adds the A3SP low level code which powers the hardware power domain on and off. It also ties in platform devices to the pm domain support code. It is worth noting that the serial console is hooked up to SCIFA0 on most sh7372 boards, and the SCIFA0 port is included in the A3SP hardware power domain. For this reason we cannot output debug messages from the low level power control code in the case of A3SP. QoS support is needed in drivers before we can enable the A3SP power control on the fly. Signed-off-by: Magnus Damm Signed-off-by: Rafael J. Wysocki diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index d6c8ae8..bf4f637 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -1409,6 +1409,10 @@ static void __init ap4evb_init(void) sh7372_add_device_to_domain(&sh7372_a4lc, &lcdc_device); sh7372_add_device_to_domain(&sh7372_a4mp, &fsi_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &sh_mmcif_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &sdhi0_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &sdhi1_device); + hdmi_init_pm_clock(); fsi_init_pm_clock(); sh7372_pm_init(); diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index 19f5d49..fdb1ca3 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -1588,6 +1588,14 @@ static void __init mackerel_init(void) sh7372_add_device_to_domain(&sh7372_a4lc, &lcdc_device); sh7372_add_device_to_domain(&sh7372_a4lc, &hdmi_lcdc_device); sh7372_add_device_to_domain(&sh7372_a4mp, &fsi_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &usbhs0_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &usbhs1_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &sh_mmcif_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &sdhi0_device); +#if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE) + sh7372_add_device_to_domain(&sh7372_a3sp, &sdhi1_device); +#endif + sh7372_add_device_to_domain(&sh7372_a3sp, &sdhi2_device); hdmi_init_pm_clock(); sh7372_pm_init(); diff --git a/arch/arm/mach-shmobile/include/mach/sh7372.h b/arch/arm/mach-shmobile/include/mach/sh7372.h index efc984c..8542f2d 100644 --- a/arch/arm/mach-shmobile/include/mach/sh7372.h +++ b/arch/arm/mach-shmobile/include/mach/sh7372.h @@ -479,7 +479,9 @@ struct platform_device; struct sh7372_pm_domain { struct generic_pm_domain genpd; + struct dev_power_governor *gov; unsigned int bit_shift; + bool no_debug; }; static inline struct sh7372_pm_domain *to_sh7372_pd(struct generic_pm_domain *d) @@ -493,6 +495,7 @@ extern struct sh7372_pm_domain sh7372_a4mp; extern struct sh7372_pm_domain sh7372_d4; extern struct sh7372_pm_domain sh7372_a3rv; extern struct sh7372_pm_domain sh7372_a3ri; +extern struct sh7372_pm_domain sh7372_a3sp; extern struct sh7372_pm_domain sh7372_a3sg; extern void sh7372_init_pm_domain(struct sh7372_pm_domain *sh7372_pd); diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index 8e0944f..fde619d 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -92,8 +92,9 @@ static int pd_power_down(struct generic_pm_domain *genpd) } } - pr_debug("sh7372 power domain down 0x%08x -> PSTR = 0x%08x\n", - mask, __raw_readl(PSTR)); + if (!sh7372_pd->no_debug) + pr_debug("sh7372 power domain down 0x%08x -> PSTR = 0x%08x\n", + mask, __raw_readl(PSTR)); return 0; } @@ -122,8 +123,9 @@ static int pd_power_up(struct generic_pm_domain *genpd) ret = -EIO; out: - pr_debug("sh7372 power domain up 0x%08x -> PSTR = 0x%08x\n", - mask, __raw_readl(PSTR)); + if (!sh7372_pd->no_debug) + pr_debug("sh7372 power domain up 0x%08x -> PSTR = 0x%08x\n", + mask, __raw_readl(PSTR)); return ret; } @@ -133,11 +135,20 @@ static bool pd_active_wakeup(struct device *dev) return true; } +static bool sh7372_power_down_forbidden(struct dev_pm_domain *domain) +{ + return false; +} + +struct dev_power_governor sh7372_always_on_gov = { + .power_down_ok = sh7372_power_down_forbidden, +}; + void sh7372_init_pm_domain(struct sh7372_pm_domain *sh7372_pd) { struct generic_pm_domain *genpd = &sh7372_pd->genpd; - pm_genpd_init(genpd, NULL, false); + pm_genpd_init(genpd, sh7372_pd->gov, false); genpd->stop_device = pm_clk_suspend; genpd->start_device = pm_clk_resume; genpd->dev_irq_safe = true; @@ -183,6 +194,12 @@ struct sh7372_pm_domain sh7372_a3ri = { .bit_shift = 8, }; +struct sh7372_pm_domain sh7372_a3sp = { + .bit_shift = 11, + .gov = &sh7372_always_on_gov, + .no_debug = true, +}; + struct sh7372_pm_domain sh7372_a3sg = { .bit_shift = 13, }; @@ -422,6 +439,9 @@ void __init sh7372_pm_init(void) __raw_writel(0x0000a501, DBGREG9); __raw_writel(0x00000000, DBGREG1); + /* do not convert A3SM, A3SP, A3SG, A4R power down into A4S */ + __raw_writel(0, PDNSEL); + sh7372_suspend_init(); sh7372_cpuidle_init(); } diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c index d317c22..5f1afcc 100644 --- a/arch/arm/mach-shmobile/setup-sh7372.c +++ b/arch/arm/mach-shmobile/setup-sh7372.c @@ -994,6 +994,7 @@ void __init sh7372_add_standard_devices(void) sh7372_init_pm_domain(&sh7372_a3rv); sh7372_init_pm_domain(&sh7372_a3ri); sh7372_init_pm_domain(&sh7372_a3sg); + sh7372_init_pm_domain(&sh7372_a3sp); sh7372_pm_add_subdomain(&sh7372_a4lc, &sh7372_a3rv); @@ -1006,6 +1007,19 @@ void __init sh7372_add_standard_devices(void) sh7372_add_device_to_domain(&sh7372_a3rv, &vpu_device); sh7372_add_device_to_domain(&sh7372_a4mp, &spu0_device); sh7372_add_device_to_domain(&sh7372_a4mp, &spu1_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &scif0_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &scif1_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &scif2_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &scif3_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &scif4_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &scif5_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &scif6_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &iic1_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &dma0_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &dma1_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &dma2_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &usb_dma0_device); + sh7372_add_device_to_domain(&sh7372_a3sp, &usb_dma1_device); } void __init sh7372_add_early_devices(void) -- cgit v0.10.2 From 382414b93ac1e8ee7693be710e60c83eacc97c6f Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 19 Oct 2011 23:52:50 +0200 Subject: ARM: mach-shmobile: sh7372 A4R support (v4) This change adds support for the sh7372 A4R power domain. The sh7372 A4R hardware power domain contains the SH CPU Core and a set of I/O devices including multimedia accelerators and I2C controllers. One special case about A4R is the INTCS interrupt controller that needs to be saved and restored to keep working as expected. Also the LCDC hardware blocks are in a different hardware power domain but have their IRQs routed only through INTCS. So as long as LCDCs are active we cannot power down INTCS because that would risk losing interrupts. Signed-off-by: Magnus Damm Signed-off-by: Rafael J. Wysocki diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index bf4f637..7e90d06 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -1412,6 +1412,7 @@ static void __init ap4evb_init(void) sh7372_add_device_to_domain(&sh7372_a3sp, &sh_mmcif_device); sh7372_add_device_to_domain(&sh7372_a3sp, &sdhi0_device); sh7372_add_device_to_domain(&sh7372_a3sp, &sdhi1_device); + sh7372_add_device_to_domain(&sh7372_a4r, &ceu_device); hdmi_init_pm_clock(); fsi_init_pm_clock(); diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index fdb1ca3..56d4fed 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -1596,6 +1596,7 @@ static void __init mackerel_init(void) sh7372_add_device_to_domain(&sh7372_a3sp, &sdhi1_device); #endif sh7372_add_device_to_domain(&sh7372_a3sp, &sdhi2_device); + sh7372_add_device_to_domain(&sh7372_a4r, &ceu_device); hdmi_init_pm_clock(); sh7372_pm_init(); diff --git a/arch/arm/mach-shmobile/include/mach/sh7372.h b/arch/arm/mach-shmobile/include/mach/sh7372.h index 8542f2d..84532f9 100644 --- a/arch/arm/mach-shmobile/include/mach/sh7372.h +++ b/arch/arm/mach-shmobile/include/mach/sh7372.h @@ -480,8 +480,11 @@ struct platform_device; struct sh7372_pm_domain { struct generic_pm_domain genpd; struct dev_power_governor *gov; + void (*suspend)(void); + void (*resume)(void); unsigned int bit_shift; bool no_debug; + bool stay_on; }; static inline struct sh7372_pm_domain *to_sh7372_pd(struct generic_pm_domain *d) @@ -493,6 +496,7 @@ static inline struct sh7372_pm_domain *to_sh7372_pd(struct generic_pm_domain *d) extern struct sh7372_pm_domain sh7372_a4lc; extern struct sh7372_pm_domain sh7372_a4mp; extern struct sh7372_pm_domain sh7372_d4; +extern struct sh7372_pm_domain sh7372_a4r; extern struct sh7372_pm_domain sh7372_a3rv; extern struct sh7372_pm_domain sh7372_a3ri; extern struct sh7372_pm_domain sh7372_a3sp; @@ -509,4 +513,7 @@ extern void sh7372_pm_add_subdomain(struct sh7372_pm_domain *sh7372_pd, #define sh7372_pm_add_subdomain(pd, sd) do { } while(0) #endif /* CONFIG_PM */ +extern void sh7372_intcs_suspend(void); +extern void sh7372_intcs_resume(void); + #endif /* __ASM_SH7372_H__ */ diff --git a/arch/arm/mach-shmobile/intc-sh7372.c b/arch/arm/mach-shmobile/intc-sh7372.c index 739315e3..29cdc05 100644 --- a/arch/arm/mach-shmobile/intc-sh7372.c +++ b/arch/arm/mach-shmobile/intc-sh7372.c @@ -606,9 +606,16 @@ static void intcs_demux(unsigned int irq, struct irq_desc *desc) generic_handle_irq(intcs_evt2irq(evtcodeas)); } +static void __iomem *intcs_ffd2; +static void __iomem *intcs_ffd5; + void __init sh7372_init_irq(void) { - void __iomem *intevtsa = ioremap_nocache(0xffd20100, PAGE_SIZE); + void __iomem *intevtsa; + + intcs_ffd2 = ioremap_nocache(0xffd20000, PAGE_SIZE); + intevtsa = intcs_ffd2 + 0x100; + intcs_ffd5 = ioremap_nocache(0xffd50000, PAGE_SIZE); register_intc_controller(&intca_desc); register_intc_controller(&intcs_desc); @@ -617,3 +624,46 @@ void __init sh7372_init_irq(void) irq_set_handler_data(evt2irq(0xf80), (void *)intevtsa); irq_set_chained_handler(evt2irq(0xf80), intcs_demux); } + +static unsigned short ffd2[0x200]; +static unsigned short ffd5[0x100]; + +void sh7372_intcs_suspend(void) +{ + int k; + + for (k = 0x00; k <= 0x30; k += 4) + ffd2[k] = __raw_readw(intcs_ffd2 + k); + + for (k = 0x80; k <= 0xb0; k += 4) + ffd2[k] = __raw_readb(intcs_ffd2 + k); + + for (k = 0x180; k <= 0x188; k += 4) + ffd2[k] = __raw_readb(intcs_ffd2 + k); + + for (k = 0x00; k <= 0x3c; k += 4) + ffd5[k] = __raw_readw(intcs_ffd5 + k); + + for (k = 0x80; k <= 0x9c; k += 4) + ffd5[k] = __raw_readb(intcs_ffd5 + k); +} + +void sh7372_intcs_resume(void) +{ + int k; + + for (k = 0x00; k <= 0x30; k += 4) + __raw_writew(ffd2[k], intcs_ffd2 + k); + + for (k = 0x80; k <= 0xb0; k += 4) + __raw_writeb(ffd2[k], intcs_ffd2 + k); + + for (k = 0x180; k <= 0x188; k += 4) + __raw_writeb(ffd2[k], intcs_ffd2 + k); + + for (k = 0x00; k <= 0x3c; k += 4) + __raw_writew(ffd5[k], intcs_ffd5 + k); + + for (k = 0x80; k <= 0x9c; k += 4) + __raw_writeb(ffd5[k], intcs_ffd5 + k); +} diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index fde619d..7961273 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -44,6 +44,7 @@ #define SPDCR 0xe6180008 #define SWUCR 0xe6180014 #define SBAR 0xe6180020 +#define WUPRMSK 0xe6180028 #define WUPSMSK 0xe618002c #define WUPSMSK2 0xe6180048 #define PSTR 0xe6180080 @@ -80,6 +81,12 @@ static int pd_power_down(struct generic_pm_domain *genpd) struct sh7372_pm_domain *sh7372_pd = to_sh7372_pd(genpd); unsigned int mask = 1 << sh7372_pd->bit_shift; + if (sh7372_pd->suspend) + sh7372_pd->suspend(); + + if (sh7372_pd->stay_on) + return 0; + if (__raw_readl(PSTR) & mask) { unsigned int retry_count; @@ -106,6 +113,9 @@ static int pd_power_up(struct generic_pm_domain *genpd) unsigned int retry_count; int ret = 0; + if (sh7372_pd->stay_on) + goto out; + if (__raw_readl(PSTR) & mask) goto out; @@ -122,14 +132,23 @@ static int pd_power_up(struct generic_pm_domain *genpd) if (__raw_readl(SWUCR) & mask) ret = -EIO; - out: if (!sh7372_pd->no_debug) pr_debug("sh7372 power domain up 0x%08x -> PSTR = 0x%08x\n", mask, __raw_readl(PSTR)); + out: + if (ret == 0 && sh7372_pd->resume) + sh7372_pd->resume(); + return ret; } +static void sh7372_a4r_suspend(void) +{ + sh7372_intcs_suspend(); + __raw_writel(0x300fffff, WUPRMSK); /* avoid wakeup */ +} + static bool pd_active_wakeup(struct device *dev) { return true; @@ -186,6 +205,14 @@ struct sh7372_pm_domain sh7372_d4 = { .bit_shift = 3, }; +struct sh7372_pm_domain sh7372_a4r = { + .bit_shift = 5, + .gov = &sh7372_always_on_gov, + .suspend = sh7372_a4r_suspend, + .resume = sh7372_intcs_resume, + .stay_on = true, +}; + struct sh7372_pm_domain sh7372_a3rv = { .bit_shift = 6, }; diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c index 5f1afcc..2380389 100644 --- a/arch/arm/mach-shmobile/setup-sh7372.c +++ b/arch/arm/mach-shmobile/setup-sh7372.c @@ -991,12 +991,14 @@ void __init sh7372_add_standard_devices(void) sh7372_init_pm_domain(&sh7372_a4lc); sh7372_init_pm_domain(&sh7372_a4mp); sh7372_init_pm_domain(&sh7372_d4); + sh7372_init_pm_domain(&sh7372_a4r); sh7372_init_pm_domain(&sh7372_a3rv); sh7372_init_pm_domain(&sh7372_a3ri); sh7372_init_pm_domain(&sh7372_a3sg); sh7372_init_pm_domain(&sh7372_a3sp); sh7372_pm_add_subdomain(&sh7372_a4lc, &sh7372_a3rv); + sh7372_pm_add_subdomain(&sh7372_a4r, &sh7372_a4lc); platform_add_devices(sh7372_early_devices, ARRAY_SIZE(sh7372_early_devices)); @@ -1020,6 +1022,12 @@ void __init sh7372_add_standard_devices(void) sh7372_add_device_to_domain(&sh7372_a3sp, &dma2_device); sh7372_add_device_to_domain(&sh7372_a3sp, &usb_dma0_device); sh7372_add_device_to_domain(&sh7372_a3sp, &usb_dma1_device); + sh7372_add_device_to_domain(&sh7372_a4r, &iic0_device); + sh7372_add_device_to_domain(&sh7372_a4r, &veu0_device); + sh7372_add_device_to_domain(&sh7372_a4r, &veu1_device); + sh7372_add_device_to_domain(&sh7372_a4r, &veu2_device); + sh7372_add_device_to_domain(&sh7372_a4r, &veu3_device); + sh7372_add_device_to_domain(&sh7372_a4r, &jpu_device); } void __init sh7372_add_early_devices(void) -- cgit v0.10.2 From d11c78e97e1d46a93eb468794da82a090143a72e Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 19 Oct 2011 23:15:11 +0200 Subject: ACPI / PM: Add Sony VGN-FW21E to nonvs blacklist. As noted by a user in https://bugzilla.redhat.com/show_bug.cgi?id=641789 The Sony VGN-FW21E also needs the nonvs by default workaround added. Signed-off-by: Dave Jones Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 28a3e96..0e46fae 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -390,6 +390,14 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { }, { .callback = init_nvs_nosave, + .ident = "Sony Vaio VGN-FW21E", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "VGN-FW21E"), + }, + }, + { + .callback = init_nvs_nosave, .ident = "Sony Vaio VGN-SR11M", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), -- cgit v0.10.2 From 7fef9fc83fbd7293ea9fe665d14046422ebf4219 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Wed, 19 Oct 2011 23:59:05 +0200 Subject: PM / Documentation: Update docs about suspend and CPU hotplug Update the documentation about the interaction between the suspend (S3) call path and the CPU hotplug infrastructure. This patch focusses only on the activities of the freezer, cpu hotplug and the notifications involved. It outlines how regular CPU hotplug differs from the way it is invoked during suspend and also tries to explain the locking involved. In addition to that, it discusses the issue of microcode update during CPU hotplug operations. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/power/00-INDEX b/Documentation/power/00-INDEX index 45e9d4a..a4d682f 100644 --- a/Documentation/power/00-INDEX +++ b/Documentation/power/00-INDEX @@ -26,6 +26,8 @@ s2ram.txt - How to get suspend to ram working (and debug it when it isn't) states.txt - System power management states +suspend-and-cpuhotplug.txt + - Explains the interaction between Suspend-to-RAM (S3) and CPU hotplug swsusp-and-swap-files.txt - Using swap files with software suspend (to disk) swsusp-dmcrypt.txt diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.txt new file mode 100644 index 0000000..f28f9a6 --- /dev/null +++ b/Documentation/power/suspend-and-cpuhotplug.txt @@ -0,0 +1,275 @@ +Interaction of Suspend code (S3) with the CPU hotplug infrastructure + + (C) 2011 Srivatsa S. Bhat + + +I. How does the regular CPU hotplug code differ from how the Suspend-to-RAM + infrastructure uses it internally? And where do they share common code? + +Well, a picture is worth a thousand words... So ASCII art follows :-) + +[This depicts the current design in the kernel, and focusses only on the +interactions involving the freezer and CPU hotplug and also tries to explain +the locking involved. It outlines the notifications involved as well. +But please note that here, only the call paths are illustrated, with the aim +of describing where they take different paths and where they share code. +What happens when regular CPU hotplug and Suspend-to-RAM race with each other +is not depicted here.] + +On a high level, the suspend-resume cycle goes like this: + +|Freeze| -> |Disable nonboot| -> |Do suspend| -> |Enable nonboot| -> |Thaw | +|tasks | | cpus | | | | cpus | |tasks| + + +More details follow: + + Suspend call path + ----------------- + + Write 'mem' to + /sys/power/state + syfs file + | + v + Acquire pm_mutex lock + | + v + Send PM_SUSPEND_PREPARE + notifications + | + v + Freeze tasks + | + | + v + disable_nonboot_cpus() + /* start */ + | + v + Acquire cpu_add_remove_lock + | + v + Iterate over CURRENTLY + online CPUs + | + | + | ---------- + v | L + ======> _cpu_down() | + | [This takes cpuhotplug.lock | + Common | before taking down the CPU | + code | and releases it when done] | O + | While it is at it, notifications | + | are sent when notable events occur, | + ======> by running all registered callbacks. | + | | O + | | + | | + v | + Note down these cpus in | P + frozen_cpus mask ---------- + | + v + Disable regular cpu hotplug + by setting cpu_hotplug_disabled=1 + | + v + Release cpu_add_remove_lock + | + v + /* disable_nonboot_cpus() complete */ + | + v + Do suspend + + + +Resuming back is likewise, with the counterparts being (in the order of +execution during resume): +* enable_nonboot_cpus() which involves: + | Acquire cpu_add_remove_lock + | Reset cpu_hotplug_disabled to 0, thereby enabling regular cpu hotplug + | Call _cpu_up() [for all those cpus in the frozen_cpus mask, in a loop] + | Release cpu_add_remove_lock + v + +* thaw tasks +* send PM_POST_SUSPEND notifications +* Release pm_mutex lock. + + +It is to be noted here that the pm_mutex lock is acquired at the very +beginning, when we are just starting out to suspend, and then released only +after the entire cycle is complete (i.e., suspend + resume). + + + + Regular CPU hotplug call path + ----------------------------- + + Write 0 (or 1) to + /sys/devices/system/cpu/cpu*/online + sysfs file + | + | + v + cpu_down() + | + v + Acquire cpu_add_remove_lock + | + v + If cpu_hotplug_disabled is 1 + return gracefully + | + | + v + ======> _cpu_down() + | [This takes cpuhotplug.lock + Common | before taking down the CPU + code | and releases it when done] + | While it is at it, notifications + | are sent when notable events occur, + ======> by running all registered callbacks. + | + | + v + Release cpu_add_remove_lock + [That's it!, for + regular CPU hotplug] + + + +So, as can be seen from the two diagrams (the parts marked as "Common code"), +regular CPU hotplug and the suspend code path converge at the _cpu_down() and +_cpu_up() functions. They differ in the arguments passed to these functions, +in that during regular CPU hotplug, 0 is passed for the 'tasks_frozen' +argument. But during suspend, since the tasks are already frozen by the time +the non-boot CPUs are offlined or onlined, the _cpu_*() functions are called +with the 'tasks_frozen' argument set to 1. +[See below for some known issues regarding this.] + + +Important files and functions/entry points: +------------------------------------------ + +kernel/power/process.c : freeze_processes(), thaw_processes() +kernel/power/suspend.c : suspend_prepare(), suspend_enter(), suspend_finish() +kernel/cpu.c: cpu_[up|down](), _cpu_[up|down](), [disable|enable]_nonboot_cpus() + + + +II. What are the issues involved in CPU hotplug? + ------------------------------------------- + +There are some interesting situations involving CPU hotplug and microcode +update on the CPUs, as discussed below: + +[Please bear in mind that the kernel requests the microcode images from +userspace, using the request_firmware() function defined in +drivers/base/firmware_class.c] + + +a. When all the CPUs are identical: + + This is the most common situation and it is quite straightforward: we want + to apply the same microcode revision to each of the CPUs. + To give an example of x86, the collect_cpu_info() function defined in + arch/x86/kernel/microcode_core.c helps in discovering the type of the CPU + and thereby in applying the correct microcode revision to it. + But note that the kernel does not maintain a common microcode image for the + all CPUs, in order to handle case 'b' described below. + + +b. When some of the CPUs are different than the rest: + + In this case since we probably need to apply different microcode revisions + to different CPUs, the kernel maintains a copy of the correct microcode + image for each CPU (after appropriate CPU type/model discovery using + functions such as collect_cpu_info()). + + +c. When a CPU is physically hot-unplugged and a new (and possibly different + type of) CPU is hot-plugged into the system: + + In the current design of the kernel, whenever a CPU is taken offline during + a regular CPU hotplug operation, upon receiving the CPU_DEAD notification + (which is sent by the CPU hotplug code), the microcode update driver's + callback for that event reacts by freeing the kernel's copy of the + microcode image for that CPU. + + Hence, when a new CPU is brought online, since the kernel finds that it + doesn't have the microcode image, it does the CPU type/model discovery + afresh and then requests the userspace for the appropriate microcode image + for that CPU, which is subsequently applied. + + For example, in x86, the mc_cpu_callback() function (which is the microcode + update driver's callback registered for CPU hotplug events) calls + microcode_update_cpu() which would call microcode_init_cpu() in this case, + instead of microcode_resume_cpu() when it finds that the kernel doesn't + have a valid microcode image. This ensures that the CPU type/model + discovery is performed and the right microcode is applied to the CPU after + getting it from userspace. + + +d. Handling microcode update during suspend/hibernate: + + Strictly speaking, during a CPU hotplug operation which does not involve + physically removing or inserting CPUs, the CPUs are not actually powered + off during a CPU offline. They are just put to the lowest C-states possible. + Hence, in such a case, it is not really necessary to re-apply microcode + when the CPUs are brought back online, since they wouldn't have lost the + image during the CPU offline operation. + + This is the usual scenario encountered during a resume after a suspend. + However, in the case of hibernation, since all the CPUs are completely + powered off, during restore it becomes necessary to apply the microcode + images to all the CPUs. + + [Note that we don't expect someone to physically pull out nodes and insert + nodes with a different type of CPUs in-between a suspend-resume or a + hibernate/restore cycle.] + + In the current design of the kernel however, during a CPU offline operation + as part of the suspend/hibernate cycle (the CPU_DEAD_FROZEN notification), + the existing copy of microcode image in the kernel is not freed up. + And during the CPU online operations (during resume/restore), since the + kernel finds that it already has copies of the microcode images for all the + CPUs, it just applies them to the CPUs, avoiding any re-discovery of CPU + type/model and the need for validating whether the microcode revisions are + right for the CPUs or not (due to the above assumption that physical CPU + hotplug will not be done in-between suspend/resume or hibernate/restore + cycles). + + +III. Are there any known problems when regular CPU hotplug and suspend race + with each other? + +Yes, they are listed below: + +1. When invoking regular CPU hotplug, the 'tasks_frozen' argument passed to + the _cpu_down() and _cpu_up() functions is *always* 0. + This might not reflect the true current state of the system, since the + tasks could have been frozen by an out-of-band event such as a suspend + operation in progress. Hence, it will lead to wrong notifications being + sent during the cpu online/offline events (eg, CPU_ONLINE notification + instead of CPU_ONLINE_FROZEN) which in turn will lead to execution of + inappropriate code by the callbacks registered for such CPU hotplug events. + +2. If a regular CPU hotplug stress test happens to race with the freezer due + to a suspend operation in progress at the same time, then we could hit the + situation described below: + + * A regular cpu online operation continues its journey from userspace + into the kernel, since the freezing has not yet begun. + * Then freezer gets to work and freezes userspace. + * If cpu online has not yet completed the microcode update stuff by now, + it will now start waiting on the frozen userspace in the + TASK_UNINTERRUPTIBLE state, in order to get the microcode image. + * Now the freezer continues and tries to freeze the remaining tasks. But + due to this wait mentioned above, the freezer won't be able to freeze + the cpu online hotplug task and hence freezing of tasks fails. + + As a result of this task freezing failure, the suspend operation gets + aborted. -- cgit v0.10.2 From 0ab1e79b825a5cd8aeb3b34d89c9a89dea900056 Mon Sep 17 00:00:00 2001 From: Jonghwan Choi Date: Sat, 22 Oct 2011 00:22:54 +0200 Subject: PM / Clocks: Remove redundant NULL checks before kfree() Since kfree() checks it its argument is not NULL, it is not necessary to duplicate this check in __pm_clk_remove(). [rjw: Added the changelog.] Signed-off-by: Jonghwan Choi Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index b876e60..5f0f85d 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -104,9 +104,7 @@ static void __pm_clk_remove(struct pm_clock_entry *ce) clk_put(ce->clk); } - if (ce->con_id) - kfree(ce->con_id); - + kfree(ce->con_id); kfree(ce); } -- cgit v0.10.2