From 90b5c1d7c45eeb622302680ff96ed30c1a2b6f0e Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 24 Oct 2012 14:54:13 +0800 Subject: PCI/PM: Fix deadlock when unbinding device if parent in D3cold If a PCI device and its parents are put into D3cold, unbinding the device will trigger deadlock as follow: - driver_unbind - device_release_driver - device_lock(dev) <--- previous lock here - __device_release_driver - pm_runtime_get_sync ... - rpm_resume(dev) - rpm_resume(dev->parent) ... - pci_pm_runtime_resume ... - pci_set_power_state - __pci_start_power_transition - pci_wakeup_bus(dev->parent->subordinate) - pci_walk_bus - device_lock(dev) <--- deadlock here If we do not do device_lock in pci_walk_bus, we can avoid deadlock. Device_lock in pci_walk_bus is introduced in commit: d71374dafbba7ec3f67371d3b7e9f6310a588808, corresponding email thread is: https://lkml.org/lkml/2006/5/26/38. The patch author Zhang Yanmin said device_lock is added to pci_walk_bus because: Some error handling functions call pci_walk_bus. For example, PCIe aer. Here we lock the device, so the driver wouldn't detach from the device, as the cb might call driver's callback function. So I fixed the deadlock as follows: - remove device_lock from pci_walk_bus - add device_lock into callback if callback will call driver's callback I checked pci_walk_bus users one by one, and found only PCIe aer needs device lock. Signed-off-by: Huang Ying Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki CC: stable@vger.kernel.org # v3.6+ CC: Zhang Yanmin diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 6241fd0..a543746 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -320,10 +320,7 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), } else next = dev->bus_list.next; - /* Run device routines with the device locked */ - device_lock(&dev->dev); retval = cb(dev, userdata); - device_unlock(&dev->dev); if (retval) break; } diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 06bad96..af4e31c 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -213,6 +213,7 @@ static int report_error_detected(struct pci_dev *dev, void *data) struct aer_broadcast_data *result_data; result_data = (struct aer_broadcast_data *) data; + device_lock(&dev->dev); dev->error_state = result_data->state; if (!dev->driver || @@ -231,12 +232,14 @@ static int report_error_detected(struct pci_dev *dev, void *data) dev->driver ? "no AER-aware driver" : "no driver"); } - return 0; + goto out; } err_handler = dev->driver->err_handler; vote = err_handler->error_detected(dev, result_data->state); result_data->result = merge_result(result_data->result, vote); +out: + device_unlock(&dev->dev); return 0; } @@ -247,14 +250,17 @@ static int report_mmio_enabled(struct pci_dev *dev, void *data) struct aer_broadcast_data *result_data; result_data = (struct aer_broadcast_data *) data; + device_lock(&dev->dev); if (!dev->driver || !dev->driver->err_handler || !dev->driver->err_handler->mmio_enabled) - return 0; + goto out; err_handler = dev->driver->err_handler; vote = err_handler->mmio_enabled(dev); result_data->result = merge_result(result_data->result, vote); +out: + device_unlock(&dev->dev); return 0; } @@ -265,14 +271,17 @@ static int report_slot_reset(struct pci_dev *dev, void *data) struct aer_broadcast_data *result_data; result_data = (struct aer_broadcast_data *) data; + device_lock(&dev->dev); if (!dev->driver || !dev->driver->err_handler || !dev->driver->err_handler->slot_reset) - return 0; + goto out; err_handler = dev->driver->err_handler; vote = err_handler->slot_reset(dev); result_data->result = merge_result(result_data->result, vote); +out: + device_unlock(&dev->dev); return 0; } @@ -280,15 +289,18 @@ static int report_resume(struct pci_dev *dev, void *data) { const struct pci_error_handlers *err_handler; + device_lock(&dev->dev); dev->error_state = pci_channel_io_normal; if (!dev->driver || !dev->driver->err_handler || !dev->driver->err_handler->resume) - return 0; + goto out; err_handler = dev->driver->err_handler; err_handler->resume(dev); +out: + device_unlock(&dev->dev); return 0; } -- cgit v0.10.2 From 3ff2de9ba1a2e22e548979dbcd46e999b22c93d8 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 24 Oct 2012 14:54:14 +0800 Subject: PCI/PM: Resume device before shutdown Some actions during shutdown need device to be in D0 state, such as MSI shutdown etc, so resume device before shutdown. Without this patch, a device may not be enumerated after a kexec because the corresponding bridge is not in D0, so that configuration space of the device is not accessible. Signed-off-by: Huang Ying Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki CC: stable@vger.kernel.org # v3.6+ diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 94c6e2a..6c94fc9 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -398,6 +398,8 @@ static void pci_device_shutdown(struct device *dev) struct pci_dev *pci_dev = to_pci_dev(dev); struct pci_driver *drv = pci_dev->driver; + pm_runtime_resume(dev); + if (drv && drv->shutdown) drv->shutdown(pci_dev); pci_msi_shutdown(pci_dev); @@ -408,16 +410,6 @@ static void pci_device_shutdown(struct device *dev) * continue to do DMA */ pci_disable_device(pci_dev); - - /* - * Devices may be enabled to wake up by runtime PM, but they need not - * be supposed to wake up the system from its "power off" state (e.g. - * ACPI S5). Therefore disable wakeup for all devices that aren't - * supposed to wake up the system at this point. The state argument - * will be ignored by pci_enable_wake(). - */ - if (!device_may_wakeup(dev)) - pci_enable_wake(pci_dev, PCI_UNKNOWN, false); } #ifdef CONFIG_PM -- cgit v0.10.2 From b3c32c4f9565f93407921c0d8a4458042eb8998e Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 25 Oct 2012 09:36:03 +0800 Subject: PCI/PM: Fix proc config reg access for D3cold and bridge suspending In https://bugzilla.kernel.org/show_bug.cgi?id=48981 Peter reported that /proc/bus/pci/??/??.? does not work for 3.6. This is because the device configuration space registers are not accessible if the corresponding parent bridge is suspended or the device is put into D3cold state. This is the same as /sys/bus/pci/devices/0000:??:??.?/config access issue. So the function used to solve sysfs issue is used to solve this issue. This patch moves pci_config_pm_runtime_get()/_put() from pci/pci-sysfs.c to pci/pci.c and makes them extern so they can be used by both the sysfs and proc paths. [bhelgaas: changelog, references, reporters] Reference: https://bugzilla.kernel.org/show_bug.cgi?id=48981 Reference: https://bugzilla.kernel.org/show_bug.cgi?id=49031 Reported-by: Forrest Loomis Reported-by: Peter Reported-by: Micael Dias Signed-off-by: Huang Ying Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki CC: stable@vger.kernel.org # v3.6+ diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 02d107b..f39378d 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -458,40 +458,6 @@ boot_vga_show(struct device *dev, struct device_attribute *attr, char *buf) } struct device_attribute vga_attr = __ATTR_RO(boot_vga); -static void -pci_config_pm_runtime_get(struct pci_dev *pdev) -{ - struct device *dev = &pdev->dev; - struct device *parent = dev->parent; - - if (parent) - pm_runtime_get_sync(parent); - pm_runtime_get_noresume(dev); - /* - * pdev->current_state is set to PCI_D3cold during suspending, - * so wait until suspending completes - */ - pm_runtime_barrier(dev); - /* - * Only need to resume devices in D3cold, because config - * registers are still accessible for devices suspended but - * not in D3cold. - */ - if (pdev->current_state == PCI_D3cold) - pm_runtime_resume(dev); -} - -static void -pci_config_pm_runtime_put(struct pci_dev *pdev) -{ - struct device *dev = &pdev->dev; - struct device *parent = dev->parent; - - pm_runtime_put(dev); - if (parent) - pm_runtime_put_sync(parent); -} - static ssize_t pci_read_config(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 5485883..aabf647 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1858,6 +1858,38 @@ bool pci_dev_run_wake(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_dev_run_wake); +void pci_config_pm_runtime_get(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct device *parent = dev->parent; + + if (parent) + pm_runtime_get_sync(parent); + pm_runtime_get_noresume(dev); + /* + * pdev->current_state is set to PCI_D3cold during suspending, + * so wait until suspending completes + */ + pm_runtime_barrier(dev); + /* + * Only need to resume devices in D3cold, because config + * registers are still accessible for devices suspended but + * not in D3cold. + */ + if (pdev->current_state == PCI_D3cold) + pm_runtime_resume(dev); +} + +void pci_config_pm_runtime_put(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct device *parent = dev->parent; + + pm_runtime_put(dev); + if (parent) + pm_runtime_put_sync(parent); +} + /** * pci_pm_init - Initialize PM functions of given PCI device * @dev: PCI device to handle. diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index bacbcba..fd92aab 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -72,6 +72,8 @@ extern void pci_disable_enabled_device(struct pci_dev *dev); extern int pci_finish_runtime_suspend(struct pci_dev *dev); extern int __pci_pme_wakeup(struct pci_dev *dev, void *ign); extern void pci_wakeup_bus(struct pci_bus *bus); +extern void pci_config_pm_runtime_get(struct pci_dev *dev); +extern void pci_config_pm_runtime_put(struct pci_dev *dev); extern void pci_pm_init(struct pci_dev *dev); extern void platform_pci_wakeup_init(struct pci_dev *dev); extern void pci_allocate_cap_save_buffers(struct pci_dev *dev); diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index eb907a8f..9b8505c 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -76,6 +76,8 @@ proc_bus_pci_read(struct file *file, char __user *buf, size_t nbytes, loff_t *pp if (!access_ok(VERIFY_WRITE, buf, cnt)) return -EINVAL; + pci_config_pm_runtime_get(dev); + if ((pos & 1) && cnt) { unsigned char val; pci_user_read_config_byte(dev, pos, &val); @@ -121,6 +123,8 @@ proc_bus_pci_read(struct file *file, char __user *buf, size_t nbytes, loff_t *pp cnt--; } + pci_config_pm_runtime_put(dev); + *ppos = pos; return nbytes; } @@ -146,6 +150,8 @@ proc_bus_pci_write(struct file *file, const char __user *buf, size_t nbytes, lof if (!access_ok(VERIFY_READ, buf, cnt)) return -EINVAL; + pci_config_pm_runtime_get(dev); + if ((pos & 1) && cnt) { unsigned char val; __get_user(val, buf); @@ -191,6 +197,8 @@ proc_bus_pci_write(struct file *file, const char __user *buf, size_t nbytes, lof cnt--; } + pci_config_pm_runtime_put(dev); + *ppos = pos; i_size_write(ino, dp->size); return nbytes; -- cgit v0.10.2