From 013c074f8642d8e815ad670601f8e27155a74b57 Mon Sep 17 00:00:00 2001 From: "Strashko, Grygorii" Date: Tue, 10 Nov 2015 11:42:34 +0200 Subject: [PATCH 1/7] PM / sleep: prohibit devices probing during suspend/hibernation It is unsafe [1] if probing of devices will happen during suspend or hibernation and system behavior will be unpredictable in this case. So, let's prohibit device's probing in dpm_prepare() and defer their probing instead. The normal behavior will be restored in dpm_complete(). This patch introduces new DD core APIs: device_block_probing() It will disable probing of devices and defer their probes instead. device_unblock_probing() It will restore normal behavior and trigger re-probing of deferred devices. [1] https://lkml.org/lkml/2015/9/11/554 Signed-off-by: Grygorii Strashko Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/base/base.h | 2 ++ drivers/base/dd.c | 48 ++++++++++++++++++++++++++++++++++++++- drivers/base/power/main.c | 17 ++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index 1782f3aa386e6..e05db388bd1ca 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -131,6 +131,8 @@ extern void device_remove_groups(struct device *dev, extern char *make_class_name(const char *name, struct kobject *kobj); extern int devres_release_all(struct device *dev); +extern void device_block_probing(void); +extern void device_unblock_probing(void); /* /sys/devices directory */ extern struct kset *devices_kset; diff --git a/drivers/base/dd.c b/drivers/base/dd.c index a641cf3ccad69..b605f734b8827 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -54,6 +54,13 @@ static LIST_HEAD(deferred_probe_active_list); static struct workqueue_struct *deferred_wq; static atomic_t deferred_trigger_count = ATOMIC_INIT(0); +/* + * In some cases, like suspend to RAM or hibernation, It might be reasonable + * to prohibit probing of devices as it could be unsafe. + * Once defer_all_probes is true all drivers probes will be forcibly deferred. + */ +static bool defer_all_probes; + /* * deferred_probe_work_func() - Retry probing devices in the active list. */ @@ -171,6 +178,30 @@ static void driver_deferred_probe_trigger(void) queue_work(deferred_wq, &deferred_probe_work); } +/** + * device_block_probing() - Block/defere device's probes + * + * It will disable probing of devices and defer their probes instead. + */ +void device_block_probing(void) +{ + defer_all_probes = true; + /* sync with probes to avoid races. */ + wait_for_device_probe(); +} + +/** + * device_unblock_probing() - Unblock/enable device's probes + * + * It will restore normal behavior and trigger re-probing of deferred + * devices. + */ +void device_unblock_probing(void) +{ + defer_all_probes = false; + driver_deferred_probe_trigger(); +} + /** * deferred_probe_initcall() - Enable probing of deferred devices * @@ -277,9 +308,20 @@ static DECLARE_WAIT_QUEUE_HEAD(probe_waitqueue); static int really_probe(struct device *dev, struct device_driver *drv) { - int ret = 0; + int ret = -EPROBE_DEFER; int local_trigger_count = atomic_read(&deferred_trigger_count); + if (defer_all_probes) { + /* + * Value of defer_all_probes can be set only by + * device_defer_all_probes_enable() which, in turn, will call + * wait_for_device_probe() right after that to avoid any races. + */ + dev_dbg(dev, "Driver %s force probe deferral\n", drv->name); + driver_deferred_probe_add(dev); + return ret; + } + atomic_inc(&probe_count); pr_debug("bus: '%s': %s: probing driver %s with device %s\n", drv->bus->name, __func__, drv->name, dev_name(dev)); @@ -393,6 +435,10 @@ int driver_probe_done(void) */ void wait_for_device_probe(void) { + /* wait for the deferred probe workqueue to finish */ + if (driver_deferred_probe_enable) + flush_workqueue(deferred_wq); + /* wait for the known devices to complete their probing */ wait_event(probe_waitqueue, atomic_read(&probe_count) == 0); async_synchronize_full(); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 1710c26ba097d..9d626ac08d9c0 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -963,6 +963,9 @@ void dpm_complete(pm_message_t state) } list_splice(&list, &dpm_list); mutex_unlock(&dpm_list_mtx); + + /* Allow device probing and trigger re-probing of deferred devices */ + device_unblock_probing(); trace_suspend_resume(TPS("dpm_complete"), state.event, false); } @@ -1624,6 +1627,20 @@ int dpm_prepare(pm_message_t state) trace_suspend_resume(TPS("dpm_prepare"), state.event, true); might_sleep(); + /* + * Give a chance for the known devices to complete their probes, before + * disable probing of devices. This sync point is important at least + * at boot time + hibernation restore. + */ + wait_for_device_probe(); + /* + * It is unsafe if probing of devices will happen during suspend or + * hibernation and system behavior will be unpredictable in this case. + * So, let's prohibit device's probing here and defer their probes + * instead. The normal behavior will be restored in dpm_complete(). + */ + device_block_probing(); + mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_list)) { struct device *dev = to_device(dpm_list.next); From 5de85b9d57aba3ed2e04759e6db3b9e826dd0b06 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 18 Nov 2015 11:48:39 +0100 Subject: [PATCH 2/7] PM / runtime: Re-init runtime PM states at probe error and driver unbind There are two common expectations among several subsystems/drivers that deploys runtime PM support, but which isn't met by the driver core. Expectation 1) At ->probe() the subsystem/driver expects the runtime PM status of the device to be RPM_SUSPENDED, which is the initial status being assigned at device registration. This expectation is especially common among some of those subsystems/ drivers that manages devices with an attached PM domain, as those requires the ->runtime_resume() callback at the PM domain level to be invoked during ->probe(). Moreover these subsystems/drivers entirely relies on runtime PM resources being managed at the PM domain level, thus don't implement their own set of runtime PM callbacks. These are two scenarios that suffers from this unmet expectation. i) A failed ->probe() sequence requests probe deferral: ->probe() ... pm_runtime_enable() pm_runtime_get_sync() ... err: pm_runtime_put() pm_runtime_disable() ... As there are no guarantees that such sequence turns the runtime PM status of the device into RPM_SUSPENDED, the re-trying ->probe() may start with the status in RPM_ACTIVE. In such case the runtime PM core won't invoke the ->runtime_resume() callback because of a pm_runtime_get_sync(), as it considers the device to be already runtime resumed. ii) A driver re-bind sequence: At driver unbind, the subsystem/driver's >remove() callback invokes a sequence of runtime PM APIs, to undo actions during ->probe() and to put the device into low power state. ->remove() ... pm_runtime_put() pm_runtime_disable() ... Similar as in the failing ->probe() case, this sequence don't guarantee the runtime PM status of the device to turn into RPM_SUSPENDED. Trying to re-bind the driver thus causes the same issue as when re-trying ->probe(), in the probe deferral scenario. Expectation 2) Drivers that invokes the pm_runtime_irq_safe() API during ->probe(), triggers the runtime PM core to increase the usage count for the device's parent and permanently make it runtime resumed. The usage count is only dropped at device removal, which also allows it to be runtime suspended again. A re-trying ->probe() repeats the call to pm_runtime_irq_safe() and thus once more triggers the usage count of the device's parent to be increased. This leads to not only an imbalance issue of the usage count of the device's parent, but also to keep it runtime resumed permanently even if ->probe() fails. To address these issues, let's change the policy of the driver core to meet these expectations. More precisely, at ->probe() failures and driver unbind, restore the initial states of runtime PM. Although to still allow subsystem's to control PM for devices that doesn't ->probe() successfully, don't restore the initial states unless runtime PM is disabled. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/dd.c | 2 ++ drivers/base/power/power.h | 2 ++ drivers/base/power/runtime.c | 26 ++++++++++++++++++++------ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index a641cf3ccad69..cd2d79b1bf017 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -340,6 +340,7 @@ static int really_probe(struct device *dev, struct device_driver *drv) dev_set_drvdata(dev, NULL); if (dev->pm_domain && dev->pm_domain->dismiss) dev->pm_domain->dismiss(dev); + pm_runtime_reinit(dev); switch (ret) { case -EPROBE_DEFER: @@ -695,6 +696,7 @@ static void __device_release_driver(struct device *dev) dev_set_drvdata(dev, NULL); if (dev->pm_domain && dev->pm_domain->dismiss) dev->pm_domain->dismiss(dev); + pm_runtime_reinit(dev); klist_remove(&dev->p->knode_driver); if (dev->bus) diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 998fa6b230844..8b06193d4a5e9 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -18,6 +18,7 @@ static inline void pm_runtime_early_init(struct device *dev) } extern void pm_runtime_init(struct device *dev); +extern void pm_runtime_reinit(struct device *dev); extern void pm_runtime_remove(struct device *dev); struct wake_irq { @@ -84,6 +85,7 @@ static inline void pm_runtime_early_init(struct device *dev) } static inline void pm_runtime_init(struct device *dev) {} +static inline void pm_runtime_reinit(struct device *dev) {} static inline void pm_runtime_remove(struct device *dev) {} static inline int dpm_sysfs_add(struct device *dev) { return 0; } diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index e1a10a03df8ec..ab3fcd9f6c982 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1389,6 +1389,25 @@ void pm_runtime_init(struct device *dev) init_waitqueue_head(&dev->power.wait_queue); } +/** + * pm_runtime_reinit - Re-initialize runtime PM fields in given device object. + * @dev: Device object to re-initialize. + */ +void pm_runtime_reinit(struct device *dev) +{ + if (!pm_runtime_enabled(dev)) { + if (dev->power.runtime_status == RPM_ACTIVE) + pm_runtime_set_suspended(dev); + if (dev->power.irq_safe) { + spin_lock_irq(&dev->power.lock); + dev->power.irq_safe = 0; + spin_unlock_irq(&dev->power.lock); + if (dev->parent) + pm_runtime_put(dev->parent); + } + } +} + /** * pm_runtime_remove - Prepare for removing a device from device hierarchy. * @dev: Device object being removed from device hierarchy. @@ -1396,12 +1415,7 @@ void pm_runtime_init(struct device *dev) void pm_runtime_remove(struct device *dev) { __pm_runtime_disable(dev, false); - - /* Change the status back to 'suspended' to match the initial status. */ - if (dev->power.runtime_status == RPM_ACTIVE) - pm_runtime_set_suspended(dev); - if (dev->power.irq_safe && dev->parent) - pm_runtime_put(dev->parent); + pm_runtime_reinit(dev); } /** From 7b06a6d7bff563d82ddf8769617632f26793a83e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 5 Dec 2015 01:54:47 +0100 Subject: [PATCH 3/7] MAINTAINERS: Add an entry for the PM core Add a MAINTAINERS entry for the PM core with myself as the maintainer and linux-pm as the mailing list. This actually documents the current state of things. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 69c8a9c3289ad..8e4019cb79b0c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8438,6 +8438,17 @@ F: fs/timerfd.c F: include/linux/timer* F: kernel/time/*timer* +POWER MANAGEMENT CORE +M: "Rafael J. Wysocki" +L: linux-pm@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm +S: Supported +F: drivers/base/power/ +F: include/linux/pm.h +F: include/linux/pm_* +F: include/linux/powercap.h +F: drivers/powercap/ + POWER SUPPLY CLASS/SUBSYSTEM and DRIVERS M: Sebastian Reichel M: Dmitry Eremin-Solenikov From 299f2ffed329c1a2ea8d6e90f0df26b885d16e08 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 6 Dec 2015 17:33:45 +0100 Subject: [PATCH 4/7] PCI / PM: constify pci_platform_pm_ops structure The pci_platform_pm_ops structure is never modified, so declare it as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- drivers/pci/pci-acpi.c | 2 +- drivers/pci/pci.c | 4 ++-- drivers/pci/pci.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index a32ba753e4135..8400f80178822 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -529,7 +529,7 @@ static bool acpi_pci_need_resume(struct pci_dev *dev) return !!adev->power.flags.dsw_present; } -static struct pci_platform_pm_ops acpi_pci_platform_pm = { +static const struct pci_platform_pm_ops acpi_pci_platform_pm = { .is_manageable = acpi_pci_power_manageable, .set_state = acpi_pci_set_power_state, .choose_state = acpi_pci_choose_state, diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 314db8c1047a3..d1a7105b92760 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -527,9 +527,9 @@ static void pci_restore_bars(struct pci_dev *dev) pci_update_resource(dev, i); } -static struct pci_platform_pm_ops *pci_platform_pm; +static const struct pci_platform_pm_ops *pci_platform_pm; -int pci_set_platform_pm(struct pci_platform_pm_ops *ops) +int pci_set_platform_pm(const struct pci_platform_pm_ops *ops) { if (!ops->is_manageable || !ops->set_state || !ops->choose_state || !ops->sleep_wake) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index d390fc1475ecc..f6f151a421470 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -68,7 +68,7 @@ struct pci_platform_pm_ops { bool (*need_resume)(struct pci_dev *dev); }; -int pci_set_platform_pm(struct pci_platform_pm_ops *ops); +int pci_set_platform_pm(const struct pci_platform_pm_ops *ops); void pci_update_current_state(struct pci_dev *dev, pci_power_t state); void pci_power_up(struct pci_dev *dev); void pci_disable_enabled_device(struct pci_dev *dev); From 76fc35ddf8c075aa0e3f52384591d613b906ebb6 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Tue, 8 Dec 2015 16:17:25 +0200 Subject: [PATCH 5/7] PCI / PM: Fix small typo in documentation cuased -> caused Signed-off-by: Jarkko Nikula Signed-off-by: Rafael J. Wysocki --- Documentation/power/pci.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt index b0e911e0e8f50..44558882aa602 100644 --- a/Documentation/power/pci.txt +++ b/Documentation/power/pci.txt @@ -999,7 +999,7 @@ from its probe routine to make runtime PM work for the device. It is important to remember that the driver's runtime_suspend() callback may be executed right after the usage counter has been decremented, because -user space may already have cuased the pm_runtime_allow() helper function +user space may already have caused the pm_runtime_allow() helper function unblocking the runtime PM of the device to run via sysfs, so the driver must be prepared to cope with that. From a436b6a19f57656a6557439523923d89eb4a880d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 17 Dec 2015 02:54:26 +0100 Subject: [PATCH 6/7] PM / runtime: Add new helper for conditional usage count incrementation Introduce a new runtime PM function, pm_runtime_get_if_in_use(), that will increment the device's runtime PM usage counter and return 1 if its status is RPM_ACTIVE and its usage counter is greater than 0 at the same time (0 will be returned otherwise). This is useful for things that should only be done if the device is active (from the runtime PM perspective) and used by somebody (as indicated by the usage counter) already and they are not worth bothering otherwise. Requested-by: Imre Deak Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- Documentation/power/runtime_pm.txt | 6 ++++++ drivers/base/power/runtime.c | 24 ++++++++++++++++++++++++ include/linux/pm_runtime.h | 5 +++++ 3 files changed, 35 insertions(+) diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 0784bc3a2ab51..7328cf85236c2 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -371,6 +371,12 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: - increment the device's usage counter, run pm_runtime_resume(dev) and return its result + int pm_runtime_get_if_in_use(struct device *dev); + - return -EINVAL if 'power.disable_depth' is nonzero; otherwise, if the + runtime PM status is RPM_ACTIVE and the runtime PM usage counter is + nonzero, increment the counter and return 1; otherwise return 0 without + changing the counter + void pm_runtime_put_noidle(struct device *dev); - decrement the device's usage counter diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index ab3fcd9f6c982..4c7055009bd6a 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -965,6 +965,30 @@ int __pm_runtime_resume(struct device *dev, int rpmflags) } EXPORT_SYMBOL_GPL(__pm_runtime_resume); +/** + * pm_runtime_get_if_in_use - Conditionally bump up the device's usage counter. + * @dev: Device to handle. + * + * Return -EINVAL if runtime PM is disabled for the device. + * + * If that's not the case and if the device's runtime PM status is RPM_ACTIVE + * and the runtime PM usage counter is nonzero, increment the counter and + * return 1. Otherwise return 0 without changing the counter. + */ +int pm_runtime_get_if_in_use(struct device *dev) +{ + unsigned long flags; + int retval; + + spin_lock_irqsave(&dev->power.lock, flags); + retval = dev->power.disable_depth > 0 ? -EINVAL : + dev->power.runtime_status == RPM_ACTIVE + && atomic_inc_not_zero(&dev->power.usage_count); + spin_unlock_irqrestore(&dev->power.lock, flags); + return retval; +} +EXPORT_SYMBOL_GPL(pm_runtime_get_if_in_use); + /** * __pm_runtime_set_status - Set runtime PM status of a device. * @dev: Device to handle. diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 3bdbb41897800..7af093d6a4dd1 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -39,6 +39,7 @@ extern int pm_runtime_force_resume(struct device *dev); extern int __pm_runtime_idle(struct device *dev, int rpmflags); extern int __pm_runtime_suspend(struct device *dev, int rpmflags); extern int __pm_runtime_resume(struct device *dev, int rpmflags); +extern int pm_runtime_get_if_in_use(struct device *dev); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); @@ -143,6 +144,10 @@ static inline int pm_schedule_suspend(struct device *dev, unsigned int delay) { return -ENOSYS; } +static inline int pm_runtime_get_if_in_use(struct device *dev) +{ + return -EINVAL; +} static inline int __pm_runtime_set_status(struct device *dev, unsigned int status) { return 0; } static inline int pm_runtime_barrier(struct device *dev) { return 0; } From 4295733eee4e69eda432d95765b7762dc6013271 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Tue, 29 Dec 2015 11:03:21 +0100 Subject: [PATCH 7/7] PM / core: fix typo in documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The documentation for detach() said attach. Signed-off-by: Manuel Pégourié-Gonnard Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/base/power/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c index f32b802b98f4b..f48e33385b3e1 100644 --- a/drivers/base/power/common.c +++ b/drivers/base/power/common.c @@ -112,7 +112,7 @@ EXPORT_SYMBOL_GPL(dev_pm_domain_attach); /** * dev_pm_domain_detach - Detach a device from its PM domain. - * @dev: Device to attach. + * @dev: Device to detach. * @power_off: Used to indicate whether we should power off the device. * * This functions will reverse the actions from dev_pm_domain_attach() and thus