From 14e6c70671858401d7ffb93aeeb41e8dc4bb467e Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sat, 13 Nov 2021 14:06:18 +0800 Subject: [PATCH 01/12] cpuidle: menu: Fix typo in a comment The word `these' in a comment is repeated, so drop one. Signed-off-by: Jason Wang [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/menu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 2e5670446991f..c4922684f3058 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -34,7 +34,7 @@ * 1) Energy break even point * 2) Performance impact * 3) Latency tolerance (from pmqos infrastructure) - * These these three factors are treated independently. + * These three factors are treated independently. * * Energy break even point * ----------------------- From 74d9555580c48a04b2c3b742dfb0c80777aa0b26 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 8 Nov 2021 16:09:41 +0000 Subject: [PATCH 02/12] PM: hibernate: Allow ACPI hardware signature to be honoured Theoretically, when the hardware signature in FACS changes, the OS is supposed to gracefully decline to attempt to resume from S4: "If the signature has changed, OSPM will not restore the system context and can boot from scratch" In practice, Windows doesn't do this and many laptop vendors do allow the signature to change especially when docking/undocking, so it would be a bad idea to simply comply with the specification by default in the general case. However, there are use cases where we do want the compliant behaviour and we know it's safe. Specifically, when resuming virtual machines where we know the hypervisor has changed sufficiently that resume will fail. We really want to be able to *tell* the guest kernel not to try, so it boots cleanly and doesn't just crash. This patch provides a way to opt in to the spec-compliant behaviour on the command line. A follow-up patch may do this automatically for certain "known good" machines based on a DMI match, or perhaps just for all hypervisor guests since there's no good reason a hypervisor would change the hardware_signature that it exposes to guests *unless* it wants them to obey the ACPI specification. Signed-off-by: David Woodhouse Signed-off-by: Rafael J. Wysocki --- .../admin-guide/kernel-parameters.txt | 15 ++++++++--- arch/x86/kernel/acpi/sleep.c | 4 ++- drivers/acpi/sleep.c | 26 +++++++++++++++---- include/linux/acpi.h | 2 +- include/linux/suspend.h | 1 + kernel/power/power.h | 1 + kernel/power/swap.c | 16 ++++++++++-- 7 files changed, 53 insertions(+), 12 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9725c546a0d46..10e0a3a275168 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -225,14 +225,23 @@ For broken nForce2 BIOS resulting in XT-PIC timer. acpi_sleep= [HW,ACPI] Sleep options - Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, - old_ordering, nonvs, sci_force_enable, nobl } + Format: { s3_bios, s3_mode, s3_beep, s4_hwsig, + s4_nohwsig, old_ordering, nonvs, + sci_force_enable, nobl } See Documentation/power/video.rst for information on s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep as soon as the kernel's real-mode entry point is called. + s4_hwsig causes the kernel to check the ACPI hardware + signature during resume from hibernation, and gracefully + refuse to resume if it has changed. This complies with + the ACPI specification but not with reality, since + Windows does not do this and many laptops do change it + on docking. So the default behaviour is to allow resume + and simply warn when the signature changes, unless the + s4_hwsig option is enabled. s4_nohwsig prevents ACPI hardware signature from being - used during resume from hibernation. + used (or even warned about) during resume. old_ordering causes the ACPI 1.0 ordering of the _PTS control method, with respect to putting devices into low power states, to be enforced (the ACPI 2.0 ordering diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 3f85fcae450ce..1e97f944b47d7 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -139,8 +139,10 @@ static int __init acpi_sleep_setup(char *str) if (strncmp(str, "s3_beep", 7) == 0) acpi_realmode_flags |= 4; #ifdef CONFIG_HIBERNATION + if (strncmp(str, "s4_hwsig", 8) == 0) + acpi_check_s4_hw_signature(1); if (strncmp(str, "s4_nohwsig", 10) == 0) - acpi_no_s4_hw_signature(); + acpi_check_s4_hw_signature(0); #endif if (strncmp(str, "nonvs", 5) == 0) acpi_nvs_nosave(); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index eaa47753b7584..4dfbfc69db346 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -877,11 +877,11 @@ static inline void acpi_sleep_syscore_init(void) {} #ifdef CONFIG_HIBERNATION static unsigned long s4_hardware_signature; static struct acpi_table_facs *facs; -static bool nosigcheck; +static int sigcheck = -1; /* Default behaviour is just to warn */ -void __init acpi_no_s4_hw_signature(void) +void __init acpi_check_s4_hw_signature(int check) { - nosigcheck = true; + sigcheck = check; } static int acpi_hibernation_begin(pm_message_t stage) @@ -1009,12 +1009,28 @@ static void acpi_sleep_hibernate_setup(void) hibernation_set_ops(old_suspend_ordering ? &acpi_hibernation_ops_old : &acpi_hibernation_ops); sleep_states[ACPI_STATE_S4] = 1; - if (nosigcheck) + if (!sigcheck) return; acpi_get_table(ACPI_SIG_FACS, 1, (struct acpi_table_header **)&facs); - if (facs) + if (facs) { + /* + * s4_hardware_signature is the local variable which is just + * used to warn about mismatch after we're attempting to + * resume (in violation of the ACPI specification.) + */ s4_hardware_signature = facs->hardware_signature; + + if (sigcheck > 0) { + /* + * If we're actually obeying the ACPI specification + * then the signature is written out as part of the + * swsusp header, in order to allow the boot kernel + * to gracefully decline to resume. + */ + swsusp_hardware_signature = facs->hardware_signature; + } + } } #else /* !CONFIG_HIBERNATION */ static inline void acpi_sleep_hibernate_setup(void) {} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index b28f8790192a2..6c0798db6bde0 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -506,7 +506,7 @@ acpi_status acpi_release_memory(acpi_handle handle, struct resource *res, int acpi_resources_are_enforced(void); #ifdef CONFIG_HIBERNATION -void __init acpi_no_s4_hw_signature(void); +void __init acpi_check_s4_hw_signature(int check); #endif #ifdef CONFIG_PM_SLEEP diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 8af13ba60c7e4..5785d909c3215 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -446,6 +446,7 @@ extern unsigned long get_safe_page(gfp_t gfp_mask); extern asmlinkage int swsusp_arch_suspend(void); extern asmlinkage int swsusp_arch_resume(void); +extern u32 swsusp_hardware_signature; extern void hibernation_set_ops(const struct platform_hibernation_ops *ops); extern int hibernate(void); extern bool system_entering_hibernation(void); diff --git a/kernel/power/power.h b/kernel/power/power.h index 326f8d032eb5c..b4f4339432096 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -170,6 +170,7 @@ extern int swsusp_swap_in_use(void); #define SF_PLATFORM_MODE 1 #define SF_NOCOMPRESS_MODE 2 #define SF_CRC32_MODE 4 +#define SF_HW_SIG 8 /* kernel/power/hibernate.c */ extern int swsusp_check(void); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index ff326c2cb77bd..ad10359030a4c 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -36,6 +36,8 @@ #define HIBERNATE_SIG "S1SUSPEND" +u32 swsusp_hardware_signature; + /* * When reading an {un,}compressed image, we may restore pages in place, * in which case some architectures need these pages cleaning before they @@ -104,7 +106,8 @@ struct swap_map_handle { struct swsusp_header { char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) - - sizeof(u32)]; + sizeof(u32) - sizeof(u32)]; + u32 hw_sig; u32 crc32; sector_t image; unsigned int flags; /* Flags to pass to the "boot" kernel */ @@ -312,7 +315,6 @@ static int hib_wait_io(struct hib_bio_batch *hb) /* * Saving part */ - static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) { int error; @@ -324,6 +326,10 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); memcpy(swsusp_header->sig, HIBERNATE_SIG, 10); swsusp_header->image = handle->first_sector; + if (swsusp_hardware_signature) { + swsusp_header->hw_sig = swsusp_hardware_signature; + flags |= SF_HW_SIG; + } swsusp_header->flags = flags; if (flags & SF_CRC32_MODE) swsusp_header->crc32 = handle->crc32; @@ -1537,6 +1543,12 @@ int swsusp_check(void) } else { error = -EINVAL; } + if (!error && swsusp_header->flags & SF_HW_SIG && + swsusp_header->hw_sig != swsusp_hardware_signature) { + pr_info("Suspend image hardware signature mismatch (%08x now %08x); aborting resume.\n", + swsusp_header->hw_sig, swsusp_hardware_signature); + error = -EINVAL; + } put: if (error) From 5ef11c56b2332613cacd5e7ac17cfb1a073b66ab Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 7 Dec 2021 00:20:58 +0000 Subject: [PATCH 03/12] r8169: Avoid misuse of pm_ptr() macro The pm_ptr() macro should be used when the suspend and resume functions can be compiled independently of the CONFIG_PM Kconfig option. In the case of this driver, the suspend and resume functions are inside a section protected by a #ifdef CONFIG_PM guard. Therefore pm_ptr() should not be used. Signed-off-by: Paul Cercueil Reviewed-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki --- drivers/net/ethernet/realtek/r8169_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 86c44bc5f73f8..6b81f95698f05 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5441,7 +5441,9 @@ static struct pci_driver rtl8169_pci_driver = { .probe = rtl_init_one, .remove = rtl_remove_one, .shutdown = rtl_shutdown, - .driver.pm = pm_ptr(&rtl8169_pm_ops), +#ifdef CONFIG_PM + .driver.pm = &rtl8169_pm_ops, +#endif }; module_pci_driver(rtl8169_pci_driver); From c06ef740d401d0f4ab188882bf6f8d9cf0f75eaf Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 7 Dec 2021 00:20:59 +0000 Subject: [PATCH 04/12] PM: core: Redefine pm_ptr() macro The pm_ptr() macro was previously conditionally defined, according to the value of the CONFIG_PM option. This meant that the pointed structure was either referenced (if CONFIG_PM was set), or never referenced (if CONFIG_PM was not set), causing it to be detected as unused by the compiler. This worked fine, but required the __maybe_unused compiler attribute to be used to every symbol pointed to by a pointer wrapped with pm_ptr(). We can do better. With this change, the pm_ptr() is now defined the same, independently of the value of CONFIG_PM. It now uses the (?:) ternary operator to conditionally resolve to its argument. Since the condition is known at compile time, the compiler will then choose to discard the unused symbols, which won't need to be tagged with __maybe_unused anymore. This pm_ptr() macro is usually used with pointers to dev_pm_ops structures created with SIMPLE_DEV_PM_OPS() or similar macros. These do use a __maybe_unused flag, which is now useless with this change, so it later can be removed. However in the meantime it causes no harm, and all the drivers still compile fine with the new pm_ptr() macro. Signed-off-by: Paul Cercueil Reviewed-by: Jonathan Cameron Reviewed-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/linux/pm.h b/include/linux/pm.h index 1d8209c09686c..b88ac7dcf2a20 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -373,11 +373,7 @@ const struct dev_pm_ops __maybe_unused name = { \ SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } -#ifdef CONFIG_PM -#define pm_ptr(_ptr) (_ptr) -#else -#define pm_ptr(_ptr) NULL -#endif +#define pm_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM), (_ptr)) /* * PM_EVENT_ messages From 1a3c7bb088266fa2db017be299f91f1c1894c857 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 7 Dec 2021 00:21:00 +0000 Subject: [PATCH 05/12] PM: core: Add new *_PM_OPS macros, deprecate old ones This commit introduces the following macros: SYSTEM_SLEEP_PM_OPS() LATE_SYSTEM_SLEEP_PM_OPS() NOIRQ_SYSTEM_SLEEP_PM_OPS() RUNTIME_PM_OPS() These new macros are very similar to their SET_*_PM_OPS() equivalent. They however differ in the fact that the callbacks they set will always be seen as referenced by the compiler. This means that the callback functions don't need to be wrapped with a #ifdef CONFIG_PM guard, or tagged with __maybe_unused, to prevent the compiler from complaining about unused static symbols. The compiler will then simply evaluate at compile time whether or not these symbols are dead code. The callbacks that are only useful with CONFIG_PM_SLEEP is enabled, are now also wrapped with a new pm_sleep_ptr() macro, which is inspired from pm_ptr(). This is needed for drivers that use different callbacks for sleep and runtime PM, to handle the case where CONFIG_PM is set and CONFIG_PM_SLEEP is not. This commit also deprecates the following macros: SIMPLE_DEV_PM_OPS() UNIVERSAL_DEV_PM_OPS() And introduces the following macros: DEFINE_SIMPLE_DEV_PM_OPS() DEFINE_UNIVERSAL_DEV_PM_OPS() These macros are similar to the functions they were created to replace, with the following differences: - They use the new macros introduced above, and as such always reference the provided callback functions. - They are not tagged with __maybe_unused. They are meant to be used with pm_ptr() or pm_sleep_ptr() for DEFINE_UNIVERSAL_DEV_PM_OPS() and DEFINE_SIMPLE_DEV_PM_OPS() respectively. - They declare the symbol static, since every driver seems to do that anyway; and if a non-static use-case is needed an indirection pointer could be used. The point of this change, is to progressively switch from a code model where PM callbacks are all protected behind CONFIG_PM guards, to a code model where the PM callbacks are always seen by the compiler, but discarded if not used. Signed-off-by: Paul Cercueil Reviewed-by: Jonathan Cameron Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 74 +++++++++++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 24 deletions(-) diff --git a/include/linux/pm.h b/include/linux/pm.h index b88ac7dcf2a20..fc9691cb01b4f 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -300,47 +300,59 @@ struct dev_pm_ops { int (*runtime_idle)(struct device *dev); }; +#define SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + .suspend = pm_sleep_ptr(suspend_fn), \ + .resume = pm_sleep_ptr(resume_fn), \ + .freeze = pm_sleep_ptr(suspend_fn), \ + .thaw = pm_sleep_ptr(resume_fn), \ + .poweroff = pm_sleep_ptr(suspend_fn), \ + .restore = pm_sleep_ptr(resume_fn), + +#define LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + .suspend_late = pm_sleep_ptr(suspend_fn), \ + .resume_early = pm_sleep_ptr(resume_fn), \ + .freeze_late = pm_sleep_ptr(suspend_fn), \ + .thaw_early = pm_sleep_ptr(resume_fn), \ + .poweroff_late = pm_sleep_ptr(suspend_fn), \ + .restore_early = pm_sleep_ptr(resume_fn), + +#define NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + .suspend_noirq = pm_sleep_ptr(suspend_fn), \ + .resume_noirq = pm_sleep_ptr(resume_fn), \ + .freeze_noirq = pm_sleep_ptr(suspend_fn), \ + .thaw_noirq = pm_sleep_ptr(resume_fn), \ + .poweroff_noirq = pm_sleep_ptr(suspend_fn), \ + .restore_noirq = pm_sleep_ptr(resume_fn), + +#define RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ + .runtime_suspend = suspend_fn, \ + .runtime_resume = resume_fn, \ + .runtime_idle = idle_fn, + #ifdef CONFIG_PM_SLEEP #define SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ - .suspend = suspend_fn, \ - .resume = resume_fn, \ - .freeze = suspend_fn, \ - .thaw = resume_fn, \ - .poweroff = suspend_fn, \ - .restore = resume_fn, + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #else #define SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #endif #ifdef CONFIG_PM_SLEEP #define SET_LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ - .suspend_late = suspend_fn, \ - .resume_early = resume_fn, \ - .freeze_late = suspend_fn, \ - .thaw_early = resume_fn, \ - .poweroff_late = suspend_fn, \ - .restore_early = resume_fn, + LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #else #define SET_LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #endif #ifdef CONFIG_PM_SLEEP #define SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ - .suspend_noirq = suspend_fn, \ - .resume_noirq = resume_fn, \ - .freeze_noirq = suspend_fn, \ - .thaw_noirq = resume_fn, \ - .poweroff_noirq = suspend_fn, \ - .restore_noirq = resume_fn, + NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #else #define SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #endif #ifdef CONFIG_PM #define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ - .runtime_suspend = suspend_fn, \ - .runtime_resume = resume_fn, \ - .runtime_idle = idle_fn, + RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) #else #define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) #endif @@ -349,9 +361,9 @@ struct dev_pm_ops { * Use this if you want to use the same suspend and resume callbacks for suspend * to RAM and hibernation. */ -#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ -const struct dev_pm_ops __maybe_unused name = { \ - SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ +#define DEFINE_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ +static const struct dev_pm_ops name = { \ + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } /* @@ -367,6 +379,19 @@ const struct dev_pm_ops __maybe_unused name = { \ * .resume_early(), to the same routines as .runtime_suspend() and * .runtime_resume(), respectively (and analogously for hibernation). */ +#define DEFINE_UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ +static const struct dev_pm_ops name = { \ + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ +} + +/* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */ +#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ +const struct dev_pm_ops __maybe_unused name = { \ + SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ +} + +/* Deprecated. Use DEFINE_UNIVERSAL_DEV_PM_OPS() instead. */ #define UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ const struct dev_pm_ops __maybe_unused name = { \ SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ @@ -374,6 +399,7 @@ const struct dev_pm_ops __maybe_unused name = { \ } #define pm_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM), (_ptr)) +#define pm_sleep_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM_SLEEP), (_ptr)) /* * PM_EVENT_ messages From c24efa6732788f0be22cdf5d2aedd5e3117e983f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 7 Dec 2021 19:54:32 +0100 Subject: [PATCH 06/12] PM: runtime: Capture device status before disabling runtime PM In some cases (for example, during system-wide suspend and resume of devices) it is useful to know whether or not runtime PM has ever been enabled for a given device and, if so, what the runtime PM status of it had been right before runtime PM was disabled for it last time. For this reason, introduce a new struct dev_pm_info field called last_status that will be used for capturing the runtime PM status of the device when its power.disable_depth counter changes from 0 to 1. The new field will be set to RPM_INVALID to start with and whenever power.disable_depth changes from 1 to 0, so it will be valid only when runtime PM of the device is currently disabled, but it has been enabled at least once. Immediately use power.last_status in rpm_resume() to make it handle the case when PM runtime is disabled for the device, but its runtime PM status is RPM_ACTIVE more consistently. Namely, make it return 1 if power.last_status is also equal to RPM_ACTIVE in that case (the idea being that if the status was RPM_ACTIVE last time when power.disable_depth was changing from 0 to 1 and it is still RPM_ACTIVE, it can be assumed to reflect what happened to the device last time when it was using runtime PM) and -EACCES otherwise. Update the documentation to provide a description of last_status and change the description of pm_runtime_resume() in it to reflect the new behavior of rpm_active(). While at it, rearrange the code in pm_runtime_enable() to be more straightforward and replace the WARN() macro in it with a pr_warn() invocation which is less disruptive. Link: https://lore.kernel.org/linux-pm/20211026222626.39222-1-ulf.hansson@linaro.org/t/#u Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- Documentation/power/runtime_pm.rst | 14 +++++++--- drivers/base/power/runtime.c | 45 +++++++++++++++++------------- include/linux/pm.h | 2 ++ 3 files changed, 37 insertions(+), 24 deletions(-) diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst index d6bf84f061f41..65b86e487afe0 100644 --- a/Documentation/power/runtime_pm.rst +++ b/Documentation/power/runtime_pm.rst @@ -265,6 +265,10 @@ defined in include/linux/pm.h: RPM_SUSPENDED, which means that each device is initially regarded by the PM core as 'suspended', regardless of its real hardware status + `enum rpm_status last_status;` + - the last runtime PM status of the device captured before disabling runtime + PM for it (invalid initially and when disable_depth is 0) + `unsigned int runtime_auto;` - if set, indicates that the user space has allowed the device driver to power manage the device at run time via the /sys/devices/.../power/control @@ -333,10 +337,12 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: `int pm_runtime_resume(struct device *dev);` - execute the subsystem-level resume callback for the device; returns 0 on - success, 1 if the device's runtime PM status was already 'active' or - error code on failure, where -EAGAIN means it may be safe to attempt to - resume the device again in future, but 'power.runtime_error' should be - checked additionally, and -EACCES means that 'power.disable_depth' is + success, 1 if the device's runtime PM status is already 'active' (also if + 'power.disable_depth' is nonzero, but the status was 'active' when it was + changing from 0 to 1) or error code on failure, where -EAGAIN means it may + be safe to attempt to resume the device again in future, but + 'power.runtime_error' should be checked additionally, and -EACCES means + that the callback could not be run, because 'power.disable_depth' was different from 0 `int pm_runtime_resume_and_get(struct device *dev);` diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index d504cd4ab3cbf..844b6b8116100 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -742,13 +742,15 @@ static int rpm_resume(struct device *dev, int rpmflags) trace_rpm_resume_rcuidle(dev, rpmflags); repeat: - if (dev->power.runtime_error) + if (dev->power.runtime_error) { retval = -EINVAL; - else if (dev->power.disable_depth == 1 && dev->power.is_suspended - && dev->power.runtime_status == RPM_ACTIVE) - retval = 1; - else if (dev->power.disable_depth > 0) - retval = -EACCES; + } else if (dev->power.disable_depth > 0) { + if (dev->power.runtime_status == RPM_ACTIVE && + dev->power.last_status == RPM_ACTIVE) + retval = 1; + else + retval = -EACCES; + } if (retval) goto out; @@ -1410,8 +1412,10 @@ void __pm_runtime_disable(struct device *dev, bool check_resume) /* Update time accounting before disabling PM-runtime. */ update_pm_runtime_accounting(dev); - if (!dev->power.disable_depth++) + if (!dev->power.disable_depth++) { __pm_runtime_barrier(dev); + dev->power.last_status = dev->power.runtime_status; + } out: spin_unlock_irq(&dev->power.lock); @@ -1428,23 +1432,23 @@ void pm_runtime_enable(struct device *dev) spin_lock_irqsave(&dev->power.lock, flags); - if (dev->power.disable_depth > 0) { - dev->power.disable_depth--; - - /* About to enable runtime pm, set accounting_timestamp to now */ - if (!dev->power.disable_depth) - dev->power.accounting_timestamp = ktime_get_mono_fast_ns(); - } else { + if (!dev->power.disable_depth) { dev_warn(dev, "Unbalanced %s!\n", __func__); + goto out; } - WARN(!dev->power.disable_depth && - dev->power.runtime_status == RPM_SUSPENDED && - !dev->power.ignore_children && - atomic_read(&dev->power.child_count) > 0, - "Enabling runtime PM for inactive device (%s) with active children\n", - dev_name(dev)); + if (--dev->power.disable_depth > 0) + goto out; + dev->power.last_status = RPM_INVALID; + dev->power.accounting_timestamp = ktime_get_mono_fast_ns(); + + if (dev->power.runtime_status == RPM_SUSPENDED && + !dev->power.ignore_children && + atomic_read(&dev->power.child_count) > 0) + dev_warn(dev, "Enabling runtime PM for inactive device with active children\n"); + +out: spin_unlock_irqrestore(&dev->power.lock, flags); } EXPORT_SYMBOL_GPL(pm_runtime_enable); @@ -1640,6 +1644,7 @@ EXPORT_SYMBOL_GPL(__pm_runtime_use_autosuspend); void pm_runtime_init(struct device *dev) { dev->power.runtime_status = RPM_SUSPENDED; + dev->power.last_status = RPM_INVALID; dev->power.idle_notification = false; dev->power.disable_depth = 1; diff --git a/include/linux/pm.h b/include/linux/pm.h index fc9691cb01b4f..e1e9402180b9b 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -521,6 +521,7 @@ const struct dev_pm_ops __maybe_unused name = { \ */ enum rpm_status { + RPM_INVALID = -1, RPM_ACTIVE = 0, RPM_RESUMING, RPM_SUSPENDED, @@ -634,6 +635,7 @@ struct dev_pm_info { unsigned int links_count; enum rpm_request request; enum rpm_status runtime_status; + enum rpm_status last_status; int runtime_error; int autosuspend_delay; u64 last_busy; From d00ebcc6542da052b0afe28b1d448cc0f95827ad Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 10 Dec 2021 12:28:48 +0800 Subject: [PATCH 07/12] cpuidle: Fix cpuidle_remove_state_sysfs() kerneldoc comment Fix function name in sysfs.c kernel-doc comment to remove a warning found by running scripts/kernel-doc, which is caused by using 'make W=1'. drivers/cpuidle/sysfs.c:512: warning: expecting prototype for cpuidle_remove_driver_sysfs(). Prototype was for cpuidle_remove_state_sysfs() instead Reported-by: Abaci Robot Signed-off-by: Yang Li [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 469e18547d06c..e708e593db88f 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -505,7 +505,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device) } /** - * cpuidle_remove_driver_sysfs - removes the cpuidle states sysfs attributes + * cpuidle_remove_state_sysfs - removes the cpuidle states sysfs attributes * @device: the target device */ static void cpuidle_remove_state_sysfs(struct cpuidle_device *device) From d1579e61192e0e686faa4208500ef4c3b529b16c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 10 Dec 2021 17:10:13 +0100 Subject: [PATCH 08/12] PM: runtime: Add safety net to supplier device release Because refcount_dec_not_one() returns true if the target refcount becomes saturated, it is generally unsafe to use its return value as a loop termination condition, but that is what happens when a device link's supplier device is released during runtime PM suspend operations and on device link removal. To address this, introduce pm_runtime_release_supplier() to be used in the above cases which will check the supplier device's runtime PM usage counter in addition to the refcount_dec_not_one() return value, so the loop can be terminated in case the rpm_active refcount value becomes invalid, and update the code in question to use it as appropriate. This change is not expected to have any visible functional impact. Reported-by: Peter Zijlstra Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Acked-by: Peter Zijlstra (Intel) --- drivers/base/core.c | 3 +-- drivers/base/power/runtime.c | 41 ++++++++++++++++++++++++++---------- include/linux/pm_runtime.h | 3 +++ 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index fd034d7424472..b191bd17de891 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -485,8 +485,7 @@ static void device_link_release_fn(struct work_struct *work) /* Ensure that all references to the link object have been dropped. */ device_link_synchronize_removal(); - while (refcount_dec_not_one(&link->rpm_active)) - pm_runtime_put(link->supplier); + pm_runtime_release_supplier(link, true); put_device(link->consumer); put_device(link->supplier); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 844b6b8116100..f1b856ea8e10b 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -305,19 +305,40 @@ static int rpm_get_suppliers(struct device *dev) return 0; } +/** + * pm_runtime_release_supplier - Drop references to device link's supplier. + * @link: Target device link. + * @check_idle: Whether or not to check if the supplier device is idle. + * + * Drop all runtime PM references associated with @link to its supplier device + * and if @check_idle is set, check if that device is idle (and so it can be + * suspended). + */ +void pm_runtime_release_supplier(struct device_link *link, bool check_idle) +{ + struct device *supplier = link->supplier; + + /* + * The additional power.usage_count check is a safety net in case + * the rpm_active refcount becomes saturated, in which case + * refcount_dec_not_one() would return true forever, but it is not + * strictly necessary. + */ + while (refcount_dec_not_one(&link->rpm_active) && + atomic_read(&supplier->power.usage_count) > 0) + pm_runtime_put_noidle(supplier); + + if (check_idle) + pm_request_idle(supplier); +} + static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend) { struct device_link *link; list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, - device_links_read_lock_held()) { - - while (refcount_dec_not_one(&link->rpm_active)) - pm_runtime_put_noidle(link->supplier); - - if (try_to_suspend) - pm_request_idle(link->supplier); - } + device_links_read_lock_held()) + pm_runtime_release_supplier(link, try_to_suspend); } static void rpm_put_suppliers(struct device *dev) @@ -1777,9 +1798,7 @@ void pm_runtime_drop_link(struct device_link *link) return; pm_runtime_drop_link_count(link->consumer); - - while (refcount_dec_not_one(&link->rpm_active)) - pm_runtime_put(link->supplier); + pm_runtime_release_supplier(link, true); } static bool pm_runtime_need_not_resume(struct device *dev) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index eddd66d426caf..016de5776b6db 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -58,6 +58,7 @@ extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); +extern void pm_runtime_release_supplier(struct device_link *link, bool check_idle); extern int devm_pm_runtime_enable(struct device *dev); @@ -283,6 +284,8 @@ static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} static inline void pm_runtime_drop_link(struct device_link *link) {} +static inline void pm_runtime_release_supplier(struct device_link *link, + bool check_idle) {} #endif /* !CONFIG_PM */ From e0d64ecc621715e9c7807e952b68475c62bbf630 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 7 Dec 2021 00:21:01 +0000 Subject: [PATCH 09/12] mmc: jz4740: Use the new PM macros - Use DEFINE_SIMPLE_DEV_PM_OPS() instead of the SIMPLE_DEV_PM_OPS() macro. This makes it possible to remove the __maybe_unused flags on the callback functions. - Since we only have callbacks for suspend/resume, we can conditionally compile the dev_pm_ops structure for when CONFIG_PM_SLEEP is enabled; so use the pm_sleep_ptr() macro instead of pm_ptr(). Signed-off-by: Paul Cercueil Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/mmc/host/jz4740_mmc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c index 80a2c270d502e..bb612fce7ead8 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -1103,17 +1103,17 @@ static int jz4740_mmc_remove(struct platform_device *pdev) return 0; } -static int __maybe_unused jz4740_mmc_suspend(struct device *dev) +static int jz4740_mmc_suspend(struct device *dev) { return pinctrl_pm_select_sleep_state(dev); } -static int __maybe_unused jz4740_mmc_resume(struct device *dev) +static int jz4740_mmc_resume(struct device *dev) { return pinctrl_select_default_state(dev); } -static SIMPLE_DEV_PM_OPS(jz4740_mmc_pm_ops, jz4740_mmc_suspend, +DEFINE_SIMPLE_DEV_PM_OPS(jz4740_mmc_pm_ops, jz4740_mmc_suspend, jz4740_mmc_resume); static struct platform_driver jz4740_mmc_driver = { @@ -1123,7 +1123,7 @@ static struct platform_driver jz4740_mmc_driver = { .name = "jz4740-mmc", .probe_type = PROBE_PREFER_ASYNCHRONOUS, .of_match_table = of_match_ptr(jz4740_mmc_of_match), - .pm = pm_ptr(&jz4740_mmc_pm_ops), + .pm = pm_sleep_ptr(&jz4740_mmc_pm_ops), }, }; From 2cdbd92c2d1dff07ad56c39f5857ee644bbd2c8a Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 7 Dec 2021 00:21:02 +0000 Subject: [PATCH 10/12] mmc: mxc: Use the new PM macros Use DEFINE_SIMPLE_DEV_PM_OPS() instead of the SIMPLE_DEV_PM_OPS() macro, along with using pm_sleep_ptr() as this driver doesn't handle runtime PM. This makes it possible to remove the #ifdef CONFIG_PM guard around the suspend/resume functions. Signed-off-by: Paul Cercueil Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/mmc/host/mxcmmc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c index 2fe6fcdbb1b30..98c218bd66697 100644 --- a/drivers/mmc/host/mxcmmc.c +++ b/drivers/mmc/host/mxcmmc.c @@ -1183,7 +1183,6 @@ static int mxcmci_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP static int mxcmci_suspend(struct device *dev) { struct mmc_host *mmc = dev_get_drvdata(dev); @@ -1210,9 +1209,8 @@ static int mxcmci_resume(struct device *dev) return ret; } -#endif -static SIMPLE_DEV_PM_OPS(mxcmci_pm_ops, mxcmci_suspend, mxcmci_resume); +DEFINE_SIMPLE_DEV_PM_OPS(mxcmci_pm_ops, mxcmci_suspend, mxcmci_resume); static struct platform_driver mxcmci_driver = { .probe = mxcmci_probe, @@ -1220,7 +1218,7 @@ static struct platform_driver mxcmci_driver = { .driver = { .name = DRIVER_NAME, .probe_type = PROBE_PREFER_ASYNCHRONOUS, - .pm = &mxcmci_pm_ops, + .pm = pm_sleep_ptr(&mxcmci_pm_ops), .of_match_table = mxcmci_of_match, } }; From 50a4606655582f310b3f07c9492af9a72b40003b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Dec 2021 20:16:02 +0100 Subject: [PATCH 11/12] PM: runtime: Simplify locking in pm_runtime_put_suppliers() Notice that pm_runtime_put_suppliers() cannot be called with disabled interrupts, because it may sleep (due to the device links read locking in the non-SRCU case), and so it can use spin_lock_irq() and spin_unlock_irq() for the locking. Update the function accordingly and while at it move the "put" local variable in it into the inner block where it is used. This change is not expected to have any visible functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson --- drivers/base/power/runtime.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index f1b856ea8e10b..2f3cce17219bd 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1748,8 +1748,6 @@ void pm_runtime_get_suppliers(struct device *dev) void pm_runtime_put_suppliers(struct device *dev) { struct device_link *link; - unsigned long flags; - bool put; int idx; idx = device_links_read_lock(); @@ -1757,11 +1755,17 @@ void pm_runtime_put_suppliers(struct device *dev) list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, device_links_read_lock_held()) if (link->supplier_preactivated) { + bool put; + link->supplier_preactivated = false; - spin_lock_irqsave(&dev->power.lock, flags); + + spin_lock_irq(&dev->power.lock); + put = pm_runtime_status_suspended(dev) && refcount_dec_not_one(&link->rpm_active); - spin_unlock_irqrestore(&dev->power.lock, flags); + + spin_unlock_irq(&dev->power.lock); + if (put) pm_runtime_put(link->supplier); } From 7dfc5b6e909e8c434ce458d61c7d7c8cd95d5ad6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 4 Jan 2022 17:43:51 +0100 Subject: [PATCH 12/12] cpuidle: use default_groups in kobj_type There are currently 2 ways to create a set of sysfs files for a kobj_type, through the default_attrs field, and the default_groups field. Move the cpuidle sysfs code to use default_groups field which has been the preferred way since aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type") so that we can soon get rid of the obsolete default_attrs field. Signed-off-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/sysfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index e708e593db88f..2b496a53cbca1 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -335,6 +335,7 @@ static struct attribute *cpuidle_state_default_attrs[] = { &attr_default_status.attr, NULL }; +ATTRIBUTE_GROUPS(cpuidle_state_default); struct cpuidle_state_kobj { struct cpuidle_state *state; @@ -448,7 +449,7 @@ static void cpuidle_state_sysfs_release(struct kobject *kobj) static struct kobj_type ktype_state_cpuidle = { .sysfs_ops = &cpuidle_state_sysfs_ops, - .default_attrs = cpuidle_state_default_attrs, + .default_groups = cpuidle_state_default_groups, .release = cpuidle_state_sysfs_release, }; @@ -591,10 +592,11 @@ static struct attribute *cpuidle_driver_default_attrs[] = { &attr_driver_name.attr, NULL }; +ATTRIBUTE_GROUPS(cpuidle_driver_default); static struct kobj_type ktype_driver_cpuidle = { .sysfs_ops = &cpuidle_driver_sysfs_ops, - .default_attrs = cpuidle_driver_default_attrs, + .default_groups = cpuidle_driver_default_groups, .release = cpuidle_driver_sysfs_release, };