From 6e47c6e737082110386aaad6f7a6984193b29966 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 4 Dec 2024 14:02:04 +0100 Subject: [PATCH 1/9] PM: sleep: Update stale comment in device_resume() There is no function called __device_suspend() any more and it is still mentioned in a comment in device_resume(), so update that comment. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2787627.mvXUDI8C0e@rjwysocki.net --- drivers/base/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 4a67e83300e16..495ca6d7193a2 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -914,7 +914,7 @@ static void device_resume(struct device *dev, pm_message_t state, bool async) goto Complete; if (dev->power.direct_complete) { - /* Match the pm_runtime_disable() in __device_suspend(). */ + /* Match the pm_runtime_disable() in device_suspend(). */ pm_runtime_enable(dev); goto Complete; } From d6482311efa063da6d01c3c0cd57df81e743a480 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 18 Nov 2024 08:29:14 +0100 Subject: [PATCH 2/9] PM: sleep: autosleep: don't include 'pm_wakeup.h' directly The header clearly states that it does not want to be included directly, only via 'device.h'. 'platform_device.h' works equally well. Remove the direct inclusion. Signed-off-by: Wolfram Sang Link: https://patch.msgid.link/20241118072917.3853-16-wsa+renesas@sang-engineering.com [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki --- kernel/power/autosleep.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c index b29c8aca7486c..865df641b97cc 100644 --- a/kernel/power/autosleep.c +++ b/kernel/power/autosleep.c @@ -9,7 +9,6 @@ #include #include -#include #include "power.h" From cb7595225ab725db4a0cf24981d1d12882fd65e7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 18 Nov 2024 08:29:01 +0100 Subject: [PATCH 3/9] PM: sleep: sysfs: don't include 'pm_wakeup.h' directly The header clearly states that it does not want to be included directly, only via 'device.h'. 'platform_device.h' works equally well. Remove the direct inclusion. Signed-off-by: Wolfram Sang Link: https://patch.msgid.link/20241118072917.3853-3-wsa+renesas@sang-engineering.com [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/sysfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index f8163b559bf99..f84018125b466 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include "power.h" From eeed4bfbe9b96214162a09a7fbb7570fa9522ca4 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 3 Dec 2024 15:03:06 +0200 Subject: [PATCH 4/9] intel_idle: add Clearwater Forest SoC support Clearwater Forest (CWF) SoC has the same C-states as Sierra Forest (SRF) SoC. Add CWF support by re-using the SRF C-states table. Note: it is expected that CWF C-states will have same or very similar characteristics as SRF C-states (latency and target residency). However, there is a possibility that the characteristics will end up being different enough when the CWF platform development is finished. In that case, a separate CWF C-states table will be created and populated with the CWF-specific characteristics (latency and target residency). Signed-off-by: Artem Bityutskiy Link: https://patch.msgid.link/20241203130306.1559024-1-artem.bityutskiy@linux.intel.com Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index ac4d8faa3886c..23d0cd27a5812 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1651,6 +1651,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &idle_cpu_snr), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &idle_cpu_grr), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &idle_cpu_srf), + X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &idle_cpu_srf), {} }; From ebeeee390b6a341770789a50d81e677da9a103d9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 12 Dec 2024 13:01:02 +0100 Subject: [PATCH 5/9] PM: EM: Move sched domains rebuild function from schedutil to EM Function sugov_eas_rebuild_sd() defined in the schedutil cpufreq governor implements generic functionality that may be useful in other places. In particular, there is a plan to use it in the intel_pstate driver in the future. For this reason, move it from schedutil to the energy model code and rename it to em_rebuild_sched_domains(). This also helps to get rid of some #ifdeffery in schedutil which is a plus. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Christian Loehle --- drivers/cpufreq/cpufreq.c | 2 +- include/linux/energy_model.h | 2 ++ kernel/power/energy_model.c | 17 ++++++++++++++++ kernel/sched/cpufreq_schedutil.c | 33 ++++++-------------------------- 4 files changed, 26 insertions(+), 28 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1a4cae54a01bd..418236fef1723 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1538,7 +1538,7 @@ static int cpufreq_online(unsigned int cpu) /* * Register with the energy model before - * sugov_eas_rebuild_sd() is called, which will result + * em_rebuild_sched_domains() is called, which will result * in rebuilding of the sched domains, which should only be done * once the energy model is properly initialized for the policy * first. diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 752e0b2975820..78318d49276dc 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -179,6 +179,7 @@ int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, int em_dev_update_chip_binning(struct device *dev); int em_update_performance_limits(struct em_perf_domain *pd, unsigned long freq_min_khz, unsigned long freq_max_khz); +void em_rebuild_sched_domains(void); /** * em_pd_get_efficient_state() - Get an efficient performance state from the EM @@ -404,6 +405,7 @@ int em_update_performance_limits(struct em_perf_domain *pd, { return -EINVAL; } +static inline void em_rebuild_sched_domains(void) {} #endif #endif diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index d07faf42eace6..3874f0e97651e 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -908,3 +908,20 @@ int em_update_performance_limits(struct em_perf_domain *pd, return 0; } EXPORT_SYMBOL_GPL(em_update_performance_limits); + +static void rebuild_sd_workfn(struct work_struct *work) +{ + rebuild_sched_domains_energy(); +} + +void em_rebuild_sched_domains(void) +{ + static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn); + + /* + * When called from the cpufreq_register_driver() path, the + * cpu_hotplug_lock is already held, so use a work item to + * avoid nested locking in rebuild_sched_domains(). + */ + schedule_work(&rebuild_sd_work); +} diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 28c77904ea749..1c07a2f8c8b5d 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -604,31 +604,6 @@ static const struct kobj_type sugov_tunables_ktype = { /********************** cpufreq governor interface *********************/ -#ifdef CONFIG_ENERGY_MODEL -static void rebuild_sd_workfn(struct work_struct *work) -{ - rebuild_sched_domains_energy(); -} - -static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn); - -/* - * EAS shouldn't be attempted without sugov, so rebuild the sched_domains - * on governor changes to make sure the scheduler knows about it. - */ -static void sugov_eas_rebuild_sd(void) -{ - /* - * When called from the cpufreq_register_driver() path, the - * cpu_hotplug_lock is already held, so use a work item to - * avoid nested locking in rebuild_sched_domains(). - */ - schedule_work(&rebuild_sd_work); -} -#else -static inline void sugov_eas_rebuild_sd(void) { }; -#endif - struct cpufreq_governor schedutil_gov; static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy) @@ -784,7 +759,11 @@ static int sugov_init(struct cpufreq_policy *policy) goto fail; out: - sugov_eas_rebuild_sd(); + /* + * Schedutil is the preferred governor for EAS, so rebuild sched domains + * on governor changes to make sure the scheduler knows about them. + */ + em_rebuild_sched_domains(); mutex_unlock(&global_tunables_lock); return 0; @@ -826,7 +805,7 @@ static void sugov_exit(struct cpufreq_policy *policy) sugov_policy_free(sg_policy); cpufreq_disable_fast_switch(policy); - sugov_eas_rebuild_sd(); + em_rebuild_sched_domains(); } static int sugov_start(struct cpufreq_policy *policy) From b317268368546d6401af788648668f82e3ba1bd3 Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Wed, 18 Dec 2024 13:09:35 +0900 Subject: [PATCH 6/9] PM: wakeup: implement devm_device_init_wakeup() helper Some drivers that enable device wakeup fail to properly disable it during their cleanup, which results in a memory leak. To address this, introduce devm_device_init_wakeup(), a managed variant of device_init_wakeup(dev, true). With this managed helper, wakeup functionality will be automatically disabled when the device is released, ensuring a more reliable cleanup process. This need for this addition arose during a previous discussion [1]. Link: https://lore.kernel.org/linux-rtc/20241212100403.3799667-1-joe@pf.is.s.u-tokyo.ac.jp/ [1] Suggested-by: Alexandre Belloni Signed-off-by: Joe Hattori Link: https://patch.msgid.link/20241218040935.1921416-1-joe@pf.is.s.u-tokyo.ac.jp Signed-off-by: Rafael J. Wysocki --- include/linux/pm_wakeup.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 222f7530806c5..d501c09c60cd2 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -240,4 +240,21 @@ static inline int device_init_wakeup(struct device *dev, bool enable) return 0; } +static void device_disable_wakeup(void *dev) +{ + device_init_wakeup(dev, false); +} + +/** + * devm_device_init_wakeup - Resource managed device wakeup initialization. + * @dev: Device to handle. + * + * This function is the devm managed version of device_init_wakeup(dev, true). + */ +static inline int devm_device_init_wakeup(struct device *dev) +{ + device_init_wakeup(dev, true); + return devm_add_action_or_reset(dev, device_disable_wakeup, dev); +} + #endif /* _LINUX_PM_WAKEUP_H */ From 96484d21ae2775e142b23e99f90c02faef80d480 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 10 Jan 2025 22:31:07 -0800 Subject: [PATCH 7/9] PM: sleep: convert comment from kernel-doc to plain comment Modify a non-kernel-doc comment to begin with /* instead of /** so that it does not cause a kernel-doc warning. power.h:114: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Auxiliary structure used for reading the snapshot image data and power.h:114: warning: missing initial short description on line: * Auxiliary structure used for reading the snapshot image data and Signed-off-by: Randy Dunlap Acked-by: Pavel Machek Link: https://patch.msgid.link/20250111063107.910825-1-rdunlap@infradead.org Signed-off-by: Rafael J. Wysocki --- kernel/power/power.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/power.h b/kernel/power/power.h index de0e6b1077f23..c352dea2f67b5 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -110,7 +110,7 @@ extern int hibernate_preallocate_memory(void); extern void clear_or_poison_free_pages(void); -/** +/* * Auxiliary structure used for reading the snapshot image data and * metadata from and writing them to the list of page backup entries * (PBEs) which is the main data structure of swsusp. From 56cabb937f8f6091c231bdbc17c0d0a10130fb5d Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 9 Jan 2025 12:59:58 -0800 Subject: [PATCH 8/9] PM: sleep: Allow configuring the DPM watchdog to warn earlier than panic Allow configuring the DPM watchdog to warn about slow suspend/resume functions without causing a system panic(). This allows you to set the DPM_WATCHDOG_WARNING_TIMEOUT to something like 5 or 10 seconds to get warnings about slow suspend/resume functions that eventually succeed. Signed-off-by: Douglas Anderson Reviewed-by: Tomasz Figa Link: https://patch.msgid.link/20250109125957.v2.1.I4554f931b8da97948f308ecc651b124338ee9603@changeid [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 24 +++++++++++++++++++----- kernel/power/Kconfig | 21 ++++++++++++++++++++- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 495ca6d7193a2..cbc9a7a75def7 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -496,6 +496,7 @@ struct dpm_watchdog { struct device *dev; struct task_struct *tsk; struct timer_list timer; + bool fatal; }; #define DECLARE_DPM_WATCHDOG_ON_STACK(wd) \ @@ -512,11 +513,23 @@ struct dpm_watchdog { static void dpm_watchdog_handler(struct timer_list *t) { struct dpm_watchdog *wd = from_timer(wd, t, timer); + struct timer_list *timer = &wd->timer; + unsigned int time_left; + + if (wd->fatal) { + dev_emerg(wd->dev, "**** DPM device timeout ****\n"); + show_stack(wd->tsk, NULL, KERN_EMERG); + panic("%s %s: unrecoverable failure\n", + dev_driver_string(wd->dev), dev_name(wd->dev)); + } + + time_left = CONFIG_DPM_WATCHDOG_TIMEOUT - CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT; + dev_warn(wd->dev, "**** DPM device timeout after %u seconds; %u seconds until panic ****\n", + CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT, time_left); + show_stack(wd->tsk, NULL, KERN_WARNING); - dev_emerg(wd->dev, "**** DPM device timeout ****\n"); - show_stack(wd->tsk, NULL, KERN_EMERG); - panic("%s %s: unrecoverable failure\n", - dev_driver_string(wd->dev), dev_name(wd->dev)); + wd->fatal = true; + mod_timer(timer, jiffies + HZ * time_left); } /** @@ -530,10 +543,11 @@ static void dpm_watchdog_set(struct dpm_watchdog *wd, struct device *dev) wd->dev = dev; wd->tsk = current; + wd->fatal = CONFIG_DPM_WATCHDOG_TIMEOUT == CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT; timer_setup_on_stack(timer, dpm_watchdog_handler, 0); /* use same timeout value for both suspend and resume */ - timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_TIMEOUT; + timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT; add_timer(timer); } diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index afce8130d8b92..ca947ed32e3dd 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -257,11 +257,30 @@ config DPM_WATCHDOG boot session. config DPM_WATCHDOG_TIMEOUT - int "Watchdog timeout in seconds" + int "Watchdog timeout to panic in seconds" range 1 120 default 120 depends on DPM_WATCHDOG +config DPM_WATCHDOG_WARNING_TIMEOUT + int "Watchdog timeout to warn in seconds" + range 1 DPM_WATCHDOG_TIMEOUT + default DPM_WATCHDOG_TIMEOUT + depends on DPM_WATCHDOG + help + If the DPM watchdog warning timeout and main timeout are + different then a non-fatal warning (with a stack trace of + the stuck suspend routine) will be printed when the warning + timeout expires. If the suspend routine gets un-stuck + before the main timeout expires then no other action is + taken. If the routine continues to be stuck and the main + timeout expires then an emergency-level message and stack + trace will be printed and the system will panic. + + If the warning timeout is equal to the main timeout (the + default) then the warning will never happen and the system + will jump straight to panic when the main timeout expires. + config PM_TRACE bool help From fd8318a32573d73eb20637a0c80689de0dc98169 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 3 Jan 2025 16:41:13 +0800 Subject: [PATCH 9/9] PM: sleep: wakeirq: Introduce device-managed variant of dev_pm_set_wake_irq() Add device-managed variant of dev_pm_set_wake_irq which automatically clear the wake irq on device destruction to simplify error handling and resource management in drivers. Signed-off-by: Peng Fan Link: https://patch.msgid.link/20250103-wake_irq-v2-1-e3aeff5e9966@nxp.com Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeirq.c | 26 ++++++++++++++++++++++++++ include/linux/pm_wakeirq.h | 6 ++++++ 2 files changed, 32 insertions(+) diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index 5a5a9e978e85f..8aa28c08b2891 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -103,6 +103,32 @@ void dev_pm_clear_wake_irq(struct device *dev) } EXPORT_SYMBOL_GPL(dev_pm_clear_wake_irq); +static void devm_pm_clear_wake_irq(void *dev) +{ + dev_pm_clear_wake_irq(dev); +} + +/** + * devm_pm_set_wake_irq - device-managed variant of dev_pm_set_wake_irq + * @dev: Device entry + * @irq: Device IO interrupt + * + * + * Attach a device IO interrupt as a wake IRQ, same with dev_pm_set_wake_irq, + * but the device will be auto clear wake capability on driver detach. + */ +int devm_pm_set_wake_irq(struct device *dev, int irq) +{ + int ret; + + ret = dev_pm_set_wake_irq(dev, irq); + if (ret) + return ret; + + return devm_add_action_or_reset(dev, devm_pm_clear_wake_irq, dev); +} +EXPORT_SYMBOL_GPL(devm_pm_set_wake_irq); + /** * handle_threaded_wake_irq - Handler for dedicated wake-up interrupts * @irq: Device specific dedicated wake-up interrupt diff --git a/include/linux/pm_wakeirq.h b/include/linux/pm_wakeirq.h index d9642c6cf8521..25b63ed51b765 100644 --- a/include/linux/pm_wakeirq.h +++ b/include/linux/pm_wakeirq.h @@ -10,6 +10,7 @@ extern int dev_pm_set_wake_irq(struct device *dev, int irq); extern int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq); extern int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq); extern void dev_pm_clear_wake_irq(struct device *dev); +extern int devm_pm_set_wake_irq(struct device *dev, int irq); #else /* !CONFIG_PM */ @@ -32,5 +33,10 @@ static inline void dev_pm_clear_wake_irq(struct device *dev) { } +static inline int devm_pm_set_wake_irq(struct device *dev, int irq) +{ + return 0; +} + #endif /* CONFIG_PM */ #endif /* _LINUX_PM_WAKEIRQ_H */