From 874b6476fa888e79e41daf7653384c8d70927b90 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 26 Aug 2024 18:21:59 +0200 Subject: [PATCH 1/6] thermal: sysfs: Add sanity checks for trip temperature and hysteresis Add sanity checks for new trip temperature and hysteresis values to trip_point_temp_store() and trip_point_hyst_store() to prevent trip point threshold from falling below THERMAL_TEMP_INVALID. However, still allow user space to pass THERMAL_TEMP_INVALID as the new trip temperature value to invalidate the trip if necessary. Also allow the hysteresis to be updated when the temperature is invalid to allow user space to avoid having to adjust hysteresis after a valid temperature has been set, but in that case just change the value and do nothing else. Fixes: be0a3600aa1e ("thermal: sysfs: Rework the handling of trip point updates") Cc: 6.8+ # 6.8+ Signed-off-by: Rafael J. Wysocki Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/12528772.O9o76ZdvQC@rjwysocki.net --- drivers/thermal/thermal_sysfs.c | 50 +++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index b740b60032ee4..197ae12624661 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -111,18 +111,26 @@ trip_point_temp_store(struct device *dev, struct device_attribute *attr, mutex_lock(&tz->lock); - if (temp != trip->temperature) { - if (tz->ops.set_trip_temp) { - ret = tz->ops.set_trip_temp(tz, trip, temp); - if (ret) - goto unlock; - } + if (temp == trip->temperature) + goto unlock; - thermal_zone_set_trip_temp(tz, trip, temp); + /* Arrange the condition to avoid integer overflows. */ + if (temp != THERMAL_TEMP_INVALID && + temp <= trip->hysteresis + THERMAL_TEMP_INVALID) { + ret = -EINVAL; + goto unlock; + } - __thermal_zone_device_update(tz, THERMAL_TRIP_CHANGED); + if (tz->ops.set_trip_temp) { + ret = tz->ops.set_trip_temp(tz, trip, temp); + if (ret) + goto unlock; } + thermal_zone_set_trip_temp(tz, trip, temp); + + __thermal_zone_device_update(tz, THERMAL_TRIP_CHANGED); + unlock: mutex_unlock(&tz->lock); @@ -152,15 +160,33 @@ trip_point_hyst_store(struct device *dev, struct device_attribute *attr, mutex_lock(&tz->lock); - if (hyst != trip->hysteresis) { - thermal_zone_set_trip_hyst(tz, trip, hyst); + if (hyst == trip->hysteresis) + goto unlock; + + /* + * Allow the hysteresis to be updated when the temperature is invalid + * to allow user space to avoid having to adjust hysteresis after a + * valid temperature has been set, but in that case just change the + * value and do nothing else. + */ + if (trip->temperature == THERMAL_TEMP_INVALID) { + WRITE_ONCE(trip->hysteresis, hyst); + goto unlock; + } - __thermal_zone_device_update(tz, THERMAL_TRIP_CHANGED); + if (trip->temperature - hyst <= THERMAL_TEMP_INVALID) { + ret = -EINVAL; + goto unlock; } + thermal_zone_set_trip_hyst(tz, trip, hyst); + + __thermal_zone_device_update(tz, THERMAL_TRIP_CHANGED); + +unlock: mutex_unlock(&tz->lock); - return count; + return ret ? ret : count; } static ssize_t From 15cb56bd529868d9242b22812fc69bd144bfdc94 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 26 Aug 2024 18:23:49 +0200 Subject: [PATCH 2/6] thermal: gov_bang_bang: Adjust states of all uninitialized instances If a cooling device is registered after a thermal zone it should be bound to and the trip point it should be bound to has already been crossed by the zone temperature on the way up, the cooling device's state may need to be adjusted, but the Bang-bang governor will not do that because its .manage() callback only looks at thermal instances for trip points whose thresholds are below or at the zone temperature. Address this by updating bang_bang_manage() to look at all of the uninitialized thermal instances and setting their target states in accordance with the position of the zone temperature with respect to the threshold of the given trip point. Fixes: 5f64b4a1ab1b ("thermal: gov_bang_bang: Add .manage() callback") Signed-off-by: Rafael J. Wysocki Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/6103874.lOV4Wx5bFT@rjwysocki.net --- drivers/thermal/gov_bang_bang.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c index daed67d19efb8..863e7a4272e66 100644 --- a/drivers/thermal/gov_bang_bang.c +++ b/drivers/thermal/gov_bang_bang.c @@ -92,23 +92,21 @@ static void bang_bang_manage(struct thermal_zone_device *tz) for_each_trip_desc(tz, td) { const struct thermal_trip *trip = &td->trip; + bool turn_on; - if (tz->temperature >= td->threshold || - trip->temperature == THERMAL_TEMP_INVALID || + if (trip->temperature == THERMAL_TEMP_INVALID || trip->type == THERMAL_TRIP_CRITICAL || trip->type == THERMAL_TRIP_HOT) continue; /* - * If the initial cooling device state is "on", but the zone - * temperature is not above the trip point, the core will not - * call bang_bang_control() until the zone temperature reaches - * the trip point temperature which may be never. In those - * cases, set the initial state of the cooling device to 0. + * Adjust the target states for uninitialized thermal instances + * to the thermal zone temperature and the trip point threshold. */ + turn_on = tz->temperature >= td->threshold; list_for_each_entry(instance, &tz->thermal_instances, tz_node) { if (!instance->initialized && instance->trip == trip) - bang_bang_set_instance_target(instance, 0); + bang_bang_set_instance_target(instance, turn_on); } } From 49029b507e3ad9de35faca497c7bde0e83fcc036 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 26 Aug 2024 18:30:42 +0200 Subject: [PATCH 3/6] thermal: core: Drop redundant lockdep_assert_held() Along the lines of commit 24aad192c671 ("thermal: core: Drop redundant checks from thermal_bind_cdev_to_trip()") notice that thermal_unbind_cdev_from_trip() is only called by thermal_zone_cdev_unbind() under the thermal zone lock, so it need not use lockdep_assert_held() for that lock. No functional impact. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/3341369.44csPzL39Z@rjwysocki.net --- drivers/thermal/thermal_core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 4187f207bce91..9215a7bc92c79 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -891,8 +891,6 @@ static void thermal_unbind_cdev_from_trip(struct thermal_zone_device *tz, { struct thermal_instance *pos, *next; - lockdep_assert_held(&tz->lock); - mutex_lock(&cdev->lock); list_for_each_entry_safe(pos, next, &tz->thermal_instances, tz_node) { if (pos->trip == trip && pos->cdev == cdev) { From fcfacd544b74ee76de6fd8642340345669da407b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 26 Aug 2024 18:31:32 +0200 Subject: [PATCH 4/6] thermal: core: Drop dead code from monitor_thermal_zone() Since monitor_thermal_zone() is only called when the given thermal zone has been enabled, as per the thermal_zone_device_is_enabled() check in __thermal_zone_device_update(), the tz->mode check in it always evaluates to "false" and the thermal_zone_device_set_polling() invocation depending on it is dead code, so drop it. No functional impact. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/10547425.nUPlyArG6x@rjwysocki.net --- drivers/thermal/thermal_core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 9215a7bc92c79..2f11d271300a0 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -364,9 +364,7 @@ static void thermal_zone_recheck(struct thermal_zone_device *tz, int error) static void monitor_thermal_zone(struct thermal_zone_device *tz) { - if (tz->mode != THERMAL_DEVICE_ENABLED) - thermal_zone_device_set_polling(tz, 0); - else if (tz->passive > 0) + if (tz->passive > 0) thermal_zone_device_set_polling(tz, tz->passive_delay_jiffies); else if (tz->polling_delay_jiffies) thermal_zone_device_set_polling(tz, tz->polling_delay_jiffies); From 3c3ee53df47605476b9ff86a5abc6057b72be89c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 26 Aug 2024 18:32:36 +0200 Subject: [PATCH 5/6] thermal: core: Check passive delay in monitor_thermal_zone() The only case in which thermal_zone_device_set_polling() is called with its second argument equal to zero is when passive cooling is under way and passive_delay_jiffies is 0, which only happens when the given thermal zone is not polled at all. If monitor_thermal_zone() is modified to check passive_delay_jiffies directly, the check of the thermal_zone_device_set_polling() second argument against 0 can be dropped and a passive_delay check can be dropped from thermal_zone_device_register_with_trips(), so change the code accordingly. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/2004353.PYKUYFuaPT@rjwysocki.net --- drivers/thermal/thermal_core.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 2f11d271300a0..dd9b303a98f51 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -323,11 +323,6 @@ static void thermal_zone_broken_disable(struct thermal_zone_device *tz) static void thermal_zone_device_set_polling(struct thermal_zone_device *tz, unsigned long delay) { - if (!delay) { - cancel_delayed_work(&tz->poll_queue); - return; - } - if (delay > HZ) delay = round_jiffies_relative(delay); @@ -364,7 +359,7 @@ static void thermal_zone_recheck(struct thermal_zone_device *tz, int error) static void monitor_thermal_zone(struct thermal_zone_device *tz) { - if (tz->passive > 0) + if (tz->passive > 0 && tz->passive_delay_jiffies) thermal_zone_device_set_polling(tz, tz->passive_delay_jiffies); else if (tz->polling_delay_jiffies) thermal_zone_device_set_polling(tz, tz->polling_delay_jiffies); @@ -1411,13 +1406,8 @@ thermal_zone_device_register_with_trips(const char *type, if (num_trips > 0 && !trips) return ERR_PTR(-EINVAL); - if (polling_delay) { - if (passive_delay > polling_delay) - return ERR_PTR(-EINVAL); - - if (!passive_delay) - passive_delay = polling_delay; - } + if (polling_delay && passive_delay > polling_delay) + return ERR_PTR(-EINVAL); if (!thermal_class) return ERR_PTR(-ENODEV); From 54fccad63ec88924db828df6fc0648930bccf345 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 26 Aug 2024 18:37:16 +0200 Subject: [PATCH 6/6] thermal: core: Drop thermal_zone_device_is_enabled() There are only two callers of thermal_zone_device_is_enabled() and one of them call is under the zone lock and the other one uses lockdep_assert_held() on that lock. Thus the lockdep_assert_held() in thermal_zone_device_is_enabled() is redundant and it could be dropped, but then the function would merely become a wrapper around a simple tz->mode check that is more convenient to do directly. Accordingly, drop thermal_zone_device_is_enabled() altogether and update its callers to check tz->mode directly as appropriate. While at it, combine the tz->mode and tz->suspended checks in __thermal_zone_device_update() because they are of a similar category and if any of them evaluates to "true", the outcome is the same. No intentinal functional impact. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/9353673.CDJkKcVGEf@rjwysocki.net --- drivers/thermal/thermal_core.c | 12 +----------- drivers/thermal/thermal_core.h | 3 --- drivers/thermal/thermal_sysfs.c | 2 +- 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index dd9b303a98f51..073d02e21352e 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -547,10 +547,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, int low = -INT_MAX, high = INT_MAX; int temp, ret; - if (tz->suspended) - return; - - if (!thermal_zone_device_is_enabled(tz)) + if (tz->suspended || tz->mode != THERMAL_DEVICE_ENABLED) return; ret = __thermal_zone_get_temp(tz, &temp); @@ -652,13 +649,6 @@ int thermal_zone_device_disable(struct thermal_zone_device *tz) } EXPORT_SYMBOL_GPL(thermal_zone_device_disable); -int thermal_zone_device_is_enabled(struct thermal_zone_device *tz) -{ - lockdep_assert_held(&tz->lock); - - return tz->mode == THERMAL_DEVICE_ENABLED; -} - static bool thermal_zone_is_present(struct thermal_zone_device *tz) { return !list_empty(&tz->node); diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 9b19b614a1bc3..50b858aa173a0 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -284,7 +284,4 @@ thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev, unsigned long new_state) {} #endif /* CONFIG_THERMAL_STATISTICS */ -/* device tree support */ -int thermal_zone_device_is_enabled(struct thermal_zone_device *tz); - #endif /* __THERMAL_CORE_H__ */ diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index 197ae12624661..1838aa729bb50 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -53,7 +53,7 @@ mode_show(struct device *dev, struct device_attribute *attr, char *buf) int enabled; mutex_lock(&tz->lock); - enabled = thermal_zone_device_is_enabled(tz); + enabled = tz->mode == THERMAL_DEVICE_ENABLED; mutex_unlock(&tz->lock); return sprintf(buf, "%s\n", enabled ? "enabled" : "disabled");