From 4570ddda43387e5a130dd85e71a1947b0c11da77 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 12 Mar 2021 14:04:07 +0100 Subject: [PATCH 1/5] powercap/drivers/dtpm: Encapsulate even more the code In order to increase the self-encapsulation of the dtpm generic code, the following changes are adding a power update ops to the dtpm ops. That allows the generic code to call directly the dtpm backend function to update the power values. The power update function does compute the power characteristics when the function is invoked. In the case of the CPUs, the power consumption depends on the number of online CPUs. The online CPUs mask is not up to date at CPUHP_AP_ONLINE_DYN state in the tear down callback. That is the reason why the online / offline are at separate state. As there is already an existing state for DTPM, this one is only moved to the DEAD state, so there is no addition of new state with these changes. The dtpm node is not removed when the cpu is unplugged. That simplifies the code for the next changes and results in a more self-encapsulated code. Signed-off-by: Daniel Lezcano Reviewed-by: Lukasz Luba Link: https://lore.kernel.org/r/20210312130411.29833-1-daniel.lezcano@linaro.org --- drivers/powercap/dtpm.c | 54 ++++++------- drivers/powercap/dtpm_cpu.c | 148 ++++++++++++++++-------------------- include/linux/cpuhotplug.h | 2 +- include/linux/dtpm.h | 3 +- 4 files changed, 97 insertions(+), 110 deletions(-) diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index c2185ec5f887b..58433b8ef9a19 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -116,8 +116,6 @@ static void __dtpm_sub_power(struct dtpm *dtpm) parent->power_limit -= dtpm->power_limit; parent = parent->parent; } - - __dtpm_rebalance_weight(root); } static void __dtpm_add_power(struct dtpm *dtpm) @@ -130,45 +128,45 @@ static void __dtpm_add_power(struct dtpm *dtpm) parent->power_limit += dtpm->power_limit; parent = parent->parent; } +} + +static int __dtpm_update_power(struct dtpm *dtpm) +{ + int ret; + + __dtpm_sub_power(dtpm); - __dtpm_rebalance_weight(root); + ret = dtpm->ops->update_power_uw(dtpm); + if (ret) + pr_err("Failed to update power for '%s': %d\n", + dtpm->zone.name, ret); + + if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags)) + dtpm->power_limit = dtpm->power_max; + + __dtpm_add_power(dtpm); + + if (root) + __dtpm_rebalance_weight(root); + + return ret; } /** * dtpm_update_power - Update the power on the dtpm * @dtpm: a pointer to a dtpm structure to update - * @power_min: a u64 representing the new power_min value - * @power_max: a u64 representing the new power_max value * * Function to update the power values of the dtpm node specified in * parameter. These new values will be propagated to the tree. * * Return: zero on success, -EINVAL if the values are inconsistent */ -int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max) +int dtpm_update_power(struct dtpm *dtpm) { - int ret = 0; + int ret; mutex_lock(&dtpm_lock); - - if (power_min == dtpm->power_min && power_max == dtpm->power_max) - goto unlock; - - if (power_max < power_min) { - ret = -EINVAL; - goto unlock; - } - - __dtpm_sub_power(dtpm); - - dtpm->power_min = power_min; - dtpm->power_max = power_max; - if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags)) - dtpm->power_limit = power_max; - - __dtpm_add_power(dtpm); - -unlock: + ret = __dtpm_update_power(dtpm); mutex_unlock(&dtpm_lock); return ret; @@ -436,6 +434,7 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) if (dtpm->ops && !(dtpm->ops->set_power_uw && dtpm->ops->get_power_uw && + dtpm->ops->update_power_uw && dtpm->ops->release)) return -EINVAL; @@ -455,7 +454,8 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) root = dtpm; } - __dtpm_add_power(dtpm); + if (dtpm->ops && !dtpm->ops->update_power_uw(dtpm)) + __dtpm_add_power(dtpm); pr_info("Registered dtpm node '%s' / %llu-%llu uW, \n", dtpm->zone.name, dtpm->power_min, dtpm->power_max); diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index 51c366938acd3..f6076de39540f 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -14,6 +14,8 @@ * The CPU hotplug is supported and the power numbers will be updated * if a CPU is hot plugged / unplugged. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -23,8 +25,6 @@ #include #include -static struct dtpm *__parent; - static DEFINE_PER_CPU(struct dtpm *, dtpm_per_cpu); struct dtpm_cpu { @@ -32,57 +32,16 @@ struct dtpm_cpu { int cpu; }; -/* - * When a new CPU is inserted at hotplug or boot time, add the power - * contribution and update the dtpm tree. - */ -static int power_add(struct dtpm *dtpm, struct em_perf_domain *em) -{ - u64 power_min, power_max; - - power_min = em->table[0].power; - power_min *= MICROWATT_PER_MILLIWATT; - power_min += dtpm->power_min; - - power_max = em->table[em->nr_perf_states - 1].power; - power_max *= MICROWATT_PER_MILLIWATT; - power_max += dtpm->power_max; - - return dtpm_update_power(dtpm, power_min, power_max); -} - -/* - * When a CPU is unplugged, remove its power contribution from the - * dtpm tree. - */ -static int power_sub(struct dtpm *dtpm, struct em_perf_domain *em) -{ - u64 power_min, power_max; - - power_min = em->table[0].power; - power_min *= MICROWATT_PER_MILLIWATT; - power_min = dtpm->power_min - power_min; - - power_max = em->table[em->nr_perf_states - 1].power; - power_max *= MICROWATT_PER_MILLIWATT; - power_max = dtpm->power_max - power_max; - - return dtpm_update_power(dtpm, power_min, power_max); -} - static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) { struct dtpm_cpu *dtpm_cpu = dtpm->private; - struct em_perf_domain *pd; + struct em_perf_domain *pd = em_cpu_get(dtpm_cpu->cpu); struct cpumask cpus; unsigned long freq; u64 power; int i, nr_cpus; - pd = em_cpu_get(dtpm_cpu->cpu); - cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus)); - nr_cpus = cpumask_weight(&cpus); for (i = 0; i < pd->nr_perf_states; i++) { @@ -113,6 +72,7 @@ static u64 get_pd_power_uw(struct dtpm *dtpm) pd = em_cpu_get(dtpm_cpu->cpu); freq = cpufreq_quick_get(dtpm_cpu->cpu); + cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus)); nr_cpus = cpumask_weight(&cpus); @@ -128,6 +88,27 @@ static u64 get_pd_power_uw(struct dtpm *dtpm) return 0; } +static int update_pd_power_uw(struct dtpm *dtpm) +{ + struct dtpm_cpu *dtpm_cpu = dtpm->private; + struct em_perf_domain *em = em_cpu_get(dtpm_cpu->cpu); + struct cpumask cpus; + int nr_cpus; + + cpumask_and(&cpus, cpu_online_mask, to_cpumask(em->cpus)); + nr_cpus = cpumask_weight(&cpus); + + dtpm->power_min = em->table[0].power; + dtpm->power_min *= MICROWATT_PER_MILLIWATT; + dtpm->power_min *= nr_cpus; + + dtpm->power_max = em->table[em->nr_perf_states - 1].power; + dtpm->power_max *= MICROWATT_PER_MILLIWATT; + dtpm->power_max *= nr_cpus; + + return 0; +} + static void pd_release(struct dtpm *dtpm) { struct dtpm_cpu *dtpm_cpu = dtpm->private; @@ -139,39 +120,24 @@ static void pd_release(struct dtpm *dtpm) } static struct dtpm_ops dtpm_ops = { - .set_power_uw = set_pd_power_limit, - .get_power_uw = get_pd_power_uw, - .release = pd_release, + .set_power_uw = set_pd_power_limit, + .get_power_uw = get_pd_power_uw, + .update_power_uw = update_pd_power_uw, + .release = pd_release, }; static int cpuhp_dtpm_cpu_offline(unsigned int cpu) { - struct cpufreq_policy *policy; struct em_perf_domain *pd; struct dtpm *dtpm; - policy = cpufreq_cpu_get(cpu); - - if (!policy) - return 0; - pd = em_cpu_get(cpu); if (!pd) return -EINVAL; dtpm = per_cpu(dtpm_per_cpu, cpu); - power_sub(dtpm, pd); - - if (cpumask_weight(policy->cpus) != 1) - return 0; - - for_each_cpu(cpu, policy->related_cpus) - per_cpu(dtpm_per_cpu, cpu) = NULL; - - dtpm_unregister(dtpm); - - return 0; + return dtpm_update_power(dtpm); } static int cpuhp_dtpm_cpu_online(unsigned int cpu) @@ -184,7 +150,6 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu) int ret = -ENOMEM; policy = cpufreq_cpu_get(cpu); - if (!policy) return 0; @@ -194,7 +159,7 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu) dtpm = per_cpu(dtpm_per_cpu, cpu); if (dtpm) - return power_add(dtpm, pd); + return dtpm_update_power(dtpm); dtpm = dtpm_alloc(&dtpm_ops); if (!dtpm) @@ -210,27 +175,20 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu) for_each_cpu(cpu, policy->related_cpus) per_cpu(dtpm_per_cpu, cpu) = dtpm; - sprintf(name, "cpu%d", dtpm_cpu->cpu); + snprintf(name, sizeof(name), "cpu%d-cpufreq", dtpm_cpu->cpu); - ret = dtpm_register(name, dtpm, __parent); + ret = dtpm_register(name, dtpm, NULL); if (ret) goto out_kfree_dtpm_cpu; - ret = power_add(dtpm, pd); - if (ret) - goto out_dtpm_unregister; - ret = freq_qos_add_request(&policy->constraints, &dtpm_cpu->qos_req, FREQ_QOS_MAX, pd->table[pd->nr_perf_states - 1].frequency); if (ret) - goto out_power_sub; + goto out_dtpm_unregister; return 0; -out_power_sub: - power_sub(dtpm, pd); - out_dtpm_unregister: dtpm_unregister(dtpm); dtpm_cpu = NULL; @@ -248,10 +206,38 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu) int dtpm_register_cpu(struct dtpm *parent) { - __parent = parent; + int ret; + + /* + * The callbacks at CPU hotplug time are calling + * dtpm_update_power() which in turns calls update_pd_power(). + * + * The function update_pd_power() uses the online mask to + * figure out the power consumption limits. + * + * At CPUHP_AP_ONLINE_DYN, the CPU is present in the CPU + * online mask when the cpuhp_dtpm_cpu_online function is + * called, but the CPU is still in the online mask for the + * tear down callback. So the power can not be updated when + * the CPU is unplugged. + * + * At CPUHP_AP_DTPM_CPU_DEAD, the situation is the opposite as + * above. The CPU online mask is not up to date when the CPU + * is plugged in. + * + * For this reason, we need to call the online and offline + * callbacks at different moments when the CPU online mask is + * consistent with the power numbers we want to update. + */ + ret = cpuhp_setup_state(CPUHP_AP_DTPM_CPU_DEAD, "dtpm_cpu:offline", + NULL, cpuhp_dtpm_cpu_offline); + if (ret < 0) + return ret; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dtpm_cpu:online", + cpuhp_dtpm_cpu_online, NULL); + if (ret < 0) + return ret; - return cpuhp_setup_state(CPUHP_AP_DTPM_CPU_ONLINE, - "dtpm_cpu:online", - cpuhp_dtpm_cpu_online, - cpuhp_dtpm_cpu_offline); + return 0; } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 832d8a74fa596..ad9a34a80440e 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -97,6 +97,7 @@ enum cpuhp_state { CPUHP_LUSTRE_CFS_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, + CPUHP_AP_DTPM_CPU_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, @@ -242,7 +243,6 @@ enum cpuhp_state { CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30, CPUHP_AP_X86_HPET_ONLINE, CPUHP_AP_X86_KVM_CLK_ONLINE, - CPUHP_AP_DTPM_CPU_ONLINE, CPUHP_AP_ACTIVE, CPUHP_ONLINE, }; diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h index e80a332e3d8a1..acf8d3638988a 100644 --- a/include/linux/dtpm.h +++ b/include/linux/dtpm.h @@ -29,6 +29,7 @@ struct dtpm { struct dtpm_ops { u64 (*set_power_uw)(struct dtpm *, u64); u64 (*get_power_uw)(struct dtpm *); + int (*update_power_uw)(struct dtpm *); void (*release)(struct dtpm *); }; @@ -62,7 +63,7 @@ static inline struct dtpm *to_dtpm(struct powercap_zone *zone) return container_of(zone, struct dtpm, zone); } -int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max); +int dtpm_update_power(struct dtpm *dtpm); int dtpm_release_zone(struct powercap_zone *pcz); From 7a89d7eacf8e84f2afb94db5ae9d9f9faa93f01c Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 12 Mar 2021 14:04:09 +0100 Subject: [PATCH 2/5] powercap/drivers/dtpm: Simplify the dtpm table The dtpm table is an array of pointers, that forces the user of the table to define initdata along with the declaration of the table entry. It is more efficient to create an array of dtpm structure, so the declaration of the table entry can be done by initializing the different fields. Signed-off-by: Daniel Lezcano Reviewed-by: Lukasz Luba Link: https://lore.kernel.org/r/20210312130411.29833-3-daniel.lezcano@linaro.org --- drivers/powercap/dtpm.c | 4 ++-- drivers/powercap/dtpm_cpu.c | 4 +++- include/linux/dtpm.h | 20 +++++++++----------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index 58433b8ef9a19..8c032398f6ceb 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -467,7 +467,7 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) static int __init dtpm_init(void) { - struct dtpm_descr **dtpm_descr; + struct dtpm_descr *dtpm_descr; pct = powercap_register_control_type(NULL, "dtpm", NULL); if (IS_ERR(pct)) { @@ -476,7 +476,7 @@ static int __init dtpm_init(void) } for_each_dtpm_table(dtpm_descr) - (*dtpm_descr)->init(*dtpm_descr); + dtpm_descr->init(); return 0; } diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index f6076de39540f..98841524a782b 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -204,7 +204,7 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu) return ret; } -int dtpm_register_cpu(struct dtpm *parent) +static int __init dtpm_cpu_init(void) { int ret; @@ -241,3 +241,5 @@ int dtpm_register_cpu(struct dtpm *parent) return 0; } + +DTPM_DECLARE(dtpm_cpu, dtpm_cpu_init); diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h index acf8d3638988a..1e53db6bd5f98 100644 --- a/include/linux/dtpm.h +++ b/include/linux/dtpm.h @@ -33,25 +33,23 @@ struct dtpm_ops { void (*release)(struct dtpm *); }; -struct dtpm_descr; - -typedef int (*dtpm_init_t)(struct dtpm_descr *); +typedef int (*dtpm_init_t)(void); struct dtpm_descr { - struct dtpm *parent; - const char *name; dtpm_init_t init; }; /* Init section thermal table */ -extern struct dtpm_descr *__dtpm_table[]; -extern struct dtpm_descr *__dtpm_table_end[]; +extern struct dtpm_descr __dtpm_table[]; +extern struct dtpm_descr __dtpm_table_end[]; -#define DTPM_TABLE_ENTRY(name) \ - static typeof(name) *__dtpm_table_entry_##name \ - __used __section("__dtpm_table") = &name +#define DTPM_TABLE_ENTRY(name, __init) \ + static struct dtpm_descr __dtpm_table_entry_##name \ + __used __section("__dtpm_table") = { \ + .init = __init, \ + } -#define DTPM_DECLARE(name) DTPM_TABLE_ENTRY(name) +#define DTPM_DECLARE(name, init) DTPM_TABLE_ENTRY(name, init) #define for_each_dtpm_table(__dtpm) \ for (__dtpm = __dtpm_table; \ From d2cdc6adc30879d81160199fc7c6ab890fc4bd4c Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 12 Mar 2021 14:04:10 +0100 Subject: [PATCH 3/5] powercap/drivers/dtpm: Use container_of instead of a private data field The dtpm framework provides an API to allocate a dtpm node. However when a backend dtpm driver needs to allocate a dtpm node it must define its own structure and store the pointer of this structure in the private field of the dtpm structure. It is more elegant to use the container_of macro and add the dtpm structure inside the dtpm backend specific structure. The code will be able to deal properly with the dtpm structure as a generic entity, making all this even more self-encapsulated. The dtpm_alloc() function does no longer make sense as the dtpm structure will be allocated when allocating the device specific dtpm structure. The dtpm_init() is provided instead. Signed-off-by: Daniel Lezcano Reviewed-by: Lukasz Luba Link: https://lore.kernel.org/r/20210312130411.29833-4-daniel.lezcano@linaro.org --- drivers/powercap/dtpm.c | 18 +++++--------- drivers/powercap/dtpm_cpu.c | 48 ++++++++++++++++++------------------- include/linux/dtpm.h | 3 +-- 3 files changed, 30 insertions(+), 39 deletions(-) diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index 8c032398f6ceb..c7c5529b61eb6 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -357,24 +357,18 @@ static struct powercap_zone_ops zone_ops = { }; /** - * dtpm_alloc - Allocate and initialize a dtpm struct - * @name: a string specifying the name of the node - * - * Return: a struct dtpm pointer, NULL in case of error + * dtpm_init - Allocate and initialize a dtpm struct + * @dtpm: The dtpm struct pointer to be initialized + * @ops: The dtpm device specific ops, NULL for a virtual node */ -struct dtpm *dtpm_alloc(struct dtpm_ops *ops) +void dtpm_init(struct dtpm *dtpm, struct dtpm_ops *ops) { - struct dtpm *dtpm; - - dtpm = kzalloc(sizeof(*dtpm), GFP_KERNEL); if (dtpm) { INIT_LIST_HEAD(&dtpm->children); INIT_LIST_HEAD(&dtpm->sibling); dtpm->weight = 1024; dtpm->ops = ops; } - - return dtpm; } /** @@ -465,7 +459,7 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) return 0; } -static int __init dtpm_init(void) +static int __init init_dtpm(void) { struct dtpm_descr *dtpm_descr; @@ -480,4 +474,4 @@ static int __init dtpm_init(void) return 0; } -late_initcall(dtpm_init); +late_initcall(init_dtpm); diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index 98841524a782b..2e21e4e2b01f5 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -25,16 +25,22 @@ #include #include -static DEFINE_PER_CPU(struct dtpm *, dtpm_per_cpu); - struct dtpm_cpu { + struct dtpm dtpm; struct freq_qos_request qos_req; int cpu; }; +static DEFINE_PER_CPU(struct dtpm_cpu *, dtpm_per_cpu); + +static struct dtpm_cpu *to_dtpm_cpu(struct dtpm *dtpm) +{ + return container_of(dtpm, struct dtpm_cpu, dtpm); +} + static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) { - struct dtpm_cpu *dtpm_cpu = dtpm->private; + struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm); struct em_perf_domain *pd = em_cpu_get(dtpm_cpu->cpu); struct cpumask cpus; unsigned long freq; @@ -64,7 +70,7 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) static u64 get_pd_power_uw(struct dtpm *dtpm) { - struct dtpm_cpu *dtpm_cpu = dtpm->private; + struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm); struct em_perf_domain *pd; struct cpumask cpus; unsigned long freq; @@ -90,7 +96,7 @@ static u64 get_pd_power_uw(struct dtpm *dtpm) static int update_pd_power_uw(struct dtpm *dtpm) { - struct dtpm_cpu *dtpm_cpu = dtpm->private; + struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm); struct em_perf_domain *em = em_cpu_get(dtpm_cpu->cpu); struct cpumask cpus; int nr_cpus; @@ -111,7 +117,7 @@ static int update_pd_power_uw(struct dtpm *dtpm) static void pd_release(struct dtpm *dtpm) { - struct dtpm_cpu *dtpm_cpu = dtpm->private; + struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm); if (freq_qos_request_active(&dtpm_cpu->qos_req)) freq_qos_remove_request(&dtpm_cpu->qos_req); @@ -129,20 +135,19 @@ static struct dtpm_ops dtpm_ops = { static int cpuhp_dtpm_cpu_offline(unsigned int cpu) { struct em_perf_domain *pd; - struct dtpm *dtpm; + struct dtpm_cpu *dtpm_cpu; pd = em_cpu_get(cpu); if (!pd) return -EINVAL; - dtpm = per_cpu(dtpm_per_cpu, cpu); + dtpm_cpu = per_cpu(dtpm_per_cpu, cpu); - return dtpm_update_power(dtpm); + return dtpm_update_power(&dtpm_cpu->dtpm); } static int cpuhp_dtpm_cpu_online(unsigned int cpu) { - struct dtpm *dtpm; struct dtpm_cpu *dtpm_cpu; struct cpufreq_policy *policy; struct em_perf_domain *pd; @@ -157,27 +162,23 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu) if (!pd) return -EINVAL; - dtpm = per_cpu(dtpm_per_cpu, cpu); - if (dtpm) - return dtpm_update_power(dtpm); - - dtpm = dtpm_alloc(&dtpm_ops); - if (!dtpm) - return -EINVAL; + dtpm_cpu = per_cpu(dtpm_per_cpu, cpu); + if (dtpm_cpu) + return dtpm_update_power(&dtpm_cpu->dtpm); dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL); if (!dtpm_cpu) - goto out_kfree_dtpm; + return -ENOMEM; - dtpm->private = dtpm_cpu; + dtpm_init(&dtpm_cpu->dtpm, &dtpm_ops); dtpm_cpu->cpu = cpu; for_each_cpu(cpu, policy->related_cpus) - per_cpu(dtpm_per_cpu, cpu) = dtpm; + per_cpu(dtpm_per_cpu, cpu) = dtpm_cpu; snprintf(name, sizeof(name), "cpu%d-cpufreq", dtpm_cpu->cpu); - ret = dtpm_register(name, dtpm, NULL); + ret = dtpm_register(name, &dtpm_cpu->dtpm, NULL); if (ret) goto out_kfree_dtpm_cpu; @@ -190,17 +191,14 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu) return 0; out_dtpm_unregister: - dtpm_unregister(dtpm); + dtpm_unregister(&dtpm_cpu->dtpm); dtpm_cpu = NULL; - dtpm = NULL; out_kfree_dtpm_cpu: for_each_cpu(cpu, policy->related_cpus) per_cpu(dtpm_per_cpu, cpu) = NULL; kfree(dtpm_cpu); -out_kfree_dtpm: - kfree(dtpm); return ret; } diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h index 1e53db6bd5f98..2890f6370eb9e 100644 --- a/include/linux/dtpm.h +++ b/include/linux/dtpm.h @@ -23,7 +23,6 @@ struct dtpm { u64 power_max; u64 power_min; int weight; - void *private; }; struct dtpm_ops { @@ -65,7 +64,7 @@ int dtpm_update_power(struct dtpm *dtpm); int dtpm_release_zone(struct powercap_zone *pcz); -struct dtpm *dtpm_alloc(struct dtpm_ops *ops); +void dtpm_init(struct dtpm *dtpm, struct dtpm_ops *ops); void dtpm_unregister(struct dtpm *dtpm); From eb82bace893169b319c563b7f813c58a0a5a9f76 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 12 Mar 2021 14:04:11 +0100 Subject: [PATCH 4/5] powercap/drivers/dtpm: Scale the power with the load Currently the power consumption is based on the current OPP power assuming the entire performance domain is fully loaded. That gives very gross power estimation and we can do much better by using the load to scale the power consumption. Use the utilization to normalize and scale the power usage over the max possible power. Tested on a rock960 with 2 big CPUS, the power consumption estimation conforms with the expected one. Before this change: ~$ ~/dhrystone -t 1 -l 10000& ~$ cat /sys/devices/virtual/powercap/dtpm/dtpm:0/dtpm:0:1/constraint_0_max_power_uw 2260000 After this change: ~$ ~/dhrystone -t 1 -l 10000& ~$ cat /sys/devices/virtual/powercap/dtpm/dtpm:0/dtpm:0:1/constraint_0_max_power_uw 1130000 ~$ ~/dhrystone -t 2 -l 10000& ~$ cat /sys/devices/virtual/powercap/dtpm/dtpm:0/dtpm:0:1/constraint_0_max_power_uw 2260000 Signed-off-by: Daniel Lezcano Reviewed-by: Lukasz Luba Link: https://lore.kernel.org/r/20210312130411.29833-5-daniel.lezcano@linaro.org --- drivers/powercap/dtpm_cpu.c | 46 +++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index 2e21e4e2b01f5..44faa3a74db6a 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -68,27 +68,59 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) return power_limit; } +static u64 scale_pd_power_uw(struct cpumask *pd_mask, u64 power) +{ + unsigned long max = 0, sum_util = 0; + int cpu; + + for_each_cpu_and(cpu, pd_mask, cpu_online_mask) { + + /* + * The capacity is the same for all CPUs belonging to + * the same perf domain, so a single call to + * arch_scale_cpu_capacity() is enough. However, we + * need the CPU parameter to be initialized by the + * loop, so the call ends up in this block. + * + * We can initialize 'max' with a cpumask_first() call + * before the loop but the bits computation is not + * worth given the arch_scale_cpu_capacity() just + * returns a value where the resulting assembly code + * will be optimized by the compiler. + */ + max = arch_scale_cpu_capacity(cpu); + sum_util += sched_cpu_util(cpu, max); + } + + /* + * In the improbable case where all the CPUs of the perf + * domain are offline, 'max' will be zero and will lead to an + * illegal operation with a zero division. + */ + return max ? (power * ((sum_util << 10) / max)) >> 10 : 0; +} + static u64 get_pd_power_uw(struct dtpm *dtpm) { struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm); struct em_perf_domain *pd; - struct cpumask cpus; + struct cpumask *pd_mask; unsigned long freq; - int i, nr_cpus; + int i; pd = em_cpu_get(dtpm_cpu->cpu); - freq = cpufreq_quick_get(dtpm_cpu->cpu); - cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus)); - nr_cpus = cpumask_weight(&cpus); + pd_mask = em_span_cpus(pd); + + freq = cpufreq_quick_get(dtpm_cpu->cpu); for (i = 0; i < pd->nr_perf_states; i++) { if (pd->table[i].frequency < freq) continue; - return pd->table[i].power * - MICROWATT_PER_MILLIWATT * nr_cpus; + return scale_pd_power_uw(pd_mask, pd->table[i].power * + MICROWATT_PER_MILLIWATT); } return 0; From 5d8cb8db9f791252ef5b7951b6d8cc6281de60a6 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 18 Mar 2021 21:52:38 +0100 Subject: [PATCH 5/5] powercap/drivers/dtpm: Fix power limit initialization When a DTPM node is registered its power limit must be initialized to the power max. Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210318205238.21937-1-daniel.lezcano@linaro.org --- drivers/powercap/dtpm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index c7c5529b61eb6..b9fac786246ab 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -448,8 +448,10 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) root = dtpm; } - if (dtpm->ops && !dtpm->ops->update_power_uw(dtpm)) + if (dtpm->ops && !dtpm->ops->update_power_uw(dtpm)) { __dtpm_add_power(dtpm); + dtpm->power_limit = dtpm->power_max; + } pr_info("Registered dtpm node '%s' / %llu-%llu uW, \n", dtpm->zone.name, dtpm->power_min, dtpm->power_max);