Skip to content

Commit

Permalink
Merge Energy Model material for 5.19 to satisfy dependencies.
Browse files Browse the repository at this point in the history
  • Loading branch information
Rafael J. Wysocki committed May 6, 2022
2 parents f55ae08 + 985a677 commit 46acb9d
Show file tree
Hide file tree
Showing 9 changed files with 101 additions and 47 deletions.
24 changes: 22 additions & 2 deletions Documentation/power/energy-model.rst
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,26 @@ allows a platform to register EM power values which are reflecting total power
(static + dynamic). These power values might be coming directly from
experiments and measurements.

Registration of 'artificial' EM
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

There is an option to provide a custom callback for drivers missing detailed
knowledge about the power value for each performance state. The callback
.get_cost() is optional and provides the 'cost' values used by the EAS.
This is useful for platforms that only provide information on relative
efficiency between CPU types, where one could use the information to
create an abstract power model. But even an abstract power model can
sometimes be hard to fit in, given the input power value size restrictions.
The .get_cost() callback allows providing 'cost' values which reflect the
efficiency of the CPUs. This makes it possible to give EAS information which
has a different relation than what would be forced by the EM internal
formulas calculating 'cost' values. To register an EM for such a platform, the
driver must set the flag 'milliwatts' to 0, provide the .active_power()
callback and provide the .get_cost() callback. The EM framework handles such a
platform properly during registration. The flag EM_PERF_DOMAIN_ARTIFICIAL is
set for such a platform. Special care should be taken by other frameworks which
are using the EM to test and treat this flag properly.

Registration of 'simple' EM
~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down Expand Up @@ -181,8 +201,8 @@ EM framework::

-> drivers/cpufreq/foo_cpufreq.c

01 static int est_power(unsigned long *mW, unsigned long *KHz,
02 struct device *dev)
01 static int est_power(struct device *dev, unsigned long *mW,
02 unsigned long *KHz)
03 {
04 long freq, power;
05
Expand Down
4 changes: 2 additions & 2 deletions drivers/cpufreq/mediatek-cpufreq-hw.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ static const u16 cpufreq_mtk_offsets[REG_ARRAY_SIZE] = {
};

static int __maybe_unused
mtk_cpufreq_get_cpu_power(unsigned long *mW,
unsigned long *KHz, struct device *cpu_dev)
mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *mW,
unsigned long *KHz)
{
struct mtk_cpufreq_data *data;
struct cpufreq_policy *policy;
Expand Down
4 changes: 2 additions & 2 deletions drivers/cpufreq/scmi-cpufreq.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ scmi_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
}

static int __maybe_unused
scmi_get_cpu_power(unsigned long *power, unsigned long *KHz,
struct device *cpu_dev)
scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power,
unsigned long *KHz)
{
unsigned long Hz;
int ret, domain;
Expand Down
6 changes: 3 additions & 3 deletions drivers/opp/of.c
Original file line number Diff line number Diff line change
Expand Up @@ -1448,7 +1448,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node);
* Returns 0 on success or a proper -EINVAL value in case of error.
*/
static int __maybe_unused
_get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev)
_get_dt_power(struct device *dev, unsigned long *mW, unsigned long *kHz)
{
struct dev_pm_opp *opp;
unsigned long opp_freq, opp_power;
Expand Down Expand Up @@ -1482,8 +1482,8 @@ _get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev)
* Returns -EINVAL if the power calculation failed because of missing
* parameters, 0 otherwise.
*/
static int __maybe_unused _get_power(unsigned long *mW, unsigned long *kHz,
struct device *dev)
static int __maybe_unused _get_power(struct device *dev, unsigned long *mW,
unsigned long *kHz)
{
struct dev_pm_opp *opp;
struct device_node *np;
Expand Down
2 changes: 1 addition & 1 deletion drivers/powercap/dtpm_cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
return 0;

pd = em_cpu_get(cpu);
if (!pd)
if (!pd || em_is_artificial(pd))
return -EINVAL;

dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);
Expand Down
2 changes: 1 addition & 1 deletion drivers/thermal/cpufreq_cooling.c
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ static inline bool em_is_sane(struct cpufreq_cooling_device *cpufreq_cdev,
struct cpufreq_policy *policy;
unsigned int nr_levels;

if (!em)
if (!em || em_is_artificial(em))
return false;

policy = cpufreq_cdev->policy;
Expand Down
8 changes: 5 additions & 3 deletions drivers/thermal/devfreq_cooling.c
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
struct thermal_cooling_device *cdev;
struct device *dev = df->dev.parent;
struct devfreq_cooling_device *dfc;
struct em_perf_domain *em;
char *name;
int err, num_opps;

Expand All @@ -367,8 +368,9 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,

dfc->devfreq = df;

dfc->em_pd = em_pd_get(dev);
if (dfc->em_pd) {
em = em_pd_get(dev);
if (em && !em_is_artificial(em)) {
dfc->em_pd = em;
devfreq_cooling_ops.get_requested_power =
devfreq_cooling_get_requested_power;
devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
Expand All @@ -379,7 +381,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
num_opps = em_pd_nr_perf_states(dfc->em_pd);
} else {
/* Backward compatibility for drivers which do not use IPA */
dev_dbg(dev, "missing EM for cooling device\n");
dev_dbg(dev, "missing proper EM for cooling device\n");

num_opps = dev_pm_opp_get_opp_count(dev);

Expand Down
35 changes: 31 additions & 4 deletions include/linux/energy_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,16 @@ struct em_perf_domain {
*
* EM_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip inefficient states when estimating
* energy consumption.
*
* EM_PERF_DOMAIN_ARTIFICIAL: The power values are artificial and might be
* created by a platform missing real power information.
*/
#define EM_PERF_DOMAIN_MILLIWATTS BIT(0)
#define EM_PERF_DOMAIN_SKIP_INEFFICIENCIES BIT(1)
#define EM_PERF_DOMAIN_ARTIFICIAL BIT(2)

#define em_span_cpus(em) (to_cpumask((em)->cpus))
#define em_is_artificial(em) ((em)->flags & EM_PERF_DOMAIN_ARTIFICIAL)

#ifdef CONFIG_ENERGY_MODEL
#define EM_MAX_POWER 0xFFFF
Expand All @@ -96,11 +101,11 @@ struct em_data_callback {
/**
* active_power() - Provide power at the next performance state of
* a device
* @dev : Device for which we do this operation (can be a CPU)
* @power : Active power at the performance state
* (modified)
* @freq : Frequency at the performance state in kHz
* (modified)
* @dev : Device for which we do this operation (can be a CPU)
*
* active_power() must find the lowest performance state of 'dev' above
* 'freq' and update 'power' and 'freq' to the matching active power
Expand All @@ -112,11 +117,32 @@ struct em_data_callback {
*
* Return 0 on success.
*/
int (*active_power)(unsigned long *power, unsigned long *freq,
struct device *dev);
int (*active_power)(struct device *dev, unsigned long *power,
unsigned long *freq);

/**
* get_cost() - Provide the cost at the given performance state of
* a device
* @dev : Device for which we do this operation (can be a CPU)
* @freq : Frequency at the performance state in kHz
* @cost : The cost value for the performance state
* (modified)
*
* In case of CPUs, the cost is the one of a single CPU in the domain.
* It is expected to fit in the [0, EM_MAX_POWER] range due to internal
* usage in EAS calculation.
*
* Return 0 on success, or appropriate error value in case of failure.
*/
int (*get_cost)(struct device *dev, unsigned long freq,
unsigned long *cost);
};
#define EM_DATA_CB(_active_power_cb) { .active_power = &_active_power_cb }
#define EM_SET_ACTIVE_POWER_CB(em_cb, cb) ((em_cb).active_power = cb)
#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) \
{ .active_power = _active_power_cb, \
.get_cost = _cost_cb }
#define EM_DATA_CB(_active_power_cb) \
EM_ADV_DATA_CB(_active_power_cb, NULL)

struct em_perf_domain *em_cpu_get(int cpu);
struct em_perf_domain *em_pd_get(struct device *dev);
Expand Down Expand Up @@ -264,6 +290,7 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd)

#else
struct em_data_callback {};
#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) { }
#define EM_DATA_CB(_active_power_cb) { }
#define EM_SET_ACTIVE_POWER_CB(em_cb, cb) do { } while (0)

Expand Down
63 changes: 34 additions & 29 deletions kernel/power/energy_model.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,28 +54,15 @@ static int em_debug_cpus_show(struct seq_file *s, void *unused)
}
DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);

static int em_debug_units_show(struct seq_file *s, void *unused)
static int em_debug_flags_show(struct seq_file *s, void *unused)
{
struct em_perf_domain *pd = s->private;
char *units = (pd->flags & EM_PERF_DOMAIN_MILLIWATTS) ?
"milliWatts" : "bogoWatts";

seq_printf(s, "%s\n", units);
seq_printf(s, "%#lx\n", pd->flags);

return 0;
}
DEFINE_SHOW_ATTRIBUTE(em_debug_units);

static int em_debug_skip_inefficiencies_show(struct seq_file *s, void *unused)
{
struct em_perf_domain *pd = s->private;
int enabled = (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES) ? 1 : 0;

seq_printf(s, "%d\n", enabled);

return 0;
}
DEFINE_SHOW_ATTRIBUTE(em_debug_skip_inefficiencies);
DEFINE_SHOW_ATTRIBUTE(em_debug_flags);

static void em_debug_create_pd(struct device *dev)
{
Expand All @@ -89,9 +76,8 @@ static void em_debug_create_pd(struct device *dev)
debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus,
&em_debug_cpus_fops);

debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops);
debugfs_create_file("skip-inefficiencies", 0444, d, dev->em_pd,
&em_debug_skip_inefficiencies_fops);
debugfs_create_file("flags", 0444, d, dev->em_pd,
&em_debug_flags_fops);

/* Create a sub-directory for each performance state */
for (i = 0; i < dev->em_pd->nr_perf_states; i++)
Expand Down Expand Up @@ -121,7 +107,8 @@ static void em_debug_remove_pd(struct device *dev) {}
#endif

static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
int nr_states, struct em_data_callback *cb)
int nr_states, struct em_data_callback *cb,
unsigned long flags)
{
unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
struct em_perf_state *table;
Expand All @@ -139,7 +126,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
* lowest performance state of 'dev' above 'freq' and updates
* 'power' and 'freq' accordingly.
*/
ret = cb->active_power(&power, &freq, dev);
ret = cb->active_power(dev, &power, &freq);
if (ret) {
dev_err(dev, "EM: invalid perf. state: %d\n",
ret);
Expand Down Expand Up @@ -173,10 +160,22 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
/* Compute the cost of each performance state. */
fmax = (u64) table[nr_states - 1].frequency;
for (i = nr_states - 1; i >= 0; i--) {
unsigned long power_res = em_scale_power(table[i].power);
unsigned long power_res, cost;

if (flags & EM_PERF_DOMAIN_ARTIFICIAL) {
ret = cb->get_cost(dev, table[i].frequency, &cost);
if (ret || !cost || cost > EM_MAX_POWER) {
dev_err(dev, "EM: invalid cost %lu %d\n",
cost, ret);
goto free_ps_table;
}
} else {
power_res = em_scale_power(table[i].power);
cost = div64_u64(fmax * power_res, table[i].frequency);
}

table[i].cost = cost;

table[i].cost = div64_u64(fmax * power_res,
table[i].frequency);
if (table[i].cost >= prev_cost) {
table[i].flags = EM_PERF_STATE_INEFFICIENT;
dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
Expand All @@ -197,7 +196,8 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
}

static int em_create_pd(struct device *dev, int nr_states,
struct em_data_callback *cb, cpumask_t *cpus)
struct em_data_callback *cb, cpumask_t *cpus,
unsigned long flags)
{
struct em_perf_domain *pd;
struct device *cpu_dev;
Expand All @@ -215,7 +215,7 @@ static int em_create_pd(struct device *dev, int nr_states,
return -ENOMEM;
}

ret = em_create_perf_table(dev, pd, nr_states, cb);
ret = em_create_perf_table(dev, pd, nr_states, cb, flags);
if (ret) {
kfree(pd);
return ret;
Expand Down Expand Up @@ -332,6 +332,7 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
bool milliwatts)
{
unsigned long cap, prev_cap = 0;
unsigned long flags = 0;
int cpu, ret;

if (!dev || !nr_states || !cb)
Expand Down Expand Up @@ -378,12 +379,16 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
}
}

ret = em_create_pd(dev, nr_states, cb, cpus);
if (milliwatts)
flags |= EM_PERF_DOMAIN_MILLIWATTS;
else if (cb->get_cost)
flags |= EM_PERF_DOMAIN_ARTIFICIAL;

ret = em_create_pd(dev, nr_states, cb, cpus, flags);
if (ret)
goto unlock;

if (milliwatts)
dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS;
dev->em_pd->flags |= flags;

em_cpufreq_update_efficiencies(dev);

Expand Down

0 comments on commit 46acb9d

Please sign in to comment.