x86/sched: Add basic support for CPU capacity scaling
In order to be able to compute the sizes of tasks consistently across all
CPUs in a hybrid system, it is necessary to provide CPU capacity scaling
information to the scheduler via arch_scale_cpu_capacity().  Moreover,
the value returned by arch_scale_freq_capacity() for the given CPU must
correspond to the arch_scale_cpu_capacity() return value for it, or
utilization computations will be inaccurate.
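
For illustration, this is roughly how the two factors combine in the scheduler's
utilization tracking; a simplified sketch, not the actual PELT code, and the
helper name is made up:

        /* Simplified sketch: both factors scale the time delta that feeds PELT. */
        static inline u64 example_scale_pelt_delta(int cpu, u64 delta)
        {
                /* How fast the CPU currently runs relative to its own maximum. */
                delta = delta * arch_scale_freq_capacity(cpu) >> SCHED_CAPACITY_SHIFT;
                /* How capable this CPU is relative to the most capable CPU. */
                delta = delta * arch_scale_cpu_capacity(cpu) >> SCHED_CAPACITY_SHIFT;
                return delta;
        }

If the two hooks used different reference frequencies, the product would over- or
under-estimate utilization, which is the inconsistency described above.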

Add support for it through per-CPU variables holding the capacity and
maximum-to-base frequency ratio (times SCHED_CAPACITY_SCALE) that will
be returned by arch_scale_cpu_capacity() and used by scale_freq_tick()
to compute arch_freq_scale for the current CPU, respectively.
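
For illustration with hypothetical numbers: a CPU whose maximum frequency is
5600 MHz and whose base (MPERF) frequency is 3200 MHz stores a ratio of
5600 * 1024 / 3200 = 1792; if it then spends a tick at 2800 MHz,
delta_APERF / delta_MPERF = 2800 / 3200 and scale_freq_tick() produces
arch_freq_scale = (2800 / 3200) * 1024 * 1024 / 1792 = 512, i.e. half of
SCHED_CAPACITY_SCALE, matching 2800 / 5600.  A CPU whose full-speed capacity
is 640 against a system-wide maximum of 1024 simply stores capacity = 640.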

In order to avoid adding measurable overhead for non-hybrid x86 systems,
which are the vast majority in the field, whether or not the new hybrid
CPU capacity scaling will be in effect is controlled by a static key.
This static key is set by calling arch_enable_hybrid_capacity_scale()
which also allocates memory for the per-CPU data and initializes it.
Next, arch_set_cpu_capacity() is used to set the per-CPU variables
mentioned above for each CPU and arch_rebuild_sched_domains() needs
to be called for the scheduler to realize that capacity-aware
scheduling can be used going forward.
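
A minimal sketch of that call sequence, as seen from a hypothetical platform
driver (the topology and the numeric values below are made up; a real driver
would derive them from firmware, e.g. CPPC/HWP performance levels):

        #include <linux/cpumask.h>
        #include <asm/topology.h>

        static void example_enable_hybrid_capacities(void)
        {
                int cpu;

                /* Allocate the per-CPU data and flip the static key first. */
                if (!arch_enable_hybrid_capacity_scale())
                        return;

                for_each_possible_cpu(cpu) {
                        /* Hypothetical topology: CPUs 0-7 are P-cores, the rest E-cores. */
                        bool pcore = cpu < 8;
                        unsigned long cap = pcore ? 57 : 43;    /* abstract perf units */
                        unsigned long max_cap = 57;
                        unsigned long cap_freq = pcore ? 5600000 : 4300000;     /* kHz */
                        unsigned long base_freq = pcore ? 3200000 : 2400000;

                        /* Units only need to be consistent within each pair. */
                        arch_set_cpu_capacity(cpu, cap, max_cap, cap_freq, base_freq);
                }

                /* Let the scheduler notice the now-asymmetric CPU capacities. */
                arch_rebuild_sched_domains();
        }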

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Tested-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com> # scale invariance
Link: https://patch.msgid.link/10523497.nUPlyArG6x@rjwysocki.net
[ rjw: Added parens to function kerneldoc comments ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Rafael J. Wysocki committed Sep 4, 2024
1 parent c4a6c82 commit 5a9d101
Showing 2 changed files with 100 additions and 2 deletions.
13 changes: 13 additions & 0 deletions arch/x86/include/asm/topology.h
@@ -282,9 +282,22 @@ static inline long arch_scale_freq_capacity(int cpu)
 }
 #define arch_scale_freq_capacity arch_scale_freq_capacity
 
+bool arch_enable_hybrid_capacity_scale(void);
+void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap,
+                           unsigned long cap_freq, unsigned long base_freq);
+
+unsigned long arch_scale_cpu_capacity(int cpu);
+#define arch_scale_cpu_capacity arch_scale_cpu_capacity
+
 extern void arch_set_max_freq_ratio(bool turbo_disabled);
 extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled);
 #else
+static inline bool arch_enable_hybrid_capacity_scale(void) { return false; }
+static inline void arch_set_cpu_capacity(int cpu, unsigned long cap,
+                                         unsigned long max_cap,
+                                         unsigned long cap_freq,
+                                         unsigned long base_freq) { }
+
 static inline void arch_set_max_freq_ratio(bool turbo_disabled) { }
 static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { }
 #endif
89 changes: 87 additions & 2 deletions arch/x86/kernel/cpu/aperfmperf.c
@@ -349,17 +349,102 @@ static DECLARE_WORK(disable_freq_invariance_work,
 DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
 EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale);
 
+static DEFINE_STATIC_KEY_FALSE(arch_hybrid_cap_scale_key);
+
+struct arch_hybrid_cpu_scale {
+        unsigned long capacity;
+        unsigned long freq_ratio;
+};
+
+static struct arch_hybrid_cpu_scale __percpu *arch_cpu_scale;
+
+/**
+ * arch_enable_hybrid_capacity_scale() - Enable hybrid CPU capacity scaling
+ *
+ * Allocate memory for per-CPU data used by hybrid CPU capacity scaling,
+ * initialize it and set the static key controlling its code paths.
+ *
+ * Must be called before arch_set_cpu_capacity().
+ */
+bool arch_enable_hybrid_capacity_scale(void)
+{
+        int cpu;
+
+        if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) {
+                WARN_ONCE(1, "Hybrid CPU capacity scaling already enabled");
+                return true;
+        }
+
+        arch_cpu_scale = alloc_percpu(struct arch_hybrid_cpu_scale);
+        if (!arch_cpu_scale)
+                return false;
+
+        for_each_possible_cpu(cpu) {
+                per_cpu_ptr(arch_cpu_scale, cpu)->capacity = SCHED_CAPACITY_SCALE;
+                per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio = arch_max_freq_ratio;
+        }
+
+        static_branch_enable(&arch_hybrid_cap_scale_key);
+
+        pr_info("Hybrid CPU capacity scaling enabled\n");
+
+        return true;
+}
+
+/**
+ * arch_set_cpu_capacity() - Set scale-invariance parameters for a CPU
+ * @cpu: Target CPU.
+ * @cap: Capacity of @cpu at its maximum frequency, relative to @max_cap.
+ * @max_cap: System-wide maximum CPU capacity.
+ * @cap_freq: Frequency of @cpu corresponding to @cap.
+ * @base_freq: Frequency of @cpu at which MPERF counts.
+ *
+ * The units in which @cap and @max_cap are expressed do not matter, so long
+ * as they are consistent, because the former is effectively divided by the
+ * latter. Analogously for @cap_freq and @base_freq.
+ *
+ * After calling this function for all CPUs, call arch_rebuild_sched_domains()
+ * to let the scheduler know that capacity-aware scheduling can be used going
+ * forward.
+ */
+void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap,
+                           unsigned long cap_freq, unsigned long base_freq)
+{
+        if (static_branch_likely(&arch_hybrid_cap_scale_key)) {
+                WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity,
+                           div_u64(cap << SCHED_CAPACITY_SHIFT, max_cap));
+                WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio,
+                           div_u64(cap_freq << SCHED_CAPACITY_SHIFT, base_freq));
+        } else {
+                WARN_ONCE(1, "Hybrid CPU capacity scaling not enabled");
+        }
+}
+
+unsigned long arch_scale_cpu_capacity(int cpu)
+{
+        if (static_branch_unlikely(&arch_hybrid_cap_scale_key))
+                return READ_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity);
+
+        return SCHED_CAPACITY_SCALE;
+}
+EXPORT_SYMBOL_GPL(arch_scale_cpu_capacity);
+
 static void scale_freq_tick(u64 acnt, u64 mcnt)
 {
-        u64 freq_scale;
+        u64 freq_scale, freq_ratio;
 
         if (!arch_scale_freq_invariant())
                 return;
 
         if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
                 goto error;
 
-        if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
+        if (static_branch_unlikely(&arch_hybrid_cap_scale_key))
+                freq_ratio = READ_ONCE(this_cpu_ptr(arch_cpu_scale)->freq_ratio);
+        else
+                freq_ratio = arch_max_freq_ratio;
+
+        if (check_mul_overflow(mcnt, freq_ratio, &mcnt) || !mcnt)
                 goto error;
 
         freq_scale = div64_u64(acnt, mcnt);

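Taken together, on a system where the static key is enabled, each tick now
computes, informally and subject to the function's existing overflow checks:

        arch_freq_scale = (delta_APERF << 2 * SCHED_CAPACITY_SHIFT) / (delta_MPERF * freq_ratio)

with the per-CPU freq_ratio = cap_freq * SCHED_CAPACITY_SCALE / base_freq set by
arch_set_cpu_capacity(), so a CPU running at its cap_freq reports a value close
to SCHED_CAPACITY_SCALE, and the product arch_scale_freq_capacity() *
arch_scale_cpu_capacity() / SCHED_CAPACITY_SCALE roughly tracks that CPU's
current share of the system-wide maximum capacity.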