From 9b18d536b124357fee56d82b1462c02f78d219e5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Dec 2024 12:39:05 +0100 Subject: [PATCH 01/40] cpufreq: intel_pstate: Use CPPC to get scaling factors The perf-to-frequency scaling factors are used by intel_pstate on hybrid platforms to cast performance levels to frequency on different types of CPUs which is needed because the generic cpufreq sysfs interface works in the frequency domain. For some hybrid platforms already in the field, the scaling factors are known, but for others (including some upcoming ones) they most likely will be different and the only way to get them that scales is to use information provided by the platform firmware. In this particular case, the requisite information can be obtained via CPPC. If the P-core hybrid scaling factor for the given processor model is not known, use CPPC to compute hybrid scaling factors for all CPUs. Since the current default hybrid scaling factor is only suitable for a few early hybrid platforms, add intel_hybrid_scaling_factor[] entries for them and initialize the scaling factor to zero ("unknown") by default. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/8476313.T7Z3S40VBb@rjwysocki.net --- drivers/cpufreq/intel_pstate.c | 57 ++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b8e2396a708ad..e16b27c35cfba 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -302,11 +303,11 @@ static bool hwp_is_hybrid; static struct cpufreq_driver *intel_pstate_driver __read_mostly; -#define HYBRID_SCALING_FACTOR 78741 +#define HYBRID_SCALING_FACTOR_ADL 78741 #define HYBRID_SCALING_FACTOR_MTL 80000 #define HYBRID_SCALING_FACTOR_LNL 86957 -static int hybrid_scaling_factor = HYBRID_SCALING_FACTOR; +static int hybrid_scaling_factor; static inline int core_get_scaling(void) { @@ -414,18 +415,15 @@ static int intel_pstate_get_cppc_guaranteed(int cpu) static int intel_pstate_cppc_get_scaling(int cpu) { struct cppc_perf_caps cppc_perf; - int ret; - - ret = cppc_get_perf_caps(cpu, &cppc_perf); /* - * If the nominal frequency and the nominal performance are not - * zero and the ratio between them is not 100, return the hybrid - * scaling factor. + * Compute the perf-to-frequency scaling factor for the given CPU if + * possible, unless it would be 0. */ - if (!ret && cppc_perf.nominal_perf && cppc_perf.nominal_freq && - cppc_perf.nominal_perf * 100 != cppc_perf.nominal_freq) - return hybrid_scaling_factor; + if (!cppc_get_perf_caps(cpu, &cppc_perf) && + cppc_perf.nominal_perf && cppc_perf.nominal_freq) + return div_u64(cppc_perf.nominal_freq * KHZ_PER_MHZ, + cppc_perf.nominal_perf); return core_get_scaling(); } @@ -2211,24 +2209,30 @@ static void hybrid_get_type(void *data) static int hwp_get_cpu_scaling(int cpu) { - u8 cpu_type = 0; + if (hybrid_scaling_factor) { + u8 cpu_type = 0; - smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1); - /* P-cores have a smaller perf level-to-freqency scaling factor. */ - if (cpu_type == 0x40) - return hybrid_scaling_factor; + smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1); - /* Use default core scaling for E-cores */ - if (cpu_type == 0x20) + /* + * Return the hybrid scaling factor for P-cores and use the + * default core scaling for E-cores. 
+ */ + if (cpu_type == 0x40) + return hybrid_scaling_factor; + + if (cpu_type == 0x20) + return core_get_scaling(); + } + + /* Use core scaling on non-hybrid systems. */ + if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) return core_get_scaling(); /* - * If reached here, this system is either non-hybrid (like Tiger - * Lake) or hybrid-capable (like Alder Lake or Raptor Lake) with - * no E cores (in which case CPUID for hybrid support is 0). - * - * The CPPC nominal_frequency field is 0 for non-hybrid systems, - * so the default core scaling will be used for them. + * The system is hybrid, but the hybrid scaling factor is not known or + * the CPU type is not one of the above, so use CPPC to compute the + * scaling factor for this CPU. */ return intel_pstate_cppc_get_scaling(cpu); } @@ -3665,6 +3669,11 @@ static const struct x86_cpu_id intel_epp_default[] = { }; static const struct x86_cpu_id intel_hybrid_scaling_factor[] = { + X86_MATCH_VFM(INTEL_ALDERLAKE, HYBRID_SCALING_FACTOR_ADL), + X86_MATCH_VFM(INTEL_ALDERLAKE_L, HYBRID_SCALING_FACTOR_ADL), + X86_MATCH_VFM(INTEL_RAPTORLAKE, HYBRID_SCALING_FACTOR_ADL), + X86_MATCH_VFM(INTEL_RAPTORLAKE_P, HYBRID_SCALING_FACTOR_ADL), + X86_MATCH_VFM(INTEL_RAPTORLAKE_S, HYBRID_SCALING_FACTOR_ADL), X86_MATCH_VFM(INTEL_METEORLAKE_L, HYBRID_SCALING_FACTOR_MTL), X86_MATCH_VFM(INTEL_ARROWLAKE, HYBRID_SCALING_FACTOR_MTL), X86_MATCH_VFM(INTEL_LUNARLAKE_M, HYBRID_SCALING_FACTOR_LNL), From 20e20f83dd88a25c1e4ab0a3838e9a4ce583c30d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Dec 2024 12:40:19 +0100 Subject: [PATCH 02/40] cpufreq: intel_pstate: Drop Arrow Lake from "scaling factor" list Since HYBRID_SCALING_FACTOR_MTL is not going to be suitable for Arrow Lake in general, drop it from the "known hybrid scaling factors" list of platforms, so the scaling factor for it will be determined with the help of information provided by the platform firmware via CPPC. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/2307515.iZASKD2KPV@rjwysocki.net --- drivers/cpufreq/intel_pstate.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e16b27c35cfba..9e14374498d6e 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3675,7 +3675,6 @@ static const struct x86_cpu_id intel_hybrid_scaling_factor[] = { X86_MATCH_VFM(INTEL_RAPTORLAKE_P, HYBRID_SCALING_FACTOR_ADL), X86_MATCH_VFM(INTEL_RAPTORLAKE_S, HYBRID_SCALING_FACTOR_ADL), X86_MATCH_VFM(INTEL_METEORLAKE_L, HYBRID_SCALING_FACTOR_MTL), - X86_MATCH_VFM(INTEL_ARROWLAKE, HYBRID_SCALING_FACTOR_MTL), X86_MATCH_VFM(INTEL_LUNARLAKE_M, HYBRID_SCALING_FACTOR_LNL), {} }; From 16c977f8177f9c2ecb88319c944722107c952731 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 4 Dec 2024 14:48:38 +0000 Subject: [PATCH 03/40] cpufreq/amd-pstate: Convert the amd_pstate_get/set_epp() to static calls MSR and shared memory based systems have different mechanisms to get and set the epp value. Split those mechanisms into different functions and assign them appropriately to the static calls at boot time. This eliminates the need for the "if(cpu_feature_enabled(X86_FEATURE_CPPC))" checks at runtime. Also, propagate the error code from rdmsrl_on_cpu() and cppc_get_epp_perf() to *_get_epp()'s caller, instead of returning -EIO unconditionally. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. 
Shenoy Link: https://lore.kernel.org/r/20241204144842.164178-2-Dhananjay.Ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 92 +++++++++++++++++++++++------------- 1 file changed, 60 insertions(+), 32 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 66e5dfc711c0c..bc42d96984f4c 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -180,26 +180,40 @@ static inline int get_mode_idx_from_str(const char *str, size_t size) static DEFINE_MUTEX(amd_pstate_limits_lock); static DEFINE_MUTEX(amd_pstate_driver_lock); -static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) +static s16 msr_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) { u64 epp; int ret; - if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - if (!cppc_req_cached) { - epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, - &cppc_req_cached); - if (epp) - return epp; - } - epp = (cppc_req_cached >> 24) & 0xFF; - } else { - ret = cppc_get_epp_perf(cpudata->cpu, &epp); + if (!cppc_req_cached) { + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req_cached); if (ret < 0) { pr_debug("Could not retrieve energy perf value (%d)\n", ret); - return -EIO; + return ret; } } + epp = (cppc_req_cached >> 24) & 0xFF; + + return (s16)epp; +} + +DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp); + +static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) +{ + return static_call(amd_pstate_get_epp)(cpudata, cppc_req_cached); +} + +static s16 shmem_get_epp(struct amd_cpudata *cpudata, u64 dummy) +{ + u64 epp; + int ret; + + ret = cppc_get_epp_perf(cpudata->cpu, &epp); + if (ret < 0) { + pr_debug("Could not retrieve energy perf value (%d)\n", ret); + return ret; + } return (s16)(epp & 0xff); } @@ -253,33 +267,45 @@ static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, max_perf, fast_switch); } -static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) +static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) { int ret; - struct cppc_perf_ctrls perf_ctrls; - - if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - u64 value = READ_ONCE(cpudata->cppc_req_cached); - value &= ~GENMASK_ULL(31, 24); - value |= (u64)epp << 24; - WRITE_ONCE(cpudata->cppc_req_cached, value); + u64 value = READ_ONCE(cpudata->cppc_req_cached); - ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - if (!ret) - cpudata->epp_cached = epp; - } else { - amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, - cpudata->max_limit_perf, false); + value &= ~GENMASK_ULL(31, 24); + value |= (u64)epp << 24; + WRITE_ONCE(cpudata->cppc_req_cached, value); - perf_ctrls.energy_perf = epp; - ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); - if (ret) { - pr_debug("failed to set energy perf value (%d)\n", ret); - return ret; - } + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + if (!ret) cpudata->epp_cached = epp; + + return ret; +} + +DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp); + +static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) +{ + return static_call(amd_pstate_set_epp)(cpudata, epp); +} + +static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp) +{ + int ret; + struct cppc_perf_ctrls perf_ctrls; + + amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, + cpudata->max_limit_perf, false); + + perf_ctrls.energy_perf = epp; + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); + if (ret) { + pr_debug("failed to set energy perf 
value (%d)\n", ret); + return ret; } + cpudata->epp_cached = epp; return ret; } @@ -1869,6 +1895,8 @@ static int __init amd_pstate_init(void) static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable); static_call_update(amd_pstate_init_perf, shmem_init_perf); static_call_update(amd_pstate_update_perf, shmem_update_perf); + static_call_update(amd_pstate_get_epp, shmem_get_epp); + static_call_update(amd_pstate_set_epp, shmem_set_epp); } if (amd_pstate_prefcore) { From 57a2b25e45cd40eaa2e505452384fa1b7248895a Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 4 Dec 2024 14:48:39 +0000 Subject: [PATCH 04/40] cpufreq/amd-pstate: Move the invocation of amd_pstate_update_perf() amd_pstate_update_perf() should not be a part of shmem_set_epp() function, so move it to the amd_pstate_epp_update_limit() function, where it is needed. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241204144842.164178-3-Dhananjay.Ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index bc42d96984f4c..bd3e0f113a88c 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -296,9 +296,6 @@ static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp) int ret; struct cppc_perf_ctrls perf_ctrls; - amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, - cpudata->max_limit_perf, false); - perf_ctrls.energy_perf = epp; ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); if (ret) { @@ -1600,6 +1597,10 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) epp = 0; WRITE_ONCE(cpudata->cppc_req_cached, value); + + amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, + cpudata->max_limit_perf, false); + return amd_pstate_set_epp(cpudata, epp); } From b1089e0c8817fda93d474eaa82ad86386887aefe Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 4 Dec 2024 14:48:40 +0000 Subject: [PATCH 05/40] cpufreq/amd-pstate: Refactor amd_pstate_epp_reenable() and amd_pstate_epp_offline() Replace similar code chunks with amd_pstate_update_perf() and amd_pstate_set_epp() function calls. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. 
Shenoy Link: https://lore.kernel.org/r/20241204144842.164178-4-Dhananjay.Ugwekar@amd.com [ML: Fix LKP reported error about unused variable] Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 38 +++++++----------------------------- 1 file changed, 7 insertions(+), 31 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index bd3e0f113a88c..23c5840dc4069 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1632,25 +1632,17 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) { - struct cppc_perf_ctrls perf_ctrls; - u64 value, max_perf; + u64 max_perf; int ret; ret = amd_pstate_cppc_enable(true); if (ret) pr_err("failed to enable amd pstate during resume, return %d\n", ret); - value = READ_ONCE(cpudata->cppc_req_cached); max_perf = READ_ONCE(cpudata->highest_perf); - if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - } else { - perf_ctrls.max_perf = max_perf; - cppc_set_perf(cpudata->cpu, &perf_ctrls); - perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached); - cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); - } + amd_pstate_update_perf(cpudata, 0, 0, max_perf, false); + amd_pstate_set_epp(cpudata, cpudata->epp_cached); } static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) @@ -1670,31 +1662,15 @@ static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) static void amd_pstate_epp_offline(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - struct cppc_perf_ctrls perf_ctrls; int min_perf; - u64 value; min_perf = READ_ONCE(cpudata->lowest_perf); - value = READ_ONCE(cpudata->cppc_req_cached); mutex_lock(&amd_pstate_limits_lock); - if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN; - - /* Set max perf same as min perf */ - value &= ~AMD_CPPC_MAX_PERF(~0L); - value |= AMD_CPPC_MAX_PERF(min_perf); - value &= ~AMD_CPPC_MIN_PERF(~0L); - value |= AMD_CPPC_MIN_PERF(min_perf); - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - } else { - perf_ctrls.desired_perf = 0; - perf_ctrls.min_perf = min_perf; - perf_ctrls.max_perf = min_perf; - cppc_set_perf(cpudata->cpu, &perf_ctrls); - perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE); - cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); - } + + amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, false); + amd_pstate_set_epp(cpudata, AMD_CPPC_EPP_BALANCE_POWERSAVE); + mutex_unlock(&amd_pstate_limits_lock); } From b78f8c87ec3e7499bb049986838636d3afbc7ece Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 4 Dec 2024 14:48:41 +0000 Subject: [PATCH 06/40] cpufreq/amd-pstate: Remove the cppc_state check in offline/online functions Only amd_pstate_epp driver (i.e. cppc_state = ACTIVE) enters the amd_pstate_epp_offline() and amd_pstate_epp_cpu_online() functions, so remove the unnecessary if condition checking if cppc_state is equal to AMD_PSTATE_ACTIVE. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. 
Shenoy Link: https://lore.kernel.org/r/20241204144842.164178-5-Dhananjay.Ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 23c5840dc4069..9b8d7f299fcaf 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c
@@ -1651,10 +1651,8 @@ static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)

 	pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);

-	if (cppc_state == AMD_PSTATE_ACTIVE) {
-		amd_pstate_epp_reenable(cpudata);
-		cpudata->suspended = false;
-	}
+	amd_pstate_epp_reenable(cpudata);
+	cpudata->suspended = false;

 	return 0;
 }
@@ -1683,8 +1681,7 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
 	if (cpudata->suspended)
 		return 0;

-	if (cppc_state == AMD_PSTATE_ACTIVE)
-		amd_pstate_epp_offline(policy);
+	amd_pstate_epp_offline(policy);

 	return 0;
 }

From 53ec2101dfede8fecdd240662281a12e537c3411 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 4 Dec 2024 14:48:42 +0000 Subject: [PATCH 07/40] cpufreq/amd-pstate: Merge amd_pstate_epp_cpu_offline() and amd_pstate_epp_offline() amd_pstate_epp_offline() is only called from within amd_pstate_epp_cpu_offline(), so it doesn't make much sense to keep it as a separate function. Hence, remove it. Also remove the unnecessary debug print in the offline path while at it. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20241204144842.164178-6-Dhananjay.Ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 9b8d7f299fcaf..8ce754ead328c 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c
@@ -1657,11 +1657,14 @@ static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
 	return 0;
 }

-static void amd_pstate_epp_offline(struct cpufreq_policy *policy)
+static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
 	int min_perf;

+	if (cpudata->suspended)
+		return 0;
+
 	min_perf = READ_ONCE(cpudata->lowest_perf);

 	mutex_lock(&amd_pstate_limits_lock);
@@ -1670,18 +1673,6 @@ static void amd_pstate_epp_offline(struct cpufreq_policy *policy)

 	amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, false);
 	amd_pstate_set_epp(cpudata, AMD_CPPC_EPP_BALANCE_POWERSAVE);

 	mutex_unlock(&amd_pstate_limits_lock);
-}
-
-static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
-{
-	struct amd_cpudata *cpudata = policy->driver_data;
-
-	pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu);
-
-	if (cpudata->suspended)
-		return 0;
-
-	amd_pstate_epp_offline(policy);

 	return 0;
 }

From b1089e0c8817fda93d474eaa82ad86386887aefe Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:36 -0600 Subject: [PATCH 08/40] cpufreq/amd-pstate: Add trace event for EPP perf updates In "active" mode the most important thing for debugging whether an issue is hardware or software based is to look at what was the last thing written to the CPPC request MSR or shared memory region. The 'amd_pstate_epp_perf' trace event shows the values being written for all CPUs. Reviewed-by: Perry Yuan Reviewed-by: Gautham R. 
Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-4-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-trace.h | 45 ++++++++++++++++++++++++++++++ drivers/cpufreq/amd-pstate.c | 28 +++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h index 35f38ae67fb13..e2221a4b6901c 100644 --- a/drivers/cpufreq/amd-pstate-trace.h +++ b/drivers/cpufreq/amd-pstate-trace.h @@ -88,6 +88,51 @@ TRACE_EVENT(amd_pstate_perf, ) ); +TRACE_EVENT(amd_pstate_epp_perf, + + TP_PROTO(unsigned int cpu_id, + unsigned int highest_perf, + unsigned int epp, + unsigned int min_perf, + unsigned int max_perf, + bool boost + ), + + TP_ARGS(cpu_id, + highest_perf, + epp, + min_perf, + max_perf, + boost), + + TP_STRUCT__entry( + __field(unsigned int, cpu_id) + __field(unsigned int, highest_perf) + __field(unsigned int, epp) + __field(unsigned int, min_perf) + __field(unsigned int, max_perf) + __field(bool, boost) + ), + + TP_fast_assign( + __entry->cpu_id = cpu_id; + __entry->highest_perf = highest_perf; + __entry->epp = epp; + __entry->min_perf = min_perf; + __entry->max_perf = max_perf; + __entry->boost = boost; + ), + + TP_printk("cpu%u: [%u<->%u]/%u, epp=%u, boost=%u", + (unsigned int)__entry->cpu_id, + (unsigned int)__entry->min_perf, + (unsigned int)__entry->max_perf, + (unsigned int)__entry->highest_perf, + (unsigned int)__entry->epp, + (bool)__entry->boost + ) +); + #endif /* _AMD_PSTATE_TRACE_H */ /* This part must be outside protection */ diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 8ce754ead328c..20d52bce18821 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -324,6 +324,14 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, return -EBUSY; } + if (trace_amd_pstate_epp_perf_enabled()) { + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, + epp, + AMD_CPPC_MIN_PERF(cpudata->cppc_req_cached), + AMD_CPPC_MAX_PERF(cpudata->cppc_req_cached), + cpudata->boost_state); + } + ret = amd_pstate_set_epp(cpudata, epp); return ret; @@ -1598,6 +1606,13 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) WRITE_ONCE(cpudata->cppc_req_cached, value); + if (trace_amd_pstate_epp_perf_enabled()) { + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp, + cpudata->min_limit_perf, + cpudata->max_limit_perf, + policy->boost_enabled); + } + amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, cpudata->max_limit_perf, false); @@ -1641,6 +1656,13 @@ static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) max_perf = READ_ONCE(cpudata->highest_perf); + if (trace_amd_pstate_epp_perf_enabled()) { + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, + cpudata->epp_cached, + AMD_CPPC_MIN_PERF(cpudata->cppc_req_cached), + max_perf, cpudata->boost_state); + } + amd_pstate_update_perf(cpudata, 0, 0, max_perf, false); amd_pstate_set_epp(cpudata, cpudata->epp_cached); } @@ -1669,6 +1691,12 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) mutex_lock(&amd_pstate_limits_lock); + if (trace_amd_pstate_epp_perf_enabled()) { + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, + AMD_CPPC_EPP_BALANCE_POWERSAVE, + min_perf, min_perf, policy->boost_enabled); + } + amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, false); amd_pstate_set_epp(cpudata, AMD_CPPC_EPP_BALANCE_POWERSAVE); From 6c093d5a5b73ec1caf1e706510ae6031af2f9d43 Mon Sep 
17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:37 -0600 Subject: [PATCH 09/40] cpufreq/amd-pstate: convert mutex use to guard() Using scoped guard declaration will unlock mutexes automatically. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-5-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 20d52bce18821..bcb9367aa9ca0 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -758,12 +758,12 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) pr_err("Boost mode is not supported by this processor or SBIOS\n"); return -EOPNOTSUPP; } - mutex_lock(&amd_pstate_driver_lock); + guard(mutex)(&amd_pstate_driver_lock); + ret = amd_pstate_cpu_boost_update(policy, state); WRITE_ONCE(cpudata->boost_state, !ret ? state : false); policy->boost_enabled = !ret ? state : false; refresh_frequency_limits(policy); - mutex_unlock(&amd_pstate_driver_lock); return ret; } @@ -854,7 +854,8 @@ static void amd_pstate_update_limits(unsigned int cpu) if (!amd_pstate_prefcore) return; - mutex_lock(&amd_pstate_driver_lock); + guard(mutex)(&amd_pstate_driver_lock); + ret = amd_get_highest_perf(cpu, &cur_high); if (ret) goto free_cpufreq_put; @@ -874,7 +875,6 @@ static void amd_pstate_update_limits(unsigned int cpu) if (!highest_perf_changed) cpufreq_update_policy(cpu); - mutex_unlock(&amd_pstate_driver_lock); } /* @@ -1203,11 +1203,11 @@ static ssize_t store_energy_performance_preference( if (ret < 0) return -EINVAL; - mutex_lock(&amd_pstate_limits_lock); + guard(mutex)(&amd_pstate_limits_lock); + ret = amd_pstate_set_energy_pref_index(cpudata, ret); - mutex_unlock(&amd_pstate_limits_lock); - return ret ?: count; + return ret ? ret : count; } static ssize_t show_energy_performance_preference( @@ -1371,13 +1371,10 @@ EXPORT_SYMBOL_GPL(amd_pstate_update_status); static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { - ssize_t ret; - mutex_lock(&amd_pstate_driver_lock); - ret = amd_pstate_show_status(buf); - mutex_unlock(&amd_pstate_driver_lock); + guard(mutex)(&amd_pstate_driver_lock); - return ret; + return amd_pstate_show_status(buf); } static ssize_t status_store(struct device *a, struct device_attribute *b, @@ -1386,9 +1383,8 @@ static ssize_t status_store(struct device *a, struct device_attribute *b, char *p = memchr(buf, '\n', count); int ret; - mutex_lock(&amd_pstate_driver_lock); + guard(mutex)(&amd_pstate_driver_lock); ret = amd_pstate_update_status(buf, p ? p - buf : count); - mutex_unlock(&amd_pstate_driver_lock); return ret < 0 ? 
ret : count; } @@ -1689,7 +1685,7 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) min_perf = READ_ONCE(cpudata->lowest_perf); - mutex_lock(&amd_pstate_limits_lock); + guard(mutex)(&amd_pstate_limits_lock); if (trace_amd_pstate_epp_perf_enabled()) { trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, @@ -1700,8 +1696,6 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, false); amd_pstate_set_epp(cpudata, AMD_CPPC_EPP_BALANCE_POWERSAVE); - mutex_unlock(&amd_pstate_limits_lock); - return 0; } @@ -1730,13 +1724,11 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy) struct amd_cpudata *cpudata = policy->driver_data; if (cpudata->suspended) { - mutex_lock(&amd_pstate_limits_lock); + guard(mutex)(&amd_pstate_limits_lock); /* enable amd pstate from suspend state*/ amd_pstate_epp_reenable(cpudata); - mutex_unlock(&amd_pstate_limits_lock); - cpudata->suspended = false; } From 3b43739824a6b617d8213dd2bce6fb1b2747c377 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:38 -0600 Subject: [PATCH 10/40] cpufreq/amd-pstate: Drop cached epp_policy variable epp_policy is not used by any of the current code and there is no need to cache it. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-6-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 3 --- drivers/cpufreq/amd-pstate.h | 2 -- 2 files changed, 5 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index bcb9367aa9ca0..0e77584dfadec 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1478,7 +1478,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) return -ENOMEM; cpudata->cpu = policy->cpu; - cpudata->epp_policy = 0; ret = amd_pstate_init_perf(cpudata); if (ret) @@ -1585,8 +1584,6 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) value &= ~AMD_CPPC_DES_PERF(~0L); value |= AMD_CPPC_DES_PERF(0); - cpudata->epp_policy = cpudata->policy; - /* Get BIOS pre-defined epp value */ epp = amd_pstate_get_epp(cpudata, value); if (epp < 0) { diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h index cd573bc6b6db8..7765c82f975c6 100644 --- a/drivers/cpufreq/amd-pstate.h +++ b/drivers/cpufreq/amd-pstate.h @@ -57,7 +57,6 @@ struct amd_aperf_mperf { * @hw_prefcore: check whether HW supports preferred core featue. * Only when hw_prefcore and early prefcore param are true, * AMD P-State driver supports preferred core featue. - * @epp_policy: Last saved policy used to set energy-performance preference * @epp_cached: Cached CPPC energy-performance preference value * @policy: Cpufreq policy value * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value @@ -94,7 +93,6 @@ struct amd_cpudata { bool hw_prefcore; /* EPP feature related attributes*/ - s16 epp_policy; s16 epp_cached; u32 policy; u64 cppc_cap1_cached; From 88a95ba066a962d4d39c6a36b18bf665f51d3767 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:39 -0600 Subject: [PATCH 11/40] cpufreq/amd-pstate: Use FIELD_PREP and FIELD_GET macros The FIELD_PREP and FIELD_GET macros improve readability and help to avoid shifting bugs. Reviewed-by: Gautham R. 
Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-7-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 51 ++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 0e77584dfadec..fbd1b36846c52 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -22,6 +22,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -88,6 +89,11 @@ static bool cppc_enabled; static bool amd_pstate_prefcore = true; static struct quirk_entry *quirks; +#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) + /* * AMD Energy Preference Performance (EPP) * The EPP is used in the CCLK DPM controller to drive @@ -182,7 +188,6 @@ static DEFINE_MUTEX(amd_pstate_driver_lock); static s16 msr_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) { - u64 epp; int ret; if (!cppc_req_cached) { @@ -192,9 +197,8 @@ static s16 msr_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) return ret; } } - epp = (cppc_req_cached >> 24) & 0xFF; - return (s16)epp; + return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cppc_req_cached); } DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp); @@ -269,12 +273,11 @@ static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) { - int ret; - u64 value = READ_ONCE(cpudata->cppc_req_cached); + int ret; - value &= ~GENMASK_ULL(31, 24); - value |= (u64)epp << 24; + value &= ~AMD_CPPC_EPP_PERF_MASK; + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); WRITE_ONCE(cpudata->cppc_req_cached, value); ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); @@ -327,8 +330,8 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, if (trace_amd_pstate_epp_perf_enabled()) { trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp, - AMD_CPPC_MIN_PERF(cpudata->cppc_req_cached), - AMD_CPPC_MAX_PERF(cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), cpudata->boost_state); } @@ -542,18 +545,15 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, des_perf = 0; } - value &= ~AMD_CPPC_MIN_PERF(~0L); - value |= AMD_CPPC_MIN_PERF(min_perf); - - value &= ~AMD_CPPC_DES_PERF(~0L); - value |= AMD_CPPC_DES_PERF(des_perf); - /* limit the max perf when core performance boost feature is disabled */ if (!cpudata->boost_supported) max_perf = min_t(unsigned long, nominal_perf, max_perf); - value &= ~AMD_CPPC_MAX_PERF(~0L); - value |= AMD_CPPC_MAX_PERF(max_perf); + value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | + AMD_CPPC_DES_PERF_MASK); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, @@ -1573,16 +1573,11 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) min_perf = min(cpudata->nominal_perf, max_perf); - /* Initial min/max values for CPPC Performance Controls Register */ - value &= ~AMD_CPPC_MIN_PERF(~0L); - value |= 
AMD_CPPC_MIN_PERF(min_perf); - - value &= ~AMD_CPPC_MAX_PERF(~0L); - value |= AMD_CPPC_MAX_PERF(max_perf); - - /* CPPC EPP feature require to set zero to the desire perf bit */ - value &= ~AMD_CPPC_DES_PERF(~0L); - value |= AMD_CPPC_DES_PERF(0); + value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | + AMD_CPPC_DES_PERF_MASK); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); /* Get BIOS pre-defined epp value */ epp = amd_pstate_get_epp(cpudata, value); @@ -1652,7 +1647,7 @@ static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) if (trace_amd_pstate_epp_perf_enabled()) { trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, cpudata->epp_cached, - AMD_CPPC_MIN_PERF(cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), max_perf, cpudata->boost_state); } From 474e7218e81e7932ed18f91969b72169005ff038 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:40 -0600 Subject: [PATCH 12/40] cpufreq/amd-pstate: Only update the cached value in msr_set_epp() on success If writing the MSR MSR_AMD_CPPC_REQ fails then the cached value in the amd_cpudata structure should not be updated. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-8-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index fbd1b36846c52..ebfc9e20b6cb5 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -278,11 +278,15 @@ static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) value &= ~AMD_CPPC_EPP_PERF_MASK; value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); - WRITE_ONCE(cpudata->cppc_req_cached, value); ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - if (!ret) - cpudata->epp_cached = epp; + if (ret) { + pr_err("failed to set energy perf value (%d)\n", ret); + return ret; + } + + cpudata->epp_cached = epp; + WRITE_ONCE(cpudata->cppc_req_cached, value); return ret; } From 68cb0e77b6439fea64c6907c563b7bd27f2ee57f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:41 -0600 Subject: [PATCH 13/40] cpufreq/amd-pstate: store all values in cpudata struct in khz Storing values in the cpudata structure in different units leads to confusion and hardcoded conversions elsewhere. After ratios are calculated store everything in khz for any future use. Adjust all relevant consumers for this change as well. Suggested-by: Dhananjay Ugwekar Reviewed-by: Gautham R. 
Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-9-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-ut.c | 12 +++++------- drivers/cpufreq/amd-pstate.c | 28 ++++++++++++++-------------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index a261d7300951e..3a0a380c3590c 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -207,7 +207,6 @@ static void amd_pstate_ut_check_freq(u32 index) int cpu = 0; struct cpufreq_policy *policy = NULL; struct amd_cpudata *cpudata = NULL; - u32 nominal_freq_khz; for_each_possible_cpu(cpu) { policy = cpufreq_cpu_get(cpu); @@ -215,14 +214,13 @@ static void amd_pstate_ut_check_freq(u32 index) break; cpudata = policy->driver_data; - nominal_freq_khz = cpudata->nominal_freq*1000; - if (!((cpudata->max_freq >= nominal_freq_khz) && - (nominal_freq_khz > cpudata->lowest_nonlinear_freq) && + if (!((cpudata->max_freq >= cpudata->nominal_freq) && + (cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) && (cpudata->lowest_nonlinear_freq > cpudata->min_freq) && (cpudata->min_freq > 0))) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n", - __func__, cpu, cpudata->max_freq, nominal_freq_khz, + __func__, cpu, cpudata->max_freq, cpudata->nominal_freq, cpudata->lowest_nonlinear_freq, cpudata->min_freq); goto skip_test; } @@ -236,13 +234,13 @@ static void amd_pstate_ut_check_freq(u32 index) if (cpudata->boost_supported) { if ((policy->max == cpudata->max_freq) || - (policy->max == nominal_freq_khz)) + (policy->max == cpudata->nominal_freq)) amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; else { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n", __func__, cpu, policy->max, cpudata->max_freq, - nominal_freq_khz); + cpudata->nominal_freq); goto skip_test; } } else { diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index ebfc9e20b6cb5..bf2512e1f117e 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -739,8 +739,8 @@ static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) if (on) policy->cpuinfo.max_freq = max_freq; - else if (policy->cpuinfo.max_freq > nominal_freq * 1000) - policy->cpuinfo.max_freq = nominal_freq * 1000; + else if (policy->cpuinfo.max_freq > nominal_freq) + policy->cpuinfo.max_freq = nominal_freq; policy->max = policy->cpuinfo.max_freq; @@ -940,29 +940,29 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) return ret; if (quirks && quirks->lowest_freq) - min_freq = quirks->lowest_freq * 1000; + min_freq = quirks->lowest_freq; else - min_freq = cppc_perf.lowest_freq * 1000; + min_freq = cppc_perf.lowest_freq; if (quirks && quirks->nominal_freq) - nominal_freq = quirks->nominal_freq ; + nominal_freq = quirks->nominal_freq; else nominal_freq = cppc_perf.nominal_freq; nominal_perf = READ_ONCE(cpudata->nominal_perf); boost_ratio = div_u64(cpudata->highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); - max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000; + max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT); lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT, 
nominal_perf); - lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT) * 1000; + lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT); - WRITE_ONCE(cpudata->min_freq, min_freq); - WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); - WRITE_ONCE(cpudata->nominal_freq, nominal_freq); - WRITE_ONCE(cpudata->max_freq, max_freq); + WRITE_ONCE(cpudata->min_freq, min_freq * 1000); + WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000); + WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000); + WRITE_ONCE(cpudata->max_freq, max_freq * 1000); /** * Below values need to be initialized correctly, otherwise driver will fail to load @@ -972,13 +972,13 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) */ if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) { pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n", - min_freq, max_freq, nominal_freq * 1000); + min_freq, max_freq, nominal_freq); return -EINVAL; } - if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq * 1000) { + if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq) { pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n", - lowest_nonlinear_freq, min_freq, nominal_freq * 1000); + lowest_nonlinear_freq, min_freq, nominal_freq); return -EINVAL; } From 942718f2a236cb3b27d2dbb5942538681b6e0e88 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:42 -0600 Subject: [PATCH 14/40] cpufreq/amd-pstate: Change amd_pstate_update_perf() to return an int As msr_update_perf() calls an MSR it's possible that it fails. Pass this return code up to the caller. Reviewed-by: Gautham R. 
Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-10-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index bf2512e1f117e..d279ace500d7d 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -251,24 +251,26 @@ static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata) return index; } -static void msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, +static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch) { - if (fast_switch) + if (fast_switch) { wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached)); - else - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, - READ_ONCE(cpudata->cppc_req_cached)); + return 0; + } + + return wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, + READ_ONCE(cpudata->cppc_req_cached)); } DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); -static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, +static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch) { - static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, - max_perf, fast_switch); + return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, + max_perf, fast_switch); } static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) @@ -480,7 +482,7 @@ static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) return static_call(amd_pstate_init_perf)(cpudata); } -static void shmem_update_perf(struct amd_cpudata *cpudata, +static int shmem_update_perf(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch) { @@ -490,7 +492,7 @@ static void shmem_update_perf(struct amd_cpudata *cpudata, perf_ctrls.min_perf = min_perf; perf_ctrls.desired_perf = des_perf; - cppc_set_perf(cpudata->cpu, &perf_ctrls); + return cppc_set_perf(cpudata->cpu, &perf_ctrls); } static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) From 3f7b835fa4d0d06f82249a3aca989fdf9bdf4656 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:43 -0600 Subject: [PATCH 15/40] cpufreq/amd-pstate: Move limit updating code The limit updating code in amd_pstate_epp_update_limit() should not only apply to EPP updates. Move it to amd_pstate_update_min_max_limit() so other callers can benefit as well. With this move it's not necessary to have clamp_t calls anymore because the verify callback is called when setting limits. Reviewed-by: Gautham R. 
Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-11-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-)

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index d279ace500d7d..55529e32d325c 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c
@@ -537,10 +537,6 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
 	u32 nominal_perf = READ_ONCE(cpudata->nominal_perf);
 	u64 value = prev;

-	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
-			cpudata->max_limit_perf);
-	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
-			cpudata->max_limit_perf);
 	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);

 	max_freq = READ_ONCE(cpudata->max_limit_freq);
@@ -607,7 +603,7 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)

 static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
 {
-	u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf, max_freq;
+	u32 max_limit_perf, min_limit_perf, max_perf, max_freq;
 	struct amd_cpudata *cpudata = policy->driver_data;

 	max_perf = READ_ONCE(cpudata->highest_perf);
@@ -615,12 +611,8 @@ static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
 	max_limit_perf = div_u64(policy->max * max_perf, max_freq);
 	min_limit_perf = div_u64(policy->min * max_perf, max_freq);

-	lowest_perf = READ_ONCE(cpudata->lowest_perf);
-	if (min_limit_perf < lowest_perf)
-		min_limit_perf = lowest_perf;
-
-	if (max_limit_perf < min_limit_perf)
-		max_limit_perf = min_limit_perf;
+	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
+		min_limit_perf = min(cpudata->nominal_perf, max_limit_perf);

 	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
 	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
@@ -1562,28 +1554,18 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
 static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
-	u32 max_perf, min_perf;
 	u64 value;
 	s16 epp;

-	max_perf = READ_ONCE(cpudata->highest_perf);
-	min_perf = READ_ONCE(cpudata->lowest_perf);
 	amd_pstate_update_min_max_limit(policy);

-	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
-			cpudata->max_limit_perf);
-	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
-			cpudata->max_limit_perf);
 	value = READ_ONCE(cpudata->cppc_req_cached);

-	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
-		min_perf = min(cpudata->nominal_perf, max_perf);
-
 	value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
 		   AMD_CPPC_DES_PERF_MASK);
-	value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf);
+	value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, cpudata->max_limit_perf);
 	value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0);
-	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
+	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, cpudata->min_limit_perf);

 	/* Get BIOS pre-defined epp value */
 	epp = amd_pstate_get_epp(cpudata, value);

From b3781f30bfcfd7db12de2595adb01779e565e1c6 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:44 -0600 Subject: [PATCH 16/40] cpufreq/amd-pstate: Cache EPP value and use that everywhere Cache the value in cpudata->epp_cached, and use that for all callers. As all callers use the cached value, merge amd_pstate_get_energy_pref_index() into show_energy_performance_preference(). 
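The cached preference backs the string that userspace reads through the generic cpufreq sysfs interface, e.g. (illustrative output; the exact string depends on the configured policy):

	$ cat /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_preference
	balance_performance
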
Check if the EPP value is changed before writing it to MSR or shared memory region. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-12-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 105 ++++++++++++++--------------------- 1 file changed, 43 insertions(+), 62 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 55529e32d325c..d1f82e4ca9b12 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -186,29 +186,28 @@ static inline int get_mode_idx_from_str(const char *str, size_t size) static DEFINE_MUTEX(amd_pstate_limits_lock); static DEFINE_MUTEX(amd_pstate_driver_lock); -static s16 msr_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) +static s16 msr_get_epp(struct amd_cpudata *cpudata) { + u64 value; int ret; - if (!cppc_req_cached) { - ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req_cached); - if (ret < 0) { - pr_debug("Could not retrieve energy perf value (%d)\n", ret); - return ret; - } + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); + if (ret < 0) { + pr_debug("Could not retrieve energy perf value (%d)\n", ret); + return ret; } - return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cppc_req_cached); + return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, value); } DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp); -static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) +static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata) { - return static_call(amd_pstate_get_epp)(cpudata, cppc_req_cached); + return static_call(amd_pstate_get_epp)(cpudata); } -static s16 shmem_get_epp(struct amd_cpudata *cpudata, u64 dummy) +static s16 shmem_get_epp(struct amd_cpudata *cpudata) { u64 epp; int ret; @@ -222,35 +221,6 @@ static s16 shmem_get_epp(struct amd_cpudata *cpudata, u64 dummy) return (s16)(epp & 0xff); } -static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata) -{ - s16 epp; - int index = -EINVAL; - - epp = amd_pstate_get_epp(cpudata, 0); - if (epp < 0) - return epp; - - switch (epp) { - case AMD_CPPC_EPP_PERFORMANCE: - index = EPP_INDEX_PERFORMANCE; - break; - case AMD_CPPC_EPP_BALANCE_PERFORMANCE: - index = EPP_INDEX_BALANCE_PERFORMANCE; - break; - case AMD_CPPC_EPP_BALANCE_POWERSAVE: - index = EPP_INDEX_BALANCE_POWERSAVE; - break; - case AMD_CPPC_EPP_POWERSAVE: - index = EPP_INDEX_POWERSAVE; - break; - default: - break; - } - - return index; -} - static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch) { @@ -275,19 +245,23 @@ static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata, static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) { - u64 value = READ_ONCE(cpudata->cppc_req_cached); + u64 value, prev; int ret; + value = prev = READ_ONCE(cpudata->cppc_req_cached); value &= ~AMD_CPPC_EPP_PERF_MASK; value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + if (value == prev) + return 0; + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); if (ret) { pr_err("failed to set energy perf value (%d)\n", ret); return ret; } - cpudata->epp_cached = epp; + WRITE_ONCE(cpudata->epp_cached, epp); WRITE_ONCE(cpudata->cppc_req_cached, value); return ret; @@ -305,13 +279,16 @@ static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp) int ret; struct cppc_perf_ctrls perf_ctrls; + if (epp == cpudata->epp_cached) + return 0; + perf_ctrls.energy_perf = epp; ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 
1);
 	if (ret) {
 		pr_debug("failed to set energy perf value (%d)\n", ret);
 		return ret;
 	}
-	cpudata->epp_cached = epp;
+	WRITE_ONCE(cpudata->epp_cached, epp);

 	return ret;
 }
@@ -1214,9 +1191,22 @@ static ssize_t show_energy_performance_preference(
 	struct amd_cpudata *cpudata = policy->driver_data;
 	int preference;

-	preference = amd_pstate_get_energy_pref_index(cpudata);
-	if (preference < 0)
-		return preference;
+	switch (cpudata->epp_cached) {
+	case AMD_CPPC_EPP_PERFORMANCE:
+		preference = EPP_INDEX_PERFORMANCE;
+		break;
+	case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
+		preference = EPP_INDEX_BALANCE_PERFORMANCE;
+		break;
+	case AMD_CPPC_EPP_BALANCE_POWERSAVE:
+		preference = EPP_INDEX_BALANCE_POWERSAVE;
+		break;
+	case AMD_CPPC_EPP_POWERSAVE:
+		preference = EPP_INDEX_POWERSAVE;
+		break;
+	default:
+		return -EINVAL;
+	}

 	return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
 }
@@ -1501,7 +1491,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)

 	policy->driver_data = cpudata;

-	cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata, 0);
+	cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata);

 	policy->min = policy->cpuinfo.min_freq;
 	policy->max = policy->cpuinfo.max_freq;
@@ -1555,35 +1545,26 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
 	u64 value;
-	s16 epp;

 	amd_pstate_update_min_max_limit(policy);

 	value = READ_ONCE(cpudata->cppc_req_cached);

 	value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
-		   AMD_CPPC_DES_PERF_MASK);
+		   AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK);
 	value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, cpudata->max_limit_perf);
 	value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0);
 	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, cpudata->min_limit_perf);

-	/* Get BIOS pre-defined epp value */
-	epp = amd_pstate_get_epp(cpudata, value);
-	if (epp < 0) {
-		/**
-		 * This return value can only be negative for shared_memory
-		 * systems where EPP register read/write not supported.
-		 */
-		return epp;
-	}
-
 	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
-		epp = 0;
+		WRITE_ONCE(cpudata->epp_cached, 0);
+	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, cpudata->epp_cached);

 	WRITE_ONCE(cpudata->cppc_req_cached, value);

 	if (trace_amd_pstate_epp_perf_enabled()) {
-		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp,
+		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
+					  cpudata->epp_cached,
 					  cpudata->min_limit_perf,
 					  cpudata->max_limit_perf,
 					  policy->boost_enabled);
@@ -1592,7 +1573,7 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
 	amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
 			       cpudata->max_limit_perf, false);

-	return amd_pstate_set_epp(cpudata, epp);
+	return amd_pstate_set_epp(cpudata, READ_ONCE(cpudata->epp_cached));
 }

From fff395796917ac3fe3b4c4607cb74a8dbdc17593 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:45 -0600 Subject: [PATCH 17/40] cpufreq/amd-pstate: Always write EPP value when updating perf For MSR systems the EPP value is in the same register as perf targets and so dividing them into two separate MSR writes is wasteful. In msr_update_perf(), update both EPP and perf values in one write to MSR_AMD_CPPC_REQ, and cache them if successful. To accomplish this plumb the EPP value into the update_perf call and modify all its callers to check the return value. 
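For reference, all four fields live in the one request register, which is why a single write suffices; the layout implied by the AMD_CPPC_*_MASK definitions added earlier in the series is (a sketch of the bit layout, not new code):

	MSR_AMD_CPPC_REQ:
	  bits  7:0   max_perf  (AMD_CPPC_MAX_PERF_MASK)
	  bits 15:8   min_perf  (AMD_CPPC_MIN_PERF_MASK)
	  bits 23:16  des_perf  (AMD_CPPC_DES_PERF_MASK)
	  bits 31:24  epp       (AMD_CPPC_EPP_PERF_MASK)
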
As this unifies calls, ensure that the MSR write is necessary before flushing a write out. Also drop the comparison from the passive flow tracing. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-13-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-trace.h | 7 +- drivers/cpufreq/amd-pstate.c | 108 +++++++++++++++-------------- 2 files changed, 56 insertions(+), 59 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h index e2221a4b6901c..8d692415d9050 100644 --- a/drivers/cpufreq/amd-pstate-trace.h +++ b/drivers/cpufreq/amd-pstate-trace.h @@ -32,7 +32,6 @@ TRACE_EVENT(amd_pstate_perf, u64 aperf, u64 tsc, unsigned int cpu_id, - bool changed, bool fast_switch ), @@ -44,7 +43,6 @@ TRACE_EVENT(amd_pstate_perf, aperf, tsc, cpu_id, - changed, fast_switch ), @@ -57,7 +55,6 @@ TRACE_EVENT(amd_pstate_perf, __field(unsigned long long, aperf) __field(unsigned long long, tsc) __field(unsigned int, cpu_id) - __field(bool, changed) __field(bool, fast_switch) ), @@ -70,11 +67,10 @@ TRACE_EVENT(amd_pstate_perf, __entry->aperf = aperf; __entry->tsc = tsc; __entry->cpu_id = cpu_id; - __entry->changed = changed; __entry->fast_switch = fast_switch; ), - TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u changed=%s fast_switch=%s", + TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u fast_switch=%s", (unsigned long)__entry->min_perf, (unsigned long)__entry->target_perf, (unsigned long)__entry->capacity, @@ -83,7 +79,6 @@ TRACE_EVENT(amd_pstate_perf, (unsigned long long)__entry->aperf, (unsigned long long)__entry->tsc, (unsigned int)__entry->cpu_id, - (__entry->changed) ? "true" : "false", (__entry->fast_switch) ? 
"true" : "false" ) ); diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index d1f82e4ca9b12..419790e52d910 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -222,25 +222,47 @@ static s16 shmem_get_epp(struct amd_cpudata *cpudata) } static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, - u32 des_perf, u32 max_perf, bool fast_switch) + u32 des_perf, u32 max_perf, u32 epp, bool fast_switch) { + u64 value, prev; + + value = prev = READ_ONCE(cpudata->cppc_req_cached); + + value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | + AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + + if (value == prev) + return 0; + if (fast_switch) { - wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached)); + wrmsrl(MSR_AMD_CPPC_REQ, value); return 0; + } else { + int ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + + if (ret) + return ret; } - return wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, - READ_ONCE(cpudata->cppc_req_cached)); + WRITE_ONCE(cpudata->cppc_req_cached, value); + WRITE_ONCE(cpudata->epp_cached, epp); + + return 0; } DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, - u32 max_perf, bool fast_switch) + u32 max_perf, u32 epp, + bool fast_switch) { return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, - max_perf, fast_switch); + max_perf, epp, fast_switch); } static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) @@ -261,6 +283,7 @@ static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) return ret; } + /* update both so that msr_update_perf() can effectively check */ WRITE_ONCE(cpudata->epp_cached, epp); WRITE_ONCE(cpudata->cppc_req_cached, value); @@ -459,12 +482,18 @@ static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) return static_call(amd_pstate_init_perf)(cpudata); } -static int shmem_update_perf(struct amd_cpudata *cpudata, - u32 min_perf, u32 des_perf, - u32 max_perf, bool fast_switch) +static int shmem_update_perf(struct amd_cpudata *cpudata, u32 min_perf, + u32 des_perf, u32 max_perf, u32 epp, bool fast_switch) { struct cppc_perf_ctrls perf_ctrls; + if (cppc_state == AMD_PSTATE_ACTIVE) { + int ret = shmem_set_epp(cpudata, epp); + + if (ret) + return ret; + } + perf_ctrls.max_perf = max_perf; perf_ctrls.min_perf = min_perf; perf_ctrls.desired_perf = des_perf; @@ -510,9 +539,7 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, { unsigned long max_freq; struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu); - u64 prev = READ_ONCE(cpudata->cppc_req_cached); u32 nominal_perf = READ_ONCE(cpudata->nominal_perf); - u64 value = prev; des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); @@ -528,27 +555,14 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, if (!cpudata->boost_supported) max_perf = min_t(unsigned long, nominal_perf, max_perf); - value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | - AMD_CPPC_DES_PERF_MASK); - value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); - value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); - value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); - if (trace_amd_pstate_perf_enabled() && 
amd_pstate_sample(cpudata)) { trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc, - cpudata->cpu, (value != prev), fast_switch); + cpudata->cpu, fast_switch); } - if (value == prev) - goto cpufreq_policy_put; + amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch); - WRITE_ONCE(cpudata->cppc_req_cached, value); - - amd_pstate_update_perf(cpudata, min_perf, des_perf, - max_perf, fast_switch); - -cpufreq_policy_put: cpufreq_cpu_put(policy); } @@ -1544,36 +1558,24 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - u64 value; + u32 epp; amd_pstate_update_min_max_limit(policy); - value = READ_ONCE(cpudata->cppc_req_cached); - - value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | - AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK); - value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, cpudata->max_limit_perf); - value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0); - value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, cpudata->min_limit_perf); - if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) - WRITE_ONCE(cpudata->epp_cached, 0); - value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, cpudata->epp_cached); - - WRITE_ONCE(cpudata->cppc_req_cached, value); + epp = 0; + else + epp = READ_ONCE(cpudata->epp_cached); if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, - cpudata->epp_cached, + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp, cpudata->min_limit_perf, cpudata->max_limit_perf, policy->boost_enabled); } - amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, - cpudata->max_limit_perf, false); - - return amd_pstate_set_epp(cpudata, READ_ONCE(cpudata->epp_cached)); + return amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, + cpudata->max_limit_perf, epp, false); } static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) @@ -1602,7 +1604,7 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) return 0; } -static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) +static int amd_pstate_epp_reenable(struct amd_cpudata *cpudata) { u64 max_perf; int ret; @@ -1620,17 +1622,19 @@ static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) max_perf, cpudata->boost_state); } - amd_pstate_update_perf(cpudata, 0, 0, max_perf, false); - amd_pstate_set_epp(cpudata, cpudata->epp_cached); + return amd_pstate_update_perf(cpudata, 0, 0, max_perf, cpudata->epp_cached, false); } static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; + int ret; pr_debug("AMD CPU Core %d going online\n", cpudata->cpu); - amd_pstate_epp_reenable(cpudata); + ret = amd_pstate_epp_reenable(cpudata); + if (ret) + return ret; cpudata->suspended = false; return 0; @@ -1654,10 +1658,8 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) min_perf, min_perf, policy->boost_enabled); } - amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, false); - amd_pstate_set_epp(cpudata, AMD_CPPC_EPP_BALANCE_POWERSAVE); - - return 0; + return amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, + AMD_CPPC_EPP_BALANCE_POWERSAVE, false); } static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) From f8fde687c911a366a6132aed85f4ee6b647b9160 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 
12:52:46 -0600 Subject: [PATCH 18/40] cpufreq/amd-pstate: Drop ret variable from amd_pstate_set_energy_pref_index() The ret variable is not necessary. Reviewed-and-tested-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-14-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 419790e52d910..4d665d9c76d35 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -319,13 +319,11 @@ static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp) static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, int pref_index) { - int epp = -EINVAL; - int ret; + int epp; if (!pref_index) epp = cpudata->epp_default; - - if (epp == -EINVAL) + else epp = epp_values[pref_index]; if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { @@ -341,9 +339,7 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, cpudata->boost_state); } - ret = amd_pstate_set_epp(cpudata, epp); - - return ret; + return amd_pstate_set_epp(cpudata, epp); } static inline int msr_cppc_enable(bool enable)
From f9a378ff6443cdcd4387e5dbb76fa5fa549a83ec Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:47 -0600 Subject: [PATCH 19/40] cpufreq/amd-pstate: Set different default EPP policy for Epyc and Ryzen For Ryzen systems the EPP policy set by the BIOS is generally configured to performance as this is the default register value for the CPPC request MSR. If a user doesn't use additional software to configure EPP then the system will stay biased towards performance by default and consume extra battery. Instead configure the default to "balanced_performance" for this case. Suggested-by: Artem S. Tashkinov Reviewed-by: Dhananjay Ugwekar Tested-by: Dhananjay Ugwekar Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219526 Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-15-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 4d665d9c76d35..97aee213821d1 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1501,8 +1501,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) policy->driver_data = cpudata; - cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata); - policy->min = policy->cpuinfo.min_freq; policy->max = policy->cpuinfo.max_freq; @@ -1513,10 +1511,13 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) * the default cpufreq governor is neither powersave nor performance.
*/ if (amd_pstate_acpi_pm_profile_server() || - amd_pstate_acpi_pm_profile_undefined()) + amd_pstate_acpi_pm_profile_undefined()) { policy->policy = CPUFREQ_POLICY_PERFORMANCE; - else + cpudata->epp_default = amd_pstate_get_epp(cpudata); + } else { policy->policy = CPUFREQ_POLICY_POWERSAVE; + cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE; + } if (cpu_feature_enabled(X86_FEATURE_CPPC)) { ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); @@ -1529,6 +1530,9 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) return ret; WRITE_ONCE(cpudata->cppc_cap1_cached, value); } + ret = amd_pstate_set_epp(cpudata, cpudata->epp_default); + if (ret) + return ret; current_pstate_driver->adjust_perf = NULL;
From 95fad7fb58cfaa2a295aa54a1f001a16b9324963 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:48 -0600 Subject: [PATCH 20/40] cpufreq/amd-pstate: Drop boost_state variable Currently boost_state is cached for every processor in the cpudata structure and the driver boost state is set for every processor. Both of these aren't necessary, as the driver only needs to set it once and the policy stores whether boost is enabled. Move the driver boost setting to registration and adjust all references to the cached value to pull from the policy instead. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-16-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 26 +++++++++++++------------- drivers/cpufreq/amd-pstate.h | 1 - 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 97aee213821d1..d7b1de97727a1 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -316,9 +316,10 @@ static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp) return ret; } -static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, - int pref_index) +static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy, + int pref_index) { + struct amd_cpudata *cpudata = policy->driver_data; int epp; if (!pref_index) @@ -336,7 +337,7 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, epp, FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), - cpudata->boost_state); + policy->boost_enabled); } return amd_pstate_set_epp(cpudata, epp); @@ -746,7 +747,6 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) guard(mutex)(&amd_pstate_driver_lock); ret = amd_pstate_cpu_boost_update(policy, state); - WRITE_ONCE(cpudata->boost_state, !ret ? state : false); policy->boost_enabled = !ret ?
state : false; refresh_frequency_limits(policy); @@ -768,9 +768,6 @@ static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata) goto exit_err; } - /* at least one CPU supports CPB, even if others fail later on to set up */ - current_pstate_driver->boost_enabled = true; - ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val); if (ret) { pr_err_once("failed to read initial CPU boost state!\n"); @@ -1176,7 +1173,6 @@ static ssize_t show_energy_performance_available_preferences( static ssize_t store_energy_performance_preference( struct cpufreq_policy *policy, const char *buf, size_t count) { - struct amd_cpudata *cpudata = policy->driver_data; char str_preference[21]; ssize_t ret; @@ -1190,7 +1186,7 @@ static ssize_t store_energy_performance_preference( guard(mutex)(&amd_pstate_limits_lock); - ret = amd_pstate_set_energy_pref_index(cpudata, ret); + ret = amd_pstate_set_energy_pref_index(policy, ret); return ret ? ret : count; } @@ -1265,6 +1261,9 @@ static int amd_pstate_register_driver(int mode) return ret; } + /* at least one CPU supports CPB */ + current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB); + ret = cpufreq_register_driver(current_pstate_driver); if (ret) { amd_pstate_driver_cleanup(); @@ -1604,8 +1603,9 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) return 0; } -static int amd_pstate_epp_reenable(struct amd_cpudata *cpudata) +static int amd_pstate_epp_reenable(struct cpufreq_policy *policy) { + struct amd_cpudata *cpudata = policy->driver_data; u64 max_perf; int ret; @@ -1619,7 +1619,7 @@ static int amd_pstate_epp_reenable(struct amd_cpudata *cpudata) trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, cpudata->epp_cached, FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), - max_perf, cpudata->boost_state); + max_perf, policy->boost_enabled); } return amd_pstate_update_perf(cpudata, 0, 0, max_perf, cpudata->epp_cached, false); @@ -1632,7 +1632,7 @@ static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) pr_debug("AMD CPU Core %d going online\n", cpudata->cpu); - ret = amd_pstate_epp_reenable(cpudata); + ret = amd_pstate_epp_reenable(policy); if (ret) return ret; cpudata->suspended = false; @@ -1690,7 +1690,7 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy) guard(mutex)(&amd_pstate_limits_lock); /* enable amd pstate from suspend state*/ - amd_pstate_epp_reenable(cpudata); + amd_pstate_epp_reenable(policy); cpudata->suspended = false; } diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h index 7765c82f975c6..9747e3be6ceee 100644 --- a/drivers/cpufreq/amd-pstate.h +++ b/drivers/cpufreq/amd-pstate.h @@ -98,7 +98,6 @@ struct amd_cpudata { u64 cppc_cap1_cached; bool suspended; s16 epp_default; - bool boost_state; }; /* From de51589f9bd98efddf4ab776d3a490e81905ef7c Mon Sep 17 00:00:00 2001 From: Christian Loehle Date: Wed, 11 Dec 2024 12:26:05 +0000 Subject: [PATCH 21/40] cpufreq: intel_pstate: Use CPUFREQ_POLICY_UNKNOWN epp_policy uses the same values as cpufreq_policy.policy and resets to CPUFREQ_POLICY_UNKNOWN during offlining. Be consistent about it and initialize to CPUFREQ_POLICY_UNKNOWN instead of 0, too. No functional change intended. Signed-off-by: Christian Loehle Link: https://patch.msgid.link/20241211122605.3048503-3-christian.loehle@arm.com Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/intel_pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9e14374498d6e..9c4cc01fd51aa 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2713,7 +2713,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum) } cpu->epp_powersave = -EINVAL; - cpu->epp_policy = 0; + cpu->epp_policy = CPUFREQ_POLICY_UNKNOWN; intel_pstate_get_cpu_pstates(cpu); From 8e461a1cb43d69d2fc8a97e61916dce571e6bb31 Mon Sep 17 00:00:00 2001 From: "Sultan Alsawaf (unemployed)" Date: Wed, 11 Dec 2024 17:57:32 -0800 Subject: [PATCH 22/40] cpufreq: schedutil: Fix superfluous updates caused by need_freq_update A redundant frequency update is only truly needed when there is a policy limits change with a driver that specifies CPUFREQ_NEED_UPDATE_LIMITS. In spite of that, drivers specifying CPUFREQ_NEED_UPDATE_LIMITS receive a frequency update _all the time_, not just for a policy limits change, because need_freq_update is never cleared. Furthermore, ignore_dl_rate_limit()'s usage of need_freq_update also leads to a redundant frequency update, regardless of whether or not the driver specifies CPUFREQ_NEED_UPDATE_LIMITS, when the next chosen frequency is the same as the current one. Fix the superfluous updates by only honoring CPUFREQ_NEED_UPDATE_LIMITS when there's a policy limits change, and clearing need_freq_update when a requisite redundant update occurs. This is neatly achieved by moving up the CPUFREQ_NEED_UPDATE_LIMITS test and instead setting need_freq_update to false in sugov_update_next_freq(). Fixes: 600f5badb78c ("cpufreq: schedutil: Don't skip freq update when limits change") Signed-off-by: Sultan Alsawaf (unemployed) Reviewed-by: Christian Loehle Link: https://patch.msgid.link/20241212015734.41241-2-sultan@kerneltoast.com Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 28c77904ea749..e51d5ce730be1 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -83,7 +83,7 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) if (unlikely(sg_policy->limits_changed)) { sg_policy->limits_changed = false; - sg_policy->need_freq_update = true; + sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); return true; } @@ -96,7 +96,7 @@ static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time, unsigned int next_freq) { if (sg_policy->need_freq_update) - sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); + sg_policy->need_freq_update = false; else if (sg_policy->next_freq == next_freq) return false; From 34059ed0f30f3b83aac0576c131e79d39bc143d8 Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Thu, 14 Nov 2024 07:59:16 +0530 Subject: [PATCH 23/40] cpufreq: scmi: Register for limit change notifications Register for limit change notifications if supported and use the throttled frequency from the notification to apply HW pressure. 
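For context, the notification handler leans on the kernel's frequency-QoS layer: the driver adds a FREQ_QOS_MAX request against the policy at init time and then moves that cap whenever the firmware reports a new limit. A minimal sketch of that pattern (function names here are illustrative, not the driver's exact code):

    #include <linux/cpufreq.h>
    #include <linux/pm_qos.h>

    static struct freq_qos_request limits_req;	/* one per policy in practice */

    static int example_add_limit_request(struct cpufreq_policy *policy)
    {
    	/* Start uncapped; firmware notifications will lower this later. */
    	return freq_qos_add_request(&policy->constraints, &limits_req,
    				    FREQ_QOS_MAX, FREQ_QOS_MAX_DEFAULT_VALUE);
    }

    static void example_apply_firmware_limit(unsigned int max_khz)
    {
    	/* cpufreq aggregates all MAX requests and clamps the policy. */
    	if (freq_qos_update_request(&limits_req, max_khz) < 0)
    		pr_warn("failed to update freq constraint\n");
    }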
Signed-off-by: Sibi Sankar Tested-by: Mike Tipton Reviewed-by: Cristian Marussi Reviewed-by: Lukasz Luba Signed-off-by: Viresh Kumar --- drivers/cpufreq/scmi-cpufreq.c | 45 ++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 07d6f9a9b7c82..b8fe758aeb010 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,8 @@ struct scmi_data { int nr_opp; struct device *cpu_dev; cpumask_var_t opp_shared_cpus; + struct notifier_block limit_notify_nb; + struct freq_qos_request limits_freq_req; }; static struct scmi_protocol_handle *ph; @@ -174,6 +177,22 @@ static struct freq_attr *scmi_cpufreq_hw_attr[] = { NULL, }; +static int scmi_limit_notify_cb(struct notifier_block *nb, unsigned long event, void *data) +{ + struct scmi_data *priv = container_of(nb, struct scmi_data, limit_notify_nb); + struct scmi_perf_limits_report *limit_notify = data; + unsigned int limit_freq_khz; + int ret; + + limit_freq_khz = limit_notify->range_max_freq / HZ_PER_KHZ; + + ret = freq_qos_update_request(&priv->limits_freq_req, limit_freq_khz); + if (ret < 0) + pr_warn("failed to update freq constraint: %d\n", ret); + + return NOTIFY_OK; +} + static int scmi_cpufreq_init(struct cpufreq_policy *policy) { int ret, nr_opp, domain; @@ -181,6 +200,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) struct device *cpu_dev; struct scmi_data *priv; struct cpufreq_frequency_table *freq_table; + struct scmi_device *sdev = cpufreq_get_driver_data(); cpu_dev = get_cpu_device(policy->cpu); if (!cpu_dev) { @@ -294,6 +314,23 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) } } + ret = freq_qos_add_request(&policy->constraints, &priv->limits_freq_req, FREQ_QOS_MAX, + FREQ_QOS_MAX_DEFAULT_VALUE); + if (ret < 0) { + dev_err(cpu_dev, "failed to add qos limits request: %d\n", ret); + goto out_free_table; + } + + priv->limit_notify_nb.notifier_call = scmi_limit_notify_cb; + ret = sdev->handle->notify_ops->event_notifier_register(sdev->handle, SCMI_PROTOCOL_PERF, + SCMI_EVENT_PERFORMANCE_LIMITS_CHANGED, + &priv->domain_id, + &priv->limit_notify_nb); + if (ret) + dev_warn(&sdev->dev, + "failed to register for limits change notifier for domain %d\n", + priv->domain_id); + return 0; out_free_table: @@ -313,7 +350,13 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) static void scmi_cpufreq_exit(struct cpufreq_policy *policy) { struct scmi_data *priv = policy->driver_data; + struct scmi_device *sdev = cpufreq_get_driver_data(); + sdev->handle->notify_ops->event_notifier_unregister(sdev->handle, SCMI_PROTOCOL_PERF, + SCMI_EVENT_PERFORMANCE_LIMITS_CHANGED, + &priv->domain_id, + &priv->limit_notify_nb); + freq_qos_remove_request(&priv->limits_freq_req); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); dev_pm_opp_remove_all_dynamic(priv->cpu_dev); free_cpumask_var(priv->opp_shared_cpus); @@ -372,6 +415,8 @@ static int scmi_cpufreq_probe(struct scmi_device *sdev) if (!handle) return -ENODEV; + scmi_cpufreq_driver.driver_data = sdev; + perf_ops = handle->devm_protocol_get(sdev, SCMI_PROTOCOL_PERF, &ph); if (IS_ERR(perf_ops)) return PTR_ERR(perf_ops); From f1f010c9d9c62c865d9f54e94075800ba764b4d9 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Sun, 3 Nov 2024 22:02:51 +0100 Subject: [PATCH 24/40] cpufreq: fix using cpufreq-dt as module This driver can be built as a module since 
commit 3b062a086984 ("cpufreq: dt-platdev: Support building as module"), but unfortunately this caused a regression because the cpufreq-dt-platdev.ko module does not autoload. Usually, this is solved by just using the MODULE_DEVICE_TABLE() macro to export all the device IDs as module aliases. But this driver is special due to how it matches devices and decides which platforms it supports. There are two of_device_id lists: an allow list for CPU devices that always match, and a deny list for devices that must not match. The driver registers a cpufreq-dt platform device for all the CPU device nodes that either are in the allow list or contain an operating-points-v2 property and are not in the deny list. Enforce builtin compile of cpufreq-dt-platdev to make autoload work. Fixes: 3b062a086984 ("cpufreq: dt-platdev: Support building as module") Link: https://lore.kernel.org/all/20241104201424.2a42efdd@akair/ Link: https://lore.kernel.org/all/20241119111918.1732531-1-javierm@redhat.com/ Cc: stable@vger.kernel.org Signed-off-by: Andreas Kemnade Reported-by: Radu Rendec Reported-by: Javier Martinez Canillas [ Viresh: Picked commit log from Javier, updated tags ] Signed-off-by: Viresh Kumar --- drivers/cpufreq/Kconfig | 2 +- drivers/cpufreq/cpufreq-dt-platdev.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 92a83a9bb2e10..ea9afdc119fbf 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -232,7 +232,7 @@ config CPUFREQ_VIRT If in doubt, say N. config CPUFREQ_DT_PLATDEV - tristate "Generic DT based cpufreq platdev driver" + bool "Generic DT based cpufreq platdev driver" depends on OF help This adds a generic DT based cpufreq platdev driver for frequency diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 2a3e8bd317c9d..9c198bd4f7e9b 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -235,5 +235,3 @@ static int __init cpufreq_dt_platdev_init(void) sizeof(struct cpufreq_dt_platform_data))); } core_initcall(cpufreq_dt_platdev_init); -MODULE_DESCRIPTION("Generic DT based cpufreq platdev driver"); -MODULE_LICENSE("GPL");
From ab16dfb99cb565fb21ddf29320890c7bad35024c Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Fri, 6 Dec 2024 22:11:24 +0100 Subject: [PATCH 25/40] dt-bindings: cpufreq: Document support for Airoha EN7581 CPUFreq On newer Airoha SoC, CPU Frequency is scaled indirectly with SMC commands to ATF. A virtual clock is exposed. This virtual clock is a get-only clock and is used to expose the current global CPU clock. The frequency info comes by the output of the SMC command that reports the clock in MHz. The SMC sets the CPU clock by providing an index, this is modelled as performance states in a power domain. CPUs can't be individually scaled as the CPU frequency is shared across all CPUs and is global.
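To make the description concrete, the get-only clock boils down to an SMC round trip. A hypothetical sketch of the read side; the function ID below is a placeholder, not the real Airoha ABI:

    #include <linux/arm-smccc.h>
    #include <linux/units.h>

    #define EXAMPLE_SIP_CPU_FREQ_GET	0x82000700	/* placeholder ID, not the real ABI */

    static unsigned long example_get_cpu_freq_hz(void)
    {
    	struct arm_smccc_res res;

    	arm_smccc_smc(EXAMPLE_SIP_CPU_FREQ_GET, 0, 0, 0, 0, 0, 0, 0, &res);

    	/* The firmware reports the global CPU clock in MHz. */
    	return res.a0 * HZ_PER_MHZ;
    }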
Signed-off-by: Christian Marangi Reviewed-by: Ulf Hansson Reviewed-by: Rob Herring (Arm) Signed-off-by: Viresh Kumar --- .../cpufreq/airoha,en7581-cpufreq.yaml | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 Documentation/devicetree/bindings/cpufreq/airoha,en7581-cpufreq.yaml diff --git a/Documentation/devicetree/bindings/cpufreq/airoha,en7581-cpufreq.yaml b/Documentation/devicetree/bindings/cpufreq/airoha,en7581-cpufreq.yaml new file mode 100644 index 0000000000000..7d4510b3219ca --- /dev/null +++ b/Documentation/devicetree/bindings/cpufreq/airoha,en7581-cpufreq.yaml @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/cpufreq/airoha,en7581-cpufreq.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Airoha EN7581 CPUFreq + +maintainers: + - Christian Marangi + +description: | + On newer Airoha SoC, CPU Frequency is scaled indirectly with SMC commands + to ATF. + + A virtual clock is exposed. This virtual clock is a get-only clock and + is used to expose the current global CPU clock. The frequency info comes + by the output of the SMC command that reports the clock in MHz. + + The SMC sets the CPU clock by providing an index, this is modelled as + performance states in a power domain. + + CPUs can't be individually scaled as the CPU frequency is shared across + all CPUs and is global. + +properties: + compatible: + const: airoha,en7581-cpufreq + + '#clock-cells': + const: 0 + + '#power-domain-cells': + const: 0 + + operating-points-v2: true + +required: + - compatible + - '#clock-cells' + - '#power-domain-cells' + - operating-points-v2 + +additionalProperties: false + +examples: + - | + performance-domain { + compatible = "airoha,en7581-cpufreq"; + + operating-points-v2 = <&cpu_smcc_opp_table>; + + #power-domain-cells = <0>; + #clock-cells = <0>; + }; From a248d90ddc3f73d193327275da2f16fb80c862cf Mon Sep 17 00:00:00 2001 From: Nick Chan Date: Thu, 19 Dec 2024 02:25:03 +0800 Subject: [PATCH 26/40] dt-bindings: cpufreq: apple,cluster-cpufreq: Add A7-A11, T2 compatibles Add compatibles for Apple A7-A11, T2 SoCs. Apple A7, A8, A8X gets the per-SoC compatible and the A7 "apple,s5l8960x-cluster-cpufreq" compatible. Apple A9, A9X, A10, A10X, T2, A11 gets the per-SoC compatible, M1 "apple,t8103-cluster-cpufreq" compatible, then the "apple,cluster-cpufreq" fallback compatible. 
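In driver terms, a fallback chain like this lets one of_device_id table cover the whole family: matching prefers the earliest string in a node's compatible list, so a device listing a per-SoC string first binds to dedicated data, while one that only lists the shared fallback gets the generic entry. A sketch mirroring a subset of the apple-soc match table (the .data structs are the driver's per-SoC info; treat this as illustrative, not the full list):

    static const struct of_device_id example_match[] = {
    	/* Most specific entries carry per-SoC tuning data. */
    	{ .compatible = "apple,s5l8960x-cluster-cpufreq", .data = &soc_s5l8960x_info },
    	{ .compatible = "apple,t8103-cluster-cpufreq", .data = &soc_t8103_info },
    	/* Shared fallback: newer parts bind here with safe defaults. */
    	{ .compatible = "apple,cluster-cpufreq", .data = &soc_default_info },
    	{}
    };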
Acked-by: Krzysztof Kozlowski Signed-off-by: Nick Chan Signed-off-by: Viresh Kumar --- .../bindings/cpufreq/apple,cluster-cpufreq.yaml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/cpufreq/apple,cluster-cpufreq.yaml b/Documentation/devicetree/bindings/cpufreq/apple,cluster-cpufreq.yaml index 76cb9726660e5..896276b8c6bbe 100644 --- a/Documentation/devicetree/bindings/cpufreq/apple,cluster-cpufreq.yaml +++ b/Documentation/devicetree/bindings/cpufreq/apple,cluster-cpufreq.yaml @@ -24,9 +24,17 @@ properties: - apple,t8112-cluster-cpufreq - const: apple,cluster-cpufreq - items: - - const: apple,t6000-cluster-cpufreq + - enum: + - apple,s8000-cluster-cpufreq + - apple,t8010-cluster-cpufreq + - apple,t8015-cluster-cpufreq + - apple,t6000-cluster-cpufreq - const: apple,t8103-cluster-cpufreq - const: apple,cluster-cpufreq + - items: + - const: apple,t7000-cluster-cpufreq + - const: apple,s5l8960x-cluster-cpufreq + - const: apple,s5l8960x-cluster-cpufreq reg: maxItems: 1 From 4a06c250abaaf7cd1e32abcb90694b551712fa63 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Thu, 19 Dec 2024 02:25:04 +0800 Subject: [PATCH 27/40] cpufreq: apple-soc: Drop setting the PS2 field on M2+ Newer device do not use this. It is not known what this field does, but change the behavior to be same as macOS to be safe. Signed-off-by: Hector Martin Signed-off-by: Nick Chan Signed-off-by: Viresh Kumar --- drivers/cpufreq/apple-soc-cpufreq.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c index 4dcacab9b4bf2..ad6c7b8f290ca 100644 --- a/drivers/cpufreq/apple-soc-cpufreq.c +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -25,7 +25,7 @@ #define APPLE_DVFS_CMD 0x20 #define APPLE_DVFS_CMD_BUSY BIT(31) #define APPLE_DVFS_CMD_SET BIT(25) -#define APPLE_DVFS_CMD_PS2 GENMASK(16, 12) +#define APPLE_DVFS_CMD_PS2 GENMASK(15, 12) #define APPLE_DVFS_CMD_PS1 GENMASK(4, 0) /* Same timebase as CPU counter (24MHz) */ @@ -55,6 +55,7 @@ #define APPLE_DVFS_TRANSITION_TIMEOUT 100 struct apple_soc_cpufreq_info { + bool has_ps2; u64 max_pstate; u64 cur_pstate_mask; u64 cur_pstate_shift; @@ -69,18 +70,21 @@ struct apple_cpu_priv { static struct cpufreq_driver apple_soc_cpufreq_driver; static const struct apple_soc_cpufreq_info soc_t8103_info = { + .has_ps2 = true, .max_pstate = 15, .cur_pstate_mask = APPLE_DVFS_STATUS_CUR_PS_T8103, .cur_pstate_shift = APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8103, }; static const struct apple_soc_cpufreq_info soc_t8112_info = { + .has_ps2 = false, .max_pstate = 31, .cur_pstate_mask = APPLE_DVFS_STATUS_CUR_PS_T8112, .cur_pstate_shift = APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8112, }; static const struct apple_soc_cpufreq_info soc_default_info = { + .has_ps2 = false, .max_pstate = 15, .cur_pstate_mask = 0, /* fallback */ }; @@ -148,9 +152,12 @@ static int apple_soc_cpufreq_set_target(struct cpufreq_policy *policy, return -EIO; } - reg &= ~(APPLE_DVFS_CMD_PS1 | APPLE_DVFS_CMD_PS2); + reg &= ~APPLE_DVFS_CMD_PS1; reg |= FIELD_PREP(APPLE_DVFS_CMD_PS1, pstate); - reg |= FIELD_PREP(APPLE_DVFS_CMD_PS2, pstate); + if (priv->info->has_ps2) { + reg &= ~APPLE_DVFS_CMD_PS2; + reg |= FIELD_PREP(APPLE_DVFS_CMD_PS2, pstate); + } reg |= APPLE_DVFS_CMD_SET; writeq_relaxed(reg, priv->reg_base + APPLE_DVFS_CMD); From 0755a9376ec949eb7d881324548266ba8667a206 Mon Sep 17 00:00:00 2001 From: Nick Chan Date: Thu, 19 Dec 2024 02:25:05 +0800 Subject: [PATCH 28/40] cpufreq: apple-soc: Allow per-SoC 
configuration of APPLE_DVFS_CMD_PS1 Support for SoCs that have a different APPLE_DVFS_CMD_PS1 will be added soon, so modify the driver first to allow it to be configured per-SoC. Signed-off-by: Nick Chan Signed-off-by: Viresh Kumar --- drivers/cpufreq/apple-soc-cpufreq.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c index ad6c7b8f290ca..90e34105b50bd 100644 --- a/drivers/cpufreq/apple-soc-cpufreq.c +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -27,6 +27,7 @@ #define APPLE_DVFS_CMD_SET BIT(25) #define APPLE_DVFS_CMD_PS2 GENMASK(15, 12) #define APPLE_DVFS_CMD_PS1 GENMASK(4, 0) +#define APPLE_DVFS_CMD_PS1_SHIFT 0 /* Same timebase as CPU counter (24MHz) */ #define APPLE_DVFS_LAST_CHG_TIME 0x38 @@ -59,6 +60,8 @@ struct apple_soc_cpufreq_info { u64 max_pstate; u64 cur_pstate_mask; u64 cur_pstate_shift; + u64 ps1_mask; + u64 ps1_shift; }; struct apple_cpu_priv { @@ -74,6 +77,8 @@ static const struct apple_soc_cpufreq_info soc_t8103_info = { .max_pstate = 15, .cur_pstate_mask = APPLE_DVFS_STATUS_CUR_PS_T8103, .cur_pstate_shift = APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8103, + .ps1_mask = APPLE_DVFS_CMD_PS1, + .ps1_shift = APPLE_DVFS_CMD_PS1_SHIFT, }; static const struct apple_soc_cpufreq_info soc_t8112_info = { @@ -81,12 +86,16 @@ static const struct apple_soc_cpufreq_info soc_t8112_info = { .max_pstate = 31, .cur_pstate_mask = APPLE_DVFS_STATUS_CUR_PS_T8112, .cur_pstate_shift = APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8112, + .ps1_mask = APPLE_DVFS_CMD_PS1, + .ps1_shift = APPLE_DVFS_CMD_PS1_SHIFT, }; static const struct apple_soc_cpufreq_info soc_default_info = { .has_ps2 = false, .max_pstate = 15, .cur_pstate_mask = 0, /* fallback */ + .ps1_mask = APPLE_DVFS_CMD_PS1, + .ps1_shift = APPLE_DVFS_CMD_PS1_SHIFT, }; static const struct of_device_id apple_soc_cpufreq_of_match[] __maybe_unused = { @@ -152,8 +161,8 @@ static int apple_soc_cpufreq_set_target(struct cpufreq_policy *policy, return -EIO; } - reg &= ~APPLE_DVFS_CMD_PS1; - reg |= FIELD_PREP(APPLE_DVFS_CMD_PS1, pstate); + reg &= ~priv->info->ps1_mask; + reg |= pstate << priv->info->ps1_shift; if (priv->info->has_ps2) { reg &= ~APPLE_DVFS_CMD_PS2; reg |= FIELD_PREP(APPLE_DVFS_CMD_PS2, pstate);
From 55aac9f570b0f8a300a9b0128620ced15c4742d8 Mon Sep 17 00:00:00 2001 From: Nick Chan Date: Thu, 19 Dec 2024 02:25:06 +0800 Subject: [PATCH 29/40] cpufreq: apple-soc: Use 32-bit read for status register Apple A7-A9(X) SoCs require 32-bit reads on the status register. Newer SoCs accept 32-bit reads on the status register as well.
Signed-off-by: Nick Chan Signed-off-by: Viresh Kumar --- drivers/cpufreq/apple-soc-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c index 90e34105b50bd..b27d261fe5a68 100644 --- a/drivers/cpufreq/apple-soc-cpufreq.c +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -122,7 +122,7 @@ static unsigned int apple_soc_cpufreq_get_rate(unsigned int cpu) unsigned int pstate; if (priv->info->cur_pstate_mask) { - u64 reg = readq_relaxed(priv->reg_base + APPLE_DVFS_STATUS); + u32 reg = readl_relaxed(priv->reg_base + APPLE_DVFS_STATUS); pstate = (reg & priv->info->cur_pstate_mask) >> priv->info->cur_pstate_shift; } else {
From 0dc21f609149b958ca8c010d17f9935ed9cc96fe Mon Sep 17 00:00:00 2001 From: Nick Chan Date: Thu, 19 Dec 2024 02:25:07 +0800 Subject: [PATCH 30/40] cpufreq: apple-soc: Increase cluster switch timeout to 400us Apple A11 SoC takes a long time to switch. Maximum switch time observed is 345us, so increase the cluster switch timeout to 400us to be safe. Signed-off-by: Nick Chan Signed-off-by: Viresh Kumar --- drivers/cpufreq/apple-soc-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c index b27d261fe5a68..94e57f055a5ff 100644 --- a/drivers/cpufreq/apple-soc-cpufreq.c +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -53,7 +53,7 @@ #define APPLE_DVFS_PLL_FACTOR_MULT GENMASK(31, 16) #define APPLE_DVFS_PLL_FACTOR_DIV GENMASK(15, 0) -#define APPLE_DVFS_TRANSITION_TIMEOUT 100 +#define APPLE_DVFS_TRANSITION_TIMEOUT 400 struct apple_soc_cpufreq_info { bool has_ps2;
From 13b147b2a9f8a9a0b18eddd23abaf84d7bea80d1 Mon Sep 17 00:00:00 2001 From: Nick Chan Date: Thu, 19 Dec 2024 02:25:08 +0800 Subject: [PATCH 31/40] cpufreq: apple-soc: Set fallback transition latency to APPLE_DVFS_TRANSITION_TIMEOUT The driver already assumes transitions will not take longer than APPLE_DVFS_TRANSITION_TIMEOUT in apple_soc_cpufreq_set_target(), so it makes little sense to set CPUFREQ_ETERNAL as the transition latency when the transition latency is not given by the opp-table. Reviewed-by: Christian Loehle Signed-off-by: Nick Chan Signed-off-by: Viresh Kumar --- drivers/cpufreq/apple-soc-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c index 94e57f055a5ff..879ddec9e557d 100644 --- a/drivers/cpufreq/apple-soc-cpufreq.c +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -291,7 +291,7 @@ static int apple_soc_cpufreq_init(struct cpufreq_policy *policy) transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev); if (!transition_latency) - transition_latency = CPUFREQ_ETERNAL; + transition_latency = APPLE_DVFS_TRANSITION_TIMEOUT * NSEC_PER_USEC; policy->cpuinfo.transition_latency = transition_latency; policy->dvfs_possible_from_any_cpu = true;
From 1a4ddf6ab9dde8a7e1e025c44745862705636b2f Mon Sep 17 00:00:00 2001 From: Nick Chan Date: Thu, 19 Dec 2024 02:25:09 +0800 Subject: [PATCH 32/40] cpufreq: apple-soc: Add Apple A7-A8X SoC cpufreq support These SoCs only use 3 bits for p-states, and have a different APPLE_DVFS_CMD_PS1 mask value.
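This per-SoC mask is also why patch 28 switched the PS1 write from FIELD_PREP() to an explicit shift: FIELD_PREP() build-checks its value against a compile-time-constant mask, which is impossible once the mask lives in per-SoC data. A small sketch of the runtime variant, reusing the fields this series adds (function name illustrative):

    static u64 example_set_ps1(const struct apple_soc_cpufreq_info *info,
    			   u64 reg, u64 pstate)
    {
    	/* info->ps1_mask is not a compile-time constant, so FIELD_PREP()
    	 * would fail to build; mask and shift by hand instead. */
    	reg &= ~info->ps1_mask;
    	reg |= pstate << info->ps1_shift;

    	return reg;
    }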
Signed-off-by: Nick Chan Signed-off-by: Viresh Kumar --- drivers/cpufreq/apple-soc-cpufreq.c | 30 +++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c index 879ddec9e557d..269b18c62d040 100644 --- a/drivers/cpufreq/apple-soc-cpufreq.c +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -22,12 +22,14 @@ #include #include -#define APPLE_DVFS_CMD 0x20 -#define APPLE_DVFS_CMD_BUSY BIT(31) -#define APPLE_DVFS_CMD_SET BIT(25) -#define APPLE_DVFS_CMD_PS2 GENMASK(15, 12) -#define APPLE_DVFS_CMD_PS1 GENMASK(4, 0) -#define APPLE_DVFS_CMD_PS1_SHIFT 0 +#define APPLE_DVFS_CMD 0x20 +#define APPLE_DVFS_CMD_BUSY BIT(31) +#define APPLE_DVFS_CMD_SET BIT(25) +#define APPLE_DVFS_CMD_PS1_S5L8960X GENMASK(24, 22) +#define APPLE_DVFS_CMD_PS1_S5L8960X_SHIFT 22 +#define APPLE_DVFS_CMD_PS2 GENMASK(15, 12) +#define APPLE_DVFS_CMD_PS1 GENMASK(4, 0) +#define APPLE_DVFS_CMD_PS1_SHIFT 0 /* Same timebase as CPU counter (24MHz) */ #define APPLE_DVFS_LAST_CHG_TIME 0x38 @@ -36,6 +38,9 @@ * Apple ran out of bits and had to shift this in T8112... */ #define APPLE_DVFS_STATUS 0x50 +#define APPLE_DVFS_STATUS_CUR_PS_S5L8960X GENMASK(5, 3) +#define APPLE_DVFS_STATUS_CUR_PS_SHIFT_S5L8960X 3 +#define APPLE_DVFS_STATUS_TGT_PS_S5L8960X GENMASK(2, 0) #define APPLE_DVFS_STATUS_CUR_PS_T8103 GENMASK(7, 4) #define APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8103 4 #define APPLE_DVFS_STATUS_TGT_PS_T8103 GENMASK(3, 0) @@ -72,6 +77,15 @@ struct apple_cpu_priv { static struct cpufreq_driver apple_soc_cpufreq_driver; +static const struct apple_soc_cpufreq_info soc_s5l8960x_info = { + .has_ps2 = false, + .max_pstate = 7, + .cur_pstate_mask = APPLE_DVFS_STATUS_CUR_PS_S5L8960X, + .cur_pstate_shift = APPLE_DVFS_STATUS_CUR_PS_SHIFT_S5L8960X, + .ps1_mask = APPLE_DVFS_CMD_PS1_S5L8960X, + .ps1_shift = APPLE_DVFS_CMD_PS1_S5L8960X_SHIFT, +}; + static const struct apple_soc_cpufreq_info soc_t8103_info = { .has_ps2 = true, .max_pstate = 15, @@ -99,6 +113,10 @@ static const struct apple_soc_cpufreq_info soc_default_info = { }; static const struct of_device_id apple_soc_cpufreq_of_match[] __maybe_unused = { + { + .compatible = "apple,s5l8960x-cluster-cpufreq", + .data = &soc_s5l8960x_info, + }, { .compatible = "apple,t8103-cluster-cpufreq", .data = &soc_t8103_info, From 85d8b11351a8f15d6ec7a5e97909861cb3b6bcec Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 5 Dec 2024 22:20:28 +0530 Subject: [PATCH 33/40] cpufreq: qcom: Fix qcom_cpufreq_hw_recalc_rate() to query LUT if LMh IRQ is not available Currently, qcom_cpufreq_hw_recalc_rate() returns the LMh throttled frequency for the domain even if LMh IRQ is not available. But as per qcom_cpufreq_hw_get(), the driver has to query LUT entries to get the actual frequency of the domain. So do the same in qcom_cpufreq_hw_recalc_rate(). While doing so, refactor the existing qcom_cpufreq_hw_get() function so that qcom_cpufreq_hw_recalc_rate() can make use of the existing code and avoid code duplication. This also requires setting the qcom_cpufreq_data::policy even if LMh IRQ is not available. 
Fixes: 4370232c727b ("cpufreq: qcom-hw: Add CPU clock provider support") Signed-off-by: Manivannan Sadhasivam Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 98129565acb8e..c145ab7b0bb21 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -143,14 +143,12 @@ static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) } /* Get the frequency requested by the cpufreq core for the CPU */ -static unsigned int qcom_cpufreq_get_freq(unsigned int cpu) +static unsigned int qcom_cpufreq_get_freq(struct cpufreq_policy *policy) { struct qcom_cpufreq_data *data; const struct qcom_cpufreq_soc_data *soc_data; - struct cpufreq_policy *policy; unsigned int index; - policy = cpufreq_cpu_get_raw(cpu); if (!policy) return 0; @@ -163,12 +161,10 @@ static unsigned int qcom_cpufreq_get_freq(unsigned int cpu) return policy->freq_table[index].frequency; } -static unsigned int qcom_cpufreq_hw_get(unsigned int cpu) +static unsigned int __qcom_cpufreq_hw_get(struct cpufreq_policy *policy) { struct qcom_cpufreq_data *data; - struct cpufreq_policy *policy; - policy = cpufreq_cpu_get_raw(cpu); if (!policy) return 0; @@ -177,7 +173,12 @@ static unsigned int qcom_cpufreq_hw_get(unsigned int cpu) if (data->throttle_irq >= 0) return qcom_lmh_get_throttle_freq(data) / HZ_PER_KHZ; - return qcom_cpufreq_get_freq(cpu); + return qcom_cpufreq_get_freq(policy); +} + +static unsigned int qcom_cpufreq_hw_get(unsigned int cpu) +{ + return __qcom_cpufreq_hw_get(cpufreq_cpu_get_raw(cpu)); } static unsigned int qcom_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, @@ -363,7 +364,7 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) * If h/w throttled frequency is higher than what cpufreq has requested * for, then stop polling and switch back to interrupt mechanism. */ - if (throttled_freq >= qcom_cpufreq_get_freq(cpu)) + if (throttled_freq >= qcom_cpufreq_get_freq(cpufreq_cpu_get_raw(cpu))) enable_irq(data->throttle_irq); else mod_delayed_work(system_highpri_wq, &data->throttle_work, @@ -441,7 +442,6 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index) return data->throttle_irq; data->cancel_throttle = false; - data->policy = policy; mutex_init(&data->throttle_lock); INIT_DEFERRABLE_WORK(&data->throttle_work, qcom_lmh_dcvs_poll); @@ -552,6 +552,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) policy->driver_data = data; policy->dvfs_possible_from_any_cpu = true; + data->policy = policy; ret = qcom_cpufreq_hw_read_lut(cpu_dev, policy); if (ret) { @@ -622,7 +623,7 @@ static unsigned long qcom_cpufreq_hw_recalc_rate(struct clk_hw *hw, unsigned lon { struct qcom_cpufreq_data *data = container_of(hw, struct qcom_cpufreq_data, cpu_clk); - return qcom_lmh_get_throttle_freq(data); + return __qcom_cpufreq_hw_get(data->policy) * HZ_PER_KHZ; } static const struct clk_ops qcom_cpufreq_hw_clk_ops = { From a9ba290d0b829012574b6821ba08815046e60c94 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 5 Dec 2024 22:20:29 +0530 Subject: [PATCH 34/40] cpufreq: qcom: Implement clk_ops::determine_rate() for qcom_cpufreq* clocks determine_rate() callback is used by the clk_set_rate() API to get the closest rate of the target rate supported by the clock. 
If this callback is not implemented (nor round_rate() callback), then the API will assume that the clock cannot set the requested rate. And since there is no parent, it will return -EINVAL. This is not an issue right now as clk_set_rate() mistakenly compares the target rate with cached rate and bails out early. But once that is fixed to compare the target rate with the actual rate of the clock (returned by recalc_rate()), then clk_set_rate() for this clock will start to fail as below: cpu cpu0: _opp_config_clk_single: failed to set clock rate: -22 So implement the determine_rate() callback that just returns the actual rate at which the clock is passed to the CPUs in a domain. Fixes: 4370232c727b ("cpufreq: qcom-hw: Add CPU clock provider support") Reported-by: Johan Hovold Suggested-by: Stephen Boyd Signed-off-by: Manivannan Sadhasivam Reviewed-by: Stephen Boyd Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index c145ab7b0bb21..b2e7e89feaac4 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -626,8 +626,21 @@ static unsigned long qcom_cpufreq_hw_recalc_rate(struct clk_hw *hw, unsigned lon return __qcom_cpufreq_hw_get(data->policy) * HZ_PER_KHZ; } +/* + * Since we cannot determine the closest rate of the target rate, let's just + * return the actual rate at which the clock is running at. This is needed to + * make clk_set_rate() API work properly. + */ +static int qcom_cpufreq_hw_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) +{ + req->rate = qcom_cpufreq_hw_recalc_rate(hw, 0); + + return 0; +} + static const struct clk_ops qcom_cpufreq_hw_clk_ops = { .recalc_rate = qcom_cpufreq_hw_recalc_rate, + .determine_rate = qcom_cpufreq_hw_determine_rate, }; static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) From af6cc45af3db12f452bfdc9a515da54b56412756 Mon Sep 17 00:00:00 2001 From: Ethan Carter Edwards Date: Mon, 23 Dec 2024 14:41:24 +0000 Subject: [PATCH 35/40] cpufreq: sparc: change kzalloc to kcalloc Refactor to use kcalloc instead of kzalloc when multiplying allocation size by count. This refactor prevents unintentional memory overflows. Discovered by checkpatch.pl. 
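The point of kcalloc() is the overflow check, not just style: kcalloc(n, size, flags) returns NULL if n * size would overflow, whereas an open-coded multiplication inside kzalloc() wraps silently and can hand back an undersized buffer. A fragment showing the two forms side by side:

    	/* Overflow-checked: fails cleanly if the product would wrap. */
    	tbl = kcalloc(NR_CPUS, sizeof(*tbl), GFP_KERNEL);

    	/* Open-coded multiply: a wrapped product allocates a short buffer. */
    	tbl = kzalloc(NR_CPUS * sizeof(*tbl), GFP_KERNEL);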
Signed-off-by: Ethan Carter Edwards Signed-off-by: Viresh Kumar --- drivers/cpufreq/sparc-us2e-cpufreq.c | 2 +- drivers/cpufreq/sparc-us3-cpufreq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/sparc-us2e-cpufreq.c b/drivers/cpufreq/sparc-us2e-cpufreq.c index 8a0cd5312a593..15899dd77c086 100644 --- a/drivers/cpufreq/sparc-us2e-cpufreq.c +++ b/drivers/cpufreq/sparc-us2e-cpufreq.c @@ -323,7 +323,7 @@ static int __init us2e_freq_init(void) impl = ((ver >> 32) & 0xffff); if (manuf == 0x17 && impl == 0x13) { - us2e_freq_table = kzalloc(NR_CPUS * sizeof(*us2e_freq_table), + us2e_freq_table = kcalloc(NR_CPUS, sizeof(*us2e_freq_table), GFP_KERNEL); if (!us2e_freq_table) return -ENOMEM; diff --git a/drivers/cpufreq/sparc-us3-cpufreq.c b/drivers/cpufreq/sparc-us3-cpufreq.c index b50f9d13e6d26..de50a2f3b1245 100644 --- a/drivers/cpufreq/sparc-us3-cpufreq.c +++ b/drivers/cpufreq/sparc-us3-cpufreq.c @@ -171,7 +171,7 @@ static int __init us3_freq_init(void) impl == CHEETAH_PLUS_IMPL || impl == JAGUAR_IMPL || impl == PANTHER_IMPL)) { - us3_freq_table = kzalloc(NR_CPUS * sizeof(*us3_freq_table), + us3_freq_table = kcalloc(NR_CPUS, sizeof(*us3_freq_table), GFP_KERNEL); if (!us3_freq_table) return -ENOMEM;
From fd604ae6c261c5a56bb977ae99f875bbd7264a3f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 2 Jan 2025 08:12:04 -0600 Subject: [PATCH 36/40] cpufreq/amd-pstate: Fix prefcore rankings commit 50a062a76200 ("cpufreq/amd-pstate: Store the boost numerator as highest perf again") updated the value stored for highest perf to no longer store the highest perf value but instead the boost numerator. This is a fixed value for systems with preferred cores and not appropriate for use in ITMT rankings. Update the value used for ITMT rankings to be the preferred core ranking. Reported-and-tested-by: Sebastian Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219640 Fixes: 50a062a76200 ("cpufreq/amd-pstate: Store the boost numerator as highest perf again") Reviewed-by: Dhananjay Ugwekar Link: https://lore.kernel.org/r/20250102141204.3413202-1-superm1@kernel.org Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index d7b1de97727a1..2330903a8b457 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -815,7 +815,7 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) * sched_set_itmt_support(true) has been called and it is valid to * update them at any time after it has been called. */ - sched_set_itmt_core_prio((int)READ_ONCE(cpudata->highest_perf), cpudata->cpu); + sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu); schedule_work(&sched_prefcore_work); }
From 857a61c2ce74e30fc3b10bc89d68ddd8d05b188c Mon Sep 17 00:00:00 2001 From: Naresh Solanki Date: Fri, 20 Dec 2024 01:48:32 +0530 Subject: [PATCH 37/40] cpufreq/amd-pstate: Refactor max frequency calculation The previous approach introduced roundoff errors during division when calculating the boost ratio. This, in turn, affected the maximum frequency calculation, often resulting in reporting lower frequency values. For example, on a Glinda SoC-based board with the following parameters: max_perf = 208 nominal_perf = 100 nominal_freq = 2600 MHz The Linux kernel previously calculated the frequency as: freq = ((max_perf * 1024 / nominal_perf) * nominal_freq) / 1024 freq = 5405 MHz // Integer arithmetic.
With the updated formula: freq = (max_perf * nominal_freq) / nominal_perf freq = 5408 MHz This change ensures more accurate frequency calculations by eliminating unnecessary shifts and divisions, thereby improving precision. Signed-off-by: Naresh Solanki [ML: trim the changelog from commit message] Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20241219201833.2750998-1-naresh.solanki@9elements.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 2330903a8b457..dd9b8d6993d69 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -908,9 +908,8 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) { int ret; u32 min_freq, max_freq; - u32 nominal_perf, nominal_freq; + u32 highest_perf, nominal_perf, nominal_freq; u32 lowest_nonlinear_perf, lowest_nonlinear_freq; - u32 boost_ratio, lowest_nonlinear_ratio; struct cppc_perf_caps cppc_perf; ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); @@ -927,16 +926,12 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) else nominal_freq = cppc_perf.nominal_freq; + highest_perf = READ_ONCE(cpudata->highest_perf); nominal_perf = READ_ONCE(cpudata->nominal_perf); - - boost_ratio = div_u64(cpudata->highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); - max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT); + max_freq = div_u64((u64)highest_perf * nominal_freq, nominal_perf); lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); - lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT, - nominal_perf); - lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT); - + lowest_nonlinear_freq = div_u64((u64)nominal_freq * lowest_nonlinear_perf, nominal_perf); WRITE_ONCE(cpudata->min_freq, min_freq * 1000); WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000); WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000); From 0834667545962ef1c5e8684ed32b45d9c574acd3 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Mon, 13 Jan 2025 10:11:07 +0530 Subject: [PATCH 38/40] cpufreq: ACPI: Fix max-frequency computation Commit 3c55e94c0ade ("cpufreq: ACPI: Extend frequency tables to cover boost frequencies") introduced an assumption in acpi_cpufreq_cpu_init() that the first entry in the P-state table was the nominal frequency. This assumption is incorrect. The frequency corresponding to the P0 P-State need not be the same as the nominal frequency advertised via CPPC. Since the driver is using the CPPC.highest_perf and CPPC.nominal_perf to compute the boost-ratio, it makes sense to use CPPC.nominal_freq to compute the max-frequency. CPPC.nominal_freq is advertised on platforms supporting CPPC revisions 3 or higher. Hence, fallback to using the first entry in the P-State table only on platforms that do not advertise CPPC.nominal_freq. Fixes: 3c55e94c0ade ("cpufreq: ACPI: Extend frequency tables to cover boost frequencies") Tested-by: Dhananjay Ugwekar Signed-off-by: Gautham R. Shenoy Reviewed-by: Mario Limonciello Link: https://patch.msgid.link/20250113044107.566-1-gautham.shenoy@amd.com [ rjw: Retain reverse X-mas tree ordering of local variable declarations ] [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 36 +++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index c9ebacf5c88e2..302df42d68875 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -623,7 +623,14 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) #endif #ifdef CONFIG_ACPI_CPPC_LIB -static u64 get_max_boost_ratio(unsigned int cpu) +/* + * get_max_boost_ratio: Computes the max_boost_ratio as the ratio + * between the highest_perf and the nominal_perf. + * + * Returns the max_boost_ratio for @cpu. Returns the CPPC nominal + * frequency via @nominal_freq if it is non-NULL pointer. + */ +static u64 get_max_boost_ratio(unsigned int cpu, u64 *nominal_freq) { struct cppc_perf_caps perf_caps; u64 highest_perf, nominal_perf; @@ -652,6 +659,9 @@ static u64 get_max_boost_ratio(unsigned int cpu) nominal_perf = perf_caps.nominal_perf; + if (nominal_freq) + *nominal_freq = perf_caps.nominal_freq; + if (!highest_perf || !nominal_perf) { pr_debug("CPU%d: highest or nominal performance missing\n", cpu); return 0; @@ -664,8 +674,12 @@ static u64 get_max_boost_ratio(unsigned int cpu) return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); } + #else -static inline u64 get_max_boost_ratio(unsigned int cpu) { return 0; } +static inline u64 get_max_boost_ratio(unsigned int cpu, u64 *nominal_freq) +{ + return 0; +} #endif static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) @@ -675,9 +689,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) struct acpi_cpufreq_data *data; unsigned int cpu = policy->cpu; struct cpuinfo_x86 *c = &cpu_data(cpu); + u64 max_boost_ratio, nominal_freq = 0; unsigned int valid_states = 0; unsigned int result = 0; - u64 max_boost_ratio; unsigned int i; #ifdef CONFIG_SMP static int blacklisted; @@ -827,16 +841,20 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) } freq_table[valid_states].frequency = CPUFREQ_TABLE_END; - max_boost_ratio = get_max_boost_ratio(cpu); + max_boost_ratio = get_max_boost_ratio(cpu, &nominal_freq); if (max_boost_ratio) { - unsigned int freq = freq_table[0].frequency; + unsigned int freq = nominal_freq; /* - * Because the loop above sorts the freq_table entries in the - * descending order, freq is the maximum frequency in the table. - * Assume that it corresponds to the CPPC nominal frequency and - * use it to set cpuinfo.max_freq. + * The loop above sorts the freq_table entries in the + * descending order. If ACPI CPPC has not advertised + * the nominal frequency (this is possible in CPPC + * revisions prior to 3), then use the first entry in + * the pstate table as a proxy for nominal frequency. */ + if (!freq) + freq = freq_table[0].frequency; + policy->cpuinfo.max_freq = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT; } else { /*
From 84cf9e541cccb8cb698518a9897942e8c78f1d83 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 9 Jan 2025 14:12:58 +0100 Subject: [PATCH 39/40] cpufreq: airoha: Add EN7581 CPUFreq SMCCC driver Add a simple cpufreq driver for the Airoha EN7581 SoC that controls CPU frequency scaling with SMC APIs and registers a generic "cpufreq-dt" device. All CPUs share the same frequency and can't be controlled independently. CPU frequency is controlled by the attached PM domain. Add the SoC compatible to the cpufreq-dt-platdev block list, as a dedicated cpufreq driver is needed with OPP v2 nodes declared in DTS.
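The frequency change itself happens in the attached PM domain: with PD_FLAG_REQUIRED_OPP, the OPP core translates each CPU OPP into a required performance state of the "perf" domain, and the genpd provider's set_performance_state() callback turns that index into the firmware call. A hedged sketch of the provider side (the real callback lives in the Airoha clock/PM-domain driver; example_smc_set_cpu_pstate() is a stand-in, not a real API):

    #include <linux/pm_domain.h>

    /* Placeholder for the firmware call; the real SMC ABI is not shown here. */
    static int example_smc_set_cpu_pstate(unsigned int index)
    {
    	return 0;
    }

    static int example_pd_set_performance_state(struct generic_pm_domain *domain,
    					    unsigned int state)
    {
    	/* 'state' is the performance-state index derived from the CPU
    	 * OPP's required-opps entry; hand it to the firmware. */
    	return example_smc_set_cpu_pstate(state);
    }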
Signed-off-by: Christian Marangi Signed-off-by: Viresh Kumar --- drivers/cpufreq/Kconfig.arm | 8 ++ drivers/cpufreq/Makefile | 1 + drivers/cpufreq/airoha-cpufreq.c | 152 +++++++++++++++++++++++++++ drivers/cpufreq/cpufreq-dt-platdev.c | 2 + 4 files changed, 163 insertions(+) create mode 100644 drivers/cpufreq/airoha-cpufreq.c diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 5f7e13e60c802..704e84d006390 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -15,6 +15,14 @@ config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM To compile this driver as a module, choose M here: the module will be called sun50i-cpufreq-nvmem. +config ARM_AIROHA_SOC_CPUFREQ + tristate "Airoha EN7581 SoC CPUFreq support" + depends on ARCH_AIROHA || COMPILE_TEST + select PM_OPP + default ARCH_AIROHA + help + This adds the CPUFreq driver for Airoha EN7581 SoCs. + config ARM_APPLE_SOC_CPUFREQ tristate "Apple Silicon SoC CPUFreq support" depends on ARCH_APPLE || (COMPILE_TEST && 64BIT) diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index d35a28dd94630..890fff99f37d9 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -53,6 +53,7 @@ obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o ################################################################################## # ARM SoC drivers +obj-$(CONFIG_ARM_AIROHA_SOC_CPUFREQ) += airoha-cpufreq.o obj-$(CONFIG_ARM_APPLE_SOC_CPUFREQ) += apple-soc-cpufreq.o obj-$(CONFIG_ARM_ARMADA_37XX_CPUFREQ) += armada-37xx-cpufreq.o obj-$(CONFIG_ARM_ARMADA_8K_CPUFREQ) += armada-8k-cpufreq.o diff --git a/drivers/cpufreq/airoha-cpufreq.c b/drivers/cpufreq/airoha-cpufreq.c new file mode 100644 index 0000000000000..4fe39eadd1633 --- /dev/null +++ b/drivers/cpufreq/airoha-cpufreq.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include + +#include "cpufreq-dt.h" + +struct airoha_cpufreq_priv { + int opp_token; + struct dev_pm_domain_list *pd_list; + struct platform_device *cpufreq_dt; +}; + +static struct platform_device *cpufreq_pdev; + +/* NOP function to disable OPP from setting clock */ +static int airoha_cpufreq_config_clks_nop(struct device *dev, + struct opp_table *opp_table, + struct dev_pm_opp *opp, + void *data, bool scaling_down) +{ + return 0; +} + +static const char * const airoha_cpufreq_clk_names[] = { "cpu", NULL }; +static const char * const airoha_cpufreq_pd_names[] = { "perf" }; + +static int airoha_cpufreq_probe(struct platform_device *pdev) +{ + const struct dev_pm_domain_attach_data attach_data = { + .pd_names = airoha_cpufreq_pd_names, + .num_pd_names = ARRAY_SIZE(airoha_cpufreq_pd_names), + .pd_flags = PD_FLAG_DEV_LINK_ON | PD_FLAG_REQUIRED_OPP, + }; + struct dev_pm_opp_config config = { + .clk_names = airoha_cpufreq_clk_names, + .config_clks = airoha_cpufreq_config_clks_nop, + }; + struct platform_device *cpufreq_dt; + struct airoha_cpufreq_priv *priv; + struct device *dev = &pdev->dev; + struct device *cpu_dev; + int ret; + + /* CPUs refer to the same OPP table */ + cpu_dev = get_cpu_device(0); + if (!cpu_dev) + return -ENODEV; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + /* Set OPP table conf with NOP config_clks */ + priv->opp_token = dev_pm_opp_set_config(cpu_dev, &config); + if (priv->opp_token < 0) + return dev_err_probe(dev, priv->opp_token, "Failed to set OPP config\n"); + + /* Attach PM for OPP */ + ret = dev_pm_domain_attach_list(cpu_dev, &attach_data, + 
&priv->pd_list); + if (ret) + goto clear_opp_config; + + cpufreq_dt = platform_device_register_simple("cpufreq-dt", -1, NULL, 0); + ret = PTR_ERR_OR_ZERO(cpufreq_dt); + if (ret) { + dev_err(dev, "failed to create cpufreq-dt device: %d\n", ret); + goto detach_pm; + } + + priv->cpufreq_dt = cpufreq_dt; + platform_set_drvdata(pdev, priv); + + return 0; + +detach_pm: + dev_pm_domain_detach_list(priv->pd_list); +clear_opp_config: + dev_pm_opp_clear_config(priv->opp_token); + + return ret; +} + +static void airoha_cpufreq_remove(struct platform_device *pdev) +{ + struct airoha_cpufreq_priv *priv = platform_get_drvdata(pdev); + + platform_device_unregister(priv->cpufreq_dt); + + dev_pm_domain_detach_list(priv->pd_list); + + dev_pm_opp_clear_config(priv->opp_token); +} + +static struct platform_driver airoha_cpufreq_driver = { + .probe = airoha_cpufreq_probe, + .remove = airoha_cpufreq_remove, + .driver = { + .name = "airoha-cpufreq", + }, +}; + +static const struct of_device_id airoha_cpufreq_match_list[] __initconst = { + { .compatible = "airoha,en7581" }, + {}, +}; +MODULE_DEVICE_TABLE(of, airoha_cpufreq_match_list); + +static int __init airoha_cpufreq_init(void) +{ + struct device_node *np = of_find_node_by_path("/"); + const struct of_device_id *match; + int ret; + + if (!np) + return -ENODEV; + + match = of_match_node(airoha_cpufreq_match_list, np); + of_node_put(np); + if (!match) + return -ENODEV; + + ret = platform_driver_register(&airoha_cpufreq_driver); + if (unlikely(ret < 0)) + return ret; + + cpufreq_pdev = platform_device_register_data(NULL, "airoha-cpufreq", + -1, match, sizeof(*match)); + ret = PTR_ERR_OR_ZERO(cpufreq_pdev); + if (ret) + platform_driver_unregister(&airoha_cpufreq_driver); + + return ret; +} +module_init(airoha_cpufreq_init); + +static void __exit airoha_cpufreq_exit(void) +{ + platform_device_unregister(cpufreq_pdev); + platform_driver_unregister(&airoha_cpufreq_driver); +} +module_exit(airoha_cpufreq_exit); + +MODULE_AUTHOR("Christian Marangi "); +MODULE_DESCRIPTION("CPUfreq driver for Airoha SoCs"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 9c198bd4f7e9b..2aa00769cf09d 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -103,6 +103,8 @@ static const struct of_device_id allowlist[] __initconst = { * platforms using "operating-points-v2" property. */ static const struct of_device_id blocklist[] __initconst = { + { .compatible = "airoha,en7581", }, + { .compatible = "allwinner,sun50i-a100" }, { .compatible = "allwinner,sun50i-h6", }, { .compatible = "allwinner,sun50i-h616", }, From f994c1cb6c438bb32487d36ab26c1954829cab1f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 14 Jan 2025 20:06:00 +0100 Subject: [PATCH 40/40] cpufreq: Use str_enable_disable()-like helpers Replace ternary (condition ? "enable" : "disable") syntax with helpers from string_choices.h because: 1. Simple function call with one argument is easier to read. Ternary operator has three arguments and with wrapping might lead to quite long code. 2. Is slightly shorter thus also easier to read. 3. It brings uniformity in the text - same string. 4. Allows deduping by the linker, which results in a smaller binary file. Signed-off-by: Krzysztof Kozlowski Acked-by: Viresh Kumar Link: https://patch.msgid.link/20250114190600.846651-1-krzysztof.kozlowski@linaro.org [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. 
Signed-off-by: Krzysztof Kozlowski
Acked-by: Viresh Kumar
Link: https://patch.msgid.link/20250114190600.846651-1-krzysztof.kozlowski@linaro.org
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki
---
 drivers/cpufreq/cpufreq.c         | 7 ++++---
 drivers/cpufreq/powernv-cpufreq.c | 3 ++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 1a4cae54a01bd..c95b4d291f2ce 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -602,12 +603,12 @@ static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr,
 
 	if (cpufreq_boost_trigger_state(enable)) {
 		pr_err("%s: Cannot %s BOOST!\n",
-		       __func__, enable ? "enable" : "disable");
+		       __func__, str_enable_disable(enable));
 		return -EINVAL;
 	}
 
 	pr_debug("%s: cpufreq BOOST %s\n",
-		 __func__, enable ? "enabled" : "disabled");
+		 __func__, str_enabled_disabled(enable));
 
 	return count;
 }
@@ -2812,7 +2813,7 @@ int cpufreq_boost_trigger_state(int state)
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
 	pr_err("%s: Cannot %s BOOST\n",
-	       __func__, state ? "enable" : "disable");
+	       __func__, str_enable_disable(state));
 
 	return ret;
 }
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 8de759247771f..ae79d909943b5 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -281,7 +282,7 @@ static int init_powernv_pstates(void)
 	pr_info("cpufreq pstate min 0x%x nominal 0x%x max 0x%x\n",
 		pstate_min, pstate_nominal, pstate_max);
 	pr_info("Workload Optimized Frequency is %s in the platform\n",
-		(powernv_pstate_info.wof_enabled) ? "enabled" : "disabled");
+		str_enabled_disabled(powernv_pstate_info.wof_enabled));
 
 	pstate_ids = of_get_property(power_mgt, "ibm,pstate-ids", &len_ids);
 	if (!pstate_ids) {