diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h
index 35f38ae67fb13..8d692415d9050 100644
--- a/drivers/cpufreq/amd-pstate-trace.h
+++ b/drivers/cpufreq/amd-pstate-trace.h
@@ -32,7 +32,6 @@ TRACE_EVENT(amd_pstate_perf,
		 u64 aperf,
		 u64 tsc,
		 unsigned int cpu_id,
-		 bool changed,
		 bool fast_switch
		 ),
 
@@ -44,7 +43,6 @@ TRACE_EVENT(amd_pstate_perf,
		aperf,
		tsc,
		cpu_id,
-		changed,
		fast_switch
		),
 
@@ -57,7 +55,6 @@ TRACE_EVENT(amd_pstate_perf,
		__field(unsigned long long, aperf)
		__field(unsigned long long, tsc)
		__field(unsigned int, cpu_id)
-		__field(bool, changed)
		__field(bool, fast_switch)
		),
 
@@ -70,11 +67,10 @@ TRACE_EVENT(amd_pstate_perf,
		__entry->aperf = aperf;
		__entry->tsc = tsc;
		__entry->cpu_id = cpu_id;
-		__entry->changed = changed;
		__entry->fast_switch = fast_switch;
		),
 
-	TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u changed=%s fast_switch=%s",
+	TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u fast_switch=%s",
		  (unsigned long)__entry->min_perf,
		  (unsigned long)__entry->target_perf,
		  (unsigned long)__entry->capacity,
@@ -83,11 +79,55 @@ TRACE_EVENT(amd_pstate_perf,
		  (unsigned long long)__entry->aperf,
		  (unsigned long long)__entry->tsc,
		  (unsigned int)__entry->cpu_id,
-		  (__entry->changed) ? "true" : "false",
		  (__entry->fast_switch) ? "true" : "false"
		  )
 );
 
+TRACE_EVENT(amd_pstate_epp_perf,
+
+	TP_PROTO(unsigned int cpu_id,
+		 unsigned int highest_perf,
+		 unsigned int epp,
+		 unsigned int min_perf,
+		 unsigned int max_perf,
+		 bool boost
+		 ),
+
+	TP_ARGS(cpu_id,
+		highest_perf,
+		epp,
+		min_perf,
+		max_perf,
+		boost),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, cpu_id)
+		__field(unsigned int, highest_perf)
+		__field(unsigned int, epp)
+		__field(unsigned int, min_perf)
+		__field(unsigned int, max_perf)
+		__field(bool, boost)
+		),
+
+	TP_fast_assign(
+		__entry->cpu_id = cpu_id;
+		__entry->highest_perf = highest_perf;
+		__entry->epp = epp;
+		__entry->min_perf = min_perf;
+		__entry->max_perf = max_perf;
+		__entry->boost = boost;
+		),
+
+	TP_printk("cpu%u: [%u<->%u]/%u, epp=%u, boost=%u",
+		  (unsigned int)__entry->cpu_id,
+		  (unsigned int)__entry->min_perf,
+		  (unsigned int)__entry->max_perf,
+		  (unsigned int)__entry->highest_perf,
+		  (unsigned int)__entry->epp,
+		  (bool)__entry->boost
+		  )
+);
+
 #endif /* _AMD_PSTATE_TRACE_H */
 
 /* This part must be outside protection */
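For orientation, the sketch below shows roughly how a driver-side caller emits the new amd_pstate_epp_perf event. It simply mirrors the calls added to amd-pstate.c later in this patch; the numeric values in the comment are hypothetical, not taken from real hardware.

	/*
	 * Illustrative use of the new event (not part of the patch): the
	 * trace_*_enabled() check keeps the argument evaluation off the hot
	 * path unless the event is actually enabled.
	 */
	if (trace_amd_pstate_epp_perf_enabled()) {
		/* cpu_id, highest_perf, epp, min_perf, max_perf, boost */
		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
					  cpudata->epp_cached,
					  cpudata->min_limit_perf,
					  cpudata->max_limit_perf,
					  policy->boost_enabled);
		/* rendered by TP_printk as e.g. "cpu2: [10<->166]/166, epp=128, boost=1" */
	}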
diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c
index a261d7300951e..3a0a380c3590c 100644
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -207,7 +207,6 @@ static void amd_pstate_ut_check_freq(u32 index)
 	int cpu = 0;
 	struct cpufreq_policy *policy = NULL;
 	struct amd_cpudata *cpudata = NULL;
-	u32 nominal_freq_khz;
 
 	for_each_possible_cpu(cpu) {
 		policy = cpufreq_cpu_get(cpu);
@@ -215,14 +214,13 @@ static void amd_pstate_ut_check_freq(u32 index)
 			break;
 		cpudata = policy->driver_data;
 
-		nominal_freq_khz = cpudata->nominal_freq*1000;
-		if (!((cpudata->max_freq >= nominal_freq_khz) &&
-			(nominal_freq_khz > cpudata->lowest_nonlinear_freq) &&
+		if (!((cpudata->max_freq >= cpudata->nominal_freq) &&
+			(cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) &&
 			(cpudata->lowest_nonlinear_freq > cpudata->min_freq) &&
 			(cpudata->min_freq > 0))) {
 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
-				__func__, cpu, cpudata->max_freq, nominal_freq_khz,
+				__func__, cpu, cpudata->max_freq, cpudata->nominal_freq,
				cpudata->lowest_nonlinear_freq, cpudata->min_freq);
 			goto skip_test;
 		}
@@ -236,13 +234,13 @@ static void amd_pstate_ut_check_freq(u32 index)
 
 		if (cpudata->boost_supported) {
 			if ((policy->max == cpudata->max_freq) ||
-			    (policy->max == nominal_freq_khz))
+			    (policy->max == cpudata->nominal_freq))
 				amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
 			else {
 				amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 				pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n",
					__func__, cpu, policy->max, cpudata->max_freq,
-					nominal_freq_khz);
+					cpudata->nominal_freq);
 				goto skip_test;
 			}
 		} else {
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 66e5dfc711c0c..d7b1de97727a1 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -22,6 +22,7 @@
 
 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
 
+#include <linux/bitfield.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -88,6 +89,11 @@ static bool cppc_enabled;
 static bool amd_pstate_prefcore = true;
 static struct quirk_entry *quirks;
 
+#define AMD_CPPC_MAX_PERF_MASK		GENMASK(7, 0)
+#define AMD_CPPC_MIN_PERF_MASK		GENMASK(15, 8)
+#define AMD_CPPC_DES_PERF_MASK		GENMASK(23, 16)
+#define AMD_CPPC_EPP_PERF_MASK		GENMASK(31, 24)
+
 /*
  * AMD Energy Preference Performance (EPP)
  * The EPP is used in the CCLK DPM controller to drive
@@ -180,120 +186,145 @@ static inline int get_mode_idx_from_str(const char *str, size_t size)
 static DEFINE_MUTEX(amd_pstate_limits_lock);
 static DEFINE_MUTEX(amd_pstate_driver_lock);
 
-static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
+static s16 msr_get_epp(struct amd_cpudata *cpudata)
 {
-	u64 epp;
+	u64 value;
 	int ret;
 
-	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
-		if (!cppc_req_cached) {
-			epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
-					    &cppc_req_cached);
-			if (epp)
-				return epp;
-		}
-		epp = (cppc_req_cached >> 24) & 0xFF;
-	} else {
-		ret = cppc_get_epp_perf(cpudata->cpu, &epp);
-		if (ret < 0) {
-			pr_debug("Could not retrieve energy perf value (%d)\n", ret);
-			return -EIO;
-		}
+	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
+	if (ret < 0) {
+		pr_debug("Could not retrieve energy perf value (%d)\n", ret);
+		return ret;
 	}
 
-	return (s16)(epp & 0xff);
+	return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, value);
 }
 
-static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
+DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp);
+
+static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata)
 {
-	s16 epp;
-	int index = -EINVAL;
+	return static_call(amd_pstate_get_epp)(cpudata);
+}
 
-	epp = amd_pstate_get_epp(cpudata, 0);
-	if (epp < 0)
-		return epp;
+static s16 shmem_get_epp(struct amd_cpudata *cpudata)
+{
+	u64 epp;
+	int ret;
 
-	switch (epp) {
-	case AMD_CPPC_EPP_PERFORMANCE:
-		index = EPP_INDEX_PERFORMANCE;
-		break;
-	case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
-		index = EPP_INDEX_BALANCE_PERFORMANCE;
-		break;
-	case AMD_CPPC_EPP_BALANCE_POWERSAVE:
-		index = EPP_INDEX_BALANCE_POWERSAVE;
-		break;
-	case AMD_CPPC_EPP_POWERSAVE:
-		index = EPP_INDEX_POWERSAVE;
-		break;
-	default:
-		break;
+	ret = cppc_get_epp_perf(cpudata->cpu, &epp);
+	if (ret < 0) {
+		pr_debug("Could not retrieve energy perf value (%d)\n", ret);
+		return ret;
 	}
 
-	return index;
+	return (s16)(epp & 0xff);
 }
 
-static void msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
-			    u32 des_perf, u32 max_perf, bool fast_switch)
+static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
+			   u32 des_perf, u32 max_perf, u32 epp, bool fast_switch)
 {
-	if (fast_switch)
-		wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached));
-	else
-		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
-			      READ_ONCE(cpudata->cppc_req_cached));
+	u64 value, prev;
+
+	value = prev = READ_ONCE(cpudata->cppc_req_cached);
+
+	value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
+		   AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK);
+	value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf);
+	value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf);
+	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
+	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
+
+	if (value == prev)
+		return 0;
+
+	if (fast_switch) {
+		wrmsrl(MSR_AMD_CPPC_REQ, value);
+		return 0;
+	} else {
+		int ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
+
+		if (ret)
+			return ret;
+	}
+
+	WRITE_ONCE(cpudata->cppc_req_cached, value);
+	WRITE_ONCE(cpudata->epp_cached, epp);
+
+	return 0;
 }
 
 DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf);
 
-static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
+static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata,
					  u32 min_perf, u32 des_perf,
-					  u32 max_perf, bool fast_switch)
+					  u32 max_perf, u32 epp,
+					  bool fast_switch)
 {
-	static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
-					    max_perf, fast_switch);
+	return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
						   max_perf, epp, fast_switch);
 }
 
-static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
+static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp)
 {
+	u64 value, prev;
 	int ret;
-	struct cppc_perf_ctrls perf_ctrls;
 
-	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
-		u64 value = READ_ONCE(cpudata->cppc_req_cached);
-
-		value &= ~GENMASK_ULL(31, 24);
-		value |= (u64)epp << 24;
-		WRITE_ONCE(cpudata->cppc_req_cached, value);
+	value = prev = READ_ONCE(cpudata->cppc_req_cached);
+	value &= ~AMD_CPPC_EPP_PERF_MASK;
+	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
 
-		ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
-		if (!ret)
-			cpudata->epp_cached = epp;
-	} else {
-		amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
-				       cpudata->max_limit_perf, false);
+	if (value == prev)
+		return 0;
 
-		perf_ctrls.energy_perf = epp;
-		ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
-		if (ret) {
-			pr_debug("failed to set energy perf value (%d)\n", ret);
-			return ret;
-		}
-		cpudata->epp_cached = epp;
+	ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
+	if (ret) {
+		pr_err("failed to set energy perf value (%d)\n", ret);
+		return ret;
 	}
 
+	/* update both so that msr_update_perf() can effectively check */
+	WRITE_ONCE(cpudata->epp_cached, epp);
+	WRITE_ONCE(cpudata->cppc_req_cached, value);
+
 	return ret;
 }
 
-static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
-		int pref_index)
+DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp);
+
+static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
+{
+	return static_call(amd_pstate_set_epp)(cpudata, epp);
+}
+
+static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp)
 {
-	int epp = -EINVAL;
 	int ret;
+	struct cppc_perf_ctrls perf_ctrls;
+
+	if (epp == cpudata->epp_cached)
+		return 0;
+
+	perf_ctrls.energy_perf = epp;
+	ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
+	if (ret) {
+		pr_debug("failed to set energy perf value (%d)\n", ret);
+		return ret;
	}
+	WRITE_ONCE(cpudata->epp_cached, epp);
+
+	return ret;
+}
+
+static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy,
+					    int pref_index)
+{
+	struct amd_cpudata *cpudata = policy->driver_data;
+	int epp;
 
 	if (!pref_index)
 		epp = cpudata->epp_default;
-
-	if (epp == -EINVAL)
+	else
 		epp = epp_values[pref_index];
 
 	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
@@ -301,9 +332,15 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
 		return -EBUSY;
 	}
 
-	ret = amd_pstate_set_epp(cpudata, epp);
+	if (trace_amd_pstate_epp_perf_enabled()) {
+		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
+					  epp,
+					  FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
+					  FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached),
+					  policy->boost_enabled);
+	}
 
-	return ret;
+	return amd_pstate_set_epp(cpudata, epp);
 }
 
 static inline int msr_cppc_enable(bool enable)
@@ -442,17 +479,23 @@ static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
 	return static_call(amd_pstate_init_perf)(cpudata);
 }
 
-static void shmem_update_perf(struct amd_cpudata *cpudata,
-			      u32 min_perf, u32 des_perf,
-			      u32 max_perf, bool fast_switch)
+static int shmem_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
+			     u32 des_perf, u32 max_perf, u32 epp, bool fast_switch)
 {
 	struct cppc_perf_ctrls perf_ctrls;
 
+	if (cppc_state == AMD_PSTATE_ACTIVE) {
+		int ret = shmem_set_epp(cpudata, epp);
+
+		if (ret)
+			return ret;
+	}
+
 	perf_ctrls.max_perf = max_perf;
 	perf_ctrls.min_perf = min_perf;
 	perf_ctrls.desired_perf = des_perf;
 
-	cppc_set_perf(cpudata->cpu, &perf_ctrls);
+	return cppc_set_perf(cpudata->cpu, &perf_ctrls);
 }
 
 static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
@@ -493,14 +536,8 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
 {
 	unsigned long max_freq;
 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
-	u64 prev = READ_ONCE(cpudata->cppc_req_cached);
 	u32 nominal_perf = READ_ONCE(cpudata->nominal_perf);
-	u64 value = prev;
 
-	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
-			   cpudata->max_limit_perf);
-	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
-			   cpudata->max_limit_perf);
 	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
 
 	max_freq = READ_ONCE(cpudata->max_limit_freq);
@@ -511,34 +548,18 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
 		des_perf = 0;
 	}
 
-	value &= ~AMD_CPPC_MIN_PERF(~0L);
-	value |= AMD_CPPC_MIN_PERF(min_perf);
-
-	value &= ~AMD_CPPC_DES_PERF(~0L);
-	value |= AMD_CPPC_DES_PERF(des_perf);
-
 	/* limit the max perf when core performance boost feature is disabled */
 	if (!cpudata->boost_supported)
 		max_perf = min_t(unsigned long, nominal_perf, max_perf);
 
-	value &= ~AMD_CPPC_MAX_PERF(~0L);
-	value |= AMD_CPPC_MAX_PERF(max_perf);
-
 	if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
 		trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
			cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
-				cpudata->cpu, (value != prev), fast_switch);
+				cpudata->cpu, fast_switch);
 	}
 
-	if (value == prev)
-		goto cpufreq_policy_put;
+	amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch);
 
-	WRITE_ONCE(cpudata->cppc_req_cached, value);
-
-	amd_pstate_update_perf(cpudata, min_perf, des_perf,
-			       max_perf, fast_switch);
-
-cpufreq_policy_put:
 	cpufreq_cpu_put(policy);
 }
 
@@ -570,7 +591,7 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
 
 static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
 {
-	u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf, max_freq;
+	u32 max_limit_perf, min_limit_perf, max_perf, max_freq;
 	struct amd_cpudata *cpudata = policy->driver_data;
 
 	max_perf = READ_ONCE(cpudata->highest_perf);
@@ -578,12 +599,8 @@ static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
 	max_limit_perf = div_u64(policy->max * max_perf, max_freq);
 	min_limit_perf = div_u64(policy->min * max_perf, max_freq);
 
-	lowest_perf = READ_ONCE(cpudata->lowest_perf);
-	if (min_limit_perf < lowest_perf)
-		min_limit_perf = lowest_perf;
-
-	if (max_limit_perf < min_limit_perf)
-		max_limit_perf = min_limit_perf;
+	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
+		min_limit_perf = min(cpudata->nominal_perf, max_limit_perf);
 
 	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
 	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
@@ -704,8 +721,8 @@ static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
 
 	if (on)
 		policy->cpuinfo.max_freq = max_freq;
-	else if (policy->cpuinfo.max_freq > nominal_freq * 1000)
-		policy->cpuinfo.max_freq = nominal_freq * 1000;
+	else if (policy->cpuinfo.max_freq > nominal_freq)
+		policy->cpuinfo.max_freq = nominal_freq;
 
 	policy->max = policy->cpuinfo.max_freq;
 
@@ -727,12 +744,11 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
 		pr_err("Boost mode is not supported by this processor or SBIOS\n");
 		return -EOPNOTSUPP;
 	}
-	mutex_lock(&amd_pstate_driver_lock);
+	guard(mutex)(&amd_pstate_driver_lock);
+
 	ret = amd_pstate_cpu_boost_update(policy, state);
-	WRITE_ONCE(cpudata->boost_state, !ret ? state : false);
 	policy->boost_enabled = !ret ? state : false;
 	refresh_frequency_limits(policy);
-	mutex_unlock(&amd_pstate_driver_lock);
 
 	return ret;
 }
@@ -752,9 +768,6 @@ static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata)
 		goto exit_err;
 	}
 
-	/* at least one CPU supports CPB, even if others fail later on to set up */
-	current_pstate_driver->boost_enabled = true;
-
 	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val);
 	if (ret) {
 		pr_err_once("failed to read initial CPU boost state!\n");
@@ -823,7 +836,8 @@ static void amd_pstate_update_limits(unsigned int cpu)
 	if (!amd_pstate_prefcore)
 		return;
 
-	mutex_lock(&amd_pstate_driver_lock);
+	guard(mutex)(&amd_pstate_driver_lock);
+
 	ret = amd_get_highest_perf(cpu, &cur_high);
 	if (ret)
 		goto free_cpufreq_put;
@@ -843,7 +857,6 @@ static void amd_pstate_update_limits(unsigned int cpu)
 
 	if (!highest_perf_changed)
 		cpufreq_update_policy(cpu);
-	mutex_unlock(&amd_pstate_driver_lock);
 }
 
 /*
@@ -905,29 +918,29 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
 		return ret;
 
 	if (quirks && quirks->lowest_freq)
-		min_freq = quirks->lowest_freq * 1000;
+		min_freq = quirks->lowest_freq;
 	else
-		min_freq = cppc_perf.lowest_freq * 1000;
+		min_freq = cppc_perf.lowest_freq;
 
 	if (quirks && quirks->nominal_freq)
-		nominal_freq = quirks->nominal_freq ;
+		nominal_freq = quirks->nominal_freq;
 	else
 		nominal_freq = cppc_perf.nominal_freq;
 
 	nominal_perf = READ_ONCE(cpudata->nominal_perf);
 
 	boost_ratio = div_u64(cpudata->highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
-	max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
+	max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT);
 
 	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
 	lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
-	lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
+	lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT);
 
-	WRITE_ONCE(cpudata->min_freq, min_freq);
-	WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
-	WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
-	WRITE_ONCE(cpudata->max_freq, max_freq);
+	WRITE_ONCE(cpudata->min_freq, min_freq * 1000);
+	WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000);
+	WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000);
+	WRITE_ONCE(cpudata->max_freq, max_freq * 1000);
 
 	/**
	 * Below values need to be initialized correctly, otherwise driver will fail to load
@@ -937,13 +950,13 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
	 */
 	if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) {
 		pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n",
-			min_freq, max_freq, nominal_freq * 1000);
+			min_freq, max_freq, nominal_freq);
 		return -EINVAL;
 	}
 
-	if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq * 1000) {
+	if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq) {
 		pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n",
-			lowest_nonlinear_freq, min_freq, nominal_freq * 1000);
+			lowest_nonlinear_freq, min_freq, nominal_freq);
 		return -EINVAL;
 	}
 
@@ -1160,7 +1173,6 @@ static ssize_t show_energy_performance_available_preferences(
 
 static ssize_t store_energy_performance_preference(
		struct cpufreq_policy *policy, const char *buf, size_t count)
 {
-	struct amd_cpudata *cpudata = policy->driver_data;
 	char str_preference[21];
 	ssize_t ret;
 
@@ -1172,11 +1184,11 @@ static ssize_t store_energy_performance_preference(
 	if (ret < 0)
 		return -EINVAL;
 
-	mutex_lock(&amd_pstate_limits_lock);
-	ret = amd_pstate_set_energy_pref_index(cpudata, ret);
-	mutex_unlock(&amd_pstate_limits_lock);
+	guard(mutex)(&amd_pstate_limits_lock);
+
+	ret = amd_pstate_set_energy_pref_index(policy, ret);
 
-	return ret ?: count;
+	return ret ? ret : count;
 }
 
 static ssize_t show_energy_performance_preference(
@@ -1185,9 +1197,22 @@
 	struct amd_cpudata *cpudata = policy->driver_data;
 	int preference;
 
-	preference = amd_pstate_get_energy_pref_index(cpudata);
-	if (preference < 0)
-		return preference;
+	switch (cpudata->epp_cached) {
+	case AMD_CPPC_EPP_PERFORMANCE:
+		preference = EPP_INDEX_PERFORMANCE;
+		break;
+	case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
+		preference = EPP_INDEX_BALANCE_PERFORMANCE;
+		break;
+	case AMD_CPPC_EPP_BALANCE_POWERSAVE:
+		preference = EPP_INDEX_BALANCE_POWERSAVE;
+		break;
+	case AMD_CPPC_EPP_POWERSAVE:
+		preference = EPP_INDEX_POWERSAVE;
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
 }
@@ -1236,6 +1261,9 @@ static int amd_pstate_register_driver(int mode)
 		return ret;
 	}
 
+	/* at least one CPU supports CPB */
+	current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB);
+
 	ret = cpufreq_register_driver(current_pstate_driver);
 	if (ret) {
 		amd_pstate_driver_cleanup();
@@ -1340,13 +1368,10 @@ EXPORT_SYMBOL_GPL(amd_pstate_update_status);
 
 static ssize_t status_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
 {
-	ssize_t ret;
-	mutex_lock(&amd_pstate_driver_lock);
-	ret = amd_pstate_show_status(buf);
-	mutex_unlock(&amd_pstate_driver_lock);
+	guard(mutex)(&amd_pstate_driver_lock);
 
-	return ret;
+	return amd_pstate_show_status(buf);
 }
 
 static ssize_t status_store(struct device *a, struct device_attribute *b,
@@ -1355,9 +1380,8 @@ static ssize_t status_store(struct device *a, struct device_attribute *b,
 	char *p = memchr(buf, '\n', count);
 	int ret;
 
-	mutex_lock(&amd_pstate_driver_lock);
+	guard(mutex)(&amd_pstate_driver_lock);
 	ret = amd_pstate_update_status(buf, p ? p - buf : count);
-	mutex_unlock(&amd_pstate_driver_lock);
 
 	return ret < 0 ? ret : count;
 }
@@ -1451,7 +1475,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
 		return -ENOMEM;
 
 	cpudata->cpu = policy->cpu;
-	cpudata->epp_policy = 0;
 
 	ret = amd_pstate_init_perf(cpudata);
 	if (ret)
@@ -1477,8 +1500,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
 
 	policy->driver_data = cpudata;
 
-	cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata, 0);
-
 	policy->min = policy->cpuinfo.min_freq;
 	policy->max = policy->cpuinfo.max_freq;
 
@@ -1489,10 +1510,13 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
	 * the default cpufreq governor is neither powersave nor performance.
	 */
 	if (amd_pstate_acpi_pm_profile_server() ||
-	    amd_pstate_acpi_pm_profile_undefined())
+	    amd_pstate_acpi_pm_profile_undefined()) {
 		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
-	else
+		cpudata->epp_default = amd_pstate_get_epp(cpudata);
+	} else {
 		policy->policy = CPUFREQ_POLICY_POWERSAVE;
+		cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
+	}
 
 	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
 		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
@@ -1505,6 +1529,9 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
			return ret;
 		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
 	}
+	ret = amd_pstate_set_epp(cpudata, cpudata->epp_default);
+	if (ret)
+		return ret;
 
 	current_pstate_driver->adjust_perf = NULL;
 
@@ -1530,51 +1557,24 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
 
 static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
-	u32 max_perf, min_perf;
-	u64 value;
-	s16 epp;
+	u32 epp;
 
-	max_perf = READ_ONCE(cpudata->highest_perf);
-	min_perf = READ_ONCE(cpudata->lowest_perf);
 	amd_pstate_update_min_max_limit(policy);
 
-	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
-			   cpudata->max_limit_perf);
-	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
			   cpudata->max_limit_perf);
-	value = READ_ONCE(cpudata->cppc_req_cached);
-
 	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
-		min_perf = min(cpudata->nominal_perf, max_perf);
-
-	/* Initial min/max values for CPPC Performance Controls Register */
-	value &= ~AMD_CPPC_MIN_PERF(~0L);
-	value |= AMD_CPPC_MIN_PERF(min_perf);
-
-	value &= ~AMD_CPPC_MAX_PERF(~0L);
-	value |= AMD_CPPC_MAX_PERF(max_perf);
-
-	/* CPPC EPP feature require to set zero to the desire perf bit */
-	value &= ~AMD_CPPC_DES_PERF(~0L);
-	value |= AMD_CPPC_DES_PERF(0);
-
-	cpudata->epp_policy = cpudata->policy;
+		epp = 0;
+	else
+		epp = READ_ONCE(cpudata->epp_cached);
 
-	/* Get BIOS pre-defined epp value */
-	epp = amd_pstate_get_epp(cpudata, value);
-	if (epp < 0) {
-		/**
-		 * This return value can only be negative for shared_memory
-		 * systems where EPP register read/write not supported.
-		 */
-		return epp;
+	if (trace_amd_pstate_epp_perf_enabled()) {
+		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp,
					  cpudata->min_limit_perf,
					  cpudata->max_limit_perf,
					  policy->boost_enabled);
 	}
 
-	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
-		epp = 0;
-
-	WRITE_ONCE(cpudata->cppc_req_cached, value);
-	return amd_pstate_set_epp(cpudata, epp);
+	return amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
				      cpudata->max_limit_perf, epp, false);
 }
 
 static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
@@ -1603,87 +1603,63 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
 	return 0;
 }
 
-static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
+static int amd_pstate_epp_reenable(struct cpufreq_policy *policy)
 {
-	struct cppc_perf_ctrls perf_ctrls;
-	u64 value, max_perf;
+	struct amd_cpudata *cpudata = policy->driver_data;
+	u64 max_perf;
 	int ret;
 
 	ret = amd_pstate_cppc_enable(true);
 	if (ret)
 		pr_err("failed to enable amd pstate during resume, return %d\n", ret);
 
-	value = READ_ONCE(cpudata->cppc_req_cached);
 	max_perf = READ_ONCE(cpudata->highest_perf);
 
-	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
-		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
-	} else {
-		perf_ctrls.max_perf = max_perf;
-		cppc_set_perf(cpudata->cpu, &perf_ctrls);
-		perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached);
-		cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
+	if (trace_amd_pstate_epp_perf_enabled()) {
+		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
					  cpudata->epp_cached,
					  FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
					  max_perf, policy->boost_enabled);
 	}
+
+	return amd_pstate_update_perf(cpudata, 0, 0, max_perf, cpudata->epp_cached, false);
 }
 
 static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
+	int ret;
 
 	pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);
 
-	if (cppc_state == AMD_PSTATE_ACTIVE) {
-		amd_pstate_epp_reenable(cpudata);
-		cpudata->suspended = false;
-	}
+	ret = amd_pstate_epp_reenable(policy);
+	if (ret)
+		return ret;
+	cpudata->suspended = false;
 
 	return 0;
 }
 
-static void amd_pstate_epp_offline(struct cpufreq_policy *policy)
-{
-	struct amd_cpudata *cpudata = policy->driver_data;
-	struct cppc_perf_ctrls perf_ctrls;
-	int min_perf;
-	u64 value;
-
-	min_perf = READ_ONCE(cpudata->lowest_perf);
-	value = READ_ONCE(cpudata->cppc_req_cached);
-
-	mutex_lock(&amd_pstate_limits_lock);
-	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
-		cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN;
-
-		/* Set max perf same as min perf */
-		value &= ~AMD_CPPC_MAX_PERF(~0L);
-		value |= AMD_CPPC_MAX_PERF(min_perf);
-		value &= ~AMD_CPPC_MIN_PERF(~0L);
-		value |= AMD_CPPC_MIN_PERF(min_perf);
-		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
-	} else {
-		perf_ctrls.desired_perf = 0;
-		perf_ctrls.min_perf = min_perf;
-		perf_ctrls.max_perf = min_perf;
-		cppc_set_perf(cpudata->cpu, &perf_ctrls);
-		perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE);
-		cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
-	}
-	mutex_unlock(&amd_pstate_limits_lock);
-}
-
 static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
-
-	pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu);
+	int min_perf;
 
 	if (cpudata->suspended)
 		return 0;
 
-	if (cppc_state == AMD_PSTATE_ACTIVE)
-		amd_pstate_epp_offline(policy);
+	min_perf = READ_ONCE(cpudata->lowest_perf);
 
-	return 0;
+	guard(mutex)(&amd_pstate_limits_lock);
+
+	if (trace_amd_pstate_epp_perf_enabled()) {
+		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
					  AMD_CPPC_EPP_BALANCE_POWERSAVE,
					  min_perf, min_perf, policy->boost_enabled);
+	}
+
+	return amd_pstate_update_perf(cpudata, min_perf, 0, min_perf,
				      AMD_CPPC_EPP_BALANCE_POWERSAVE, false);
 }
 
 static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
@@ -1711,12 +1687,10 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
 	struct amd_cpudata *cpudata = policy->driver_data;
 
 	if (cpudata->suspended) {
-		mutex_lock(&amd_pstate_limits_lock);
+		guard(mutex)(&amd_pstate_limits_lock);
 
 		/* enable amd pstate from suspend state*/
-		amd_pstate_epp_reenable(cpudata);
-
-		mutex_unlock(&amd_pstate_limits_lock);
+		amd_pstate_epp_reenable(policy);
 
 		cpudata->suspended = false;
 	}
 
@@ -1869,6 +1843,8 @@ static int __init amd_pstate_init(void)
 		static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable);
 		static_call_update(amd_pstate_init_perf, shmem_init_perf);
 		static_call_update(amd_pstate_update_perf, shmem_update_perf);
+		static_call_update(amd_pstate_get_epp, shmem_get_epp);
+		static_call_update(amd_pstate_set_epp, shmem_set_epp);
 	}
 
 	if (amd_pstate_prefcore) {
diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
index cd573bc6b6db8..9747e3be6ceee 100644
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -57,7 +57,6 @@ struct amd_aperf_mperf {
 * @hw_prefcore: check whether HW supports preferred core featue.
 *		  Only when hw_prefcore and early prefcore param are true,
 *		  AMD P-State driver supports preferred core featue.
- * @epp_policy: Last saved policy used to set energy-performance preference
 * @epp_cached: Cached CPPC energy-performance preference value
 * @policy: Cpufreq policy value
 * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value
@@ -94,13 +93,11 @@ struct amd_cpudata {
	bool	hw_prefcore;
 
	/* EPP feature related attributes*/
-	s16	epp_policy;
	s16	epp_cached;
	u32	policy;
	u64	cppc_cap1_cached;
	bool	suspended;
	s16	epp_default;
-	bool	boost_state;
 };
 
 /*
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b8e2396a708ad..9c4cc01fd51aa 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -302,11 +303,11 @@ static bool hwp_is_hybrid;
 
 static struct cpufreq_driver *intel_pstate_driver __read_mostly;
 
-#define HYBRID_SCALING_FACTOR		78741
+#define HYBRID_SCALING_FACTOR_ADL	78741
 #define HYBRID_SCALING_FACTOR_MTL	80000
 #define HYBRID_SCALING_FACTOR_LNL	86957
 
-static int hybrid_scaling_factor = HYBRID_SCALING_FACTOR;
+static int hybrid_scaling_factor;
 
 static inline int core_get_scaling(void)
 {
@@ -414,18 +415,15 @@ static int intel_pstate_get_cppc_guaranteed(int cpu)
 static int intel_pstate_cppc_get_scaling(int cpu)
 {
 	struct cppc_perf_caps cppc_perf;
-	int ret;
-
-	ret = cppc_get_perf_caps(cpu, &cppc_perf);
 
 	/*
-	 * If the nominal frequency and the nominal performance are not
-	 * zero and the ratio between them is not 100, return the hybrid
-	 * scaling factor.
+	 * Compute the perf-to-frequency scaling factor for the given CPU if
+	 * possible, unless it would be 0.
	 */
-	if (!ret && cppc_perf.nominal_perf && cppc_perf.nominal_freq &&
-	    cppc_perf.nominal_perf * 100 != cppc_perf.nominal_freq)
-		return hybrid_scaling_factor;
+	if (!cppc_get_perf_caps(cpu, &cppc_perf) &&
+	    cppc_perf.nominal_perf && cppc_perf.nominal_freq)
+		return div_u64(cppc_perf.nominal_freq * KHZ_PER_MHZ,
			       cppc_perf.nominal_perf);
 
 	return core_get_scaling();
 }
@@ -2211,24 +2209,30 @@ static void hybrid_get_type(void *data)
 
 static int hwp_get_cpu_scaling(int cpu)
 {
-	u8 cpu_type = 0;
+	if (hybrid_scaling_factor) {
+		u8 cpu_type = 0;
 
-	smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1);
-	/* P-cores have a smaller perf level-to-freqency scaling factor. */
-	if (cpu_type == 0x40)
-		return hybrid_scaling_factor;
+		smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1);
 
-	/* Use default core scaling for E-cores */
-	if (cpu_type == 0x20)
+		/*
+		 * Return the hybrid scaling factor for P-cores and use the
+		 * default core scaling for E-cores.
+		 */
+		if (cpu_type == 0x40)
+			return hybrid_scaling_factor;
+
+		if (cpu_type == 0x20)
+			return core_get_scaling();
+	}
+
+	/* Use core scaling on non-hybrid systems. */
+	if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
 		return core_get_scaling();
 
 	/*
-	 * If reached here, this system is either non-hybrid (like Tiger
-	 * Lake) or hybrid-capable (like Alder Lake or Raptor Lake) with
-	 * no E cores (in which case CPUID for hybrid support is 0).
-	 *
-	 * The CPPC nominal_frequency field is 0 for non-hybrid systems,
-	 * so the default core scaling will be used for them.
+	 * The system is hybrid, but the hybrid scaling factor is not known or
+	 * the CPU type is not one of the above, so use CPPC to compute the
+	 * scaling factor for this CPU.
	 */
 	return intel_pstate_cppc_get_scaling(cpu);
 }
@@ -2709,7 +2713,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 	}
 
 	cpu->epp_powersave = -EINVAL;
-	cpu->epp_policy = 0;
+	cpu->epp_policy = CPUFREQ_POLICY_UNKNOWN;
 
 	intel_pstate_get_cpu_pstates(cpu);
 
@@ -3665,8 +3669,12 @@ static const struct x86_cpu_id intel_epp_default[] = {
 };
 
 static const struct x86_cpu_id intel_hybrid_scaling_factor[] = {
+	X86_MATCH_VFM(INTEL_ALDERLAKE, HYBRID_SCALING_FACTOR_ADL),
+	X86_MATCH_VFM(INTEL_ALDERLAKE_L, HYBRID_SCALING_FACTOR_ADL),
+	X86_MATCH_VFM(INTEL_RAPTORLAKE, HYBRID_SCALING_FACTOR_ADL),
+	X86_MATCH_VFM(INTEL_RAPTORLAKE_P, HYBRID_SCALING_FACTOR_ADL),
+	X86_MATCH_VFM(INTEL_RAPTORLAKE_S, HYBRID_SCALING_FACTOR_ADL),
 	X86_MATCH_VFM(INTEL_METEORLAKE_L, HYBRID_SCALING_FACTOR_MTL),
-	X86_MATCH_VFM(INTEL_ARROWLAKE, HYBRID_SCALING_FACTOR_MTL),
 	X86_MATCH_VFM(INTEL_LUNARLAKE_M, HYBRID_SCALING_FACTOR_LNL),
 	{}
 };
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 28c77904ea749..e51d5ce730be1 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -83,7 +83,7 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
 
	if (unlikely(sg_policy->limits_changed)) {
		sg_policy->limits_changed = false;
-		sg_policy->need_freq_update = true;
+		sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
		return true;
	}
 
@@ -96,7 +96,7 @@ static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
				   unsigned int next_freq)
 {
	if (sg_policy->need_freq_update)
-		sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
+		sg_policy->need_freq_update = false;
	else if (sg_policy->next_freq == next_freq)
		return false;
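As background for the FIELD_PREP()/FIELD_GET() conversion above: MSR_AMD_CPPC_REQ packs four byte-wide fields, max_perf in bits 7:0, min_perf in bits 15:8, des_perf in bits 23:16 and the EPP hint in bits 31:24, which is exactly what the new AMD_CPPC_*_PERF_MASK definitions encode. The stand-alone sketch below re-implements that packing with plain shifts so it can be compiled and run outside the kernel; the helper name and the sample field values are made up purely for illustration.

#include <stdint.h>
#include <stdio.h>

/* Byte positions matching the AMD_CPPC_*_PERF_MASK definitions in the patch. */
#define CPPC_MAX_PERF_SHIFT	0
#define CPPC_MIN_PERF_SHIFT	8
#define CPPC_DES_PERF_SHIFT	16
#define CPPC_EPP_PERF_SHIFT	24

/* Hypothetical helper: compose one MSR_AMD_CPPC_REQ value from its fields. */
static uint64_t cppc_req_pack(uint8_t min, uint8_t des, uint8_t max, uint8_t epp)
{
	return ((uint64_t)max << CPPC_MAX_PERF_SHIFT) |
	       ((uint64_t)min << CPPC_MIN_PERF_SHIFT) |
	       ((uint64_t)des << CPPC_DES_PERF_SHIFT) |
	       ((uint64_t)epp << CPPC_EPP_PERF_SHIFT);
}

int main(void)
{
	/* Sample values only: min=10, des=0, max=166, epp=128. */
	uint64_t req = cppc_req_pack(10, 0, 166, 128);

	/* Prints "req=0x80000aa6 min=10 max=166 epp=128". */
	printf("req=0x%llx min=%u max=%u epp=%u\n",
	       (unsigned long long)req,
	       (unsigned int)((req >> CPPC_MIN_PERF_SHIFT) & 0xff),
	       (unsigned int)((req >> CPPC_MAX_PERF_SHIFT) & 0xff),
	       (unsigned int)((req >> CPPC_EPP_PERF_SHIFT) & 0xff));
	return 0;
}

Keeping the full packed value in cppc_req_cached is also what lets msr_update_perf() and msr_set_epp() above return early when the newly composed value equals the cached one: an unchanged request means the MSR write can be skipped entirely.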