From 691fef8ccbb70dc5cd242b2bd14a0e95b7d1cc17 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 28 May 2024 11:47:13 -0700 Subject: [PATCH 01/24] cpufreq: Switch to new Intel CPU model defines New CPU #defines encode vendor and family as well as model. Signed-off-by: Tony Luck Acked-by: Rafael J. Wysocki Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/speedstep-centrino.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c index 75b10ecdb60f2..ddd6f53bfd2ae 100644 --- a/drivers/cpufreq/speedstep-centrino.c +++ b/drivers/cpufreq/speedstep-centrino.c @@ -520,10 +520,10 @@ static struct cpufreq_driver centrino_driver = { * or ASCII model IDs. */ static const struct x86_cpu_id centrino_ids[] = { - X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, 9, X86_FEATURE_EST, NULL), - X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, 13, X86_FEATURE_EST, NULL), - X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 15, 3, X86_FEATURE_EST, NULL), - X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 15, 4, X86_FEATURE_EST, NULL), + X86_MATCH_VFM_FEATURE(IFM( 6, 9), X86_FEATURE_EST, NULL), + X86_MATCH_VFM_FEATURE(IFM( 6, 13), X86_FEATURE_EST, NULL), + X86_MATCH_VFM_FEATURE(IFM(15, 3), X86_FEATURE_EST, NULL), + X86_MATCH_VFM_FEATURE(IFM(15, 4), X86_FEATURE_EST, NULL), {} }; From ca8752384c6f75a38aef40f0cbcb810af172feec Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 28 May 2024 11:47:19 -0700 Subject: [PATCH 02/24] cpufreq: intel_pstate: Switch to new Intel CPU model defines New CPU #defines encode vendor and family as well as model. Signed-off-by: Tony Luck Acked-by: Rafael J. Wysocki Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 90 +++++++++++++++++----------------- 1 file changed, 44 insertions(+), 46 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 65d3f79104bd5..ebb3335d23f1b 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2363,54 +2363,53 @@ static const struct pstate_funcs knl_funcs = { .get_val = core_get_val, }; -#define X86_MATCH(model, policy) \ - X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \ - X86_FEATURE_APERFMPERF, &policy) +#define X86_MATCH(vfm, policy) \ + X86_MATCH_VFM_FEATURE(vfm, X86_FEATURE_APERFMPERF, &policy) static const struct x86_cpu_id intel_pstate_cpu_ids[] = { - X86_MATCH(SANDYBRIDGE, core_funcs), - X86_MATCH(SANDYBRIDGE_X, core_funcs), - X86_MATCH(ATOM_SILVERMONT, silvermont_funcs), - X86_MATCH(IVYBRIDGE, core_funcs), - X86_MATCH(HASWELL, core_funcs), - X86_MATCH(BROADWELL, core_funcs), - X86_MATCH(IVYBRIDGE_X, core_funcs), - X86_MATCH(HASWELL_X, core_funcs), - X86_MATCH(HASWELL_L, core_funcs), - X86_MATCH(HASWELL_G, core_funcs), - X86_MATCH(BROADWELL_G, core_funcs), - X86_MATCH(ATOM_AIRMONT, airmont_funcs), - X86_MATCH(SKYLAKE_L, core_funcs), - X86_MATCH(BROADWELL_X, core_funcs), - X86_MATCH(SKYLAKE, core_funcs), - X86_MATCH(BROADWELL_D, core_funcs), - X86_MATCH(XEON_PHI_KNL, knl_funcs), - X86_MATCH(XEON_PHI_KNM, knl_funcs), - X86_MATCH(ATOM_GOLDMONT, core_funcs), - X86_MATCH(ATOM_GOLDMONT_PLUS, core_funcs), - X86_MATCH(SKYLAKE_X, core_funcs), - X86_MATCH(COMETLAKE, core_funcs), - X86_MATCH(ICELAKE_X, core_funcs), - X86_MATCH(TIGERLAKE, core_funcs), - X86_MATCH(SAPPHIRERAPIDS_X, core_funcs), - X86_MATCH(EMERALDRAPIDS_X, core_funcs), + X86_MATCH(INTEL_SANDYBRIDGE, core_funcs), + X86_MATCH(INTEL_SANDYBRIDGE_X, core_funcs), + X86_MATCH(INTEL_ATOM_SILVERMONT, 
silvermont_funcs), + X86_MATCH(INTEL_IVYBRIDGE, core_funcs), + X86_MATCH(INTEL_HASWELL, core_funcs), + X86_MATCH(INTEL_BROADWELL, core_funcs), + X86_MATCH(INTEL_IVYBRIDGE_X, core_funcs), + X86_MATCH(INTEL_HASWELL_X, core_funcs), + X86_MATCH(INTEL_HASWELL_L, core_funcs), + X86_MATCH(INTEL_HASWELL_G, core_funcs), + X86_MATCH(INTEL_BROADWELL_G, core_funcs), + X86_MATCH(INTEL_ATOM_AIRMONT, airmont_funcs), + X86_MATCH(INTEL_SKYLAKE_L, core_funcs), + X86_MATCH(INTEL_BROADWELL_X, core_funcs), + X86_MATCH(INTEL_SKYLAKE, core_funcs), + X86_MATCH(INTEL_BROADWELL_D, core_funcs), + X86_MATCH(INTEL_XEON_PHI_KNL, knl_funcs), + X86_MATCH(INTEL_XEON_PHI_KNM, knl_funcs), + X86_MATCH(INTEL_ATOM_GOLDMONT, core_funcs), + X86_MATCH(INTEL_ATOM_GOLDMONT_PLUS, core_funcs), + X86_MATCH(INTEL_SKYLAKE_X, core_funcs), + X86_MATCH(INTEL_COMETLAKE, core_funcs), + X86_MATCH(INTEL_ICELAKE_X, core_funcs), + X86_MATCH(INTEL_TIGERLAKE, core_funcs), + X86_MATCH(INTEL_SAPPHIRERAPIDS_X, core_funcs), + X86_MATCH(INTEL_EMERALDRAPIDS_X, core_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); #ifdef CONFIG_ACPI static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { - X86_MATCH(BROADWELL_D, core_funcs), - X86_MATCH(BROADWELL_X, core_funcs), - X86_MATCH(SKYLAKE_X, core_funcs), - X86_MATCH(ICELAKE_X, core_funcs), - X86_MATCH(SAPPHIRERAPIDS_X, core_funcs), + X86_MATCH(INTEL_BROADWELL_D, core_funcs), + X86_MATCH(INTEL_BROADWELL_X, core_funcs), + X86_MATCH(INTEL_SKYLAKE_X, core_funcs), + X86_MATCH(INTEL_ICELAKE_X, core_funcs), + X86_MATCH(INTEL_SAPPHIRERAPIDS_X, core_funcs), {} }; #endif static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = { - X86_MATCH(KABYLAKE, core_funcs), + X86_MATCH(INTEL_KABYLAKE, core_funcs), {} }; @@ -3346,14 +3345,13 @@ static inline void intel_pstate_request_control_from_smm(void) {} #define INTEL_PSTATE_HWP_BROADWELL 0x01 -#define X86_MATCH_HWP(model, hwp_mode) \ - X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \ - X86_FEATURE_HWP, hwp_mode) +#define X86_MATCH_HWP(vfm, hwp_mode) \ + X86_MATCH_VFM_FEATURE(vfm, X86_FEATURE_HWP, hwp_mode) static const struct x86_cpu_id hwp_support_ids[] __initconst = { - X86_MATCH_HWP(BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL), - X86_MATCH_HWP(BROADWELL_D, INTEL_PSTATE_HWP_BROADWELL), - X86_MATCH_HWP(ANY, 0), + X86_MATCH_HWP(INTEL_BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL), + X86_MATCH_HWP(INTEL_BROADWELL_D, INTEL_PSTATE_HWP_BROADWELL), + X86_MATCH_HWP(INTEL_ANY, 0), {} }; @@ -3386,15 +3384,15 @@ static const struct x86_cpu_id intel_epp_default[] = { * which can result in one core turbo frequency for * AlderLake Mobile CPUs. 
*/ - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)), - X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), - X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, - HWP_EPP_BALANCE_POWERSAVE, 115, 16)), + X86_MATCH_VFM(INTEL_ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)), + X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), + X86_MATCH_VFM(INTEL_METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, + HWP_EPP_BALANCE_POWERSAVE, 115, 16)), {} }; static const struct x86_cpu_id intel_hybrid_scaling_factor[] = { - X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, HYBRID_SCALING_FACTOR_MTL), + X86_MATCH_VFM(INTEL_METEORLAKE_L, HYBRID_SCALING_FACTOR_MTL), {} }; From 8bdab3c8f2e696fcb3bf65832376ac91ef6da271 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 6 Jun 2024 11:12:14 -0700 Subject: [PATCH 03/24] cpufreq: intel_pstate: Update Meteor Lake EPPs Update the default balance_performance EPP to 64. This gives better performance and perf/watt compared to the current value of 115. For example: Speedometer 2.1 score: +19% Perf/watt: +5.25% Webxprt 4 score: +12% Perf/watt: +6.12% 3DMark Wildlife extreme unlimited score: +3.2% Perf/watt: +11.5% Geekbench6 MT score: +2.14% Perf/watt: +0.32% Also update the balance_power EPP default to 179. With this change: Video playback power is reduced by 52% Teams video conference power is reduced by 35% With the power profile daemon now setting the balance_power EPP on DC instead of balance_performance, updating the balance_power EPP default will help extend battery life. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ebb3335d23f1b..738b8225baad3 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3387,7 +3387,7 @@ static const struct x86_cpu_id intel_epp_default[] = { X86_MATCH_VFM(INTEL_ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)), X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), X86_MATCH_VFM(INTEL_METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, - HWP_EPP_BALANCE_POWERSAVE, 115, 16)), + 179, 64, 16)), {} }; From e2ae7893b716b598b07169762f619a7e6c1d4fa9 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 6 Jun 2024 11:27:48 -0700 Subject: [PATCH 04/24] cpufreq: intel_pstate: Use Meteor Lake EPPs for Arrow Lake Use the same default EPPs as the Meteor Lake generation. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J.
Wysocki --- drivers/cpufreq/intel_pstate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 738b8225baad3..029d6c6dfff96 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3388,6 +3388,8 @@ static const struct x86_cpu_id intel_epp_default[] = { X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), X86_MATCH_VFM(INTEL_METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, 179, 64, 16)), + X86_MATCH_VFM(INTEL_ARROWLAKE, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, + 179, 64, 16)), {} }; From 7e1c3f584ee78b0d0210fc424420d9529f3ca952 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 29 May 2024 22:53:30 -0700 Subject: [PATCH 05/24] cpufreq: intel_pstate: Support Emerald Rapids OOB mode Prevent intel_pstate from loading when OOB (Out Of Band) P-states mode is enabled in Emerald Rapids. The OOB identifying bits are same as for the prior generation CPUs like Sapphire Rapids servers, so also add Emerald Rapids to the intel_pstate_cpu_oob_ids[] list. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 029d6c6dfff96..efb29a473be21 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2404,6 +2404,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { X86_MATCH(INTEL_SKYLAKE_X, core_funcs), X86_MATCH(INTEL_ICELAKE_X, core_funcs), X86_MATCH(INTEL_SAPPHIRERAPIDS_X, core_funcs), + X86_MATCH(INTEL_EMERALDRAPIDS_X, core_funcs), {} }; #endif From c7107750b2ffaa4950faa53f84f9c9aa2caac89a Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Sat, 25 May 2024 23:08:43 +0800 Subject: [PATCH 06/24] x86/cpufeatures: Add AMD FAST CPPC feature flag Some AMD Zen 4 processors support a new feature FAST CPPC which allows for a faster CPPC loop due to internal architectural enhancements. The goal of this faster loop is higher performance at the same power consumption. 
Reference: See page 99 of the PPR for AMD Family 19h Model 61h rev.B1, docID 56713. Signed-off-by: Perry Yuan Signed-off-by: Xiaojian Du Reviewed-by: Borislav Petkov (AMD) --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/scattered.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 3c7434329661c..6c128d463a143 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -470,6 +470,7 @@ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* "" AMD Fast CPPC */ /* * BUG word(s) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index af5aa2c754c22..c84c30188fdf2 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -45,6 +45,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, + { X86_FEATURE_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 }, { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, From c00d476cbcef4cbcf0c7db8944df7e98a36bdbfa Mon Sep 17 00:00:00 2001 From: Xiaojian Du Date: Sat, 25 May 2024 23:08:44 +0800 Subject: [PATCH 07/24] cpufreq: amd-pstate: change cpu freq transition delay for some models Some AMD Zen 4 APUs/CPUs support adjusting the CPU core clock more quickly and precisely according to the CPU workload. This is advertised by the Fast CPPC x86 feature. This change will only be effective in the *passive mode* of the amd-pstate driver. Based on test results with different transition delay values, 600us is chosen to strike a balance between performance and power consumption. Some test results on an AMD Ryzen 7840HS (Phoenix) APU: 1.
Tbench (Energy less is better, Throughput more is better, PPW--Performance per Watt more is better) ============= =================== ============== =============== ============== =============== ============== =============== =============== Trans Delay Tbench governor:schedutil, 3-iterations average ============= =================== ============== =============== ============== =============== ============== =============== =============== 1000us Clients 1 2 4 8 12 16 32 Energy/Joules 2010 2804 8768 17171 16170 15132 15027 Throughput/(MB/s) 114 259 1041 3010 3135 4851 4605 PPW 0.0567 0.0923 0.1187 0.1752 0.1938 0.3205 0.3064 600us Clients 1 2 4 8 12 16 32 Energy/Joules 2115 (5.22%) 2388 (-14.84%) 10700(22.03%) 16716 (-2.65%) 15939 (-1.43%) 15053 (-0.52%) 15083 (0.37% ) Throughput/(MB/s) 122 (7.02%) 234 (-9.65% ) 1188 (14.12%) 3003 (-0.23%) 3143 (0.26% ) 4842 (-0.19%) 4603 (-0.04%) PPW 0.0576(1.59%) 0.0979(6.07% ) 0.111(-6.49%) 0.1796(2.51% ) 0.1971(1.70% ) 0.3216(0.34% ) 0.3051(-0.42%) ============= =================== ============== ================ ============= =============== ============== =============== =============== 2.Dbench (Energy less is better, Throughput more is better, PPW--Performance per Watt more is better) ============= =================== ============== =============== ============== =============== ============== =============== =============== Trans Delay Dbench governor:schedutil, 3-iterations average ============= =================== ============== =============== ============== =============== ============== =============== =============== 1000us Clients 1 2 4 8 12 16 32 Energy/Joules 4890 3779 3567 5157 5611 6500 8163 Throughput/(MB/s) 327 167 220 577 775 938 1397 PPW 0.0668 0.0441 0.0616 0.1118 0.1381 0.1443 0.1711 600us Clients 1 2 4 8 12 16 32 Energy/Joules 4915 (0.51%) 4912 (29.98%) 3506 (-1.71%) 4907 (-4.85% ) 5011 (-10.69%) 5672 (-12.74%) 8141 (-0.27%) Throughput/(MB/s) 348 (6.42%) 284 (70.06%) 220 (0.00% ) 518 (-10.23%) 712 (-8.13% ) 854 (-8.96% ) 1475 (5.58% ) PPW 0.0708(5.99%) 0.0578(31.07%) 0.0627(1.79% ) 0.1055(-5.64% ) 0.142(2.82% ) 0.1505(4.30% ) 0.1811(5.84% ) ============= =================== ============== =============== ============== =============== ============== =============== =============== 3.Hackbench(less time is better) ============= =========================== ========================== hackbench governor:schedutil ============= =========================== ========================== Trans Delay Process Mode Ave time(s) Thread Mode Ave time(s) 1000us 14.484 14.484 600us 14.418(-0.46%) 15.41(+6.39%) ============= =========================== ========================== 4.Perf_sched_bench(less time is better) ============= =================== ============== ============== ============== =============== =============== ============= Trans Delay perf_sched_bench governor:schedutil ============= =================== ============== ============== ============== =============== =============== ============= 1000us Groups 1 2 4 8 12 24 AveTime(s) 1.64 2.851 5.878 11.636 16.093 26.395 600us Groups 1 2 4 8 12 24 AveTime(s) 1.69(3.05%) 2.845(-0.21%) 5.843(-0.60%) 11.576(-0.52%) 16.092(-0.01%) 26.32(-0.28%) ============= ================== ============== ============== ============== =============== =============== ============== 5.Sysbench(higher is better) ============= ================== ============== ================= ============== ================ =============== ================= Sysbench governor:schedutil ============= ================== 
============== ================= ============== ================ =============== ================= 1000us Thread 1 2 4 8 12 24 Ave events 6020.98 12273.39 24119.82 46171.57 47074.37 47831.72 600us Thread 1 2 4 8 12 24 Ave events 6154.82(2.22%) 12271.63(-0.01%) 24392.5(1.13%) 46117.64(-0.12%) 46852.19(-0.47%) 47678.92(-0.32%) ============= ================== ============== ================= ============== ================ =============== ================= In conclusion, a shorter CPU clock transition delay has a clearly positive effect on PPW in the Dbench test, while keeping performance stable in Tbench, Hackbench, Perf_sched_bench and Sysbench. Signed-off-by: Xiaojian Du Reviewed-by: Perry Yuan Acked-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 9ad62dbe8bfbf..fda8f86c90e01 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -51,6 +51,7 @@ #define AMD_PSTATE_TRANSITION_LATENCY 20000 #define AMD_PSTATE_TRANSITION_DELAY 1000 +#define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY 600 #define CPPC_HIGHEST_PERF_PERFORMANCE 196 #define CPPC_HIGHEST_PERF_DEFAULT 166 @@ -849,8 +850,12 @@ static u32 amd_pstate_get_transition_delay_us(unsigned int cpu) u32 transition_delay_ns; transition_delay_ns = cppc_get_transition_latency(cpu); - if (transition_delay_ns == CPUFREQ_ETERNAL) - return AMD_PSTATE_TRANSITION_DELAY; + if (transition_delay_ns == CPUFREQ_ETERNAL) { + if (cpu_feature_enabled(X86_FEATURE_FAST_CPPC)) + return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY; + else + return AMD_PSTATE_TRANSITION_DELAY; + } return transition_delay_ns / NSEC_PER_USEC; } From 6c30b137c081d0b3e79365a63f44e59bd0aac0cf Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 17 Jun 2024 22:52:20 -0700 Subject: [PATCH 08/24] cpufreq: intel_pstate: Update Arrow Lake hybrid scaling factor Arrow Lake uses the same hybrid scaling factor as Meteor Lake, so reuse it. Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20240618055221.446108-1-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 3dc9b82c43af5..72d9e60369e03 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3401,6 +3401,7 @@ static const struct x86_cpu_id intel_epp_default[] = { static const struct x86_cpu_id intel_hybrid_scaling_factor[] = { X86_MATCH_VFM(INTEL_METEORLAKE_L, HYBRID_SCALING_FACTOR_MTL), + X86_MATCH_VFM(INTEL_ARROWLAKE, HYBRID_SCALING_FACTOR_MTL), {} }; From ede951c27dbd2d4c91b9d54f0b423508a6b5e191 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 17 Jun 2024 22:52:21 -0700 Subject: [PATCH 09/24] cpufreq: intel_pstate: Update Lunar Lake hybrid scaling factor Change the hybrid scaling factor for Lunar Lake. The scaling factor is 1.15 for P-cores compared to E-cores. Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20240618055221.446108-2-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J.
Wysocki --- drivers/cpufreq/intel_pstate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 72d9e60369e03..32c331ee97ca0 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -300,6 +300,7 @@ static struct cpufreq_driver *intel_pstate_driver __read_mostly; #define HYBRID_SCALING_FACTOR 78741 #define HYBRID_SCALING_FACTOR_MTL 80000 +#define HYBRID_SCALING_FACTOR_LNL 86957 static int hybrid_scaling_factor = HYBRID_SCALING_FACTOR; @@ -3402,6 +3403,7 @@ static const struct x86_cpu_id intel_epp_default[] = { static const struct x86_cpu_id intel_hybrid_scaling_factor[] = { X86_MATCH_VFM(INTEL_METEORLAKE_L, HYBRID_SCALING_FACTOR_MTL), X86_MATCH_VFM(INTEL_ARROWLAKE, HYBRID_SCALING_FACTOR_MTL), + X86_MATCH_VFM(INTEL_LUNARLAKE_M, HYBRID_SCALING_FACTOR_LNL), {} }; From 5e62d53c763aa44be07a7b7ecb5a94a269bb0ed1 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 19 Jun 2024 16:15:20 +0800 Subject: [PATCH 10/24] cpufreq: update to sysfs_emit() for safer buffer handling Replace sprintf() and scnprintf() with sysfs_emit() and sysfs_emit_at() in the cpufreq core. This ensures safer buffer handling and consistency with sysfs interfaces. Update show_scaling_available_governors() and related functions for compliance with the new API. Signed-off-by: Perry Yuan Reviewed-by: Mario Limonciello Link: https://patch.msgid.link/20240619081520.259971-1-perry.yuan@amd.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a45aac17c20f0..e678ea7b08912 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -608,7 +608,7 @@ EXPORT_SYMBOL_GPL(cpufreq_policy_transition_delay_us); static ssize_t show_boost(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", cpufreq_driver->boost_enabled); + return sysfs_emit(buf, "%d\n", cpufreq_driver->boost_enabled); } static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr, @@ -739,7 +739,7 @@ static struct cpufreq_governor *cpufreq_parse_governor(char *str_governor) static ssize_t show_##file_name \ (struct cpufreq_policy *policy, char *buf) \ { \ - return sprintf(buf, "%u\n", policy->object); \ + return sysfs_emit(buf, "%u\n", policy->object); \ } show_one(cpuinfo_min_freq, cpuinfo.min_freq); @@ -760,11 +760,11 @@ static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf) freq = arch_freq_get_on_cpu(policy->cpu); if (freq) - ret = sprintf(buf, "%u\n", freq); + ret = sysfs_emit(buf, "%u\n", freq); else if (cpufreq_driver->setpolicy && cpufreq_driver->get) - ret = sprintf(buf, "%u\n", cpufreq_driver->get(policy->cpu)); + ret = sysfs_emit(buf, "%u\n", cpufreq_driver->get(policy->cpu)); else - ret = sprintf(buf, "%u\n", policy->cur); + ret = sysfs_emit(buf, "%u\n", policy->cur); return ret; } @@ -798,9 +798,9 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, unsigned int cur_freq = __cpufreq_get(policy); if (cur_freq) - return sprintf(buf, "%u\n", cur_freq); + return sysfs_emit(buf, "%u\n", cur_freq); - return sprintf(buf, "\n"); + return sysfs_emit(buf, "\n"); } /* @@ -809,12 +809,11 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) { if (policy->policy == 
CPUFREQ_POLICY_POWERSAVE) - return sprintf(buf, "powersave\n"); + return sysfs_emit(buf, "powersave\n"); else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) - return sprintf(buf, "performance\n"); + return sysfs_emit(buf, "performance\n"); else if (policy->governor) - return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n", - policy->governor->name); + return sysfs_emit(buf, "%s\n", policy->governor->name); return -EINVAL; } @@ -873,7 +872,7 @@ static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, struct cpufreq_governor *t; if (!has_target()) { - i += sprintf(buf, "performance powersave"); + i += sysfs_emit(buf, "performance powersave"); goto out; } @@ -882,11 +881,11 @@ static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) - (CPUFREQ_NAME_LEN + 2))) break; - i += scnprintf(&buf[i], CPUFREQ_NAME_PLEN, "%s ", t->name); + i += sysfs_emit_at(buf, i, "%s ", t->name); } mutex_unlock(&cpufreq_governor_mutex); out: - i += sprintf(&buf[i], "\n"); + i += sysfs_emit_at(buf, i, "\n"); return i; } @@ -896,7 +895,7 @@ ssize_t cpufreq_show_cpus(const struct cpumask *mask, char *buf) unsigned int cpu; for_each_cpu(cpu, mask) { - i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u ", cpu); + i += sysfs_emit_at(buf, i, "%u ", cpu); if (i >= (PAGE_SIZE - 5)) break; } @@ -904,7 +903,7 @@ ssize_t cpufreq_show_cpus(const struct cpumask *mask, char *buf) /* Remove the extra space at the end */ i--; - i += sprintf(&buf[i], "\n"); + i += sysfs_emit_at(buf, i, "\n"); return i; } EXPORT_SYMBOL_GPL(cpufreq_show_cpus); @@ -947,7 +946,7 @@ static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy, static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf) { if (!policy->governor || !policy->governor->show_setspeed) - return sprintf(buf, "\n"); + return sysfs_emit(buf, "\n"); return policy->governor->show_setspeed(policy, buf); } @@ -961,8 +960,8 @@ static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf) int ret; ret = cpufreq_driver->bios_limit(policy->cpu, &limit); if (!ret) - return sprintf(buf, "%u\n", limit); - return sprintf(buf, "%u\n", policy->cpuinfo.max_freq); + return sysfs_emit(buf, "%u\n", limit); + return sysfs_emit(buf, "%u\n", policy->cpuinfo.max_freq); } cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400); From fc6e0837264a8b2504b6160e63ec92eb012540f3 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 12 Jun 2024 09:20:49 -0500 Subject: [PATCH 11/24] cpufreq: amd-pstate: Allow users to write 'default' EPP string The EPP string for 'default' represents what the firmware had configured as the default EPP value but once a user changes EPP to another string they can't reset it back to 'default'. Cache the firmware EPP value and allow the user to write 'default' using this value. Reported-by: Artem S. 
Tashkinov Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217931#c61 Reviewed-by: Perry Yuan Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 8 +++----- drivers/cpufreq/amd-pstate.h | 1 + 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index fda8f86c90e01..5bdcdd3ea163b 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -282,10 +282,8 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, int epp = -EINVAL; int ret; - if (!pref_index) { - pr_debug("EPP pref_index is invalid\n"); - return -EINVAL; - } + if (!pref_index) + epp = cpudata->epp_default; if (epp == -EINVAL) epp = epp_values[pref_index]; @@ -1441,7 +1439,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) policy->driver_data = cpudata; - cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0); + cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata, 0); policy->min = policy->cpuinfo.min_freq; policy->max = policy->cpuinfo.max_freq; diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h index e6a28e7f4dbf1..f80b33fa5d43a 100644 --- a/drivers/cpufreq/amd-pstate.h +++ b/drivers/cpufreq/amd-pstate.h @@ -99,6 +99,7 @@ struct amd_cpudata { u32 policy; u64 cppc_cap1_cached; bool suspended; + s16 epp_default; }; #endif /* _LINUX_AMD_PSTATE_H */ From 8f8b42c1fcc939a73b547b172a9ffcb65ef4bf47 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 19 Jun 2024 23:40:12 +0800 Subject: [PATCH 12/24] cpufreq: amd-pstate: optimize the initial frequency values verification To make it easier to debug driver loading failures caused by broken CPPC ACPI tables, move the verification of `min_freq`, `nominal_freq` and the other dependent values to the `amd_pstate_init_freq()` function where they are initialized. If any of these values are incorrect, the `amd-pstate` driver will not be registered. Ensuring that these values are correct before they are used facilitates debugging when the driver fails to load due to faulty CPPC ACPI tables from the BIOS. Signed-off-by: Perry Yuan Acked-by: Gautham R.
Shenoy Acked-by: Mario Limonciello Link: https://lore.kernel.org/r/f9793f8451c1832e34cc9dc35f89c653b39cfe38.1718811234.git.perry.yuan@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 5bdcdd3ea163b..d4d7b7cdc4eb5 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -924,6 +924,24 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) WRITE_ONCE(cpudata->nominal_freq, nominal_freq); WRITE_ONCE(cpudata->max_freq, max_freq); + /** + * Below values need to be initialized correctly, otherwise driver will fail to load + * max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf + * lowest_nonlinear_freq is a value between [min_freq, nominal_freq] + * Check _CPC in ACPI table objects if any values are incorrect + */ + if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) { + pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n", + min_freq, max_freq, nominal_freq * 1000); + return -EINVAL; + } + + if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq * 1000) { + pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n", + lowest_nonlinear_freq, min_freq, nominal_freq * 1000); + return -EINVAL; + } + return 0; } @@ -962,15 +980,6 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) max_freq = READ_ONCE(cpudata->max_freq); nominal_freq = READ_ONCE(cpudata->nominal_freq); - if (min_freq <= 0 || max_freq <= 0 || - nominal_freq <= 0 || min_freq > max_freq) { - dev_err(dev, - "min_freq(%d) or max_freq(%d) or nominal_freq (%d) value is incorrect, check _CPC in ACPI tables\n", - min_freq, max_freq, nominal_freq); - ret = -EINVAL; - goto free_cpudata1; - } - policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu); policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu); @@ -1423,14 +1432,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) min_freq = READ_ONCE(cpudata->min_freq); max_freq = READ_ONCE(cpudata->max_freq); nominal_freq = READ_ONCE(cpudata->nominal_freq); - if (min_freq <= 0 || max_freq <= 0 || - nominal_freq <= 0 || min_freq > max_freq) { - dev_err(dev, - "min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect, check _CPC in ACPI tables\n", - min_freq, max_freq, nominal_freq); - ret = -EINVAL; - goto free_cpudata1; - } policy->cpuinfo.min_freq = min_freq; policy->cpuinfo.max_freq = max_freq; From 7bf7f22906671995b798b39f8c3cb62405ea03b2 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 19 Jun 2024 23:40:13 +0800 Subject: [PATCH 13/24] cpufreq: amd-pstate: remove unused variable nominal_freq removed the unused variable `nominal_freq` for build warning. This variable was defined and assigned a value in the previous code, but it was not used in the subsequent code. Closes: https://lore.kernel.org/oe-kbuild-all/202405080431.BPU6Yg9s-lkp@intel.com/ Reported-by: kernel test robot Signed-off-by: Perry Yuan Reviewed-by: Gautham R. 
Shenoy Acked-by: Mario Limonciello Link: https://lore.kernel.org/r/b7ef41557f71d40d098393ddb27f0fe1f23648ae.1718811234.git.perry.yuan@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index d4d7b7cdc4eb5..1ce063a222148 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -947,7 +947,7 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { - int min_freq, max_freq, nominal_freq, ret; + int min_freq, max_freq, ret; struct device *dev; struct amd_cpudata *cpudata; @@ -978,7 +978,6 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) min_freq = READ_ONCE(cpudata->min_freq); max_freq = READ_ONCE(cpudata->max_freq); - nominal_freq = READ_ONCE(cpudata->nominal_freq); policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu); policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu); @@ -1398,7 +1397,7 @@ static bool amd_pstate_acpi_pm_profile_undefined(void) static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) { - int min_freq, max_freq, nominal_freq, ret; + int min_freq, max_freq, ret; struct amd_cpudata *cpudata; struct device *dev; u64 value; @@ -1431,7 +1430,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) min_freq = READ_ONCE(cpudata->min_freq); max_freq = READ_ONCE(cpudata->max_freq); - nominal_freq = READ_ONCE(cpudata->nominal_freq); policy->cpuinfo.min_freq = min_freq; policy->cpuinfo.max_freq = max_freq; From cb817ec6673b7c2faed2141e7f7ae5d7052b2442 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 19 Jun 2024 23:40:14 +0800 Subject: [PATCH 14/24] cpufreq: amd-pstate: show CPPC debug message if CPPC is not supported Add CPU ID checking in case the driver attempts to load on systems where CPPC functionality is unavailable. The warning message will not be shown if CPPC is not supported. A debug message is also printed if the CPU has no CPPC support, which helps to debug driver loading failures. Reported-by: Paul Menzel Closes: https://lore.kernel.org/linux-pm/CYYPR12MB8655D32EA18574C9497E888A9C122@CYYPR12MB8655.namprd12.prod.outlook.com/T/#t Signed-off-by: Perry Yuan Reviewed-by: Gautham R. Shenoy Acked-by: Mario Limonciello Link: https://lore.kernel.org/r/437dbd581a4119465581330081d9b1e289482ba2.1718811234.git.perry.yuan@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 1ce063a222148..76419762c04f7 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1743,6 +1743,20 @@ static int __init amd_pstate_set_driver(int mode_idx) return -EINVAL; } +/** + * CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F. + * show the debug message that helps to check if the CPU has CPPC support for loading issue.
+ */ +static bool amd_cppc_supported(void) +{ + if ((boot_cpu_data.x86 == 0x17) && (boot_cpu_data.x86_model < 0x30)) { + pr_debug_once("CPPC feature is not supported by the processor\n"); + return false; + } + + return true; +} + static int __init amd_pstate_init(void) { struct device *dev_root; @@ -1751,6 +1765,11 @@ static int __init amd_pstate_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) return -ENODEV; + /* show debug message only if CPPC is not supported */ + if (!amd_cppc_supported()) + return -EOPNOTSUPP; + + /* show warning message when BIOS broken or ACPI disabled */ if (!acpi_cpc_valid()) { pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n"); return -ENODEV; From bff7d13c190ad98cf4f877189b022c75df4cb383 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 19 Jun 2024 23:40:15 +0800 Subject: [PATCH 15/24] cpufreq: amd-pstate: add debug message while CPPC is supported and disabled by SBIOS If the CPPC feature is supported by the CPU but the CPUID flag bit is not set by the SBIOS, the `amd_pstate` driver will fail to load at boot. Add one more debug message telling the user to check the SBIOS setting. The change also helps maintainers debug why the amd_pstate driver failed to load at boot on a processor that supports CPPC. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218686 Signed-off-by: Perry Yuan Reviewed-by: Gautham R. Shenoy Acked-by: Mario Limonciello Link: https://lore.kernel.org/r/42c953616ac121bd1e5c329e83d015a02e6b32c7.1718811234.git.perry.yuan@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 76419762c04f7..9aa220a0e3fef 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1749,11 +1749,37 @@ static int __init amd_pstate_set_driver(int mode_idx) */ static bool amd_cppc_supported(void) { + struct cpuinfo_x86 *c = &cpu_data(0); + bool warn = false; + if ((boot_cpu_data.x86 == 0x17) && (boot_cpu_data.x86_model < 0x30)) { pr_debug_once("CPPC feature is not supported by the processor\n"); return false; } + /* + * If the CPPC feature is disabled in the BIOS for processors that support MSR-based CPPC, + * the AMD Pstate driver may not function correctly. + * Check the CPPC flag and display a warning message if the platform supports CPPC. + * Note: below checking code will not abort the driver registeration process because of + * the code is added for debugging purposes.
+ */ + if (!cpu_feature_enabled(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_ZEN1) || cpu_feature_enabled(X86_FEATURE_ZEN2)) { + if (c->x86_model > 0x60 && c->x86_model < 0xaf) + warn = true; + } else if (cpu_feature_enabled(X86_FEATURE_ZEN3) || cpu_feature_enabled(X86_FEATURE_ZEN4)) { + if ((c->x86_model > 0x10 && c->x86_model < 0x1F) || + (c->x86_model > 0x40 && c->x86_model < 0xaf)) + warn = true; + } else if (cpu_feature_enabled(X86_FEATURE_ZEN5)) { + warn = true; + } + } + + if (warn) + pr_warn_once("The CPPC feature is supported but currently disabled by the BIOS.\n" + "Please enable it if your BIOS has the CPPC option.\n"); return true; } From 1d53f30b3aa6120ca91789254cb2dfff9ff8c533 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 19 Jun 2024 23:40:16 +0800 Subject: [PATCH 16/24] Documentation: PM: amd-pstate: add guided mode to the Operation mode the guided mode is also supported, so the operation mode should include that mode as well. Reviewed-by: Mario Limonciello Signed-off-by: Perry Yuan Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/a61d825ef71f6aacc8f1624fe9fb982b8446b5a7.1718811234.git.perry.yuan@amd.com Signed-off-by: Mario Limonciello --- Documentation/admin-guide/pm/amd-pstate.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index 1e0d101b020a0..f5ee81419a935 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -406,7 +406,7 @@ control its functionality at the system level. They are located in the ``/sys/devices/system/cpu/amd_pstate/`` directory and affect all CPUs. ``status`` - Operation mode of the driver: "active", "passive" or "disable". + Operation mode of the driver: "active", "passive", "guided" or "disable". "active" The driver is functional and in the ``active mode`` From c9fdaba8369ea6ca01728ed9c25c6ca1b90d3355 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 19 Jun 2024 23:40:17 +0800 Subject: [PATCH 17/24] cpufreq: amd-pstate: switch boot_cpu_has() to cpu_feature_enabled() replace the usage of the deprecated boot_cpu_has() function with the modern cpu_feature_enabled() function. The switch to cpu_feature_enabled() ensures compatibility with the latest CPU feature detection mechanisms and improves code maintainability. Acked-by: Mario Limonciello Suggested-by: Borislav Petkov (AMD) Signed-off-by: Perry Yuan Reviewed-by: Gautham R. 
Shenoy Link: https://lore.kernel.org/r/f1567593ac5e1d38343067e9c681a8c4b0707038.1718811234.git.perry.yuan@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 9aa220a0e3fef..cb750ef305fe0 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -158,7 +158,7 @@ static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi) * broken BIOS lack of nominal_freq and lowest_freq capabilities * definition in ACPI tables */ - if (boot_cpu_has(X86_FEATURE_ZEN2)) { + if (cpu_feature_enabled(X86_FEATURE_ZEN2)) { quirks = dmi->driver_data; pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident); return 1; @@ -200,7 +200,7 @@ static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) u64 epp; int ret; - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { if (!cppc_req_cached) { epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req_cached); @@ -253,7 +253,7 @@ static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) int ret; struct cppc_perf_ctrls perf_ctrls; - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { u64 value = READ_ONCE(cpudata->cppc_req_cached); value &= ~GENMASK_ULL(31, 24); @@ -752,7 +752,7 @@ static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf) { int ret; - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { u64 cap1; ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); @@ -991,7 +991,7 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) /* It will be updated by governor */ policy->cur = policy->cpuinfo.min_freq; - if (boot_cpu_has(X86_FEATURE_CPPC)) + if (cpu_feature_enabled(X86_FEATURE_CPPC)) policy->fast_switch_possible = true; ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0], @@ -1224,7 +1224,7 @@ static int amd_pstate_change_mode_without_dvr_change(int mode) cppc_state = mode; - if (boot_cpu_has(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE) + if (cpu_feature_enabled(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE) return 0; for_each_present_cpu(cpu) { @@ -1453,7 +1453,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) else policy->policy = CPUFREQ_POLICY_POWERSAVE; - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); if (ret) return ret; @@ -1543,7 +1543,7 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) epp = 0; /* Set initial EPP value */ - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { value &= ~GENMASK_ULL(31, 24); value |= (u64)epp << 24; } @@ -1582,7 +1582,7 @@ static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) value = READ_ONCE(cpudata->cppc_req_cached); max_perf = READ_ONCE(cpudata->highest_perf); - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); } else { perf_ctrls.max_perf = max_perf; @@ -1616,7 +1616,7 @@ static void amd_pstate_epp_offline(struct cpufreq_policy *policy) value = READ_ONCE(cpudata->cppc_req_cached); mutex_lock(&amd_pstate_limits_lock); - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { cpudata->epp_policy = 
CPUFREQ_POLICY_UNKNOWN; /* Set max perf same as min perf */ @@ -1819,7 +1819,7 @@ static int __init amd_pstate_init(void) */ if (amd_pstate_acpi_pm_profile_undefined() || amd_pstate_acpi_pm_profile_server() || - !boot_cpu_has(X86_FEATURE_CPPC)) { + !cpu_feature_enabled(X86_FEATURE_CPPC)) { pr_info("driver load is disabled, boot with specific mode to enable this\n"); return -ENODEV; } @@ -1838,7 +1838,7 @@ static int __init amd_pstate_init(void) } /* capability check */ - if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (cpu_feature_enabled(X86_FEATURE_CPPC)) { pr_debug("AMD CPPC MSR based functionality is supported\n"); if (cppc_state != AMD_PSTATE_ACTIVE) current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; From 918263938c41dac46935609477672442e8fa5d9e Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 19 Jun 2024 23:40:18 +0800 Subject: [PATCH 18/24] cpufreq: amd-pstate: enable shared memory type CPPC by default The amd-pstate-epp driver has been implemented and resolves the performance drop issue seen in passive mode for shared memory type CPPC systems. Users who enable the active mode driver will not experience a performance drop compared to the passive mode driver. Therefore, the EPP driver should be loaded by default for shared memory type CPPC system to get better performance. Signed-off-by: Perry Yuan Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/c705507cf3ee790e544251cfd897ed11e8e57712.1718811234.git.perry.yuan@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index cb750ef305fe0..0f8ffbc0dc2ac 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -86,15 +86,6 @@ struct quirk_entry { u32 lowest_freq; }; -/* - * TODO: We need more time to fine tune processors with shared memory solution - * with community together. - * - * There are some performance drops on the CPU benchmarks which reports from - * Suse. We are co-working with them to fine tune the shared memory solution. So - * we disable it by default to go acpi-cpufreq on these processors and add a - * module parameter to be able to enable it manually for debugging. - */ static struct cpufreq_driver *current_pstate_driver; static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_epp_driver; @@ -1815,11 +1806,9 @@ static int __init amd_pstate_init(void) /* Disable on the following configs by default: * 1. Undefined platforms * 2. Server platforms - * 3. Shared memory designs */ if (amd_pstate_acpi_pm_profile_undefined() || - amd_pstate_acpi_pm_profile_server() || - !cpu_feature_enabled(X86_FEATURE_CPPC)) { + amd_pstate_acpi_pm_profile_server()) { pr_info("driver load is disabled, boot with specific mode to enable this\n"); return -ENODEV; } From 4e4f600ee750facedf6a5dc97e8ae0b627ab4573 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 19 Jun 2024 23:40:19 +0800 Subject: [PATCH 19/24] cpufreq: amd-pstate: auto-load pstate driver by default If the `amd-pstate` driver is not loaded automatically by default, it is because the kernel command line parameter has not been added. To resolve this issue, it is necessary to call the `amd_pstate_set_driver()` function to enable the desired mode (passive/active/guided) before registering the driver instance. This ensures that the driver is loaded correctly without relying on the kernel command line parameter. 
When no parameter is added to the command line, the kernel config provides the default mode to load. Meanwhile, the user can specify a driver mode on the command line, which overrides the kernel config default option. Reported-by: Andrei Amuraritei Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218705 Signed-off-by: Perry Yuan Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/83301c4cea4f92fb19e14b23f2bac7facfd8bdbb.1718811234.git.perry.yuan@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 0f8ffbc0dc2ac..a96ad7d10a4d2 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1801,8 +1801,13 @@ static int __init amd_pstate_init(void) /* check if this machine need CPPC quirks */ dmi_check_system(amd_pstate_quirks_table); - switch (cppc_state) { - case AMD_PSTATE_UNDEFINED: + /* + * determine the driver mode from the command line or kernel config. + * If no command line input is provided, cppc_state will be AMD_PSTATE_UNDEFINED. + * command line options will override the kernel config settings. + */ + + if (cppc_state == AMD_PSTATE_UNDEFINED) { /* Disable on the following configs by default: * 1. Undefined platforms * 2. Server platforms @@ -1812,15 +1817,20 @@ static int __init amd_pstate_init(void) pr_info("driver load is disabled, boot with specific mode to enable this\n"); return -ENODEV; } - ret = amd_pstate_set_driver(CONFIG_X86_AMD_PSTATE_DEFAULT_MODE); - if (ret) - return ret; - break; + /* get driver mode from kernel config option [1:4] */ + cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE; + } + + switch (cppc_state) { case AMD_PSTATE_DISABLE: + pr_info("driver load is disabled, boot with specific mode to enable this\n"); return -ENODEV; case AMD_PSTATE_PASSIVE: case AMD_PSTATE_ACTIVE: case AMD_PSTATE_GUIDED: + ret = amd_pstate_set_driver(cppc_state); + if (ret) + return ret; break; default: return -EINVAL; @@ -1841,7 +1851,7 @@ static int __init amd_pstate_init(void) /* enable amd pstate feature */ ret = amd_pstate_enable(true); if (ret) { - pr_err("failed to enable with return %d\n", ret); + pr_err("failed to enable driver mode(%d)\n", cppc_state); return ret; } From e8f555daacd3377bf691fdda2490c0b164e00085 Mon Sep 17 00:00:00 2001 From: Meng Li Date: Tue, 27 Feb 2024 15:11:33 +0800 Subject: [PATCH 20/24] cpufreq/amd-pstate: fix setting policy current frequency value When the scaling min/max frequency values are set, the value of policy->cur needs to be updated as well.
Signed-off-by: Meng Li Acked-by: Mario Limonciello Link: https://lore.kernel.org/r/20240227071133.3405003-1-li.meng@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index a96ad7d10a4d2..c418f2bb5cede 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -511,6 +511,8 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags) { + unsigned long max_freq; + struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu); u64 prev = READ_ONCE(cpudata->cppc_req_cached); u64 value = prev; @@ -520,6 +522,9 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, cpudata->max_limit_perf); des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); + max_freq = READ_ONCE(cpudata->max_limit_freq); + policy->cur = div_u64(des_perf * max_freq, max_perf); + if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { min_perf = des_perf; des_perf = 0; @@ -641,10 +646,9 @@ static void amd_pstate_adjust_perf(unsigned int cpu, unsigned long capacity) { unsigned long max_perf, min_perf, des_perf, - cap_perf, lowest_nonlinear_perf, max_freq; + cap_perf, lowest_nonlinear_perf; struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); struct amd_cpudata *cpudata = policy->driver_data; - unsigned int target_freq; if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) amd_pstate_update_min_max_limit(policy); @@ -652,7 +656,6 @@ static void amd_pstate_adjust_perf(unsigned int cpu, cap_perf = READ_ONCE(cpudata->highest_perf); lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); - max_freq = READ_ONCE(cpudata->max_freq); des_perf = cap_perf; if (target_perf < capacity) @@ -670,8 +673,6 @@ static void amd_pstate_adjust_perf(unsigned int cpu, max_perf = min_perf; des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); - target_freq = div_u64(des_perf * max_freq, max_perf); - policy->cur = target_freq; amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true, policy->governor->flags); @@ -1557,6 +1558,12 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) amd_pstate_epp_update_limit(policy); + /* + * policy->cur is never updated with the amd_pstate_epp driver, but it + * is used as a stale frequency value. So, keep it within limits. + */ + policy->cur = policy->min; + return 0; } From e1a921f7fcd157ca898e453bbb56927ab39412e3 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 22 Jun 2024 16:52:19 -0500 Subject: [PATCH 21/24] cpufreq: amd-pstate: Make amd-pstate unit tests depend on amd-pstate As the unit tests use a symbol from the driver an explicit dependency is required. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202406222016.R4A2TMs8-lkp@intel.com/ Reviewed-by: Gautham R. 
Shenoy Reviewed-by: Perry Yuan Link: https://lore.kernel.org/r/20240622215219.47911-1-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/Kconfig.x86 | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 438c9e75a04dc..97c2d4f15d76e 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -71,6 +71,7 @@ config X86_AMD_PSTATE_DEFAULT_MODE config X86_AMD_PSTATE_UT tristate "selftest for AMD Processor P-State driver" depends on X86 && ACPI_PROCESSOR + depends on X86_AMD_PSTATE default n help This kernel module is used for testing. It's safe to say M here. From bc76f57574741cc8e7d04c53f199d8722ce09413 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sun, 23 Jun 2024 15:09:18 -0500 Subject: [PATCH 22/24] cpufreq: amd-pstate: Don't create attributes when registration fails If driver registration fails then immediately return the failure instead of continuing to register attributes. This fixes issues of falling back from amd-pstate to other drivers when cpufreq init has failed for any reason. Reported-by: alex.s.cochran@proton.me Reviewed-by: Gautham R. Shenoy Reviewed-by: Perry Yuan Link: https://lore.kernel.org/r/20240623200918.52104-1-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index c418f2bb5cede..afcf398574f61 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1863,8 +1863,10 @@ static int __init amd_pstate_init(void) } ret = cpufreq_register_driver(current_pstate_driver); - if (ret) + if (ret) { pr_err("failed to register with return %d\n", ret); + goto disable_driver; + } dev_root = bus_get_dev_root(&cpu_subsys); if (dev_root) { @@ -1880,6 +1882,8 @@ static int __init amd_pstate_init(void) global_attr_free: cpufreq_unregister_driver(current_pstate_driver); +disable_driver: + amd_pstate_enable(false); return ret; } device_initcall(amd_pstate_init); From 2240d3e60bb3e7a00422596412d012aeb54c1573 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Sat, 22 Jun 2024 00:50:57 +0800 Subject: [PATCH 23/24] cpufreq: simplify boolean parsing with kstrtobool in store function Update the cpufreq store function to use kstrtobool for parsing boolean values. This simplifies the code and improves readability by using a standard kernel function for boolean string conversion. Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Perry Yuan Link: https://lore.kernel.org/r/d392eba3bad1231776124c321cef8c184ce1482d.1718988436.git.perry.yuan@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/cpufreq.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a45aac17c20f0..1fdabb660231e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -614,10 +614,9 @@ static ssize_t show_boost(struct kobject *kobj, static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - int ret, enable; + bool enable; - ret = sscanf(buf, "%d", &enable); - if (ret != 1 || enable < 0 || enable > 1) + if (kstrtobool(buf, &enable)) return -EINVAL; if (cpufreq_boost_trigger_state(enable)) { @@ -641,10 +640,10 @@ static ssize_t show_local_boost(struct cpufreq_policy *policy, char *buf) static ssize_t store_local_boost(struct cpufreq_policy *policy, const char *buf, size_t count) { - int ret, enable; + int ret; + bool enable; - ret = kstrtoint(buf, 10, &enable); - if (ret || enable < 0 || enable > 1) + if (kstrtobool(buf, &enable)) return -EINVAL; if (!cpufreq_driver->boost_enabled) From acfc429e42f09524653af52998548cd9317892a6 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 24 Jun 2024 09:27:14 -0700 Subject: [PATCH 24/24] cpufreq: intel_pstate: Replace boot_cpu_has() Replace boot_cpu_has() with cpu_feature_enabled(). Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20240624162714.1431182-1-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 32c331ee97ca0..eaff499309dde 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1633,7 +1633,7 @@ void notify_hwp_interrupt(void) unsigned long flags; u64 value; - if (!hwp_active || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) + if (!hwp_active || !cpu_feature_enabled(X86_FEATURE_HWP_NOTIFY)) return; rdmsrl_safe(MSR_HWP_STATUS, &value); @@ -1661,7 +1661,7 @@ static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata) { bool cancel_work; - if (!boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) + if (!cpu_feature_enabled(X86_FEATURE_HWP_NOTIFY)) return; /* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */