From 9a55ab6f02c98bfca1c9c9d73507c1744406d2ba Mon Sep 17 00:00:00 2001 From: Keguang Zhang Date: Thu, 12 Jan 2023 21:53:42 +0800 Subject: [PATCH 01/13] cpufreq: loongson1: Delete obsolete driver The generic DT based cpufreq driver works for Loongson-1, so delete the old custom driver. Signed-off-by: Keguang Zhang Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig | 9 -- drivers/cpufreq/Makefile | 1 - drivers/cpufreq/loongson1-cpufreq.c | 222 ---------------------------- 3 files changed, 232 deletions(-) delete mode 100644 drivers/cpufreq/loongson1-cpufreq.c diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 2a84fc63371e2..448b8ffb4ebd9 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -270,15 +270,6 @@ config LOONGSON2_CPUFREQ Loongson2F and its successors support this feature. - If in doubt, say N. - -config LOONGSON1_CPUFREQ - tristate "Loongson1 CPUFreq Driver" - depends on LOONGSON1_LS1B - help - This option adds a CPUFreq driver for loongson1 processors which - support software configurable cpu frequency. - If in doubt, say N. 
endif diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 32a7029e25ed8..4a806cc5265b9 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -111,7 +111,6 @@ obj-$(CONFIG_POWERNV_CPUFREQ) += powernv-cpufreq.o obj-$(CONFIG_BMIPS_CPUFREQ) += bmips-cpufreq.o obj-$(CONFIG_IA64_ACPI_CPUFREQ) += ia64-acpi-cpufreq.o obj-$(CONFIG_LOONGSON2_CPUFREQ) += loongson2_cpufreq.o -obj-$(CONFIG_LOONGSON1_CPUFREQ) += loongson1-cpufreq.o obj-$(CONFIG_SH_CPU_FREQ) += sh-cpufreq.o obj-$(CONFIG_SPARC_US2E_CPUFREQ) += sparc-us2e-cpufreq.o obj-$(CONFIG_SPARC_US3_CPUFREQ) += sparc-us3-cpufreq.o diff --git a/drivers/cpufreq/loongson1-cpufreq.c b/drivers/cpufreq/loongson1-cpufreq.c deleted file mode 100644 index fb72d709db565..0000000000000 --- a/drivers/cpufreq/loongson1-cpufreq.c +++ /dev/null @@ -1,222 +0,0 @@ -/* - * CPU Frequency Scaling for Loongson 1 SoC - * - * Copyright (C) 2014-2016 Zhang, Keguang - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -struct ls1x_cpufreq { - struct device *dev; - struct clk *clk; /* CPU clk */ - struct clk *mux_clk; /* MUX of CPU clk */ - struct clk *pll_clk; /* PLL clk */ - struct clk *osc_clk; /* OSC clk */ - unsigned int max_freq; - unsigned int min_freq; -}; - -static struct ls1x_cpufreq *cpufreq; - -static int ls1x_cpufreq_notifier(struct notifier_block *nb, - unsigned long val, void *data) -{ - if (val == CPUFREQ_POSTCHANGE) - current_cpu_data.udelay_val = loops_per_jiffy; - - return NOTIFY_OK; -} - -static struct notifier_block ls1x_cpufreq_notifier_block = { - .notifier_call = ls1x_cpufreq_notifier -}; - -static int ls1x_cpufreq_target(struct cpufreq_policy *policy, - unsigned int index) -{ - struct device *cpu_dev = get_cpu_device(policy->cpu); - unsigned int old_freq, new_freq; - - old_freq = policy->cur; - new_freq = policy->freq_table[index].frequency; - - /* - * The procedure of reconfiguring CPU clk is as below. 
- * - * - Reparent CPU clk to OSC clk - * - Reset CPU clock (very important) - * - Reconfigure CPU DIV - * - Reparent CPU clk back to CPU DIV clk - */ - - clk_set_parent(policy->clk, cpufreq->osc_clk); - __raw_writel(__raw_readl(LS1X_CLK_PLL_DIV) | RST_CPU_EN | RST_CPU, - LS1X_CLK_PLL_DIV); - __raw_writel(__raw_readl(LS1X_CLK_PLL_DIV) & ~(RST_CPU_EN | RST_CPU), - LS1X_CLK_PLL_DIV); - clk_set_rate(cpufreq->mux_clk, new_freq * 1000); - clk_set_parent(policy->clk, cpufreq->mux_clk); - dev_dbg(cpu_dev, "%u KHz --> %u KHz\n", old_freq, new_freq); - - return 0; -} - -static int ls1x_cpufreq_init(struct cpufreq_policy *policy) -{ - struct device *cpu_dev = get_cpu_device(policy->cpu); - struct cpufreq_frequency_table *freq_tbl; - unsigned int pll_freq, freq; - int steps, i; - - pll_freq = clk_get_rate(cpufreq->pll_clk) / 1000; - - steps = 1 << DIV_CPU_WIDTH; - freq_tbl = kcalloc(steps, sizeof(*freq_tbl), GFP_KERNEL); - if (!freq_tbl) - return -ENOMEM; - - for (i = 0; i < (steps - 1); i++) { - freq = pll_freq / (i + 1); - if ((freq < cpufreq->min_freq) || (freq > cpufreq->max_freq)) - freq_tbl[i].frequency = CPUFREQ_ENTRY_INVALID; - else - freq_tbl[i].frequency = freq; - dev_dbg(cpu_dev, - "cpufreq table: index %d: frequency %d\n", i, - freq_tbl[i].frequency); - } - freq_tbl[i].frequency = CPUFREQ_TABLE_END; - - policy->clk = cpufreq->clk; - cpufreq_generic_init(policy, freq_tbl, 0); - - return 0; -} - -static int ls1x_cpufreq_exit(struct cpufreq_policy *policy) -{ - kfree(policy->freq_table); - return 0; -} - -static struct cpufreq_driver ls1x_cpufreq_driver = { - .name = "cpufreq-ls1x", - .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK, - .verify = cpufreq_generic_frequency_table_verify, - .target_index = ls1x_cpufreq_target, - .get = cpufreq_generic_get, - .init = ls1x_cpufreq_init, - .exit = ls1x_cpufreq_exit, - .attr = cpufreq_generic_attr, -}; - -static int ls1x_cpufreq_remove(struct platform_device *pdev) -{ - cpufreq_unregister_notifier(&ls1x_cpufreq_notifier_block, - 
CPUFREQ_TRANSITION_NOTIFIER); - cpufreq_unregister_driver(&ls1x_cpufreq_driver); - - return 0; -} - -static int ls1x_cpufreq_probe(struct platform_device *pdev) -{ - struct plat_ls1x_cpufreq *pdata = dev_get_platdata(&pdev->dev); - struct clk *clk; - int ret; - - if (!pdata || !pdata->clk_name || !pdata->osc_clk_name) { - dev_err(&pdev->dev, "platform data missing\n"); - return -EINVAL; - } - - cpufreq = - devm_kzalloc(&pdev->dev, sizeof(struct ls1x_cpufreq), GFP_KERNEL); - if (!cpufreq) - return -ENOMEM; - - cpufreq->dev = &pdev->dev; - - clk = devm_clk_get(&pdev->dev, pdata->clk_name); - if (IS_ERR(clk)) { - dev_err(&pdev->dev, "unable to get %s clock\n", - pdata->clk_name); - return PTR_ERR(clk); - } - cpufreq->clk = clk; - - clk = clk_get_parent(clk); - if (IS_ERR(clk)) { - dev_err(&pdev->dev, "unable to get parent of %s clock\n", - __clk_get_name(cpufreq->clk)); - return PTR_ERR(clk); - } - cpufreq->mux_clk = clk; - - clk = clk_get_parent(clk); - if (IS_ERR(clk)) { - dev_err(&pdev->dev, "unable to get parent of %s clock\n", - __clk_get_name(cpufreq->mux_clk)); - return PTR_ERR(clk); - } - cpufreq->pll_clk = clk; - - clk = devm_clk_get(&pdev->dev, pdata->osc_clk_name); - if (IS_ERR(clk)) { - dev_err(&pdev->dev, "unable to get %s clock\n", - pdata->osc_clk_name); - return PTR_ERR(clk); - } - cpufreq->osc_clk = clk; - - cpufreq->max_freq = pdata->max_freq; - cpufreq->min_freq = pdata->min_freq; - - ret = cpufreq_register_driver(&ls1x_cpufreq_driver); - if (ret) { - dev_err(&pdev->dev, - "failed to register CPUFreq driver: %d\n", ret); - return ret; - } - - ret = cpufreq_register_notifier(&ls1x_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - - if (ret) { - dev_err(&pdev->dev, - "failed to register CPUFreq notifier: %d\n",ret); - cpufreq_unregister_driver(&ls1x_cpufreq_driver); - } - - return ret; -} - -static struct platform_driver ls1x_cpufreq_platdrv = { - .probe = ls1x_cpufreq_probe, - .remove = ls1x_cpufreq_remove, - .driver = { - .name = 
"ls1x-cpufreq", - }, -}; - -module_platform_driver(ls1x_cpufreq_platdrv); - -MODULE_ALIAS("platform:ls1x-cpufreq"); -MODULE_AUTHOR("Kelvin Cheung "); -MODULE_DESCRIPTION("Loongson1 CPUFreq driver"); -MODULE_LICENSE("GPL"); From 38a29e5834eba1e71bc4aab82b09ac065af62b80 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 12 Jan 2023 16:11:15 -0800 Subject: [PATCH 02/13] drivers/cpufreq: Remove "select SRCU" Now that the SRCU Kconfig option is unconditionally selected, there is no longer any point in selecting it. Therefore, remove the "select SRCU" Kconfig statements. Signed-off-by: Paul E. McKenney Acked-by: Viresh Kumar Reviewed-by: John Ogness Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 448b8ffb4ebd9..76aa1336e2be0 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -3,7 +3,6 @@ menu "CPU Frequency scaling" config CPU_FREQ bool "CPU Frequency scaling" - select SRCU help CPU Frequency scaling allows you to change the clock speed of CPUs on the fly. This is a nice method to save power, because From 7bc1fcd399018245575974508c26e882da0bd915 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Tue, 31 Jan 2023 17:00:06 +0800 Subject: [PATCH 03/13] ACPI: CPPC: Add AMD pstate energy performance preference cppc control Add support for setting and querying EPP preferences to the generic CPPC driver. This enables downstream drivers such as amd-pstate to discover and use these values. Downstream drivers that want to use the new symbols cppc_get_epp_caps and cppc_set_epp_perf for querying and setting EPP preferences will need to call cppc_set_epp_perf to enable the EPP function firstly. Acked-by: Huang Rui Reviewed-by: Mario Limonciello Reviewed-by: Wyes Karny Tested-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/cppc_acpi.c | 67 ++++++++++++++++++++++++++++++++++++++++ include/acpi/cppc_acpi.h | 12 +++++++ 2 files changed, 79 insertions(+) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 0f17b1c32718e..02d83c8072718 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1153,6 +1153,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf); } +/** + * cppc_get_epp_perf - Get the epp register value. + * @cpunum: CPU from which to get epp preference value. + * @epp_perf: Return address. + * + * Return: 0 for success, -EIO otherwise. + */ +int cppc_get_epp_perf(int cpunum, u64 *epp_perf) +{ + return cppc_get_perf(cpunum, ENERGY_PERF, epp_perf); +} +EXPORT_SYMBOL_GPL(cppc_get_epp_perf); + /** * cppc_get_perf_caps - Get a CPU's performance capabilities. * @cpunum: CPU from which to get capabilities info. @@ -1365,6 +1378,60 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) } EXPORT_SYMBOL_GPL(cppc_get_perf_ctrs); +/* + * Set Energy Performance Preference Register value through + * Performance Controls Interface + */ +int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable) +{ + int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); + struct cpc_register_resource *epp_set_reg; + struct cpc_register_resource *auto_sel_reg; + struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); + struct cppc_pcc_data *pcc_ss_data = NULL; + int ret; + + if (!cpc_desc) { + pr_debug("No CPC descriptor for CPU:%d\n", cpu); + return -ENODEV; + } + + auto_sel_reg = &cpc_desc->cpc_regs[AUTO_SEL_ENABLE]; + epp_set_reg = &cpc_desc->cpc_regs[ENERGY_PERF]; + + if (CPC_IN_PCC(epp_set_reg) || CPC_IN_PCC(auto_sel_reg)) { + if (pcc_ss_id < 0) { + pr_debug("Invalid pcc_ss_id for CPU:%d\n", cpu); + return -ENODEV; + } + + if (CPC_SUPPORTED(auto_sel_reg)) { + ret = cpc_write(cpu, auto_sel_reg, enable); + if (ret) + return ret; + } + + if 
(CPC_SUPPORTED(epp_set_reg)) { + ret = cpc_write(cpu, epp_set_reg, perf_ctrls->energy_perf); + if (ret) + return ret; + } + + pcc_ss_data = pcc_data[pcc_ss_id]; + + down_write(&pcc_ss_data->pcc_lock); + /* after writing CPC, transfer the ownership of PCC to platform */ + ret = send_pcc_cmd(pcc_ss_id, CMD_WRITE); + up_write(&pcc_ss_data->pcc_lock); + } else { + ret = -ENOTSUPP; + pr_debug("_CPC in PCC is not supported\n"); + } + + return ret; +} +EXPORT_SYMBOL_GPL(cppc_set_epp_perf); + /** * cppc_set_enable - Set to enable CPPC on the processor by writing the * Continuous Performance Control package EnableRegister field. diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index c5614444031ff..6b487a5bd6382 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -108,12 +108,14 @@ struct cppc_perf_caps { u32 lowest_nonlinear_perf; u32 lowest_freq; u32 nominal_freq; + u32 energy_perf; }; struct cppc_perf_ctrls { u32 max_perf; u32 min_perf; u32 desired_perf; + u32 energy_perf; }; struct cppc_perf_fb_ctrs { @@ -149,6 +151,8 @@ extern bool cpc_ffh_supported(void); extern bool cpc_supported_by_cpu(void); extern int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val); extern int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val); +extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf); +extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable); #else /* !CONFIG_ACPI_CPPC_LIB */ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) { @@ -202,6 +206,14 @@ static inline int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) { return -ENOTSUPP; } +static inline int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable) +{ + return -ENOTSUPP; +} +static inline int cppc_get_epp_perf(int cpunum, u64 *epp_perf) +{ + return -ENOTSUPP; +} #endif /* !CONFIG_ACPI_CPPC_LIB */ #endif /* _CPPC_ACPI_H*/ From e22abc6bb97cee240200d037a16b73951df16f9a Mon Sep 17 00:00:00 2001 From: 
Perry Yuan Date: Tue, 31 Jan 2023 17:00:07 +0800 Subject: [PATCH 04/13] Documentation: amd-pstate: add EPP profiles introduction The amd-pstate driver supports a feature called energy performance preference (EPP). Add information to the documentation to explain how users can interact with the sysfs files for this feature. 1) See all EPP profiles $ sudo cat /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_available_preferences default performance balance_performance balance_power power 2) Check current EPP profile $ sudo cat /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_preference performance 3) Set new EPP profile $ sudo bash -c "echo power > /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_preference" Acked-by: Huang Rui Reviewed-by: Mario Limonciello Reviewed-by: Wyes Karny Tested-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/amd-pstate.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index 5376d53faaa8e..98a2bb44f80c4 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -262,6 +262,25 @@ lowest non-linear performance in `AMD CPPC Performance Capability `_.) This attribute is read-only. +``energy_performance_available_preferences`` + +A list of all the supported EPP preferences that could be used for +``energy_performance_preference`` on this system. +These profiles represent different hints that are provided +to the low-level firmware about the user's desired energy vs efficiency +tradeoff. ``default`` means that the EPP value is set by platform +firmware. This attribute is read-only. + +``energy_performance_preference`` + +The current energy performance preference can be read from this attribute. 
+The user can change the current preference according to energy or performance needs. +The list of supported profiles can be read from the +``energy_performance_available_preferences`` attribute; all the profiles are +integer values in the range 0 to 255 when the EPP feature is enabled by platform +firmware. If the EPP feature is disabled, the driver ignores the written value. +This attribute is read-write. + Other performance and frequency values can be read back from ``/sys/devices/system/cpu/cpuX/acpi_cppc/``, see :ref:`cppc_sysfs`. From 36c5014e5460963ad7766487c0e22a7ff28681fc Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Tue, 31 Jan 2023 17:00:08 +0800 Subject: [PATCH 05/13] cpufreq: amd-pstate: optimize driver working mode selection in amd_pstate_param() The amd-pstate driver may support multiple working modes. Introduce a variable to keep track of which mode is currently enabled. Here we use cppc_state var to indicate which mode is enabled. This change will help to simplify amd_pstate_param() when choosing which mode is used for the following driver registration. Acked-by: Huang Rui Reviewed-by: Mario Limonciello Tested-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Wyes Karny Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate.c | 39 +++++++++++++++++++++++++++--------- include/linux/amd-pstate.h | 17 ++++++++++++++++ 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index c17bd845f5fcb..65c16edbbb20c 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -60,7 +60,18 @@ * module parameter to be able to enable it manually for debugging. 
*/ static struct cpufreq_driver amd_pstate_driver; -static int cppc_load __initdata; +static int cppc_state = AMD_PSTATE_DISABLE; + +static inline int get_mode_idx_from_str(const char *str, size_t size) +{ + int i; + + for (i=0; i < AMD_PSTATE_MAX; i++) { + if (!strncmp(str, amd_pstate_mode_string[i], size)) + return i; + } + return -EINVAL; +} static inline int pstate_enable(bool enable) { @@ -626,10 +637,10 @@ static int __init amd_pstate_init(void) /* * by default the pstate driver is disabled to load * enable the amd_pstate passive mode driver explicitly - * with amd_pstate=passive in kernel command line + * with amd_pstate=passive or other modes in kernel command line */ - if (!cppc_load) { - pr_debug("driver load is disabled, boot with amd_pstate=passive to enable this\n"); + if (cppc_state == AMD_PSTATE_DISABLE) { + pr_debug("driver load is disabled, boot with specific mode to enable this\n"); return -ENODEV; } @@ -671,16 +682,24 @@ device_initcall(amd_pstate_init); static int __init amd_pstate_param(char *str) { + size_t size; + int mode_idx; + if (!str) return -EINVAL; - if (!strcmp(str, "disable")) { - cppc_load = 0; - pr_info("driver is explicitly disabled\n"); - } else if (!strcmp(str, "passive")) - cppc_load = 1; + size = strlen(str); + mode_idx = get_mode_idx_from_str(str, size); - return 0; + if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) { + cppc_state = mode_idx; + if (cppc_state == AMD_PSTATE_DISABLE) + pr_info("driver is explicitly disabled\n"); + + return 0; + } + + return -EINVAL; } early_param("amd_pstate", amd_pstate_param); diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index 1c4b8659f171d..dae2ce0f67354 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -74,4 +74,21 @@ struct amd_cpudata { bool boost_supported; }; +/* + * enum amd_pstate_mode - driver working mode of amd pstate + */ +enum amd_pstate_mode { + AMD_PSTATE_DISABLE = 0, + AMD_PSTATE_PASSIVE, + AMD_PSTATE_ACTIVE, + 
AMD_PSTATE_MAX, +}; + +static const char * const amd_pstate_mode_string[] = { + [AMD_PSTATE_DISABLE] = "disable", + [AMD_PSTATE_PASSIVE] = "passive", + [AMD_PSTATE_ACTIVE] = "active", + NULL, +}; + #endif /* _LINUX_AMD_PSTATE_H */ From ffa5096a7c338641f70fb06d4778e8cf400181a8 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Tue, 31 Jan 2023 17:00:09 +0800 Subject: [PATCH 06/13] cpufreq: amd-pstate: implement Pstate EPP support for the AMD processors Add EPP driver support for AMD SoCs which support a dedicated MSR for CPPC. EPP is used by the DPM controller to configure the frequency that a core operates at during short periods of activity. The SoC EPP targets are configured on a scale from 0 to 255 where 0 represents maximum performance and 255 represents maximum efficiency. The amd-pstate driver exports profile string names to userspace that are tied to specific EPP values. The balance_performance string (0x80) provides the best balance for efficiency versus power on most systems, but users can choose other strings to meet their needs as well. $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences default performance balance_performance balance_power power $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference balance_performance To enable the driver, add `amd_pstate=active` to the kernel command line; the kernel will then load the active mode EPP driver. Acked-by: Huang Rui Reviewed-by: Mario Limonciello Reviewed-by: Wyes Karny Tested-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/amd-pstate.c | 420 ++++++++++++++++++++++++++++++++++- include/linux/amd-pstate.h | 16 +- 2 files changed, 429 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 65c16edbbb20c..bca86b5b8b120 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -59,9 +59,52 @@ * we disable it by default to go acpi-cpufreq on these processors and add a * module parameter to be able to enable it manually for debugging. */ +static struct cpufreq_driver *current_pstate_driver; static struct cpufreq_driver amd_pstate_driver; +static struct cpufreq_driver amd_pstate_epp_driver; static int cppc_state = AMD_PSTATE_DISABLE; +/* + * AMD Energy Preference Performance (EPP) + * The EPP is used in the CCLK DPM controller to drive + * the frequency that a core is going to operate during + * short periods of activity. EPP values will be utilized for + * different OS profiles (balanced, performance, power savings) + * display strings corresponding to EPP index in the + * energy_perf_strings[] + * index String + *------------------------------------- + * 0 default + * 1 performance + * 2 balance_performance + * 3 balance_power + * 4 power + */ +enum energy_perf_value_index { + EPP_INDEX_DEFAULT = 0, + EPP_INDEX_PERFORMANCE, + EPP_INDEX_BALANCE_PERFORMANCE, + EPP_INDEX_BALANCE_POWERSAVE, + EPP_INDEX_POWERSAVE, +}; + +static const char * const energy_perf_strings[] = { + [EPP_INDEX_DEFAULT] = "default", + [EPP_INDEX_PERFORMANCE] = "performance", + [EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance", + [EPP_INDEX_BALANCE_POWERSAVE] = "balance_power", + [EPP_INDEX_POWERSAVE] = "power", + NULL +}; + +static unsigned int epp_values[] = { + [EPP_INDEX_DEFAULT] = 0, + [EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE, + [EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE, + [EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE, + [EPP_INDEX_POWERSAVE] = 
AMD_CPPC_EPP_POWERSAVE, + }; + static inline int get_mode_idx_from_str(const char *str, size_t size) { int i; @@ -73,6 +116,114 @@ static inline int get_mode_idx_from_str(const char *str, size_t size) return -EINVAL; } +static DEFINE_MUTEX(amd_pstate_limits_lock); +static DEFINE_MUTEX(amd_pstate_driver_lock); + +static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) +{ + u64 epp; + int ret; + + if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (!cppc_req_cached) { + epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, + &cppc_req_cached); + if (epp) + return epp; + } + epp = (cppc_req_cached >> 24) & 0xFF; + } else { + ret = cppc_get_epp_perf(cpudata->cpu, &epp); + if (ret < 0) { + pr_debug("Could not retrieve energy perf value (%d)\n", ret); + return -EIO; + } + } + + return (s16)(epp & 0xff); +} + +static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata) +{ + s16 epp; + int index = -EINVAL; + + epp = amd_pstate_get_epp(cpudata, 0); + if (epp < 0) + return epp; + + switch (epp) { + case AMD_CPPC_EPP_PERFORMANCE: + index = EPP_INDEX_PERFORMANCE; + break; + case AMD_CPPC_EPP_BALANCE_PERFORMANCE: + index = EPP_INDEX_BALANCE_PERFORMANCE; + break; + case AMD_CPPC_EPP_BALANCE_POWERSAVE: + index = EPP_INDEX_BALANCE_POWERSAVE; + break; + case AMD_CPPC_EPP_POWERSAVE: + index = EPP_INDEX_POWERSAVE; + break; + default: + break; + } + + return index; +} + +static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) +{ + int ret; + struct cppc_perf_ctrls perf_ctrls; + + if (boot_cpu_has(X86_FEATURE_CPPC)) { + u64 value = READ_ONCE(cpudata->cppc_req_cached); + + value &= ~GENMASK_ULL(31, 24); + value |= (u64)epp << 24; + WRITE_ONCE(cpudata->cppc_req_cached, value); + + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + if (!ret) + cpudata->epp_cached = epp; + } else { + perf_ctrls.energy_perf = epp; + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); + if (ret) { + pr_debug("failed to set energy perf value (%d)\n", ret); 
+ return ret; + } + cpudata->epp_cached = epp; + } + + return ret; +} + +static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, + int pref_index) +{ + int epp = -EINVAL; + int ret; + + if (!pref_index) { + pr_debug("EPP pref_index is invalid\n"); + return -EINVAL; + } + + if (epp == -EINVAL) + epp = epp_values[pref_index]; + + if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { + pr_debug("EPP cannot be set under performance policy\n"); + return -EBUSY; + } + + ret = amd_pstate_set_epp(cpudata, epp); + + return ret; +} + static inline int pstate_enable(bool enable) { return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable); @@ -81,11 +232,21 @@ static inline int pstate_enable(bool enable) static int cppc_enable(bool enable) { int cpu, ret = 0; + struct cppc_perf_ctrls perf_ctrls; for_each_present_cpu(cpu) { ret = cppc_set_enable(cpu, enable); if (ret) return ret; + + /* Enable autonomous mode for EPP */ + if (cppc_state == AMD_PSTATE_ACTIVE) { + /* Set desired perf as zero to allow EPP firmware control */ + perf_ctrls.desired_perf = 0; + ret = cppc_set_perf(cpu, &perf_ctrls); + if (ret) + return ret; + } } return ret; @@ -429,7 +590,7 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata) return; cpudata->boost_supported = true; - amd_pstate_driver.boost_enabled = true; + current_pstate_driver->boost_enabled = true; } static void amd_perf_ctl_reset(unsigned int cpu) @@ -603,10 +764,61 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, return sprintf(&buf[0], "%u\n", perf); } +static ssize_t show_energy_performance_available_preferences( + struct cpufreq_policy *policy, char *buf) +{ + int i = 0; + int offset = 0; + + while (energy_perf_strings[i] != NULL) + offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]); + + sysfs_emit_at(buf, offset, "\n"); + + return offset; +} + +static ssize_t store_energy_performance_preference( + struct cpufreq_policy *policy, const char *buf, size_t count) +{ + 
struct amd_cpudata *cpudata = policy->driver_data; + char str_preference[21]; + ssize_t ret; + + ret = sscanf(buf, "%20s", str_preference); + if (ret != 1) + return -EINVAL; + + ret = match_string(energy_perf_strings, -1, str_preference); + if (ret < 0) + return -EINVAL; + + mutex_lock(&amd_pstate_limits_lock); + ret = amd_pstate_set_energy_pref_index(cpudata, ret); + mutex_unlock(&amd_pstate_limits_lock); + + return ret ?: count; +} + +static ssize_t show_energy_performance_preference( + struct cpufreq_policy *policy, char *buf) +{ + struct amd_cpudata *cpudata = policy->driver_data; + int preference; + + preference = amd_pstate_get_energy_pref_index(cpudata); + if (preference < 0) + return preference; + + return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]); +} + cpufreq_freq_attr_ro(amd_pstate_max_freq); cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); cpufreq_freq_attr_ro(amd_pstate_highest_perf); +cpufreq_freq_attr_rw(energy_performance_preference); +cpufreq_freq_attr_ro(energy_performance_available_preferences); static struct freq_attr *amd_pstate_attr[] = { &amd_pstate_max_freq, @@ -615,6 +827,186 @@ static struct freq_attr *amd_pstate_attr[] = { NULL, }; +static struct freq_attr *amd_pstate_epp_attr[] = { + &amd_pstate_max_freq, + &amd_pstate_lowest_nonlinear_freq, + &amd_pstate_highest_perf, + &energy_performance_preference, + &energy_performance_available_preferences, + NULL, +}; + +static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) +{ + int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; + struct amd_cpudata *cpudata; + struct device *dev; + int rc; + u64 value; + + /* + * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, + * which is ideal for initialization process. 
+ */ + amd_perf_ctl_reset(policy->cpu); + dev = get_cpu_device(policy->cpu); + if (!dev) + goto free_cpudata1; + + cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL); + if (!cpudata) + return -ENOMEM; + + cpudata->cpu = policy->cpu; + cpudata->epp_policy = 0; + + rc = amd_pstate_init_perf(cpudata); + if (rc) + goto free_cpudata1; + + min_freq = amd_get_min_freq(cpudata); + max_freq = amd_get_max_freq(cpudata); + nominal_freq = amd_get_nominal_freq(cpudata); + lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); + if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { + dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", + min_freq, max_freq); + ret = -EINVAL; + goto free_cpudata1; + } + + policy->cpuinfo.min_freq = min_freq; + policy->cpuinfo.max_freq = max_freq; + /* It will be updated by governor */ + policy->cur = policy->cpuinfo.min_freq; + + /* Initial processor data capability frequencies */ + cpudata->max_freq = max_freq; + cpudata->min_freq = min_freq; + cpudata->nominal_freq = nominal_freq; + cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; + + policy->driver_data = cpudata; + + cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0); + + policy->min = policy->cpuinfo.min_freq; + policy->max = policy->cpuinfo.max_freq; + + /* + * Set the policy to powersave to provide a valid fallback value in case + * the default cpufreq governor is neither powersave nor performance. 
+ */ + policy->policy = CPUFREQ_POLICY_POWERSAVE; + + if (boot_cpu_has(X86_FEATURE_CPPC)) { + policy->fast_switch_possible = true; + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); + if (ret) + return ret; + WRITE_ONCE(cpudata->cppc_req_cached, value); + + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value); + if (ret) + return ret; + WRITE_ONCE(cpudata->cppc_cap1_cached, value); + } + amd_pstate_boost_init(cpudata); + + return 0; + +free_cpudata1: + kfree(cpudata); + return ret; +} + +static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) +{ + pr_debug("CPU %d exiting\n", policy->cpu); + policy->fast_switch_possible = false; + return 0; +} + +static void amd_pstate_epp_init(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct amd_cpudata *cpudata = policy->driver_data; + u32 max_perf, min_perf; + u64 value; + s16 epp; + + max_perf = READ_ONCE(cpudata->highest_perf); + min_perf = READ_ONCE(cpudata->lowest_perf); + + value = READ_ONCE(cpudata->cppc_req_cached); + + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) + min_perf = max_perf; + + /* Initial min/max values for CPPC Performance Controls Register */ + value &= ~AMD_CPPC_MIN_PERF(~0L); + value |= AMD_CPPC_MIN_PERF(min_perf); + + value &= ~AMD_CPPC_MAX_PERF(~0L); + value |= AMD_CPPC_MAX_PERF(max_perf); + + /* CPPC EPP feature require to set zero to the desire perf bit */ + value &= ~AMD_CPPC_DES_PERF(~0L); + value |= AMD_CPPC_DES_PERF(0); + + if (cpudata->epp_policy == cpudata->policy) + goto skip_epp; + + cpudata->epp_policy = cpudata->policy; + + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { + epp = amd_pstate_get_epp(cpudata, value); + if (epp < 0) + goto skip_epp; + /* force the epp value to be zero for performance policy */ + epp = 0; + } else { + /* Get BIOS pre-defined epp value */ + epp = amd_pstate_get_epp(cpudata, value); + if (epp) + goto skip_epp; + } + /* Set initial EPP value */ + if (boot_cpu_has(X86_FEATURE_CPPC)) { + 
value &= ~GENMASK_ULL(31, 24); + value |= (u64)epp << 24; + } + +skip_epp: + WRITE_ONCE(cpudata->cppc_req_cached, value); + amd_pstate_set_epp(cpudata, epp); + cpufreq_cpu_put(policy); +} + +static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) +{ + struct amd_cpudata *cpudata = policy->driver_data; + + if (!policy->cpuinfo.max_freq) + return -ENODEV; + + pr_debug("set_policy: cpuinfo.max %u policy->max %u\n", + policy->cpuinfo.max_freq, policy->max); + + cpudata->policy = policy->policy; + + amd_pstate_epp_init(policy->cpu); + + return 0; +} + +static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy) +{ + cpufreq_verify_within_cpu_limits(policy); + pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min); + return 0; +} + static struct cpufreq_driver amd_pstate_driver = { .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS, .verify = amd_pstate_verify, @@ -628,6 +1020,16 @@ static struct cpufreq_driver amd_pstate_driver = { .attr = amd_pstate_attr, }; +static struct cpufreq_driver amd_pstate_epp_driver = { + .flags = CPUFREQ_CONST_LOOPS, + .verify = amd_pstate_epp_verify_policy, + .setpolicy = amd_pstate_epp_set_policy, + .init = amd_pstate_epp_cpu_init, + .exit = amd_pstate_epp_cpu_exit, + .name = "amd_pstate_epp", + .attr = amd_pstate_epp_attr, +}; + static int __init amd_pstate_init(void) { int ret; @@ -656,7 +1058,8 @@ static int __init amd_pstate_init(void) /* capability check */ if (boot_cpu_has(X86_FEATURE_CPPC)) { pr_debug("AMD CPPC MSR based functionality is supported\n"); - amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf; + if (cppc_state == AMD_PSTATE_PASSIVE) + current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; } else { pr_debug("AMD CPPC shared memory based functionality is supported\n"); static_call_update(amd_pstate_enable, cppc_enable); @@ -667,14 +1070,13 @@ static int __init amd_pstate_init(void) /* enable amd pstate feature */ ret = amd_pstate_enable(true); if (ret) { - 
pr_err("failed to enable amd-pstate with return %d\n", ret); + pr_err("failed to enable with return %d\n", ret); return ret; } - ret = cpufreq_register_driver(&amd_pstate_driver); + ret = cpufreq_register_driver(current_pstate_driver); if (ret) - pr_err("failed to register amd_pstate_driver with return %d\n", - ret); + pr_err("failed to register with return %d\n", ret); return ret; } @@ -696,6 +1098,12 @@ static int __init amd_pstate_param(char *str) if (cppc_state == AMD_PSTATE_DISABLE) pr_info("driver is explicitly disabled\n"); + if (cppc_state == AMD_PSTATE_ACTIVE) + current_pstate_driver = &amd_pstate_epp_driver; + + if (cppc_state == AMD_PSTATE_PASSIVE) + current_pstate_driver = &amd_pstate_driver; + return 0; } diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index dae2ce0f67354..72ea7cf85ca3c 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -12,6 +12,11 @@ #include +#define AMD_CPPC_EPP_PERFORMANCE 0x00 +#define AMD_CPPC_EPP_BALANCE_PERFORMANCE 0x80 +#define AMD_CPPC_EPP_BALANCE_POWERSAVE 0xBF +#define AMD_CPPC_EPP_POWERSAVE 0xFF + /********************************************************************* * AMD P-state INTERFACE * *********************************************************************/ @@ -47,6 +52,10 @@ struct amd_aperf_mperf { * @prev: Last Aperf/Mperf/tsc count value read from register * @freq: current cpu frequency value * @boost_supported: check whether the Processor or SBIOS supports boost mode + * @epp_policy: Last saved policy used to set energy-performance preference + * @epp_cached: Cached CPPC energy-performance preference value + * @policy: Cpufreq policy value + * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value * * The amd_cpudata is key private data for each CPU thread in AMD P-State, and * represents all the attributes and goals that AMD P-State requests at runtime. 
@@ -72,6 +81,12 @@ struct amd_cpudata { u64 freq; bool boost_supported; + + /* EPP feature related attributes*/ + s16 epp_policy; + s16 epp_cached; + u32 policy; + u64 cppc_cap1_cached; }; /* @@ -90,5 +105,4 @@ static const char * const amd_pstate_mode_string[] = { [AMD_PSTATE_ACTIVE] = "active", NULL, }; - #endif /* _LINUX_AMD_PSTATE_H */ From d4da12f8033a123353eccf993cb95ee5bff21e7c Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Tue, 31 Jan 2023 17:00:10 +0800 Subject: [PATCH 07/13] cpufreq: amd-pstate: implement amd pstate cpu online and offline callback Adds online and offline driver callback support to allow cpu cores go offline and help to restore the previous working states when core goes back online later for EPP driver mode. Acked-by: Huang Rui Reviewed-by: Mario Limonciello Reviewed-by: Wyes Karny Tested-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate.c | 82 ++++++++++++++++++++++++++++++++++++ include/linux/amd-pstate.h | 1 + 2 files changed, 83 insertions(+) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index bca86b5b8b120..26f6ac83d87e6 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1000,6 +1000,86 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) return 0; } +static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) +{ + struct cppc_perf_ctrls perf_ctrls; + u64 value, max_perf; + int ret; + + ret = amd_pstate_enable(true); + if (ret) + pr_err("failed to enable amd pstate during resume, return %d\n", ret); + + value = READ_ONCE(cpudata->cppc_req_cached); + max_perf = READ_ONCE(cpudata->highest_perf); + + if (boot_cpu_has(X86_FEATURE_CPPC)) { + wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + } else { + perf_ctrls.max_perf = max_perf; + perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached); + cppc_set_perf(cpudata->cpu, &perf_ctrls); + } +} + +static int 
amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) +{ + struct amd_cpudata *cpudata = policy->driver_data; + + pr_debug("AMD CPU Core %d going online\n", cpudata->cpu); + + if (cppc_state == AMD_PSTATE_ACTIVE) { + amd_pstate_epp_reenable(cpudata); + cpudata->suspended = false; + } + + return 0; +} + +static void amd_pstate_epp_offline(struct cpufreq_policy *policy) +{ + struct amd_cpudata *cpudata = policy->driver_data; + struct cppc_perf_ctrls perf_ctrls; + int min_perf; + u64 value; + + min_perf = READ_ONCE(cpudata->lowest_perf); + value = READ_ONCE(cpudata->cppc_req_cached); + + mutex_lock(&amd_pstate_limits_lock); + if (boot_cpu_has(X86_FEATURE_CPPC)) { + cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN; + + /* Set max perf same as min perf */ + value &= ~AMD_CPPC_MAX_PERF(~0L); + value |= AMD_CPPC_MAX_PERF(min_perf); + value &= ~AMD_CPPC_MIN_PERF(~0L); + value |= AMD_CPPC_MIN_PERF(min_perf); + wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + } else { + perf_ctrls.desired_perf = 0; + perf_ctrls.max_perf = min_perf; + perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE); + cppc_set_perf(cpudata->cpu, &perf_ctrls); + } + mutex_unlock(&amd_pstate_limits_lock); +} + +static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) +{ + struct amd_cpudata *cpudata = policy->driver_data; + + pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu); + + if (cpudata->suspended) + return 0; + + if (cppc_state == AMD_PSTATE_ACTIVE) + amd_pstate_epp_offline(policy); + + return 0; +} + static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy) { cpufreq_verify_within_cpu_limits(policy); @@ -1026,6 +1106,8 @@ static struct cpufreq_driver amd_pstate_epp_driver = { .setpolicy = amd_pstate_epp_set_policy, .init = amd_pstate_epp_cpu_init, .exit = amd_pstate_epp_cpu_exit, + .offline = amd_pstate_epp_cpu_offline, + .online = amd_pstate_epp_cpu_online, .name = "amd_pstate_epp", .attr = amd_pstate_epp_attr, }; diff --git 
a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index 72ea7cf85ca3c..f5f22418e64bd 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -87,6 +87,7 @@ struct amd_cpudata { s16 epp_cached; u32 policy; u64 cppc_cap1_cached; + bool suspended; }; /* From 50ddd2f7826927e6dc111a43b3a183f53c260fa4 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Tue, 31 Jan 2023 17:00:11 +0800 Subject: [PATCH 08/13] cpufreq: amd-pstate: implement suspend and resume callbacks add suspend and resume support for the AMD processors by amd_pstate_epp driver instance. When the CPPC is suspended, EPP driver will set EPP profile to 'power' profile and set max/min perf to lowest perf value. When resume happens, it will restore the MSR registers with previous cached value. Acked-by: Huang Rui Reviewed-by: Mario Limonciello Reviewed-by: Wyes Karny Tested-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate.c | 40 ++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 26f6ac83d87e6..4e3770e0d4d35 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1087,6 +1087,44 @@ static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy) return 0; } +static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) +{ + struct amd_cpudata *cpudata = policy->driver_data; + int ret; + + /* avoid suspending when EPP is not enabled */ + if (cppc_state != AMD_PSTATE_ACTIVE) + return 0; + + /* set this flag to avoid setting core offline*/ + cpudata->suspended = true; + + /* disable CPPC in lowlevel firmware */ + ret = amd_pstate_enable(false); + if (ret) + pr_err("failed to suspend, return %d\n", ret); + + return 0; +} + +static int amd_pstate_epp_resume(struct cpufreq_policy *policy) +{ + struct amd_cpudata *cpudata = policy->driver_data; + + if (cpudata->suspended) { + 
mutex_lock(&amd_pstate_limits_lock); + + /* enable amd pstate from suspend state*/ + amd_pstate_epp_reenable(cpudata); + + mutex_unlock(&amd_pstate_limits_lock); + + cpudata->suspended = false; + } + + return 0; +} + static struct cpufreq_driver amd_pstate_driver = { .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS, .verify = amd_pstate_verify, @@ -1108,6 +1146,8 @@ static struct cpufreq_driver amd_pstate_epp_driver = { .exit = amd_pstate_epp_cpu_exit, .offline = amd_pstate_epp_cpu_offline, .online = amd_pstate_epp_cpu_online, + .suspend = amd_pstate_epp_suspend, + .resume = amd_pstate_epp_resume, .name = "amd_pstate_epp", .attr = amd_pstate_epp_attr, }; From abd61c08ef349af08df0bf587d33f5bde5996a89 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Tue, 31 Jan 2023 17:00:12 +0800 Subject: [PATCH 09/13] cpufreq: amd-pstate: add driver working mode switch support When the amd-pstate driver is loaded in a specific driver mode, users need a way to check which mode is enabled. Add a sysfs entry to show the current status: $ cat /sys/devices/system/cpu/amd_pstate/status active Users can also switch the pstate driver mode by writing a mode string to the sysfs entry, as shown below. Enable passive mode: $ sudo bash -c "echo passive > /sys/devices/system/cpu/amd_pstate/status" Enable active mode (EPP driver mode): $ sudo bash -c "echo active > /sys/devices/system/cpu/amd_pstate/status" Acked-by: Huang Rui Reviewed-by: Mario Limonciello Reviewed-by: Wyes Karny Tested-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/amd-pstate.c | 118 +++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 4e3770e0d4d35..1ae2e0d56ed1f 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -63,6 +63,7 @@ static struct cpufreq_driver *current_pstate_driver; static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_epp_driver; static int cppc_state = AMD_PSTATE_DISABLE; +struct kobject *amd_pstate_kobj; /* * AMD Energy Preference Performance (EPP) @@ -673,6 +674,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) policy->driver_data = cpudata; amd_pstate_boost_init(cpudata); + if (!current_pstate_driver->adjust_perf) + current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; return 0; @@ -813,12 +816,99 @@ static ssize_t show_energy_performance_preference( return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]); } +static ssize_t amd_pstate_show_status(char *buf) +{ + if (!current_pstate_driver) + return sysfs_emit(buf, "disable\n"); + + return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]); +} + +static void amd_pstate_driver_cleanup(void) +{ + current_pstate_driver = NULL; +} + +static int amd_pstate_update_status(const char *buf, size_t size) +{ + int ret; + int mode_idx; + + if (size > 7 || size < 6) + return -EINVAL; + mode_idx = get_mode_idx_from_str(buf, size); + + switch(mode_idx) { + case AMD_PSTATE_DISABLE: + if (!current_pstate_driver) + return -EINVAL; + if (cppc_state == AMD_PSTATE_ACTIVE) + return -EBUSY; + ret = cpufreq_unregister_driver(current_pstate_driver); + amd_pstate_driver_cleanup(); + break; + case AMD_PSTATE_PASSIVE: + if (current_pstate_driver) { + if (current_pstate_driver == &amd_pstate_driver) + return 0; + cpufreq_unregister_driver(current_pstate_driver); + cppc_state = AMD_PSTATE_PASSIVE; + current_pstate_driver = &amd_pstate_driver; + } + + ret = 
cpufreq_register_driver(current_pstate_driver); + break; + case AMD_PSTATE_ACTIVE: + if (current_pstate_driver) { + if (current_pstate_driver == &amd_pstate_epp_driver) + return 0; + cpufreq_unregister_driver(current_pstate_driver); + current_pstate_driver = &amd_pstate_epp_driver; + cppc_state = AMD_PSTATE_ACTIVE; + } + + ret = cpufreq_register_driver(current_pstate_driver); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static ssize_t show_status(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + ssize_t ret; + + mutex_lock(&amd_pstate_driver_lock); + ret = amd_pstate_show_status(buf); + mutex_unlock(&amd_pstate_driver_lock); + + return ret; +} + +static ssize_t store_status(struct kobject *a, struct kobj_attribute *b, + const char *buf, size_t count) +{ + char *p = memchr(buf, '\n', count); + int ret; + + mutex_lock(&amd_pstate_driver_lock); + ret = amd_pstate_update_status(buf, p ? p - buf : count); + mutex_unlock(&amd_pstate_driver_lock); + + return ret < 0 ? 
ret : count; +} + cpufreq_freq_attr_ro(amd_pstate_max_freq); cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); cpufreq_freq_attr_ro(amd_pstate_highest_perf); cpufreq_freq_attr_rw(energy_performance_preference); cpufreq_freq_attr_ro(energy_performance_available_preferences); +define_one_global_rw(status); static struct freq_attr *amd_pstate_attr[] = { &amd_pstate_max_freq, @@ -836,6 +926,15 @@ static struct freq_attr *amd_pstate_epp_attr[] = { NULL, }; +static struct attribute *pstate_global_attributes[] = { + &status.attr, + NULL +}; + +static const struct attribute_group amd_pstate_global_attr_group = { + .attrs = pstate_global_attributes, +}; + static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) { int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; @@ -1200,6 +1299,25 @@ static int __init amd_pstate_init(void) if (ret) pr_err("failed to register with return %d\n", ret); + amd_pstate_kobj = kobject_create_and_add("amd_pstate", &cpu_subsys.dev_root->kobj); + if (!amd_pstate_kobj) { + ret = -EINVAL; + pr_err("global sysfs registration failed.\n"); + goto kobject_free; + } + + ret = sysfs_create_group(amd_pstate_kobj, &amd_pstate_global_attr_group); + if (ret) { + pr_err("sysfs attribute export failed with error %d.\n", ret); + goto global_attr_free; + } + + return ret; + +global_attr_free: + kobject_put(amd_pstate_kobj); +kobject_free: + cpufreq_unregister_driver(current_pstate_driver); return ret; } device_initcall(amd_pstate_init); From 92e6088427c5da7ef8dc92d6ab2f0f8f6a01fab7 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Tue, 31 Jan 2023 17:00:13 +0800 Subject: [PATCH 10/13] Documentation: amd-pstate: add amd pstate driver mode introduction The amd-pstate driver has two operation modes supported: * CPPC Autonomous (active) mode * CPPC non-autonomous (passive) mode. active mode and passive mode can be chosen by different kernel parameters. 
Acked-by: Huang Rui Reviewed-by: Mario Limonciello Reviewed-by: Wyes Karny Tested-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/amd-pstate.rst | 26 +++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index 98a2bb44f80c4..b6aee69f564f3 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -299,8 +299,30 @@ module which supports the new AMD P-States mechanism on most of the future AMD platforms. The AMD P-States mechanism is the more performance and energy efficiency frequency management method on AMD processors. -Kernel Module Options for ``amd-pstate`` -========================================= + +AMD Pstate Driver Operation Modes +================================= + +``amd_pstate`` CPPC has two operation modes: CPPC Autonomous (active) mode and +CPPC non-autonomous (passive) mode. +Active mode and passive mode are selected with different kernel parameters. +When in Autonomous mode, CPPC ignores requests made in the Desired Performance +Target register and takes into account only the values set in the Minimum requested +performance, Maximum requested performance, and Energy Performance Preference +registers. When Autonomous is disabled, it only considers the Desired Performance Target. + +Active Mode +------------ + +``amd_pstate=active`` + +This is the low-level firmware control mode which is implemented by the ``amd_pstate_epp`` +driver with ``amd_pstate=active`` passed to the kernel in the command line. +In this mode, the ``amd_pstate_epp`` driver provides a hint to the hardware if software +wants to bias toward performance (0x0) or energy efficiency (0xff) to the CPPC firmware. 
+The CPPC power algorithm then evaluates the runtime workload and adjusts the real-time +core frequency according to the power supply, thermal conditions, core voltage and other +hardware conditions. Passive Mode ------------ From 5014603e409b01001bfbeae090a16733f61a7640 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Tue, 31 Jan 2023 17:00:14 +0800 Subject: [PATCH 11/13] Documentation: introduce amd pstate active mode kernel command line options The AMD Pstate driver supports another firmware-based autonomous mode, selected by adding "amd_pstate=active" to the kernel command line. In autonomous mode, the SMU firmware decides frequencies at runtime based on workload utilization, usage in other IPs, and infrastructure limits such as power, thermals and so on. Acked-by: Huang Rui Reviewed-by: Mario Limonciello Reviewed-by: Wyes Karny Tested-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/kernel-parameters.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6cfa6e3996cf7..e3618dfdb36ab 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -7020,3 +7020,10 @@ management firmware translates the requests into actual hardware states (core frequency, data fabric and memory clocks etc.) + active + Use amd_pstate_epp driver instance as the scaling driver. + The driver provides a hint to the CPPC firmware if software + wants to bias toward performance (0x0) or energy efficiency + (0xff). The CPPC power algorithm then evaluates + the runtime workload and adjusts the real-time core + frequency. 
From 3ec32b6d17c5b229c6f5d05849932af1f0c6f523 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Tue, 31 Jan 2023 17:00:15 +0800 Subject: [PATCH 12/13] cpufreq: amd-pstate: convert sprintf with sysfs_emit() Replace sprintf() with sysfs_emit(), the helper intended for formatting sysfs show() output. No functional impact is intended. Acked-by: Huang Rui Reviewed-by: Mario Limonciello Reviewed-by: Wyes Karny Tested-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 1ae2e0d56ed1f..168a28bed6ee5 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -736,7 +736,7 @@ static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, if (max_freq < 0) return max_freq; - return sprintf(&buf[0], "%u\n", max_freq); + return sysfs_emit(buf, "%u\n", max_freq); } static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy, @@ -749,7 +749,7 @@ static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *poli if (freq < 0) return freq; - return sprintf(&buf[0], "%u\n", freq); + return sysfs_emit(buf, "%u\n", freq); } /* @@ -764,7 +764,7 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, perf = READ_ONCE(cpudata->highest_perf); - return sprintf(&buf[0], "%u\n", perf); + return sysfs_emit(buf, "%u\n", perf); } static ssize_t show_energy_performance_available_preferences( From b9e6a2d47b2565eb450d3ee900fba49cc9b25cbd Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Tue, 31 Jan 2023 17:00:16 +0800 Subject: [PATCH 13/13] Documentation: amd-pstate: introduce new global sysfs attributes The amd-pstate driver supports switching working modes at runtime. Users can view and change modes by interacting with the "status" sysfs attribute. 
1) check driver mode: $ cat /sys/devices/system/cpu/amd_pstate/status 2) switch mode: `# echo "passive" | sudo tee /sys/devices/system/cpu/amd_pstate/status` or `# echo "active" | sudo tee /sys/devices/system/cpu/amd_pstate/status` Acked-by: Huang Rui Reviewed-by: Mario Limonciello Reviewed-by: Wyes Karny Tested-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/amd-pstate.rst | 29 +++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index b6aee69f564f3..5304adf2fc2f3 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -339,6 +339,35 @@ processor must provide at least nominal performance requested and go higher if c operating conditions allow. +User Space Interface in ``sysfs`` +================================= + +Global Attributes +----------------- + +``amd-pstate`` exposes several global attributes (files) in ``sysfs`` to +control its functionality at the system level. They are located in the +``/sys/devices/system/cpu/amd_pstate/`` directory and affect all CPUs. + +``status`` + Operation mode of the driver: "active", "passive" or "disable". + + "active" + The driver is functional and in the ``active mode``. + + "passive" + The driver is functional and in the ``passive mode``. + + "disable" + The driver is unregistered and not functional now. + + This attribute can be written to in order to change the driver's + operation mode or to unregister it. The string written to it must be + one of its possible values and, if successful, writing one of + these values to the sysfs file will cause the driver to switch over + to the operation mode represented by that string - or to be + unregistered in the "disable" case. + ``cpupower`` tool support for ``amd-pstate`` ===============================================