From a121798ae669351ec0697c94f71c3a692b2a755b Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:36:46 +0000 Subject: [PATCH 01/30] x86/resctrl: Fix allocation of cleanest CLOSID on platforms with no monitors Commit 6eac36bb9eb0 ("x86/resctrl: Allocate the cleanest CLOSID by searching closid_num_dirty_rmid") added logic that causes resctrl to search for the CLOSID with the fewest dirty cache lines when creating a new control group, if requested by the arch code. This depends on the values read from the llc_occupancy counters. The logic is applicable to architectures where the CLOSID effectively forms part of the monitoring identifier and so do not allow complete freedom to choose an unused monitoring identifier for a given CLOSID. This support missed that some platforms may not have these counters. This causes a NULL pointer dereference when creating a new control group as the array was not allocated by dom_data_init(). As this feature isn't necessary on platforms that don't have cache occupancy monitors, add this to the check that occurs when a new control group is allocated. Fixes: 6eac36bb9eb0 ("x86/resctrl: Allocate the cleanest CLOSID by searching closid_num_dirty_rmid") Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: David Hildenbrand Reviewed-by: Reinette Chatre Reviewed-by: Tony Luck Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-2-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 6419e04d8a7b2..04b653d613e88 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -157,7 +157,8 @@ static int closid_alloc(void) lockdep_assert_held(&rdtgroup_mutex); - if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { + if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) && + is_llc_occupancy_enabled()) { cleanest_closid = resctrl_find_cleanest_closid(); if (cleanest_closid < 0) return cleanest_closid; From 3c021531131c9ee5df5da739819a515326ee9570 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:36:47 +0000 Subject: [PATCH 02/30] x86/resctrl: Add a helper to avoid reaching into the arch code resource list Resctrl occasionally wants to know something about a specific resource, in these cases it reaches into the arch code's rdt_resources_all[] array. Once the filesystem parts of resctrl are moved to /fs/, this means it will need visibility of the architecture specific struct rdt_hw_resource definition, and the array of all resources. All architectures would also need a r_resctrl member in this struct. Instead, abstract this via a helper to allow architectures to do different things here. Move the level enum to the resctrl header and add a helper to retrieve the struct rdt_resource by 'rid'. resctrl_arch_get_resource() should not return NULL for any value in the enum, it may instead return a dummy resource that is !alloc_enabled && !mon_enabled. Co-developed-by: Dave Martin Signed-off-by: Dave Martin Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Reinette Chatre Reviewed-by: Tony Luck Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Peter Newman Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-3-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 12 ++++++++++-- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 2 +- arch/x86/kernel/cpu/resctrl/internal.h | 10 ---------- arch/x86/kernel/cpu/resctrl/monitor.c | 8 ++++---- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 14 +++++++------- include/linux/resctrl.h | 17 +++++++++++++++++ 6 files changed, 39 insertions(+), 24 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 3d1735ed8d1ff..12b41316d8f7f 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -62,7 +62,7 @@ static void mba_wrmsr_amd(struct msr_param *m); #define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains) #define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains) -struct rdt_hw_resource rdt_resources_all[] = { +struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { [RDT_RESOURCE_L3] = { .r_resctrl = { @@ -127,6 +127,14 @@ u32 resctrl_arch_system_num_rmid_idx(void) return r->num_rmid; } +struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) +{ + if (l >= RDT_NUM_RESOURCES) + return NULL; + + return &rdt_resources_all[l].r_resctrl; +} + /* * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs * as they do not have CPUID enumeration support for Cache allocation. @@ -174,7 +182,7 @@ static inline void cache_alloc_hsw_probe(void) bool is_mba_sc(struct rdt_resource *r) { if (!r) - return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc; + r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); /* * The software controller support is only applicable to MBA resource. diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 536351159cc23..4af27ef5a8a19 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -649,7 +649,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) resid = md.u.rid; domid = md.u.domid; evtid = md.u.evtid; - r = &rdt_resources_all[resid].r_resctrl; + r = resctrl_arch_get_resource(resid); if (md.u.sum) { /* diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 20c898f09b7e7..75252a7e1ebc6 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -512,16 +512,6 @@ extern struct rdtgroup rdtgroup_default; extern struct dentry *debugfs_resctrl; extern enum resctrl_event_id mba_mbps_default_event; -enum resctrl_res_level { - RDT_RESOURCE_L3, - RDT_RESOURCE_L2, - RDT_RESOURCE_MBA, - RDT_RESOURCE_SMBA, - - /* Must be the last */ - RDT_NUM_RESOURCES, -}; - static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(res); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 94a1d97804614..58b5b21349a8c 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -365,7 +365,7 @@ static void limbo_release_entry(struct rmid_entry *entry) */ void __check_limbo(struct rdt_mon_domain *d, bool force_free) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); u32 idx_limit = resctrl_arch_system_num_rmid_idx(); struct rmid_entry *entry; u32 idx, cur_idx = 1; @@ -521,7 +521,7 @@ int alloc_rmid(u32 closid) static void add_rmid_to_limbo(struct rmid_entry *entry) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); struct rdt_mon_domain *d; u32 idx; @@ -761,7 +761,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm) struct rdtgroup *entry; u32 cur_bw, user_bw; - r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA); evt_id = rgrp->mba_mbps_event; closid = rgrp->closid; @@ -925,7 +925,7 @@ void mbm_handle_overflow(struct work_struct *work) if (!resctrl_mounted || !resctrl_arch_mon_capable()) goto out_unlock; - r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + r = resctrl_arch_get_resource(RDT_RESOURCE_L3); d = container_of(work, struct rdt_mon_domain, mbm_over.work); list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 04b653d613e88..45093b9e8e630 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2256,7 +2256,7 @@ static void l2_qos_cfg_update(void *arg) static inline bool is_mba_linear(void) { - return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear; + return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear; } static int set_cache_qos_cfg(int level, bool enable) @@ -2346,8 +2346,8 @@ static void mba_sc_domain_destroy(struct rdt_resource *r, */ static bool supports_mba_mbps(void) { - struct rdt_resource *rmbm = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + struct rdt_resource *rmbm = resctrl_arch_get_resource(RDT_RESOURCE_L3); + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); return (is_mbm_enabled() && r->alloc_capable && is_mba_linear() && @@ -2360,7 +2360,7 @@ static bool supports_mba_mbps(void) */ static int set_mba_sc(bool mba_sc) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); u32 num_closid = resctrl_arch_get_num_closid(r); struct rdt_ctrl_domain *d; unsigned long fflags; @@ -2714,7 +2714,7 @@ static int rdt_get_tree(struct fs_context *fc) resctrl_mounted = true; if (is_mbm_enabled()) { - r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + r = resctrl_arch_get_resource(RDT_RESOURCE_L3); list_for_each_entry(dom, &r->mon_domains, hdr.list) mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL, RESCTRL_PICK_ANY_CPU); @@ -3951,7 +3951,7 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) seq_puts(seq, ",cdpl2"); - if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl)) + if (is_mba_sc(resctrl_arch_get_resource(RDT_RESOURCE_MBA))) seq_puts(seq, ",mba_MBps"); if (resctrl_debug) @@ -4151,7 +4151,7 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) void resctrl_offline_cpu(unsigned int cpu) { - struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); struct rdt_mon_domain *d; struct rdtgroup *rdtgrp; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index d94abba1c716e..37279e2a89dae 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -37,6 +37,16 @@ enum resctrl_conf_type { CDP_DATA, }; +enum resctrl_res_level { + RDT_RESOURCE_L3, + RDT_RESOURCE_L2, + RDT_RESOURCE_MBA, + RDT_RESOURCE_SMBA, + + /* Must be the last */ + RDT_NUM_RESOURCES, +}; + #define CDP_NUM_TYPES (CDP_DATA + 1) /* @@ -226,6 +236,13 @@ struct rdt_resource { bool cdp_capable; }; +/* + * Get the resource that exists at this level. If the level is not supported + * a dummy/not-capable resource can be returned. Levels >= RDT_NUM_RESOURCES + * will return NULL. + */ +struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l); + /** * struct resctrl_schema - configuration abilities of a resource presented to * user-space From 131dab13a82d9edd97dd96d98d2919f56f339331 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:36:48 +0000 Subject: [PATCH 03/30] x86/resctrl: Remove fflags from struct rdt_resource The resctrl arch code specifies whether a resource controls a cache or memory using the fflags field. This field is then used by resctrl to determine which files should be exposed in the filesystem. Allowing the architecture to pick this value means the RFTYPE_ flags have to be in a shared header, and allows an architecture to create a combination that resctrl does not support. Remove the fflags field, and pick the value based on the resource id. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-4-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 4 ---- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 18 ++++++++++++++++-- include/linux/resctrl.h | 2 -- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 12b41316d8f7f..8ef2e449b4650 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -74,7 +74,6 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .mon_domains = mon_domain_init(RDT_RESOURCE_L3), .parse_ctrlval = parse_cbm, .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, }, .msr_base = MSR_IA32_L3_CBM_BASE, .msr_update = cat_wrmsr, @@ -88,7 +87,6 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L2), .parse_ctrlval = parse_cbm, .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, }, .msr_base = MSR_IA32_L2_CBM_BASE, .msr_update = cat_wrmsr, @@ -102,7 +100,6 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_MBA), .parse_ctrlval = parse_bw, .format_str = "%d=%*u", - .fflags = RFTYPE_RES_MB, }, }, [RDT_RESOURCE_SMBA] = @@ -114,7 +111,6 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA), .parse_ctrlval = parse_bw, .format_str = "%d=%*u", - .fflags = RFTYPE_RES_MB, }, }, }; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 45093b9e8e630..3391ac8ecb2de 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2165,6 +2165,20 @@ static int rdtgroup_mkdir_info_resdir(void *priv, char *name, return ret; } +static unsigned long fflags_from_resource(struct rdt_resource *r) +{ + switch (r->rid) { + case RDT_RESOURCE_L3: + case RDT_RESOURCE_L2: + return RFTYPE_RES_CACHE; + case RDT_RESOURCE_MBA: + case RDT_RESOURCE_SMBA: + return RFTYPE_RES_MB; + } + + return WARN_ON_ONCE(1); +} + static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) { struct resctrl_schema *s; @@ -2185,14 +2199,14 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) /* loop over enabled controls, these are all alloc_capable */ list_for_each_entry(s, &resctrl_schema_all, list) { r = s->res; - fflags = r->fflags | RFTYPE_CTRL_INFO; + fflags = fflags_from_resource(r) | RFTYPE_CTRL_INFO; ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags); if (ret) goto out_destroy; } for_each_mon_capable_rdt_resource(r) { - fflags = r->fflags | RFTYPE_MON_INFO; + fflags = fflags_from_resource(r) | RFTYPE_MON_INFO; sprintf(name, "%s_MON", r->name); ret = rdtgroup_mkdir_info_resdir(r, name, fflags); if (ret) diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 37279e2a89dae..496ddcaa4ecf3 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -210,7 +210,6 @@ enum resctrl_scope { * @format_str: Per resource format string to show domain value * @parse_ctrlval: Per resource function pointer to parse control values * @evt_list: List of monitoring events - * @fflags: flags to choose base and info files * @cdp_capable: Is the CDP feature available on this resource */ struct rdt_resource { @@ -232,7 +231,6 @@ struct rdt_resource { struct resctrl_schema *s, struct rdt_ctrl_domain *d); struct list_head evt_list; - unsigned long fflags; bool cdp_capable; }; From c24f5eab6b2689fd4e97a2d2fe963864036653e6 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:36:49 +0000 Subject: [PATCH 04/30] x86/resctrl: Use schema type to determine how to parse schema values Resctrl's architecture code gets to specify a function pointer that is used when parsing schema entries. This is expected to be one of two helpers from the filesystem code. Setting this function pointer allows the architecture code to change the ABI resctrl presents to user-space, and forces resctrl to expose these helpers. Instead, add a schema format enum to choose which schema parser to use. This allows the helpers to be made static and the structs used for passing arguments moved out of shared headers. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-5-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 8 +++--- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 32 +++++++++++++++++++---- arch/x86/kernel/cpu/resctrl/internal.h | 10 ------- include/linux/resctrl.h | 17 ++++++++---- 4 files changed, 43 insertions(+), 24 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 8ef2e449b4650..e9fe129a02f84 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -72,7 +72,7 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .mon_scope = RESCTRL_L3_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L3), .mon_domains = mon_domain_init(RDT_RESOURCE_L3), - .parse_ctrlval = parse_cbm, + .schema_fmt = RESCTRL_SCHEMA_BITMAP, .format_str = "%d=%0*x", }, .msr_base = MSR_IA32_L3_CBM_BASE, @@ -85,7 +85,7 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .name = "L2", .ctrl_scope = RESCTRL_L2_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L2), - .parse_ctrlval = parse_cbm, + .schema_fmt = RESCTRL_SCHEMA_BITMAP, .format_str = "%d=%0*x", }, .msr_base = MSR_IA32_L2_CBM_BASE, @@ -98,7 +98,7 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .name = "MB", .ctrl_scope = RESCTRL_L3_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_MBA), - .parse_ctrlval = parse_bw, + .schema_fmt = RESCTRL_SCHEMA_RANGE, .format_str = "%d=%*u", }, }, @@ -109,7 +109,7 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .name = "SMBA", .ctrl_scope = RESCTRL_L3_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA), - .parse_ctrlval = parse_bw, + .schema_fmt = RESCTRL_SCHEMA_RANGE, .format_str = "%d=%*u", }, }, diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 4af27ef5a8a19..f4334f437ffc7 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -23,6 +23,15 @@ #include "internal.h" +struct rdt_parse_data { + struct rdtgroup *rdtgrp; + char *buf; +}; + +typedef int (ctrlval_parser_t)(struct rdt_parse_data *data, + struct resctrl_schema *s, + struct rdt_ctrl_domain *d); + /* * Check whether MBA bandwidth percentage value is correct. The value is * checked against the minimum and max bandwidth values specified by the @@ -64,8 +73,8 @@ static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) return true; } -int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_ctrl_domain *d) +static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, + struct rdt_ctrl_domain *d) { struct resctrl_staged_config *cfg; u32 closid = data->rdtgrp->closid; @@ -143,8 +152,8 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) * Read one cache bit mask (hex). Check that it is valid for the current * resource type. */ -int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_ctrl_domain *d) +static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, + struct rdt_ctrl_domain *d) { struct rdtgroup *rdtgrp = data->rdtgrp; struct resctrl_staged_config *cfg; @@ -210,6 +219,7 @@ static int parse_line(char *line, struct resctrl_schema *s, struct rdtgroup *rdtgrp) { enum resctrl_conf_type t = s->conf_type; + ctrlval_parser_t *parse_ctrlval = NULL; struct resctrl_staged_config *cfg; struct rdt_resource *r = s->res; struct rdt_parse_data data; @@ -220,6 +230,18 @@ static int parse_line(char *line, struct resctrl_schema *s, /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); + switch (r->schema_fmt) { + case RESCTRL_SCHEMA_BITMAP: + parse_ctrlval = &parse_cbm; + break; + case RESCTRL_SCHEMA_RANGE: + parse_ctrlval = &parse_bw; + break; + } + + if (WARN_ON_ONCE(!parse_ctrlval)) + return -EINVAL; + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) { rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n"); @@ -240,7 +262,7 @@ static int parse_line(char *line, struct resctrl_schema *s, if (d->hdr.id == dom_id) { data.buf = dom; data.rdtgrp = rdtgrp; - if (r->parse_ctrlval(&data, s, d)) + if (parse_ctrlval(&data, s, d)) return -EINVAL; if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { cfg = &d->staged_config[t]; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 75252a7e1ebc6..b5543bd506c3b 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -459,11 +459,6 @@ static inline bool is_mbm_event(int e) e <= QOS_L3_MBM_LOCAL_EVENT_ID); } -struct rdt_parse_data { - struct rdtgroup *rdtgrp; - char *buf; -}; - /** * struct rdt_hw_resource - arch private attributes of a resctrl resource * @r_resctrl: Attributes of the resource used directly by resctrl. @@ -500,11 +495,6 @@ static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r return container_of(r, struct rdt_hw_resource, r_resctrl); } -int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_ctrl_domain *d); -int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_ctrl_domain *d); - extern struct mutex rdtgroup_mutex; extern struct rdt_hw_resource rdt_resources_all[]; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 496ddcaa4ecf3..3e1a41356b2a7 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -183,7 +183,6 @@ struct resctrl_membw { u32 *mb_map; }; -struct rdt_parse_data; struct resctrl_schema; enum resctrl_scope { @@ -192,6 +191,16 @@ enum resctrl_scope { RESCTRL_L3_NODE, }; +/** + * enum resctrl_schema_fmt - The format user-space provides for a schema. + * @RESCTRL_SCHEMA_BITMAP: The schema is a bitmap in hex. + * @RESCTRL_SCHEMA_RANGE: The schema is a decimal number. + */ +enum resctrl_schema_fmt { + RESCTRL_SCHEMA_BITMAP, + RESCTRL_SCHEMA_RANGE, +}; + /** * struct rdt_resource - attributes of a resctrl resource * @rid: The index of the resource @@ -208,7 +217,7 @@ enum resctrl_scope { * @data_width: Character width of data when displaying * @default_ctrl: Specifies default cache cbm or memory B/W percent. * @format_str: Per resource format string to show domain value - * @parse_ctrlval: Per resource function pointer to parse control values + * @schema_fmt: Which format string and parser is used for this schema. * @evt_list: List of monitoring events * @cdp_capable: Is the CDP feature available on this resource */ @@ -227,9 +236,7 @@ struct rdt_resource { int data_width; u32 default_ctrl; const char *format_str; - int (*parse_ctrlval)(struct rdt_parse_data *data, - struct resctrl_schema *s, - struct rdt_ctrl_domain *d); + enum resctrl_schema_fmt schema_fmt; struct list_head evt_list; bool cdp_capable; }; From bb9343c8f2906ac7087a7f9560b37636c220551d Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:36:50 +0000 Subject: [PATCH 05/30] x86/resctrl: Use schema type to determine the schema format string Resctrl's architecture code gets to specify a format string that is used when printing schema entries. This is expected to be one of two values that the filesystem code supports. Setting this format string allows the architecture code to change the ABI resctrl presents to user-space. Instead, use the schema format enum to choose which format string to use. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Reinette Chatre Reviewed-by: Tony Luck Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-6-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 4 ---- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 2 +- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 14 ++++++++++++++ include/linux/resctrl.h | 4 ++-- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index e9fe129a02f84..542503a8c953d 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -73,7 +73,6 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L3), .mon_domains = mon_domain_init(RDT_RESOURCE_L3), .schema_fmt = RESCTRL_SCHEMA_BITMAP, - .format_str = "%d=%0*x", }, .msr_base = MSR_IA32_L3_CBM_BASE, .msr_update = cat_wrmsr, @@ -86,7 +85,6 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .ctrl_scope = RESCTRL_L2_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L2), .schema_fmt = RESCTRL_SCHEMA_BITMAP, - .format_str = "%d=%0*x", }, .msr_base = MSR_IA32_L2_CBM_BASE, .msr_update = cat_wrmsr, @@ -99,7 +97,6 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .ctrl_scope = RESCTRL_L3_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_MBA), .schema_fmt = RESCTRL_SCHEMA_RANGE, - .format_str = "%d=%*u", }, }, [RDT_RESOURCE_SMBA] = @@ -110,7 +107,6 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .ctrl_scope = RESCTRL_L3_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA), .schema_fmt = RESCTRL_SCHEMA_RANGE, - .format_str = "%d=%*u", }, }, }; diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index f4334f437ffc7..c763cb4fb1a88 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -487,7 +487,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo ctrl_val = resctrl_arch_get_config(r, dom, closid, schema->conf_type); - seq_printf(s, r->format_str, dom->hdr.id, max_data_width, + seq_printf(s, schema->fmt_str, dom->hdr.id, max_data_width, ctrl_val); sep = true; } diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 3391ac8ecb2de..e7862d0936c94 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2611,6 +2611,20 @@ static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type if (cl > max_name_width) max_name_width = cl; + switch (r->schema_fmt) { + case RESCTRL_SCHEMA_BITMAP: + s->fmt_str = "%d=%0*x"; + break; + case RESCTRL_SCHEMA_RANGE: + s->fmt_str = "%d=%0*u"; + break; + } + + if (WARN_ON_ONCE(!s->fmt_str)) { + kfree(s); + return -EINVAL; + } + INIT_LIST_HEAD(&s->list); list_add(&s->list, &resctrl_schema_all); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 3e1a41356b2a7..736b9a9a9464e 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -216,7 +216,6 @@ enum resctrl_schema_fmt { * @name: Name to use in "schemata" file. * @data_width: Character width of data when displaying * @default_ctrl: Specifies default cache cbm or memory B/W percent. - * @format_str: Per resource format string to show domain value * @schema_fmt: Which format string and parser is used for this schema. * @evt_list: List of monitoring events * @cdp_capable: Is the CDP feature available on this resource @@ -235,7 +234,6 @@ struct rdt_resource { char *name; int data_width; u32 default_ctrl; - const char *format_str; enum resctrl_schema_fmt schema_fmt; struct list_head evt_list; bool cdp_capable; @@ -253,6 +251,7 @@ struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l); * user-space * @list: Member of resctrl_schema_all. * @name: The name to use in the "schemata" file. + * @fmt_str: Format string to show domain value. * @conf_type: Whether this schema is specific to code/data. * @res: The resource structure exported by the architecture to describe * the hardware that is configured by this schema. @@ -263,6 +262,7 @@ struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l); struct resctrl_schema { struct list_head list; char name[8]; + const char *fmt_str; enum resctrl_conf_type conf_type; struct rdt_resource *res; u32 num_closid; From 43312b8ea1c61abbc5e3bdc87fe2e18f755d549d Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:36:51 +0000 Subject: [PATCH 06/30] x86/resctrl: Remove data_width and the tabular format The resctrl architecture code provides a data_width for the controls of each resource. This is used to zero pad all control values in the schemata file so they appear in columns. The same is done with the resource names to complete the visual effect. e.g. | SMBA:0=2048 | L3:0=00ff AMD platforms discover their maximum bandwidth for the MB resource from firmware, but hard-code the data_width to 4. If the maximum bandwidth requires more digits - the tabular format is silently broken. This is also broken when the mba_MBps mount option is used as the field width isn't updated. If new schema are added resctrl will need to be able to determine the maximum width. The benefit of this pretty-printing is questionable. Instead of handling runtime discovery of the data_width for AMD platforms, remove the feature. These fields are always zero padded so should be harmless to remove if the whole field has been treated as a number. In the above example, this would now look like this: | SMBA:0=2048 | L3:0=ff Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-7-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 26 ----------------------- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 3 +-- arch/x86/kernel/cpu/resctrl/internal.h | 2 +- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 10 +++++++-- include/linux/resctrl.h | 2 -- 5 files changed, 10 insertions(+), 33 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 542503a8c953d..754fb65565ec9 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -43,12 +43,6 @@ static DEFINE_MUTEX(domain_list_lock); */ DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state); -/* - * Used to store the max resource name width and max resource data width - * to display the schemata in a tabular format - */ -int max_name_width, max_data_width; - /* * Global boolean for rdt_alloc which is true if any * resource allocation is enabled. @@ -228,7 +222,6 @@ static __init bool __get_mem_config_intel(struct rdt_resource *r) return false; r->membw.arch_needs_linear = false; } - r->data_width = 3; if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA)) r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD; @@ -269,8 +262,6 @@ static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r) r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED; r->membw.min_bw = 0; r->membw.bw_gran = 1; - /* Max value is 2048, Data width should be 4 in decimal */ - r->data_width = 4; r->alloc_capable = true; @@ -290,7 +281,6 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) r->cache.cbm_len = eax.split.cbm_len + 1; r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1; r->cache.shareable_bits = ebx & r->default_ctrl; - r->data_width = (r->cache.cbm_len + 3) / 4; if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) r->cache.arch_has_sparse_bitmasks = ecx.split.noncont; r->alloc_capable = true; @@ -786,20 +776,6 @@ static int resctrl_arch_offline_cpu(unsigned int cpu) return 0; } -/* - * Choose a width for the resource name and resource data based on the - * resource that has widest name and cbm. - */ -static __init void rdt_init_padding(void) -{ - struct rdt_resource *r; - - for_each_alloc_capable_rdt_resource(r) { - if (r->data_width > max_data_width) - max_data_width = r->data_width; - } -} - enum { RDT_FLAG_CMT, RDT_FLAG_MBM_TOTAL, @@ -1102,8 +1078,6 @@ static int __init resctrl_late_init(void) if (!get_rdt_resources()) return -ENODEV; - rdt_init_padding(); - state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/resctrl/cat:online:", resctrl_arch_online_cpu, diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index c763cb4fb1a88..59610b209b4e9 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -487,8 +487,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo ctrl_val = resctrl_arch_get_config(r, dom, closid, schema->conf_type); - seq_printf(s, schema->fmt_str, dom->hdr.id, max_data_width, - ctrl_val); + seq_printf(s, schema->fmt_str, dom->hdr.id, ctrl_val); sep = true; } seq_puts(s, "\n"); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index b5543bd506c3b..f975cd6cfe619 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -326,7 +326,7 @@ struct rdtgroup { /* List of all resource groups */ extern struct list_head rdt_all_groups; -extern int max_name_width, max_data_width; +extern int max_name_width; int __init rdtgroup_init(void); void __exit rdtgroup_exit(void); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index e7862d0936c94..1e0bae1a9d950 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -57,6 +57,12 @@ static struct kernfs_node *kn_mongrp; /* Kernel fs node for "mon_data" directory under root */ static struct kernfs_node *kn_mondata; +/* + * Used to store the max resource name width to display the schemata names in + * a tabular format. + */ +int max_name_width; + static struct seq_buf last_cmd_status; static char last_cmd_status_buf[512]; @@ -2613,10 +2619,10 @@ static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type switch (r->schema_fmt) { case RESCTRL_SCHEMA_BITMAP: - s->fmt_str = "%d=%0*x"; + s->fmt_str = "%d=%x"; break; case RESCTRL_SCHEMA_RANGE: - s->fmt_str = "%d=%0*u"; + s->fmt_str = "%d=%u"; break; } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 736b9a9a9464e..e1a982adef45a 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -214,7 +214,6 @@ enum resctrl_schema_fmt { * @ctrl_domains: RCU list of all control domains for this resource * @mon_domains: RCU list of all monitor domains for this resource * @name: Name to use in "schemata" file. - * @data_width: Character width of data when displaying * @default_ctrl: Specifies default cache cbm or memory B/W percent. * @schema_fmt: Which format string and parser is used for this schema. * @evt_list: List of monitoring events @@ -232,7 +231,6 @@ struct rdt_resource { struct list_head ctrl_domains; struct list_head mon_domains; char *name; - int data_width; u32 default_ctrl; enum resctrl_schema_fmt schema_fmt; struct list_head evt_list; From 634ebb98b929443ea1a5502c93f26d01aedbd5c8 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:36:52 +0000 Subject: [PATCH 07/30] x86/resctrl: Add max_bw to struct resctrl_membw __rdt_get_mem_config_amd() and __get_mem_config_intel() both use the default_ctrl property as a maximum value. This is because the MBA schema works differently between these platforms. Doing this complicates determining whether the default_ctrl property belongs to the arch code, or can be derived from the schema format. Deriving the maximum or default value from the schema format would avoid the architecture code having to tell resctrl such obvious things as the maximum percentage is 100, and the maximum bitmap is all ones. Maximum bandwidth is always going to vary per platform. Add max_bw as a special case. This is currently used for the maximum MBA percentage on Intel platforms, but can be removed from the architecture code if 'percentage' becomes a schema format resctrl supports directly. This value isn't needed for other schema formats. This will allow the default_ctrl to be generated from the schema properties when it is needed. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-8-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 2 ++ arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 4 ++-- include/linux/resctrl.h | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 754fb65565ec9..4504a12efc971 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -212,6 +212,7 @@ static __init bool __get_mem_config_intel(struct rdt_resource *r) hw_res->num_closid = edx.split.cos_max + 1; max_delay = eax.split.max_delay + 1; r->default_ctrl = MAX_MBA_BW; + r->membw.max_bw = MAX_MBA_BW; r->membw.arch_needs_linear = true; if (ecx & MBA_IS_LINEAR) { r->membw.delay_linear = true; @@ -250,6 +251,7 @@ static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r) cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx); hw_res->num_closid = edx + 1; r->default_ctrl = 1 << eax; + r->membw.max_bw = 1 << eax; /* AMD does not use delay */ r->membw.delay_linear = false; diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 59610b209b4e9..23a01eaebd587 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -63,9 +63,9 @@ static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) return true; } - if (bw < r->membw.min_bw || bw > r->default_ctrl) { + if (bw < r->membw.min_bw || bw > r->membw.max_bw) { rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n", - bw, r->membw.min_bw, r->default_ctrl); + bw, r->membw.min_bw, r->membw.max_bw); return false; } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index e1a982adef45a..465f3cf8c4bcb 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -165,6 +165,7 @@ enum membw_throttle_mode { /** * struct resctrl_membw - Memory bandwidth allocation related data * @min_bw: Minimum memory bandwidth percentage user can request + * @max_bw: Maximum memory bandwidth value, used as the reset value * @bw_gran: Granularity at which the memory bandwidth is allocated * @delay_linear: True if memory B/W delay is in linear scale * @arch_needs_linear: True if we can't configure non-linear resources @@ -175,6 +176,7 @@ enum membw_throttle_mode { */ struct resctrl_membw { u32 min_bw; + u32 max_bw; u32 bw_gran; u32 delay_linear; bool arch_needs_linear; From dbc58f7eec4039ee8f3ec27b2b41d89500debfac Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:36:53 +0000 Subject: [PATCH 08/30] x86/resctrl: Generate default_ctrl instead of sharing it The struct rdt_resource default_ctrl is used by both the architecture code for resetting the hardware controls, and sometimes by the filesystem code as the default value for the schema, unless the bandwidth software controller is in use. Having the default exposed by the architecture code causes unnecessary duplication for each architecture as the default value must be specified, but can be derived from other schema properties. Now that the maximum bandwidth is explicitly described, resctrl can derive the default value from the schema format and the other resource properties. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Fenghua Yu Reviewed-by: Reinette Chatre Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-9-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 13 +++++-------- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 5 +++-- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 6 +++--- include/linux/resctrl.h | 19 +++++++++++++++++-- 4 files changed, 28 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 4504a12efc971..d001ca43b53d9 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -155,7 +155,6 @@ static inline void cache_alloc_hsw_probe(void) return; hw_res->num_closid = 4; - r->default_ctrl = max_cbm; r->cache.cbm_len = 20; r->cache.shareable_bits = 0xc0000; r->cache.min_cbm_bits = 2; @@ -211,7 +210,6 @@ static __init bool __get_mem_config_intel(struct rdt_resource *r) cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full); hw_res->num_closid = edx.split.cos_max + 1; max_delay = eax.split.max_delay + 1; - r->default_ctrl = MAX_MBA_BW; r->membw.max_bw = MAX_MBA_BW; r->membw.arch_needs_linear = true; if (ecx & MBA_IS_LINEAR) { @@ -250,7 +248,6 @@ static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r) cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx); hw_res->num_closid = edx + 1; - r->default_ctrl = 1 << eax; r->membw.max_bw = 1 << eax; /* AMD does not use delay */ @@ -276,13 +273,13 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) union cpuid_0x10_1_eax eax; union cpuid_0x10_x_ecx ecx; union cpuid_0x10_x_edx edx; - u32 ebx; + u32 ebx, default_ctrl; cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full); hw_res->num_closid = edx.split.cos_max + 1; r->cache.cbm_len = eax.split.cbm_len + 1; - r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1; - r->cache.shareable_bits = ebx & r->default_ctrl; + default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1; + r->cache.shareable_bits = ebx & default_ctrl; if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) r->cache.arch_has_sparse_bitmasks = ecx.split.noncont; r->alloc_capable = true; @@ -329,7 +326,7 @@ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) return MAX_MBA_BW - bw; pr_warn_once("Non Linear delay-bw map not supported but queried\n"); - return r->default_ctrl; + return MAX_MBA_BW; } static void mba_wrmsr_intel(struct msr_param *m) @@ -438,7 +435,7 @@ static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) * For Memory Allocation: Set b/w requested to 100% */ for (i = 0; i < hw_res->num_closid; i++, dc++) - *dc = r->default_ctrl; + *dc = resctrl_get_default_ctrl(r); } static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom) diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 23a01eaebd587..5d87f279085f7 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -113,8 +113,9 @@ static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, */ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) { - unsigned long first_bit, zero_bit, val; + u32 supported_bits = BIT_MASK(r->cache.cbm_len) - 1; unsigned int cbm_len = r->cache.cbm_len; + unsigned long first_bit, zero_bit, val; int ret; ret = kstrtoul(buf, 16, &val); @@ -123,7 +124,7 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) return false; } - if ((r->cache.min_cbm_bits > 0 && val == 0) || val > r->default_ctrl) { + if ((r->cache.min_cbm_bits > 0 && val == 0) || val > supported_bits) { rdt_last_cmd_puts("Mask out of range\n"); return false; } diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 1e0bae1a9d950..cd8f65c12124b 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -978,7 +978,7 @@ static int rdt_default_ctrl_show(struct kernfs_open_file *of, struct resctrl_schema *s = of->kn->parent->priv; struct rdt_resource *r = s->res; - seq_printf(seq, "%x\n", r->default_ctrl); + seq_printf(seq, "%x\n", resctrl_get_default_ctrl(r)); return 0; } @@ -2882,7 +2882,7 @@ static int reset_all_ctrls(struct rdt_resource *r) hw_dom = resctrl_to_arch_ctrl_dom(d); for (i = 0; i < hw_res->num_closid; i++) - hw_dom->ctrl_val[i] = r->default_ctrl; + hw_dom->ctrl_val[i] = resctrl_get_default_ctrl(r); msr_param.dom = d; smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1); } @@ -3417,7 +3417,7 @@ static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid) } cfg = &d->staged_config[CDP_NONE]; - cfg->new_ctrl = r->default_ctrl; + cfg->new_ctrl = resctrl_get_default_ctrl(r); cfg->have_new_ctrl = true; } } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 465f3cf8c4bcb..d16dc960f1fce 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -216,7 +216,6 @@ enum resctrl_schema_fmt { * @ctrl_domains: RCU list of all control domains for this resource * @mon_domains: RCU list of all monitor domains for this resource * @name: Name to use in "schemata" file. - * @default_ctrl: Specifies default cache cbm or memory B/W percent. * @schema_fmt: Which format string and parser is used for this schema. * @evt_list: List of monitoring events * @cdp_capable: Is the CDP feature available on this resource @@ -233,7 +232,6 @@ struct rdt_resource { struct list_head ctrl_domains; struct list_head mon_domains; char *name; - u32 default_ctrl; enum resctrl_schema_fmt schema_fmt; struct list_head evt_list; bool cdp_capable; @@ -268,6 +266,23 @@ struct resctrl_schema { u32 num_closid; }; +/** + * resctrl_get_default_ctrl() - Return the default control value for this + * resource. + * @r: The resource whose default control type is queried. + */ +static inline u32 resctrl_get_default_ctrl(struct rdt_resource *r) +{ + switch (r->schema_fmt) { + case RESCTRL_SCHEMA_BITMAP: + return BIT_MASK(r->cache.cbm_len) - 1; + case RESCTRL_SCHEMA_RANGE: + return r->membw.max_bw; + } + + return WARN_ON_ONCE(1); +} + /* The number of closid supported by this resource regardless of CDP */ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); u32 resctrl_arch_system_num_rmid_idx(void); From aebd5354dd191860033f8599048d3d3998482ee7 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:36:54 +0000 Subject: [PATCH 09/30] x86/resctrl: Add helper for setting CPU default properties rdtgroup_rmdir_ctrl() and rdtgroup_rmdir_mon() set the per-CPU pqr_state for CPUs that were part of the rmdir()'d group. Another architecture might not have a 'pqr_state', its hardware may need the values in a different format. MPAM's equivalent of RMID values are not unique, and always need the CLOSID to be provided too. There is only one caller that modifies a single value, (rdtgroup_rmdir_mon()). MPAM always needs both CLOSID and RMID for the hardware value as these are written to the same system register. As rdtgroup_rmdir_mon() has the CLOSID on hand, only provide a helper to set both values. These values are read by __resctrl_sched_in(), but may be written by a different CPU without any locking, add READ/WRTE_ONCE() to avoid torn values. Co-developed-by: Dave Martin Signed-off-by: Dave Martin Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-10-james.morse@arm.com --- arch/x86/include/asm/resctrl.h | 14 +++++++++++--- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 20 ++++++++++++++------ 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 8b1b6ce1e51b2..6908cd0e6e401 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -4,8 +4,9 @@ #ifdef CONFIG_X86_CPU_RESCTRL -#include #include +#include +#include /* * This value can never be a valid CLOSID, and is used when mapping a @@ -96,8 +97,8 @@ static inline void resctrl_arch_disable_mon(void) static inline void __resctrl_sched_in(struct task_struct *tsk) { struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); - u32 closid = state->default_closid; - u32 rmid = state->default_rmid; + u32 closid = READ_ONCE(state->default_closid); + u32 rmid = READ_ONCE(state->default_rmid); u32 tmp; /* @@ -132,6 +133,13 @@ static inline unsigned int resctrl_arch_round_mon_val(unsigned int val) return val * scale; } +static inline void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, + u32 rmid) +{ + WRITE_ONCE(per_cpu(pqr_state.default_closid, cpu), closid); + WRITE_ONCE(per_cpu(pqr_state.default_rmid, cpu), rmid); +} + static inline void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid) { diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index cd8f65c12124b..f706e5a288b15 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -3731,14 +3731,21 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) { struct rdtgroup *prdtgrp = rdtgrp->mon.parent; + u32 closid, rmid; int cpu; /* Give any tasks back to the parent group */ rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask); - /* Update per cpu rmid of the moved CPUs first */ + /* + * Update per cpu closid/rmid of the moved CPUs first. + * Note: the closid will not change, but the arch code still needs it. + */ + closid = prdtgrp->closid; + rmid = prdtgrp->mon.rmid; for_each_cpu(cpu, &rdtgrp->cpu_mask) - per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid; + resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid); + /* * Update the MSR on moved CPUs and CPUs which have moved * task running on them. @@ -3771,6 +3778,7 @@ static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp) static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) { + u32 closid, rmid; int cpu; /* Give any tasks back to the default group */ @@ -3781,10 +3789,10 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); /* Update per cpu closid and rmid of the moved CPUs first */ - for_each_cpu(cpu, &rdtgrp->cpu_mask) { - per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid; - per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid; - } + closid = rdtgroup_default.closid; + rmid = rdtgroup_default.mon.rmid; + for_each_cpu(cpu, &rdtgrp->cpu_mask) + resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid); /* * Update the MSR on moved CPUs and CPUs which have moved From 6f06aee356bfd0c817cbe83debedd240fbed0719 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:36:55 +0000 Subject: [PATCH 10/30] x86/resctrl: Remove rdtgroup from update_cpu_closid_rmid() update_cpu_closid_rmid() takes a struct rdtgroup as an argument, which it uses to update the local CPUs default pqr values. This is a problem once the resctrl parts move out to /fs/, as the arch code cannot poke around inside struct rdtgroup. Rename update_cpu_closid_rmid() as resctrl_arch_sync_cpus_defaults() to be used as the target of an IPI, and pass the effective CLOSID and RMID in a new struct. Co-developed-by: Dave Martin Signed-off-by: Dave Martin Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-11-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 17 +++++++++++++---- include/linux/resctrl.h | 22 ++++++++++++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index f706e5a288b15..62d9a50c7bbaa 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -355,13 +355,13 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of, * from update_closid_rmid() is protected against __switch_to() because * preemption is disabled. */ -static void update_cpu_closid_rmid(void *info) +void resctrl_arch_sync_cpu_closid_rmid(void *info) { - struct rdtgroup *r = info; + struct resctrl_cpu_defaults *r = info; if (r) { this_cpu_write(pqr_state.default_closid, r->closid); - this_cpu_write(pqr_state.default_rmid, r->mon.rmid); + this_cpu_write(pqr_state.default_rmid, r->rmid); } /* @@ -376,11 +376,20 @@ static void update_cpu_closid_rmid(void *info) * Update the PGR_ASSOC MSR on all cpus in @cpu_mask, * * Per task closids/rmids must have been set up before calling this function. + * @r may be NULL. */ static void update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r) { - on_each_cpu_mask(cpu_mask, update_cpu_closid_rmid, r, 1); + struct resctrl_cpu_defaults defaults, *p = NULL; + + if (r) { + defaults.closid = r->closid; + defaults.rmid = r->mon.rmid; + p = &defaults; + } + + on_each_cpu_mask(cpu_mask, resctrl_arch_sync_cpu_closid_rmid, p, 1); } static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index d16dc960f1fce..31808b3ddecb0 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -266,6 +266,28 @@ struct resctrl_schema { u32 num_closid; }; +struct resctrl_cpu_defaults { + u32 closid; + u32 rmid; +}; + +/** + * resctrl_arch_sync_cpu_closid_rmid() - Refresh this CPU's CLOSID and RMID. + * Call via IPI. + * @info: If non-NULL, a pointer to a struct resctrl_cpu_defaults + * specifying the new CLOSID and RMID for tasks in the default + * resctrl ctrl and mon group when running on this CPU. If NULL, + * this CPU is not re-assigned to a different default group. + * + * Propagates reassignment of CPUs and/or tasks to different resctrl groups + * when requested by the resctrl core code. + * + * This function records the per-cpu defaults specified by @info (if any), + * and then reconfigures the CPU's hardware CLOSID and RMID for subsequent + * execution based on @current, in the same way as during a task switch. + */ +void resctrl_arch_sync_cpu_closid_rmid(void *info); + /** * resctrl_get_default_ctrl() - Return the default control value for this * resource. From 8079565d177f5c4eac6c2ee2ac6c404eee76d500 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:36:56 +0000 Subject: [PATCH 11/30] x86/resctrl: Expose resctrl fs's init function to the rest of the kernel rdtgroup_init() needs exposing to the rest of the kernel so that arch code can call it once it lives in core code. As this is one of the few functions exposed, rename it to have "resctrl" in the name. The same goes for the exit call. Rename x86's arch code init functions for RDT to have an arch prefix to make it clear these are part of the architecture code. Co-developed-by: Dave Martin Signed-off-by: Dave Martin Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-12-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 12 ++++++------ arch/x86/kernel/cpu/resctrl/internal.h | 3 --- arch/x86/kernel/cpu/resctrl/monitor.c | 2 +- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 8 ++++---- include/linux/resctrl.h | 3 +++ 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index d001ca43b53d9..212995149eace 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -1061,7 +1061,7 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c) } } -static int __init resctrl_late_init(void) +static int __init resctrl_arch_late_init(void) { struct rdt_resource *r; int state, ret; @@ -1084,7 +1084,7 @@ static int __init resctrl_late_init(void) if (state < 0) return state; - ret = rdtgroup_init(); + ret = resctrl_init(); if (ret) { cpuhp_remove_state(state); return ret; @@ -1100,18 +1100,18 @@ static int __init resctrl_late_init(void) return 0; } -late_initcall(resctrl_late_init); +late_initcall(resctrl_arch_late_init); -static void __exit resctrl_exit(void) +static void __exit resctrl_arch_exit(void) { struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; cpuhp_remove_state(rdt_online); - rdtgroup_exit(); + resctrl_exit(); if (r->mon_capable) rdt_put_mon_l3_config(); } -__exitcall(resctrl_exit); +__exitcall(resctrl_arch_exit); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index f975cd6cfe619..8291f1b599812 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -328,9 +328,6 @@ extern struct list_head rdt_all_groups; extern int max_name_width; -int __init rdtgroup_init(void); -void __exit rdtgroup_exit(void); - /** * struct rftype - describe each file in the resctrl file system * @name: File name diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 58b5b21349a8c..e8388d19a579f 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -1027,7 +1027,7 @@ static int dom_data_init(struct rdt_resource *r) /* * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and * are always allocated. These are used for the rdtgroup_default - * control group, which will be setup later in rdtgroup_init(). + * control group, which will be setup later in resctrl_init(). */ idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, RESCTRL_RESERVED_RMID); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 62d9a50c7bbaa..b2dad689e780c 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -4235,14 +4235,14 @@ void resctrl_offline_cpu(unsigned int cpu) } /* - * rdtgroup_init - rdtgroup initialization + * resctrl_init - resctrl filesystem initialization * * Setup resctrl file system including set up root, create mount point, - * register rdtgroup filesystem, and initialize files under root directory. + * register resctrl filesystem, and initialize files under root directory. * * Return: 0 on success or -errno */ -int __init rdtgroup_init(void) +int __init resctrl_init(void) { int ret = 0; @@ -4290,7 +4290,7 @@ int __init rdtgroup_init(void) return ret; } -void __exit rdtgroup_exit(void) +void __exit resctrl_exit(void) { debugfs_remove_recursive(debugfs_resctrl); unregister_filesystem(&rdt_fs_type); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 31808b3ddecb0..f1979e375da9a 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -402,4 +402,7 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain * extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; +int __init resctrl_init(void); +void __exit resctrl_exit(void); + #endif /* _RESCTRL_H */ From e3d5138cefbffa8ea1bf8aab3629268bc3381219 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:36:57 +0000 Subject: [PATCH 12/30] x86/resctrl: Move rdt_find_domain() to be visible to arch and fs code rdt_find_domain() finds a domain given a resource and a cache-id. This is used by both the architecture code and the filesystem code. After the filesystem code moves to live in /fs/, this helper is either duplicated by all architectures, or needs exposing by the filesystem code. Add the declaration to the global header file. As it's now globally visible, and has only a handful of callers, swap the 'rdt' for 'resctrl'. Move the function to live with its caller in ctrlmondata.c as the filesystem code will not have anything corresponding to core.c. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Fenghua Yu Reviewed-by: Reinette Chatre Reviewed-by: Babu Moger Reviewed-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Shaopeng Tan Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-13-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 38 +++-------------------- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 24 +++++++++++++- arch/x86/kernel/cpu/resctrl/internal.h | 2 -- include/linux/resctrl.h | 14 +++++++++ 4 files changed, 41 insertions(+), 37 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 212995149eace..850cb8643c4a5 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -394,36 +394,6 @@ void rdt_ctrl_update(void *arg) hw_res->msr_update(m); } -/* - * rdt_find_domain - Search for a domain id in a resource domain list. - * - * Search the domain list to find the domain id. If the domain id is - * found, return the domain. NULL otherwise. If the domain id is not - * found (and NULL returned) then the first domain with id bigger than - * the input id can be returned to the caller via @pos. - */ -struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id, - struct list_head **pos) -{ - struct rdt_domain_hdr *d; - struct list_head *l; - - list_for_each(l, h) { - d = list_entry(l, struct rdt_domain_hdr, list); - /* When id is found, return its domain. */ - if (id == d->id) - return d; - /* Stop searching when finding id's position in sorted list. */ - if (id < d->id) - break; - } - - if (pos) - *pos = l; - - return NULL; -} - static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); @@ -534,7 +504,7 @@ static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r) return; } - hdr = rdt_find_domain(&r->ctrl_domains, id, &add_pos); + hdr = resctrl_find_domain(&r->ctrl_domains, id, &add_pos); if (hdr) { if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN)) return; @@ -589,7 +559,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) return; } - hdr = rdt_find_domain(&r->mon_domains, id, &add_pos); + hdr = resctrl_find_domain(&r->mon_domains, id, &add_pos); if (hdr) { if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) return; @@ -654,7 +624,7 @@ static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r) return; } - hdr = rdt_find_domain(&r->ctrl_domains, id, NULL); + hdr = resctrl_find_domain(&r->ctrl_domains, id, NULL); if (!hdr) { pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n", id, cpu, r->name); @@ -700,7 +670,7 @@ static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r) return; } - hdr = rdt_find_domain(&r->mon_domains, id, NULL); + hdr = resctrl_find_domain(&r->mon_domains, id, NULL); if (!hdr) { pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n", id, cpu, r->name); diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 5d87f279085f7..763317ea22565 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -610,6 +610,28 @@ int rdtgroup_mba_mbps_event_show(struct kernfs_open_file *of, return ret; } +struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id, + struct list_head **pos) +{ + struct rdt_domain_hdr *d; + struct list_head *l; + + list_for_each(l, h) { + d = list_entry(l, struct rdt_domain_hdr, list); + /* When id is found, return its domain. */ + if (id == d->id) + return d; + /* Stop searching when finding id's position in sorted list. */ + if (id < d->id) + break; + } + + if (pos) + *pos = l; + + return NULL; +} + void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, cpumask_t *cpumask, int evtid, int first) @@ -695,7 +717,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) * This file provides data from a single domain. Search * the resource to find the domain with "domid". */ - hdr = rdt_find_domain(&r->mon_domains, domid, NULL); + hdr = resctrl_find_domain(&r->mon_domains, domid, NULL); if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) { ret = -ENOENT; goto out; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 8291f1b599812..da73404183da2 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -581,8 +581,6 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn); int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name); int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, umode_t mask); -struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id, - struct list_head **pos); ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); int rdtgroup_schemata_show(struct kernfs_open_file *of, diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index f1979e375da9a..93d9a435f035f 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -372,6 +372,20 @@ static inline void resctrl_arch_rmid_read_context_check(void) might_sleep(); } +/** + * resctrl_find_domain() - Search for a domain id in a resource domain list. + * @h: The domain list to search. + * @id: The domain id to search for. + * @pos: A pointer to position in the list id should be inserted. + * + * Search the domain list to find the domain id. If the domain id is + * found, return the domain. NULL otherwise. If the domain id is not + * found (and NULL returned) then the first domain with id bigger than + * the input id can be returned to the caller via @pos. + */ +struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id, + struct list_head **pos); + /** * resctrl_arch_reset_rmid() - Reset any private state associated with rmid * and eventid. From f16adbaf9272ea7ed5d7bf8b261be8a9be949c31 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:36:58 +0000 Subject: [PATCH 13/30] x86/resctrl: Move resctrl types to a separate header When resctrl is fully factored into core and per-arch code, each arch will need to use some resctrl common definitions in order to define its own specializations and helpers. Following conventional practice, it would be desirable to put the dependent arch definitions in an header that is included by the common header. However, this can make it awkward to avoid a circular dependency between and the arch header. To avoid such dependencies, move the affected common types and constants into a new header that does not need to depend on or on the arch headers. The same logic applies to the monitor-configuration defines, move these too. Some kind of enumeration for events is needed between the filesystem and architecture code. Take the x86 definition as its convenient for x86. The definition of enum resctrl_event_id is needed to allow the architecture code to define resctrl_arch_mon_ctx_alloc() and resctrl_arch_mon_ctx_free(). The definition of enum resctrl_res_level is needed to allow the architecture code to define resctrl_arch_set_cdp_enabled() and resctrl_arch_get_cdp_enabled(). The bits for mbm_local_bytes_config et al are ABI, and must be the same on all architectures. These are documented in Documentation/arch/x86/resctrl.rst The maintainers entry for these headers was missed when resctrl.h was created. Add a wildcard entry to match both resctrl.h and resctrl_types.h. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-14-james.morse@arm.com --- MAINTAINERS | 1 + arch/x86/include/asm/resctrl.h | 1 + arch/x86/kernel/cpu/resctrl/internal.h | 24 ------------ include/linux/resctrl.h | 21 +--------- include/linux/resctrl_types.h | 54 ++++++++++++++++++++++++++ 5 files changed, 57 insertions(+), 44 deletions(-) create mode 100644 include/linux/resctrl_types.h diff --git a/MAINTAINERS b/MAINTAINERS index ed7aa6867674e..2d0b669e74319 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19894,6 +19894,7 @@ S: Supported F: Documentation/arch/x86/resctrl* F: arch/x86/include/asm/resctrl.h F: arch/x86/kernel/cpu/resctrl/ +F: include/linux/resctrl*.h F: tools/testing/selftests/resctrl/ READ-COPY UPDATE (RCU) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 6908cd0e6e401..52f2326e2b1e6 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -6,6 +6,7 @@ #include #include +#include #include /* diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index da73404183da2..5f3713fb2eaf1 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -32,30 +32,6 @@ */ #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE) -/* Reads to Local DRAM Memory */ -#define READS_TO_LOCAL_MEM BIT(0) - -/* Reads to Remote DRAM Memory */ -#define READS_TO_REMOTE_MEM BIT(1) - -/* Non-Temporal Writes to Local Memory */ -#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2) - -/* Non-Temporal Writes to Remote Memory */ -#define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3) - -/* Reads to Local Memory the system identifies as "Slow Memory" */ -#define READS_TO_LOCAL_S_MEM BIT(4) - -/* Reads to Remote Memory the system identifies as "Slow Memory" */ -#define READS_TO_REMOTE_S_MEM BIT(5) - -/* Dirty Victims to All Types of Memory */ -#define DIRTY_VICTIMS_TO_ALL_MEM BIT(6) - -/* Max event bits supported */ -#define MAX_EVT_CONFIG_BITS GENMASK(6, 0) - /** * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that * aren't marked nohz_full diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 93d9a435f035f..326f7ba220e79 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -6,6 +6,7 @@ #include #include #include +#include /* CLOSID, RMID value used by the default control group */ #define RESCTRL_RESERVED_CLOSID 0 @@ -37,28 +38,8 @@ enum resctrl_conf_type { CDP_DATA, }; -enum resctrl_res_level { - RDT_RESOURCE_L3, - RDT_RESOURCE_L2, - RDT_RESOURCE_MBA, - RDT_RESOURCE_SMBA, - - /* Must be the last */ - RDT_NUM_RESOURCES, -}; - #define CDP_NUM_TYPES (CDP_DATA + 1) -/* - * Event IDs, the values match those used to program IA32_QM_EVTSEL before - * reading IA32_QM_CTR on RDT systems. - */ -enum resctrl_event_id { - QOS_L3_OCCUP_EVENT_ID = 0x01, - QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, - QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, -}; - /** * struct resctrl_staged_config - parsed configuration to be applied * @new_ctrl: new ctrl value to be loaded diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h new file mode 100644 index 0000000000000..f26450b3326b1 --- /dev/null +++ b/include/linux/resctrl_types.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025 Arm Ltd. + * Based on arch/x86/kernel/cpu/resctrl/internal.h + */ + +#ifndef __LINUX_RESCTRL_TYPES_H +#define __LINUX_RESCTRL_TYPES_H + +/* Reads to Local DRAM Memory */ +#define READS_TO_LOCAL_MEM BIT(0) + +/* Reads to Remote DRAM Memory */ +#define READS_TO_REMOTE_MEM BIT(1) + +/* Non-Temporal Writes to Local Memory */ +#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2) + +/* Non-Temporal Writes to Remote Memory */ +#define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3) + +/* Reads to Local Memory the system identifies as "Slow Memory" */ +#define READS_TO_LOCAL_S_MEM BIT(4) + +/* Reads to Remote Memory the system identifies as "Slow Memory" */ +#define READS_TO_REMOTE_S_MEM BIT(5) + +/* Dirty Victims to All Types of Memory */ +#define DIRTY_VICTIMS_TO_ALL_MEM BIT(6) + +/* Max event bits supported */ +#define MAX_EVT_CONFIG_BITS GENMASK(6, 0) + +enum resctrl_res_level { + RDT_RESOURCE_L3, + RDT_RESOURCE_L2, + RDT_RESOURCE_MBA, + RDT_RESOURCE_SMBA, + + /* Must be the last */ + RDT_NUM_RESOURCES, +}; + +/* + * Event IDs, the values match those used to program IA32_QM_EVTSEL before + * reading IA32_QM_CTR on RDT systems. + */ +enum resctrl_event_id { + QOS_L3_OCCUP_EVENT_ID = 0x01, + QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, + QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, +}; + +#endif /* __LINUX_RESCTRL_TYPES_H */ From 9be68b144a5b539c8503f2944888f7a30e669291 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:36:59 +0000 Subject: [PATCH 14/30] x86/resctrl: Add an arch helper to reset one resource On umount(), resctrl resets each resource back to its default configuration. It only ever does this for all resources in one go. reset_all_ctrls() is architecture specific as it works with struct rdt_hw_resource. Make reset_all_ctrls() an arch helper that resets one resource. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Reviewed-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Shaopeng Tan Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-15-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 9 +++++---- include/linux/resctrl.h | 9 +++++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index b2dad689e780c..9eb57ebb36c64 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2867,7 +2867,7 @@ static int rdt_init_fs_context(struct fs_context *fc) return 0; } -static int reset_all_ctrls(struct rdt_resource *r) +void resctrl_arch_reset_all_ctrls(struct rdt_resource *r) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); struct rdt_hw_ctrl_domain *hw_dom; @@ -2896,7 +2896,7 @@ static int reset_all_ctrls(struct rdt_resource *r) smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1); } - return 0; + return; } /* @@ -3015,9 +3015,10 @@ static void rdt_kill_sb(struct super_block *sb) rdt_disable_ctx(); - /*Put everything back to default values. */ + /* Put everything back to default values. */ for_each_alloc_capable_rdt_resource(r) - reset_all_ctrls(r); + resctrl_arch_reset_all_ctrls(r); + rmdir_all_sub(); rdt_pseudo_lock_release(); rdtgroup_default.mode = RDT_MODE_SHAREABLE; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 326f7ba220e79..487b9657a7b52 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -394,6 +394,15 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, */ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d); +/** + * resctrl_arch_reset_all_ctrls() - Reset the control for each CLOSID to its + * default. + * @r: The resctrl resource to reset. + * + * This can be called from any CPU. + */ +void resctrl_arch_reset_all_ctrls(struct rdt_resource *r); + extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; From 011842727fa449f834c17b69d1273d88eb6e3309 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:37:00 +0000 Subject: [PATCH 15/30] x86/resctrl: Move monitor exit work to a resctrl exit call rdt_put_mon_l3_config() is called via the architecture's resctrl_arch_exit() call, and appears to free the rmid_ptrs[] and closid_num_dirty_rmid[] arrays. In reality this code is marked __exit, and is removed by the linker as resctrl can't be built as a module. To separate the filesystem and architecture parts of resctrl, this free()ing work needs to be triggered by the filesystem, as these structures belong to the filesystem code. Rename rdt_put_mon_l3_config() to resctrl_mon_resource_exit() and call it from resctrl_exit(). The kfree() is currently dependent on r->mon_capable. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-16-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 5 ----- arch/x86/kernel/cpu/resctrl/internal.h | 2 +- arch/x86/kernel/cpu/resctrl/monitor.c | 12 +++++++++--- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 2 ++ 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 850cb8643c4a5..a1577bdd3c901 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -1074,14 +1074,9 @@ late_initcall(resctrl_arch_late_init); static void __exit resctrl_arch_exit(void) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - cpuhp_remove_state(rdt_online); resctrl_exit(); - - if (r->mon_capable) - rdt_put_mon_l3_config(); } __exitcall(resctrl_arch_exit); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 5f3713fb2eaf1..73005ca2dda1a 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -586,7 +586,7 @@ void closid_free(int closid); int alloc_rmid(u32 closid); void free_rmid(u32 closid, u32 rmid); int rdt_get_mon_l3_config(struct rdt_resource *r); -void __exit rdt_put_mon_l3_config(void); +void __exit resctrl_mon_resource_exit(void); bool __init rdt_cpu_has(int flag); void mon_event_count(void *info); int rdtgroup_mondata_show(struct seq_file *m, void *arg); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index e8388d19a579f..15e8c0190bfc6 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -1040,10 +1040,13 @@ static int dom_data_init(struct rdt_resource *r) return err; } -static void __exit dom_data_exit(void) +static void __exit dom_data_exit(struct rdt_resource *r) { mutex_lock(&rdtgroup_mutex); + if (!r->mon_capable) + goto out_unlock; + if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { kfree(closid_num_dirty_rmid); closid_num_dirty_rmid = NULL; @@ -1052,6 +1055,7 @@ static void __exit dom_data_exit(void) kfree(rmid_ptrs); rmid_ptrs = NULL; +out_unlock: mutex_unlock(&rdtgroup_mutex); } @@ -1237,9 +1241,11 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) return 0; } -void __exit rdt_put_mon_l3_config(void) +void __exit resctrl_mon_resource_exit(void) { - dom_data_exit(); + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + + dom_data_exit(r); } void __init intel_rdt_mbm_apply_quirk(void) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 9eb57ebb36c64..42c48e79364d6 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -4296,4 +4296,6 @@ void __exit resctrl_exit(void) debugfs_remove_recursive(debugfs_resctrl); unregister_filesystem(&rdt_fs_type); sysfs_remove_mount_point(fs_kobj, "resctrl"); + + resctrl_mon_resource_exit(); } From 4b6bdbf27fcee16944911155293771b880344ef0 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:37:01 +0000 Subject: [PATCH 16/30] x86/resctrl: Move monitor init work to a resctrl init call rdt_get_mon_l3_config() is called from the arch's resctrl_arch_late_init(), and initialises both architecture specific fields, such as hw_res->mon_scale and resctrl filesystem fields by calling dom_data_init(). To separate the filesystem and architecture parts of resctrl, this function needs splitting up. Add resctrl_mon_resource_init() to do the filesystem specific work, and call it from resctrl_init(). This runs later, but is still before the filesystem is mounted and the rmid_ptrs[] array can be used. [ bp: Massage commit message. ] Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-17-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/internal.h | 3 +- arch/x86/kernel/cpu/resctrl/monitor.c | 40 ++++++++++++++++++++------ arch/x86/kernel/cpu/resctrl/rdtgroup.c | 22 +++++++++++++- 3 files changed, 54 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 73005ca2dda1a..70fbb902e85ea 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -586,13 +586,14 @@ void closid_free(int closid); int alloc_rmid(u32 closid); void free_rmid(u32 closid, u32 rmid); int rdt_get_mon_l3_config(struct rdt_resource *r); -void __exit resctrl_mon_resource_exit(void); +void resctrl_mon_resource_exit(void); bool __init rdt_cpu_has(int flag); void mon_event_count(void *info); int rdtgroup_mondata_show(struct seq_file *m, void *arg); void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, cpumask_t *cpumask, int evtid, int first); +int __init resctrl_mon_resource_init(void); void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, int exclude_cpu); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 15e8c0190bfc6..1730ba8148347 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -1040,7 +1040,7 @@ static int dom_data_init(struct rdt_resource *r) return err; } -static void __exit dom_data_exit(struct rdt_resource *r) +static void dom_data_exit(struct rdt_resource *r) { mutex_lock(&rdtgroup_mutex); @@ -1176,12 +1176,40 @@ static __init int snc_get_config(void) return ret; } +/** + * resctrl_mon_resource_init() - Initialise global monitoring structures. + * + * Allocate and initialise global monitor resources that do not belong to a + * specific domain. i.e. the rmid_ptrs[] used for the limbo and free lists. + * Called once during boot after the struct rdt_resource's have been configured + * but before the filesystem is mounted. + * Resctrl's cpuhp callbacks may be called before this point to bring a domain + * online. + * + * Returns 0 for success, or -ENOMEM. + */ +int __init resctrl_mon_resource_init(void) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + int ret; + + if (!r->mon_capable) + return 0; + + ret = dom_data_init(r); + if (ret) + return ret; + + l3_mon_evt_init(r); + + return 0; +} + int __init rdt_get_mon_l3_config(struct rdt_resource *r) { unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); unsigned int threshold; - int ret; snc_nodes_per_l3_cache = snc_get_config(); @@ -1211,10 +1239,6 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) */ resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(threshold); - ret = dom_data_init(r); - if (ret) - return ret; - if (rdt_cpu_has(X86_FEATURE_BMEC)) { u32 eax, ebx, ecx, edx; @@ -1234,14 +1258,12 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) } } - l3_mon_evt_init(r); - r->mon_capable = true; return 0; } -void __exit resctrl_mon_resource_exit(void) +void resctrl_mon_resource_exit(void) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 42c48e79364d6..badac3f5da72f 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -4102,6 +4102,19 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d mutex_unlock(&rdtgroup_mutex); } +/** + * domain_setup_mon_state() - Initialise domain monitoring structures. + * @r: The resource for the newly online domain. + * @d: The newly online domain. + * + * Allocate monitor resources that belong to this domain. + * Called when the first CPU of a domain comes online, regardless of whether + * the filesystem is mounted. + * During boot this may be called before global allocations have been made by + * resctrl_mon_resource_init(). + * + * Returns 0 for success, or -ENOMEM. + */ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d) { u32 idx_limit = resctrl_arch_system_num_rmid_idx(); @@ -4252,10 +4265,16 @@ int __init resctrl_init(void) rdtgroup_setup_default(); - ret = sysfs_create_mount_point(fs_kobj, "resctrl"); + ret = resctrl_mon_resource_init(); if (ret) return ret; + ret = sysfs_create_mount_point(fs_kobj, "resctrl"); + if (ret) { + resctrl_mon_resource_exit(); + return ret; + } + ret = register_filesystem(&rdt_fs_type); if (ret) goto cleanup_mountpoint; @@ -4287,6 +4306,7 @@ int __init resctrl_init(void) cleanup_mountpoint: sysfs_remove_mount_point(fs_kobj, "resctrl"); + resctrl_mon_resource_exit(); return ret; } From 88464bff035ecb28f8bf52dd9728fdc1aca228f9 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:37:02 +0000 Subject: [PATCH 17/30] x86/resctrl: Rewrite and move the for_each_*_rdt_resource() walkers The for_each_*_rdt_resource() helpers walk the architecture's array of structures, using the resctrl visible part as an iterator. These became over-complex when the structures were split into a filesystem and architecture-specific struct. This approach avoided the need to touch every call site, and was done before there was a helper to retrieve a resource by rid. Once the filesystem parts of resctrl are moved to /fs/, both the arch's resource array, and the definition of those structures is no longer accessible. To support resctrl, each architecture would have to provide equally complex macros. Rewrite the macro to make use of resctrl_arch_get_resource(), and move these to include/linux/resctrl.h so existing x86 arch code continues to use them. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-18-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/internal.h | 29 -------------------------- include/linux/resctrl.h | 18 ++++++++++++++++ 2 files changed, 18 insertions(+), 29 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 70fbb902e85ea..82dbc1606663e 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -475,14 +475,6 @@ extern struct rdtgroup rdtgroup_default; extern struct dentry *debugfs_resctrl; extern enum resctrl_event_id mba_mbps_default_event; -static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res) -{ - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(res); - - hw_res++; - return &hw_res->r_resctrl; -} - static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l) { return rdt_resources_all[l].cdp_enabled; @@ -492,27 +484,6 @@ int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d); -/* - * To return the common struct rdt_resource, which is contained in struct - * rdt_hw_resource, walk the resctrl member of struct rdt_hw_resource. - */ -#define for_each_rdt_resource(r) \ - for (r = &rdt_resources_all[0].r_resctrl; \ - r <= &rdt_resources_all[RDT_NUM_RESOURCES - 1].r_resctrl; \ - r = resctrl_inc(r)) - -#define for_each_capable_rdt_resource(r) \ - for_each_rdt_resource(r) \ - if (r->alloc_capable || r->mon_capable) - -#define for_each_alloc_capable_rdt_resource(r) \ - for_each_rdt_resource(r) \ - if (r->alloc_capable) - -#define for_each_mon_capable_rdt_resource(r) \ - for_each_rdt_resource(r) \ - if (r->mon_capable) - /* CPUID.(EAX=10H, ECX=ResID=1).EAX */ union cpuid_0x10_1_eax { struct { diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 487b9657a7b52..a392480dc4b63 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -26,6 +26,24 @@ int proc_resctrl_show(struct seq_file *m, /* max value for struct rdt_domain's mbps_val */ #define MBA_MAX_MBPS U32_MAX +/* Walk all possible resources, with variants for only controls or monitors. */ +#define for_each_rdt_resource(_r) \ + for ((_r) = resctrl_arch_get_resource(0); \ + (_r) && (_r)->rid < RDT_NUM_RESOURCES; \ + (_r) = resctrl_arch_get_resource((_r)->rid + 1)) + +#define for_each_capable_rdt_resource(r) \ + for_each_rdt_resource((r)) \ + if ((r)->alloc_capable || (r)->mon_capable) + +#define for_each_alloc_capable_rdt_resource(r) \ + for_each_rdt_resource((r)) \ + if ((r)->alloc_capable) + +#define for_each_mon_capable_rdt_resource(r) \ + for_each_rdt_resource((r)) \ + if ((r)->mon_capable) + /** * enum resctrl_conf_type - The type of configuration. * @CDP_NONE: No prioritisation, both code and data are controlled or monitored. From d012b66a16618509242a51f5f6c9b19868ba5159 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:37:03 +0000 Subject: [PATCH 18/30] x86/resctrl: Move the is_mbm_*_enabled() helpers to asm/resctrl.h The architecture specific parts of resctrl provide helpers like is_mbm_total_enabled() and is_mbm_local_enabled() to hide accesses to the rdt_mon_features bitmap. Exposing a group of helpers between the architecture and filesystem code is preferable to a single unsigned-long like rdt_mon_features. Helpers can be more readable and have a well defined behaviour, while allowing architectures to hide more complex behaviour. Once the filesystem parts of resctrl are moved, these existing helpers can no longer live in internal.h. Move them to include/linux/resctrl.h Once these are exposed to the wider kernel, they should have a 'resctrl_arch_' prefix, to fit the rest of the arch<->fs interface. Move and rename the helpers that touch rdt_mon_features directly. is_mbm_event() and is_mbm_enabled() are only called from rdtgroup.c, so can be moved into that file. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tony Luck Reviewed-by: Fenghua Yu Reviewed-by: Reinette Chatre Reviewed-by: Babu Moger Reviewed-by: Shaopeng Tan Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-19-james.morse@arm.com --- arch/x86/include/asm/resctrl.h | 16 +++++++++ arch/x86/kernel/cpu/resctrl/core.c | 8 ++--- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 4 +-- arch/x86/kernel/cpu/resctrl/internal.h | 27 --------------- arch/x86/kernel/cpu/resctrl/monitor.c | 16 ++++----- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 40 +++++++++++++++-------- 6 files changed, 56 insertions(+), 55 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 52f2326e2b1e6..6d4c7ea2c9e34 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -42,6 +42,7 @@ DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state); extern bool rdt_alloc_capable; extern bool rdt_mon_capable; +extern unsigned int rdt_mon_features; DECLARE_STATIC_KEY_FALSE(rdt_enable_key); DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key); @@ -81,6 +82,21 @@ static inline void resctrl_arch_disable_mon(void) static_branch_dec_cpuslocked(&rdt_enable_key); } +static inline bool resctrl_arch_is_llc_occupancy_enabled(void) +{ + return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID)); +} + +static inline bool resctrl_arch_is_mbm_total_enabled(void) +{ + return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID)); +} + +static inline bool resctrl_arch_is_mbm_local_enabled(void) +{ + return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID)); +} + /* * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR * diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index a1577bdd3c901..eba210db4a7bd 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -453,13 +453,13 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom) { size_t tsize; - if (is_mbm_total_enabled()) { + if (resctrl_arch_is_mbm_total_enabled()) { tsize = sizeof(*hw_dom->arch_mbm_total); hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL); if (!hw_dom->arch_mbm_total) return -ENOMEM; } - if (is_mbm_local_enabled()) { + if (resctrl_arch_is_mbm_local_enabled()) { tsize = sizeof(*hw_dom->arch_mbm_local); hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL); if (!hw_dom->arch_mbm_local) { @@ -908,9 +908,9 @@ static __init bool get_rdt_mon_resources(void) if (!rdt_mon_features) return false; - if (is_mbm_local_enabled()) + if (resctrl_arch_is_mbm_local_enabled()) mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID; - else if (is_mbm_total_enabled()) + else if (resctrl_arch_is_mbm_total_enabled()) mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID; return !rdt_get_mon_l3_config(r); diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 763317ea22565..1ecc93282b7d7 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -559,12 +559,12 @@ ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of, rdt_last_cmd_clear(); if (!strcmp(buf, "mbm_local_bytes")) { - if (is_mbm_local_enabled()) + if (resctrl_arch_is_mbm_local_enabled()) rdtgrp->mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID; else ret = -EINVAL; } else if (!strcmp(buf, "mbm_total_bytes")) { - if (is_mbm_total_enabled()) + if (resctrl_arch_is_mbm_total_enabled()) rdtgrp->mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID; else ret = -EINVAL; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 82dbc1606663e..4a5996d1e0607 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -156,7 +156,6 @@ struct rmid_read { void *arch_mon_ctx; }; -extern unsigned int rdt_mon_features; extern struct list_head resctrl_schema_all; extern bool resctrl_mounted; @@ -406,32 +405,6 @@ struct msr_param { u32 high; }; -static inline bool is_llc_occupancy_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID)); -} - -static inline bool is_mbm_total_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID)); -} - -static inline bool is_mbm_local_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID)); -} - -static inline bool is_mbm_enabled(void) -{ - return (is_mbm_total_enabled() || is_mbm_local_enabled()); -} - -static inline bool is_mbm_event(int e) -{ - return (e >= QOS_L3_MBM_TOTAL_EVENT_ID && - e <= QOS_L3_MBM_LOCAL_EVENT_ID); -} - /** * struct rdt_hw_resource - arch private attributes of a resctrl resource * @r_resctrl: Attributes of the resource used directly by resctrl. diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 1730ba8148347..d883ed56ec907 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -295,11 +295,11 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain * { struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); - if (is_mbm_total_enabled()) + if (resctrl_arch_is_mbm_total_enabled()) memset(hw_dom->arch_mbm_total, 0, sizeof(*hw_dom->arch_mbm_total) * r->num_rmid); - if (is_mbm_local_enabled()) + if (resctrl_arch_is_mbm_local_enabled()) memset(hw_dom->arch_mbm_local, 0, sizeof(*hw_dom->arch_mbm_local) * r->num_rmid); } @@ -569,7 +569,7 @@ void free_rmid(u32 closid, u32 rmid) entry = __rmid_entry(idx); - if (is_llc_occupancy_enabled()) + if (resctrl_arch_is_llc_occupancy_enabled()) add_rmid_to_limbo(entry); else list_add_tail(&entry->list, &rmid_free_lru); @@ -852,10 +852,10 @@ static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d, * This is protected from concurrent reads from user as both * the user and overflow handler hold the global mutex. */ - if (is_mbm_total_enabled()) + if (resctrl_arch_is_mbm_total_enabled()) mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_TOTAL_EVENT_ID); - if (is_mbm_local_enabled()) + if (resctrl_arch_is_mbm_local_enabled()) mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_LOCAL_EVENT_ID); } @@ -1085,11 +1085,11 @@ static void l3_mon_evt_init(struct rdt_resource *r) { INIT_LIST_HEAD(&r->evt_list); - if (is_llc_occupancy_enabled()) + if (resctrl_arch_is_llc_occupancy_enabled()) list_add_tail(&llc_occupancy_event.list, &r->evt_list); - if (is_mbm_total_enabled()) + if (resctrl_arch_is_mbm_total_enabled()) list_add_tail(&mbm_total_event.list, &r->evt_list); - if (is_mbm_local_enabled()) + if (resctrl_arch_is_mbm_local_enabled()) list_add_tail(&mbm_local_event.list, &r->evt_list); } diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index badac3f5da72f..eb32fbc3abea7 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -117,6 +117,18 @@ void rdt_staged_configs_clear(void) } } +static bool resctrl_is_mbm_enabled(void) +{ + return (resctrl_arch_is_mbm_total_enabled() || + resctrl_arch_is_mbm_local_enabled()); +} + +static bool resctrl_is_mbm_event(int e) +{ + return (e >= QOS_L3_MBM_TOTAL_EVENT_ID && + e <= QOS_L3_MBM_LOCAL_EVENT_ID); +} + /* * Trivial allocator for CLOSIDs. Since h/w only supports a small number, * we can keep a bitmap of free CLOSIDs in a single integer. @@ -164,7 +176,7 @@ static int closid_alloc(void) lockdep_assert_held(&rdtgroup_mutex); if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) && - is_llc_occupancy_enabled()) { + resctrl_arch_is_llc_occupancy_enabled()) { cleanest_closid = resctrl_find_cleanest_closid(); if (cleanest_closid < 0) return cleanest_closid; @@ -2378,7 +2390,7 @@ static bool supports_mba_mbps(void) struct rdt_resource *rmbm = resctrl_arch_get_resource(RDT_RESOURCE_L3); struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); - return (is_mbm_enabled() && + return (resctrl_is_mbm_enabled() && r->alloc_capable && is_mba_linear() && r->ctrl_scope == rmbm->mon_scope); } @@ -2756,7 +2768,7 @@ static int rdt_get_tree(struct fs_context *fc) if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable()) resctrl_mounted = true; - if (is_mbm_enabled()) { + if (resctrl_is_mbm_enabled()) { r = resctrl_arch_get_resource(RDT_RESOURCE_L3); list_for_each_entry(dom, &r->mon_domains, hdr.list) mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL, @@ -3125,7 +3137,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, if (ret) return ret; - if (!do_sum && is_mbm_event(mevt->evtid)) + if (!do_sum && resctrl_is_mbm_event(mevt->evtid)) mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true); } @@ -4082,9 +4094,9 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d if (resctrl_mounted && resctrl_arch_mon_capable()) rmdir_mondata_subdir_allrdtgrp(r, d); - if (is_mbm_enabled()) + if (resctrl_is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); - if (is_llc_occupancy_enabled() && has_busy_rmid(d)) { + if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) { /* * When a package is going down, forcefully * decrement rmid->ebusy. There is no way to know @@ -4120,12 +4132,12 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain u32 idx_limit = resctrl_arch_system_num_rmid_idx(); size_t tsize; - if (is_llc_occupancy_enabled()) { + if (resctrl_arch_is_llc_occupancy_enabled()) { d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL); if (!d->rmid_busy_llc) return -ENOMEM; } - if (is_mbm_total_enabled()) { + if (resctrl_arch_is_mbm_total_enabled()) { tsize = sizeof(*d->mbm_total); d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL); if (!d->mbm_total) { @@ -4133,7 +4145,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain return -ENOMEM; } } - if (is_mbm_local_enabled()) { + if (resctrl_arch_is_mbm_local_enabled()) { tsize = sizeof(*d->mbm_local); d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL); if (!d->mbm_local) { @@ -4172,13 +4184,13 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) if (err) goto out_unlock; - if (is_mbm_enabled()) { + if (resctrl_is_mbm_enabled()) { INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow); mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL, RESCTRL_PICK_ANY_CPU); } - if (is_llc_occupancy_enabled()) + if (resctrl_arch_is_llc_occupancy_enabled()) INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); /* @@ -4233,12 +4245,12 @@ void resctrl_offline_cpu(unsigned int cpu) d = get_mon_domain_from_cpu(cpu, l3); if (d) { - if (is_mbm_enabled() && cpu == d->mbm_work_cpu) { + if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) { cancel_delayed_work(&d->mbm_over); mbm_setup_overflow_handler(d, 0, cpu); } - if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu && - has_busy_rmid(d)) { + if (resctrl_arch_is_llc_occupancy_enabled() && + cpu == d->cqm_work_cpu && has_busy_rmid(d)) { cancel_delayed_work(&d->cqm_limbo); cqm_setup_limbo_handler(d, 0, cpu); } From d81826f87a80a86cc5963ff3c44f05700ded3fc9 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:37:04 +0000 Subject: [PATCH 19/30] x86/resctrl: Add resctrl_arch_is_evt_configurable() to abstract BMEC When BMEC is supported the resctrl event can be configured in a number of ways. This depends on architecture support. rdt_get_mon_l3_config() modifies the struct mon_evt and calls resctrl_file_fflags_init() to create the files that allow the configuration. Splitting this into separate architecture and filesystem parts would require the struct mon_evt and resctrl_file_fflags_init() to be exposed. Instead, add resctrl_arch_is_evt_configurable(), and use this from resctrl_mon_resource_init() to initialise struct mon_evt and call resctrl_file_fflags_init(). resctrl_arch_is_evt_configurable() calls rdt_cpu_has() so it doesn't obviously benefit from being inlined. Putting it in core.c will allow rdt_cpu_has() to eventually become static. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Fenghua Yu Reviewed-by: Reinette Chatre Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-20-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 15 +++++++++++++++ arch/x86/kernel/cpu/resctrl/monitor.c | 22 +++++++++++----------- include/linux/resctrl.h | 2 ++ 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index eba210db4a7bd..6f0d1bbba9019 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -830,6 +830,21 @@ bool __init rdt_cpu_has(int flag) return ret; } +__init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt) +{ + if (!rdt_cpu_has(X86_FEATURE_BMEC)) + return false; + + switch (evt) { + case QOS_L3_MBM_TOTAL_EVENT_ID: + return rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL); + case QOS_L3_MBM_LOCAL_EVENT_ID: + return rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL); + default: + return false; + } +} + static __init bool get_mem_config(void) { struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA]; diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index d883ed56ec907..27d983121b0a5 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -1202,6 +1202,17 @@ int __init resctrl_mon_resource_init(void) l3_mon_evt_init(r); + if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) { + mbm_total_event.configurable = true; + resctrl_file_fflags_init("mbm_total_bytes_config", + RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + } + if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) { + mbm_local_event.configurable = true; + resctrl_file_fflags_init("mbm_local_bytes_config", + RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + } + return 0; } @@ -1245,17 +1256,6 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) /* Detect list of bandwidth sources that can be tracked */ cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx); hw_res->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; - - if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) { - mbm_total_event.configurable = true; - resctrl_file_fflags_init("mbm_total_bytes_config", - RFTYPE_MON_INFO | RFTYPE_RES_CACHE); - } - if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) { - mbm_local_event.configurable = true; - resctrl_file_fflags_init("mbm_local_bytes_config", - RFTYPE_MON_INFO | RFTYPE_RES_CACHE); - } } r->mon_capable = true; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index a392480dc4b63..cc76f308e6f38 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -309,6 +309,8 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); +__init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); + /* * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. From 650680d651aaf409f097ad904c3fea6c4b3a0884 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:37:05 +0000 Subject: [PATCH 20/30] x86/resctrl: Change mon_event_config_{read,write}() to be arch helpers mon_event_config_{read,write}() are called via IPI and access model specific registers to do their work. To support another architecture, this needs abstracting. Rename mon_event_config_{read,write}() to have a "resctrl_arch_" prefix, and move their struct mon_config_info parameter into . This allows another architecture to supply an implementation of these. As struct mon_config_info is now exposed globally, give it a 'resctrl_' prefix. MPAM systems need access to the domain to do this work, add the resource and domain to struct resctrl_mon_config_info. Co-developed-by: Dave Martin Signed-off-by: Dave Martin Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-21-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 46 +++++++++++++------------- include/linux/resctrl.h | 31 +++++++++++++++++ 2 files changed, 54 insertions(+), 23 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index eb32fbc3abea7..e7d1d8b6983da 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1580,11 +1580,6 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, return ret; } -struct mon_config_info { - u32 evtid; - u32 mon_config; -}; - #define INVALID_CONFIG_INDEX UINT_MAX /** @@ -1609,31 +1604,32 @@ static inline unsigned int mon_event_config_index_get(u32 evtid) } } -static void mon_event_config_read(void *info) +void resctrl_arch_mon_event_config_read(void *_config_info) { - struct mon_config_info *mon_info = info; + struct resctrl_mon_config_info *config_info = _config_info; unsigned int index; u64 msrval; - index = mon_event_config_index_get(mon_info->evtid); + index = mon_event_config_index_get(config_info->evtid); if (index == INVALID_CONFIG_INDEX) { - pr_warn_once("Invalid event id %d\n", mon_info->evtid); + pr_warn_once("Invalid event id %d\n", config_info->evtid); return; } rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval); /* Report only the valid event configuration bits */ - mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS; + config_info->mon_config = msrval & MAX_EVT_CONFIG_BITS; } -static void mondata_config_read(struct rdt_mon_domain *d, struct mon_config_info *mon_info) +static void mondata_config_read(struct resctrl_mon_config_info *mon_info) { - smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_read, mon_info, 1); + smp_call_function_any(&mon_info->d->hdr.cpu_mask, + resctrl_arch_mon_event_config_read, mon_info, 1); } static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid) { - struct mon_config_info mon_info; + struct resctrl_mon_config_info mon_info; struct rdt_mon_domain *dom; bool sep = false; @@ -1644,9 +1640,11 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid if (sep) seq_puts(s, ";"); - memset(&mon_info, 0, sizeof(struct mon_config_info)); + memset(&mon_info, 0, sizeof(struct resctrl_mon_config_info)); + mon_info.r = r; + mon_info.d = dom; mon_info.evtid = evtid; - mondata_config_read(dom, &mon_info); + mondata_config_read(&mon_info); seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config); sep = true; @@ -1679,30 +1677,32 @@ static int mbm_local_bytes_config_show(struct kernfs_open_file *of, return 0; } -static void mon_event_config_write(void *info) +void resctrl_arch_mon_event_config_write(void *_config_info) { - struct mon_config_info *mon_info = info; + struct resctrl_mon_config_info *config_info = _config_info; unsigned int index; - index = mon_event_config_index_get(mon_info->evtid); + index = mon_event_config_index_get(config_info->evtid); if (index == INVALID_CONFIG_INDEX) { - pr_warn_once("Invalid event id %d\n", mon_info->evtid); + pr_warn_once("Invalid event id %d\n", config_info->evtid); return; } - wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0); + wrmsr(MSR_IA32_EVT_CFG_BASE + index, config_info->mon_config, 0); } static void mbm_config_write_domain(struct rdt_resource *r, struct rdt_mon_domain *d, u32 evtid, u32 val) { - struct mon_config_info mon_info = {0}; + struct resctrl_mon_config_info mon_info = {0}; /* * Read the current config value first. If both are the same then * no need to write it again. */ + mon_info.r = r; + mon_info.d = d; mon_info.evtid = evtid; - mondata_config_read(d, &mon_info); + mondata_config_read(&mon_info); if (mon_info.mon_config == val) return; @@ -1714,7 +1714,7 @@ static void mbm_config_write_domain(struct rdt_resource *r, * are scoped at the domain level. Writing any of these MSRs * on one CPU is observed by all the CPUs in the domain. */ - smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_write, + smp_call_function_any(&d->hdr.cpu_mask, resctrl_arch_mon_event_config_write, &mon_info, 1); /* diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index cc76f308e6f38..90b6563cf5326 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -270,6 +270,13 @@ struct resctrl_cpu_defaults { u32 rmid; }; +struct resctrl_mon_config_info { + struct rdt_resource *r; + struct rdt_mon_domain *d; + u32 evtid; + u32 mon_config; +}; + /** * resctrl_arch_sync_cpu_closid_rmid() - Refresh this CPU's CLOSID and RMID. * Call via IPI. @@ -311,6 +318,30 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); __init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); +/** + * resctrl_arch_mon_event_config_write() - Write the config for an event. + * @config_info: struct resctrl_mon_config_info describing the resource, domain + * and event. + * + * Reads resource, domain and eventid from @config_info and writes the + * event config_info->mon_config into hardware. + * + * Called via IPI to reach a CPU that is a member of the specified domain. + */ +void resctrl_arch_mon_event_config_write(void *config_info); + +/** + * resctrl_arch_mon_event_config_read() - Read the config for an event. + * @config_info: struct resctrl_mon_config_info describing the resource, domain + * and event. + * + * Reads resource, domain and eventid from @config_info and reads the + * hardware config value into config_info->mon_config. + * + * Called via IPI to reach a CPU that is a member of the specified domain. + */ +void resctrl_arch_mon_event_config_read(void *config_info); + /* * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. From 37bae1756734b065f5e5cddc54ad121ff0c8db51 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:37:06 +0000 Subject: [PATCH 21/30] x86/resctrl: Move mba_mbps_default_event init to filesystem code mba_mbps_default_event is initialised based on whether mbm_local or mbm_total is supported. In the case of both, it is initialised to mbm_local. mba_mbps_default_event is initialised in core.c's get_rdt_mon_resources(), while all the readers are in rdtgroup.c. After this code is split into architecture-specific and filesystem code, get_rdt_mon_resources() remains part of the architecture code, which would mean mba_mbps_default_event has to be exposed by the filesystem code. Move the initialisation to the filesystem's resctrl_mon_resource_init(). Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Reviewed-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Shaopeng Tan Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-22-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 5 ----- arch/x86/kernel/cpu/resctrl/monitor.c | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 6f0d1bbba9019..b9b74f53b8eef 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -923,11 +923,6 @@ static __init bool get_rdt_mon_resources(void) if (!rdt_mon_features) return false; - if (resctrl_arch_is_mbm_local_enabled()) - mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID; - else if (resctrl_arch_is_mbm_total_enabled()) - mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID; - return !rdt_get_mon_l3_config(r); } diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 27d983121b0a5..306b06ba32db3 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -1213,6 +1213,11 @@ int __init resctrl_mon_resource_init(void) RFTYPE_MON_INFO | RFTYPE_RES_CACHE); } + if (resctrl_arch_is_mbm_local_enabled()) + mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID; + else if (resctrl_arch_is_mbm_total_enabled()) + mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID; + return 0; } From c32a7d7777800b61a5c9272cc94fe21aa919b305 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:37:07 +0000 Subject: [PATCH 22/30] x86/resctrl: Move mbm_cfg_mask to struct rdt_resource The mbm_cfg_mask field lists the bits that user-space can set when configuring an event. This value is output via the last_cmd_status file. Once the filesystem parts of resctrl are moved to live in /fs/, the struct rdt_hw_resource is inaccessible to the filesystem code. Because this value is output to user-space, it has to be accessible to the filesystem code. Move it to struct rdt_resource. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-23-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/internal.h | 3 --- arch/x86/kernel/cpu/resctrl/monitor.c | 2 +- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 5 ++--- include/linux/resctrl.h | 3 +++ 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 4a5996d1e0607..725f223ea07be 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -417,8 +417,6 @@ struct msr_param { * @msr_update: Function pointer to update QOS MSRs * @mon_scale: cqm counter * mon_scale = occupancy in bytes * @mbm_width: Monitor width, to detect and correct for overflow. - * @mbm_cfg_mask: Bandwidth sources that can be tracked when Bandwidth - * Monitoring Event Configuration (BMEC) is supported. * @cdp_enabled: CDP state of this resource * * Members of this structure are either private to the architecture @@ -432,7 +430,6 @@ struct rdt_hw_resource { void (*msr_update)(struct msr_param *m); unsigned int mon_scale; unsigned int mbm_width; - unsigned int mbm_cfg_mask; bool cdp_enabled; }; diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 306b06ba32db3..83f90128d7688 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -1260,7 +1260,7 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) /* Detect list of bandwidth sources that can be tracked */ cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx); - hw_res->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; + r->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; } r->mon_capable = true; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index e7d1d8b6983da..a388ef66ef4ca 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1731,7 +1731,6 @@ static void mbm_config_write_domain(struct rdt_resource *r, static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) { - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); char *dom_str = NULL, *id_str; unsigned long dom_id, val; struct rdt_mon_domain *d; @@ -1758,9 +1757,9 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) } /* Value from user cannot be more than the supported set of events */ - if ((val & hw_res->mbm_cfg_mask) != val) { + if ((val & r->mbm_cfg_mask) != val) { rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n", - hw_res->mbm_cfg_mask); + r->mbm_cfg_mask); return -EINVAL; } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 90b6563cf5326..914df24ffe483 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -217,6 +217,8 @@ enum resctrl_schema_fmt { * @name: Name to use in "schemata" file. * @schema_fmt: Which format string and parser is used for this schema. * @evt_list: List of monitoring events + * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth + * monitoring events can be configured. * @cdp_capable: Is the CDP feature available on this resource */ struct rdt_resource { @@ -233,6 +235,7 @@ struct rdt_resource { char *name; enum resctrl_schema_fmt schema_fmt; struct list_head evt_list; + unsigned int mbm_cfg_mask; bool cdp_capable; }; From 7d0ec14c64a107a548616deace93a1913e5d68ed Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:37:08 +0000 Subject: [PATCH 23/30] x86/resctrl: Add resctrl_arch_ prefix to pseudo lock functions resctrl's pseudo lock has some copy-to-cache and measurement functions that are micro-architecture specific. For example, pseudo_lock_fn() is not at all portable. Label these 'resctrl_arch_' so they stay under /arch/x86. To expose these functions to the filesystem code they need an entry in a header file, and can't be marked static. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-24-james.morse@arm.com --- arch/x86/include/asm/resctrl.h | 5 ++++ arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 36 ++++++++++++----------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 6d4c7ea2c9e34..86407dbde583e 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -203,6 +203,11 @@ static inline void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid, void *ctx) { }; +u64 resctrl_arch_get_prefetch_disable_bits(void); +int resctrl_arch_pseudo_lock_fn(void *_rdtgrp); +int resctrl_arch_measure_cycles_lat_fn(void *_plr); +int resctrl_arch_measure_l2_residency(void *_plr); +int resctrl_arch_measure_l3_residency(void *_plr); void resctrl_cpu_detect(struct cpuinfo_x86 *c); #else diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 42cc162f7fc91..1f42c1190d26d 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -61,7 +61,8 @@ static const struct class pseudo_lock_class = { }; /** - * get_prefetch_disable_bits - prefetch disable bits of supported platforms + * resctrl_arch_get_prefetch_disable_bits - prefetch disable bits of supported + * platforms * @void: It takes no parameters. * * Capture the list of platforms that have been validated to support @@ -75,13 +76,13 @@ static const struct class pseudo_lock_class = { * in the SDM. * * When adding a platform here also add support for its cache events to - * measure_cycles_perf_fn() + * resctrl_arch_measure_l*_residency() * * Return: * If platform is supported, the bits to disable hardware prefetchers, 0 * if platform is not supported. */ -static u64 get_prefetch_disable_bits(void) +u64 resctrl_arch_get_prefetch_disable_bits(void) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || boot_cpu_data.x86 != 6) @@ -408,7 +409,7 @@ static void pseudo_lock_free(struct rdtgroup *rdtgrp) } /** - * pseudo_lock_fn - Load kernel memory into cache + * resctrl_arch_pseudo_lock_fn - Load kernel memory into cache * @_rdtgrp: resource group to which pseudo-lock region belongs * * This is the core pseudo-locking flow. @@ -426,7 +427,7 @@ static void pseudo_lock_free(struct rdtgroup *rdtgrp) * * Return: 0. Waiter on waitqueue will be woken on completion. */ -static int pseudo_lock_fn(void *_rdtgrp) +int resctrl_arch_pseudo_lock_fn(void *_rdtgrp) { struct rdtgroup *rdtgrp = _rdtgrp; struct pseudo_lock_region *plr = rdtgrp->plr; @@ -712,7 +713,7 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) * Not knowing the bits to disable prefetching implies that this * platform does not support Cache Pseudo-Locking. */ - prefetch_disable_bits = get_prefetch_disable_bits(); + prefetch_disable_bits = resctrl_arch_get_prefetch_disable_bits(); if (prefetch_disable_bits == 0) { rdt_last_cmd_puts("Pseudo-locking not supported\n"); return -EINVAL; @@ -872,7 +873,8 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d) } /** - * measure_cycles_lat_fn - Measure cycle latency to read pseudo-locked memory + * resctrl_arch_measure_cycles_lat_fn - Measure cycle latency to read + * pseudo-locked memory * @_plr: pseudo-lock region to measure * * There is no deterministic way to test if a memory region is cached. One @@ -885,7 +887,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d) * * Return: 0. Waiter on waitqueue will be woken on completion. */ -static int measure_cycles_lat_fn(void *_plr) +int resctrl_arch_measure_cycles_lat_fn(void *_plr) { struct pseudo_lock_region *plr = _plr; u32 saved_low, saved_high; @@ -1069,7 +1071,7 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr, return 0; } -static int measure_l2_residency(void *_plr) +int resctrl_arch_measure_l2_residency(void *_plr) { struct pseudo_lock_region *plr = _plr; struct residency_counts counts = {0}; @@ -1107,7 +1109,7 @@ static int measure_l2_residency(void *_plr) return 0; } -static int measure_l3_residency(void *_plr) +int resctrl_arch_measure_l3_residency(void *_plr) { struct pseudo_lock_region *plr = _plr; struct residency_counts counts = {0}; @@ -1205,14 +1207,14 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel) plr->cpu = cpu; if (sel == 1) - thread = kthread_run_on_cpu(measure_cycles_lat_fn, plr, - cpu, "pseudo_lock_measure/%u"); + thread = kthread_run_on_cpu(resctrl_arch_measure_cycles_lat_fn, + plr, cpu, "pseudo_lock_measure/%u"); else if (sel == 2) - thread = kthread_run_on_cpu(measure_l2_residency, plr, - cpu, "pseudo_lock_measure/%u"); + thread = kthread_run_on_cpu(resctrl_arch_measure_l2_residency, + plr, cpu, "pseudo_lock_measure/%u"); else if (sel == 3) - thread = kthread_run_on_cpu(measure_l3_residency, plr, - cpu, "pseudo_lock_measure/%u"); + thread = kthread_run_on_cpu(resctrl_arch_measure_l3_residency, + plr, cpu, "pseudo_lock_measure/%u"); else goto out; @@ -1307,7 +1309,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) plr->thread_done = 0; - thread = kthread_run_on_cpu(pseudo_lock_fn, rdtgrp, + thread = kthread_run_on_cpu(resctrl_arch_pseudo_lock_fn, rdtgrp, plr->cpu, "pseudo_lock/%u"); if (IS_ERR(thread)) { ret = PTR_ERR(thread); From 7028840552a220ac5efe51a4b554195cd954a912 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:37:09 +0000 Subject: [PATCH 24/30] x86/resctrl: Allow an architecture to disable pseudo lock Pseudo-lock relies on knowledge of the micro-architecture to disable prefetchers etc. On arm64 these controls are typically secure only, meaning Linux can't access them. Arm's cache-lockdown feature works in a very different way. Resctrl's pseudo-lock isn't going to be used on arm64 platforms. Add a Kconfig symbol that can be selected by the architecture. This enables or disables building of the pseudo_lock.c file, and replaces the functions with stubs. An additional IS_ENABLED() check is needed in rdtgroup_mode_write() so that attempting to enable pseudo-lock reports an "Unknown or unsupported mode" to user-space via the last_cmd_status file. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-25-james.morse@arm.com --- arch/x86/Kconfig | 7 ++++ arch/x86/kernel/cpu/resctrl/Makefile | 5 +-- arch/x86/kernel/cpu/resctrl/internal.h | 49 +++++++++++++++++++++----- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 3 +- 4 files changed, 53 insertions(+), 11 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0e27ebd7e36a9..3cb3acdbc1071 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -505,6 +505,7 @@ config X86_CPU_RESCTRL depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) select KERNFS select PROC_CPU_RESCTRL if PROC_FS + select RESCTRL_FS_PSEUDO_LOCK help Enable x86 CPU resource control support. @@ -521,6 +522,12 @@ config X86_CPU_RESCTRL Say N if unsure. +config RESCTRL_FS_PSEUDO_LOCK + bool + help + Software mechanism to pin data in a cache portion using + micro-architecture specific knowledge. + config X86_FRED bool "Flexible Return and Event Delivery" depends on X86_64 diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile index 4a06c37b9cf11..0c13b0befd8a9 100644 --- a/arch/x86/kernel/cpu/resctrl/Makefile +++ b/arch/x86/kernel/cpu/resctrl/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o -obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o pseudo_lock.o +obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o +obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o +obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK) += pseudo_lock.o CFLAGS_pseudo_lock.o = -I$(src) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 725f223ea07be..8d35bb423aadd 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -512,14 +512,6 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_ctrl_domain unsigned long cbm); enum rdtgrp_mode rdtgroup_mode_by_closid(int closid); int rdtgroup_tasks_assigned(struct rdtgroup *r); -int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); -int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm); -bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d); -int rdt_pseudo_lock_init(void); -void rdt_pseudo_lock_release(void); -int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); -void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r); struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r); int closids_supported(void); @@ -551,4 +543,45 @@ void resctrl_file_fflags_init(const char *config, unsigned long fflags); void rdt_staged_configs_clear(void); bool closid_allocated(unsigned int closid); int resctrl_find_cleanest_closid(void); + +#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK +int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); +int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm); +bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d); +int rdt_pseudo_lock_init(void); +void rdt_pseudo_lock_release(void); +int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); +void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); +#else +static inline int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) +{ + return -EOPNOTSUPP; +} + +static inline int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) +{ + return -EOPNOTSUPP; +} + +static inline bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm) +{ + return false; +} + +static inline bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d) +{ + return false; +} + +static inline int rdt_pseudo_lock_init(void) { return 0; } +static inline void rdt_pseudo_lock_release(void) { } +static inline int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) +{ + return -EOPNOTSUPP; +} + +static inline void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp) { } +#endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */ + #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index a388ef66ef4ca..e59271515a463 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1453,7 +1453,8 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, goto out; } rdtgrp->mode = RDT_MODE_EXCLUSIVE; - } else if (!strcmp(buf, "pseudo-locksetup")) { + } else if (IS_ENABLED(CONFIG_RESCTRL_FS_PSEUDO_LOCK) && + !strcmp(buf, "pseudo-locksetup")) { ret = rdtgroup_locksetup_enter(rdtgrp); if (ret) goto out; From 4d20f38ab6d922dd6b8a33795b6e72516d733eb2 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:37:10 +0000 Subject: [PATCH 25/30] x86/resctrl: Make prefetch_disable_bits belong to the arch code prefetch_disable_bits is set by rdtgroup_locksetup_enter() from a value provided by the architecture, but is largely read by other architecture helpers. Make resctrl_arch_get_prefetch_disable_bits() set prefetch_disable_bits so that it can be isolated to arch-code from where the other arch-code helpers can use its cached value. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-26-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 1f42c1190d26d..90044a01d0003 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -84,6 +84,8 @@ static const struct class pseudo_lock_class = { */ u64 resctrl_arch_get_prefetch_disable_bits(void) { + prefetch_disable_bits = 0; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || boot_cpu_data.x86 != 6) return 0; @@ -99,7 +101,8 @@ u64 resctrl_arch_get_prefetch_disable_bits(void) * 3 DCU IP Prefetcher Disable (R/W) * 63:4 Reserved */ - return 0xF; + prefetch_disable_bits = 0xF; + break; case INTEL_ATOM_GOLDMONT: case INTEL_ATOM_GOLDMONT_PLUS: /* @@ -110,10 +113,11 @@ u64 resctrl_arch_get_prefetch_disable_bits(void) * 2 DCU Hardware Prefetcher Disable (R/W) * 63:3 Reserved */ - return 0x5; + prefetch_disable_bits = 0x5; + break; } - return 0; + return prefetch_disable_bits; } /** @@ -713,8 +717,7 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) * Not knowing the bits to disable prefetching implies that this * platform does not support Cache Pseudo-Locking. */ - prefetch_disable_bits = resctrl_arch_get_prefetch_disable_bits(); - if (prefetch_disable_bits == 0) { + if (resctrl_arch_get_prefetch_disable_bits() == 0) { rdt_last_cmd_puts("Pseudo-locking not supported\n"); return -EINVAL; } From 4cf9acfc8f1a322576f0666b882d631cfdf714bf Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:37:11 +0000 Subject: [PATCH 26/30] x86/resctrl: Make resctrl_arch_pseudo_lock_fn() take a plr resctrl_arch_pseudo_lock_fn() has architecture specific behaviour, and takes a struct rdtgroup as an argument. After the filesystem code moves to /fs/, the definition of struct rdtgroup will not be available to the architecture code. The only reason resctrl_arch_pseudo_lock_fn() wants the rdtgroup is for the CLOSID. Embed that in the pseudo_lock_region as a closid, and move the definition of struct pseudo_lock_region to resctrl.h. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-27-james.morse@arm.com --- arch/x86/include/asm/resctrl.h | 2 +- arch/x86/kernel/cpu/resctrl/internal.h | 37 --------------------- arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 12 +++---- include/linux/resctrl.h | 39 +++++++++++++++++++++++ 4 files changed, 46 insertions(+), 44 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 86407dbde583e..011bf67a18664 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -204,7 +204,7 @@ static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid, void *ctx) { }; u64 resctrl_arch_get_prefetch_disable_bits(void); -int resctrl_arch_pseudo_lock_fn(void *_rdtgrp); +int resctrl_arch_pseudo_lock_fn(void *_plr); int resctrl_arch_measure_cycles_lat_fn(void *_plr); int resctrl_arch_measure_l2_residency(void *_plr); int resctrl_arch_measure_l3_residency(void *_plr); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 8d35bb423aadd..0d13006e920b3 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -208,43 +208,6 @@ struct mongroup { u32 rmid; }; -/** - * struct pseudo_lock_region - pseudo-lock region information - * @s: Resctrl schema for the resource to which this - * pseudo-locked region belongs - * @d: RDT domain to which this pseudo-locked region - * belongs - * @cbm: bitmask of the pseudo-locked region - * @lock_thread_wq: waitqueue used to wait on the pseudo-locking thread - * completion - * @thread_done: variable used by waitqueue to test if pseudo-locking - * thread completed - * @cpu: core associated with the cache on which the setup code - * will be run - * @line_size: size of the cache lines - * @size: size of pseudo-locked region in bytes - * @kmem: the kernel memory associated with pseudo-locked region - * @minor: minor number of character device associated with this - * region - * @debugfs_dir: pointer to this region's directory in the debugfs - * filesystem - * @pm_reqs: Power management QoS requests related to this region - */ -struct pseudo_lock_region { - struct resctrl_schema *s; - struct rdt_ctrl_domain *d; - u32 cbm; - wait_queue_head_t lock_thread_wq; - int thread_done; - int cpu; - unsigned int line_size; - unsigned int size; - void *kmem; - unsigned int minor; - struct dentry *debugfs_dir; - struct list_head pm_reqs; -}; - /** * struct rdtgroup - store rdtgroup's data in resctrl file system. * @kn: kernfs node diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 90044a01d0003..01fa7890b43f5 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -414,7 +414,7 @@ static void pseudo_lock_free(struct rdtgroup *rdtgrp) /** * resctrl_arch_pseudo_lock_fn - Load kernel memory into cache - * @_rdtgrp: resource group to which pseudo-lock region belongs + * @_plr: the pseudo-lock region descriptor * * This is the core pseudo-locking flow. * @@ -431,10 +431,9 @@ static void pseudo_lock_free(struct rdtgroup *rdtgrp) * * Return: 0. Waiter on waitqueue will be woken on completion. */ -int resctrl_arch_pseudo_lock_fn(void *_rdtgrp) +int resctrl_arch_pseudo_lock_fn(void *_plr) { - struct rdtgroup *rdtgrp = _rdtgrp; - struct pseudo_lock_region *plr = rdtgrp->plr; + struct pseudo_lock_region *plr = _plr; u32 rmid_p, closid_p; unsigned long i; u64 saved_msr; @@ -494,7 +493,8 @@ int resctrl_arch_pseudo_lock_fn(void *_rdtgrp) * pseudo-locked followed by reading of kernel memory to load it * into the cache. */ - __wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, rdtgrp->closid); + __wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, plr->closid); + /* * Cache was flushed earlier. Now access kernel memory to read it * into cache region associated with just activated plr->closid. @@ -1312,7 +1312,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) plr->thread_done = 0; - thread = kthread_run_on_cpu(resctrl_arch_pseudo_lock_fn, rdtgrp, + thread = kthread_run_on_cpu(resctrl_arch_pseudo_lock_fn, plr, plr->cpu, "pseudo_lock/%u"); if (IS_ERR(thread)) { ret = PTR_ERR(thread); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 914df24ffe483..226cc5c0d765f 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -58,6 +58,45 @@ enum resctrl_conf_type { #define CDP_NUM_TYPES (CDP_DATA + 1) +/* + * struct pseudo_lock_region - pseudo-lock region information + * @s: Resctrl schema for the resource to which this + * pseudo-locked region belongs + * @closid: The closid that this pseudo-locked region uses + * @d: RDT domain to which this pseudo-locked region + * belongs + * @cbm: bitmask of the pseudo-locked region + * @lock_thread_wq: waitqueue used to wait on the pseudo-locking thread + * completion + * @thread_done: variable used by waitqueue to test if pseudo-locking + * thread completed + * @cpu: core associated with the cache on which the setup code + * will be run + * @line_size: size of the cache lines + * @size: size of pseudo-locked region in bytes + * @kmem: the kernel memory associated with pseudo-locked region + * @minor: minor number of character device associated with this + * region + * @debugfs_dir: pointer to this region's directory in the debugfs + * filesystem + * @pm_reqs: Power management QoS requests related to this region + */ +struct pseudo_lock_region { + struct resctrl_schema *s; + u32 closid; + struct rdt_ctrl_domain *d; + u32 cbm; + wait_queue_head_t lock_thread_wq; + int thread_done; + int cpu; + unsigned int line_size; + unsigned int size; + void *kmem; + unsigned int minor; + struct dentry *debugfs_dir; + struct list_head pm_reqs; +}; + /** * struct resctrl_staged_config - parsed configuration to be applied * @new_ctrl: new ctrl value to be loaded From 373af4ecfdc922657be081b3feebbd0af8e7fa65 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:37:12 +0000 Subject: [PATCH 27/30] x86/resctrl: Move RFTYPE flags to be managed by resctrl resctrl_file_fflags_init() is called from the architecture specific code to make the 'thread_throttle_mode' file visible. The architecture specific code has already set the membw.throttle_mode in the rdt_resource. This forces the RFTYPE flags used by resctrl to be exposed to the architecture specific code. This doesn't need to be specific to the architecture, the throttle_mode can be used by resctrl to determine if the 'thread_throttle_mode' file should be visible. This allows the RFTYPE flags to be private to resctrl. Add thread_throttle_mode_init(), and use it to call resctrl_file_fflags_init() from resctrl_init(). This avoids publishing an extra function between the architecture and filesystem code. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Reviewed-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Shaopeng Tan Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-28-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 3 --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 12 ++++++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index b9b74f53b8eef..e590dd347fa7c 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -227,9 +227,6 @@ static __init bool __get_mem_config_intel(struct rdt_resource *r) else r->membw.throttle_mode = THREAD_THROTTLE_MAX; - resctrl_file_fflags_init("thread_throttle_mode", - RFTYPE_CTRL_INFO | RFTYPE_RES_MB); - r->alloc_capable = true; return true; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index e59271515a463..58feba3feefd8 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2064,6 +2064,16 @@ static struct rftype *rdtgroup_get_rftype_by_name(const char *name) return NULL; } +static void thread_throttle_mode_init(void) +{ + struct rdt_resource *r_mba; + + r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA); + if (r_mba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED) + resctrl_file_fflags_init("thread_throttle_mode", + RFTYPE_CTRL_INFO | RFTYPE_RES_MB); +} + void resctrl_file_fflags_init(const char *config, unsigned long fflags) { struct rftype *rft; @@ -4277,6 +4287,8 @@ int __init resctrl_init(void) rdtgroup_setup_default(); + thread_throttle_mode_init(); + ret = resctrl_mon_resource_init(); if (ret) return ret; From 6c2282d42cb37f081587412f77340138b1df265e Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:37:13 +0000 Subject: [PATCH 28/30] x86/resctrl: Handle throttle_mode for SMBA resources Now that the visibility of throttle_mode is being managed by resctrl, it should consider resources other than MBA that may have a throttle_mode. SMBA is one such resource. Extend thread_throttle_mode_init() to check SMBA for a throttle_mode. Adding support for multiple resources means it is possible for a platform with both MBA and SMBA, but an undefined throttle_mode on one of them to make the file visible. Add the 'undefined' case to rdt_thread_throttle_mode_show(). Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Reviewed-by: Babu Moger Reviewed-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Shaopeng Tan Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-29-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 33 +++++++++++++++++++++----- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 58feba3feefd8..5fc60c9ce28f8 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1188,10 +1188,19 @@ static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, struct resctrl_schema *s = of->kn->parent->priv; struct rdt_resource *r = s->res; - if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD) + switch (r->membw.throttle_mode) { + case THREAD_THROTTLE_PER_THREAD: seq_puts(seq, "per-thread\n"); - else + return 0; + case THREAD_THROTTLE_MAX: seq_puts(seq, "max\n"); + return 0; + case THREAD_THROTTLE_UNDEFINED: + seq_puts(seq, "undefined\n"); + return 0; + } + + WARN_ON_ONCE(1); return 0; } @@ -2066,12 +2075,24 @@ static struct rftype *rdtgroup_get_rftype_by_name(const char *name) static void thread_throttle_mode_init(void) { - struct rdt_resource *r_mba; + enum membw_throttle_mode throttle_mode = THREAD_THROTTLE_UNDEFINED; + struct rdt_resource *r_mba, *r_smba; r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA); - if (r_mba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED) - resctrl_file_fflags_init("thread_throttle_mode", - RFTYPE_CTRL_INFO | RFTYPE_RES_MB); + if (r_mba->alloc_capable && + r_mba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED) + throttle_mode = r_mba->membw.throttle_mode; + + r_smba = resctrl_arch_get_resource(RDT_RESOURCE_SMBA); + if (r_smba->alloc_capable && + r_smba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED) + throttle_mode = r_smba->membw.throttle_mode; + + if (throttle_mode == THREAD_THROTTLE_UNDEFINED) + return; + + resctrl_file_fflags_init("thread_throttle_mode", + RFTYPE_CTRL_INFO | RFTYPE_RES_MB); } void resctrl_file_fflags_init(const char *config, unsigned long fflags) From f62b4e45e0b467cba75ae816338b996c6dc4dafa Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:37:14 +0000 Subject: [PATCH 29/30] x86/resctrl: Move get_config_index() to a header get_config_index() is used by the architecture specific code to map a CLOSID+type pair to an index in the configuration arrays. MPAM needs to do this too to preserve the ABI to user-space, there is no reason to do it differently. Move the helper to a header file to allow all architectures that either use or emulate CDP to use the same pattern of CLOSID values. Moving this to a header file means it must be marked inline, which matches the existing compiler choice for this static function. Co-developed-by: Dave Martin Signed-off-by: Dave Martin Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Fenghua Yu Reviewed-by: Reinette Chatre Reviewed-by: Babu Moger Tested-by: Carl Worth # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-30-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 19 +++---------------- include/linux/resctrl.h | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 1ecc93282b7d7..0a0ac5f6112ec 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -287,25 +287,12 @@ static int parse_line(char *line, struct resctrl_schema *s, return -EINVAL; } -static u32 get_config_index(u32 closid, enum resctrl_conf_type type) -{ - switch (type) { - default: - case CDP_NONE: - return closid; - case CDP_CODE: - return closid * 2 + 1; - case CDP_DATA: - return closid * 2; - } -} - int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type t, u32 cfg_val) { struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - u32 idx = get_config_index(closid, t); + u32 idx = resctrl_get_config_index(closid, t); struct msr_param msr_param; if (!cpumask_test_cpu(smp_processor_id(), &d->hdr.cpu_mask)) @@ -342,7 +329,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) if (!cfg->have_new_ctrl) continue; - idx = get_config_index(closid, t); + idx = resctrl_get_config_index(closid, t); if (cfg->new_ctrl == hw_dom->ctrl_val[idx]) continue; hw_dom->ctrl_val[idx] = cfg->new_ctrl; @@ -462,7 +449,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type type) { struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); - u32 idx = get_config_index(closid, type); + u32 idx = resctrl_get_config_index(closid, type); return hw_dom->ctrl_val[idx]; } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 226cc5c0d765f..880351ca3dfcb 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -384,6 +384,21 @@ void resctrl_arch_mon_event_config_write(void *config_info); */ void resctrl_arch_mon_event_config_read(void *config_info); +/* For use by arch code to remap resctrl's smaller CDP CLOSID range */ +static inline u32 resctrl_get_config_index(u32 closid, + enum resctrl_conf_type type) +{ + switch (type) { + default: + case CDP_NONE: + return closid; + case CDP_CODE: + return closid * 2 + 1; + case CDP_DATA: + return closid * 2; + } +} + /* * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. From 823beb31e55673bf2fd21374e095eec73a761ee7 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 11 Mar 2025 18:37:15 +0000 Subject: [PATCH 30/30] x86/resctrl: Move get_{mon,ctrl}_domain_from_cpu() to live with their callers Each of get_{mon,ctrl}_domain_from_cpu() only has one caller. Once the filesystem code is moved to /fs/, there is no equivalent to core.c. Move these functions to each live next to their caller. This allows them to be made static and the header file entries to be removed. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Fenghua Yu Reviewed-by: Reinette Chatre Reviewed-by: Babu Moger Reviewed-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Shaopeng Tan Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Link: https://lore.kernel.org/r/20250311183715.16445-31-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 30 -------------------------- arch/x86/kernel/cpu/resctrl/internal.h | 2 -- arch/x86/kernel/cpu/resctrl/monitor.c | 16 ++++++++++++++ arch/x86/kernel/cpu/resctrl/rdtgroup.c | 16 ++++++++++++++ 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index e590dd347fa7c..cf29681d01e04 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -347,36 +347,6 @@ static void cat_wrmsr(struct msr_param *m) wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]); } -struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r) -{ - struct rdt_ctrl_domain *d; - - lockdep_assert_cpus_held(); - - list_for_each_entry(d, &r->ctrl_domains, hdr.list) { - /* Find the domain that contains this CPU */ - if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) - return d; - } - - return NULL; -} - -struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r) -{ - struct rdt_mon_domain *d; - - lockdep_assert_cpus_held(); - - list_for_each_entry(d, &r->mon_domains, hdr.list) { - /* Find the domain that contains this CPU */ - if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) - return d; - } - - return NULL; -} - u32 resctrl_arch_get_num_closid(struct rdt_resource *r) { return resctrl_to_arch_res(r)->num_closid; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 0d13006e920b3..c44c5b4963558 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -475,8 +475,6 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_ctrl_domain unsigned long cbm); enum rdtgrp_mode rdtgroup_mode_by_closid(int closid); int rdtgroup_tasks_assigned(struct rdtgroup *r); -struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r); -struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r); int closids_supported(void); void closid_free(int closid); int alloc_rmid(u32 closid); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 83f90128d7688..a93ed7d2a1602 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -718,6 +718,22 @@ void mon_event_count(void *info) rr->err = 0; } +static struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, + struct rdt_resource *r) +{ + struct rdt_ctrl_domain *d; + + lockdep_assert_cpus_held(); + + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { + /* Find the domain that contains this CPU */ + if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) + return d; + } + + return NULL; +} + /* * Feedback loop for MBA software controller (mba_sc) * diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 5fc60c9ce28f8..c6274d40b217c 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -4257,6 +4257,22 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) } } +static struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, + struct rdt_resource *r) +{ + struct rdt_mon_domain *d; + + lockdep_assert_cpus_held(); + + list_for_each_entry(d, &r->mon_domains, hdr.list) { + /* Find the domain that contains this CPU */ + if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) + return d; + } + + return NULL; +} + void resctrl_offline_cpu(unsigned int cpu) { struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3);