diff --git a/MAINTAINERS b/MAINTAINERS index 8dc826e00a0d8..54859024f5e06 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19934,6 +19934,7 @@ S: Supported F: Documentation/arch/x86/resctrl* F: arch/x86/include/asm/resctrl.h F: arch/x86/kernel/cpu/resctrl/ +F: include/linux/resctrl*.h F: tools/testing/selftests/resctrl/ READ-COPY UPDATE (RCU) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1665ebaba2514..05b4eca156cf3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -498,6 +498,7 @@ config X86_CPU_RESCTRL depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) select KERNFS select PROC_CPU_RESCTRL if PROC_FS + select RESCTRL_FS_PSEUDO_LOCK help Enable x86 CPU resource control support. @@ -514,6 +515,12 @@ config X86_CPU_RESCTRL Say N if unsure. +config RESCTRL_FS_PSEUDO_LOCK + bool + help + Software mechanism to pin data in a cache portion using + micro-architecture specific knowledge. + config X86_FRED bool "Flexible Return and Event Delivery" depends on X86_64 diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 8b1b6ce1e51b2..011bf67a18664 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -4,8 +4,10 @@ #ifdef CONFIG_X86_CPU_RESCTRL -#include #include +#include +#include +#include /* * This value can never be a valid CLOSID, and is used when mapping a @@ -40,6 +42,7 @@ DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state); extern bool rdt_alloc_capable; extern bool rdt_mon_capable; +extern unsigned int rdt_mon_features; DECLARE_STATIC_KEY_FALSE(rdt_enable_key); DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key); @@ -79,6 +82,21 @@ static inline void resctrl_arch_disable_mon(void) static_branch_dec_cpuslocked(&rdt_enable_key); } +static inline bool resctrl_arch_is_llc_occupancy_enabled(void) +{ + return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID)); +} + +static inline bool resctrl_arch_is_mbm_total_enabled(void) +{ + return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID)); +} + +static inline bool resctrl_arch_is_mbm_local_enabled(void) +{ + return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID)); +} + /* * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR * @@ -96,8 +114,8 @@ static inline void resctrl_arch_disable_mon(void) static inline void __resctrl_sched_in(struct task_struct *tsk) { struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); - u32 closid = state->default_closid; - u32 rmid = state->default_rmid; + u32 closid = READ_ONCE(state->default_closid); + u32 rmid = READ_ONCE(state->default_rmid); u32 tmp; /* @@ -132,6 +150,13 @@ static inline unsigned int resctrl_arch_round_mon_val(unsigned int val) return val * scale; } +static inline void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, + u32 rmid) +{ + WRITE_ONCE(per_cpu(pqr_state.default_closid, cpu), closid); + WRITE_ONCE(per_cpu(pqr_state.default_rmid, cpu), rmid); +} + static inline void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid) { @@ -178,6 +203,11 @@ static inline void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid, void *ctx) { }; +u64 resctrl_arch_get_prefetch_disable_bits(void); +int resctrl_arch_pseudo_lock_fn(void *_plr); +int resctrl_arch_measure_cycles_lat_fn(void *_plr); +int resctrl_arch_measure_l2_residency(void *_plr); +int resctrl_arch_measure_l3_residency(void *_plr); void resctrl_cpu_detect(struct cpuinfo_x86 *c); #else diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile index 4a06c37b9cf11..0c13b0befd8a9 100644 --- a/arch/x86/kernel/cpu/resctrl/Makefile +++ b/arch/x86/kernel/cpu/resctrl/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o -obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o pseudo_lock.o +obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o +obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o +obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK) += pseudo_lock.o CFLAGS_pseudo_lock.o = -I$(src) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 3d1735ed8d1ff..cf29681d01e04 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -43,12 +43,6 @@ static DEFINE_MUTEX(domain_list_lock); */ DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state); -/* - * Used to store the max resource name width and max resource data width - * to display the schemata in a tabular format - */ -int max_name_width, max_data_width; - /* * Global boolean for rdt_alloc which is true if any * resource allocation is enabled. @@ -62,7 +56,7 @@ static void mba_wrmsr_amd(struct msr_param *m); #define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains) #define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains) -struct rdt_hw_resource rdt_resources_all[] = { +struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { [RDT_RESOURCE_L3] = { .r_resctrl = { @@ -72,9 +66,7 @@ struct rdt_hw_resource rdt_resources_all[] = { .mon_scope = RESCTRL_L3_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L3), .mon_domains = mon_domain_init(RDT_RESOURCE_L3), - .parse_ctrlval = parse_cbm, - .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, + .schema_fmt = RESCTRL_SCHEMA_BITMAP, }, .msr_base = MSR_IA32_L3_CBM_BASE, .msr_update = cat_wrmsr, @@ -86,9 +78,7 @@ struct rdt_hw_resource rdt_resources_all[] = { .name = "L2", .ctrl_scope = RESCTRL_L2_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L2), - .parse_ctrlval = parse_cbm, - .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, + .schema_fmt = RESCTRL_SCHEMA_BITMAP, }, .msr_base = MSR_IA32_L2_CBM_BASE, .msr_update = cat_wrmsr, @@ -100,9 +90,7 @@ struct rdt_hw_resource rdt_resources_all[] = { .name = "MB", .ctrl_scope = RESCTRL_L3_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_MBA), - .parse_ctrlval = parse_bw, - .format_str = "%d=%*u", - .fflags = RFTYPE_RES_MB, + .schema_fmt = RESCTRL_SCHEMA_RANGE, }, }, [RDT_RESOURCE_SMBA] = @@ -112,9 +100,7 @@ struct rdt_hw_resource rdt_resources_all[] = { .name = "SMBA", .ctrl_scope = RESCTRL_L3_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA), - .parse_ctrlval = parse_bw, - .format_str = "%d=%*u", - .fflags = RFTYPE_RES_MB, + .schema_fmt = RESCTRL_SCHEMA_RANGE, }, }, }; @@ -127,6 +113,14 @@ u32 resctrl_arch_system_num_rmid_idx(void) return r->num_rmid; } +struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) +{ + if (l >= RDT_NUM_RESOURCES) + return NULL; + + return &rdt_resources_all[l].r_resctrl; +} + /* * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs * as they do not have CPUID enumeration support for Cache allocation. @@ -161,7 +155,6 @@ static inline void cache_alloc_hsw_probe(void) return; hw_res->num_closid = 4; - r->default_ctrl = max_cbm; r->cache.cbm_len = 20; r->cache.shareable_bits = 0xc0000; r->cache.min_cbm_bits = 2; @@ -174,7 +167,7 @@ static inline void cache_alloc_hsw_probe(void) bool is_mba_sc(struct rdt_resource *r) { if (!r) - return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc; + r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); /* * The software controller support is only applicable to MBA resource. @@ -217,7 +210,7 @@ static __init bool __get_mem_config_intel(struct rdt_resource *r) cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full); hw_res->num_closid = edx.split.cos_max + 1; max_delay = eax.split.max_delay + 1; - r->default_ctrl = MAX_MBA_BW; + r->membw.max_bw = MAX_MBA_BW; r->membw.arch_needs_linear = true; if (ecx & MBA_IS_LINEAR) { r->membw.delay_linear = true; @@ -228,16 +221,12 @@ static __init bool __get_mem_config_intel(struct rdt_resource *r) return false; r->membw.arch_needs_linear = false; } - r->data_width = 3; if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA)) r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD; else r->membw.throttle_mode = THREAD_THROTTLE_MAX; - resctrl_file_fflags_init("thread_throttle_mode", - RFTYPE_CTRL_INFO | RFTYPE_RES_MB); - r->alloc_capable = true; return true; @@ -256,7 +245,7 @@ static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r) cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx); hw_res->num_closid = edx + 1; - r->default_ctrl = 1 << eax; + r->membw.max_bw = 1 << eax; /* AMD does not use delay */ r->membw.delay_linear = false; @@ -269,8 +258,6 @@ static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r) r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED; r->membw.min_bw = 0; r->membw.bw_gran = 1; - /* Max value is 2048, Data width should be 4 in decimal */ - r->data_width = 4; r->alloc_capable = true; @@ -283,14 +270,13 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) union cpuid_0x10_1_eax eax; union cpuid_0x10_x_ecx ecx; union cpuid_0x10_x_edx edx; - u32 ebx; + u32 ebx, default_ctrl; cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full); hw_res->num_closid = edx.split.cos_max + 1; r->cache.cbm_len = eax.split.cbm_len + 1; - r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1; - r->cache.shareable_bits = ebx & r->default_ctrl; - r->data_width = (r->cache.cbm_len + 3) / 4; + default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1; + r->cache.shareable_bits = ebx & default_ctrl; if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) r->cache.arch_has_sparse_bitmasks = ecx.split.noncont; r->alloc_capable = true; @@ -337,7 +323,7 @@ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) return MAX_MBA_BW - bw; pr_warn_once("Non Linear delay-bw map not supported but queried\n"); - return r->default_ctrl; + return MAX_MBA_BW; } static void mba_wrmsr_intel(struct msr_param *m) @@ -361,36 +347,6 @@ static void cat_wrmsr(struct msr_param *m) wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]); } -struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r) -{ - struct rdt_ctrl_domain *d; - - lockdep_assert_cpus_held(); - - list_for_each_entry(d, &r->ctrl_domains, hdr.list) { - /* Find the domain that contains this CPU */ - if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) - return d; - } - - return NULL; -} - -struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r) -{ - struct rdt_mon_domain *d; - - lockdep_assert_cpus_held(); - - list_for_each_entry(d, &r->mon_domains, hdr.list) { - /* Find the domain that contains this CPU */ - if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) - return d; - } - - return NULL; -} - u32 resctrl_arch_get_num_closid(struct rdt_resource *r) { return resctrl_to_arch_res(r)->num_closid; @@ -405,36 +361,6 @@ void rdt_ctrl_update(void *arg) hw_res->msr_update(m); } -/* - * rdt_find_domain - Search for a domain id in a resource domain list. - * - * Search the domain list to find the domain id. If the domain id is - * found, return the domain. NULL otherwise. If the domain id is not - * found (and NULL returned) then the first domain with id bigger than - * the input id can be returned to the caller via @pos. - */ -struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id, - struct list_head **pos) -{ - struct rdt_domain_hdr *d; - struct list_head *l; - - list_for_each(l, h) { - d = list_entry(l, struct rdt_domain_hdr, list); - /* When id is found, return its domain. */ - if (id == d->id) - return d; - /* Stop searching when finding id's position in sorted list. */ - if (id < d->id) - break; - } - - if (pos) - *pos = l; - - return NULL; -} - static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); @@ -446,7 +372,7 @@ static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) * For Memory Allocation: Set b/w requested to 100% */ for (i = 0; i < hw_res->num_closid; i++, dc++) - *dc = r->default_ctrl; + *dc = resctrl_get_default_ctrl(r); } static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom) @@ -494,13 +420,13 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom) { size_t tsize; - if (is_mbm_total_enabled()) { + if (resctrl_arch_is_mbm_total_enabled()) { tsize = sizeof(*hw_dom->arch_mbm_total); hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL); if (!hw_dom->arch_mbm_total) return -ENOMEM; } - if (is_mbm_local_enabled()) { + if (resctrl_arch_is_mbm_local_enabled()) { tsize = sizeof(*hw_dom->arch_mbm_local); hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL); if (!hw_dom->arch_mbm_local) { @@ -545,7 +471,7 @@ static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r) return; } - hdr = rdt_find_domain(&r->ctrl_domains, id, &add_pos); + hdr = resctrl_find_domain(&r->ctrl_domains, id, &add_pos); if (hdr) { if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN)) return; @@ -600,7 +526,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) return; } - hdr = rdt_find_domain(&r->mon_domains, id, &add_pos); + hdr = resctrl_find_domain(&r->mon_domains, id, &add_pos); if (hdr) { if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) return; @@ -665,7 +591,7 @@ static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r) return; } - hdr = rdt_find_domain(&r->ctrl_domains, id, NULL); + hdr = resctrl_find_domain(&r->ctrl_domains, id, NULL); if (!hdr) { pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n", id, cpu, r->name); @@ -711,7 +637,7 @@ static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r) return; } - hdr = rdt_find_domain(&r->mon_domains, id, NULL); + hdr = resctrl_find_domain(&r->mon_domains, id, NULL); if (!hdr) { pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n", id, cpu, r->name); @@ -786,20 +712,6 @@ static int resctrl_arch_offline_cpu(unsigned int cpu) return 0; } -/* - * Choose a width for the resource name and resource data based on the - * resource that has widest name and cbm. - */ -static __init void rdt_init_padding(void) -{ - struct rdt_resource *r; - - for_each_alloc_capable_rdt_resource(r) { - if (r->data_width > max_data_width) - max_data_width = r->data_width; - } -} - enum { RDT_FLAG_CMT, RDT_FLAG_MBM_TOTAL, @@ -885,6 +797,21 @@ bool __init rdt_cpu_has(int flag) return ret; } +__init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt) +{ + if (!rdt_cpu_has(X86_FEATURE_BMEC)) + return false; + + switch (evt) { + case QOS_L3_MBM_TOTAL_EVENT_ID: + return rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL); + case QOS_L3_MBM_LOCAL_EVENT_ID: + return rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL); + default: + return false; + } +} + static __init bool get_mem_config(void) { struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA]; @@ -963,11 +890,6 @@ static __init bool get_rdt_mon_resources(void) if (!rdt_mon_features) return false; - if (is_mbm_local_enabled()) - mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID; - else if (is_mbm_total_enabled()) - mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID; - return !rdt_get_mon_l3_config(r); } @@ -1086,7 +1008,7 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c) } } -static int __init resctrl_late_init(void) +static int __init resctrl_arch_late_init(void) { struct rdt_resource *r; int state, ret; @@ -1102,8 +1024,6 @@ static int __init resctrl_late_init(void) if (!get_rdt_resources()) return -ENODEV; - rdt_init_padding(); - state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/resctrl/cat:online:", resctrl_arch_online_cpu, @@ -1111,7 +1031,7 @@ static int __init resctrl_late_init(void) if (state < 0) return state; - ret = rdtgroup_init(); + ret = resctrl_init(); if (ret) { cpuhp_remove_state(state); return ret; @@ -1127,18 +1047,13 @@ static int __init resctrl_late_init(void) return 0; } -late_initcall(resctrl_late_init); +late_initcall(resctrl_arch_late_init); -static void __exit resctrl_exit(void) +static void __exit resctrl_arch_exit(void) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - cpuhp_remove_state(rdt_online); - rdtgroup_exit(); - - if (r->mon_capable) - rdt_put_mon_l3_config(); + resctrl_exit(); } -__exitcall(resctrl_exit); +__exitcall(resctrl_arch_exit); diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 536351159cc23..0a0ac5f6112ec 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -23,6 +23,15 @@ #include "internal.h" +struct rdt_parse_data { + struct rdtgroup *rdtgrp; + char *buf; +}; + +typedef int (ctrlval_parser_t)(struct rdt_parse_data *data, + struct resctrl_schema *s, + struct rdt_ctrl_domain *d); + /* * Check whether MBA bandwidth percentage value is correct. The value is * checked against the minimum and max bandwidth values specified by the @@ -54,9 +63,9 @@ static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) return true; } - if (bw < r->membw.min_bw || bw > r->default_ctrl) { + if (bw < r->membw.min_bw || bw > r->membw.max_bw) { rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n", - bw, r->membw.min_bw, r->default_ctrl); + bw, r->membw.min_bw, r->membw.max_bw); return false; } @@ -64,8 +73,8 @@ static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) return true; } -int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_ctrl_domain *d) +static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, + struct rdt_ctrl_domain *d) { struct resctrl_staged_config *cfg; u32 closid = data->rdtgrp->closid; @@ -104,8 +113,9 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, */ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) { - unsigned long first_bit, zero_bit, val; + u32 supported_bits = BIT_MASK(r->cache.cbm_len) - 1; unsigned int cbm_len = r->cache.cbm_len; + unsigned long first_bit, zero_bit, val; int ret; ret = kstrtoul(buf, 16, &val); @@ -114,7 +124,7 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) return false; } - if ((r->cache.min_cbm_bits > 0 && val == 0) || val > r->default_ctrl) { + if ((r->cache.min_cbm_bits > 0 && val == 0) || val > supported_bits) { rdt_last_cmd_puts("Mask out of range\n"); return false; } @@ -143,8 +153,8 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) * Read one cache bit mask (hex). Check that it is valid for the current * resource type. */ -int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_ctrl_domain *d) +static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, + struct rdt_ctrl_domain *d) { struct rdtgroup *rdtgrp = data->rdtgrp; struct resctrl_staged_config *cfg; @@ -210,6 +220,7 @@ static int parse_line(char *line, struct resctrl_schema *s, struct rdtgroup *rdtgrp) { enum resctrl_conf_type t = s->conf_type; + ctrlval_parser_t *parse_ctrlval = NULL; struct resctrl_staged_config *cfg; struct rdt_resource *r = s->res; struct rdt_parse_data data; @@ -220,6 +231,18 @@ static int parse_line(char *line, struct resctrl_schema *s, /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); + switch (r->schema_fmt) { + case RESCTRL_SCHEMA_BITMAP: + parse_ctrlval = &parse_cbm; + break; + case RESCTRL_SCHEMA_RANGE: + parse_ctrlval = &parse_bw; + break; + } + + if (WARN_ON_ONCE(!parse_ctrlval)) + return -EINVAL; + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) { rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n"); @@ -240,7 +263,7 @@ static int parse_line(char *line, struct resctrl_schema *s, if (d->hdr.id == dom_id) { data.buf = dom; data.rdtgrp = rdtgrp; - if (r->parse_ctrlval(&data, s, d)) + if (parse_ctrlval(&data, s, d)) return -EINVAL; if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { cfg = &d->staged_config[t]; @@ -264,25 +287,12 @@ static int parse_line(char *line, struct resctrl_schema *s, return -EINVAL; } -static u32 get_config_index(u32 closid, enum resctrl_conf_type type) -{ - switch (type) { - default: - case CDP_NONE: - return closid; - case CDP_CODE: - return closid * 2 + 1; - case CDP_DATA: - return closid * 2; - } -} - int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type t, u32 cfg_val) { struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - u32 idx = get_config_index(closid, t); + u32 idx = resctrl_get_config_index(closid, t); struct msr_param msr_param; if (!cpumask_test_cpu(smp_processor_id(), &d->hdr.cpu_mask)) @@ -319,7 +329,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) if (!cfg->have_new_ctrl) continue; - idx = get_config_index(closid, t); + idx = resctrl_get_config_index(closid, t); if (cfg->new_ctrl == hw_dom->ctrl_val[idx]) continue; hw_dom->ctrl_val[idx] = cfg->new_ctrl; @@ -439,7 +449,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type type) { struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); - u32 idx = get_config_index(closid, type); + u32 idx = resctrl_get_config_index(closid, type); return hw_dom->ctrl_val[idx]; } @@ -465,8 +475,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo ctrl_val = resctrl_arch_get_config(r, dom, closid, schema->conf_type); - seq_printf(s, r->format_str, dom->hdr.id, max_data_width, - ctrl_val); + seq_printf(s, schema->fmt_str, dom->hdr.id, ctrl_val); sep = true; } seq_puts(s, "\n"); @@ -537,12 +546,12 @@ ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of, rdt_last_cmd_clear(); if (!strcmp(buf, "mbm_local_bytes")) { - if (is_mbm_local_enabled()) + if (resctrl_arch_is_mbm_local_enabled()) rdtgrp->mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID; else ret = -EINVAL; } else if (!strcmp(buf, "mbm_total_bytes")) { - if (is_mbm_total_enabled()) + if (resctrl_arch_is_mbm_total_enabled()) rdtgrp->mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID; else ret = -EINVAL; @@ -588,6 +597,28 @@ int rdtgroup_mba_mbps_event_show(struct kernfs_open_file *of, return ret; } +struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id, + struct list_head **pos) +{ + struct rdt_domain_hdr *d; + struct list_head *l; + + list_for_each(l, h) { + d = list_entry(l, struct rdt_domain_hdr, list); + /* When id is found, return its domain. */ + if (id == d->id) + return d; + /* Stop searching when finding id's position in sorted list. */ + if (id < d->id) + break; + } + + if (pos) + *pos = l; + + return NULL; +} + void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, cpumask_t *cpumask, int evtid, int first) @@ -649,7 +680,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) resid = md.u.rid; domid = md.u.domid; evtid = md.u.evtid; - r = &rdt_resources_all[resid].r_resctrl; + r = resctrl_arch_get_resource(resid); if (md.u.sum) { /* @@ -673,7 +704,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) * This file provides data from a single domain. Search * the resource to find the domain with "domid". */ - hdr = rdt_find_domain(&r->mon_domains, domid, NULL); + hdr = resctrl_find_domain(&r->mon_domains, domid, NULL); if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) { ret = -ENOENT; goto out; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 20c898f09b7e7..c44c5b4963558 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -32,30 +32,6 @@ */ #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE) -/* Reads to Local DRAM Memory */ -#define READS_TO_LOCAL_MEM BIT(0) - -/* Reads to Remote DRAM Memory */ -#define READS_TO_REMOTE_MEM BIT(1) - -/* Non-Temporal Writes to Local Memory */ -#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2) - -/* Non-Temporal Writes to Remote Memory */ -#define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3) - -/* Reads to Local Memory the system identifies as "Slow Memory" */ -#define READS_TO_LOCAL_S_MEM BIT(4) - -/* Reads to Remote Memory the system identifies as "Slow Memory" */ -#define READS_TO_REMOTE_S_MEM BIT(5) - -/* Dirty Victims to All Types of Memory */ -#define DIRTY_VICTIMS_TO_ALL_MEM BIT(6) - -/* Max event bits supported */ -#define MAX_EVT_CONFIG_BITS GENMASK(6, 0) - /** * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that * aren't marked nohz_full @@ -180,7 +156,6 @@ struct rmid_read { void *arch_mon_ctx; }; -extern unsigned int rdt_mon_features; extern struct list_head resctrl_schema_all; extern bool resctrl_mounted; @@ -233,43 +208,6 @@ struct mongroup { u32 rmid; }; -/** - * struct pseudo_lock_region - pseudo-lock region information - * @s: Resctrl schema for the resource to which this - * pseudo-locked region belongs - * @d: RDT domain to which this pseudo-locked region - * belongs - * @cbm: bitmask of the pseudo-locked region - * @lock_thread_wq: waitqueue used to wait on the pseudo-locking thread - * completion - * @thread_done: variable used by waitqueue to test if pseudo-locking - * thread completed - * @cpu: core associated with the cache on which the setup code - * will be run - * @line_size: size of the cache lines - * @size: size of pseudo-locked region in bytes - * @kmem: the kernel memory associated with pseudo-locked region - * @minor: minor number of character device associated with this - * region - * @debugfs_dir: pointer to this region's directory in the debugfs - * filesystem - * @pm_reqs: Power management QoS requests related to this region - */ -struct pseudo_lock_region { - struct resctrl_schema *s; - struct rdt_ctrl_domain *d; - u32 cbm; - wait_queue_head_t lock_thread_wq; - int thread_done; - int cpu; - unsigned int line_size; - unsigned int size; - void *kmem; - unsigned int minor; - struct dentry *debugfs_dir; - struct list_head pm_reqs; -}; - /** * struct rdtgroup - store rdtgroup's data in resctrl file system. * @kn: kernfs node @@ -326,10 +264,7 @@ struct rdtgroup { /* List of all resource groups */ extern struct list_head rdt_all_groups; -extern int max_name_width, max_data_width; - -int __init rdtgroup_init(void); -void __exit rdtgroup_exit(void); +extern int max_name_width; /** * struct rftype - describe each file in the resctrl file system @@ -433,37 +368,6 @@ struct msr_param { u32 high; }; -static inline bool is_llc_occupancy_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID)); -} - -static inline bool is_mbm_total_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID)); -} - -static inline bool is_mbm_local_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID)); -} - -static inline bool is_mbm_enabled(void) -{ - return (is_mbm_total_enabled() || is_mbm_local_enabled()); -} - -static inline bool is_mbm_event(int e) -{ - return (e >= QOS_L3_MBM_TOTAL_EVENT_ID && - e <= QOS_L3_MBM_LOCAL_EVENT_ID); -} - -struct rdt_parse_data { - struct rdtgroup *rdtgrp; - char *buf; -}; - /** * struct rdt_hw_resource - arch private attributes of a resctrl resource * @r_resctrl: Attributes of the resource used directly by resctrl. @@ -476,8 +380,6 @@ struct rdt_parse_data { * @msr_update: Function pointer to update QOS MSRs * @mon_scale: cqm counter * mon_scale = occupancy in bytes * @mbm_width: Monitor width, to detect and correct for overflow. - * @mbm_cfg_mask: Bandwidth sources that can be tracked when Bandwidth - * Monitoring Event Configuration (BMEC) is supported. * @cdp_enabled: CDP state of this resource * * Members of this structure are either private to the architecture @@ -491,7 +393,6 @@ struct rdt_hw_resource { void (*msr_update)(struct msr_param *m); unsigned int mon_scale; unsigned int mbm_width; - unsigned int mbm_cfg_mask; bool cdp_enabled; }; @@ -500,11 +401,6 @@ static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r return container_of(r, struct rdt_hw_resource, r_resctrl); } -int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_ctrl_domain *d); -int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_ctrl_domain *d); - extern struct mutex rdtgroup_mutex; extern struct rdt_hw_resource rdt_resources_all[]; @@ -512,24 +408,6 @@ extern struct rdtgroup rdtgroup_default; extern struct dentry *debugfs_resctrl; extern enum resctrl_event_id mba_mbps_default_event; -enum resctrl_res_level { - RDT_RESOURCE_L3, - RDT_RESOURCE_L2, - RDT_RESOURCE_MBA, - RDT_RESOURCE_SMBA, - - /* Must be the last */ - RDT_NUM_RESOURCES, -}; - -static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res) -{ - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(res); - - hw_res++; - return &hw_res->r_resctrl; -} - static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l) { return rdt_resources_all[l].cdp_enabled; @@ -539,27 +417,6 @@ int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d); -/* - * To return the common struct rdt_resource, which is contained in struct - * rdt_hw_resource, walk the resctrl member of struct rdt_hw_resource. - */ -#define for_each_rdt_resource(r) \ - for (r = &rdt_resources_all[0].r_resctrl; \ - r <= &rdt_resources_all[RDT_NUM_RESOURCES - 1].r_resctrl; \ - r = resctrl_inc(r)) - -#define for_each_capable_rdt_resource(r) \ - for_each_rdt_resource(r) \ - if (r->alloc_capable || r->mon_capable) - -#define for_each_alloc_capable_rdt_resource(r) \ - for_each_rdt_resource(r) \ - if (r->alloc_capable) - -#define for_each_mon_capable_rdt_resource(r) \ - for_each_rdt_resource(r) \ - if (r->mon_capable) - /* CPUID.(EAX=10H, ECX=ResID=1).EAX */ union cpuid_0x10_1_eax { struct { @@ -604,8 +461,6 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn); int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name); int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, umode_t mask); -struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id, - struct list_head **pos); ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); int rdtgroup_schemata_show(struct kernfs_open_file *of, @@ -620,28 +475,19 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_ctrl_domain unsigned long cbm); enum rdtgrp_mode rdtgroup_mode_by_closid(int closid); int rdtgroup_tasks_assigned(struct rdtgroup *r); -int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); -int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm); -bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d); -int rdt_pseudo_lock_init(void); -void rdt_pseudo_lock_release(void); -int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); -void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); -struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r); -struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r); int closids_supported(void); void closid_free(int closid); int alloc_rmid(u32 closid); void free_rmid(u32 closid, u32 rmid); int rdt_get_mon_l3_config(struct rdt_resource *r); -void __exit rdt_put_mon_l3_config(void); +void resctrl_mon_resource_exit(void); bool __init rdt_cpu_has(int flag); void mon_event_count(void *info); int rdtgroup_mondata_show(struct seq_file *m, void *arg); void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, cpumask_t *cpumask, int evtid, int first); +int __init resctrl_mon_resource_init(void); void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, int exclude_cpu); @@ -658,4 +504,45 @@ void resctrl_file_fflags_init(const char *config, unsigned long fflags); void rdt_staged_configs_clear(void); bool closid_allocated(unsigned int closid); int resctrl_find_cleanest_closid(void); + +#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK +int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); +int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm); +bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d); +int rdt_pseudo_lock_init(void); +void rdt_pseudo_lock_release(void); +int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); +void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); +#else +static inline int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) +{ + return -EOPNOTSUPP; +} + +static inline int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) +{ + return -EOPNOTSUPP; +} + +static inline bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm) +{ + return false; +} + +static inline bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d) +{ + return false; +} + +static inline int rdt_pseudo_lock_init(void) { return 0; } +static inline void rdt_pseudo_lock_release(void) { } +static inline int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) +{ + return -EOPNOTSUPP; +} + +static inline void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp) { } +#endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */ + #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 94a1d97804614..a93ed7d2a1602 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -295,11 +295,11 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain * { struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); - if (is_mbm_total_enabled()) + if (resctrl_arch_is_mbm_total_enabled()) memset(hw_dom->arch_mbm_total, 0, sizeof(*hw_dom->arch_mbm_total) * r->num_rmid); - if (is_mbm_local_enabled()) + if (resctrl_arch_is_mbm_local_enabled()) memset(hw_dom->arch_mbm_local, 0, sizeof(*hw_dom->arch_mbm_local) * r->num_rmid); } @@ -365,7 +365,7 @@ static void limbo_release_entry(struct rmid_entry *entry) */ void __check_limbo(struct rdt_mon_domain *d, bool force_free) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); u32 idx_limit = resctrl_arch_system_num_rmid_idx(); struct rmid_entry *entry; u32 idx, cur_idx = 1; @@ -521,7 +521,7 @@ int alloc_rmid(u32 closid) static void add_rmid_to_limbo(struct rmid_entry *entry) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); struct rdt_mon_domain *d; u32 idx; @@ -569,7 +569,7 @@ void free_rmid(u32 closid, u32 rmid) entry = __rmid_entry(idx); - if (is_llc_occupancy_enabled()) + if (resctrl_arch_is_llc_occupancy_enabled()) add_rmid_to_limbo(entry); else list_add_tail(&entry->list, &rmid_free_lru); @@ -718,6 +718,22 @@ void mon_event_count(void *info) rr->err = 0; } +static struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, + struct rdt_resource *r) +{ + struct rdt_ctrl_domain *d; + + lockdep_assert_cpus_held(); + + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { + /* Find the domain that contains this CPU */ + if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) + return d; + } + + return NULL; +} + /* * Feedback loop for MBA software controller (mba_sc) * @@ -761,7 +777,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm) struct rdtgroup *entry; u32 cur_bw, user_bw; - r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA); evt_id = rgrp->mba_mbps_event; closid = rgrp->closid; @@ -852,10 +868,10 @@ static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d, * This is protected from concurrent reads from user as both * the user and overflow handler hold the global mutex. */ - if (is_mbm_total_enabled()) + if (resctrl_arch_is_mbm_total_enabled()) mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_TOTAL_EVENT_ID); - if (is_mbm_local_enabled()) + if (resctrl_arch_is_mbm_local_enabled()) mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_LOCAL_EVENT_ID); } @@ -925,7 +941,7 @@ void mbm_handle_overflow(struct work_struct *work) if (!resctrl_mounted || !resctrl_arch_mon_capable()) goto out_unlock; - r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + r = resctrl_arch_get_resource(RDT_RESOURCE_L3); d = container_of(work, struct rdt_mon_domain, mbm_over.work); list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { @@ -1027,7 +1043,7 @@ static int dom_data_init(struct rdt_resource *r) /* * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and * are always allocated. These are used for the rdtgroup_default - * control group, which will be setup later in rdtgroup_init(). + * control group, which will be setup later in resctrl_init(). */ idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, RESCTRL_RESERVED_RMID); @@ -1040,10 +1056,13 @@ static int dom_data_init(struct rdt_resource *r) return err; } -static void __exit dom_data_exit(void) +static void dom_data_exit(struct rdt_resource *r) { mutex_lock(&rdtgroup_mutex); + if (!r->mon_capable) + goto out_unlock; + if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { kfree(closid_num_dirty_rmid); closid_num_dirty_rmid = NULL; @@ -1052,6 +1071,7 @@ static void __exit dom_data_exit(void) kfree(rmid_ptrs); rmid_ptrs = NULL; +out_unlock: mutex_unlock(&rdtgroup_mutex); } @@ -1081,11 +1101,11 @@ static void l3_mon_evt_init(struct rdt_resource *r) { INIT_LIST_HEAD(&r->evt_list); - if (is_llc_occupancy_enabled()) + if (resctrl_arch_is_llc_occupancy_enabled()) list_add_tail(&llc_occupancy_event.list, &r->evt_list); - if (is_mbm_total_enabled()) + if (resctrl_arch_is_mbm_total_enabled()) list_add_tail(&mbm_total_event.list, &r->evt_list); - if (is_mbm_local_enabled()) + if (resctrl_arch_is_mbm_local_enabled()) list_add_tail(&mbm_local_event.list, &r->evt_list); } @@ -1172,12 +1192,56 @@ static __init int snc_get_config(void) return ret; } +/** + * resctrl_mon_resource_init() - Initialise global monitoring structures. + * + * Allocate and initialise global monitor resources that do not belong to a + * specific domain. i.e. the rmid_ptrs[] used for the limbo and free lists. + * Called once during boot after the struct rdt_resource's have been configured + * but before the filesystem is mounted. + * Resctrl's cpuhp callbacks may be called before this point to bring a domain + * online. + * + * Returns 0 for success, or -ENOMEM. + */ +int __init resctrl_mon_resource_init(void) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + int ret; + + if (!r->mon_capable) + return 0; + + ret = dom_data_init(r); + if (ret) + return ret; + + l3_mon_evt_init(r); + + if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) { + mbm_total_event.configurable = true; + resctrl_file_fflags_init("mbm_total_bytes_config", + RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + } + if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) { + mbm_local_event.configurable = true; + resctrl_file_fflags_init("mbm_local_bytes_config", + RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + } + + if (resctrl_arch_is_mbm_local_enabled()) + mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID; + else if (resctrl_arch_is_mbm_total_enabled()) + mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID; + + return 0; +} + int __init rdt_get_mon_l3_config(struct rdt_resource *r) { unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); unsigned int threshold; - int ret; snc_nodes_per_l3_cache = snc_get_config(); @@ -1207,39 +1271,24 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) */ resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(threshold); - ret = dom_data_init(r); - if (ret) - return ret; - if (rdt_cpu_has(X86_FEATURE_BMEC)) { u32 eax, ebx, ecx, edx; /* Detect list of bandwidth sources that can be tracked */ cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx); - hw_res->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; - - if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) { - mbm_total_event.configurable = true; - resctrl_file_fflags_init("mbm_total_bytes_config", - RFTYPE_MON_INFO | RFTYPE_RES_CACHE); - } - if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) { - mbm_local_event.configurable = true; - resctrl_file_fflags_init("mbm_local_bytes_config", - RFTYPE_MON_INFO | RFTYPE_RES_CACHE); - } + r->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; } - l3_mon_evt_init(r); - r->mon_capable = true; return 0; } -void __exit rdt_put_mon_l3_config(void) +void resctrl_mon_resource_exit(void) { - dom_data_exit(); + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + + dom_data_exit(r); } void __init intel_rdt_mbm_apply_quirk(void) diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 42cc162f7fc91..01fa7890b43f5 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -61,7 +61,8 @@ static const struct class pseudo_lock_class = { }; /** - * get_prefetch_disable_bits - prefetch disable bits of supported platforms + * resctrl_arch_get_prefetch_disable_bits - prefetch disable bits of supported + * platforms * @void: It takes no parameters. * * Capture the list of platforms that have been validated to support @@ -75,14 +76,16 @@ static const struct class pseudo_lock_class = { * in the SDM. * * When adding a platform here also add support for its cache events to - * measure_cycles_perf_fn() + * resctrl_arch_measure_l*_residency() * * Return: * If platform is supported, the bits to disable hardware prefetchers, 0 * if platform is not supported. */ -static u64 get_prefetch_disable_bits(void) +u64 resctrl_arch_get_prefetch_disable_bits(void) { + prefetch_disable_bits = 0; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || boot_cpu_data.x86 != 6) return 0; @@ -98,7 +101,8 @@ static u64 get_prefetch_disable_bits(void) * 3 DCU IP Prefetcher Disable (R/W) * 63:4 Reserved */ - return 0xF; + prefetch_disable_bits = 0xF; + break; case INTEL_ATOM_GOLDMONT: case INTEL_ATOM_GOLDMONT_PLUS: /* @@ -109,10 +113,11 @@ static u64 get_prefetch_disable_bits(void) * 2 DCU Hardware Prefetcher Disable (R/W) * 63:3 Reserved */ - return 0x5; + prefetch_disable_bits = 0x5; + break; } - return 0; + return prefetch_disable_bits; } /** @@ -408,8 +413,8 @@ static void pseudo_lock_free(struct rdtgroup *rdtgrp) } /** - * pseudo_lock_fn - Load kernel memory into cache - * @_rdtgrp: resource group to which pseudo-lock region belongs + * resctrl_arch_pseudo_lock_fn - Load kernel memory into cache + * @_plr: the pseudo-lock region descriptor * * This is the core pseudo-locking flow. * @@ -426,10 +431,9 @@ static void pseudo_lock_free(struct rdtgroup *rdtgrp) * * Return: 0. Waiter on waitqueue will be woken on completion. */ -static int pseudo_lock_fn(void *_rdtgrp) +int resctrl_arch_pseudo_lock_fn(void *_plr) { - struct rdtgroup *rdtgrp = _rdtgrp; - struct pseudo_lock_region *plr = rdtgrp->plr; + struct pseudo_lock_region *plr = _plr; u32 rmid_p, closid_p; unsigned long i; u64 saved_msr; @@ -489,7 +493,8 @@ static int pseudo_lock_fn(void *_rdtgrp) * pseudo-locked followed by reading of kernel memory to load it * into the cache. */ - __wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, rdtgrp->closid); + __wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, plr->closid); + /* * Cache was flushed earlier. Now access kernel memory to read it * into cache region associated with just activated plr->closid. @@ -712,8 +717,7 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) * Not knowing the bits to disable prefetching implies that this * platform does not support Cache Pseudo-Locking. */ - prefetch_disable_bits = get_prefetch_disable_bits(); - if (prefetch_disable_bits == 0) { + if (resctrl_arch_get_prefetch_disable_bits() == 0) { rdt_last_cmd_puts("Pseudo-locking not supported\n"); return -EINVAL; } @@ -872,7 +876,8 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d) } /** - * measure_cycles_lat_fn - Measure cycle latency to read pseudo-locked memory + * resctrl_arch_measure_cycles_lat_fn - Measure cycle latency to read + * pseudo-locked memory * @_plr: pseudo-lock region to measure * * There is no deterministic way to test if a memory region is cached. One @@ -885,7 +890,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d) * * Return: 0. Waiter on waitqueue will be woken on completion. */ -static int measure_cycles_lat_fn(void *_plr) +int resctrl_arch_measure_cycles_lat_fn(void *_plr) { struct pseudo_lock_region *plr = _plr; u32 saved_low, saved_high; @@ -1069,7 +1074,7 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr, return 0; } -static int measure_l2_residency(void *_plr) +int resctrl_arch_measure_l2_residency(void *_plr) { struct pseudo_lock_region *plr = _plr; struct residency_counts counts = {0}; @@ -1107,7 +1112,7 @@ static int measure_l2_residency(void *_plr) return 0; } -static int measure_l3_residency(void *_plr) +int resctrl_arch_measure_l3_residency(void *_plr) { struct pseudo_lock_region *plr = _plr; struct residency_counts counts = {0}; @@ -1205,14 +1210,14 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel) plr->cpu = cpu; if (sel == 1) - thread = kthread_run_on_cpu(measure_cycles_lat_fn, plr, - cpu, "pseudo_lock_measure/%u"); + thread = kthread_run_on_cpu(resctrl_arch_measure_cycles_lat_fn, + plr, cpu, "pseudo_lock_measure/%u"); else if (sel == 2) - thread = kthread_run_on_cpu(measure_l2_residency, plr, - cpu, "pseudo_lock_measure/%u"); + thread = kthread_run_on_cpu(resctrl_arch_measure_l2_residency, + plr, cpu, "pseudo_lock_measure/%u"); else if (sel == 3) - thread = kthread_run_on_cpu(measure_l3_residency, plr, - cpu, "pseudo_lock_measure/%u"); + thread = kthread_run_on_cpu(resctrl_arch_measure_l3_residency, + plr, cpu, "pseudo_lock_measure/%u"); else goto out; @@ -1307,7 +1312,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) plr->thread_done = 0; - thread = kthread_run_on_cpu(pseudo_lock_fn, rdtgrp, + thread = kthread_run_on_cpu(resctrl_arch_pseudo_lock_fn, plr, plr->cpu, "pseudo_lock/%u"); if (IS_ERR(thread)) { ret = PTR_ERR(thread); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 6419e04d8a7b2..c6274d40b217c 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -57,6 +57,12 @@ static struct kernfs_node *kn_mongrp; /* Kernel fs node for "mon_data" directory under root */ static struct kernfs_node *kn_mondata; +/* + * Used to store the max resource name width to display the schemata names in + * a tabular format. + */ +int max_name_width; + static struct seq_buf last_cmd_status; static char last_cmd_status_buf[512]; @@ -111,6 +117,18 @@ void rdt_staged_configs_clear(void) } } +static bool resctrl_is_mbm_enabled(void) +{ + return (resctrl_arch_is_mbm_total_enabled() || + resctrl_arch_is_mbm_local_enabled()); +} + +static bool resctrl_is_mbm_event(int e) +{ + return (e >= QOS_L3_MBM_TOTAL_EVENT_ID && + e <= QOS_L3_MBM_LOCAL_EVENT_ID); +} + /* * Trivial allocator for CLOSIDs. Since h/w only supports a small number, * we can keep a bitmap of free CLOSIDs in a single integer. @@ -157,7 +175,8 @@ static int closid_alloc(void) lockdep_assert_held(&rdtgroup_mutex); - if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { + if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) && + resctrl_arch_is_llc_occupancy_enabled()) { cleanest_closid = resctrl_find_cleanest_closid(); if (cleanest_closid < 0) return cleanest_closid; @@ -348,13 +367,13 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of, * from update_closid_rmid() is protected against __switch_to() because * preemption is disabled. */ -static void update_cpu_closid_rmid(void *info) +void resctrl_arch_sync_cpu_closid_rmid(void *info) { - struct rdtgroup *r = info; + struct resctrl_cpu_defaults *r = info; if (r) { this_cpu_write(pqr_state.default_closid, r->closid); - this_cpu_write(pqr_state.default_rmid, r->mon.rmid); + this_cpu_write(pqr_state.default_rmid, r->rmid); } /* @@ -369,11 +388,20 @@ static void update_cpu_closid_rmid(void *info) * Update the PGR_ASSOC MSR on all cpus in @cpu_mask, * * Per task closids/rmids must have been set up before calling this function. + * @r may be NULL. */ static void update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r) { - on_each_cpu_mask(cpu_mask, update_cpu_closid_rmid, r, 1); + struct resctrl_cpu_defaults defaults, *p = NULL; + + if (r) { + defaults.closid = r->closid; + defaults.rmid = r->mon.rmid; + p = &defaults; + } + + on_each_cpu_mask(cpu_mask, resctrl_arch_sync_cpu_closid_rmid, p, 1); } static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, @@ -971,7 +999,7 @@ static int rdt_default_ctrl_show(struct kernfs_open_file *of, struct resctrl_schema *s = of->kn->parent->priv; struct rdt_resource *r = s->res; - seq_printf(seq, "%x\n", r->default_ctrl); + seq_printf(seq, "%x\n", resctrl_get_default_ctrl(r)); return 0; } @@ -1160,10 +1188,19 @@ static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, struct resctrl_schema *s = of->kn->parent->priv; struct rdt_resource *r = s->res; - if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD) + switch (r->membw.throttle_mode) { + case THREAD_THROTTLE_PER_THREAD: seq_puts(seq, "per-thread\n"); - else + return 0; + case THREAD_THROTTLE_MAX: seq_puts(seq, "max\n"); + return 0; + case THREAD_THROTTLE_UNDEFINED: + seq_puts(seq, "undefined\n"); + return 0; + } + + WARN_ON_ONCE(1); return 0; } @@ -1425,7 +1462,8 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, goto out; } rdtgrp->mode = RDT_MODE_EXCLUSIVE; - } else if (!strcmp(buf, "pseudo-locksetup")) { + } else if (IS_ENABLED(CONFIG_RESCTRL_FS_PSEUDO_LOCK) && + !strcmp(buf, "pseudo-locksetup")) { ret = rdtgroup_locksetup_enter(rdtgrp); if (ret) goto out; @@ -1552,11 +1590,6 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, return ret; } -struct mon_config_info { - u32 evtid; - u32 mon_config; -}; - #define INVALID_CONFIG_INDEX UINT_MAX /** @@ -1581,31 +1614,32 @@ static inline unsigned int mon_event_config_index_get(u32 evtid) } } -static void mon_event_config_read(void *info) +void resctrl_arch_mon_event_config_read(void *_config_info) { - struct mon_config_info *mon_info = info; + struct resctrl_mon_config_info *config_info = _config_info; unsigned int index; u64 msrval; - index = mon_event_config_index_get(mon_info->evtid); + index = mon_event_config_index_get(config_info->evtid); if (index == INVALID_CONFIG_INDEX) { - pr_warn_once("Invalid event id %d\n", mon_info->evtid); + pr_warn_once("Invalid event id %d\n", config_info->evtid); return; } rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval); /* Report only the valid event configuration bits */ - mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS; + config_info->mon_config = msrval & MAX_EVT_CONFIG_BITS; } -static void mondata_config_read(struct rdt_mon_domain *d, struct mon_config_info *mon_info) +static void mondata_config_read(struct resctrl_mon_config_info *mon_info) { - smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_read, mon_info, 1); + smp_call_function_any(&mon_info->d->hdr.cpu_mask, + resctrl_arch_mon_event_config_read, mon_info, 1); } static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid) { - struct mon_config_info mon_info; + struct resctrl_mon_config_info mon_info; struct rdt_mon_domain *dom; bool sep = false; @@ -1616,9 +1650,11 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid if (sep) seq_puts(s, ";"); - memset(&mon_info, 0, sizeof(struct mon_config_info)); + memset(&mon_info, 0, sizeof(struct resctrl_mon_config_info)); + mon_info.r = r; + mon_info.d = dom; mon_info.evtid = evtid; - mondata_config_read(dom, &mon_info); + mondata_config_read(&mon_info); seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config); sep = true; @@ -1651,30 +1687,32 @@ static int mbm_local_bytes_config_show(struct kernfs_open_file *of, return 0; } -static void mon_event_config_write(void *info) +void resctrl_arch_mon_event_config_write(void *_config_info) { - struct mon_config_info *mon_info = info; + struct resctrl_mon_config_info *config_info = _config_info; unsigned int index; - index = mon_event_config_index_get(mon_info->evtid); + index = mon_event_config_index_get(config_info->evtid); if (index == INVALID_CONFIG_INDEX) { - pr_warn_once("Invalid event id %d\n", mon_info->evtid); + pr_warn_once("Invalid event id %d\n", config_info->evtid); return; } - wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0); + wrmsr(MSR_IA32_EVT_CFG_BASE + index, config_info->mon_config, 0); } static void mbm_config_write_domain(struct rdt_resource *r, struct rdt_mon_domain *d, u32 evtid, u32 val) { - struct mon_config_info mon_info = {0}; + struct resctrl_mon_config_info mon_info = {0}; /* * Read the current config value first. If both are the same then * no need to write it again. */ + mon_info.r = r; + mon_info.d = d; mon_info.evtid = evtid; - mondata_config_read(d, &mon_info); + mondata_config_read(&mon_info); if (mon_info.mon_config == val) return; @@ -1686,7 +1724,7 @@ static void mbm_config_write_domain(struct rdt_resource *r, * are scoped at the domain level. Writing any of these MSRs * on one CPU is observed by all the CPUs in the domain. */ - smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_write, + smp_call_function_any(&d->hdr.cpu_mask, resctrl_arch_mon_event_config_write, &mon_info, 1); /* @@ -1703,7 +1741,6 @@ static void mbm_config_write_domain(struct rdt_resource *r, static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) { - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); char *dom_str = NULL, *id_str; unsigned long dom_id, val; struct rdt_mon_domain *d; @@ -1730,9 +1767,9 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) } /* Value from user cannot be more than the supported set of events */ - if ((val & hw_res->mbm_cfg_mask) != val) { + if ((val & r->mbm_cfg_mask) != val) { rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n", - hw_res->mbm_cfg_mask); + r->mbm_cfg_mask); return -EINVAL; } @@ -2036,6 +2073,28 @@ static struct rftype *rdtgroup_get_rftype_by_name(const char *name) return NULL; } +static void thread_throttle_mode_init(void) +{ + enum membw_throttle_mode throttle_mode = THREAD_THROTTLE_UNDEFINED; + struct rdt_resource *r_mba, *r_smba; + + r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA); + if (r_mba->alloc_capable && + r_mba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED) + throttle_mode = r_mba->membw.throttle_mode; + + r_smba = resctrl_arch_get_resource(RDT_RESOURCE_SMBA); + if (r_smba->alloc_capable && + r_smba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED) + throttle_mode = r_smba->membw.throttle_mode; + + if (throttle_mode == THREAD_THROTTLE_UNDEFINED) + return; + + resctrl_file_fflags_init("thread_throttle_mode", + RFTYPE_CTRL_INFO | RFTYPE_RES_MB); +} + void resctrl_file_fflags_init(const char *config, unsigned long fflags) { struct rftype *rft; @@ -2164,6 +2223,20 @@ static int rdtgroup_mkdir_info_resdir(void *priv, char *name, return ret; } +static unsigned long fflags_from_resource(struct rdt_resource *r) +{ + switch (r->rid) { + case RDT_RESOURCE_L3: + case RDT_RESOURCE_L2: + return RFTYPE_RES_CACHE; + case RDT_RESOURCE_MBA: + case RDT_RESOURCE_SMBA: + return RFTYPE_RES_MB; + } + + return WARN_ON_ONCE(1); +} + static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) { struct resctrl_schema *s; @@ -2184,14 +2257,14 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) /* loop over enabled controls, these are all alloc_capable */ list_for_each_entry(s, &resctrl_schema_all, list) { r = s->res; - fflags = r->fflags | RFTYPE_CTRL_INFO; + fflags = fflags_from_resource(r) | RFTYPE_CTRL_INFO; ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags); if (ret) goto out_destroy; } for_each_mon_capable_rdt_resource(r) { - fflags = r->fflags | RFTYPE_MON_INFO; + fflags = fflags_from_resource(r) | RFTYPE_MON_INFO; sprintf(name, "%s_MON", r->name); ret = rdtgroup_mkdir_info_resdir(r, name, fflags); if (ret) @@ -2255,7 +2328,7 @@ static void l2_qos_cfg_update(void *arg) static inline bool is_mba_linear(void) { - return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear; + return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear; } static int set_cache_qos_cfg(int level, bool enable) @@ -2345,10 +2418,10 @@ static void mba_sc_domain_destroy(struct rdt_resource *r, */ static bool supports_mba_mbps(void) { - struct rdt_resource *rmbm = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + struct rdt_resource *rmbm = resctrl_arch_get_resource(RDT_RESOURCE_L3); + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); - return (is_mbm_enabled() && + return (resctrl_is_mbm_enabled() && r->alloc_capable && is_mba_linear() && r->ctrl_scope == rmbm->mon_scope); } @@ -2359,7 +2432,7 @@ static bool supports_mba_mbps(void) */ static int set_mba_sc(bool mba_sc) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); u32 num_closid = resctrl_arch_get_num_closid(r); struct rdt_ctrl_domain *d; unsigned long fflags; @@ -2596,6 +2669,20 @@ static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type if (cl > max_name_width) max_name_width = cl; + switch (r->schema_fmt) { + case RESCTRL_SCHEMA_BITMAP: + s->fmt_str = "%d=%x"; + break; + case RESCTRL_SCHEMA_RANGE: + s->fmt_str = "%d=%u"; + break; + } + + if (WARN_ON_ONCE(!s->fmt_str)) { + kfree(s); + return -EINVAL; + } + INIT_LIST_HEAD(&s->list); list_add(&s->list, &resctrl_schema_all); @@ -2712,8 +2799,8 @@ static int rdt_get_tree(struct fs_context *fc) if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable()) resctrl_mounted = true; - if (is_mbm_enabled()) { - r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + if (resctrl_is_mbm_enabled()) { + r = resctrl_arch_get_resource(RDT_RESOURCE_L3); list_for_each_entry(dom, &r->mon_domains, hdr.list) mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL, RESCTRL_PICK_ANY_CPU); @@ -2823,7 +2910,7 @@ static int rdt_init_fs_context(struct fs_context *fc) return 0; } -static int reset_all_ctrls(struct rdt_resource *r) +void resctrl_arch_reset_all_ctrls(struct rdt_resource *r) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); struct rdt_hw_ctrl_domain *hw_dom; @@ -2847,12 +2934,12 @@ static int reset_all_ctrls(struct rdt_resource *r) hw_dom = resctrl_to_arch_ctrl_dom(d); for (i = 0; i < hw_res->num_closid; i++) - hw_dom->ctrl_val[i] = r->default_ctrl; + hw_dom->ctrl_val[i] = resctrl_get_default_ctrl(r); msr_param.dom = d; smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1); } - return 0; + return; } /* @@ -2971,9 +3058,10 @@ static void rdt_kill_sb(struct super_block *sb) rdt_disable_ctx(); - /*Put everything back to default values. */ + /* Put everything back to default values. */ for_each_alloc_capable_rdt_resource(r) - reset_all_ctrls(r); + resctrl_arch_reset_all_ctrls(r); + rmdir_all_sub(); rdt_pseudo_lock_release(); rdtgroup_default.mode = RDT_MODE_SHAREABLE; @@ -3080,7 +3168,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, if (ret) return ret; - if (!do_sum && is_mbm_event(mevt->evtid)) + if (!do_sum && resctrl_is_mbm_event(mevt->evtid)) mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true); } @@ -3382,7 +3470,7 @@ static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid) } cfg = &d->staged_config[CDP_NONE]; - cfg->new_ctrl = r->default_ctrl; + cfg->new_ctrl = resctrl_get_default_ctrl(r); cfg->have_new_ctrl = true; } } @@ -3696,14 +3784,21 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) { struct rdtgroup *prdtgrp = rdtgrp->mon.parent; + u32 closid, rmid; int cpu; /* Give any tasks back to the parent group */ rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask); - /* Update per cpu rmid of the moved CPUs first */ + /* + * Update per cpu closid/rmid of the moved CPUs first. + * Note: the closid will not change, but the arch code still needs it. + */ + closid = prdtgrp->closid; + rmid = prdtgrp->mon.rmid; for_each_cpu(cpu, &rdtgrp->cpu_mask) - per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid; + resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid); + /* * Update the MSR on moved CPUs and CPUs which have moved * task running on them. @@ -3736,6 +3831,7 @@ static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp) static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) { + u32 closid, rmid; int cpu; /* Give any tasks back to the default group */ @@ -3746,10 +3842,10 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); /* Update per cpu closid and rmid of the moved CPUs first */ - for_each_cpu(cpu, &rdtgrp->cpu_mask) { - per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid; - per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid; - } + closid = rdtgroup_default.closid; + rmid = rdtgroup_default.mon.rmid; + for_each_cpu(cpu, &rdtgrp->cpu_mask) + resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid); /* * Update the MSR on moved CPUs and CPUs which have moved @@ -3950,7 +4046,7 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) seq_puts(seq, ",cdpl2"); - if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl)) + if (is_mba_sc(resctrl_arch_get_resource(RDT_RESOURCE_MBA))) seq_puts(seq, ",mba_MBps"); if (resctrl_debug) @@ -4029,9 +4125,9 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d if (resctrl_mounted && resctrl_arch_mon_capable()) rmdir_mondata_subdir_allrdtgrp(r, d); - if (is_mbm_enabled()) + if (resctrl_is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); - if (is_llc_occupancy_enabled() && has_busy_rmid(d)) { + if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) { /* * When a package is going down, forcefully * decrement rmid->ebusy. There is no way to know @@ -4049,17 +4145,30 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d mutex_unlock(&rdtgroup_mutex); } +/** + * domain_setup_mon_state() - Initialise domain monitoring structures. + * @r: The resource for the newly online domain. + * @d: The newly online domain. + * + * Allocate monitor resources that belong to this domain. + * Called when the first CPU of a domain comes online, regardless of whether + * the filesystem is mounted. + * During boot this may be called before global allocations have been made by + * resctrl_mon_resource_init(). + * + * Returns 0 for success, or -ENOMEM. + */ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d) { u32 idx_limit = resctrl_arch_system_num_rmid_idx(); size_t tsize; - if (is_llc_occupancy_enabled()) { + if (resctrl_arch_is_llc_occupancy_enabled()) { d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL); if (!d->rmid_busy_llc) return -ENOMEM; } - if (is_mbm_total_enabled()) { + if (resctrl_arch_is_mbm_total_enabled()) { tsize = sizeof(*d->mbm_total); d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL); if (!d->mbm_total) { @@ -4067,7 +4176,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain return -ENOMEM; } } - if (is_mbm_local_enabled()) { + if (resctrl_arch_is_mbm_local_enabled()) { tsize = sizeof(*d->mbm_local); d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL); if (!d->mbm_local) { @@ -4106,13 +4215,13 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) if (err) goto out_unlock; - if (is_mbm_enabled()) { + if (resctrl_is_mbm_enabled()) { INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow); mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL, RESCTRL_PICK_ANY_CPU); } - if (is_llc_occupancy_enabled()) + if (resctrl_arch_is_llc_occupancy_enabled()) INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); /* @@ -4148,9 +4257,25 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) } } +static struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, + struct rdt_resource *r) +{ + struct rdt_mon_domain *d; + + lockdep_assert_cpus_held(); + + list_for_each_entry(d, &r->mon_domains, hdr.list) { + /* Find the domain that contains this CPU */ + if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) + return d; + } + + return NULL; +} + void resctrl_offline_cpu(unsigned int cpu) { - struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); struct rdt_mon_domain *d; struct rdtgroup *rdtgrp; @@ -4167,12 +4292,12 @@ void resctrl_offline_cpu(unsigned int cpu) d = get_mon_domain_from_cpu(cpu, l3); if (d) { - if (is_mbm_enabled() && cpu == d->mbm_work_cpu) { + if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) { cancel_delayed_work(&d->mbm_over); mbm_setup_overflow_handler(d, 0, cpu); } - if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu && - has_busy_rmid(d)) { + if (resctrl_arch_is_llc_occupancy_enabled() && + cpu == d->cqm_work_cpu && has_busy_rmid(d)) { cancel_delayed_work(&d->cqm_limbo); cqm_setup_limbo_handler(d, 0, cpu); } @@ -4183,14 +4308,14 @@ void resctrl_offline_cpu(unsigned int cpu) } /* - * rdtgroup_init - rdtgroup initialization + * resctrl_init - resctrl filesystem initialization * * Setup resctrl file system including set up root, create mount point, - * register rdtgroup filesystem, and initialize files under root directory. + * register resctrl filesystem, and initialize files under root directory. * * Return: 0 on success or -errno */ -int __init rdtgroup_init(void) +int __init resctrl_init(void) { int ret = 0; @@ -4199,10 +4324,18 @@ int __init rdtgroup_init(void) rdtgroup_setup_default(); - ret = sysfs_create_mount_point(fs_kobj, "resctrl"); + thread_throttle_mode_init(); + + ret = resctrl_mon_resource_init(); if (ret) return ret; + ret = sysfs_create_mount_point(fs_kobj, "resctrl"); + if (ret) { + resctrl_mon_resource_exit(); + return ret; + } + ret = register_filesystem(&rdt_fs_type); if (ret) goto cleanup_mountpoint; @@ -4234,13 +4367,16 @@ int __init rdtgroup_init(void) cleanup_mountpoint: sysfs_remove_mount_point(fs_kobj, "resctrl"); + resctrl_mon_resource_exit(); return ret; } -void __exit rdtgroup_exit(void) +void __exit resctrl_exit(void) { debugfs_remove_recursive(debugfs_resctrl); unregister_filesystem(&rdt_fs_type); sysfs_remove_mount_point(fs_kobj, "resctrl"); + + resctrl_mon_resource_exit(); } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index d94abba1c716e..880351ca3dfcb 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -6,6 +6,7 @@ #include #include #include +#include /* CLOSID, RMID value used by the default control group */ #define RESCTRL_RESERVED_CLOSID 0 @@ -25,6 +26,24 @@ int proc_resctrl_show(struct seq_file *m, /* max value for struct rdt_domain's mbps_val */ #define MBA_MAX_MBPS U32_MAX +/* Walk all possible resources, with variants for only controls or monitors. */ +#define for_each_rdt_resource(_r) \ + for ((_r) = resctrl_arch_get_resource(0); \ + (_r) && (_r)->rid < RDT_NUM_RESOURCES; \ + (_r) = resctrl_arch_get_resource((_r)->rid + 1)) + +#define for_each_capable_rdt_resource(r) \ + for_each_rdt_resource((r)) \ + if ((r)->alloc_capable || (r)->mon_capable) + +#define for_each_alloc_capable_rdt_resource(r) \ + for_each_rdt_resource((r)) \ + if ((r)->alloc_capable) + +#define for_each_mon_capable_rdt_resource(r) \ + for_each_rdt_resource((r)) \ + if ((r)->mon_capable) + /** * enum resctrl_conf_type - The type of configuration. * @CDP_NONE: No prioritisation, both code and data are controlled or monitored. @@ -40,13 +59,42 @@ enum resctrl_conf_type { #define CDP_NUM_TYPES (CDP_DATA + 1) /* - * Event IDs, the values match those used to program IA32_QM_EVTSEL before - * reading IA32_QM_CTR on RDT systems. + * struct pseudo_lock_region - pseudo-lock region information + * @s: Resctrl schema for the resource to which this + * pseudo-locked region belongs + * @closid: The closid that this pseudo-locked region uses + * @d: RDT domain to which this pseudo-locked region + * belongs + * @cbm: bitmask of the pseudo-locked region + * @lock_thread_wq: waitqueue used to wait on the pseudo-locking thread + * completion + * @thread_done: variable used by waitqueue to test if pseudo-locking + * thread completed + * @cpu: core associated with the cache on which the setup code + * will be run + * @line_size: size of the cache lines + * @size: size of pseudo-locked region in bytes + * @kmem: the kernel memory associated with pseudo-locked region + * @minor: minor number of character device associated with this + * region + * @debugfs_dir: pointer to this region's directory in the debugfs + * filesystem + * @pm_reqs: Power management QoS requests related to this region */ -enum resctrl_event_id { - QOS_L3_OCCUP_EVENT_ID = 0x01, - QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, - QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, +struct pseudo_lock_region { + struct resctrl_schema *s; + u32 closid; + struct rdt_ctrl_domain *d; + u32 cbm; + wait_queue_head_t lock_thread_wq; + int thread_done; + int cpu; + unsigned int line_size; + unsigned int size; + void *kmem; + unsigned int minor; + struct dentry *debugfs_dir; + struct list_head pm_reqs; }; /** @@ -155,6 +203,7 @@ enum membw_throttle_mode { /** * struct resctrl_membw - Memory bandwidth allocation related data * @min_bw: Minimum memory bandwidth percentage user can request + * @max_bw: Maximum memory bandwidth value, used as the reset value * @bw_gran: Granularity at which the memory bandwidth is allocated * @delay_linear: True if memory B/W delay is in linear scale * @arch_needs_linear: True if we can't configure non-linear resources @@ -165,6 +214,7 @@ enum membw_throttle_mode { */ struct resctrl_membw { u32 min_bw; + u32 max_bw; u32 bw_gran; u32 delay_linear; bool arch_needs_linear; @@ -173,7 +223,6 @@ struct resctrl_membw { u32 *mb_map; }; -struct rdt_parse_data; struct resctrl_schema; enum resctrl_scope { @@ -182,6 +231,16 @@ enum resctrl_scope { RESCTRL_L3_NODE, }; +/** + * enum resctrl_schema_fmt - The format user-space provides for a schema. + * @RESCTRL_SCHEMA_BITMAP: The schema is a bitmap in hex. + * @RESCTRL_SCHEMA_RANGE: The schema is a decimal number. + */ +enum resctrl_schema_fmt { + RESCTRL_SCHEMA_BITMAP, + RESCTRL_SCHEMA_RANGE, +}; + /** * struct rdt_resource - attributes of a resctrl resource * @rid: The index of the resource @@ -195,12 +254,10 @@ enum resctrl_scope { * @ctrl_domains: RCU list of all control domains for this resource * @mon_domains: RCU list of all monitor domains for this resource * @name: Name to use in "schemata" file. - * @data_width: Character width of data when displaying - * @default_ctrl: Specifies default cache cbm or memory B/W percent. - * @format_str: Per resource format string to show domain value - * @parse_ctrlval: Per resource function pointer to parse control values + * @schema_fmt: Which format string and parser is used for this schema. * @evt_list: List of monitoring events - * @fflags: flags to choose base and info files + * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth + * monitoring events can be configured. * @cdp_capable: Is the CDP feature available on this resource */ struct rdt_resource { @@ -215,22 +272,25 @@ struct rdt_resource { struct list_head ctrl_domains; struct list_head mon_domains; char *name; - int data_width; - u32 default_ctrl; - const char *format_str; - int (*parse_ctrlval)(struct rdt_parse_data *data, - struct resctrl_schema *s, - struct rdt_ctrl_domain *d); + enum resctrl_schema_fmt schema_fmt; struct list_head evt_list; - unsigned long fflags; + unsigned int mbm_cfg_mask; bool cdp_capable; }; +/* + * Get the resource that exists at this level. If the level is not supported + * a dummy/not-capable resource can be returned. Levels >= RDT_NUM_RESOURCES + * will return NULL. + */ +struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l); + /** * struct resctrl_schema - configuration abilities of a resource presented to * user-space * @list: Member of resctrl_schema_all. * @name: The name to use in the "schemata" file. + * @fmt_str: Format string to show domain value. * @conf_type: Whether this schema is specific to code/data. * @res: The resource structure exported by the architecture to describe * the hardware that is configured by this schema. @@ -241,16 +301,104 @@ struct rdt_resource { struct resctrl_schema { struct list_head list; char name[8]; + const char *fmt_str; enum resctrl_conf_type conf_type; struct rdt_resource *res; u32 num_closid; }; +struct resctrl_cpu_defaults { + u32 closid; + u32 rmid; +}; + +struct resctrl_mon_config_info { + struct rdt_resource *r; + struct rdt_mon_domain *d; + u32 evtid; + u32 mon_config; +}; + +/** + * resctrl_arch_sync_cpu_closid_rmid() - Refresh this CPU's CLOSID and RMID. + * Call via IPI. + * @info: If non-NULL, a pointer to a struct resctrl_cpu_defaults + * specifying the new CLOSID and RMID for tasks in the default + * resctrl ctrl and mon group when running on this CPU. If NULL, + * this CPU is not re-assigned to a different default group. + * + * Propagates reassignment of CPUs and/or tasks to different resctrl groups + * when requested by the resctrl core code. + * + * This function records the per-cpu defaults specified by @info (if any), + * and then reconfigures the CPU's hardware CLOSID and RMID for subsequent + * execution based on @current, in the same way as during a task switch. + */ +void resctrl_arch_sync_cpu_closid_rmid(void *info); + +/** + * resctrl_get_default_ctrl() - Return the default control value for this + * resource. + * @r: The resource whose default control type is queried. + */ +static inline u32 resctrl_get_default_ctrl(struct rdt_resource *r) +{ + switch (r->schema_fmt) { + case RESCTRL_SCHEMA_BITMAP: + return BIT_MASK(r->cache.cbm_len) - 1; + case RESCTRL_SCHEMA_RANGE: + return r->membw.max_bw; + } + + return WARN_ON_ONCE(1); +} + /* The number of closid supported by this resource regardless of CDP */ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); +__init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); + +/** + * resctrl_arch_mon_event_config_write() - Write the config for an event. + * @config_info: struct resctrl_mon_config_info describing the resource, domain + * and event. + * + * Reads resource, domain and eventid from @config_info and writes the + * event config_info->mon_config into hardware. + * + * Called via IPI to reach a CPU that is a member of the specified domain. + */ +void resctrl_arch_mon_event_config_write(void *config_info); + +/** + * resctrl_arch_mon_event_config_read() - Read the config for an event. + * @config_info: struct resctrl_mon_config_info describing the resource, domain + * and event. + * + * Reads resource, domain and eventid from @config_info and reads the + * hardware config value into config_info->mon_config. + * + * Called via IPI to reach a CPU that is a member of the specified domain. + */ +void resctrl_arch_mon_event_config_read(void *config_info); + +/* For use by arch code to remap resctrl's smaller CDP CLOSID range */ +static inline u32 resctrl_get_config_index(u32 closid, + enum resctrl_conf_type type) +{ + switch (type) { + default: + case CDP_NONE: + return closid; + case CDP_CODE: + return closid * 2 + 1; + case CDP_DATA: + return closid * 2; + } +} + /* * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. @@ -313,6 +461,20 @@ static inline void resctrl_arch_rmid_read_context_check(void) might_sleep(); } +/** + * resctrl_find_domain() - Search for a domain id in a resource domain list. + * @h: The domain list to search. + * @id: The domain id to search for. + * @pos: A pointer to position in the list id should be inserted. + * + * Search the domain list to find the domain id. If the domain id is + * found, return the domain. NULL otherwise. If the domain id is not + * found (and NULL returned) then the first domain with id bigger than + * the input id can be returned to the caller via @pos. + */ +struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id, + struct list_head **pos); + /** * resctrl_arch_reset_rmid() - Reset any private state associated with rmid * and eventid. @@ -340,7 +502,19 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, */ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d); +/** + * resctrl_arch_reset_all_ctrls() - Reset the control for each CLOSID to its + * default. + * @r: The resctrl resource to reset. + * + * This can be called from any CPU. + */ +void resctrl_arch_reset_all_ctrls(struct rdt_resource *r); + extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; +int __init resctrl_init(void); +void __exit resctrl_exit(void); + #endif /* _RESCTRL_H */ diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h new file mode 100644 index 0000000000000..f26450b3326b1 --- /dev/null +++ b/include/linux/resctrl_types.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025 Arm Ltd. + * Based on arch/x86/kernel/cpu/resctrl/internal.h + */ + +#ifndef __LINUX_RESCTRL_TYPES_H +#define __LINUX_RESCTRL_TYPES_H + +/* Reads to Local DRAM Memory */ +#define READS_TO_LOCAL_MEM BIT(0) + +/* Reads to Remote DRAM Memory */ +#define READS_TO_REMOTE_MEM BIT(1) + +/* Non-Temporal Writes to Local Memory */ +#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2) + +/* Non-Temporal Writes to Remote Memory */ +#define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3) + +/* Reads to Local Memory the system identifies as "Slow Memory" */ +#define READS_TO_LOCAL_S_MEM BIT(4) + +/* Reads to Remote Memory the system identifies as "Slow Memory" */ +#define READS_TO_REMOTE_S_MEM BIT(5) + +/* Dirty Victims to All Types of Memory */ +#define DIRTY_VICTIMS_TO_ALL_MEM BIT(6) + +/* Max event bits supported */ +#define MAX_EVT_CONFIG_BITS GENMASK(6, 0) + +enum resctrl_res_level { + RDT_RESOURCE_L3, + RDT_RESOURCE_L2, + RDT_RESOURCE_MBA, + RDT_RESOURCE_SMBA, + + /* Must be the last */ + RDT_NUM_RESOURCES, +}; + +/* + * Event IDs, the values match those used to program IA32_QM_EVTSEL before + * reading IA32_QM_CTR on RDT systems. + */ +enum resctrl_event_id { + QOS_L3_OCCUP_EVENT_ID = 0x01, + QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, + QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, +}; + +#endif /* __LINUX_RESCTRL_TYPES_H */