Merge tag 'x86_cache_for_v6.9_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull resource control updates from Borislav Petkov:

 - Rework different aspects of the resctrl code like adding
   arch-specific accessors and splitting the locking, in order to
   accommodate ARM's MPAM implementation of hw resource control and be
   able to use the same filesystem control interface as on x86 (the
   accessor pattern is sketched after this list). Work by James Morse

 - Improve the memory bandwidth throttling heuristic to handle
   workloads with irregular load levels, which previously ended up
   penalized unnecessarily

 - Use CPUID to detect the memory bandwidth enforcement limit on AMD

 - The usual set of fixes
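
The arch-accessor split from the first item above is visible in the resctrl.h diff below: filesystem code stops flipping x86 static keys directly and instead asks the architecture through resctrl_arch_*() helpers, which MPAM can implement its own way. A minimal sketch of the resulting fs-side call pattern; example_fs_start_mon() is a hypothetical caller, only the resctrl_arch_*() helpers come from this merge:

    /* Hypothetical fs-side caller; only the resctrl_arch_*() helpers are real. */
    static int example_fs_start_mon(void)
    {
            if (!resctrl_arch_mon_capable())
                    return -EINVAL;

            /* On x86 this flips rdt_mon_enable_key and rdt_enable_key. */
            resctrl_arch_enable_mon();
            return 0;
    }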

* tag 'x86_cache_for_v6.9_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (30 commits)
  x86/resctrl: Remove lockdep annotation that triggers false positive
  x86/resctrl: Separate arch and fs resctrl locks
  x86/resctrl: Move domain helper migration into resctrl_offline_cpu()
  x86/resctrl: Add CPU offline callback for resctrl work
  x86/resctrl: Allow overflow/limbo handlers to be scheduled on any-but CPU
  x86/resctrl: Add CPU online callback for resctrl work
  x86/resctrl: Add helpers for system wide mon/alloc capable
  x86/resctrl: Make rdt_enable_key the arch's decision to switch
  x86/resctrl: Move alloc/mon static keys into helpers
  x86/resctrl: Make resctrl_mounted checks explicit
  x86/resctrl: Allow arch to allocate memory needed in resctrl_arch_rmid_read()
  x86/resctrl: Allow resctrl_arch_rmid_read() to sleep
  x86/resctrl: Queue mon_event_read() instead of sending an IPI
  x86/resctrl: Add cpumask_any_housekeeping() for limbo/overflow
  x86/resctrl: Move CLOSID/RMID matching and setting to use helpers
  x86/resctrl: Allocate the cleanest CLOSID by searching closid_num_dirty_rmid
  x86/resctrl: Use __set_bit()/__clear_bit() instead of open coding
  x86/resctrl: Track the number of dirty RMID a CLOSID has
  x86/resctrl: Allow RMID allocation to be scoped by CLOSID
  x86/resctrl: Access per-rmid structures by index
  ...
Linus Torvalds committed Mar 12, 2024
2 parents bfdb395 + c0d848f commit 2edfd10
Showing 9 changed files with 946 additions and 339 deletions.
arch/x86/include/asm/resctrl.h (90 additions, 0 deletions)

@@ -7,6 +7,13 @@
 #include <linux/sched.h>
 #include <linux/jump_label.h>
 
+/*
+ * This value can never be a valid CLOSID, and is used when mapping a
+ * (closid, rmid) pair to an index and back. On x86 only the RMID is
+ * needed. The index is a software defined value.
+ */
+#define X86_RESCTRL_EMPTY_CLOSID         ((u32)~0)
+
 /**
  * struct resctrl_pqr_state - State cache for the PQR MSR
  * @cur_rmid:           The cached Resource Monitoring ID
@@ -31,10 +38,47 @@ struct resctrl_pqr_state {

 DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state);
 
+extern bool rdt_alloc_capable;
+extern bool rdt_mon_capable;
+
 DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
 DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
 DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
 
+static inline bool resctrl_arch_alloc_capable(void)
+{
+        return rdt_alloc_capable;
+}
+
+static inline void resctrl_arch_enable_alloc(void)
+{
+        static_branch_enable_cpuslocked(&rdt_alloc_enable_key);
+        static_branch_inc_cpuslocked(&rdt_enable_key);
+}
+
+static inline void resctrl_arch_disable_alloc(void)
+{
+        static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
+        static_branch_dec_cpuslocked(&rdt_enable_key);
+}
+
+static inline bool resctrl_arch_mon_capable(void)
+{
+        return rdt_mon_capable;
+}
+
+static inline void resctrl_arch_enable_mon(void)
+{
+        static_branch_enable_cpuslocked(&rdt_mon_enable_key);
+        static_branch_inc_cpuslocked(&rdt_enable_key);
+}
+
+static inline void resctrl_arch_disable_mon(void)
+{
+        static_branch_disable_cpuslocked(&rdt_mon_enable_key);
+        static_branch_dec_cpuslocked(&rdt_enable_key);
+}
+
 /*
  * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
  *
@@ -88,12 +132,58 @@ static inline unsigned int resctrl_arch_round_mon_val(unsigned int val)
         return val * scale;
 }
 
+static inline void resctrl_arch_set_closid_rmid(struct task_struct *tsk,
+                                                u32 closid, u32 rmid)
+{
+        WRITE_ONCE(tsk->closid, closid);
+        WRITE_ONCE(tsk->rmid, rmid);
+}
+
+static inline bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid)
+{
+        return READ_ONCE(tsk->closid) == closid;
+}
+
+static inline bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 ignored,
+                                           u32 rmid)
+{
+        return READ_ONCE(tsk->rmid) == rmid;
+}
+
 static inline void resctrl_sched_in(struct task_struct *tsk)
 {
         if (static_branch_likely(&rdt_enable_key))
                 __resctrl_sched_in(tsk);
 }
 
+static inline u32 resctrl_arch_system_num_rmid_idx(void)
+{
+        /* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
+        return boot_cpu_data.x86_cache_max_rmid + 1;
+}
+
+static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
+{
+        *rmid = idx;
+        *closid = X86_RESCTRL_EMPTY_CLOSID;
+}
+
+static inline u32 resctrl_arch_rmid_idx_encode(u32 ignored, u32 rmid)
+{
+        return rmid;
+}
+
+/* x86 can always read an rmid, nothing needs allocating */
+struct rdt_resource;
+static inline void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid)
+{
+        might_sleep();
+        return NULL;
+};
+
+static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid,
+                                             void *ctx) { };
+
 void resctrl_cpu_detect(struct cpuinfo_x86 *c);
 
 #else
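
On x86 the monitoring index above is simply the RMID, and the CLOSID half decodes back to X86_RESCTRL_EMPTY_CLOSID; an MPAM implementation would fold both values into the index, and may need to allocate a monitor in resctrl_arch_mon_ctx_alloc() (hence the might_sleep()). A round-trip sketch of the encode/decode helpers, with illustrative values:

    u32 closid, rmid, idx;

    idx = resctrl_arch_rmid_idx_encode(X86_RESCTRL_EMPTY_CLOSID, 42);
    /* idx == 42 on x86 */
    resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
    /* closid == X86_RESCTRL_EMPTY_CLOSID, rmid == 42 again */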
arch/x86/kernel/cpu/resctrl/core.c (52 additions, 59 deletions)

@@ -16,6 +16,7 @@

 #define pr_fmt(fmt) "resctrl: " fmt
 
+#include <linux/cpu.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/cacheinfo.h>
@@ -25,8 +26,15 @@
 #include <asm/resctrl.h>
 #include "internal.h"
 
-/* Mutex to protect rdtgroup access. */
-DEFINE_MUTEX(rdtgroup_mutex);
+/*
+ * rdt_domain structures are kfree()d when their last CPU goes offline,
+ * and allocated when the first CPU in a new domain comes online.
+ * The rdt_resource's domain list is updated when this happens. Readers of
+ * the domain list must either take cpus_read_lock(), or rely on an RCU
+ * read-side critical section, to avoid observing concurrent modification.
+ * All writers take this mutex:
+ */
+static DEFINE_MUTEX(domain_list_lock);
 
 /*
  * The cached resctrl_pqr_state is strictly per CPU and can never be
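
The comment above states the new locking contract from "x86/resctrl: Separate arch and fs resctrl locks": writers serialize on domain_list_lock, readers hold cpus_read_lock() or sit in an RCU read-side critical section. A sketch of what an RCU-side reader looks like under that contract; example_walk_domains() is illustrative, not a function added by this merge:

    /* Hypothetical reader: safe against concurrent domain add/remove. */
    static void example_walk_domains(struct rdt_resource *r)
    {
            struct rdt_domain *d;

            rcu_read_lock();
            list_for_each_entry_rcu(d, &r->domains, list)
                    pr_info("domain %d\n", d->id);
            rcu_read_unlock();
    }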
@@ -136,15 +144,15 @@ static inline void cache_alloc_hsw_probe(void)
 {
         struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3];
         struct rdt_resource *r = &hw_res->r_resctrl;
-        u32 l, h, max_cbm = BIT_MASK(20) - 1;
+        u64 max_cbm = BIT_ULL_MASK(20) - 1, l3_cbm_0;
 
-        if (wrmsr_safe(MSR_IA32_L3_CBM_BASE, max_cbm, 0))
+        if (wrmsrl_safe(MSR_IA32_L3_CBM_BASE, max_cbm))
                 return;
 
-        rdmsr(MSR_IA32_L3_CBM_BASE, l, h);
+        rdmsrl(MSR_IA32_L3_CBM_BASE, l3_cbm_0);
 
         /* If all the bits were set in MSR, return success */
-        if (l != max_cbm)
+        if (l3_cbm_0 != max_cbm)
                 return;
 
         hw_res->num_closid = 4;
@@ -231,19 +239,17 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
 static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
 {
         struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
-        union cpuid_0x10_3_eax eax;
-        union cpuid_0x10_x_edx edx;
-        u32 ebx, ecx, subleaf;
+        u32 eax, ebx, ecx, edx, subleaf;
 
         /*
          * Query CPUID_Fn80000020_EDX_x01 for MBA and
          * CPUID_Fn80000020_EDX_x02 for SMBA
          */
         subleaf = (r->rid == RDT_RESOURCE_SMBA) ? 2 : 1;
 
-        cpuid_count(0x80000020, subleaf, &eax.full, &ebx, &ecx, &edx.full);
-        hw_res->num_closid = edx.split.cos_max + 1;
-        r->default_ctrl = MAX_MBA_BW_AMD;
+        cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx);
+        hw_res->num_closid = edx + 1;
+        r->default_ctrl = 1 << eax;
 
         /* AMD does not use delay */
         r->membw.delay_linear = false;
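
The AMD probe now derives the MBA limit from CPUID leaf 0x80000020 instead of the hardcoded MAX_MBA_BW_AMD: EDX reports the maximum COS number and EAX the size of the bandwidth control, so the limit becomes 1 << eax. The same leaf can be queried from userspace; a sketch assuming a CPU that reports it (subleaf 1 for MBA, 2 for SMBA):

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            /* Subleaf 1 = MBA, subleaf 2 = SMBA, per the comment above. */
            if (!__get_cpuid_count(0x80000020, 1, &eax, &ebx, &ecx, &edx))
                    return 1;       /* leaf not reported by this CPU */

            printf("num_closid=%u max_bw_value=%u\n", edx + 1, 1u << eax);
            return 0;
    }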
@@ -512,6 +518,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
         struct rdt_domain *d;
         int err;
 
+        lockdep_assert_held(&domain_list_lock);
+
         d = rdt_find_domain(r, id, &add_pos);
         if (IS_ERR(d)) {
                 pr_warn("Couldn't find cache id for CPU %d\n", cpu);
@@ -545,11 +553,12 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
                 return;
         }
 
-        list_add_tail(&d->list, add_pos);
+        list_add_tail_rcu(&d->list, add_pos);
 
         err = resctrl_online_domain(r, d);
         if (err) {
-                list_del(&d->list);
+                list_del_rcu(&d->list);
+                synchronize_rcu();
                 domain_free(hw_dom);
         }
 }
@@ -560,6 +569,8 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
         struct rdt_hw_domain *hw_dom;
         struct rdt_domain *d;
 
+        lockdep_assert_held(&domain_list_lock);
+
         d = rdt_find_domain(r, id, NULL);
         if (IS_ERR_OR_NULL(d)) {
                 pr_warn("Couldn't find cache id for CPU %d\n", cpu);
@@ -570,7 +581,8 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
         cpumask_clear_cpu(cpu, &d->cpu_mask);
         if (cpumask_empty(&d->cpu_mask)) {
                 resctrl_offline_domain(r, d);
-                list_del(&d->list);
+                list_del_rcu(&d->list);
+                synchronize_rcu();
 
                 /*
                  * rdt_domain "d" is going to be freed below, so clear
@@ -582,73 +594,47 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)

                 return;
         }
-
-        if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
-                if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
-                        cancel_delayed_work(&d->mbm_over);
-                        mbm_setup_overflow_handler(d, 0);
-                }
-                if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
-                    has_busy_rmid(r, d)) {
-                        cancel_delayed_work(&d->cqm_limbo);
-                        cqm_setup_limbo_handler(d, 0);
-                }
-        }
 }

 static void clear_closid_rmid(int cpu)
 {
         struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
 
-        state->default_closid = 0;
-        state->default_rmid = 0;
-        state->cur_closid = 0;
-        state->cur_rmid = 0;
-        wrmsr(MSR_IA32_PQR_ASSOC, 0, 0);
+        state->default_closid = RESCTRL_RESERVED_CLOSID;
+        state->default_rmid = RESCTRL_RESERVED_RMID;
+        state->cur_closid = RESCTRL_RESERVED_CLOSID;
+        state->cur_rmid = RESCTRL_RESERVED_RMID;
+        wrmsr(MSR_IA32_PQR_ASSOC, RESCTRL_RESERVED_RMID,
+              RESCTRL_RESERVED_CLOSID);
 }
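
clear_closid_rmid() now parks an offlined CPU on the named reserved IDs rather than bare zeroes. The two-argument wrmsr() above writes the RMID into the low half and the CLOSID into the high half of MSR_IA32_PQR_ASSOC, matching the layout documented in resctrl.h; as a sketch, the equivalent single 64-bit value would be composed like this (example_pqr_assoc_val() is illustrative):

    /* Illustrative only: how the two wrmsr() arguments pack into the MSR. */
    static inline u64 example_pqr_assoc_val(u32 closid, u32 rmid)
    {
            return ((u64)closid << 32) | rmid;      /* CLOSID high, RMID low */
    }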

-static int resctrl_online_cpu(unsigned int cpu)
+static int resctrl_arch_online_cpu(unsigned int cpu)
 {
         struct rdt_resource *r;
 
-        mutex_lock(&rdtgroup_mutex);
+        mutex_lock(&domain_list_lock);
         for_each_capable_rdt_resource(r)
                 domain_add_cpu(cpu, r);
-        /* The cpu is set in default rdtgroup after online. */
-        cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
+        mutex_unlock(&domain_list_lock);
+
         clear_closid_rmid(cpu);
-        mutex_unlock(&rdtgroup_mutex);
+        resctrl_online_cpu(cpu);
 
         return 0;
 }

-static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
-{
-        struct rdtgroup *cr;
-
-        list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
-                if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask)) {
-                        break;
-                }
-        }
-}
-
-static int resctrl_offline_cpu(unsigned int cpu)
+static int resctrl_arch_offline_cpu(unsigned int cpu)
 {
-        struct rdtgroup *rdtgrp;
         struct rdt_resource *r;
 
-        mutex_lock(&rdtgroup_mutex);
+        resctrl_offline_cpu(cpu);
+
+        mutex_lock(&domain_list_lock);
         for_each_capable_rdt_resource(r)
                 domain_remove_cpu(cpu, r);
-        list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
-                if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
-                        clear_childcpus(rdtgrp, cpu);
-                        break;
-                }
-        }
+        mutex_unlock(&domain_list_lock);
+
         clear_closid_rmid(cpu);
-        mutex_unlock(&rdtgroup_mutex);
 
         return 0;
 }
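
After the rename, the arch callbacks own only domain bookkeeping and the PQR reset; the group bookkeeping that used to live here (clear_childcpus() and the default-group cpumask update) moves behind the filesystem's resctrl_online_cpu()/resctrl_offline_cpu() hooks. A hedged sketch of the fs-side online hook, reconstructed from the lines removed above (the real body lives in rdtgroup.c):

    void resctrl_online_cpu(unsigned int cpu)
    {
            mutex_lock(&rdtgroup_mutex);
            /* The CPU is set in the default rdtgroup after online. */
            cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
            mutex_unlock(&rdtgroup_mutex);
    }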
@@ -968,7 +954,8 @@ static int __init resctrl_late_init(void)

         state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
                                   "x86/resctrl/cat:online:",
-                                  resctrl_online_cpu, resctrl_offline_cpu);
+                                  resctrl_arch_online_cpu,
+                                  resctrl_arch_offline_cpu);
         if (state < 0)
                 return state;

@@ -992,8 +979,14 @@ late_initcall(resctrl_late_init);

 static void __exit resctrl_exit(void)
 {
+        struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+
         cpuhp_remove_state(rdt_online);
+
         rdtgroup_exit();
+
+        if (r->mon_capable)
+                rdt_put_mon_l3_config();
 }
 
 __exitcall(resctrl_exit);
[Diffs for the remaining 7 of the 9 changed files are not shown.]
