x86/resctrl: Separate arch and fs resctrl locks

resctrl has one mutex that is taken by both the architecture-specific code and
the filesystem parts. The two interact via cpuhp, where the architecture code
updates the domain list. Filesystem handlers that walk the domains list should
not run concurrently with the cpuhp callback modifying the list.

Exposing a lock from the filesystem code means the interface is not cleanly
defined, and creates the possibility of cross-architecture lock ordering
headaches. The interaction only exists so that certain filesystem paths are
serialised against CPU hotplug. The CPU hotplug code already has a mechanism to
do this using cpus_read_lock().

MPAM's monitors have an overflow interrupt, so it needs to be possible to walk
the domains list in irq context. RCU is ideal for this, but some paths need to
be able to sleep to allocate memory.
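
As a rough illustration of the read side this enables (a sketch only, not
part of this patch; the handler name is hypothetical):

  /* Hypothetical overflow-interrupt path: walk the domain list under RCU */
  static void mon_overflow_irq(struct rdt_resource *r, int id)
  {
          struct rdt_domain *d;

          rcu_read_lock();
          list_for_each_entry_rcu(d, &r->domains, list) {
                  if (d->id != id)
                          continue;
                  /* read and clear the overflowed counters for this domain */
                  break;
          }
          rcu_read_unlock();
  }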

Because resctrl_{on,off}line_cpu() take the rdtgroup_mutex as part of a cpuhp
callback, cpus_read_lock() must always be taken first.
rdtgroup_schemata_write() already does this.

Most of the filesystem code's domain list walkers are currently protected by
the rdtgroup_mutex taken in rdtgroup_kn_lock_live().  The exceptions are
rdt_bit_usage_show() and the mon_config helpers which take the lock directly.

Make the domain list protected by RCU. An architecture-specific lock prevents
concurrent writers. rdt_bit_usage_show() could walk the domain list using RCU,
but to keep all the filesystem operations the same, this is changed to call
cpus_read_lock(). The mon_config helpers send multiple IPIs; take
cpus_read_lock() in these cases too.
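
The write side (condensed from the domain_remove_cpu() hunk below) then
looks like:

  mutex_lock(&domain_list_lock);      /* arch-private, writers only */
  ...
  list_del_rcu(&d->list);
  synchronize_rcu();                  /* no reader can still see "d" */
  domain_free(hw_dom);
  ...
  mutex_unlock(&domain_list_lock);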

The other filesystem list walkers need to be able to sleep.  Add
cpus_read_lock() to rdtgroup_kn_lock_live() so that the cpuhp callbacks can't
be invoked while filesystem operations are in progress.
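
The resulting shape of rdtgroup_kn_lock_live() is roughly the following
(the rdtgroup.c hunk is not shown in the diff below, so this is a sketch),
with rdtgroup_kn_unlock() dropping the two locks in the reverse order:

  struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
  {
          struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);

          if (!rdtgrp)
                  return NULL;

          rdtgroup_kn_get(rdtgrp, kn);

          /* CPUs lock first, matching the ordering imposed by cpuhp */
          cpus_read_lock();
          mutex_lock(&rdtgroup_mutex);

          /* Was this group deleted while we waited? */
          if (rdtgrp->flags & RDT_DELETED)
                  return NULL;

          return rdtgrp;
  }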

Add lockdep_assert_cpus_held() in the cases where the rdtgroup_kn_lock_live()
call isn't obvious.

Resctrl's domain online/offline calls now need to take the rdtgroup_mutex
themselves.
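
For example (a condensed sketch; again, the rdtgroup.c hunk is not shown
below):

  int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
  {
          int err = 0;

          mutex_lock(&rdtgroup_mutex);
          /* monitoring state setup, limbo/overflow workers, mon_data mkdir ... */
          mutex_unlock(&rdtgroup_mutex);

          return err;
  }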

  [ bp: Fold in a build fix: https://lore.kernel.org/r/87zfvwieli.ffs@tglx ]

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Reviewed-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
Tested-by: Peter Newman <peternewman@google.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Carl Worth <carl@os.amperecomputing.com> # arm64
Link: https://lore.kernel.org/r/20240213184438.16675-25-james.morse@arm.com
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
James Morse authored and Borislav Petkov (AMD) committed Feb 19, 2024
1 parent eeff1d4 commit fb70081
Showing 6 changed files with 112 additions and 28 deletions.
44 changes: 34 additions & 10 deletions arch/x86/kernel/cpu/resctrl/core.c
@@ -16,6 +16,7 @@

#define pr_fmt(fmt) "resctrl: " fmt

#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/cacheinfo.h>
@@ -25,8 +26,15 @@
#include <asm/resctrl.h>
#include "internal.h"

/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(rdtgroup_mutex);
/*
* rdt_domain structures are kfree()d when their last CPU goes offline,
* and allocated when the first CPU in a new domain comes online.
* The rdt_resource's domain list is updated when this happens. Readers of
* the domain list must either take cpus_read_lock(), or rely on an RCU
* read-side critical section, to avoid observing concurrent modification.
* All writers take this mutex:
*/
static DEFINE_MUTEX(domain_list_lock);

/*
* The cached resctrl_pqr_state is strictly per CPU and can never be
@@ -354,6 +362,15 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
{
struct rdt_domain *d;

/*
* Walking r->domains, ensure it can't race with cpuhp.
* Because this is called via IPI by rdt_ctrl_update(), assertions
* about locks this thread holds will lead to false positives. Check
* someone is holding the CPUs lock.
*/
if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && IS_ENABLED(CONFIG_LOCKDEP))
WARN_ON_ONCE(!lockdep_is_cpus_held());

list_for_each_entry(d, &r->domains, list) {
/* Find the domain that contains this CPU */
if (cpumask_test_cpu(cpu, &d->cpu_mask))
@@ -510,6 +527,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
struct rdt_domain *d;
int err;

lockdep_assert_held(&domain_list_lock);

d = rdt_find_domain(r, id, &add_pos);
if (IS_ERR(d)) {
pr_warn("Couldn't find cache id for CPU %d\n", cpu);
@@ -543,11 +562,12 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
return;
}

list_add_tail(&d->list, add_pos);
list_add_tail_rcu(&d->list, add_pos);

err = resctrl_online_domain(r, d);
if (err) {
list_del(&d->list);
list_del_rcu(&d->list);
synchronize_rcu();
domain_free(hw_dom);
}
}
@@ -558,6 +578,8 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
struct rdt_hw_domain *hw_dom;
struct rdt_domain *d;

lockdep_assert_held(&domain_list_lock);

d = rdt_find_domain(r, id, NULL);
if (IS_ERR_OR_NULL(d)) {
pr_warn("Couldn't find cache id for CPU %d\n", cpu);
@@ -568,7 +590,8 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cpumask_clear_cpu(cpu, &d->cpu_mask);
if (cpumask_empty(&d->cpu_mask)) {
resctrl_offline_domain(r, d);
list_del(&d->list);
list_del_rcu(&d->list);
synchronize_rcu();

/*
* rdt_domain "d" is going to be freed below, so clear
@@ -598,13 +621,13 @@ static int resctrl_arch_online_cpu(unsigned int cpu)
{
struct rdt_resource *r;

mutex_lock(&rdtgroup_mutex);
mutex_lock(&domain_list_lock);
for_each_capable_rdt_resource(r)
domain_add_cpu(cpu, r);
clear_closid_rmid(cpu);
mutex_unlock(&domain_list_lock);

clear_closid_rmid(cpu);
resctrl_online_cpu(cpu);
mutex_unlock(&rdtgroup_mutex);

return 0;
}
@@ -613,13 +636,14 @@ static int resctrl_arch_offline_cpu(unsigned int cpu)
{
struct rdt_resource *r;

mutex_lock(&rdtgroup_mutex);
resctrl_offline_cpu(cpu);

mutex_lock(&domain_list_lock);
for_each_capable_rdt_resource(r)
domain_remove_cpu(cpu, r);
mutex_unlock(&domain_list_lock);

clear_closid_rmid(cpu);
mutex_unlock(&rdtgroup_mutex);

return 0;
}
15 changes: 12 additions & 3 deletions arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -212,6 +212,9 @@ static int parse_line(char *line, struct resctrl_schema *s,
struct rdt_domain *d;
unsigned long dom_id;

/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();

if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
(r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) {
rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
@@ -316,6 +319,9 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
struct rdt_domain *d;
u32 idx;

/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();

if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
return -ENOMEM;

@@ -381,11 +387,9 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
return -EINVAL;
buf[nbytes - 1] = '\0';

cpus_read_lock();
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (!rdtgrp) {
rdtgroup_kn_unlock(of->kn);
cpus_read_unlock();
return -ENOENT;
}
rdt_last_cmd_clear();
@@ -447,7 +451,6 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
out:
rdt_staged_configs_clear();
rdtgroup_kn_unlock(of->kn);
cpus_read_unlock();
return ret ?: nbytes;
}

@@ -467,6 +470,9 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo
bool sep = false;
u32 ctrl_val;

/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();

seq_printf(s, "%*s:", max_name_width, schema->name);
list_for_each_entry(dom, &r->domains, list) {
if (sep)
@@ -537,6 +543,9 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
{
int cpu;

/* When picking a CPU from cpu_mask, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();

/*
* Setup the parameters to pass to mon_event_count() to read the data.
*/
8 changes: 8 additions & 0 deletions arch/x86/kernel/cpu/resctrl/monitor.c
@@ -15,6 +15,7 @@
* Software Developer Manual June 2016, volume 3, section 17.17.
*/

#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/sizes.h>
#include <linux/slab.h>
@@ -472,6 +473,9 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)

lockdep_assert_held(&rdtgroup_mutex);

/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();

idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);

entry->busy = 0;
@@ -778,6 +782,7 @@ void cqm_handle_limbo(struct work_struct *work)
unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
struct rdt_domain *d;

cpus_read_lock();
mutex_lock(&rdtgroup_mutex);

d = container_of(work, struct rdt_domain, cqm_limbo.work);
@@ -792,6 +797,7 @@ void cqm_handle_limbo(struct work_struct *work)
}

mutex_unlock(&rdtgroup_mutex);
cpus_read_unlock();
}

/**
@@ -823,6 +829,7 @@ void mbm_handle_overflow(struct work_struct *work)
struct rdt_resource *r;
struct rdt_domain *d;

cpus_read_lock();
mutex_lock(&rdtgroup_mutex);

/*
@@ -856,6 +863,7 @@ void mbm_handle_overflow(struct work_struct *work)

out_unlock:
mutex_unlock(&rdtgroup_mutex);
cpus_read_unlock();
}

/**
3 changes: 3 additions & 0 deletions arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -844,6 +844,9 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
struct rdt_domain *d_i;
bool ret = false;

/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();

if (!zalloc_cpumask_var(&cpu_with_psl, GFP_KERNEL))
return true;

