Skip to content

Commit

Permalink
sched/isolation: Move isolcpus= handling to the housekeeping code
Browse files Browse the repository at this point in the history
We want to centralize the isolation features, to be done by the housekeeping
subsystem and scheduler domain isolation is a significant part of it.

No intended behaviour change, we just reuse the housekeeping cpumask
and core code.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wanpeng Li <kernellwp@gmail.com>
Link: http://lkml.kernel.org/r/1509072159-31808-11-git-send-email-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Frederic Weisbecker authored and Ingo Molnar committed Oct 27, 2017
1 parent 6f1982f commit edb9382
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 59 deletions.
11 changes: 10 additions & 1 deletion drivers/base/cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <linux/cpufeature.h>
#include <linux/tick.h>
#include <linux/pm_qos.h>
#include <linux/sched/isolation.h>

#include "base.h"

Expand Down Expand Up @@ -271,8 +272,16 @@ static ssize_t print_cpus_isolated(struct device *dev,
struct device_attribute *attr, char *buf)
{
int n = 0, len = PAGE_SIZE-2;
cpumask_var_t isolated;

n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(cpu_isolated_map));
if (!alloc_cpumask_var(&isolated, GFP_KERNEL))
return -ENOMEM;

cpumask_andnot(isolated, cpu_possible_mask,
housekeeping_cpumask(HK_FLAG_DOMAIN));
n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(isolated));

free_cpumask_var(isolated);

return n;
}
Expand Down
2 changes: 0 additions & 2 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,6 @@ struct task_group;
/* Task command name length: */
#define TASK_COMM_LEN 16

extern cpumask_var_t cpu_isolated_map;

extern void scheduler_tick(void);

#define MAX_SCHEDULE_TIMEOUT LONG_MAX
Expand Down
1 change: 1 addition & 0 deletions include/linux/sched/isolation.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ enum hk_flags {
HK_FLAG_MISC = (1 << 2),
HK_FLAG_SCHED = (1 << 3),
HK_FLAG_TICK = (1 << 4),
HK_FLAG_DOMAIN = (1 << 5),
};

#ifdef CONFIG_CPU_ISOLATION
Expand Down
15 changes: 5 additions & 10 deletions kernel/cgroup/cpuset.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
#include <linux/backing-dev.h>
#include <linux/sort.h>
#include <linux/oom.h>

#include <linux/sched/isolation.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
#include <linux/mutex.h>
Expand Down Expand Up @@ -656,7 +656,6 @@ static int generate_sched_domains(cpumask_var_t **domains,
int csn; /* how many cpuset ptrs in csa so far */
int i, j, k; /* indices for partition finding loops */
cpumask_var_t *doms; /* resulting partition; i.e. sched domains */
cpumask_var_t non_isolated_cpus; /* load balanced CPUs */
struct sched_domain_attr *dattr; /* attributes for custom domains */
int ndoms = 0; /* number of sched domains in result */
int nslot; /* next empty doms[] struct cpumask slot */
Expand All @@ -666,10 +665,6 @@ static int generate_sched_domains(cpumask_var_t **domains,
dattr = NULL;
csa = NULL;

if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL))
goto done;
cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);

/* Special case for the 99% of systems with one, full, sched domain */
if (is_sched_load_balance(&top_cpuset)) {
ndoms = 1;
Expand All @@ -683,7 +678,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
update_domain_attr_tree(dattr, &top_cpuset);
}
cpumask_and(doms[0], top_cpuset.effective_cpus,
non_isolated_cpus);
housekeeping_cpumask(HK_FLAG_DOMAIN));

goto done;
}
Expand All @@ -707,7 +702,8 @@ static int generate_sched_domains(cpumask_var_t **domains,
*/
if (!cpumask_empty(cp->cpus_allowed) &&
!(is_sched_load_balance(cp) &&
cpumask_intersects(cp->cpus_allowed, non_isolated_cpus)))
cpumask_intersects(cp->cpus_allowed,
housekeeping_cpumask(HK_FLAG_DOMAIN))))
continue;

if (is_sched_load_balance(cp))
Expand Down Expand Up @@ -789,7 +785,7 @@ static int generate_sched_domains(cpumask_var_t **domains,

if (apn == b->pn) {
cpumask_or(dp, dp, b->effective_cpus);
cpumask_and(dp, dp, non_isolated_cpus);
cpumask_and(dp, dp, housekeeping_cpumask(HK_FLAG_DOMAIN));
if (dattr)
update_domain_attr_tree(dattr + nslot, b);

Expand All @@ -802,7 +798,6 @@ static int generate_sched_domains(cpumask_var_t **domains,
BUG_ON(nslot != ndoms);

done:
free_cpumask_var(non_isolated_cpus);
kfree(csa);

/*
Expand Down
16 changes: 1 addition & 15 deletions kernel/sched/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,6 @@ __read_mostly int scheduler_running;
*/
int sysctl_sched_rt_runtime = 950000;

/* CPUs with isolated domains */
cpumask_var_t cpu_isolated_map;

/*
* __task_rq_lock - lock the rq @p resides on.
*/
Expand Down Expand Up @@ -5735,10 +5732,6 @@ static inline void sched_init_smt(void) { }

void __init sched_init_smp(void)
{
cpumask_var_t non_isolated_cpus;

alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);

sched_init_numa();

/*
Expand All @@ -5748,16 +5741,12 @@ void __init sched_init_smp(void)
*/
mutex_lock(&sched_domains_mutex);
sched_init_domains(cpu_active_mask);
cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
if (cpumask_empty(non_isolated_cpus))
cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
mutex_unlock(&sched_domains_mutex);

/* Move init over to a non-isolated CPU */
if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
BUG();
sched_init_granularity();
free_cpumask_var(non_isolated_cpus);

init_sched_rt_class();
init_sched_dl_class();
Expand Down Expand Up @@ -5961,9 +5950,6 @@ void __init sched_init(void)
calc_load_update = jiffies + LOAD_FREQ;

#ifdef CONFIG_SMP
/* May be allocated at isolcpus cmdline parse time */
if (cpu_isolated_map == NULL)
zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
idle_thread_set_boot_cpu();
set_cpu_rq_start_time(smp_processor_id());
#endif
Expand Down
63 changes: 50 additions & 13 deletions kernel/sched/isolation.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,32 +63,69 @@ void __init housekeeping_init(void)
WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
}

#ifdef CONFIG_NO_HZ_FULL
static int __init housekeeping_nohz_full_setup(char *str)
static int __init housekeeping_setup(char *str, enum hk_flags flags)
{
cpumask_var_t non_housekeeping_mask;
int err;

alloc_bootmem_cpumask_var(&non_housekeeping_mask);
if (cpulist_parse(str, non_housekeeping_mask) < 0) {
pr_warn("Housekeeping: Incorrect nohz_full cpumask\n");
err = cpulist_parse(str, non_housekeeping_mask);
if (err < 0 || cpumask_last(non_housekeeping_mask) >= nr_cpu_ids) {
pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n");
free_bootmem_cpumask_var(non_housekeeping_mask);
return 0;
}

alloc_bootmem_cpumask_var(&housekeeping_mask);
cpumask_andnot(housekeeping_mask, cpu_possible_mask, non_housekeeping_mask);

if (cpumask_empty(housekeeping_mask))
cpumask_set_cpu(smp_processor_id(), housekeeping_mask);
if (!housekeeping_flags) {
alloc_bootmem_cpumask_var(&housekeeping_mask);
cpumask_andnot(housekeeping_mask,
cpu_possible_mask, non_housekeeping_mask);
if (cpumask_empty(housekeeping_mask))
cpumask_set_cpu(smp_processor_id(), housekeeping_mask);
} else {
cpumask_var_t tmp;

alloc_bootmem_cpumask_var(&tmp);
cpumask_andnot(tmp, cpu_possible_mask, non_housekeeping_mask);
if (!cpumask_equal(tmp, housekeeping_mask)) {
pr_warn("Housekeeping: nohz_full= must match isolcpus=\n");
free_bootmem_cpumask_var(tmp);
free_bootmem_cpumask_var(non_housekeeping_mask);
return 0;
}
free_bootmem_cpumask_var(tmp);
}

housekeeping_flags = HK_FLAG_TICK | HK_FLAG_TIMER |
HK_FLAG_RCU | HK_FLAG_MISC;
if ((flags & HK_FLAG_TICK) && !(housekeeping_flags & HK_FLAG_TICK)) {
if (IS_ENABLED(CONFIG_NO_HZ_FULL)) {
tick_nohz_full_setup(non_housekeeping_mask);
} else {
pr_warn("Housekeeping: nohz unsupported."
" Build with CONFIG_NO_HZ_FULL\n");
free_bootmem_cpumask_var(non_housekeeping_mask);
return 0;
}
}

tick_nohz_full_setup(non_housekeeping_mask);
housekeeping_flags |= flags;

free_bootmem_cpumask_var(non_housekeeping_mask);

return 1;
}

static int __init housekeeping_nohz_full_setup(char *str)
{
unsigned int flags;

flags = HK_FLAG_TICK | HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC;

return housekeeping_setup(str, flags);
}
__setup("nohz_full=", housekeeping_nohz_full_setup);
#endif

static int __init housekeeping_isolcpus_setup(char *str)
{
return housekeeping_setup(str, HK_FLAG_DOMAIN);
}
__setup("isolcpus=", housekeeping_isolcpus_setup);
24 changes: 6 additions & 18 deletions kernel/sched/topology.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
*/
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/sched/isolation.h>

#include "sched.h"

Expand Down Expand Up @@ -469,21 +470,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
update_top_cache_domain(cpu);
}

/* Setup the mask of CPUs configured for isolated domains */
static int __init isolated_cpu_setup(char *str)
{
int ret;

alloc_bootmem_cpumask_var(&cpu_isolated_map);
ret = cpulist_parse(str, cpu_isolated_map);
if (ret || cpumask_last(cpu_isolated_map) >= nr_cpu_ids) {
pr_err("sched: Error, all isolcpus= values must be between 0 and %u - ignoring them.\n", nr_cpu_ids-1);
return 0;
}
return 1;
}
__setup("isolcpus=", isolated_cpu_setup);

struct s_data {
struct sched_domain ** __percpu sd;
struct root_domain *rd;
Expand Down Expand Up @@ -1792,7 +1778,7 @@ int sched_init_domains(const struct cpumask *cpu_map)
doms_cur = alloc_sched_domains(ndoms_cur);
if (!doms_cur)
doms_cur = &fallback_doms;
cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN));
err = build_sched_domains(doms_cur[0], NULL);
register_sched_domain_sysctl();

Expand Down Expand Up @@ -1875,7 +1861,8 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
doms_new = alloc_sched_domains(1);
if (doms_new) {
n = 1;
cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
cpumask_and(doms_new[0], cpu_active_mask,
housekeeping_cpumask(HK_FLAG_DOMAIN));
}
} else {
n = ndoms_new;
Expand All @@ -1898,7 +1885,8 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
if (!doms_new) {
n = 0;
doms_new = &fallback_doms;
cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
cpumask_and(doms_new[0], cpu_active_mask,
housekeeping_cpumask(HK_FLAG_DOMAIN));
}

/* Build new domains: */
Expand Down

0 comments on commit edb9382

Please sign in to comment.