Skip to content

Commit

Permalink
sched/rt: Make RT capacity-aware
Browse files Browse the repository at this point in the history
Capacity Awareness refers to the fact that on heterogeneous systems
(like Arm big.LITTLE), the capacity of the CPUs is not uniform, hence
when placing tasks we need to be aware of this difference of CPU
capacities.

In such scenarios we want to ensure that the selected CPU has enough
capacity to meet the requirement of the running task. Enough capacity
means here that capacity_orig_of(cpu) >= task.requirement.

The definition of task.requirement is dependent on the scheduling class.

For CFS, utilization is used to select a CPU that has >= capacity value
than the cfs_task.util.

	capacity_orig_of(cpu) >= cfs_task.util

DL isn't capacity aware at the moment but can make use of the bandwidth
reservation to implement that in a similar manner CFS uses utilization.
The following patchset implements that:

https://lore.kernel.org/lkml/20190506044836.2914-1-luca.abeni@santannapisa.it/

	capacity_orig_of(cpu)/SCHED_CAPACITY >= dl_deadline/dl_runtime

For RT we don't have a per task utilization signal and we lack any
information in general about what performance requirement the RT task
needs. But with the introduction of uclamp, RT tasks can now control
that by setting uclamp_min to guarantee a minimum performance point.

ATM the uclamp value are only used for frequency selection; but on
heterogeneous systems this is not enough and we need to ensure that the
capacity of the CPU is >= uclamp_min. Which is what implemented here.

	capacity_orig_of(cpu) >= rt_task.uclamp_min

Note that by default uclamp.min is 1024, which means that RT tasks will
always be biased towards the big CPUs, which make for a better more
predictable behavior for the default case.

Must stress that the bias acts as a hint rather than a definite
placement strategy. For example, if all big cores are busy executing
other RT tasks we can't guarantee that a new RT task will be placed
there.

On non-heterogeneous systems the original behavior of RT should be
retained. Similarly if uclamp is not selected in the config.

[ mingo: Minor edits to comments. ]

Signed-off-by: Qais Yousef <qais.yousef@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20191009104611.15363-1-qais.yousef@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Qais Yousef authored and Ingo Molnar committed Dec 25, 2019
1 parent 1d42509 commit 804d402
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 18 deletions.
25 changes: 23 additions & 2 deletions kernel/sched/cpupri.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ static int convert_prio(int prio)
* @cp: The cpupri context
* @p: The task
* @lowest_mask: A mask to fill in with selected CPUs (or NULL)
* @fitness_fn: A pointer to a function to do custom checks whether the CPU
* fits a specific criteria so that we only return those CPUs.
*
* Note: This function returns the recommended CPUs as calculated during the
* current invocation. By the time the call returns, the CPUs may have in
Expand All @@ -57,7 +59,8 @@ static int convert_prio(int prio)
* Return: (int)bool - CPUs were found
*/
int cpupri_find(struct cpupri *cp, struct task_struct *p,
struct cpumask *lowest_mask)
struct cpumask *lowest_mask,
bool (*fitness_fn)(struct task_struct *p, int cpu))
{
int idx = 0;
int task_pri = convert_prio(p->prio);
Expand Down Expand Up @@ -98,6 +101,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
continue;

if (lowest_mask) {
int cpu;

cpumask_and(lowest_mask, p->cpus_ptr, vec->mask);

/*
Expand All @@ -108,7 +113,23 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
* condition, simply act as though we never hit this
* priority level and continue on.
*/
if (cpumask_any(lowest_mask) >= nr_cpu_ids)
if (cpumask_empty(lowest_mask))
continue;

if (!fitness_fn)
return 1;

/* Ensure the capacity of the CPUs fit the task */
for_each_cpu(cpu, lowest_mask) {
if (!fitness_fn(p, cpu))
cpumask_clear_cpu(cpu, lowest_mask);
}

/*
* If no CPU at the current priority can fit the task
* continue looking
*/
if (cpumask_empty(lowest_mask))
continue;
}

Expand Down
4 changes: 3 additions & 1 deletion kernel/sched/cpupri.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ struct cpupri {
};

#ifdef CONFIG_SMP
int cpupri_find(struct cpupri *cp, struct task_struct *p, struct cpumask *lowest_mask);
int cpupri_find(struct cpupri *cp, struct task_struct *p,
struct cpumask *lowest_mask,
bool (*fitness_fn)(struct task_struct *p, int cpu));
void cpupri_set(struct cpupri *cp, int cpu, int pri);
int cpupri_init(struct cpupri *cp);
void cpupri_cleanup(struct cpupri *cp);
Expand Down
83 changes: 68 additions & 15 deletions kernel/sched/rt.c
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,45 @@ static inline int on_rt_rq(struct sched_rt_entity *rt_se)
return rt_se->on_rq;
}

#ifdef CONFIG_UCLAMP_TASK
/*
* Verify the fitness of task @p to run on @cpu taking into account the uclamp
* settings.
*
* This check is only important for heterogeneous systems where uclamp_min value
* is higher than the capacity of a @cpu. For non-heterogeneous system this
* function will always return true.
*
* The function will return true if the capacity of the @cpu is >= the
* uclamp_min and false otherwise.
*
* Note that uclamp_min will be clamped to uclamp_max if uclamp_min
* > uclamp_max.
*/
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
{
unsigned int min_cap;
unsigned int max_cap;
unsigned int cpu_cap;

/* Only heterogeneous systems can benefit from this check */
if (!static_branch_unlikely(&sched_asym_cpucapacity))
return true;

min_cap = uclamp_eff_value(p, UCLAMP_MIN);
max_cap = uclamp_eff_value(p, UCLAMP_MAX);

cpu_cap = capacity_orig_of(cpu);

return cpu_cap >= min(min_cap, max_cap);
}
#else
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
{
return true;
}
#endif

#ifdef CONFIG_RT_GROUP_SCHED

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
Expand Down Expand Up @@ -1391,6 +1430,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
{
struct task_struct *curr;
struct rq *rq;
bool test;

/* For anything but wake ups, just return the task_cpu */
if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
Expand Down Expand Up @@ -1422,10 +1462,16 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
*
* This test is optimistic, if we get it wrong the load-balancer
* will have to sort it out.
*
* We take into account the capacity of the CPU to ensure it fits the
* requirement of the task - which is only important on heterogeneous
* systems like big.LITTLE.
*/
if (curr && unlikely(rt_task(curr)) &&
(curr->nr_cpus_allowed < 2 ||
curr->prio <= p->prio)) {
test = curr &&
unlikely(rt_task(curr)) &&
(curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);

if (test || !rt_task_fits_capacity(p, cpu)) {
int target = find_lowest_rq(p);

/*
Expand All @@ -1449,15 +1495,15 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
* let's hope p can move out.
*/
if (rq->curr->nr_cpus_allowed == 1 ||
!cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
!cpupri_find(&rq->rd->cpupri, rq->curr, NULL, NULL))
return;

/*
* p is migratable, so let's not schedule it and
* see if it is pushed or pulled somewhere else.
*/
if (p->nr_cpus_allowed != 1
&& cpupri_find(&rq->rd->cpupri, p, NULL))
if (p->nr_cpus_allowed != 1 &&
cpupri_find(&rq->rd->cpupri, p, NULL, NULL))
return;

/*
Expand Down Expand Up @@ -1601,7 +1647,8 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
if (!task_running(rq, p) &&
cpumask_test_cpu(cpu, p->cpus_ptr))
cpumask_test_cpu(cpu, p->cpus_ptr) &&
rt_task_fits_capacity(p, cpu))
return 1;

return 0;
Expand Down Expand Up @@ -1643,7 +1690,8 @@ static int find_lowest_rq(struct task_struct *task)
if (task->nr_cpus_allowed == 1)
return -1; /* No other targets possible */

if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask,
rt_task_fits_capacity))
return -1; /* No targets found */

/*
Expand Down Expand Up @@ -2147,12 +2195,14 @@ static void pull_rt_task(struct rq *this_rq)
*/
static void task_woken_rt(struct rq *rq, struct task_struct *p)
{
if (!task_running(rq, p) &&
!test_tsk_need_resched(rq->curr) &&
p->nr_cpus_allowed > 1 &&
(dl_task(rq->curr) || rt_task(rq->curr)) &&
(rq->curr->nr_cpus_allowed < 2 ||
rq->curr->prio <= p->prio))
bool need_to_push = !task_running(rq, p) &&
!test_tsk_need_resched(rq->curr) &&
p->nr_cpus_allowed > 1 &&
(dl_task(rq->curr) || rt_task(rq->curr)) &&
(rq->curr->nr_cpus_allowed < 2 ||
rq->curr->prio <= p->prio);

if (need_to_push || !rt_task_fits_capacity(p, cpu_of(rq)))
push_rt_tasks(rq);
}

Expand Down Expand Up @@ -2224,7 +2274,10 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
*/
if (task_on_rq_queued(p) && rq->curr != p) {
#ifdef CONFIG_SMP
if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
bool need_to_push = rq->rt.overloaded ||
!rt_task_fits_capacity(p, cpu_of(rq));

if (p->nr_cpus_allowed > 1 && need_to_push)
rt_queue_push_tasks(rq);
#endif /* CONFIG_SMP */
if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
Expand Down

0 comments on commit 804d402

Please sign in to comment.