Skip to content

Commit

Permalink
sched/deadline: speed up SCHED_DEADLINE pushes with a push-heap
Browse files Browse the repository at this point in the history
Data from tests confirmed that the original active load balancing
logic didn't scale neither in the number of CPU nor in the number of
tasks (as sched_rt does).

Here we provide a global data structure to keep track of deadlines
of the running tasks in the system. The structure is composed by
a bitmask showing the free CPUs and a max-heap, needed when the system
is heavily loaded.

The implementation and concurrent access scheme are kept simple by
design. However, our measurements show that we can compete with sched_rt
on large multi-CPUs machines [1].

Only the push path is addressed, the extension to use this structure
also for pull decisions is straightforward. However, we are currently
evaluating different (in order to decrease/avoid contention) data
structures to solve possibly both problems. We are also going to re-run
tests considering recent changes inside cpupri [2].

 [1] http://retis.sssup.it/~jlelli/papers/Ospert11Lelli.pdf
 [2] http://www.spinics.net/lists/linux-rt-users/msg06778.html

Signed-off-by: Juri Lelli <juri.lelli@gmail.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1383831828-15501-14-git-send-email-juri.lelli@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Juri Lelli authored and Ingo Molnar committed Jan 13, 2014
1 parent 332ac17 commit 6bfd6d7
Show file tree
Hide file tree
Showing 6 changed files with 269 additions and 40 deletions.
2 changes: 1 addition & 1 deletion kernel/sched/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ endif
obj-y += core.o proc.o clock.o cputime.o
obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
obj-y += wait.o completion.o
obj-$(CONFIG_SMP) += cpupri.o
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
obj-$(CONFIG_SCHED_DEBUG) += debug.o
Expand Down
3 changes: 3 additions & 0 deletions kernel/sched/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -5287,6 +5287,7 @@ static void free_rootdomain(struct rcu_head *rcu)
struct root_domain *rd = container_of(rcu, struct root_domain, rcu);

cpupri_cleanup(&rd->cpupri);
cpudl_cleanup(&rd->cpudl);
free_cpumask_var(rd->dlo_mask);
free_cpumask_var(rd->rto_mask);
free_cpumask_var(rd->online);
Expand Down Expand Up @@ -5345,6 +5346,8 @@ static int init_rootdomain(struct root_domain *rd)
goto free_dlo_mask;

init_dl_bw(&rd->dl_bw);
if (cpudl_init(&rd->cpudl) != 0)
goto free_dlo_mask;

if (cpupri_init(&rd->cpupri) != 0)
goto free_rto_mask;
Expand Down
216 changes: 216 additions & 0 deletions kernel/sched/cpudeadline.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
/*
* kernel/sched/cpudl.c
*
* Global CPU deadline management
*
* Author: Juri Lelli <j.lelli@sssup.it>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/

#include <linux/gfp.h>
#include <linux/kernel.h>
#include "cpudeadline.h"

static inline int parent(int i)
{
return (i - 1) >> 1;
}

static inline int left_child(int i)
{
return (i << 1) + 1;
}

static inline int right_child(int i)
{
return (i << 1) + 2;
}

static inline int dl_time_before(u64 a, u64 b)
{
return (s64)(a - b) < 0;
}

void cpudl_exchange(struct cpudl *cp, int a, int b)
{
int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu;

swap(cp->elements[a], cp->elements[b]);
swap(cp->cpu_to_idx[cpu_a], cp->cpu_to_idx[cpu_b]);
}

void cpudl_heapify(struct cpudl *cp, int idx)
{
int l, r, largest;

/* adapted from lib/prio_heap.c */
while(1) {
l = left_child(idx);
r = right_child(idx);
largest = idx;

if ((l < cp->size) && dl_time_before(cp->elements[idx].dl,
cp->elements[l].dl))
largest = l;
if ((r < cp->size) && dl_time_before(cp->elements[largest].dl,
cp->elements[r].dl))
largest = r;
if (largest == idx)
break;

/* Push idx down the heap one level and bump one up */
cpudl_exchange(cp, largest, idx);
idx = largest;
}
}

void cpudl_change_key(struct cpudl *cp, int idx, u64 new_dl)
{
WARN_ON(idx > num_present_cpus() || idx == IDX_INVALID);

if (dl_time_before(new_dl, cp->elements[idx].dl)) {
cp->elements[idx].dl = new_dl;
cpudl_heapify(cp, idx);
} else {
cp->elements[idx].dl = new_dl;
while (idx > 0 && dl_time_before(cp->elements[parent(idx)].dl,
cp->elements[idx].dl)) {
cpudl_exchange(cp, idx, parent(idx));
idx = parent(idx);
}
}
}

static inline int cpudl_maximum(struct cpudl *cp)
{
return cp->elements[0].cpu;
}

/*
* cpudl_find - find the best (later-dl) CPU in the system
* @cp: the cpudl max-heap context
* @p: the task
* @later_mask: a mask to fill in with the selected CPUs (or NULL)
*
* Returns: int - best CPU (heap maximum if suitable)
*/
int cpudl_find(struct cpudl *cp, struct task_struct *p,
struct cpumask *later_mask)
{
int best_cpu = -1;
const struct sched_dl_entity *dl_se = &p->dl;

if (later_mask && cpumask_and(later_mask, cp->free_cpus,
&p->cpus_allowed) && cpumask_and(later_mask,
later_mask, cpu_active_mask)) {
best_cpu = cpumask_any(later_mask);
goto out;
} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
best_cpu = cpudl_maximum(cp);
if (later_mask)
cpumask_set_cpu(best_cpu, later_mask);
}

out:
WARN_ON(best_cpu > num_present_cpus() && best_cpu != -1);

return best_cpu;
}

/*
* cpudl_set - update the cpudl max-heap
* @cp: the cpudl max-heap context
* @cpu: the target cpu
* @dl: the new earliest deadline for this cpu
*
* Notes: assumes cpu_rq(cpu)->lock is locked
*
* Returns: (void)
*/
void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
{
int old_idx, new_cpu;
unsigned long flags;

WARN_ON(cpu > num_present_cpus());

raw_spin_lock_irqsave(&cp->lock, flags);
old_idx = cp->cpu_to_idx[cpu];
if (!is_valid) {
/* remove item */
if (old_idx == IDX_INVALID) {
/*
* Nothing to remove if old_idx was invalid.
* This could happen if a rq_offline_dl is
* called for a CPU without -dl tasks running.
*/
goto out;
}
new_cpu = cp->elements[cp->size - 1].cpu;
cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl;
cp->elements[old_idx].cpu = new_cpu;
cp->size--;
cp->cpu_to_idx[new_cpu] = old_idx;
cp->cpu_to_idx[cpu] = IDX_INVALID;
while (old_idx > 0 && dl_time_before(
cp->elements[parent(old_idx)].dl,
cp->elements[old_idx].dl)) {
cpudl_exchange(cp, old_idx, parent(old_idx));
old_idx = parent(old_idx);
}
cpumask_set_cpu(cpu, cp->free_cpus);
cpudl_heapify(cp, old_idx);

goto out;
}

if (old_idx == IDX_INVALID) {
cp->size++;
cp->elements[cp->size - 1].dl = 0;
cp->elements[cp->size - 1].cpu = cpu;
cp->cpu_to_idx[cpu] = cp->size - 1;
cpudl_change_key(cp, cp->size - 1, dl);
cpumask_clear_cpu(cpu, cp->free_cpus);
} else {
cpudl_change_key(cp, old_idx, dl);
}

out:
raw_spin_unlock_irqrestore(&cp->lock, flags);
}

/*
* cpudl_init - initialize the cpudl structure
* @cp: the cpudl max-heap context
*/
int cpudl_init(struct cpudl *cp)
{
int i;

memset(cp, 0, sizeof(*cp));
raw_spin_lock_init(&cp->lock);
cp->size = 0;
for (i = 0; i < NR_CPUS; i++)
cp->cpu_to_idx[i] = IDX_INVALID;
if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL))
return -ENOMEM;
cpumask_setall(cp->free_cpus);

return 0;
}

/*
* cpudl_cleanup - clean up the cpudl structure
* @cp: the cpudl max-heap context
*/
void cpudl_cleanup(struct cpudl *cp)
{
/*
* nothing to do for the moment
*/
}
33 changes: 33 additions & 0 deletions kernel/sched/cpudeadline.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#ifndef _LINUX_CPUDL_H
#define _LINUX_CPUDL_H

#include <linux/sched.h>

#define IDX_INVALID -1

struct array_item {
u64 dl;
int cpu;
};

struct cpudl {
raw_spinlock_t lock;
int size;
int cpu_to_idx[NR_CPUS];
struct array_item elements[NR_CPUS];
cpumask_var_t free_cpus;
};


#ifdef CONFIG_SMP
int cpudl_find(struct cpudl *cp, struct task_struct *p,
struct cpumask *later_mask);
void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid);
int cpudl_init(struct cpudl *cp);
void cpudl_cleanup(struct cpudl *cp);
#else
#define cpudl_set(cp, cpu, dl) do { } while (0)
#define cpudl_init() do { } while (0)
#endif /* CONFIG_SMP */

#endif /* _LINUX_CPUDL_H */
Loading

0 comments on commit 6bfd6d7

Please sign in to comment.