sched: introduce se->vruntime
Introduce se->vruntime as the sum of weighted delta-exec values, and use
it as the key into the tree.

The idea of using absolute virtual time as the basic metric of scheduling
was first raised by William Lee Irwin, advanced by Tong Li, and first
prototyped by Roman Zippel in the "Really Fair Scheduler" (RFS) patchset.

Also see:

   http://lkml.org/lkml/2007/9/2/76

for a simpler variant of this patch.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
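
To make the new metric concrete: vruntime accrues real execution time scaled
by NICE_0_LOAD/weight, so higher-weight entities accumulate virtual time more
slowly and therefore sort later in the rb-tree. Below is a minimal standalone
sketch of that rule (hypothetical illustration code, not the kernel
implementation; it assumes the nice-0 load weight of 1024 and ignores the
rounding of the real calc_delta_fair()):

#include <stdint.h>
#include <stdio.h>

#define NICE_0_LOAD 1024	/* load weight of a nice-0 task */

/*
 * vruntime advances by delta_exec scaled by NICE_0_LOAD/weight:
 * heavier (higher-priority) entities accrue virtual time more slowly,
 * so they keep the CPU proportionally longer before another entity
 * becomes leftmost in the tree.
 */
static uint64_t vruntime_delta(uint64_t delta_exec_ns, unsigned long weight)
{
	return delta_exec_ns * NICE_0_LOAD / weight;
}

int main(void)
{
	uint64_t delta = 10000000ULL;	/* 10ms of real execution */

	printf("nice 0   : +%llu ns\n",
	       (unsigned long long)vruntime_delta(delta, 1024));
	printf("weight x2: +%llu ns\n",
	       (unsigned long long)vruntime_delta(delta, 2048));
	return 0;
}

Run, this advances the nice-0 entity's vruntime by the full 10ms but the
double-weight entity's by only 5ms, which is exactly why the heavier task
earns CPU time longer before losing the leftmost position.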
Committed by Ingo Molnar on Oct 15, 2007 (commit e9acbff, 1 parent: 08e2388)
Showing 3 changed files with 50 additions and 33 deletions.
include/linux/sched.h: 1 addition & 0 deletions
@@ -902,6 +902,7 @@ struct sched_entity {

 	u64			exec_start;
 	u64			sum_exec_runtime;
+	u64			vruntime;
 	u64			prev_sum_exec_runtime;
 	u64			wait_start_fair;
 	u64			sleep_start_fair;
kernel/sched.c: 1 addition & 0 deletions
@@ -182,6 +182,7 @@ struct cfs_rq {

 	s64			fair_clock;
 	u64			exec_clock;
+	u64			min_vruntime;
 	s64			wait_runtime;
 	u64			sleeper_bonus;
 	unsigned long		wait_runtime_overruns, wait_runtime_underruns;
kernel/sched_fair.c: 48 additions & 33 deletions
@@ -92,14 +92,16 @@ unsigned int sysctl_sched_runtime_limit __read_mostly;
  */
 enum {
 	SCHED_FEAT_FAIR_SLEEPERS	= 1,
-	SCHED_FEAT_SLEEPER_AVG		= 2,
-	SCHED_FEAT_SLEEPER_LOAD_AVG	= 4,
-	SCHED_FEAT_START_DEBIT		= 8,
-	SCHED_FEAT_SKIP_INITIAL		= 16,
+	SCHED_FEAT_NEW_FAIR_SLEEPERS	= 2,
+	SCHED_FEAT_SLEEPER_AVG		= 4,
+	SCHED_FEAT_SLEEPER_LOAD_AVG	= 8,
+	SCHED_FEAT_START_DEBIT		= 16,
+	SCHED_FEAT_SKIP_INITIAL		= 32,
 };
 
 const_debug unsigned int sysctl_sched_features =
-		SCHED_FEAT_FAIR_SLEEPERS	*1 |
+		SCHED_FEAT_FAIR_SLEEPERS	*0 |
+		SCHED_FEAT_NEW_FAIR_SLEEPERS	*1 |
 		SCHED_FEAT_SLEEPER_AVG		*0 |
 		SCHED_FEAT_SLEEPER_LOAD_AVG	*1 |
 		SCHED_FEAT_START_DEBIT		*1 |
@@ -145,6 +147,19 @@ static inline struct task_struct *task_of(struct sched_entity *se)
  * Scheduling class tree data structure manipulation methods:
  */
 
+static inline void
+set_leftmost(struct cfs_rq *cfs_rq, struct rb_node *leftmost)
+{
+	struct sched_entity *se;
+
+	cfs_rq->rb_leftmost = leftmost;
+	if (leftmost) {
+		se = rb_entry(leftmost, struct sched_entity, run_node);
+		cfs_rq->min_vruntime = max(se->vruntime,
+						cfs_rq->min_vruntime);
+	}
+}
+
 /*
  * Enqueue an entity into the rb-tree:
  */
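
The max() in set_leftmost() folds the leftmost entity's key into
cfs_rq->min_vruntime, so the floor only ever moves forward even as entities
are queued and dequeued. A minimal sketch of that invariant (hypothetical
standalone code, not the kernel's):

#include <stdint.h>

static uint64_t min_vruntime;	/* per-runqueue floor, only ever rises */

/* Fold a new leftmost key in without moving the floor backwards. */
void track_min_vruntime(uint64_t leftmost_key)
{
	if (leftmost_key > min_vruntime)
		min_vruntime = leftmost_key;
}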
@@ -180,7 +195,7 @@ __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 * used):
 	 */
 	if (leftmost)
-		cfs_rq->rb_leftmost = &se->run_node;
+		set_leftmost(cfs_rq, &se->run_node);
 
 	rb_link_node(&se->run_node, parent, link);
 	rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
@@ -195,7 +210,8 @@ static void
 __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	if (cfs_rq->rb_leftmost == &se->run_node)
-		cfs_rq->rb_leftmost = rb_next(&se->run_node);
+		set_leftmost(cfs_rq, rb_next(&se->run_node));
+
 	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
 	update_load_sub(&cfs_rq->load, se->load.weight);
 	cfs_rq->nr_running--;
@@ -336,14 +352,20 @@ static inline void
 __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	      unsigned long delta_exec)
 {
-	unsigned long delta, delta_fair, delta_mine;
+	unsigned long delta, delta_fair, delta_mine, delta_exec_weighted;
 	struct load_weight *lw = &cfs_rq->load;
 	unsigned long load = lw->weight;
 
 	schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));
 
 	curr->sum_exec_runtime += delta_exec;
 	cfs_rq->exec_clock += delta_exec;
+	delta_exec_weighted = delta_exec;
+	if (unlikely(curr->load.weight != NICE_0_LOAD)) {
+		delta_exec_weighted = calc_delta_fair(delta_exec_weighted,
+							&curr->load);
+	}
+	curr->vruntime += delta_exec_weighted;
 
 	if (unlikely(!load))
 		return;
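
The unlikely() branch keeps the common nice-0 case on a fast path: when
curr->load.weight equals NICE_0_LOAD, virtual time advances exactly at
wall-clock rate, so the scaling step can be skipped. A hedged sketch of the
equivalence (the division below is a simplified stand-in for the real
calc_delta_fair(), which multiplies by a precomputed inverse weight and
shifts by WMULT_SHIFT):

#include <assert.h>
#include <stdint.h>

#define NICE_0_LOAD 1024

/* Simplified stand-in: delta scaled by NICE_0_LOAD/weight. */
static uint64_t calc_delta_fair_sketch(uint64_t delta, unsigned long weight)
{
	return delta * NICE_0_LOAD / weight;
}

int main(void)
{
	/* For a nice-0 entity the scaling is the identity... */
	assert(calc_delta_fair_sketch(4000000, NICE_0_LOAD) == 4000000);
	/* ...so the fast path may add delta_exec to vruntime unscaled. */
	return 0;
}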
@@ -413,8 +435,6 @@ calc_weighted(unsigned long delta, struct sched_entity *se)
  */
 static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	s64 key;
-
 	/*
 	 * Are we enqueueing a waiting task? (for current tasks
 	 * a dequeue/enqueue event is a NOP)
@@ -424,28 +444,7 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	/*
 	 * Update the key:
 	 */
-	key = cfs_rq->fair_clock;
-
-	/*
-	 * Optimize the common nice 0 case:
-	 */
-	if (likely(se->load.weight == NICE_0_LOAD)) {
-		key -= se->wait_runtime;
-	} else {
-		u64 tmp;
-
-		if (se->wait_runtime < 0) {
-			tmp = -se->wait_runtime;
-			key += (tmp * se->load.inv_weight) >>
-				(WMULT_SHIFT - NICE_0_SHIFT);
-		} else {
-			tmp = se->wait_runtime;
-			key -= (tmp * se->load.inv_weight) >>
-				(WMULT_SHIFT - NICE_0_SHIFT);
-		}
-	}
-
-	se->fair_key = key;
+	se->fair_key = se->vruntime;
 }
 
 /*
@@ -615,8 +614,22 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 	 */
 	update_curr(cfs_rq);
 
-	if (wakeup)
+	if (wakeup) {
+		u64 min_runtime, latency;
+
+		min_runtime = cfs_rq->min_vruntime;
+		min_runtime += sysctl_sched_latency/2;
+
+		if (sched_feat(NEW_FAIR_SLEEPERS)) {
+			latency = calc_weighted(sysctl_sched_latency, se);
+			if (min_runtime > latency)
+				min_runtime -= latency;
+		}
+
+		se->vruntime = max(se->vruntime, min_runtime);
+
 		enqueue_sleeper(cfs_rq, se);
+	}
 
 	update_stats_enqueue(cfs_rq, se);
 	__enqueue_entity(cfs_rq, se);
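
The wakeup path now places sleepers against the min_vruntime floor rather
than crediting wait_runtime. A hedged sketch of the placement rule
(hypothetical helper names; the 20ms sysctl_sched_latency value is an
assumed default of this period, and weighted_latency stands in for the
result of calc_weighted()):

#include <stdint.h>

#define SCHED_LATENCY_NS 20000000ULL	/* assumed sysctl_sched_latency */

static uint64_t max_u64(uint64_t a, uint64_t b)
{
	return a > b ? a : b;
}

/*
 * Woken sleepers are placed no earlier than min_vruntime plus half a
 * latency period; NEW_FAIR_SLEEPERS then credits back a weighted
 * latency so sleepers wake slightly "behind" and get to run soon,
 * but a stale, far-in-the-past vruntime is never carried over.
 */
uint64_t place_woken(uint64_t se_vruntime, uint64_t min_vruntime,
		     uint64_t weighted_latency, int new_fair_sleepers)
{
	uint64_t min_runtime = min_vruntime + SCHED_LATENCY_NS / 2;

	if (new_fair_sleepers && min_runtime > weighted_latency)
		min_runtime -= weighted_latency;

	return max_u64(se_vruntime, min_runtime);
}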
@@ -1155,6 +1168,8 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 	if (sched_feat(START_DEBIT))
 		se->wait_runtime = -(sched_granularity(cfs_rq) / 2);
+
+	se->vruntime = cfs_rq->min_vruntime;
 	update_stats_enqueue(cfs_rq, se);
 	__enqueue_entity(cfs_rq, se);
 	resched_task(rq->curr);
 }
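
Seeding the child with cfs_rq->min_vruntime keeps a fresh entity from
dominating the tree. A brief hedged illustration (hypothetical numbers and
function name):

#include <stdint.h>

/*
 * Hypothetical illustration: after an hour of uptime, existing
 * entities sit near vruntime ~3.6e12 ns.  A child seeded at 0 would
 * stay leftmost for that entire span of virtual time; seeding at the
 * runqueue's min_vruntime makes it compete on equal footing at once.
 */
uint64_t seed_new_entity(uint64_t cfs_rq_min_vruntime)
{
	return cfs_rq_min_vruntime;	/* rather than 0 */
}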
