Skip to content

Commit

Permalink
[PATCH] sched: improve migration accuracy
Browse files Browse the repository at this point in the history
Co-opt rq->timestamp_last_tick (renamed to rq->most_recent_timestamp) to
serve as the reference timestamp for cache_hot_time evaluation, and update
it at both tick and schedule time. This prevents the reference from being
behind task->last_ran at evaluation time, and moves it closer to the current
time on the remote processor. The intent is to improve the accuracy of
cache-hot evaluation and of timestamp adjustment during task migration.

Fix a minor sched_time double-accounting error that occurs when a task
passes through schedule() without being scheduled off and then takes the
next timer tick.

[kenneth.w.chen@intel.com: cleanup]
Signed-off-by: Mike Galbraith <efault@gmx.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Ken Chen <kenneth.w.chen@intel.com>
Cc: Don Mullis <dwm@meer.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
  • Loading branch information
Mike Galbraith authored and Linus Torvalds committed Dec 10, 2006
1 parent 08c183f commit b18ec80
Showing 1 changed file with 20 additions and 21 deletions.
41 changes: 20 additions & 21 deletions kernel/sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,8 @@ struct rq {
unsigned long nr_uninterruptible;

unsigned long expired_timestamp;
unsigned long long timestamp_last_tick;
/* Cached timestamp set by update_cpu_clock() */
unsigned long long most_recent_timestamp;
struct task_struct *curr, *idle;
unsigned long next_balance;
struct mm_struct *prev_mm;
Expand Down Expand Up @@ -944,8 +945,8 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local)
if (!local) {
/* Compensate for drifting sched_clock */
struct rq *this_rq = this_rq();
now = (now - this_rq->timestamp_last_tick)
+ rq->timestamp_last_tick;
now = (now - this_rq->most_recent_timestamp)
+ rq->most_recent_timestamp;
}
#endif

Expand Down Expand Up @@ -1689,8 +1690,8 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
* Not the local CPU - must adjust timestamp. This should
* get optimised away in the !CONFIG_SMP case.
*/
p->timestamp = (p->timestamp - this_rq->timestamp_last_tick)
+ rq->timestamp_last_tick;
p->timestamp = (p->timestamp - this_rq->most_recent_timestamp)
+ rq->most_recent_timestamp;
__activate_task(p, rq);
if (TASK_PREEMPTS_CURR(p, rq))
resched_task(rq->curr);
Expand Down Expand Up @@ -2068,8 +2069,8 @@ static void pull_task(struct rq *src_rq, struct prio_array *src_array,
set_task_cpu(p, this_cpu);
inc_nr_running(p, this_rq);
enqueue_task(p, this_array);
p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
+ this_rq->timestamp_last_tick;
p->timestamp = (p->timestamp - src_rq->most_recent_timestamp)
+ this_rq->most_recent_timestamp;
/*
* Note that idle threads have a prio of MAX_PRIO, for this test
* to be always true for them.
Expand Down Expand Up @@ -2105,10 +2106,15 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
* 2) too many balance attempts have failed.
*/

if (sd->nr_balance_failed > sd->cache_nice_tries)
if (sd->nr_balance_failed > sd->cache_nice_tries) {
#ifdef CONFIG_SCHEDSTATS
if (task_hot(p, rq->most_recent_timestamp, sd))
schedstat_inc(sd, lb_hot_gained[idle]);
#endif
return 1;
}

if (task_hot(p, rq->timestamp_last_tick, sd))
if (task_hot(p, rq->most_recent_timestamp, sd))
return 0;
return 1;
}
Expand Down Expand Up @@ -2206,11 +2212,6 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
goto skip_bitmap;
}

#ifdef CONFIG_SCHEDSTATS
if (task_hot(tmp, busiest->timestamp_last_tick, sd))
schedstat_inc(sd, lb_hot_gained[idle]);
#endif

pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu);
pulled++;
rem_load_move -= tmp->load_weight;
Expand Down Expand Up @@ -2971,7 +2972,8 @@ EXPORT_PER_CPU_SYMBOL(kstat);
static inline void
update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now)
{
p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick);
p->sched_time += now - p->last_ran;
p->last_ran = rq->most_recent_timestamp = now;
}

/*
Expand All @@ -2984,8 +2986,7 @@ unsigned long long current_sched_time(const struct task_struct *p)
unsigned long flags;

local_irq_save(flags);
ns = max(p->timestamp, task_rq(p)->timestamp_last_tick);
ns = p->sched_time + sched_clock() - ns;
ns = p->sched_time + sched_clock() - p->last_ran;
local_irq_restore(flags);

return ns;
Expand Down Expand Up @@ -3176,8 +3177,6 @@ void scheduler_tick(void)

update_cpu_clock(p, rq, now);

rq->timestamp_last_tick = now;

if (p == rq->idle)
/* Task on the idle queue */
wake_priority_sleeper(rq);
Expand Down Expand Up @@ -5032,8 +5031,8 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
* afterwards, and pretending it was a local activate.
* This way is cleaner and logically correct.
*/
p->timestamp = p->timestamp - rq_src->timestamp_last_tick
+ rq_dest->timestamp_last_tick;
p->timestamp = p->timestamp - rq_src->most_recent_timestamp
+ rq_dest->most_recent_timestamp;
deactivate_task(p, rq_src);
__activate_task(p, rq_dest);
if (TASK_PREEMPTS_CURR(p, rq_dest))
Expand Down

0 comments on commit b18ec80

Please sign in to comment.