Skip to content

Commit

Permalink
posixtimers, sched: Fix posix clock monotonicity
Browse files Browse the repository at this point in the history
Impact: Regression fix (clock_gettime() could go backwards)

This patch re-introduces a couple of functions, task_sched_runtime
and thread_group_sched_runtime, which were removed around the
time of 2.6.28-rc1.

These functions sample the thread/process clock while holding the
rq lock.  Holding the rq lock is required so that rq->clock is not
updated during the sampling.

Without the lock, i.e. if rq->clock is updated in the middle of a sample:
  clock_gettime() may return
   ((accounted runtime before update) + (delta after update)),
  which is less than what it should be.

v2 -> v3:
	- Rename static helper function __task_delta_exec()
	  to do_task_delta_exec() since -tip tree already has
	  a __task_delta_exec() of different version.

v1 -> v2:
	- Revises comments of function and patch description.
	- Add note about accuracy of thread group's runtime.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: stable@kernel.org	[2.6.28.x][2.6.29.x]
LKML-Reference: <49D1CC93.4080401@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
  • Loading branch information
Hidetoshi Seto authored and Ingo Molnar committed Apr 1, 2009
1 parent 13b8bd0 commit c5f8d99
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 11 deletions.
7 changes: 4 additions & 3 deletions kernel/posix-cpu-timers.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
cpu->cpu = virt_ticks(p);
break;
case CPUCLOCK_SCHED:
cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p);
cpu->sched = task_sched_runtime(p);
break;
}
return 0;
Expand All @@ -240,18 +240,19 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
{
struct task_cputime cputime;

thread_group_cputime(p, &cputime);
switch (CPUCLOCK_WHICH(which_clock)) {
default:
return -EINVAL;
case CPUCLOCK_PROF:
thread_group_cputime(p, &cputime);
cpu->cpu = cputime_add(cputime.utime, cputime.stime);
break;
case CPUCLOCK_VIRT:
thread_group_cputime(p, &cputime);
cpu->cpu = cputime.utime;
break;
case CPUCLOCK_SCHED:
cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
cpu->sched = thread_group_sched_runtime(p);
break;
}
return 0;
Expand Down
65 changes: 57 additions & 8 deletions kernel/sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -4139,26 +4139,75 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
EXPORT_PER_CPU_SYMBOL(kstat);

/*
* Return any ns on the sched_clock that have not yet been banked in
* Return any ns on the sched_clock that have not yet been accounted in
* @p in case that task is currently running.
*
* Called with task_rq_lock() held on @rq.
*/
static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
{
u64 ns = 0;

if (task_current(rq, p)) {
update_rq_clock(rq);
ns = rq->clock - p->se.exec_start;
if ((s64)ns < 0)
ns = 0;
}

return ns;
}

unsigned long long task_delta_exec(struct task_struct *p)
{
unsigned long flags;
struct rq *rq;
u64 ns = 0;

rq = task_rq_lock(p, &flags);
ns = do_task_delta_exec(p, rq);
task_rq_unlock(rq, &flags);

if (task_current(rq, p)) {
u64 delta_exec;
return ns;
}

update_rq_clock(rq);
delta_exec = rq->clock - p->se.exec_start;
if ((s64)delta_exec > 0)
ns = delta_exec;
}
/*
 * Return the runtime accounted to @p so far.
 * If @p is currently running, the result also includes the pending
 * runtime of @p that has not been accounted yet.
 *
 * Both values are read between task_rq_lock() and task_rq_unlock(), so
 * they are sampled under the same lock.
 */
unsigned long long task_sched_runtime(struct task_struct *p)
{
	unsigned long lock_flags;
	struct rq *rq = task_rq_lock(p, &lock_flags);
	u64 runtime = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);

	task_rq_unlock(rq, &lock_flags);

	return runtime;
}

/*
* Return sum_exec_runtime for the thread group.
* In case the task is currently running, return the sum plus its
* pending runtime that has not been accounted yet.
*
* Note that the thread group might have other running tasks as well,
* so the return value does not include the pending runtime that those
* other running tasks might have.
*/
unsigned long long thread_group_sched_runtime(struct task_struct *p)
{
struct task_cputime totals;
unsigned long flags;
struct rq *rq;
u64 ns;

rq = task_rq_lock(p, &flags);
thread_group_cputime(p, &totals);
ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
task_rq_unlock(rq, &flags);

return ns;
Expand Down

0 comments on commit c5f8d99

Please sign in to comment.