Skip to content

Commit

Permalink
sched/tracing: Add a new tracepoint for sleeptime
Browse files Browse the repository at this point in the history
If CONFIG_SCHEDSTATS is defined, the kernel maintains
information about how long the task was sleeping or
in the case of iowait, blocking in the kernel before
getting woken up.

This will be useful for sleep time profiling.

Note: this information is only provided for sched_fair.
Other scheduling classes may choose to provide this in
the future.

Note: the delay includes the time spent on the runqueue
as well.

Signed-off-by: Arun Sharma <asharma@fb.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Andrew Vagin <avagin@openvz.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/1324512940-32060-2-git-send-email-asharma@fb.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
  • Loading branch information
Arun Sharma authored and Ingo Molnar committed Dec 23, 2011
1 parent 664dfa6 commit 1ac9bc6
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 2 deletions.
50 changes: 50 additions & 0 deletions include/trace/events/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,56 @@ TRACE_EVENT(sched_stat_runtime,
(unsigned long long)__entry->vruntime)
);

#ifdef CREATE_TRACE_POINTS
static inline u64 trace_get_sleeptime(struct task_struct *tsk)
{
#ifdef CONFIG_SCHEDSTATS
u64 block, sleep;

block = tsk->se.statistics.block_start;
sleep = tsk->se.statistics.sleep_start;
tsk->se.statistics.block_start = 0;
tsk->se.statistics.sleep_start = 0;

return block ? block : sleep ? sleep : 0;
#else
return 0;
#endif
}
#endif

/*
* Tracepoint for accounting sleeptime (time the task is sleeping
* or waiting for I/O).
*/
TRACE_EVENT(sched_stat_sleeptime,

TP_PROTO(struct task_struct *tsk, u64 now),

TP_ARGS(tsk, now),

TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( u64, sleeptime )
),

TP_fast_assign(
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
__entry->sleeptime = trace_get_sleeptime(tsk);
__entry->sleeptime = __entry->sleeptime ?
now - __entry->sleeptime : 0;
)
TP_perf_assign(
__perf_count(__entry->sleeptime);
),

TP_printk("comm=%s pid=%d sleeptime=%Lu [ns]",
__entry->comm, __entry->pid,
(unsigned long long)__entry->sleeptime)
);

/*
* Tracepoint for showing priority inheritance modifying a tasks
* priority.
Expand Down
1 change: 1 addition & 0 deletions kernel/sched/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1937,6 +1937,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
local_irq_enable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
finish_lock_switch(rq, prev);
trace_sched_stat_sleeptime(current, rq->clock);

fire_sched_in_preempt_notifiers(current);
if (mm)
Expand Down
2 changes: 0 additions & 2 deletions kernel/sched/fair.c
Original file line number Diff line number Diff line change
Expand Up @@ -1003,7 +1003,6 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
if (unlikely(delta > se->statistics.sleep_max))
se->statistics.sleep_max = delta;

se->statistics.sleep_start = 0;
se->statistics.sum_sleep_runtime += delta;

if (tsk) {
Expand All @@ -1020,7 +1019,6 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
if (unlikely(delta > se->statistics.block_max))
se->statistics.block_max = delta;

se->statistics.block_start = 0;
se->statistics.sum_sleep_runtime += delta;

if (tsk) {
Expand Down

0 comments on commit 1ac9bc6

Please sign in to comment.