diff --git a/[refs] b/[refs] index d19afe05d3db..8624b850a115 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 5beda5f6e4e4523e8dbe596bf163a01b45776808 +refs/heads/master: 5c35d69fb60b1dc49595f5b9a2c7158283e9eaf3 diff --git a/trunk/Documentation/trace/ftrace-design.txt b/trunk/Documentation/trace/ftrace-design.txt index 79fcafc7fd64..dc52bd442c92 100644 --- a/trunk/Documentation/trace/ftrace-design.txt +++ b/trunk/Documentation/trace/ftrace-design.txt @@ -247,13 +247,6 @@ You need very few things to get the syscalls tracing in an arch. - Support the TIF_SYSCALL_TRACEPOINT thread flags. - Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace in the ptrace syscalls tracing path. -- If the system call table on this arch is more complicated than a simple array - of addresses of the system calls, implement an arch_syscall_addr to return - the address of a given system call. -- If the symbol names of the system calls do not match the function names on - this arch, define ARCH_HAS_SYSCALL_MATCH_SYM_NAME in asm/ftrace.h and - implement arch_syscall_match_sym_name with the appropriate logic to return - true if the function name corresponds with the symbol name. - Tag this arch as HAVE_SYSCALL_TRACEPOINTS. diff --git a/trunk/Documentation/trace/ftrace.txt b/trunk/Documentation/trace/ftrace.txt index 67f1cc473257..557c1edeccaf 100644 --- a/trunk/Documentation/trace/ftrace.txt +++ b/trunk/Documentation/trace/ftrace.txt @@ -80,11 +80,11 @@ of ftrace. Here is a list of some of the key files: tracers listed here can be configured by echoing their name into current_tracer. - tracing_on: + tracing_enabled: - This sets or displays whether writing to the trace - ring buffer is enabled. Echo 0 into this file to disable - the tracer or 1 to enable it. + This sets or displays whether the current_tracer + is activated and tracing or not. Echo 0 into this + file to disable the tracer or 1 to enable it. trace: @@ -202,6 +202,10 @@ Here is the list of current tracers that may be configured. to draw a graph of function calls similar to C code source. + "sched_switch" + + Traces the context switches and wakeups between tasks. + "irqsoff" Traces the areas that disable interrupts and saves @@ -269,6 +273,39 @@ format, the function name that was traced "path_put" and the parent function that called this function "path_walk". The timestamp is the time at which the function was entered. +The sched_switch tracer also includes tracing of task wakeups +and context switches. + + ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 2916:115:S + ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 10:115:S + ksoftirqd/1-7 [01] 1453.070013: 7:115:R ==> 10:115:R + events/1-10 [01] 1453.070013: 10:115:S ==> 2916:115:R + kondemand/1-2916 [01] 1453.070013: 2916:115:S ==> 7:115:R + ksoftirqd/1-7 [01] 1453.070013: 7:115:S ==> 0:140:R + +Wake ups are represented by a "+" and the context switches are +shown as "==>". The format is: + + Context switches: + + Previous task Next Task + + :: ==> :: + + Wake ups: + + Current task Task waking up + + :: + :: + +The prio is the internal kernel priority, which is the inverse +of the priority that is usually displayed by user-space tools. +Zero represents the highest priority (99). Prio 100 starts the +"nice" priorities with 100 being equal to nice -20 and 139 being +nice 19. The prio "140" is reserved for the idle task which is +the lowest priority thread (pid 0). + + Latency trace format -------------------- @@ -454,6 +491,79 @@ x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] latencies, as described in "Latency trace format". +sched_switch +------------ + +This tracer simply records schedule switches. Here is an example +of how to use it. + + # echo sched_switch > current_tracer + # echo 1 > tracing_enabled + # sleep 1 + # echo 0 > tracing_enabled + # cat trace + +# tracer: sched_switch +# +# TASK-PID CPU# TIMESTAMP FUNCTION +# | | | | | + bash-3997 [01] 240.132281: 3997:120:R + 4055:120:R + bash-3997 [01] 240.132284: 3997:120:R ==> 4055:120:R + sleep-4055 [01] 240.132371: 4055:120:S ==> 3997:120:R + bash-3997 [01] 240.132454: 3997:120:R + 4055:120:S + bash-3997 [01] 240.132457: 3997:120:R ==> 4055:120:R + sleep-4055 [01] 240.132460: 4055:120:D ==> 3997:120:R + bash-3997 [01] 240.132463: 3997:120:R + 4055:120:D + bash-3997 [01] 240.132465: 3997:120:R ==> 4055:120:R + -0 [00] 240.132589: 0:140:R + 4:115:S + -0 [00] 240.132591: 0:140:R ==> 4:115:R + ksoftirqd/0-4 [00] 240.132595: 4:115:S ==> 0:140:R + -0 [00] 240.132598: 0:140:R + 4:115:S + -0 [00] 240.132599: 0:140:R ==> 4:115:R + ksoftirqd/0-4 [00] 240.132603: 4:115:S ==> 0:140:R + sleep-4055 [01] 240.133058: 4055:120:S ==> 3997:120:R + [...] + + +As we have discussed previously about this format, the header +shows the name of the trace and points to the options. The +"FUNCTION" is a misnomer since here it represents the wake ups +and context switches. + +The sched_switch file only lists the wake ups (represented with +'+') and context switches ('==>') with the previous task or +current task first followed by the next task or task waking up. +The format for both of these is PID:KERNEL-PRIO:TASK-STATE. +Remember that the KERNEL-PRIO is the inverse of the actual +priority with zero (0) being the highest priority and the nice +values starting at 100 (nice -20). Below is a quick chart to map +the kernel priority to user land priorities. + + Kernel Space User Space + =============================================================== + 0(high) to 98(low) user RT priority 99(high) to 1(low) + with SCHED_RR or SCHED_FIFO + --------------------------------------------------------------- + 99 sched_priority is not used in scheduling + decisions(it must be specified as 0) + --------------------------------------------------------------- + 100(high) to 139(low) user nice -20(high) to 19(low) + --------------------------------------------------------------- + 140 idle task priority + --------------------------------------------------------------- + +The task states are: + + R - running : wants to run, may not actually be running + S - sleep : process is waiting to be woken up (handles signals) + D - disk sleep (uninterruptible sleep) : process must be woken up + (ignores signals) + T - stopped : process suspended + t - traced : process is being traced (with something like gdb) + Z - zombie : process waiting to be cleaned up + X - unknown + + ftrace_enabled -------------- @@ -497,10 +607,10 @@ an example: # echo irqsoff > current_tracer # echo latency-format > trace_options # echo 0 > tracing_max_latency - # echo 1 > tracing_on + # echo 1 > tracing_enabled # ls -ltr [...] - # echo 0 > tracing_on + # echo 0 > tracing_enabled # cat trace # tracer: irqsoff # @@ -605,10 +715,10 @@ is much like the irqsoff tracer. # echo preemptoff > current_tracer # echo latency-format > trace_options # echo 0 > tracing_max_latency - # echo 1 > tracing_on + # echo 1 > tracing_enabled # ls -ltr [...] - # echo 0 > tracing_on + # echo 0 > tracing_enabled # cat trace # tracer: preemptoff # @@ -753,10 +863,10 @@ tracers. # echo preemptirqsoff > current_tracer # echo latency-format > trace_options # echo 0 > tracing_max_latency - # echo 1 > tracing_on + # echo 1 > tracing_enabled # ls -ltr [...] - # echo 0 > tracing_on + # echo 0 > tracing_enabled # cat trace # tracer: preemptirqsoff # @@ -916,9 +1026,9 @@ Instead of performing an 'ls', we will run 'sleep 1' under # echo wakeup > current_tracer # echo latency-format > trace_options # echo 0 > tracing_max_latency - # echo 1 > tracing_on + # echo 1 > tracing_enabled # chrt -f 5 sleep 1 - # echo 0 > tracing_on + # echo 0 > tracing_enabled # cat trace # tracer: wakeup # @@ -1030,9 +1140,9 @@ ftrace_enabled is set; otherwise this tracer is a nop. # sysctl kernel.ftrace_enabled=1 # echo function > current_tracer - # echo 1 > tracing_on + # echo 1 > tracing_enabled # usleep 1 - # echo 0 > tracing_on + # echo 0 > tracing_enabled # cat trace # tracer: function # @@ -1070,7 +1180,7 @@ int trace_fd; [...] int main(int argc, char *argv[]) { [...] - trace_fd = open(tracing_file("tracing_on"), O_WRONLY); + trace_fd = open(tracing_file("tracing_enabled"), O_WRONLY); [...] if (condition_hit()) { write(trace_fd, "0", 1); @@ -1521,9 +1631,9 @@ If I am only interested in sys_nanosleep and hrtimer_interrupt: # echo sys_nanosleep hrtimer_interrupt \ > set_ftrace_filter # echo function > current_tracer - # echo 1 > tracing_on + # echo 1 > tracing_enabled # usleep 1 - # echo 0 > tracing_on + # echo 0 > tracing_enabled # cat trace # tracer: ftrace # @@ -1769,9 +1879,9 @@ different. The trace is live. # echo function > current_tracer # cat trace_pipe > /tmp/trace.out & [1] 4153 - # echo 1 > tracing_on + # echo 1 > tracing_enabled # usleep 1 - # echo 0 > tracing_on + # echo 0 > tracing_enabled # cat trace # tracer: function # diff --git a/trunk/include/linux/syscalls.h b/trunk/include/linux/syscalls.h index a17fcea2ca58..98664db1be47 100644 --- a/trunk/include/linux/syscalls.h +++ b/trunk/include/linux/syscalls.h @@ -132,11 +132,11 @@ extern struct trace_event_functions exit_syscall_print_funcs; .class = &event_class_syscall_enter, \ .event.funcs = &enter_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ - .flags = TRACE_EVENT_FL_CAP_ANY, \ }; \ static struct ftrace_event_call __used \ __attribute__((section("_ftrace_events"))) \ - *__event_enter_##sname = &event_enter_##sname; + *__event_enter_##sname = &event_enter_##sname; \ + __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY) #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct syscall_metadata __syscall_meta_##sname; \ @@ -146,11 +146,11 @@ extern struct trace_event_functions exit_syscall_print_funcs; .class = &event_class_syscall_exit, \ .event.funcs = &exit_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ - .flags = TRACE_EVENT_FL_CAP_ANY, \ }; \ static struct ftrace_event_call __used \ __attribute__((section("_ftrace_events"))) \ - *__event_exit_##sname = &event_exit_##sname; + *__event_exit_##sname = &event_exit_##sname; \ + __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY) #define SYSCALL_METADATA(sname, nb) \ SYSCALL_TRACE_ENTER_EVENT(sname); \ @@ -158,7 +158,6 @@ extern struct trace_event_functions exit_syscall_print_funcs; static struct syscall_metadata __used \ __syscall_meta_##sname = { \ .name = "sys"#sname, \ - .syscall_nr = -1, /* Filled in at boot */ \ .nb_args = nb, \ .types = types_##sname, \ .args = args_##sname, \ @@ -176,7 +175,6 @@ extern struct trace_event_functions exit_syscall_print_funcs; static struct syscall_metadata __used \ __syscall_meta__##sname = { \ .name = "sys_"#sname, \ - .syscall_nr = -1, /* Filled in at boot */ \ .nb_args = 0, \ .enter_event = &event_enter__##sname, \ .exit_event = &event_exit__##sname, \ diff --git a/trunk/kernel/trace/ring_buffer.c b/trunk/kernel/trace/ring_buffer.c index 7739893a1d0a..bd1c35a4fbcc 100644 --- a/trunk/kernel/trace/ring_buffer.c +++ b/trunk/kernel/trace/ring_buffer.c @@ -2163,14 +2163,10 @@ rb_reserve_next_event(struct ring_buffer *buffer, delta = diff; if (unlikely(test_time_stamp(delta))) { WARN_ONCE(delta > (1ULL << 59), - KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", + KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", (unsigned long long)delta, (unsigned long long)ts, - (unsigned long long)cpu_buffer->write_stamp, - sched_clock_stable ? "" : - "If you just came from a suspend/resume,\n" - "please switch to the trace global clock:\n" - " echo global > /sys/kernel/debug/tracing/trace_clock\n"); + (unsigned long long)cpu_buffer->write_stamp); add_timestamp = 1; } } diff --git a/trunk/kernel/trace/trace.c b/trunk/kernel/trace/trace.c index 8dc8da6733f9..dc53ecb80589 100644 --- a/trunk/kernel/trace/trace.c +++ b/trunk/kernel/trace/trace.c @@ -2710,10 +2710,6 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, mutex_lock(&trace_types_lock); if (tracer_enabled ^ val) { - - /* Only need to warn if this is used to change the state */ - WARN_ONCE(1, "tracing_enabled is deprecated. Use tracing_on"); - if (val) { tracer_enabled = 1; if (current_trace->start) diff --git a/trunk/kernel/trace/trace_sched_switch.c b/trunk/kernel/trace/trace_sched_switch.c index 7e62c0a18456..8f758d070c43 100644 --- a/trunk/kernel/trace/trace_sched_switch.c +++ b/trunk/kernel/trace/trace_sched_switch.c @@ -247,3 +247,51 @@ void tracing_sched_switch_assign_trace(struct trace_array *tr) ctx_trace = tr; } +static void stop_sched_trace(struct trace_array *tr) +{ + tracing_stop_sched_switch_record(); +} + +static int sched_switch_trace_init(struct trace_array *tr) +{ + ctx_trace = tr; + tracing_reset_online_cpus(tr); + tracing_start_sched_switch_record(); + return 0; +} + +static void sched_switch_trace_reset(struct trace_array *tr) +{ + if (sched_ref) + stop_sched_trace(tr); +} + +static void sched_switch_trace_start(struct trace_array *tr) +{ + sched_stopped = 0; +} + +static void sched_switch_trace_stop(struct trace_array *tr) +{ + sched_stopped = 1; +} + +static struct tracer sched_switch_trace __read_mostly = +{ + .name = "sched_switch", + .init = sched_switch_trace_init, + .reset = sched_switch_trace_reset, + .start = sched_switch_trace_start, + .stop = sched_switch_trace_stop, + .wait_pipe = poll_wait_pipe, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_sched_switch, +#endif +}; + +__init static int init_sched_switch_trace(void) +{ + return register_tracer(&sched_switch_trace); +} +device_initcall(init_sched_switch_trace); + diff --git a/trunk/kernel/trace/trace_syscalls.c b/trunk/kernel/trace/trace_syscalls.c index ee7b5a0bb9f8..5c9fe08d2093 100644 --- a/trunk/kernel/trace/trace_syscalls.c +++ b/trunk/kernel/trace/trace_syscalls.c @@ -60,19 +60,6 @@ extern struct syscall_metadata *__stop_syscalls_metadata[]; static struct syscall_metadata **syscalls_metadata; -#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME -static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) -{ - /* - * Only compare after the "sys" prefix. Archs that use - * syscall wrappers may have syscalls symbols aliases prefixed - * with "SyS" instead of "sys", leading to an unwanted - * mismatch. - */ - return !strcmp(sym + 3, name + 3); -} -#endif - static __init struct syscall_metadata * find_syscall_meta(unsigned long syscall) { @@ -85,11 +72,14 @@ find_syscall_meta(unsigned long syscall) stop = __stop_syscalls_metadata; kallsyms_lookup(syscall, NULL, NULL, NULL, str); - if (arch_syscall_match_sym_name(str, "sys_ni_syscall")) - return NULL; - for ( ; start < stop; start++) { - if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name)) + /* + * Only compare after the "sys" prefix. Archs that use + * syscall wrappers may have syscalls symbols aliases prefixed + * with "SyS" instead of "sys", leading to an unwanted + * mismatch. + */ + if ((*start)->name && !strcmp((*start)->name + 3, str + 3)) return *start; } return NULL; @@ -369,7 +359,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) int num; num = ((struct syscall_metadata *)call->data)->syscall_nr; - if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) + if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); if (!sys_refcount_enter) @@ -387,7 +377,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call) int num; num = ((struct syscall_metadata *)call->data)->syscall_nr; - if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) + if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); sys_refcount_enter--; @@ -403,7 +393,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) int num; num = ((struct syscall_metadata *)call->data)->syscall_nr; - if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) + if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); if (!sys_refcount_exit) @@ -421,7 +411,7 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call) int num; num = ((struct syscall_metadata *)call->data)->syscall_nr; - if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) + if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); sys_refcount_exit--; @@ -434,14 +424,6 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call) int init_syscall_trace(struct ftrace_event_call *call) { int id; - int num; - - num = ((struct syscall_metadata *)call->data)->syscall_nr; - if (num < 0 || num >= NR_syscalls) { - pr_debug("syscall %s metadata not mapped, disabling ftrace event\n", - ((struct syscall_metadata *)call->data)->name); - return -ENOSYS; - } if (set_syscall_print_fmt(call) < 0) return -ENOMEM; @@ -456,7 +438,7 @@ int init_syscall_trace(struct ftrace_event_call *call) return id; } -unsigned long __init __weak arch_syscall_addr(int nr) +unsigned long __init arch_syscall_addr(int nr) { return (unsigned long)sys_call_table[nr]; } diff --git a/trunk/scripts/kconfig/streamline_config.pl b/trunk/scripts/kconfig/streamline_config.pl index a4fe923c0131..fd81fc33d633 100644 --- a/trunk/scripts/kconfig/streamline_config.pl +++ b/trunk/scripts/kconfig/streamline_config.pl @@ -1,6 +1,6 @@ #!/usr/bin/perl -w # -# Copyright 2005-2009 - Steven Rostedt +# Copywrite 2005-2009 - Steven Rostedt # Licensed under the terms of the GNU GPL License version 2 # # It's simple enough to figure out how this works. diff --git a/trunk/tools/perf/Makefile b/trunk/tools/perf/Makefile index bc4d9bf8a556..ffd1047cd93b 100644 --- a/trunk/tools/perf/Makefile +++ b/trunk/tools/perf/Makefile @@ -637,6 +637,7 @@ else LIB_H += util/ui/libslang.h LIB_H += util/ui/progress.h LIB_H += util/ui/util.h + LIB_H += util/ui/ui.h endif endif diff --git a/trunk/tools/perf/util/ui/browser.c b/trunk/tools/perf/util/ui/browser.c index 8bc010edca25..60d6c815e1db 100644 --- a/trunk/tools/perf/util/ui/browser.c +++ b/trunk/tools/perf/util/ui/browser.c @@ -1,4 +1,5 @@ #include "libslang.h" +#include "ui.h" #include #include #include @@ -178,6 +179,7 @@ int ui_browser__show(struct ui_browser *self, const char *title, if (self->sb == NULL) return -1; + pthread_mutex_lock(&ui__lock); SLsmg_gotorc(0, 0); ui_browser__set_color(self, NEWT_COLORSET_ROOT); slsmg_write_nstring(title, self->width); @@ -188,25 +190,30 @@ int ui_browser__show(struct ui_browser *self, const char *title, va_start(ap, helpline); ui_helpline__vpush(helpline, ap); va_end(ap); + pthread_mutex_unlock(&ui__lock); return 0; } void ui_browser__hide(struct ui_browser *self) { + pthread_mutex_lock(&ui__lock); newtFormDestroy(self->form); self->form = NULL; ui_helpline__pop(); + pthread_mutex_unlock(&ui__lock); } int ui_browser__refresh(struct ui_browser *self) { int row; + pthread_mutex_lock(&ui__lock); newtScrollbarSet(self->sb, self->index, self->nr_entries - 1); row = self->refresh(self); ui_browser__set_color(self, HE_COLORSET_NORMAL); SLsmg_fill_region(self->y + row, self->x, self->height - row, self->width, ' '); + pthread_mutex_unlock(&ui__lock); return 0; } diff --git a/trunk/tools/perf/util/ui/helpline.c b/trunk/tools/perf/util/ui/helpline.c index 8d79daa4458a..f36d2ff509ed 100644 --- a/trunk/tools/perf/util/ui/helpline.c +++ b/trunk/tools/perf/util/ui/helpline.c @@ -5,6 +5,7 @@ #include "../debug.h" #include "helpline.h" +#include "ui.h" void ui_helpline__pop(void) { @@ -55,7 +56,8 @@ int ui_helpline__show_help(const char *format, va_list ap) int ret; static int backlog; - ret = vsnprintf(ui_helpline__last_msg + backlog, + pthread_mutex_lock(&ui__lock); + ret = vsnprintf(ui_helpline__last_msg + backlog, sizeof(ui_helpline__last_msg) - backlog, format, ap); backlog += ret; @@ -64,6 +66,7 @@ int ui_helpline__show_help(const char *format, va_list ap) newtRefresh(); backlog = 0; } + pthread_mutex_unlock(&ui__lock); return ret; } diff --git a/trunk/tools/perf/util/ui/setup.c b/trunk/tools/perf/util/ui/setup.c index fbf1a145492f..ee46d671db59 100644 --- a/trunk/tools/perf/util/ui/setup.c +++ b/trunk/tools/perf/util/ui/setup.c @@ -6,6 +6,9 @@ #include "../debug.h" #include "browser.h" #include "helpline.h" +#include "ui.h" + +pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; static void newt_suspend(void *d __used) { diff --git a/trunk/tools/perf/util/ui/ui.h b/trunk/tools/perf/util/ui/ui.h new file mode 100644 index 000000000000..d264e059c829 --- /dev/null +++ b/trunk/tools/perf/util/ui/ui.h @@ -0,0 +1,8 @@ +#ifndef _PERF_UI_H_ +#define _PERF_UI_H_ 1 + +#include + +extern pthread_mutex_t ui__lock; + +#endif /* _PERF_UI_H_ */ diff --git a/trunk/tools/testing/ktest/ktest.pl b/trunk/tools/testing/ktest/ktest.pl index ba7c63af6f3b..e1c62eeb88f5 100755 --- a/trunk/tools/testing/ktest/ktest.pl +++ b/trunk/tools/testing/ktest/ktest.pl @@ -1,6 +1,6 @@ #!/usr/bin/perl -w # -# Copyright 2010 - Steven Rostedt , Red Hat Inc. +# Copywrite 2010 - Steven Rostedt , Red Hat Inc. # Licensed under the terms of the GNU GPL License version 2 #