From c564f0db92b7f8d734ce530e42a540e12ae3d583 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 4 May 2017 22:58:14 +0800 Subject: [PATCH 01/23] perf report: Remove unnecessary check in annotate_browser_write() In annotate_browser_write(), if (dl->offset != -1 && percent_max != 0.0) { if (percent_max != 0.0) { ... } ... } The second check of (percent_max != 0.0) is not necessary, remove it. Signed-off-by: Yao Jin Acked-by: Milian Wolff Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Yao Jin Link: http://lkml.kernel.org/r/1493909895-9668-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index d990ad08a3c69..52c1e8d672b5c 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -132,21 +132,17 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } if (dl->offset != -1 && percent_max != 0.0) { - if (percent_max != 0.0) { - for (i = 0; i < ab->nr_events; i++) { - ui_browser__set_percent_color(browser, - bdl->samples[i].percent, - current_entry); - if (annotate_browser__opts.show_total_period) { - ui_browser__printf(browser, "%6" PRIu64 " ", - bdl->samples[i].nr); - } else { - ui_browser__printf(browser, "%6.2f ", - bdl->samples[i].percent); - } + for (i = 0; i < ab->nr_events; i++) { + ui_browser__set_percent_color(browser, + bdl->samples[i].percent, + current_entry); + if (annotate_browser__opts.show_total_period) { + ui_browser__printf(browser, "%6" PRIu64 " ", + bdl->samples[i].nr); + } else { + ui_browser__printf(browser, "%6.2f ", + bdl->samples[i].percent); } - } else { - ui_browser__write_nstring(browser, " ", 7 * ab->nr_events); } } else { ui_browser__set_percent_color(browser, 0, current_entry); From 
ec27ae1892f7f8119ce82535ffcc2889ea3bb3d8 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 4 May 2017 22:58:15 +0800 Subject: [PATCH 02/23] perf annotate browser: Display titles in left frame MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The annotate browser is divided into 2 frames. Left frame contains 3 columns (some platforms only have one column). For example: │26 int compute_flag() │27 { 22.80 1.20 │ sub $0x8,%rsp │25 int i; │ │27 i = rand() % 2; 22.78 1.20 1 │ → callq rand@plt While it's hard for user to understand what the data is. This patch adds the titles "Percent", "IPC" and "Cycle" on columns. Percent IPC Cycle │ │25 __attribute__((noinline)) │26 int compute_flag() │27 { 22.80 1.20 │ sub $0x8,%rsp │25 int i; │ │27 i = rand() % 2; 22.78 1.20 1 │ → callq rand@plt The titles are displayed at row 0 of annotate browser if row 0 doesn't have values of percent, ipc and cycle. Signed-off-by: Yao Jin Acked-by: Milian Wolff Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Yao Jin Link: http://lkml.kernel.org/r/1493909895-9668-3-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 52c1e8d672b5c..7a03389b7a03b 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -125,12 +125,21 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int int i, pcnt_width = annotate_browser__pcnt_width(ab); double percent_max = 0.0; char bf[256]; + bool show_title = false; for (i = 0; i < ab->nr_events; i++) { if (bdl->samples[i].percent > percent_max) percent_max = bdl->samples[i].percent; } + if ((row == 0) && (dl->offset == -1 || percent_max == 0.0)) { + if (ab->have_cycles) { + if (dl->ipc == 0.0 && 
dl->cycles == 0) + show_title = true; + } else + show_title = true; + } + if (dl->offset != -1 && percent_max != 0.0) { for (i = 0; i < ab->nr_events; i++) { ui_browser__set_percent_color(browser, @@ -146,18 +155,27 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } } else { ui_browser__set_percent_color(browser, 0, current_entry); - ui_browser__write_nstring(browser, " ", 7 * ab->nr_events); + + if (!show_title) + ui_browser__write_nstring(browser, " ", 7 * ab->nr_events); + else + ui_browser__printf(browser, "%*s", 7, "Percent"); } if (ab->have_cycles) { if (dl->ipc) ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->ipc); - else + else if (!show_title) ui_browser__write_nstring(browser, " ", IPC_WIDTH); + else + ui_browser__printf(browser, "%*s ", IPC_WIDTH - 1, "IPC"); + if (dl->cycles) ui_browser__printf(browser, "%*" PRIu64 " ", CYCLES_WIDTH - 1, dl->cycles); - else + else if (!show_title) ui_browser__write_nstring(browser, " ", CYCLES_WIDTH); + else + ui_browser__printf(browser, "%*s ", CYCLES_WIDTH - 1, "Cycle"); } SLsmg_write_char(' '); From 8c1cedb4466809f9d741a4088314783cb88680a9 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Mon, 8 May 2017 20:07:30 +0900 Subject: [PATCH 03/23] perf config: Invert an if statement to reduce nesting in cmd_config() Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1494241650-32210-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-config.c | 44 +++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index 80668fa7556ef..75459668edb24 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -204,31 +204,33 @@ int cmd_config(int argc, const char **argv) } break; default: - if (argc) { - for (i = 0; argv[i]; i++) { - char *var, *value; - char *arg = strdup(argv[i]); - - 
if (!arg) { - pr_err("%s: strdup failed\n", __func__); - ret = -1; - break; - } + if (!argc) { + usage_with_options(config_usage, config_options); + break; + } - if (parse_config_arg(arg, &var, &value) < 0) { - free(arg); - ret = -1; - break; - } + for (i = 0; argv[i]; i++) { + char *var, *value; + char *arg = strdup(argv[i]); - if (value == NULL) - ret = show_spec_config(set, var); - else - ret = set_config(set, config_filename, var, value); + if (!arg) { + pr_err("%s: strdup failed\n", __func__); + ret = -1; + break; + } + + if (parse_config_arg(arg, &var, &value) < 0) { free(arg); + ret = -1; + break; } - } else - usage_with_options(config_usage, config_options); + + if (value == NULL) + ret = show_spec_config(set, var); + else + ret = set_config(set, config_filename, var, value); + free(arg); + } } perf_config_set__delete(set); From 36ce565114b4e7e3b83f40309675f6b1720957e4 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 2 Jun 2017 08:48:10 -0700 Subject: [PATCH 04/23] perf script: Allow adding and removing fields With 'perf script' it is common that we just want to add or remove a field. Currently this requires figuring out the long list of default fields and specifying them first, and then adding/removing the new field. This patch adds a new + - syntax to merely add or remove fields, that allows more succinct and clearer command lines. For example to remove the comm field from PMU samples: Previously $ perf script -F tid,cpu,time,event,sym,ip,dso,period | head -1 swapper 0 [000] 504345.383126: 1 cycles: ffffffff90060c66 native_write_msr ([kernel.kallsyms]) with the new syntax perf script -F -comm | head -1 0 [000] 504345.383126: 1 cycles: ffffffff90060c66 native_write_msr ([kernel.kallsyms]) The new syntax cannot be mixed with normal overriding. v2: Fix example in description. Use tid vs pid. No functional changes. v3: Don't skip initialization when user specified explicit type. v4: Rebase. Remove empty line.
Committer testing: # perf record -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.748 MB perf.data (14 samples) ] Without an explicit field list specified via -F, defaults to: # perf script | head -2 perf 6338 [000] 18467.058607: 1 cycles: ffffffff89060c36 native_write_msr (/lib/modules/4.11.0-rc8+/build/vmlinux) swapper 0 [001] 18467.058617: 1 cycles: ffffffff89060c36 native_write_msr (/lib/modules/4.11.0-rc8+/build/vmlinux) # Which is equivalent to: # perf script -F comm,tid,cpu,time,period,event,ip,sym,dso | head -2 perf 6338 [000] 18467.058607: 1 cycles: ffffffff89060c36 native_write_msr (/lib/modules/4.11.0-rc8+/build/vmlinux) swapper 0 [001] 18467.058617: 1 cycles: ffffffff89060c36 native_write_msr (/lib/modules/4.11.0-rc8+/build/vmlinux) # So if we want to remove the comm, as in your original example, we would have to figure out the default field list and remove ' comm' from it: # perf script -F tid,cpu,time,period,event,ip,sym,dso | head -2 6338 [000] 18467.058607: 1 cycles: ffffffff89060c36 native_write_msr (/lib/modules/4.11.0-rc8+/build/vmlinux) 0 [001] 18467.058617: 1 cycles: ffffffff89060c36 native_write_msr (/lib/modules/4.11.0-rc8+/build/vmlinux) # With your patch this becomes simpler, one can remove fields by prefixing them with '-': # perf script -F -comm | head -2 6338 [000] 18467.058607: 1 cycles: ffffffff89060c36 native_write_msr (/lib/modules/4.11.0-rc8+/build/vmlinux) 0 [001] 18467.058617: 1 cycles: ffffffff89060c36 native_write_msr (/lib/modules/4.11.0-rc8+/build/vmlinux) # Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Milian Wolff Link: http://lkml.kernel.org/r/20170602154810.15875-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 8 +++++ tools/perf/builtin-script.c | 37 ++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git
a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 3517e204a2b30..3eca8c0d3c7b9 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -130,6 +130,14 @@ OPTIONS i.e., the specified fields apply to all event types if the type string is not given. + In addition to overriding fields, it is also possible to add or remove + fields from the defaults. For example + + -F -cpu,+insn + + removes the cpu field and adds the insn field. Adding/removing fields + cannot be mixed with normal overriding. + The arguments are processed in the order received. A later usage can reset a prior request. e.g.: diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 4761b0d7fcb5b..afa84debc5c4d 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1727,6 +1727,7 @@ static int parse_output_fields(const struct option *opt __maybe_unused, int rc = 0; char *str = strdup(arg); int type = -1; + enum { DEFAULT, SET, ADD, REMOVE } change = DEFAULT; if (!str) return -ENOMEM; @@ -1772,6 +1773,10 @@ static int parse_output_fields(const struct option *opt __maybe_unused, goto out; } + /* Don't override defaults for +- */ + if (strchr(str, '+') || strchr(str, '-')) + goto parse; + if (output_set_by_user()) pr_warning("Overriding previous field request for all events.\n"); @@ -1782,13 +1787,30 @@ static int parse_output_fields(const struct option *opt __maybe_unused, } } +parse: for (tok = strtok_r(tok, ",", &strtok_saveptr); tok; tok = strtok_r(NULL, ",", &strtok_saveptr)) { + if (*tok == '+') { + if (change == SET) + goto out_badmix; + change = ADD; + tok++; + } else if (*tok == '-') { + if (change == SET) + goto out_badmix; + change = REMOVE; + tok++; + } else { + if (change != SET && change != DEFAULT) + goto out_badmix; + change = SET; + } + for (i = 0; i < imax; ++i) { if (strcmp(tok, all_output_options[i].str) == 0) break; } if (i == imax && strcmp(tok, "flags") 
== 0) { - print_flags = true; + print_flags = change == REMOVE ? false : true; continue; } if (i == imax) { @@ -1805,8 +1827,12 @@ static int parse_output_fields(const struct option *opt __maybe_unused, if (output[j].invalid_fields & all_output_options[i].field) { pr_warning("\'%s\' not valid for %s events. Ignoring.\n", all_output_options[i].str, event_type(j)); - } else - output[j].fields |= all_output_options[i].field; + } else { + if (change == REMOVE) + output[j].fields &= ~all_output_options[i].field; + else + output[j].fields |= all_output_options[i].field; + } } } else { if (output[type].invalid_fields & all_output_options[i].field) { @@ -1826,7 +1852,11 @@ static int parse_output_fields(const struct option *opt __maybe_unused, "Events will not be displayed.\n", event_type(type)); } } + goto out; +out_badmix: + fprintf(stderr, "Cannot mix +-field with overridden fields\n"); + rc = -EINVAL; out: free(str); return rc; @@ -2444,6 +2474,7 @@ int cmd_script(int argc, const char **argv) symbol__config_symfs), OPT_CALLBACK('F', "fields", NULL, "str", "comma separated output fields prepend with 'type:'. " + "+field to add and -field to remove." "Valid types: hw,sw,trace,raw. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "addr,symoff,period,iregs,brstack,brstacksym,flags," From 6c3466435b03fb84647f5ad413f98f2ccb12b5c2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 16 Jun 2017 11:39:15 -0300 Subject: [PATCH 05/23] tools: Adopt __noreturn from kernel sources To have a more compact way to specify that a function doesn't return, instead of the open coded: __attribute__((noreturn)) And use it instead of the tools/perf/ specific variation, NORETURN. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-l0y144qzixcy5t4c6i7pdiqj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler-gcc.h | 2 ++ tools/perf/util/scripting-engines/trace-event-python.c | 3 ++- tools/perf/util/usage.c | 6 +++--- tools/perf/util/util.h | 10 ++++------ 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index 825d44f89a290..a3deb74cb0704 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -19,3 +19,5 @@ /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) + +#define __noreturn __attribute__((noreturn)) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 40de3cb40d210..57b7a00e6f167 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "../../perf.h" @@ -84,7 +85,7 @@ struct tables { static struct tables tables_global; -static void handler_call_die(const char *handler_name) NORETURN; +static void handler_call_die(const char *handler_name) __noreturn; static void handler_call_die(const char *handler_name) { PyErr_Print(); diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index 996046a66fe51..aacb65e079aa8 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c @@ -16,13 +16,13 @@ static void report(const char *prefix, const char *err, va_list params) fprintf(stderr, " %s%s\n", prefix, msg); } -static NORETURN void usage_builtin(const char *err) +static __noreturn void usage_builtin(const char *err) { fprintf(stderr, "\n Usage: %s\n", err); exit(129); } -static NORETURN void die_builtin(const 
char *err, va_list params) +static __noreturn void die_builtin(const char *err, va_list params) { report(" Fatal: ", err, params); exit(128); @@ -40,7 +40,7 @@ static void warn_builtin(const char *warn, va_list params) /* If we are in a dlopen()ed .so write to a global variable would segfault * (ugh), so keep things static. */ -static void (*usage_routine)(const char *err) NORETURN = usage_builtin; +static void (*usage_routine)(const char *err) __noreturn = usage_builtin; static void (*error_routine)(const char *err, va_list params) = error_builtin; static void (*warn_routine)(const char *err, va_list params) = warn_builtin; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 5dfb9bb6482d3..024b108dbbf60 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -11,20 +11,18 @@ #include #include #include +#include #include -#ifdef __GNUC__ -#define NORETURN __attribute__((__noreturn__)) -#else -#define NORETURN +#ifndef __GNUC__ #ifndef __attribute__ #define __attribute__(x) #endif #endif /* General helper functions */ -void usage(const char *err) NORETURN; -void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); +void usage(const char *err) __noreturn; +void die(const char *err, ...) __noreturn __attribute__((format (printf, 1, 2))); int error(const char *err, ...) __attribute__((format (printf, 1, 2))); void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); From afaed6d3e4aa56e939b496aafa5c97852e223122 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 16 Jun 2017 11:57:54 -0300 Subject: [PATCH 06/23] tools: Adopt __printf from kernel sources To have a more compact way to ask the compiler to perform printf-like varargs validation. v2: Fixed up build on arm, squashing a patch by Kim Phillips, thanks!
Cc: Adrian Hunter Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Kim Phillips Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-dopkqmmuqs04cxzql0024nnu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler-gcc.h | 2 ++ tools/perf/arch/arm/util/cs-etm.c | 4 ++-- tools/perf/util/cache.h | 3 ++- tools/perf/util/debug.h | 11 ++++++----- tools/perf/util/intel-pt-decoder/intel-pt-log.h | 4 ++-- tools/perf/util/probe-event.h | 4 ++-- tools/perf/util/strbuf.h | 4 ++-- tools/perf/util/util.h | 12 +++--------- 8 files changed, 21 insertions(+), 23 deletions(-) diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index a3deb74cb0704..f531b258ff945 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -21,3 +21,5 @@ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) #define __noreturn __attribute__((noreturn)) + +#define __printf(a, b) __attribute__((format(printf, a, b))) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 29361d9b635a6..02a649bfec3c6 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -583,8 +584,7 @@ static FILE *cs_device__open_file(const char *name) } -static __attribute__((format(printf, 2, 3))) -int cs_device__print_file(const char *name, const char *fmt, ...) +static int __printf(2, 3) cs_device__print_file(const char *name, const char *fmt, ...) 
{ va_list args; FILE *file; diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 0328f297a7483..0175765c05b92 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -5,6 +5,7 @@ #include #include "../ui/ui.h" +#include #include #define CMD_EXEC_PATH "--exec-path" @@ -24,6 +25,6 @@ static inline int is_absolute_path(const char *path) return path[0] == '/'; } -char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); +char *mkpath(const char *fmt, ...) __printf(1, 2); #endif /* __PERF_CACHE_H */ diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 8a23ea1a71c7c..c818bdb1c1aba 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -4,6 +4,7 @@ #include #include +#include #include "event.h" #include "../ui/helpline.h" #include "../ui/progress.h" @@ -40,16 +41,16 @@ extern int debug_data_convert; #define STRERR_BUFSIZE 128 /* For the buffer size of str_error_r */ -int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +int dump_printf(const char *fmt, ...) __printf(1, 2); void trace_event(union perf_event *event); -int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2))); -int ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2))); +int ui__error(const char *format, ...) __printf(1, 2); +int ui__warning(const char *format, ...) __printf(1, 2); void pr_stat(const char *fmt, ...); -int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4))); -int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5))); +int eprintf(int level, int var, const char *fmt, ...) __printf(3, 4); +int eprintf_time(int level, int var, u64 t, const char *fmt, ...) 
__printf(4, 5); int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h index debe751dc3d68..45b64f93f3588 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h @@ -16,6 +16,7 @@ #ifndef INCLUDE__INTEL_PT_LOG_H__ #define INCLUDE__INTEL_PT_LOG_H__ +#include #include #include @@ -34,8 +35,7 @@ void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip); void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip); -__attribute__((format(printf, 1, 2))) -void __intel_pt_log(const char *fmt, ...); +void __intel_pt_log(const char *fmt, ...) __printf(1, 2); #define intel_pt_log(fmt, ...) \ do { \ diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 373842656fb64..5812947418dd7 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -1,6 +1,7 @@ #ifndef _PROBE_EVENT_H #define _PROBE_EVENT_H +#include #include #include "intlist.h" @@ -171,8 +172,7 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev, struct symbol *sym); /* If there is no space to write, returns -E2BIG. */ -int e_snprintf(char *str, size_t size, const char *format, ...) - __attribute__((format(printf, 3, 4))); +int e_snprintf(char *str, size_t size, const char *format, ...) 
__printf(3, 4); /* Maximum index number of event-name postfix */ #define MAX_EVENT_INDEX 1024 diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index 318424ea561d1..802d743378afa 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -42,6 +42,7 @@ #include #include #include +#include #include extern char strbuf_slopbuf[]; @@ -85,8 +86,7 @@ static inline int strbuf_addstr(struct strbuf *sb, const char *s) { return strbuf_add(sb, s, strlen(s)); } -__attribute__((format(printf,2,3))) -int strbuf_addf(struct strbuf *sb, const char *fmt, ...); +int strbuf_addf(struct strbuf *sb, const char *fmt, ...) __printf(2, 3); /* XXX: if read fails, any partial read is undone */ ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 024b108dbbf60..21c6db173bcc4 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -14,17 +14,11 @@ #include #include -#ifndef __GNUC__ -#ifndef __attribute__ -#define __attribute__(x) -#endif -#endif - /* General helper functions */ void usage(const char *err) __noreturn; -void die(const char *err, ...) __noreturn __attribute__((format (printf, 1, 2))); -int error(const char *err, ...) __attribute__((format (printf, 1, 2))); -void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); +void die(const char *err, ...) __noreturn __printf(1, 2); +int error(const char *err, ...) __printf(1, 2); +void warning(const char *err, ...) __printf(1, 2); void set_warning_routine(void (*routine)(const char *err, va_list params)); From 3ee350fb8a2b30fe47dd9e3b299dff0178fc8c88 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 16 Jun 2017 11:57:54 -0300 Subject: [PATCH 07/23] tools: Adopt __scanf from kernel sources To have a more compact way to ask the compiler to perform scanf like argument validation. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-yzqrhfjrn26lqqtwf55egg0h@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler-gcc.h | 1 + tools/perf/util/pmu.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index f531b258ff945..2846094aad4d0 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -23,3 +23,4 @@ #define __noreturn __attribute__((noreturn)) #define __printf(a, b) __attribute__((format(printf, a, b))) +#define __scanf(a, b) __attribute__((format(scanf, a, b))) diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index ea7f450dc6092..389e9729331f4 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -2,6 +2,7 @@ #define __PMU_H #include +#include #include #include #include "evsel.h" @@ -83,8 +84,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet, bool long_desc, bool details_flag); bool pmu_have_event(const char *pname, const char *name); -int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, - ...) __attribute__((format(scanf, 3, 4))); +int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4); int perf_pmu__test(void); From 0353631aa73e5e468fae1cd699bf860b59ba100d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 16 Jun 2017 12:18:27 -0300 Subject: [PATCH 08/23] perf tools: Use __maybe_unused consistently Instead of defining __unused or redefining __maybe_unused. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-4eleto5pih31jw1q4dypm9pf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 2 +- tools/perf/jvmti/jvmti_agent.h | 2 -- tools/perf/jvmti/libjvmti.c | 5 +++-- tools/perf/pmu-events/jevents.c | 4 ---- tools/perf/util/evsel.c | 3 ++- tools/perf/util/header.c | 3 ++- 6 files changed, 8 insertions(+), 11 deletions(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 27de0c8c5c19a..469d65b211228 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -700,7 +700,7 @@ static inline uint32_t lfsr_32(uint32_t lfsr) * kernel (KSM, zero page, etc.) cannot optimize away RAM * accesses: */ -static inline u64 access_data(u64 *data __attribute__((unused)), u64 val) +static inline u64 access_data(u64 *data, u64 val) { if (g->p.data_reads) val += *data; diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h index bedf5d0ba9ff9..c53a41f48b634 100644 --- a/tools/perf/jvmti/jvmti_agent.h +++ b/tools/perf/jvmti/jvmti_agent.h @@ -5,8 +5,6 @@ #include #include -#define __unused __attribute__((unused)) - #if defined(__cplusplus) extern "C" { #endif diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c index 5612641c69b40..6d710904c8379 100644 --- a/tools/perf/jvmti/libjvmti.c +++ b/tools/perf/jvmti/libjvmti.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -238,7 +239,7 @@ code_generated_cb(jvmtiEnv *jvmti, } JNIEXPORT jint JNICALL -Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused) +Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __maybe_unused) { jvmtiEventCallbacks cb; jvmtiCapabilities caps1; @@ -313,7 +314,7 @@ Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused) } JNIEXPORT void JNICALL -Agent_OnUnload(JavaVM *jvm __unused) +Agent_OnUnload(JavaVM *jvm __maybe_unused) { int ret; diff --git a/tools/perf/pmu-events/jevents.c 
b/tools/perf/pmu-events/jevents.c index baa073f383347..bd0aabb2bd0fa 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -48,10 +48,6 @@ #include "json.h" #include "jevents.h" -#ifndef __maybe_unused -#define __maybe_unused __attribute__((unused)) -#endif - int verbose; char *prog; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index cda44b0e821c6..7f78f27f53824 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -1441,7 +1442,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, } static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, - void *priv __attribute__((unused))) + void *priv __maybe_unused) { return fprintf(fp, " %-32s %s\n", name, val); } diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b5baff3007bbd..76ed7d03e500f 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -1274,7 +1275,7 @@ read_event_desc(struct perf_header *ph, int fd) } static int __desc_attr__fprintf(FILE *fp, const char *name, const char *val, - void *priv __attribute__((unused))) + void *priv __maybe_unused) { return fprintf(fp, ", %s = %s", name, val); } From 9dd4ca470e03334f95cc96529ba090921aac8eab Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 16 Jun 2017 11:39:15 -0300 Subject: [PATCH 09/23] tools: Adopt noinline from kernel sources To have a more compact way to ask the compiler not to inline a function and to make tools/ source code look like kernel code. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-bis4pqxegt6gbm5dlqs937tn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler-gcc.h | 2 ++ tools/include/linux/compiler.h | 4 ++++ tools/perf/tests/bp_signal.c | 3 +-- tools/perf/tests/bp_signal_overflow.c | 3 +-- tools/perf/tests/dwarf-unwind.c | 15 +++++---------- 5 files changed, 13 insertions(+), 14 deletions(-) diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index 2846094aad4d0..c13e6f7d5a2a3 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -20,6 +20,8 @@ /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +#define noinline __attribute__((noinline)) + #define __noreturn __attribute__((noreturn)) #define __printf(a, b) __attribute__((format(printf, a, b))) diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 23299d7e71602..8b129e314c7e1 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -17,6 +17,10 @@ # define __always_inline inline __attribute__((always_inline)) #endif +#ifndef noinline +#define noinline +#endif + /* Are two types/vars the same type (ignoring qualifiers)? 
*/ #ifndef __same_type # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index 8ba2c4618fe90..39bbb97cd30aa 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -62,8 +62,7 @@ static void __test_function(volatile long *ptr) } #endif -__attribute__ ((noinline)) -static int test_function(void) +static noinline int test_function(void) { __test_function(&the_var); the_var++; diff --git a/tools/perf/tests/bp_signal_overflow.c b/tools/perf/tests/bp_signal_overflow.c index 89f92fa67cc4c..3b1ac6f31b154 100644 --- a/tools/perf/tests/bp_signal_overflow.c +++ b/tools/perf/tests/bp_signal_overflow.c @@ -28,8 +28,7 @@ static int overflows; -__attribute__ ((noinline)) -static int test_function(void) +static noinline int test_function(void) { return time(NULL); } diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index dfe5c89e2049f..3e56d08f79956 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -76,8 +76,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) return strcmp((const char *) symbol, funcs[idx]); } -__attribute__ ((noinline)) -static int unwind_thread(struct thread *thread) +static noinline int unwind_thread(struct thread *thread) { struct perf_sample sample; unsigned long cnt = 0; @@ -108,8 +107,7 @@ static int unwind_thread(struct thread *thread) static int global_unwind_retval = -INT_MAX; -__attribute__ ((noinline)) -static int compare(void *p1, void *p2) +static noinline int compare(void *p1, void *p2) { /* Any possible value should be 'thread' */ struct thread *thread = *(struct thread **)p1; @@ -128,8 +126,7 @@ static int compare(void *p1, void *p2) return p1 - p2; } -__attribute__ ((noinline)) -static int krava_3(struct thread *thread) +static noinline int krava_3(struct thread *thread) { struct thread *array[2] = {thread, thread}; void *fp = &bsearch; @@ 
-147,14 +144,12 @@ static int krava_3(struct thread *thread) return global_unwind_retval; } -__attribute__ ((noinline)) -static int krava_2(struct thread *thread) +static noinline int krava_2(struct thread *thread) { return krava_3(thread); } -__attribute__ ((noinline)) -static int krava_1(struct thread *thread) +static noinline int krava_1(struct thread *thread) { return krava_2(thread); } From c9f5da742fa3dfebc49d03deb312522e5db643ed Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 16 Jun 2017 11:39:15 -0300 Subject: [PATCH 10/23] tools: Adopt __packed from kernel sources To have a more compact way to ask the compiler to not insert alignment paddings in a struct, making tools/ look more like kernel source code. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-byp46nr7hsxvvyc9oupfb40q@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler-gcc.h | 2 ++ tools/perf/util/genelf_debug.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index c13e6f7d5a2a3..0f57a48272ab3 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -22,6 +22,8 @@ #define noinline __attribute__((noinline)) +#define __packed __attribute__((packed)) + #define __noreturn __attribute__((noreturn)) #define __printf(a, b) __attribute__((format(printf, a, b))) diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c index 5980f7d256b17..40789d8603d00 100644 --- a/tools/perf/util/genelf_debug.c +++ b/tools/perf/util/genelf_debug.c @@ -11,6 +11,7 @@ * @remark Copyright 2007 OProfile authors * @author Philippe Elie */ +#include #include #include #include @@ -125,7 +126,7 @@ struct debug_line_header { * and filesize, last entry is followed by en empty string. 
*/ /* follow the first program statement */ -} __attribute__((packed)); +} __packed; /* DWARF 2 spec talk only about one possible compilation unit header while * binutils can handle two flavours of dwarf 2, 32 and 64 bits, this is not @@ -138,7 +139,7 @@ struct compilation_unit_header { uhalf version; uword debug_abbrev_offset; ubyte pointer_size; -} __attribute__((packed)); +} __packed; #define DW_LNS_num_opcode (DW_LNS_set_isa + 1) From 5c97cac63ac24c78c8126958a453774e49e706dd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 16 Jun 2017 11:39:15 -0300 Subject: [PATCH 11/23] tools: Adopt __aligned from kernel sources To have a more compact way to ask the compiler to use a specific alignment, making tools/ look more like kernel source code. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-8jiem6ubg9rlpbs7c2p900no@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler-gcc.h | 1 + tools/perf/util/evlist.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index 0f57a48272ab3..bd39b2090ad1b 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -26,5 +26,6 @@ #define __noreturn __attribute__((noreturn)) +#define __aligned(x) __attribute__((aligned(x))) #define __printf(a, b) __attribute__((format(printf, a, b))) #define __scanf(a, b) __attribute__((format(scanf, a, b))) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 94cea4398a13a..8d601fbdd8d64 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -1,6 +1,7 @@ #ifndef __PERF_EVLIST_H #define __PERF_EVLIST_H 1 +#include #include #include #include @@ -34,7 +35,7 @@ struct perf_mmap { refcount_t refcnt; u64 prev; struct auxtrace_mmap auxtrace_mmap; - char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8))); + char 
event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); }; static inline size_t From 0c788d4726c916544490c17cdbfd1ae2a6347fa8 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 15 Jun 2017 12:55:21 -0500 Subject: [PATCH 12/23] perf coresight: Remove superfluous check before use The cs_etm_evsel variable is guaranteed to be set at this point in cs_etm_recording_options(). Signed-off-by: Kim Phillips Acked-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20170615125521.80cc128dc856bc1f2e61b730@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 02a649bfec3c6..7ce3d1a251337 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -203,19 +203,18 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME, opts->auxtrace_snapshot_size); - if (cs_etm_evsel) { - /* - * To obtain the auxtrace buffer file descriptor, the auxtrace - * event must come first. - */ - perf_evlist__to_front(evlist, cs_etm_evsel); - /* - * In the case of per-cpu mmaps, we need the CPU on the - * AUX event. - */ - if (!cpu_map__empty(cpus)) - perf_evsel__set_sample_bit(cs_etm_evsel, CPU); - } + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace + * event must come first. + */ + perf_evlist__to_front(evlist, cs_etm_evsel); + + /* + * In the case of per-cpu mmaps, we need the CPU on the + * AUX event. 
+ */ + if (!cpu_map__empty(cpus)) + perf_evsel__set_sample_bit(cs_etm_evsel, CPU); /* Add dummy event to keep tracking */ if (opts->full_auxtrace) { From d3cef7fe5151eabcd97ad8f9e595ec55f6ffb318 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 16 Jun 2017 11:23:39 -0500 Subject: [PATCH 13/23] perf intel-pt/bts: Remove unused SAMPLE_SIZE defines and bts priv array These defines were probably dragged in from sampling support in earlier patches. They can be put back when needed. Signed-off-by: Kim Phillips Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170616112339.3fb6986e4ff33e353008244b@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-bts.c | 4 ---- tools/perf/arch/x86/util/intel-pt.c | 4 ---- tools/perf/util/intel-bts.c | 2 -- 3 files changed, 10 deletions(-) diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index af2bce7a2cd60..781df40b29660 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -35,10 +35,6 @@ #define KiB_MASK(x) (KiB(x) - 1) #define MiB_MASK(x) (MiB(x) - 1) -#define INTEL_BTS_DFLT_SAMPLE_SIZE KiB(4) - -#define INTEL_BTS_MAX_SAMPLE_SIZE KiB(60) - struct intel_bts_snapshot_ref { void *ref_buf; size_t ref_offset; diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index f630de0206a17..6fe667b3269ee 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -40,10 +40,6 @@ #define KiB_MASK(x) (KiB(x) - 1) #define MiB_MASK(x) (MiB(x) - 1) -#define INTEL_PT_DEFAULT_SAMPLE_SIZE KiB(4) - -#define INTEL_PT_MAX_SAMPLE_SIZE KiB(60) - #define INTEL_PT_PSB_PERIOD_NEAR 256 struct intel_pt_snapshot_ref { diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index b2834ac7b1f55..218ee2bac9a5c 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -866,8 +866,6 @@ 
static void intel_bts_print_info(u64 *arr, int start, int finish) fprintf(stdout, intel_bts_info_fmts[i], arr[i]); } -u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE]; - int intel_bts_process_auxtrace_info(union perf_event *event, struct perf_session *session) { From dcaa394807ac219d8597d25bad3fe1bc6c86123b Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 19 Jun 2017 10:55:56 +0800 Subject: [PATCH 14/23] perf annotate: Return arch from symbol__disassemble() and save it in browser In annotate browser, we will add support to check fused instructions. While this is x86-specific feature so we need the annotate browser to know what the arch it runs on. symbol__disassemble() has figured out the arch. This patch just lets the arch return from symbol__disassemble and save the arch in annotate browser. Signed-off-by: Yao Jin Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1497840958-4759-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 2 +- tools/perf/ui/browsers/annotate.c | 6 +++++- tools/perf/ui/gtk/annotate.c | 3 ++- tools/perf/util/annotate.c | 10 ++++++++-- tools/perf/util/annotate.h | 4 +++- 5 files changed, 19 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 10b6362ca0bf7..2bcfa46913c84 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -134,7 +134,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__disassemble(sym, map, NULL, 0); + err = symbol__disassemble(sym, map, NULL, 0, NULL); if (err == 0) { out_assign: top->sym_filter_entry = he; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 7a03389b7a03b..27f41f28dcb49 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -46,12 +46,15 @@ static struct 
annotate_browser_opt { .jump_arrows = true, }; +struct arch; + struct annotate_browser { struct ui_browser b; struct rb_root entries; struct rb_node *curr_hot; struct disasm_line *selection; struct disasm_line **offsets; + struct arch *arch; int nr_events; u64 start; int nr_asm_entries; @@ -1070,7 +1073,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, (nr_pcnt - 1); } - err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), sizeof_bdl); + err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), + sizeof_bdl, &browser.arch); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index e99ba86158d29..d903fd493416b 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -168,7 +168,8 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0); + err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), + 0, NULL); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index ddbd56df91878..be1caabb92906 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1379,7 +1379,9 @@ static const char *annotate__norm_arch(const char *arch_name) return normalize_arch((char *)arch_name); } -int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize) +int symbol__disassemble(struct symbol *sym, struct map *map, + const char *arch_name, size_t privsize, + struct arch **parch) { struct dso *dso = map->dso; char command[PATH_MAX * 2]; @@ -1405,6 +1407,9 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na if (arch == NULL) return -ENOTSUP; + if (parch) + *parch = arch; + if (arch->init) { err 
= arch->init(arch); if (err) { @@ -1901,7 +1906,8 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, struct rb_root source_line = RB_ROOT; u64 len; - if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0) < 0) + if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), + 0, NULL) < 0) return -1; len = symbol__size(sym); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 948aa8e6fd394..21055034aedd6 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -158,7 +158,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr); int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); -int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize); +int symbol__disassemble(struct symbol *sym, struct map *map, + const char *arch_name, size_t privsize, + struct arch **parch); enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, From 9b57fb7e35957c6838f89f4ed7e3f8433a4bbfc5 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 21 Jun 2017 02:32:03 +0800 Subject: [PATCH 15/23] perf test llvm: Avoid error when PROFILE_ALL_BRANCHES is set The 'if' keyword is a define that expands to complex code when CONFIG_PROFILE_ALL_BRANCHES is selected, which causes a 'perf test LLVM' failure like: $ ./perf test LLVM 35: LLVM search and compile : 35.1: Basic BPF llvm compile : Ok 35.2: kbuild searching : Ok 35.3: Compile source for BPF prologue generation: FAILED! 35.4: Compile source for BPF relocation : Skip The only affected test case is bpf-script-test-prologue.c because it uses kernel headers and has 'if' inside. This patch undefines 'if' to make it passes perf test. More detailed analysis from a message in this thread, also by Wang: The problem is caused by following relocation information: $ readelf -a ./llvmsubtest3 ... 
[ 5] _ftrace_branch PROGBITS 0000000000000000 00000260 00000000000000a0 0000000000000000 WA 0 0 4 ... Relocation section '.relfunc=null_lseek file->f_mode offset orig' at offset 0x490 contains 4 entries: Offset Info Type Sym. Value Sym. Name 000000000038 000b00000001 unrecognized: 1 0000000000000000 _ftrace_branch 0000000000b0 000b00000001 unrecognized: 1 0000000000000000 _ftrace_branch 000000000128 000b00000001 unrecognized: 1 0000000000000000 _ftrace_branch 0000000001c0 000b00000001 unrecognized: 1 0000000000000000 _ftrace_branch Relocation section '.rel_ftrace_branch' at offset 0x4d0 contains 8 entries: Offset Info Type Sym. Value Sym. Name 000000000000 000200000001 unrecognized: 1 0000000000000000 .L__func__.bpf_func__n 000000000008 000100000001 unrecognized: 1 0000000000000015 .L.str 000000000028 000200000001 unrecognized: 1 0000000000000000 .L__func__.bpf_func__n 000000000030 000100000001 unrecognized: 1 0000000000000015 .L.str 000000000050 000200000001 unrecognized: 1 0000000000000000 .L__func__.bpf_func__n 000000000058 000100000001 unrecognized: 1 0000000000000015 .L.str 000000000078 000200000001 unrecognized: 1 0000000000000000 .L__func__.bpf_func__n 000000000080 000100000001 unrecognized: 1 0000000000000015 .L.str ... So I think the failure is because you enabled CONFIG_PROFILE_ALL_BRANCHES. I can reproduce your buggy result by selecting CONFIG_PROFILE_ALL_BRANCHES in my kbuild: $ ./perf test LLVM 35: LLVM search and compile : 35.1: Basic BPF llvm compile : Ok 35.2: kbuild searching : Ok 35.3: Compile source for BPF prologue generation: FAILED! 35.4: Compile source for BPF relocation : Skip Simply undefining CONFIG_PROFILE_ALL_BRANCHES in the clang opts does not work because it is introduced by "#include ", which overrides cmdline options. So I think the best way is to undefine 'if' inside the BPF script.
Reported-and-Tested-by: Thomas-Mich Richter Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: Hendrik Brueckner Cc: Zefan Li Link: http://lkml.kernel.org/r/20170620183203.2517-1-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf-script-test-prologue.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c index 7230e62c70fcf..b4ebc75e25aef 100644 --- a/tools/perf/tests/bpf-script-test-prologue.c +++ b/tools/perf/tests/bpf-script-test-prologue.c @@ -10,6 +10,15 @@ #include +/* + * If CONFIG_PROFILE_ALL_BRANCHES is selected, + * 'if' is redefined after include kernel header. + * Recover 'if' for BPF object code. + */ +#ifdef if +# undef if +#endif + #define FMODE_READ 0x1 #define FMODE_WRITE 0x2 From 55b9b50811ca459e4688543b688b7b2b85ec5ea8 Mon Sep 17 00:00:00 2001 From: Mark Santaniello Date: Mon, 19 Jun 2017 09:38:24 -0700 Subject: [PATCH 16/23] perf script: Support -F brstack,dso and brstacksym,dso Perf script can report the dso for "addr" and "ip" fields. This adds the same support for the "brstack" and "brstacksym" fields. This can be helpful for AutoFDO: we can ignore LBR entries unless the source and target address are both in the target module we are about to build. I built a small test akin to "while(1) { do_nothing(); }" where the do_nothing function is loaded from a dso: $ cat burncpu.cpp #include int main() { void* handle = dlopen("./dso.so", RTLD_LAZY); if (!handle) return -1; typedef void (*fp)(); fp do_nothing = (fp) dlsym(handle, "do_nothing"); while(1) { do_nothing(); } } $ cat dso.cpp extern "C" void do_nothing() {} $ cat build.sh #!/bin/bash g++ -shared dso.cpp -o dso.so g++ burncpu.cpp -o burncpu -ldl I sampled the execution with perf record -b. 
Using the new perf script functionality I can easily find cases where there was a transition from one dso to another: $ perf record -a -b -- sleep 5 [ perf record: Woken up 55 times to write data ] [ perf record: Captured and wrote 18.815 MB perf.data (43593 samples) ] $ perf script -F brstack,dso | sed 's/\/0 /\/0\n/g' | grep burncpu | grep dso.so | head -n 1 0x7f967139b6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0 $ perf script -F brstacksym,dso | sed 's/\/0 /\/0\n/g' | grep burncpu | grep dso.so | head -n 1 do_nothing+0x5(/tmp/burncpu/dso.so)/main+0x44(/tmp/burncpu/exe)/P/-/-/0 Signed-off-by: Mark Santaniello Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170619163825.2012979-1-marksan@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 61 +++++++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index afa84debc5c4d..3c21089f52736 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -298,10 +298,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, "selected.\n"); return -EINVAL; } - if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { - pr_err("Display of DSO requested but neither sample IP nor " - "sample address\nis selected. Hence, no addresses to convert " - "to DSO.\n"); + if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) && + !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM)) { + pr_err("Display of DSO requested but none of sample IP, sample address, " + "brstack\nor brstacksym are selected. Hence, no addresses to " + "convert to DSO.\n"); return -EINVAL; } if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) { @@ -514,18 +515,43 @@ mispred_str(struct branch_entry *br) return br->flags.predicted ? 
'P' : 'M'; } -static void print_sample_brstack(struct perf_sample *sample) +static void print_sample_brstack(struct perf_sample *sample, + struct thread *thread, + struct perf_event_attr *attr) { struct branch_stack *br = sample->branch_stack; - u64 i; + struct addr_location alf, alt; + u64 i, from, to; if (!(br && br->nr)) return; for (i = 0; i < br->nr; i++) { - printf(" 0x%"PRIx64"/0x%"PRIx64"/%c/%c/%c/%d ", - br->entries[i].from, - br->entries[i].to, + from = br->entries[i].from; + to = br->entries[i].to; + + if (PRINT_FIELD(DSO)) { + memset(&alf, 0, sizeof(alf)); + memset(&alt, 0, sizeof(alt)); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); + } + + printf("0x%"PRIx64, from); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alf.map, stdout); + printf(")"); + } + + printf("/0x%"PRIx64, to); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alt.map, stdout); + printf(")"); + } + + printf("/%c/%c/%c/%d ", mispred_str( br->entries + i), br->entries[i].flags.in_tx? 'X' : '-', br->entries[i].flags.abort? 
'A' : '-', @@ -534,7 +560,8 @@ static void print_sample_brstack(struct perf_sample *sample) } static void print_sample_brstacksym(struct perf_sample *sample, - struct thread *thread) + struct thread *thread, + struct perf_event_attr *attr) { struct branch_stack *br = sample->branch_stack; struct addr_location alf, alt; @@ -559,8 +586,18 @@ static void print_sample_brstacksym(struct perf_sample *sample, alt.sym = map__find_symbol(alt.map, alt.addr); symbol__fprintf_symname_offs(alf.sym, &alf, stdout); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alf.map, stdout); + printf(")"); + } putchar('/'); symbol__fprintf_symname_offs(alt.sym, &alt, stdout); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alt.map, stdout); + printf(")"); + } printf("/%c/%c/%c/%d ", mispred_str( br->entries + i), br->entries[i].flags.in_tx? 'X' : '-', @@ -1187,9 +1224,9 @@ static void process_event(struct perf_script *script, print_sample_iregs(sample, attr); if (PRINT_FIELD(BRSTACK)) - print_sample_brstack(sample); + print_sample_brstack(sample, thread, attr); else if (PRINT_FIELD(BRSTACKSYM)) - print_sample_brstacksym(sample, thread); + print_sample_brstacksym(sample, thread, attr); if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) print_sample_bpf_output(sample); From 106dacd86f042968e0bb974490fcb9cd017cd03a Mon Sep 17 00:00:00 2001 From: Mark Santaniello Date: Mon, 19 Jun 2017 09:38:25 -0700 Subject: [PATCH 17/23] perf script: Support -F brstackoff,dso The idea here is to make AutoFDO easier in cloud environment with ASLR. It's easiest to show how this is useful by example. 
I built a small test akin to "while(1) { do_nothing(); }" where the do_nothing function is loaded from a dso: $ cat burncpu.cpp #include <dlfcn.h> int main() { void* handle = dlopen("./dso.so", RTLD_LAZY); if (!handle) return -1; typedef void (*fp)(); fp do_nothing = (fp) dlsym(handle, "do_nothing"); while(1) { do_nothing(); } } $ cat dso.cpp extern "C" void do_nothing() {} $ cat build.sh #!/bin/bash g++ -shared dso.cpp -o dso.so g++ burncpu.cpp -o burncpu -ldl I sampled the execution of this program with perf record -b. Using the existing "brstack,dso", we get absolute addresses that are affected by ASLR and can differ between hosts. The address does not uniquely identify a branch/target in the binary: $ perf script -F brstack,dso | sed 's/\/0 /\/0\n/g' | grep burncpu | grep dso.so | head -n 1 0x7f967139b6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0 Using the existing "brstacksym,dso" is a little better, because the symbol plus offset and dso name *does* uniquely identify a branch/target in the binary.
Ultimately, however, AutoFDO wants a simple offset into the binary, so we'd have to undo all the work perf did to symbolize in the first place: $ perf script -F brstacksym,dso | sed 's/\/0 /\/0\n/g' | grep burncpu | grep dso.so | head -n 1 do_nothing+0x5(/tmp/burncpu/dso.so)/main+0x44(/tmp/burncpu/exe)/P/-/-/0 With the new "brstackoff,dso" we get what we need: a simple offset into a specific dso/binary that uniquely identifies a branch/target: $ perf script -F brstackoff,dso | sed 's/\/0 /\/0\n/g' | grep burncpu | grep dso.so | head -n 1 0x6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0 Signed-off-by: Mark Santaniello Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170619163825.2012979-2-marksan@fb.com [ Updated documentation about 'brstackoff' using text from above ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 4 +- tools/perf/builtin-script.c | 56 ++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 3eca8c0d3c7b9..e2468ed6a307d 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -116,7 +116,7 @@ OPTIONS --fields:: Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, - srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, + srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff, callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -211,6 +211,8 @@ OPTIONS is printed. This is the full execution path leading to the sample. 
This is only supported when the sample was recorded with perf record -b or -j any. + The brstackoff field will print an offset into a specific dso/binary. + -k:: --vmlinux=:: vmlinux pathname diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 3c21089f52736..db5261c3f7199 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -85,6 +85,7 @@ enum perf_output_field { PERF_OUTPUT_INSN = 1U << 21, PERF_OUTPUT_INSNLEN = 1U << 22, PERF_OUTPUT_BRSTACKINSN = 1U << 23, + PERF_OUTPUT_BRSTACKOFF = 1U << 24, }; struct output_option { @@ -115,6 +116,7 @@ struct output_option { {.str = "insn", .field = PERF_OUTPUT_INSN}, {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN}, {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN}, + {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF}, }; /* default set to maintain compatibility with current format */ @@ -299,10 +301,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, return -EINVAL; } if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) && - !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM)) { - pr_err("Display of DSO requested but none of sample IP, sample address, " - "brstack\nor brstacksym are selected. Hence, no addresses to " - "convert to DSO.\n"); + !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM) && !PRINT_FIELD(BRSTACKOFF)) { + pr_err("Display of DSO requested but no address to convert. 
Select\n" + "sample IP, sample address, brstack, brstacksym, or brstackoff.\n"); return -EINVAL; } if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) { @@ -606,6 +607,51 @@ static void print_sample_brstacksym(struct perf_sample *sample, } } +static void print_sample_brstackoff(struct perf_sample *sample, + struct thread *thread, + struct perf_event_attr *attr) +{ + struct branch_stack *br = sample->branch_stack; + struct addr_location alf, alt; + u64 i, from, to; + + if (!(br && br->nr)) + return; + + for (i = 0; i < br->nr; i++) { + + memset(&alf, 0, sizeof(alf)); + memset(&alt, 0, sizeof(alt)); + from = br->entries[i].from; + to = br->entries[i].to; + + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); + if (alf.map && !alf.map->dso->adjust_symbols) + from = map__map_ip(alf.map, from); + + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); + if (alt.map && !alt.map->dso->adjust_symbols) + to = map__map_ip(alt.map, to); + + printf("0x%"PRIx64, from); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alf.map, stdout); + printf(")"); + } + printf("/0x%"PRIx64, to); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alt.map, stdout); + printf(")"); + } + printf("/%c/%c/%c/%d ", + mispred_str(br->entries + i), + br->entries[i].flags.in_tx ? 'X' : '-', + br->entries[i].flags.abort ? 
'A' : '-', + br->entries[i].flags.cycles); + } +} #define MAXBB 16384UL static int grab_bb(u8 *buffer, u64 start, u64 end, @@ -1227,6 +1273,8 @@ static void process_event(struct perf_script *script, print_sample_brstack(sample, thread, attr); else if (PRINT_FIELD(BRSTACKSYM)) print_sample_brstacksym(sample, thread, attr); + else if (PRINT_FIELD(BRSTACKOFF)) + print_sample_brstackoff(sample, thread, attr); if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) print_sample_bpf_output(sample); From e7bd9ba20a9ec7024a0566a93c22b9571a48939a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 18 Jun 2017 23:22:59 +0900 Subject: [PATCH 18/23] perf ftrace: Show error message when fails to set ftrace files It'd be better for debugging to show an error message when it fails to setup ftrace for some reason. Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Steven Rostedt Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20170618142302.25390-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 9e0b35cd0eeae..966a94fa8200b 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -61,6 +61,7 @@ static int __write_tracing_file(const char *name, const char *val, bool append) int fd, ret = -1; ssize_t size = strlen(val); int flags = O_WRONLY; + char errbuf[512]; file = get_tracing_file(name); if (!file) { @@ -75,14 +76,16 @@ static int __write_tracing_file(const char *name, const char *val, bool append) fd = open(file, flags); if (fd < 0) { - pr_debug("cannot open tracing file: %s\n", name); + pr_debug("cannot open tracing file: %s: %s\n", + name, str_error_r(errno, errbuf, sizeof(errbuf))); goto out; } if (write(fd, val, size) == size) ret = 0; else - pr_debug("write '%s' to tracing/%s failed\n", val, name); 
+ pr_debug("write '%s' to tracing/%s failed: %s\n", + val, name, str_error_r(errno, errbuf, sizeof(errbuf))); close(fd); out: From 29681bc5bb4326c2f9eac5dc68d8fad3e88b4bb5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 18 Jun 2017 23:23:00 +0900 Subject: [PATCH 19/23] perf ftrace: Move setup_pager before opening trace_pipe The 'perf ftrace' command fails to reset tracer after finishing recording like below: $ sudo perf ftrace -v hello write 'nop' to tracing/current_tracer failed: Device or resource busy ... This is because the trace_pipe file is open in pager process. Move the pager setup to before opening the file. Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Steven Rostedt Cc: kernel-team@lge.com Fixes: 583359646fde ("perf ftrace: Use pager for displaying result") Link: http://lkml.kernel.org/r/20170618142302.25390-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 966a94fa8200b..982b98ee639ed 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -231,6 +231,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_reset; } + setup_pager(); + trace_file = get_tracing_file("trace_pipe"); if (!trace_file) { pr_err("failed to open trace_pipe\n"); @@ -254,8 +256,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_close_fd; } - setup_pager(); - perf_evlist__start_workload(ftrace->evlist); while (!done) { From 78b83e8b12b4467540ca501c7c019e9d46051957 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 18 Jun 2017 23:23:01 +0900 Subject: [PATCH 20/23] perf ftrace: Add option for function filtering The -T/--trace-funcs and -N/--notrace-funcs options are to specify functions to enable/disable tracing dynamically. 
The -G/--graph-funcs and -g/--nograph-funcs options are to set filters for the function graph tracer. For example, to trace fault handling functions only: $ sudo perf ftrace -T *fault hello 0) | __do_page_fault() { 0) | handle_mm_fault() { 0) 2.117 us | __handle_mm_fault(); 0) 3.627 us | } 0) 7.811 us | } 0) | __do_page_fault() { 0) | handle_mm_fault() { 0) 2.014 us | __handle_mm_fault(); 0) 2.424 us | } 0) 2.951 us | } ... To trace all functions executed in __do_page_fault: $ sudo perf ftrace -G __do_page_fault hello 2) | __do_page_fault() { 3) 0.060 us | down_read_trylock(); 3) | find_vma() { 3) 0.075 us | vmacache_find(); 3) 0.053 us | vmacache_update(); 3) 1.246 us | } 3) | handle_mm_fault() { 3) 0.063 us | __rcu_read_lock(); 3) 0.056 us | mem_cgroup_from_task(); 3) 0.057 us | __rcu_read_unlock(); 3) | __handle_mm_fault() { 3) | filemap_map_pages() { 3) 0.058 us | __rcu_read_lock(); 3) | alloc_set_pte() { ... To do the same but hide the details inside handle_mm_fault: $ sudo perf ftrace -G __do_page_fault -g handle_mm_fault hello 3) | __do_page_fault() { 3) 0.049 us | down_read_trylock(); 3) | find_vma() { 3) 0.048 us | vmacache_find(); 3) 0.041 us | vmacache_update(); 3) 0.680 us | } 3) 0.036 us | up_read(); 3) 4.547 us | } /* __do_page_fault */ ...
Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Steven Rostedt Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20170618142302.25390-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 30 ++++++ tools/perf/builtin-ftrace.c | 117 +++++++++++++++++++++-- 2 files changed, 141 insertions(+), 6 deletions(-) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 6e6a8b22c8594..78d6126ca4856 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -48,6 +48,36 @@ OPTIONS Ranges of CPUs are specified with -: 0-2. Default is to trace on all online CPUs. +-T:: +--trace-funcs=:: + Only trace functions given by the argument. Multiple functions + can be given by using this option more than once. The function + argument also can be a glob pattern. It will be passed to + 'set_ftrace_filter' in tracefs. + +-N:: +--notrace-funcs=:: + Do not trace functions given by the argument. Like -T option, + this can be used more than once to specify multiple functions + (or glob patterns). It will be passed to 'set_ftrace_notrace' + in tracefs. + +-G:: +--graph-funcs=:: + Set graph filter on the given function (or a glob pattern). + This is useful for the function_graph tracer only and enables + tracing for functions executed from the given function. + This can be used more than once to specify multiple functions. + It will be passed to 'set_graph_function' in tracefs. + +-g:: +--nograph-funcs=:: + Set graph notrace filter on the given function (or a glob pattern). + Like -G option, this is useful for the function_graph tracer only + and disables tracing for function executed from the given function. + This can be used more than once to specify multiple functions. + It will be passed to 'set_graph_notrace' in tracefs. 
+ SEE ALSO -------- diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 982b98ee639ed..3285375ce3c23 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -28,9 +28,18 @@ #define DEFAULT_TRACER "function_graph" struct perf_ftrace { - struct perf_evlist *evlist; - struct target target; - const char *tracer; + struct perf_evlist *evlist; + struct target target; + const char *tracer; + struct list_head filters; + struct list_head notrace; + struct list_head graph_funcs; + struct list_head nograph_funcs; +}; + +struct filter_entry { + struct list_head list; + char name[]; }; static bool done; @@ -104,6 +113,7 @@ static int append_tracing_file(const char *name, const char *val) } static int reset_tracing_cpu(void); +static void reset_tracing_filters(void); static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) { @@ -119,6 +129,7 @@ static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) if (reset_tracing_cpu() < 0) return -1; + reset_tracing_filters(); return 0; } @@ -184,6 +195,48 @@ static int reset_tracing_cpu(void) return ret; } +static int __set_tracing_filter(const char *filter_file, struct list_head *funcs) +{ + struct filter_entry *pos; + + list_for_each_entry(pos, funcs, list) { + if (append_tracing_file(filter_file, pos->name) < 0) + return -1; + } + + return 0; +} + +static int set_tracing_filters(struct perf_ftrace *ftrace) +{ + int ret; + + ret = __set_tracing_filter("set_ftrace_filter", &ftrace->filters); + if (ret < 0) + return ret; + + ret = __set_tracing_filter("set_ftrace_notrace", &ftrace->notrace); + if (ret < 0) + return ret; + + ret = __set_tracing_filter("set_graph_function", &ftrace->graph_funcs); + if (ret < 0) + return ret; + + /* old kernels do not have this filter */ + __set_tracing_filter("set_graph_notrace", &ftrace->nograph_funcs); + + return ret; +} + +static void reset_tracing_filters(void) +{ + write_tracing_file("set_ftrace_filter", " "); + 
write_tracing_file("set_ftrace_notrace", " "); + write_tracing_file("set_graph_function", " "); + write_tracing_file("set_graph_notrace", " "); +} + static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) { char *trace_file; @@ -226,6 +279,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_reset; } + if (set_tracing_filters(ftrace) < 0) { + pr_err("failed to set tracing filters\n"); + goto out_reset; + } + if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { pr_err("failed to set current_tracer to %s\n", ftrace->tracer); goto out_reset; @@ -310,6 +368,32 @@ static int perf_ftrace_config(const char *var, const char *value, void *cb) return -1; } +static int parse_filter_func(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + struct list_head *head = opt->value; + struct filter_entry *entry; + + entry = malloc(sizeof(*entry) + strlen(str) + 1); + if (entry == NULL) + return -ENOMEM; + + strcpy(entry->name, str); + list_add_tail(&entry->list, head); + + return 0; +} + +static void delete_filter_func(struct list_head *head) +{ + struct filter_entry *pos, *tmp; + + list_for_each_entry_safe(pos, tmp, head, list) { + list_del(&pos->list); + free(pos); + } +} + int cmd_ftrace(int argc, const char **argv) { int ret; @@ -333,9 +417,22 @@ int cmd_ftrace(int argc, const char **argv) "system-wide collection from all CPUs"), OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu", "list of cpus to monitor"), + OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", + "trace given functions only", parse_filter_func), + OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func", + "do not trace given functions", parse_filter_func), + OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func", + "Set graph filter on given functions", parse_filter_func), + OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func", + "Set nograph filter on given functions", 
parse_filter_func), OPT_END() }; + INIT_LIST_HEAD(&ftrace.filters); + INIT_LIST_HEAD(&ftrace.notrace); + INIT_LIST_HEAD(&ftrace.graph_funcs); + INIT_LIST_HEAD(&ftrace.nograph_funcs); + ret = perf_config(perf_ftrace_config, &ftrace); if (ret < 0) return -1; @@ -351,12 +448,14 @@ int cmd_ftrace(int argc, const char **argv) target__strerror(&ftrace.target, ret, errbuf, 512); pr_err("%s\n", errbuf); - return -EINVAL; + goto out_delete_filters; } ftrace.evlist = perf_evlist__new(); - if (ftrace.evlist == NULL) - return -ENOMEM; + if (ftrace.evlist == NULL) { + ret = -ENOMEM; + goto out_delete_filters; + } ret = perf_evlist__create_maps(ftrace.evlist, &ftrace.target); if (ret < 0) @@ -367,5 +466,11 @@ int cmd_ftrace(int argc, const char **argv) out_delete_evlist: perf_evlist__delete(ftrace.evlist); +out_delete_filters: + delete_filter_func(&ftrace.filters); + delete_filter_func(&ftrace.notrace); + delete_filter_func(&ftrace.graph_funcs); + delete_filter_func(&ftrace.nograph_funcs); + return ret; } From 1096c35aa821cc4789a64232a0e210bb87a0e5e8 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 18 Jun 2017 23:23:02 +0900 Subject: [PATCH 21/23] perf ftrace: Add -D option for depth filter The -D/--graph-depth option is to set max graph depth. The following example traces max 2-depth of page fault handler. $ sudo perf ftrace -G __do_page_fault -D 2 -- hello ... 0) | __do_page_fault() { 0) 0.063 us | down_read_trylock(); 0) 0.251 us | find_vma(); 0) 5.374 us | handle_mm_fault(); 0) 0.054 us | up_read(); 0) 7.463 us | } ... 
Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Steven Rostedt Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20170618142302.25390-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 3 +++ tools/perf/builtin-ftrace.c | 31 ++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 78d6126ca4856..721a447f046ea 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -78,6 +78,9 @@ OPTIONS This can be used more than once to specify multiple functions. It will be passed to 'set_graph_notrace' in tracefs. +-D:: +--graph-depth=:: + Set max depth for function graph tracer to follow SEE ALSO -------- diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 3285375ce3c23..dd26c62c9893d 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -35,6 +35,7 @@ struct perf_ftrace { struct list_head notrace; struct list_head graph_funcs; struct list_head nograph_funcs; + int graph_depth; }; struct filter_entry { @@ -129,6 +130,9 @@ static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) if (reset_tracing_cpu() < 0) return -1; + if (write_tracing_file("max_graph_depth", "0") < 0) + return -1; + reset_tracing_filters(); return 0; } @@ -237,6 +241,26 @@ static void reset_tracing_filters(void) write_tracing_file("set_graph_notrace", " "); } +static int set_tracing_depth(struct perf_ftrace *ftrace) +{ + char buf[16]; + + if (ftrace->graph_depth == 0) + return 0; + + if (ftrace->graph_depth < 0) { + pr_err("invalid graph depth: %d\n", ftrace->graph_depth); + return -1; + } + + snprintf(buf, sizeof(buf), "%d", ftrace->graph_depth); + + if (write_tracing_file("max_graph_depth", buf) < 0) + return -1; + + return 0; +} + 
static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) { char *trace_file; @@ -284,6 +308,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_reset; } + if (set_tracing_depth(ftrace) < 0) { + pr_err("failed to set graph depth\n"); + goto out_reset; + } + if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { pr_err("failed to set current_tracer to %s\n", ftrace->tracer); goto out_reset; @@ -425,6 +454,8 @@ int cmd_ftrace(int argc, const char **argv) "Set graph filter on given functions", parse_filter_func), OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func", "Set nograph filter on given functions", parse_filter_func), + OPT_INTEGER('D', "graph-depth", &ftrace.graph_depth, + "Max depth for function graph tracer"), OPT_END() }; From 4f1fd74283582f3f5c34d1c9ed55117d775b4a20 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Sat, 17 Jun 2017 12:46:37 +0900 Subject: [PATCH 22/23] perf config: Check error cases of {show_spec, set}_config() show_spec_config() and set_config() can be called multiple times in the loop in cmd_config(). However, their error cases weren't checked, so fix it. 
Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1497671197-20450-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-config.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index 75459668edb24..bb1be79bceda5 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -225,10 +225,23 @@ int cmd_config(int argc, const char **argv) break; } - if (value == NULL) + if (value == NULL) { ret = show_spec_config(set, var); - else + if (ret < 0) { + pr_err("%s is not configured: %s\n", + var, config_filename); + free(arg); + break; + } + } else { ret = set_config(set, config_filename, var, value); + if (ret < 0) { + pr_err("Failed to set '%s=%s' on %s\n", + var, value, config_filename); + free(arg); + break; + } + } free(arg); } } From dfe1c6d7efa8ead6878b73216d4c891a28207528 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Sat, 17 Jun 2017 12:46:42 +0900 Subject: [PATCH 23/23] perf config: Refactor the code using 'ret' variable in cmd_config() To simplify the code related to 'ret' variable in cmd_config(), initialize 'ret' with -1 instead of 0 and use goto to perform resource release at the end of the function, setting ret to zero just before the out_err label, as usual in the kernel sources. 
Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1497671202-20495-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-config.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index bb1be79bceda5..ece45582a48d0 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -156,7 +156,7 @@ static int parse_config_arg(char *arg, char **var, char **value) int cmd_config(int argc, const char **argv) { - int i, ret = 0; + int i, ret = -1; struct perf_config_set *set; char *user_config = mkpath("%s/.perfconfig", getenv("HOME")); const char *config_filename; @@ -186,10 +186,8 @@ int cmd_config(int argc, const char **argv) * because of reinitializing with options config file location. */ set = perf_config_set__new(); - if (!set) { - ret = -1; + if (!set) goto out_err; - } switch (actions) { case ACTION_LIST: @@ -197,10 +195,11 @@ int cmd_config(int argc, const char **argv) pr_err("Error: takes no arguments\n"); parse_options_usage(config_usage, config_options, "l", 1); } else { - ret = show_config(set); - if (ret < 0) + if (show_config(set) < 0) { pr_err("Nothing configured, " "please check your %s \n", config_filename); + goto out_err; + } } break; default: @@ -215,38 +214,35 @@ int cmd_config(int argc, const char **argv) if (!arg) { pr_err("%s: strdup failed\n", __func__); - ret = -1; - break; + goto out_err; } if (parse_config_arg(arg, &var, &value) < 0) { free(arg); - ret = -1; - break; + goto out_err; } if (value == NULL) { - ret = show_spec_config(set, var); - if (ret < 0) { + if (show_spec_config(set, var) < 0) { pr_err("%s is not configured: %s\n", var, config_filename); free(arg); - break; + goto out_err; } } else { - ret = set_config(set, config_filename, var, value); - if (ret < 0) { + if (set_config(set, config_filename, var, value) < 0) { 
pr_err("Failed to set '%s=%s' on %s\n", var, value, config_filename); free(arg); - break; + goto out_err; } } free(arg); } } - perf_config_set__delete(set); + ret = 0; out_err: + perf_config_set__delete(set); return ret; }