From 640d5175a671cd0df0b9e3b5935dc80fc5248973 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 3 Nov 2017 15:13:29 -0300 Subject: [PATCH 01/83] perf evlist: Set the correct idx when adding dummy events The evsel->idx field is used mainly to access the right bucket in per-event arrays such as the annotation ones, but also to set evsel->tracking, that in turn will decide what of the events will ask for PERF_RECORD_{MMAP,COMM,EXEC} to be generated, i.e. which perf_event_attr will have its mmap, etc fields set. When we were adding the "dummy" event using perf_evlist__add_dummy() we were not setting it correctly, which could result in multiple tracking events. Now that I'll try using a dummy event to be the tracking one when using 'perf record --delay', i.e. when we process the --delay setting we may already have the evlist set up, like with: perf record -e cycles,instructions --delay 1000 ./workload We will need to add a "dummy" event, then reset evsel->tracking for the first event, "cycles", and set it instead to the dummy one, and also setting its attr.enable_on_exec, so that we get the PERF_RECORD_MMAP, etc metadata events while waiting to enable the explicitely requested events, so lets get this straight and set the right evsel->idx. Cc: Adrian Hunter Cc: Bram Stolk Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-nrdfchshqxf7diszhxcecqb9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c6c891e154a63..ccb749f9a83f9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -257,7 +257,7 @@ int perf_evlist__add_dummy(struct perf_evlist *evlist) .config = PERF_COUNT_SW_DUMMY, .size = sizeof(attr), /* to capture ABI version */ }; - struct perf_evsel *evsel = perf_evsel__new(&attr); + struct perf_evsel *evsel = perf_evsel__new_idx(&attr, evlist->nr_entries); if (evsel == NULL) return -ENOMEM; From d3dbf43c56f9176be325ce1cc72a44c8d3c210dc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 3 Nov 2017 15:34:34 -0300 Subject: [PATCH 02/83] perf record: Generate PERF_RECORD_{MMAP,COMM,EXEC} with --delay When we use an initial delay, e.g.: 'perf record --delay 1000', we do not enable the events until that delay has passed after we started the workload, including the tracking event, i.e. the one for which we have attr.mmap, etc, enabled to ask the kernel to generate the PERF_RECORD_{MMAP,COMM,EXEC} metadata events that will then allow us to resolve addresses in samples to the map, dso and symbol. There will be a shadow that even synthesizing samples won't cover, i.e. the workload that we start and other processes forking while we wait for the initial delay to expire. So use a dummy event to be the tracking one and make it be enabled on exec. Before: # perf record --delay 1000 stress --cpu 1 --timeout 5 stress: info: [9029] dispatching hogs: 1 cpu, 0 io, 0 vm, 0 hdd stress: info: [9029] successful run completed in 5s [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 0.624 MB perf.data (15908 samples) ] # perf script | head :9031 9031 32001.826888: 1 cycles:ppp: ffffffff831aa30d event_function (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826893: 1 cycles:ppp: ffffffff8300d1a0 intel_bts_enable_local (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826895: 7 cycles:ppp: ffffffff83023870 sched_clock (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826897: 103 cycles:ppp: ffffffff8300c331 intel_pmu_handle_irq (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826899: 1615 cycles:ppp: ffffffff830231f8 native_sched_clock (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826902: 26724 cycles:ppp: ffffffff8384c6a7 native_irq_return_iret (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826913: 329739 cycles:ppp: 7fb2a5410932 [unknown] ([unknown]) :9031 9031 32001.827033: 1225451 cycles:ppp: 7fb2a5410930 [unknown] ([unknown]) :9031 9031 32001.827474: 1391725 cycles:ppp: 7fb2a5410930 [unknown] ([unknown]) :9031 9031 32001.827978: 1233697 cycles:ppp: 7fb2a5410928 [unknown] ([unknown]) # After: # perf record --delay 1000 stress --cpu 1 --timeout 5 stress: info: [9741] dispatching hogs: 1 cpu, 0 io, 0 vm, 0 hdd stress: info: [9741] successful run completed in 5s [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 0.751 MB perf.data (15976 samples) ] # perf script | head stress 9742 32110.959106: 1 cycles:ppp: ffffffff831b26f6 __perf_event_task_sched_in (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959110: 1 cycles:ppp: ffffffff8300c2e9 intel_pmu_handle_irq (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959112: 7 cycles:ppp: ffffffff830231e0 native_sched_clock (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959115: 101 cycles:ppp: ffffffff83023870 sched_clock (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959117: 1533 cycles:ppp: ffffffff830231f8 native_sched_clock (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959119: 23992 cycles:ppp: ffffffff831b0900 ctx_sched_in (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959129: 329406 cycles:ppp: 7f4b1b661930 __random_r (/usr/lib64/libc-2.25.so) stress 9742 32110.959249: 1288322 cycles:ppp: 5566e1e7cbc9 hogcpu (/usr/bin/stress) stress 9742 32110.959712: 1464046 cycles:ppp: 7f4b1b66179e __random (/usr/lib64/libc-2.25.so) stress 9742 32110.960241: 1266918 cycles:ppp: 7f4b1b66195b __random_r (/usr/lib64/libc-2.25.so) # Reported-by: Bram Stolk Tested-by: Bram Stolk Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 6619a53ef757 ("perf record: Add --initial-delay option") Link: http://lkml.kernel.org/n/tip-nrdfchshqxf7diszhxcecqb9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3d7f33e19df28..5f78ce9434078 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -339,6 +339,22 @@ static int record__open(struct record *rec) struct perf_evsel_config_term *err_term; int rc = 0; + /* + * For initial_delay we need to add a dummy event so that we can track + * PERF_RECORD_MMAP while we wait for the initial delay to enable the + * real events, the ones asked by the user. + */ + if (opts->initial_delay) { + if (perf_evlist__add_dummy(evlist)) + return -ENOMEM; + + pos = perf_evlist__first(evlist); + pos->tracking = 0; + pos = perf_evlist__last(evlist); + pos->tracking = 1; + pos->attr.enable_on_exec = 1; + } + perf_evlist__config(evlist, opts, &callchain_param); evlist__for_each_entry(evlist, pos) { From a17c4ca0ddef659d33fb6661995bd74e1a6a6101 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:25 +0200 Subject: [PATCH 03/83] perf annotate: Add annotation_line struct In order to make the annotation support generic, addadding 'struct annotation_line', which will hold generic data common to annotation sources (such as the one for python scripts, coming on upcoming patches). Having this, we can add different annotation line support other than objdump disasm. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 34 +++++++++++++++---------------- tools/perf/ui/gtk/annotate.c | 6 +++--- tools/perf/util/annotate.c | 20 +++++++++--------- tools/perf/util/annotate.h | 20 ++++++++++-------- 4 files changed, 42 insertions(+), 38 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 8f7f59d1a2b59..a8c2f7405a41c 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -84,7 +84,7 @@ static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, void *entry) { if (annotate_browser__opts.hide_src_code) { - struct disasm_line *dl = list_entry(entry, struct disasm_line, node); + struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); return dl->offset == -1; } @@ -123,7 +123,7 @@ static int annotate_browser__cycles_width(struct annotate_browser *ab) static void annotate_browser__write(struct ui_browser *browser, void *entry, int row) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); - struct disasm_line *dl = list_entry(entry, struct disasm_line, node); + struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); struct browser_disasm_line *bdl = disasm_line__browser(dl); bool current_entry = ui_browser__is_current_entry(browser, row); bool change_color = (!annotate_browser__opts.hide_src_code && @@ -286,7 +286,7 @@ static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sy static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor) { - struct disasm_line *pos = list_prev_entry(cursor, node); + struct disasm_line *pos = list_prev_entry(cursor, al.node); const char *name; if (!pos) @@ -404,16 +404,16 @@ static void annotate_browser__set_top(struct annotate_browser *browser, browser->b.top_idx = browser->b.index = idx; while (browser->b.top_idx != 0 && back != 0) { - pos = list_entry(pos->node.prev, struct disasm_line, node); + pos = list_entry(pos->al.node.prev, struct disasm_line, al.node); - if (disasm_line__filter(&browser->b, &pos->node)) + if (disasm_line__filter(&browser->b, &pos->al.node)) continue; --browser->b.top_idx; --back; } - browser->b.top = pos; + browser->b.top = &pos->al; browser->b.navkeypressed = true; } @@ -446,7 +446,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, pthread_mutex_lock(¬es->lock); - list_for_each_entry(pos, ¬es->src->source, node) { + list_for_each_entry(pos, ¬es->src->source, al.node) { struct browser_disasm_line *bpos = disasm_line__browser(pos); const char *path = NULL; double max_percent = 0.0; @@ -492,7 +492,7 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) off_t offset = browser->b.index - browser->b.top_idx; browser->b.seek(&browser->b, offset, SEEK_CUR); - dl = list_entry(browser->b.top, struct disasm_line, node); + dl = list_entry(browser->b.top, struct disasm_line, al.node); bdl = disasm_line__browser(dl); if (annotate_browser__opts.hide_src_code) { @@ -589,10 +589,10 @@ struct disasm_line *annotate_browser__find_offset(struct annotate_browser *brows struct disasm_line *pos; *idx = 0; - list_for_each_entry(pos, ¬es->src->source, node) { + list_for_each_entry(pos, ¬es->src->source, al.node) { if (pos->offset == offset) return pos; - if (!disasm_line__filter(&browser->b, &pos->node)) + if (!disasm_line__filter(&browser->b, &pos->al.node)) ++*idx; } @@ -630,8 +630,8 @@ struct disasm_line *annotate_browser__find_string(struct annotate_browser *brows struct disasm_line *pos = browser->selection; *idx = browser->b.index; - list_for_each_entry_continue(pos, ¬es->src->source, node) { - if (disasm_line__filter(&browser->b, &pos->node)) + list_for_each_entry_continue(pos, ¬es->src->source, al.node) { + if (disasm_line__filter(&browser->b, &pos->al.node)) continue; ++*idx; @@ -669,8 +669,8 @@ struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browse struct disasm_line *pos = browser->selection; *idx = browser->b.index; - list_for_each_entry_continue_reverse(pos, ¬es->src->source, node) { - if (disasm_line__filter(&browser->b, &pos->node)) + list_for_each_entry_continue_reverse(pos, ¬es->src->source, al.node) { + if (disasm_line__filter(&browser->b, &pos->al.node)) continue; --*idx; @@ -1134,7 +1134,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, notes = symbol__annotation(sym); browser.start = map__rip_2objdump(map, sym->start); - list_for_each_entry(pos, ¬es->src->source, node) { + list_for_each_entry(pos, ¬es->src->source, al.node) { struct browser_disasm_line *bpos; size_t line_len = strlen(pos->line); @@ -1174,8 +1174,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, annotate_browser__update_addr_width(&browser); ret = annotate_browser__run(&browser, evsel, hbt); - list_for_each_entry_safe(pos, n, ¬es->src->source, node) { - list_del(&pos->node); + list_for_each_entry_safe(pos, n, ¬es->src->source, al.node) { + list_del(&pos->al.node); disasm_line__free(pos); } diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index fc7a2e105bfdf..cf8092676c7ac 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -119,7 +119,7 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct symbol *sym, gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store)); g_object_unref(GTK_TREE_MODEL(store)); - list_for_each_entry(pos, ¬es->src->source, node) { + list_for_each_entry(pos, ¬es->src->source, al.node) { GtkTreeIter iter; int ret = 0; @@ -148,8 +148,8 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct symbol *sym, gtk_container_add(GTK_CONTAINER(window), view); - list_for_each_entry_safe(pos, n, ¬es->src->source, node) { - list_del(&pos->node); + list_for_each_entry_safe(pos, n, ¬es->src->source, al.node) { + list_del(&pos->al.node); disasm_line__free(pos); } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index da1c4c4a0dd84..004e33dc897c2 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -931,12 +931,12 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r static void disasm__add(struct list_head *head, struct disasm_line *line) { - list_add_tail(&line->node, head); + list_add_tail(&line->al.node, head); } struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos) { - list_for_each_entry_continue(pos, head, node) + list_for_each_entry_continue(pos, head, al.node) if (pos->offset >= 0) return pos; @@ -1122,7 +1122,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st return 1; if (queue != NULL) { - list_for_each_entry_from(queue, ¬es->src->source, node) { + list_for_each_entry_from(queue, ¬es->src->source, al.node) { if (queue == dl) break; disasm_line__print(queue, sym, start, evsel, len, @@ -1305,7 +1305,7 @@ static void delete_last_nop(struct symbol *sym) struct disasm_line *dl; while (!list_empty(list)) { - dl = list_entry(list->prev, struct disasm_line, node); + dl = list_entry(list->prev, struct disasm_line, al.node); if (dl->ins.ops) { if (dl->ins.ops != &nop_ops) @@ -1317,7 +1317,7 @@ static void delete_last_nop(struct symbol *sym) return; } - list_del(&dl->node); + list_del(&dl->al.node); disasm_line__free(dl); } } @@ -1844,7 +1844,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (verbose > 0) symbol__annotate_hits(sym, evsel); - list_for_each_entry(pos, ¬es->src->source, node) { + list_for_each_entry(pos, ¬es->src->source, al.node) { if (context && queue == NULL) { queue = pos; queue_len = 0; @@ -1874,7 +1874,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (!context) break; if (queue_len == context) - queue = list_entry(queue->node.next, typeof(*queue), node); + queue = list_entry(queue->al.node.next, typeof(*queue), al.node); else ++queue_len; break; @@ -1911,8 +1911,8 @@ void disasm__purge(struct list_head *head) { struct disasm_line *pos, *n; - list_for_each_entry_safe(pos, n, head, node) { - list_del(&pos->node); + list_for_each_entry_safe(pos, n, head, al.node) { + list_del(&pos->al.node); disasm_line__free(pos); } } @@ -1939,7 +1939,7 @@ size_t disasm__fprintf(struct list_head *head, FILE *fp) struct disasm_line *pos; size_t printed = 0; - list_for_each_entry(pos, head, node) + list_for_each_entry(pos, head, al.node) printed += disasm_line__fprintf(pos, fp); return printed; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index f6ba3560de5ea..cc3cf6b50d55a 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -59,15 +59,19 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); struct annotation; +struct annotation_line { + struct list_head node; +}; + struct disasm_line { - struct list_head node; - s64 offset; - char *line; - struct ins ins; - int line_nr; - float ipc; - u64 cycles; - struct ins_operands ops; + struct annotation_line al; + s64 offset; + char *line; + struct ins ins; + int line_nr; + float ipc; + u64 cycles; + struct ins_operands ops; }; static inline bool disasm_line__has_offset(const struct disasm_line *dl) From d5490b9647e6e41b203186ed0d73b4103f139fda Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:26 +0200 Subject: [PATCH 04/83] perf annotate: Move line/offset into annotation_line struct Move the line/line_nr/offset menbers to the annotation_line struct to be used as generic members for any annotation source. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 45 ++++++++++++++++--------------- tools/perf/ui/gtk/annotate.c | 14 +++++----- tools/perf/util/annotate.c | 41 ++++++++++++++-------------- tools/perf/util/annotate.h | 6 ++--- 4 files changed, 54 insertions(+), 52 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index a8c2f7405a41c..73d921c3e3ecc 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -84,8 +84,9 @@ static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, void *entry) { if (annotate_browser__opts.hide_src_code) { - struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); - return dl->offset == -1; + struct annotation_line *al = list_entry(entry, struct annotation_line, node); + + return al->offset == -1; } return false; @@ -141,7 +142,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int percent_max = bdl->samples[i].percent; } - if ((row == 0) && (dl->offset == -1 || percent_max == 0.0)) { + if ((row == 0) && (dl->al.offset == -1 || percent_max == 0.0)) { if (ab->have_cycles) { if (dl->ipc == 0.0 && dl->cycles == 0) show_title = true; @@ -149,7 +150,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int show_title = true; } - if (dl->offset != -1 && percent_max != 0.0) { + if (dl->al.offset != -1 && percent_max != 0.0) { for (i = 0; i < ab->nr_events; i++) { ui_browser__set_percent_color(browser, bdl->samples[i].percent, @@ -199,19 +200,19 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (!browser->navkeypressed) width += 1; - if (!*dl->line) + if (!*dl->al.line) ui_browser__write_nstring(browser, " ", width - pcnt_width - cycles_width); - else if (dl->offset == -1) { - if (dl->line_nr && annotate_browser__opts.show_linenr) + else if (dl->al.offset == -1) { + if (dl->al.line_nr && annotate_browser__opts.show_linenr) printed = scnprintf(bf, sizeof(bf), "%-*d ", - ab->addr_width + 1, dl->line_nr); + ab->addr_width + 1, dl->al.line_nr); else printed = scnprintf(bf, sizeof(bf), "%*s ", ab->addr_width, " "); ui_browser__write_nstring(browser, bf, printed); - ui_browser__write_nstring(browser, dl->line, width - printed - pcnt_width - cycles_width + 1); + ui_browser__write_nstring(browser, dl->al.line, width - printed - pcnt_width - cycles_width + 1); } else { - u64 addr = dl->offset; + u64 addr = dl->al.offset; int color = -1; if (!annotate_browser__opts.use_offset) @@ -247,7 +248,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ui_browser__set_color(browser, color); if (dl->ins.ops && dl->ins.ops->scnprintf) { if (ins__is_jump(&dl->ins)) { - bool fwd = dl->ops.target.offset > dl->offset; + bool fwd = dl->ops.target.offset > dl->al.offset; ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR : SLSMG_UARROW_CHAR); @@ -452,7 +453,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, double max_percent = 0.0; int i; - if (pos->offset == -1) { + if (pos->al.offset == -1) { RB_CLEAR_NODE(&bpos->rb_node); continue; } @@ -464,8 +465,8 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, bpos->samples[i].percent = disasm__calc_percent(notes, evsel->idx + i, - pos->offset, - next ? next->offset : len, + pos->al.offset, + next ? next->al.offset : len, &path, &sample); bpos->samples[i].he = sample; @@ -590,7 +591,7 @@ struct disasm_line *annotate_browser__find_offset(struct annotate_browser *brows *idx = 0; list_for_each_entry(pos, ¬es->src->source, al.node) { - if (pos->offset == offset) + if (pos->al.offset == offset) return pos; if (!disasm_line__filter(&browser->b, &pos->al.node)) ++*idx; @@ -636,7 +637,7 @@ struct disasm_line *annotate_browser__find_string(struct annotate_browser *brows ++*idx; - if (pos->line && strstr(pos->line, s) != NULL) + if (pos->al.line && strstr(pos->al.line, s) != NULL) return pos; } @@ -675,7 +676,7 @@ struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browse --*idx; - if (pos->line && strstr(pos->line, s) != NULL) + if (pos->al.line && strstr(pos->al.line, s) != NULL) return pos; } @@ -901,7 +902,7 @@ static int annotate_browser__run(struct annotate_browser *browser, case K_RIGHT: if (browser->selection == NULL) ui_helpline__puts("Huh? No selection. Report to linux-kernel@vger.kernel.org"); - else if (browser->selection->offset == -1) + else if (browser->selection->al.offset == -1) ui_helpline__puts("Actions are only available for assembly lines."); else if (!browser->selection->ins.ops) goto show_sup_ins; @@ -1136,13 +1137,13 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, list_for_each_entry(pos, ¬es->src->source, al.node) { struct browser_disasm_line *bpos; - size_t line_len = strlen(pos->line); + size_t line_len = strlen(pos->al.line); if (browser.b.width < line_len) browser.b.width = line_len; bpos = disasm_line__browser(pos); bpos->idx = browser.nr_entries++; - if (pos->offset != -1) { + if (pos->al.offset != -1) { bpos->idx_asm = browser.nr_asm_entries++; /* * FIXME: short term bandaid to cope with assembly @@ -1151,8 +1152,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, * * E.g. copy_user_generic_unrolled */ - if (pos->offset < (s64)size) - browser.offsets[pos->offset] = pos; + if (pos->al.offset < (s64)size) + browser.offsets[pos->al.offset] = pos; } else bpos->idx_asm = -1; } diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index cf8092676c7ac..162f15712d2d2 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -31,14 +31,14 @@ static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym, strcpy(buf, ""); - if (dl->offset == (s64) -1) + if (dl->al.offset == (s64) -1) return 0; symhist = annotation__histogram(symbol__annotation(sym), evidx); - if (!symbol_conf.event_group && !symhist->addr[dl->offset].nr_samples) + if (!symbol_conf.event_group && !symhist->addr[dl->al.offset].nr_samples) return 0; - percent = 100.0 * symhist->addr[dl->offset].nr_samples / symhist->nr_samples; + percent = 100.0 * symhist->addr[dl->al.offset].nr_samples / symhist->nr_samples; markup = perf_gtk__get_percent_color(percent); if (markup) @@ -57,16 +57,16 @@ static int perf_gtk__get_offset(char *buf, size_t size, struct symbol *sym, strcpy(buf, ""); - if (dl->offset == (s64) -1) + if (dl->al.offset == (s64) -1) return 0; - return scnprintf(buf, size, "%"PRIx64, start + dl->offset); + return scnprintf(buf, size, "%"PRIx64, start + dl->al.offset); } static int perf_gtk__get_line(char *buf, size_t size, struct disasm_line *dl) { int ret = 0; - char *line = g_markup_escape_text(dl->line, -1); + char *line = g_markup_escape_text(dl->al.line, -1); const char *markup = ""; strcpy(buf, ""); @@ -74,7 +74,7 @@ static int perf_gtk__get_line(char *buf, size_t size, struct disasm_line *dl) if (!line) return 0; - if (dl->offset != (s64) -1) + if (dl->al.offset != (s64) -1) markup = NULL; if (markup) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 004e33dc897c2..e8b69001229d3 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -886,14 +886,15 @@ static struct disasm_line *disasm_line__new(s64 offset, char *line, struct disasm_line *dl = zalloc(sizeof(*dl) + privsize); if (dl != NULL) { - dl->offset = offset; - dl->line = strdup(line); - dl->line_nr = line_nr; - if (dl->line == NULL) + dl->al.offset = offset; + dl->al.line = strdup(line); + dl->al.line_nr = line_nr; + + if (dl->al.line == NULL) goto out_delete; if (offset != -1) { - if (disasm_line__parse(dl->line, &dl->ins.name, &dl->ops.raw) < 0) + if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; disasm_line__init_ins(dl, arch, map); @@ -903,7 +904,7 @@ static struct disasm_line *disasm_line__new(s64 offset, char *line, return dl; out_free_line: - zfree(&dl->line); + zfree(&dl->al.line); out_delete: free(dl); return NULL; @@ -911,7 +912,7 @@ static struct disasm_line *disasm_line__new(s64 offset, char *line, void disasm_line__free(struct disasm_line *dl) { - zfree(&dl->line); + zfree(&dl->al.line); if (dl->ins.ops && dl->ins.ops->free) dl->ins.ops->free(&dl->ops); else @@ -937,7 +938,7 @@ static void disasm__add(struct list_head *head, struct disasm_line *line) struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos) { list_for_each_entry_continue(pos, head, al.node) - if (pos->offset >= 0) + if (pos->al.offset >= 0) return pos; return NULL; @@ -1077,7 +1078,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st static const char *prev_line; static const char *prev_color; - if (dl->offset != -1) { + if (dl->al.offset != -1) { const char *path = NULL; double percent, max_percent = 0.0; double *ppercents = &percent; @@ -1086,7 +1087,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st int i, nr_percent = 1; const char *color; struct annotation *notes = symbol__annotation(sym); - s64 offset = dl->offset; + s64 offset = dl->al.offset; const u64 addr = start + offset; struct disasm_line *next; struct block_range *br; @@ -1106,7 +1107,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st percent = disasm__calc_percent(notes, notes->src->lines ? i : evsel->idx + i, offset, - next ? next->offset : (s64) len, + next ? next->al.offset : (s64) len, &path, &sample); ppercents[i] = percent; @@ -1165,7 +1166,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st br = block_range__find(addr); color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr); - color_fprintf(stdout, annotate__asm_color(br), "%s", dl->line); + color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); annotate__branch_printf(br, addr); printf("\n"); @@ -1186,10 +1187,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (perf_evsel__is_group_event(evsel)) width *= evsel->nr_members; - if (!*dl->line) + if (!*dl->al.line) printf(" %*s:\n", width, " "); else - printf(" %*s: %s\n", width, " ", dl->line); + printf(" %*s: %s\n", width, " ", dl->al.line); } return 0; @@ -1311,9 +1312,9 @@ static void delete_last_nop(struct symbol *sym) if (dl->ins.ops != &nop_ops) return; } else { - if (!strstr(dl->line, " nop ") && - !strstr(dl->line, " nopl ") && - !strstr(dl->line, " nopw ")) + if (!strstr(dl->al.line, " nop ") && + !strstr(dl->al.line, " nopl ") && + !strstr(dl->al.line, " nopw ")) return; } @@ -1921,10 +1922,10 @@ static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp) { size_t printed; - if (dl->offset == -1) - return fprintf(fp, "%s\n", dl->line); + if (dl->al.offset == -1) + return fprintf(fp, "%s\n", dl->al.line); - printed = fprintf(fp, "%#" PRIx64 " %s", dl->offset, dl->ins.name); + printed = fprintf(fp, "%#" PRIx64 " %s", dl->al.offset, dl->ins.name); if (dl->ops.raw[0] != '\0') { printed += fprintf(fp, "%.*s %s\n", 6 - (int)printed, " ", diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index cc3cf6b50d55a..b7ca628557601 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -61,14 +61,14 @@ struct annotation; struct annotation_line { struct list_head node; + s64 offset; + char *line; + int line_nr; }; struct disasm_line { struct annotation_line al; - s64 offset; - char *line; struct ins ins; - int line_nr; float ipc; u64 cycles; struct ins_operands ops; From 37236d5e0b6a765319dec3e64d828cb44ebecac6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:27 +0200 Subject: [PATCH 05/83] perf annotate: Move ipc/cycles into annotation_line struct Move ipc/cycles into annotation_line struct to be used as generic members for any annotation source. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 16 ++++++++-------- tools/perf/util/annotate.h | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 73d921c3e3ecc..d1aff2f7cb6c8 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -144,7 +144,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if ((row == 0) && (dl->al.offset == -1 || percent_max == 0.0)) { if (ab->have_cycles) { - if (dl->ipc == 0.0 && dl->cycles == 0) + if (dl->al.ipc == 0.0 && dl->al.cycles == 0) show_title = true; } else show_title = true; @@ -178,16 +178,16 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } } if (ab->have_cycles) { - if (dl->ipc) - ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->ipc); + if (dl->al.ipc) + ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->al.ipc); else if (!show_title) ui_browser__write_nstring(browser, " ", IPC_WIDTH); else ui_browser__printf(browser, "%*s ", IPC_WIDTH - 1, "IPC"); - if (dl->cycles) + if (dl->al.cycles) ui_browser__printf(browser, "%*" PRIu64 " ", - CYCLES_WIDTH - 1, dl->cycles); + CYCLES_WIDTH - 1, dl->al.cycles); else if (!show_title) ui_browser__write_nstring(browser, " ", CYCLES_WIDTH); else @@ -474,7 +474,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, max_percent = bpos->samples[i].percent; } - if (max_percent < 0.01 && pos->ipc == 0) { + if (max_percent < 0.01 && pos->al.ipc == 0) { RB_CLEAR_NODE(&bpos->rb_node); continue; } @@ -994,7 +994,7 @@ static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end, struct disasm_line *dl = browser->offsets[offset]; if (dl) - dl->ipc = ipc; + dl->al.ipc = ipc; } } } @@ -1025,7 +1025,7 @@ static void annotate__compute_ipc(struct annotate_browser *browser, size_t size, count_and_fill(browser, ch->start, offset, ch); dl = browser->offsets[offset]; if (dl && ch->num_aggr) - dl->cycles = ch->cycles_aggr / ch->num_aggr; + dl->al.cycles = ch->cycles_aggr / ch->num_aggr; browser->have_cycles = true; } } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index b7ca628557601..a822c0a4987ec 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -64,13 +64,13 @@ struct annotation_line { s64 offset; char *line; int line_nr; + float ipc; + u64 cycles; }; struct disasm_line { struct annotation_line al; struct ins ins; - float ipc; - u64 cycles; struct ins_operands ops; }; From c34df25b40c20b478634b954a709749aebdc241a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:28 +0200 Subject: [PATCH 06/83] perf annotate: Add symbol__annotate function Add symbol__annotate function to have generic annotation function to be called for all annotation sources. It calls the generic annotation init and then the specific annotation data retrieval function. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 2 +- tools/perf/ui/browsers/annotate.c | 6 ++-- tools/perf/ui/gtk/annotate.c | 4 +-- tools/perf/util/annotate.c | 58 ++++++++++++++++++------------- tools/perf/util/annotate.h | 6 ++-- 5 files changed, 42 insertions(+), 34 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 477a8699f0b50..adfeeb488f1ad 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -134,7 +134,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__disassemble(sym, map, NULL, 0, NULL, NULL); + err = symbol__annotate(sym, map, NULL, 0, NULL, NULL); if (err == 0) { out_assign: top->sym_filter_entry = he; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index d1aff2f7cb6c8..d77994c1cba9c 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1120,9 +1120,9 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, (nr_pcnt - 1); } - err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), - sizeof_bdl, &browser.arch, - perf_evsel__env_cpuid(evsel)); + err = symbol__annotate(sym, map, perf_evsel__env_arch(evsel), + sizeof_bdl, &browser.arch, + perf_evsel__env_cpuid(evsel)); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 162f15712d2d2..b498f1a92bb15 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -169,8 +169,8 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), - 0, NULL, NULL); + err = symbol__annotate(sym, map, perf_evsel__env_arch(evsel), + 0, NULL, NULL); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e8b69001229d3..f0093918882d7 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1425,13 +1425,11 @@ static const char *annotate__norm_arch(const char *arch_name) return normalize_arch((char *)arch_name); } -int symbol__disassemble(struct symbol *sym, struct map *map, - const char *arch_name, size_t privsize, - struct arch **parch, char *cpuid) +static int symbol__disassemble(struct symbol *sym, struct map *map, + size_t privsize, struct arch *arch) { struct dso *dso = map->dso; char command[PATH_MAX * 2]; - struct arch *arch = NULL; FILE *file; char symfs_filename[PATH_MAX]; struct kcore_extract kce; @@ -1445,25 +1443,6 @@ int symbol__disassemble(struct symbol *sym, struct map *map, if (err) return err; - arch_name = annotate__norm_arch(arch_name); - if (!arch_name) - return -1; - - arch = arch__find(arch_name); - if (arch == NULL) - return -ENOTSUP; - - if (parch) - *parch = arch; - - if (arch->init) { - err = arch->init(arch, cpuid); - if (err) { - pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); - return err; - } - } - pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, symfs_filename, sym->name, map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end)); @@ -1581,6 +1560,35 @@ int symbol__disassemble(struct symbol *sym, struct map *map, goto out_remove_tmp; } +int symbol__annotate(struct symbol *sym, struct map *map, + const char *arch_name, size_t privsize, + struct arch **parch, char *cpuid) +{ + struct arch *arch; + int err; + + arch_name = annotate__norm_arch(arch_name); + if (!arch_name) + return -1; + + arch = arch__find(arch_name); + if (arch == NULL) + return -ENOTSUP; + + if (parch) + *parch = arch; + + if (arch->init) { + err = arch->init(arch, cpuid); + if (err) { + pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); + return err; + } + } + + return symbol__disassemble(sym, map, privsize, arch); +} + static void insert_source_line(struct rb_root *root, struct source_line *src_line) { struct source_line *iter; @@ -1954,8 +1962,8 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, struct rb_root source_line = RB_ROOT; u64 len; - if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), - 0, NULL, NULL) < 0) + if (symbol__annotate(sym, map, perf_evsel__env_arch(evsel), + 0, NULL, NULL) < 0) return -1; len = symbol__size(sym); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index a822c0a4987ec..e577f9d13a587 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -173,9 +173,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *samp int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); -int symbol__disassemble(struct symbol *sym, struct map *map, - const char *arch_name, size_t privsize, - struct arch **parch, char *cpuid); +int symbol__annotate(struct symbol *sym, struct map *map, + const char *arch_name, size_t privsize, + struct arch **parch, char *cpuid); enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, From ea07c5aaed33d23875cd59da8b0892f76e882ccd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:29 +0200 Subject: [PATCH 07/83] perf annotate: Add struct annotate_args Adding struct annotate_args to reduce the number of arguments, that need to travel all the way to line allocation. This makes the code easier to read and ease up the changes for following patches. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index f0093918882d7..f5bd6826fa66c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -878,12 +878,17 @@ static int disasm_line__parse(char *line, const char **namep, char **rawp) return -1; } -static struct disasm_line *disasm_line__new(s64 offset, char *line, - size_t privsize, int line_nr, +struct annotate_args { + size_t privsize; +}; + +static struct disasm_line *disasm_line__new(struct annotate_args *args, + s64 offset, char *line, + int line_nr, struct arch *arch, struct map *map) { - struct disasm_line *dl = zalloc(sizeof(*dl) + privsize); + struct disasm_line *dl = zalloc(sizeof(*dl) + args->privsize); if (dl != NULL) { dl->al.offset = offset; @@ -1217,8 +1222,8 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * The ops.raw part will be parsed further according to type of the instruction. */ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, - struct arch *arch, - FILE *file, size_t privsize, + struct arch *arch, FILE *file, + struct annotate_args *args, int *line_nr) { struct annotation *notes = symbol__annotation(sym); @@ -1264,7 +1269,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, parsed_line = tmp2 + 1; } - dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, arch, map); + dl = disasm_line__new(args, offset, parsed_line, *line_nr, arch, map); free(line); (*line_nr)++; @@ -1426,7 +1431,8 @@ static const char *annotate__norm_arch(const char *arch_name) } static int symbol__disassemble(struct symbol *sym, struct map *map, - size_t privsize, struct arch *arch) + struct annotate_args *args, + struct arch *arch) { struct dso *dso = map->dso; char command[PATH_MAX * 2]; @@ -1526,7 +1532,7 @@ static int symbol__disassemble(struct symbol *sym, struct map *map, * can associate it with the instructions till the next one. * See disasm_line__new() and struct disasm_line::line_nr. */ - if (symbol__parse_objdump_line(sym, map, arch, file, privsize, + if (symbol__parse_objdump_line(sym, map, arch, file, args, &lineno) < 0) break; nline++; @@ -1564,6 +1570,9 @@ int symbol__annotate(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize, struct arch **parch, char *cpuid) { + struct annotate_args args = { + .privsize = privsize, + }; struct arch *arch; int err; @@ -1586,7 +1595,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, } } - return symbol__disassemble(sym, map, privsize, arch); + return symbol__disassemble(sym, map, &args, arch); } static void insert_source_line(struct rb_root *root, struct source_line *src_line) From 24fe7b88934b702442597662643222cd0a6a44a6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:30 +0200 Subject: [PATCH 08/83] perf annotate: Add arch into struct annotate_args Add arch into struct annotate_args to reduce the number of arguments that need to travel all the way to line allocation. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index f5bd6826fa66c..b4d3454618b08 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -880,12 +880,12 @@ static int disasm_line__parse(char *line, const char **namep, char **rawp) struct annotate_args { size_t privsize; + struct arch *arch; }; static struct disasm_line *disasm_line__new(struct annotate_args *args, s64 offset, char *line, int line_nr, - struct arch *arch, struct map *map) { struct disasm_line *dl = zalloc(sizeof(*dl) + args->privsize); @@ -902,7 +902,7 @@ static struct disasm_line *disasm_line__new(struct annotate_args *args, if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; - disasm_line__init_ins(dl, arch, map); + disasm_line__init_ins(dl, args->arch, map); } } @@ -1222,7 +1222,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * The ops.raw part will be parsed further according to type of the instruction. */ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, - struct arch *arch, FILE *file, + FILE *file, struct annotate_args *args, int *line_nr) { @@ -1269,7 +1269,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, parsed_line = tmp2 + 1; } - dl = disasm_line__new(args, offset, parsed_line, *line_nr, arch, map); + dl = disasm_line__new(args, offset, parsed_line, *line_nr, map); free(line); (*line_nr)++; @@ -1431,8 +1431,7 @@ static const char *annotate__norm_arch(const char *arch_name) } static int symbol__disassemble(struct symbol *sym, struct map *map, - struct annotate_args *args, - struct arch *arch) + struct annotate_args *args) { struct dso *dso = map->dso; char command[PATH_MAX * 2]; @@ -1532,7 +1531,7 @@ static int symbol__disassemble(struct symbol *sym, struct map *map, * can associate it with the instructions till the next one. * See disasm_line__new() and struct disasm_line::line_nr. */ - if (symbol__parse_objdump_line(sym, map, arch, file, args, + if (symbol__parse_objdump_line(sym, map, file, args, &lineno) < 0) break; nline++; @@ -1580,7 +1579,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, if (!arch_name) return -1; - arch = arch__find(arch_name); + args.arch = arch = arch__find(arch_name); if (arch == NULL) return -ENOTSUP; @@ -1595,7 +1594,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, } } - return symbol__disassemble(sym, map, &args, arch); + return symbol__disassemble(sym, map, &args); } static void insert_source_line(struct rb_root *root, struct source_line *src_line) From 1a04db70dcbf621f9919e95456c372281779c053 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:31 +0200 Subject: [PATCH 09/83] perf annotate: Add map into struct annotate_args Add map into struct annotate_args to reduce the number of arguments that need to travel all the way to line allocation. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index b4d3454618b08..30da4402a3e43 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -881,12 +881,12 @@ static int disasm_line__parse(char *line, const char **namep, char **rawp) struct annotate_args { size_t privsize; struct arch *arch; + struct map *map; }; static struct disasm_line *disasm_line__new(struct annotate_args *args, s64 offset, char *line, - int line_nr, - struct map *map) + int line_nr) { struct disasm_line *dl = zalloc(sizeof(*dl) + args->privsize); @@ -902,7 +902,7 @@ static struct disasm_line *disasm_line__new(struct annotate_args *args, if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; - disasm_line__init_ins(dl, args->arch, map); + disasm_line__init_ins(dl, args->arch, args->map); } } @@ -1221,11 +1221,11 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * means that it's not a disassembly line so should be treated differently. * The ops.raw part will be parsed further according to type of the instruction. */ -static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, - FILE *file, +static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, struct annotate_args *args, int *line_nr) { + struct map *map = args->map; struct annotation *notes = symbol__annotation(sym); struct disasm_line *dl; char *line = NULL, *parsed_line, *tmp, *tmp2; @@ -1269,7 +1269,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, parsed_line = tmp2 + 1; } - dl = disasm_line__new(args, offset, parsed_line, *line_nr, map); + dl = disasm_line__new(args, offset, parsed_line, *line_nr); free(line); (*line_nr)++; @@ -1430,9 +1430,9 @@ static const char *annotate__norm_arch(const char *arch_name) return normalize_arch((char *)arch_name); } -static int symbol__disassemble(struct symbol *sym, struct map *map, - struct annotate_args *args) +static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { + struct map *map = args->map; struct dso *dso = map->dso; char command[PATH_MAX * 2]; FILE *file; @@ -1531,8 +1531,7 @@ static int symbol__disassemble(struct symbol *sym, struct map *map, * can associate it with the instructions till the next one. * See disasm_line__new() and struct disasm_line::line_nr. */ - if (symbol__parse_objdump_line(sym, map, file, args, - &lineno) < 0) + if (symbol__parse_objdump_line(sym, file, args, &lineno) < 0) break; nline++; } @@ -1571,6 +1570,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, { struct annotate_args args = { .privsize = privsize, + .map = map, }; struct arch *arch; int err; @@ -1594,7 +1594,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, } } - return symbol__disassemble(sym, map, &args); + return symbol__disassemble(sym, &args); } static void insert_source_line(struct rb_root *root, struct source_line *src_line) From 4748834f96903f843719b02190f98e36b2c55192 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:32 +0200 Subject: [PATCH 10/83] perf annotate: Add offset/line/line_nr into struct annotate_args Add offset/line/line_nr into struct annotate_args to reduce the number of arguments that need to travel all the way to line allocation. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 30da4402a3e43..681c9c4ce9f96 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -882,23 +882,24 @@ struct annotate_args { size_t privsize; struct arch *arch; struct map *map; + s64 offset; + char *line; + int line_nr; }; -static struct disasm_line *disasm_line__new(struct annotate_args *args, - s64 offset, char *line, - int line_nr) +static struct disasm_line *disasm_line__new(struct annotate_args *args) { struct disasm_line *dl = zalloc(sizeof(*dl) + args->privsize); if (dl != NULL) { - dl->al.offset = offset; - dl->al.line = strdup(line); - dl->al.line_nr = line_nr; + dl->al.offset = args->offset; + dl->al.line = strdup(args->line); + dl->al.line_nr = args->line_nr; if (dl->al.line == NULL) goto out_delete; - if (offset != -1) { + if (args->offset != -1) { if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; @@ -1269,7 +1270,11 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, parsed_line = tmp2 + 1; } - dl = disasm_line__new(args, offset, parsed_line, *line_nr); + args->offset = offset; + args->line = parsed_line; + args->line_nr = *line_nr; + + dl = disasm_line__new(args); free(line); (*line_nr)++; From d03a686ea6e77b25edacc3eed386cef870e8d248 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:33 +0200 Subject: [PATCH 11/83] perf annotate: Add evsel into struct annotation_line_args Add evsel into struct annotate_args to reduce the number of arguments that need to travel all the way to line allocation. This change also allow us to move the arch name initialization under symbol__annotate function. Link: http://lkml.kernel.org/n/tip-a9ok53rrgt1s5e8uglyvy6qt@git.kernel.org Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-11-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 2 +- tools/perf/ui/gtk/annotate.c | 3 +-- tools/perf/util/annotate.c | 11 ++++++++--- tools/perf/util/annotate.h | 2 +- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index d77994c1cba9c..3b72519c085fe 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1120,7 +1120,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, (nr_pcnt - 1); } - err = symbol__annotate(sym, map, perf_evsel__env_arch(evsel), + err = symbol__annotate(sym, map, evsel, sizeof_bdl, &browser.arch, perf_evsel__env_cpuid(evsel)); if (err) { diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index b498f1a92bb15..5e0a56df0b4c7 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -169,8 +169,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - err = symbol__annotate(sym, map, perf_evsel__env_arch(evsel), - 0, NULL, NULL); + err = symbol__annotate(sym, map, evsel, 0, NULL, NULL); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 681c9c4ce9f96..75f54eab22c88 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -882,6 +882,7 @@ struct annotate_args { size_t privsize; struct arch *arch; struct map *map; + struct perf_evsel *evsel; s64 offset; char *line; int line_nr; @@ -1570,16 +1571,21 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) } int symbol__annotate(struct symbol *sym, struct map *map, - const char *arch_name, size_t privsize, + struct perf_evsel *evsel, size_t privsize, struct arch **parch, char *cpuid) { struct annotate_args args = { .privsize = privsize, .map = map, + .evsel = evsel, }; + const char *arch_name = NULL; struct arch *arch; int err; + if (evsel) + arch_name = perf_evsel__env_arch(evsel); + arch_name = annotate__norm_arch(arch_name); if (!arch_name) return -1; @@ -1975,8 +1981,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, struct rb_root source_line = RB_ROOT; u64 len; - if (symbol__annotate(sym, map, perf_evsel__env_arch(evsel), - 0, NULL, NULL) < 0) + if (symbol__annotate(sym, map, evsel, 0, NULL, NULL) < 0) return -1; len = symbol__size(sym); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index e577f9d13a587..baf34032504a1 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -174,7 +174,7 @@ int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct symbol *sym, struct map *map, - const char *arch_name, size_t privsize, + struct perf_evsel *evsel, size_t privsize, struct arch **parch, char *cpuid); enum symbol_disassemble_errno { From c4c724364d398a9746410d5ff482e8c4c7228249 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:34 +0200 Subject: [PATCH 12/83] perf annotate: Add annotation_line__next function Rename disasm__get_next_ip_line() to annotation_line__next() to make it work over a generic struct annotation_line. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-12-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 7 ++++--- tools/perf/util/annotate.c | 13 +++++++------ tools/perf/util/annotate.h | 3 ++- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 3b72519c085fe..881ad6122057e 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -440,7 +440,8 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct disasm_line *pos, *next; + struct annotation_line *next; + struct disasm_line *pos; s64 len = symbol__size(sym); browser->entries = RB_ROOT; @@ -458,7 +459,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, continue; } - next = disasm__get_next_ip_line(¬es->src->source, pos); + next = annotation_line__next(&pos->al, ¬es->src->source); for (i = 0; i < browser->nr_events; i++) { struct sym_hist_entry sample; @@ -466,7 +467,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, bpos->samples[i].percent = disasm__calc_percent(notes, evsel->idx + i, pos->al.offset, - next ? next->al.offset : len, + next ? next->offset : len, &path, &sample); bpos->samples[i].he = sample; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 75f54eab22c88..e7da88d7bb272 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -942,10 +942,11 @@ static void disasm__add(struct list_head *head, struct disasm_line *line) list_add_tail(&line->al.node, head); } -struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos) +struct annotation_line * +annotation_line__next(struct annotation_line *pos, struct list_head *head) { - list_for_each_entry_continue(pos, head, al.node) - if (pos->al.offset >= 0) + list_for_each_entry_continue(pos, head, node) + if (pos->offset >= 0) return pos; return NULL; @@ -1096,10 +1097,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st struct annotation *notes = symbol__annotation(sym); s64 offset = dl->al.offset; const u64 addr = start + offset; - struct disasm_line *next; + struct annotation_line *next; struct block_range *br; - next = disasm__get_next_ip_line(¬es->src->source, dl); + next = annotation_line__next(&dl->al, ¬es->src->source); if (perf_evsel__is_group_event(evsel)) { nr_percent = evsel->nr_members; @@ -1114,7 +1115,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st percent = disasm__calc_percent(notes, notes->src->lines ? i : evsel->idx + i, offset, - next ? next->al.offset : (s64) len, + next ? next->offset : (s64) len, &path, &sample); ppercents[i] = percent; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index baf34032504a1..43bef6cacbc4c 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -85,7 +85,8 @@ struct sym_hist_entry { }; void disasm_line__free(struct disasm_line *dl); -struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos); +struct annotation_line * +annotation_line__next(struct annotation_line *pos, struct list_head *head); int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); size_t disasm__fprintf(struct list_head *head, FILE *fp); double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, From 82b9d7ff096b7e7ae3efaeb341ee673bb494bb61 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:35 +0200 Subject: [PATCH 13/83] perf annotate: Add annotation_line__add function Rename disasm__add() into annotation_line__add() to make it work over a generic struct annotation_line. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-13-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e7da88d7bb272..11c7743203a08 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -937,9 +937,9 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r return ins__scnprintf(&dl->ins, bf, size, &dl->ops); } -static void disasm__add(struct list_head *head, struct disasm_line *line) +static void annotation_line__add(struct annotation_line *al, struct list_head *head) { - list_add_tail(&line->al.node, head); + list_add_tail(&al->node, head); } struct annotation_line * @@ -1301,7 +1301,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, dl->ops.target.name = strdup(target.sym->name); } - disasm__add(¬es->src->source, dl); + annotation_line__add(&dl->al, ¬es->src->source); return 0; } From 5b12adc849be011fd6d99a16e39d83afee43c0a0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:36 +0200 Subject: [PATCH 14/83] perf annotate: Move rb_node to struct annotation_line Move rb_node to struct annotation_line to make struct annotation_line the rb tree node for sorted lines used in both stdio and TUI code. This way we can unite the sorted lines lines codes for both TUI and stdio in the following patches. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-14-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 30 ++++++++++++++++-------------- tools/perf/util/annotate.h | 1 + 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 881ad6122057e..cfde5a2ca3f47 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -26,7 +26,6 @@ struct disasm_line_samples { #define CYCLES_WIDTH 6 struct browser_disasm_line { - struct rb_node rb_node; u32 idx; int idx_asm; int jump_sources; @@ -362,9 +361,11 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) return ret; } -static int disasm__cmp(struct browser_disasm_line *a, - struct browser_disasm_line *b, int nr_pcnt) +static int disasm__cmp(struct disasm_line *da, + struct disasm_line *db, int nr_pcnt) { + struct browser_disasm_line *a = disasm_line__browser(da); + struct browser_disasm_line *b = disasm_line__browser(db); int i; for (i = 0; i < nr_pcnt; i++) { @@ -375,24 +376,24 @@ static int disasm__cmp(struct browser_disasm_line *a, return 0; } -static void disasm_rb_tree__insert(struct rb_root *root, struct browser_disasm_line *bdl, +static void disasm_rb_tree__insert(struct rb_root *root, struct disasm_line *dl, int nr_events) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; - struct browser_disasm_line *l; + struct disasm_line *l; while (*p != NULL) { parent = *p; - l = rb_entry(parent, struct browser_disasm_line, rb_node); + l = rb_entry(parent, struct disasm_line, al.rb_node); - if (disasm__cmp(bdl, l, nr_events)) + if (disasm__cmp(dl, l, nr_events)) p = &(*p)->rb_left; else p = &(*p)->rb_right; } - rb_link_node(&bdl->rb_node, parent, p); - rb_insert_color(&bdl->rb_node, root); + rb_link_node(&dl->al.rb_node, parent, p); + rb_insert_color(&dl->al.rb_node, root); } static void annotate_browser__set_top(struct annotate_browser *browser, @@ -425,8 +426,9 @@ static void annotate_browser__set_rb_top(struct annotate_browser *browser, struct disasm_line *pos; u32 idx; - bpos = rb_entry(nd, struct browser_disasm_line, rb_node); - pos = ((struct disasm_line *)bpos) - 1; + pos = rb_entry(nd, struct disasm_line, al.rb_node); + bpos = disasm_line__browser(pos); + idx = bpos->idx; if (annotate_browser__opts.hide_src_code) idx = bpos->idx_asm; @@ -455,7 +457,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, int i; if (pos->al.offset == -1) { - RB_CLEAR_NODE(&bpos->rb_node); + RB_CLEAR_NODE(&pos->al.rb_node); continue; } @@ -476,10 +478,10 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, } if (max_percent < 0.01 && pos->al.ipc == 0) { - RB_CLEAR_NODE(&bpos->rb_node); + RB_CLEAR_NODE(&pos->al.rb_node); continue; } - disasm_rb_tree__insert(&browser->entries, bpos, + disasm_rb_tree__insert(&browser->entries, pos, browser->nr_events); } pthread_mutex_unlock(¬es->lock); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 43bef6cacbc4c..6f01e61179369 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -61,6 +61,7 @@ struct annotation; struct annotation_line { struct list_head node; + struct rb_node rb_node; s64 offset; char *line; int line_nr; From c835e1914c4bcfdd41f43d270cafc6d8119d7782 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:37 +0200 Subject: [PATCH 15/83] perf annotate: Add annotation_line__(new|delete) functions Changing the way the annotation lines are allocated and adding annotation_line__(new|delete) functions to deal with this. Before the allocation schema was as follows: ----------------------------------------------------------- struct disasm_line | struct annotation_line | private space ----------------------------------------------------------- Where the private space is used in TUI code to store computed annotation data for events. The stdio code computes the data on the fly. The goal is to compute and store annotation line's data directly in the struct annotation_line itself, so this patch changes the line allocation schema as follows: ------------------------------------------------------------ privsize space | struct disasm_line | struct annotation_line ------------------------------------------------------------ Moving struct annotation_line to the end, because in following changes we will move here the non-fixed length event's data. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-15-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 4 +- tools/perf/util/annotate.c | 63 +++++++++++++++++++++++++++---- tools/perf/util/annotate.h | 10 ++++- 3 files changed, 68 insertions(+), 9 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index cfde5a2ca3f47..7ca5ae625cc93 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -76,7 +76,9 @@ struct annotate_browser { static inline struct browser_disasm_line *disasm_line__browser(struct disasm_line *dl) { - return (struct browser_disasm_line *)(dl + 1); + struct annotation_line *al = &dl->al; + + return (void *) al - al->privsize; } static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 11c7743203a08..7c74700ae6d73 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -888,14 +888,64 @@ struct annotate_args { int line_nr; }; +static void annotation_line__delete(struct annotation_line *al) +{ + void *ptr = (void *) al - al->privsize; + + zfree(&al->line); + free(ptr); +} + +/* + * Allocating the annotation line data with following + * structure: + * + * -------------------------------------- + * private space | struct annotation_line + * -------------------------------------- + * + * Size of the private space is stored in 'struct annotation_line'. + * + */ +static struct annotation_line * +annotation_line__new(struct annotate_args *args, size_t privsize) +{ + struct annotation_line *al; + size_t size = privsize + sizeof(*al); + + al = zalloc(size); + if (al) { + al = (void *) al + privsize; + al->privsize = privsize; + al->offset = args->offset; + al->line = strdup(args->line); + al->line_nr = args->line_nr; + } + + return al; +} + +/* + * Allocating the disasm annotation line data with + * following structure: + * + * ------------------------------------------------------------ + * privsize space | struct disasm_line | struct annotation_line + * ------------------------------------------------------------ + * + * We have 'struct annotation_line' member as last member + * of 'struct disasm_line' to have an easy access. + * + */ static struct disasm_line *disasm_line__new(struct annotate_args *args) { - struct disasm_line *dl = zalloc(sizeof(*dl) + args->privsize); + struct disasm_line *dl = NULL; + struct annotation_line *al; + size_t privsize = args->privsize + offsetof(struct disasm_line, al); - if (dl != NULL) { - dl->al.offset = args->offset; - dl->al.line = strdup(args->line); - dl->al.line_nr = args->line_nr; + al = annotation_line__new(args, privsize); + if (al != NULL) { + dl = disasm_line(al); if (dl->al.line == NULL) goto out_delete; @@ -919,14 +969,13 @@ static struct disasm_line *disasm_line__new(struct annotate_args *args) void disasm_line__free(struct disasm_line *dl) { - zfree(&dl->al.line); if (dl->ins.ops && dl->ins.ops->free) dl->ins.ops->free(&dl->ops); else ins__delete(&dl->ops); free((void *)dl->ins.name); dl->ins.name = NULL; - free(dl); + annotation_line__delete(&dl->al); } int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 6f01e61179369..2e7a08afb04f2 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -67,14 +67,22 @@ struct annotation_line { int line_nr; float ipc; u64 cycles; + size_t privsize; }; struct disasm_line { - struct annotation_line al; struct ins ins; struct ins_operands ops; + + /* This needs to be at the end. */ + struct annotation_line al; }; +static inline struct disasm_line *disasm_line(struct annotation_line *al) +{ + return al ? container_of(al, struct disasm_line, al) : NULL; +} + static inline bool disasm_line__has_offset(const struct disasm_line *dl) { return dl->ops.target.offset_avail; From f8eb37bd7c33babc01d9c2e3074ce001eec6cfbb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:38 +0200 Subject: [PATCH 16/83] perf annotate: Add annotated_source__purge function Mov disasm__purge() to annotated_source__purge() to make it work over a generic struct annotation_line. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-16-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 8 +++----- tools/perf/util/annotate.c | 12 ++++++------ tools/perf/util/annotate.h | 2 +- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 7ca5ae625cc93..4c54d5e760088 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1084,7 +1084,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, struct hist_browser_timer *hbt) { - struct disasm_line *pos, *n; + struct disasm_line *pos; struct annotation *notes; size_t size; struct map_symbol ms = { @@ -1180,10 +1180,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, annotate_browser__update_addr_width(&browser); ret = annotate_browser__run(&browser, evsel, hbt); - list_for_each_entry_safe(pos, n, ¬es->src->source, al.node) { - list_del(&pos->al.node); - disasm_line__free(pos); - } + + annotated_source__purge(notes->src); out_free_offsets: free(browser.offsets); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 7c74700ae6d73..0c2eb95ba90ae 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1985,13 +1985,13 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx) } } -void disasm__purge(struct list_head *head) +void annotated_source__purge(struct annotated_source *as) { - struct disasm_line *pos, *n; + struct annotation_line *al, *n; - list_for_each_entry_safe(pos, n, head, al.node) { - list_del(&pos->al.node); - disasm_line__free(pos); + list_for_each_entry_safe(al, n, &as->source, node) { + list_del(&al->node); + disasm_line__free(disasm_line(al)); } } @@ -2047,7 +2047,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, if (print_lines) symbol__free_source_line(sym, len); - disasm__purge(&symbol__annotation(sym)->src->source); + annotated_source__purge(symbol__annotation(sym)->src); return 0; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 2e7a08afb04f2..cb60cafae1fbc 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -212,7 +212,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int min_pcnt, int max_lines, int context); void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); -void disasm__purge(struct list_head *head); +void annotated_source__purge(struct annotated_source *as); bool ui__has_annotation(void); From 7e304557ead5b309d59807b2f05ed47f2c0076c6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:39 +0200 Subject: [PATCH 17/83] perf annotate: Add samples into struct annotation_line Add samples array into struct annotation_line to hold the annotation data. The data is populated in the following patches. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-17-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 8 ++++++++ tools/perf/util/annotate.h | 17 ++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0c2eb95ba90ae..313fb2e90dba9 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -911,7 +911,14 @@ static struct annotation_line * annotation_line__new(struct annotate_args *args, size_t privsize) { struct annotation_line *al; + struct perf_evsel *evsel = args->evsel; size_t size = privsize + sizeof(*al); + int nr = 1; + + if (perf_evsel__is_group_event(evsel)) + nr = evsel->nr_members; + + size += sizeof(al->samples[0]) * nr; al = zalloc(size); if (al) { @@ -920,6 +927,7 @@ annotation_line__new(struct annotate_args *args, size_t privsize) al->offset = args->offset; al->line = strdup(args->line); al->line_nr = args->line_nr; + al->samples_nr = nr; } return al; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index cb60cafae1fbc..55bdd9015f331 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -59,6 +59,16 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); struct annotation; +struct sym_hist_entry { + u64 nr_samples; + u64 period; +}; + +struct annotation_data { + double percent; + struct sym_hist_entry he; +}; + struct annotation_line { struct list_head node; struct rb_node rb_node; @@ -68,6 +78,8 @@ struct annotation_line { float ipc; u64 cycles; size_t privsize; + int samples_nr; + struct annotation_data samples[0]; }; struct disasm_line { @@ -88,11 +100,6 @@ static inline bool disasm_line__has_offset(const struct disasm_line *dl) return dl->ops.target.offset_avail; } -struct sym_hist_entry { - u64 nr_samples; - u64 period; -}; - void disasm_line__free(struct disasm_line *dl); struct annotation_line * annotation_line__next(struct annotation_line *pos, struct list_head *head); From 073ae601edc211383b62618effaaedaa8b1d22db Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:40 +0200 Subject: [PATCH 18/83] perf annotate: Add symbol__calc_percent function Add symbol__calc_percent function, that calculates annotation data for symbol and put the data in the struct annotation_line::samples array. Committer notes: Made symbol__calc_percent non static to be used in the next two patches, which will get some fixups from jolsa, doing it this way to keep this bisectable. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-18-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 62 +++++++++++++++++++++++++++++++++++++- tools/perf/util/annotate.h | 1 + 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 313fb2e90dba9..ff1036096347f 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1628,6 +1628,62 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) goto out_remove_tmp; } +static void calc_percent(struct sym_hist *hist, + struct annotation_data *sample, + s64 offset, s64 end) +{ + unsigned int hits = 0; + u64 period = 0; + + while (offset < end) { + hits += hist->addr[offset].nr_samples; + period += hist->addr[offset].period; + ++offset; + } + + if (hist->nr_samples) { + sample->he.period = period; + sample->he.nr_samples = hits; + sample->percent = 100.0 * hits / hist->nr_samples; + } +} + +static int annotation__calc_percent(struct annotation *notes, + struct perf_evsel *evsel, s64 len) +{ + struct annotation_line *al, *next; + + list_for_each_entry(al, ¬es->src->source, node) { + s64 end; + int i; + + if (al->offset == -1) + continue; + + next = annotation_line__next(al, ¬es->src->source); + end = next ? next->offset : len; + + for (i = 0; i < al->samples_nr; i++) { + struct annotation_data *sample; + struct sym_hist *hist; + + hist = annotation__histogram(notes, evsel->idx + i); + sample = &al->samples[i]; + + calc_percent(hist, sample, al->offset, end); + } + } + + return 0; +} + +int symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) +{ + struct annotation *notes = symbol__annotation(sym); + + return annotation__calc_percent(notes, evsel, symbol__size(sym)); +} + int symbol__annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, size_t privsize, struct arch **parch, char *cpuid) @@ -1663,7 +1719,11 @@ int symbol__annotate(struct symbol *sym, struct map *map, } } - return symbol__disassemble(sym, &args); + err = symbol__disassemble(sym, &args); + if (err) + return err; + + return symbol__calc_percent(sym, evsel); } static void insert_source_line(struct rb_root *root, struct source_line *src_line) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 55bdd9015f331..6056840da4c9e 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -107,6 +107,7 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r size_t disasm__fprintf(struct list_head *head, FILE *fp); double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, s64 end, const char **path, struct sym_hist_entry *sample); +int symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel); struct sym_hist { u64 nr_samples; From 8b4c74dc5cd40a3bc77f8bc2b6b7b33dc125e302 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:41 +0200 Subject: [PATCH 19/83] perf annotate: Add symbol__calc_lines function Replace symbol__get_source_line() with symbol__calc_lines(), which calculates the source line tree over the struct annotation_line. This will allow us to remove redundant struct source_line in following patches. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-19-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 186 +++++++++++++------------------------ tools/perf/util/annotate.h | 2 + 2 files changed, 68 insertions(+), 120 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index ff1036096347f..96cf6767b5ced 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -892,6 +892,7 @@ static void annotation_line__delete(struct annotation_line *al) { void *ptr = (void *) al - al->privsize; + free_srcline(al->path); zfree(&al->line); free(ptr); } @@ -1726,21 +1727,21 @@ int symbol__annotate(struct symbol *sym, struct map *map, return symbol__calc_percent(sym, evsel); } -static void insert_source_line(struct rb_root *root, struct source_line *src_line) +static void insert_source_line(struct rb_root *root, struct annotation_line *al) { - struct source_line *iter; + struct annotation_line *iter; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; int i, ret; while (*p != NULL) { parent = *p; - iter = rb_entry(parent, struct source_line, node); + iter = rb_entry(parent, struct annotation_line, rb_node); - ret = strcmp(iter->path, src_line->path); + ret = strcmp(iter->path, al->path); if (ret == 0) { - for (i = 0; i < src_line->nr_pcnt; i++) - iter->samples[i].percent_sum += src_line->samples[i].percent; + for (i = 0; i < al->samples_nr; i++) + iter->samples[i].percent_sum += al->samples[i].percent; return; } @@ -1750,18 +1751,18 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin p = &(*p)->rb_right; } - for (i = 0; i < src_line->nr_pcnt; i++) - src_line->samples[i].percent_sum = src_line->samples[i].percent; + for (i = 0; i < al->samples_nr; i++) + al->samples[i].percent_sum = al->samples[i].percent; - rb_link_node(&src_line->node, parent, p); - rb_insert_color(&src_line->node, root); + rb_link_node(&al->rb_node, parent, p); + rb_insert_color(&al->rb_node, root); } -static int cmp_source_line(struct source_line *a, struct source_line *b) +static int cmp_source_line(struct annotation_line *a, struct annotation_line *b) { int i; - for (i = 0; i < a->nr_pcnt; i++) { + for (i = 0; i < a->samples_nr; i++) { if (a->samples[i].percent_sum == b->samples[i].percent_sum) continue; return a->samples[i].percent_sum > b->samples[i].percent_sum; @@ -1770,135 +1771,47 @@ static int cmp_source_line(struct source_line *a, struct source_line *b) return 0; } -static void __resort_source_line(struct rb_root *root, struct source_line *src_line) +static void __resort_source_line(struct rb_root *root, struct annotation_line *al) { - struct source_line *iter; + struct annotation_line *iter; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; while (*p != NULL) { parent = *p; - iter = rb_entry(parent, struct source_line, node); + iter = rb_entry(parent, struct annotation_line, rb_node); - if (cmp_source_line(src_line, iter)) + if (cmp_source_line(al, iter)) p = &(*p)->rb_left; else p = &(*p)->rb_right; } - rb_link_node(&src_line->node, parent, p); - rb_insert_color(&src_line->node, root); + rb_link_node(&al->rb_node, parent, p); + rb_insert_color(&al->rb_node, root); } static void resort_source_line(struct rb_root *dest_root, struct rb_root *src_root) { - struct source_line *src_line; + struct annotation_line *al; struct rb_node *node; node = rb_first(src_root); while (node) { struct rb_node *next; - src_line = rb_entry(node, struct source_line, node); + al = rb_entry(node, struct annotation_line, rb_node); next = rb_next(node); rb_erase(node, src_root); - __resort_source_line(dest_root, src_line); + __resort_source_line(dest_root, al); node = next; } } -static void symbol__free_source_line(struct symbol *sym, int len) -{ - struct annotation *notes = symbol__annotation(sym); - struct source_line *src_line = notes->src->lines; - size_t sizeof_src_line; - int i; - - sizeof_src_line = sizeof(*src_line) + - (sizeof(src_line->samples) * (src_line->nr_pcnt - 1)); - - for (i = 0; i < len; i++) { - free_srcline(src_line->path); - src_line = (void *)src_line + sizeof_src_line; - } - - zfree(¬es->src->lines); -} - -/* Get the filename:line for the colored entries */ -static int symbol__get_source_line(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, - struct rb_root *root, int len) -{ - u64 start; - int i, k; - int evidx = evsel->idx; - struct source_line *src_line; - struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = annotation__histogram(notes, evidx); - struct rb_root tmp_root = RB_ROOT; - int nr_pcnt = 1; - u64 nr_samples = h->nr_samples; - size_t sizeof_src_line = sizeof(struct source_line); - - if (perf_evsel__is_group_event(evsel)) { - for (i = 1; i < evsel->nr_members; i++) { - h = annotation__histogram(notes, evidx + i); - nr_samples += h->nr_samples; - } - nr_pcnt = evsel->nr_members; - sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->samples); - } - - if (!nr_samples) - return 0; - - src_line = notes->src->lines = calloc(len, sizeof_src_line); - if (!notes->src->lines) - return -1; - - start = map__rip_2objdump(map, sym->start); - - for (i = 0; i < len; i++) { - u64 offset; - double percent_max = 0.0; - - src_line->nr_pcnt = nr_pcnt; - - for (k = 0; k < nr_pcnt; k++) { - double percent = 0.0; - - h = annotation__histogram(notes, evidx + k); - nr_samples = h->addr[i].nr_samples; - if (h->nr_samples) - percent = 100.0 * nr_samples / h->nr_samples; - - if (percent > percent_max) - percent_max = percent; - src_line->samples[k].percent = percent; - src_line->samples[k].nr = nr_samples; - } - - if (percent_max <= 0.5) - goto next; - - offset = start + i; - src_line->path = get_srcline(map->dso, offset, NULL, - false, true); - insert_source_line(&tmp_root, src_line); - - next: - src_line = (void *)src_line + sizeof_src_line; - } - - resort_source_line(root, &tmp_root); - return 0; -} - static void print_summary(struct rb_root *root, const char *filename) { - struct source_line *src_line; + struct annotation_line *al; struct rb_node *node; printf("\nSorted summary for file %s\n", filename); @@ -1916,9 +1829,9 @@ static void print_summary(struct rb_root *root, const char *filename) char *path; int i; - src_line = rb_entry(node, struct source_line, node); - for (i = 0; i < src_line->nr_pcnt; i++) { - percent = src_line->samples[i].percent_sum; + al = rb_entry(node, struct annotation_line, rb_node); + for (i = 0; i < al->samples_nr; i++) { + percent = al->samples[i].percent_sum; color = get_percent_color(percent); color_fprintf(stdout, color, " %7.2f", percent); @@ -1926,7 +1839,7 @@ static void print_summary(struct rb_root *root, const char *filename) percent_max = percent; } - path = src_line->path; + path = al->path; color = get_percent_color(percent_max); color_fprintf(stdout, color, " %s\n", path); @@ -2091,29 +2004,62 @@ size_t disasm__fprintf(struct list_head *head, FILE *fp) return printed; } +static void annotation__calc_lines(struct annotation *notes, struct map *map, + struct rb_root *root, u64 start) +{ + struct annotation_line *al; + struct rb_root tmp_root = RB_ROOT; + + list_for_each_entry(al, ¬es->src->source, node) { + double percent_max = 0.0; + int i; + + for (i = 0; i < al->samples_nr; i++) { + struct annotation_data *sample; + + sample = &al->samples[i]; + + if (sample->percent > percent_max) + percent_max = sample->percent; + } + + if (percent_max <= 0.5) + continue; + + al->path = get_srcline(map->dso, start + al->offset, NULL, false, true); + insert_source_line(&tmp_root, al); + } + + resort_source_line(root, &tmp_root); +} + +static void symbol__calc_lines(struct symbol *sym, struct map *map, + struct rb_root *root) +{ + struct annotation *notes = symbol__annotation(sym); + u64 start = map__rip_2objdump(map, sym->start); + + annotation__calc_lines(notes, map, root, start); +} + int symbol__tty_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool print_lines, bool full_paths, int min_pcnt, int max_lines) { struct dso *dso = map->dso; struct rb_root source_line = RB_ROOT; - u64 len; if (symbol__annotate(sym, map, evsel, 0, NULL, NULL) < 0) return -1; - len = symbol__size(sym); - if (print_lines) { srcline_full_filename = full_paths; - symbol__get_source_line(sym, map, evsel, &source_line, len); + symbol__calc_lines(sym, map, &source_line); print_summary(&source_line, dso->long_name); } symbol__annotate_printf(sym, map, evsel, full_paths, min_pcnt, max_lines, 0); - if (print_lines) - symbol__free_source_line(sym, len); annotated_source__purge(symbol__annotation(sym)->src); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 6056840da4c9e..927810b19f0da 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -66,6 +66,7 @@ struct sym_hist_entry { struct annotation_data { double percent; + double percent_sum; struct sym_hist_entry he; }; @@ -78,6 +79,7 @@ struct annotation_line { float ipc; u64 cycles; size_t privsize; + char *path; int samples_nr; struct annotation_data samples[0]; }; From f681d593d1ce7d2fc665c4047b45f4316408b892 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:42 +0200 Subject: [PATCH 20/83] perf annotate: Remove disasm__calc_percent() from disasm_line__print() Remove disasm__calc_percent() from disasm_line__print(), because we already have the data calculated in struct annotation_line. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-20-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 3 ++ tools/perf/util/annotate.c | 59 ++++++++++---------------------------- 2 files changed, 18 insertions(+), 44 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index adfeeb488f1ad..0789f95ca2f37 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -226,6 +226,7 @@ static void perf_top__record_precise_ip(struct perf_top *top, static void perf_top__show_details(struct perf_top *top) { struct hist_entry *he = top->sym_filter_entry; + struct perf_evsel *evsel = hists_to_evsel(he->hists); struct annotation *notes; struct symbol *symbol; int more; @@ -238,6 +239,8 @@ static void perf_top__show_details(struct perf_top *top) pthread_mutex_lock(¬es->lock); + symbol__calc_percent(symbol, evsel); + if (notes->src == NULL) goto out_unlock; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 96cf6767b5ced..209a255455428 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1145,41 +1145,19 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st static const char *prev_color; if (dl->al.offset != -1) { - const char *path = NULL; - double percent, max_percent = 0.0; - double *ppercents = &percent; - struct sym_hist_entry sample; - struct sym_hist_entry *psamples = &sample; + double max_percent = 0.0; int i, nr_percent = 1; const char *color; struct annotation *notes = symbol__annotation(sym); s64 offset = dl->al.offset; const u64 addr = start + offset; - struct annotation_line *next; struct block_range *br; - next = annotation_line__next(&dl->al, ¬es->src->source); + for (i = 0; i < dl->al.samples_nr; i++) { + struct annotation_data *sample = &dl->al.samples[i]; - if (perf_evsel__is_group_event(evsel)) { - nr_percent = evsel->nr_members; - ppercents = calloc(nr_percent, sizeof(double)); - psamples = calloc(nr_percent, sizeof(struct sym_hist_entry)); - if (ppercents == NULL || psamples == NULL) { - return -1; - } - } - - for (i = 0; i < nr_percent; i++) { - percent = disasm__calc_percent(notes, - notes->src->lines ? i : evsel->idx + i, - offset, - next ? next->offset : (s64) len, - &path, &sample); - - ppercents[i] = percent; - psamples[i] = sample; - if (percent > max_percent) - max_percent = percent; + if (sample->percent > max_percent) + max_percent = sample->percent; } if (max_percent < min_pcnt) @@ -1204,28 +1182,28 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * the same color than the percentage. Don't print it * twice for close colored addr with the same filename:line */ - if (path) { - if (!prev_line || strcmp(prev_line, path) + if (dl->al.path) { + if (!prev_line || strcmp(prev_line, dl->al.path) || color != prev_color) { - color_fprintf(stdout, color, " %s", path); - prev_line = path; + color_fprintf(stdout, color, " %s", dl->al.path); + prev_line = dl->al.path; prev_color = color; } } for (i = 0; i < nr_percent; i++) { - percent = ppercents[i]; - sample = psamples[i]; - color = get_percent_color(percent); + struct annotation_data *sample = &dl->al.samples[i]; + + color = get_percent_color(sample->percent); if (symbol_conf.show_total_period) color_fprintf(stdout, color, " %11" PRIu64, - sample.period); + sample->he.period); else if (symbol_conf.show_nr_samples) color_fprintf(stdout, color, " %7" PRIu64, - sample.nr_samples); + sample->he.nr_samples); else - color_fprintf(stdout, color, " %7.2f", percent); + color_fprintf(stdout, color, " %7.2f", sample->percent); } printf(" : "); @@ -1235,13 +1213,6 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); annotate__branch_printf(br, addr); printf("\n"); - - if (ppercents != &percent) - free(ppercents); - - if (psamples != &sample) - free(psamples); - } else if (max_lines && printed >= max_lines) return 1; else { From e425da6caed1a2872e9543bba83488dbe4bbe3f3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:43 +0200 Subject: [PATCH 21/83] perf annotate: Remove disasm__calc_percent() from annotate_browser__calc_percent() Remove disasm__calc_percent() from annotate_browser__calc_percent(), because we already have the data calculated in struct annotation_line. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-21-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 4c54d5e760088..613682432940b 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -444,17 +444,16 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct annotation_line *next; struct disasm_line *pos; - s64 len = symbol__size(sym); browser->entries = RB_ROOT; pthread_mutex_lock(¬es->lock); + symbol__calc_percent(sym, evsel); + list_for_each_entry(pos, ¬es->src->source, al.node) { struct browser_disasm_line *bpos = disasm_line__browser(pos); - const char *path = NULL; double max_percent = 0.0; int i; @@ -463,17 +462,11 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, continue; } - next = annotation_line__next(&pos->al, ¬es->src->source); - for (i = 0; i < browser->nr_events; i++) { - struct sym_hist_entry sample; - - bpos->samples[i].percent = disasm__calc_percent(notes, - evsel->idx + i, - pos->al.offset, - next ? next->offset : len, - &path, &sample); - bpos->samples[i].he = sample; + struct annotation_data *sample = &pos->al.samples[i]; + + bpos->samples[i].percent = sample->percent; + bpos->samples[i].he = sample->he; if (max_percent < bpos->samples[i].percent) max_percent = bpos->samples[i].percent; From 81e436a0b3a7a2f3ac0311674ce407b7cdd23f0b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:44 +0200 Subject: [PATCH 22/83] perf annotate: Remove disasm__calc_percent function Remove disasm__calc_percent() function, because it's no longer needed. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-22-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 44 -------------------------------------- tools/perf/util/annotate.h | 2 -- 2 files changed, 46 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 209a255455428..29cf2a5ef6204 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1010,50 +1010,6 @@ annotation_line__next(struct annotation_line *pos, struct list_head *head) return NULL; } -double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, - s64 end, const char **path, struct sym_hist_entry *sample) -{ - struct source_line *src_line = notes->src->lines; - double percent = 0.0; - - sample->nr_samples = sample->period = 0; - - if (src_line) { - size_t sizeof_src_line = sizeof(*src_line) + - sizeof(src_line->samples) * (src_line->nr_pcnt - 1); - - while (offset < end) { - src_line = (void *)notes->src->lines + - (sizeof_src_line * offset); - - if (*path == NULL) - *path = src_line->path; - - percent += src_line->samples[evidx].percent; - sample->nr_samples += src_line->samples[evidx].nr; - offset++; - } - } else { - struct sym_hist *h = annotation__histogram(notes, evidx); - unsigned int hits = 0; - u64 period = 0; - - while (offset < end) { - hits += h->addr[offset].nr_samples; - period += h->addr[offset].period; - ++offset; - } - - if (h->nr_samples) { - sample->period = period; - sample->nr_samples = hits; - percent = 100.0 * hits / h->nr_samples; - } - } - - return percent; -} - static const char *annotate__address_color(struct block_range *br) { double cov = block_range__coverage(br); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 927810b19f0da..f98acb2ad721b 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -107,8 +107,6 @@ struct annotation_line * annotation_line__next(struct annotation_line *pos, struct list_head *head); int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); size_t disasm__fprintf(struct list_head *head, FILE *fp); -double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, - s64 end, const char **path, struct sym_hist_entry *sample); int symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel); struct sym_hist { From fa1924eb4abcd756febc031d819ba75c3849ca45 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:45 +0200 Subject: [PATCH 23/83] perf annotate: Remove struct source_line Remove struct source_line*, no longer needed. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-23-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index f98acb2ad721b..4fc805a271d2d 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -126,19 +126,6 @@ struct cyc_hist { u16 reset; }; -struct source_line_samples { - double percent; - double percent_sum; - u64 nr; -}; - -struct source_line { - struct rb_node node; - char *path; - int nr_pcnt; - struct source_line_samples samples[1]; -}; - /** struct annotated_source - symbols with hits have this attached as in sannotation * * @histogram: Array of addr hit histograms per event being monitored @@ -154,7 +141,6 @@ struct source_line { */ struct annotated_source { struct list_head source; - struct source_line *lines; int nr_histograms; size_t sizeof_sym_hist; struct cyc_hist *cycles_hist; From 8f25b8197d43885a4cc19bea581e37bf46ed9958 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:46 +0200 Subject: [PATCH 24/83] perf annotate: Add annotation_line__print function Separating struct annotation_line display function, it will hold the generic line display code. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-24-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 29cf2a5ef6204..5c6f739ac3ac0 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1189,6 +1189,18 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st return 0; } +static int +annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, + struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, + int max_lines, struct annotation_line *aq) +{ + struct disasm_line *dl = container_of(al, struct disasm_line, al); + struct disasm_line *queue = container_of(aq, struct disasm_line, al); + + return disasm_line__print(dl, sym, start, evsel, len, min_pcnt, printed, + max_lines, queue); +} + /* * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw) * which looks like following @@ -1797,7 +1809,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, const char *evsel_name = perf_evsel__name(evsel); struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evsel->idx); - struct disasm_line *pos, *queue = NULL; + struct annotation_line *pos, *queue = NULL; u64 start = map__rip_2objdump(map, sym->start); int printed = 2, queue_len = 0; int more = 0; @@ -1830,15 +1842,19 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (verbose > 0) symbol__annotate_hits(sym, evsel); - list_for_each_entry(pos, ¬es->src->source, al.node) { + list_for_each_entry(pos, ¬es->src->source, node) { + int err; + if (context && queue == NULL) { queue = pos; queue_len = 0; } - switch (disasm_line__print(pos, sym, start, evsel, len, - min_pcnt, printed, max_lines, - queue)) { + err = annotation_line__print(pos, sym, start, evsel, len, + min_pcnt, printed, max_lines, + queue); + + switch (err) { case 0: ++printed; if (context) { @@ -1860,7 +1876,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (!context) break; if (queue_len == context) - queue = list_entry(queue->al.node.next, typeof(*queue), al.node); + queue = list_entry(queue->node.next, typeof(*queue), node); else ++queue_len; break; From 29971f9a82a5d005b37d65fbb73edaf9073279b0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:47 +0200 Subject: [PATCH 25/83] perf annotate: Factor annotation_line__print from disasm_line__print Move generic annotation line display code into annotation_line__print function. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-25-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 69 ++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 5c6f739ac3ac0..cb065ca431ee5 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1093,24 +1093,36 @@ static void annotate__branch_printf(struct block_range *br, u64 addr) } -static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 start, - struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, - int max_lines, struct disasm_line *queue) +static int disasm_line__print(struct disasm_line *dl, u64 start) { + s64 offset = dl->al.offset; + const u64 addr = start + offset; + struct block_range *br; + + br = block_range__find(addr); + color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr); + color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); + annotate__branch_printf(br, addr); + return 0; +} + +static int +annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, + struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, + int max_lines, struct annotation_line *queue) +{ + struct disasm_line *dl = container_of(al, struct disasm_line, al); static const char *prev_line; static const char *prev_color; - if (dl->al.offset != -1) { + if (al->offset != -1) { double max_percent = 0.0; int i, nr_percent = 1; const char *color; struct annotation *notes = symbol__annotation(sym); - s64 offset = dl->al.offset; - const u64 addr = start + offset; - struct block_range *br; - for (i = 0; i < dl->al.samples_nr; i++) { - struct annotation_data *sample = &dl->al.samples[i]; + for (i = 0; i < al->samples_nr; i++) { + struct annotation_data *sample = &al->samples[i]; if (sample->percent > max_percent) max_percent = sample->percent; @@ -1123,11 +1135,11 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st return 1; if (queue != NULL) { - list_for_each_entry_from(queue, ¬es->src->source, al.node) { - if (queue == dl) + list_for_each_entry_from(queue, ¬es->src->source, node) { + if (queue == al) break; - disasm_line__print(queue, sym, start, evsel, len, - 0, 0, 1, NULL); + annotation_line__print(queue, sym, start, evsel, len, + 0, 0, 1, NULL); } } @@ -1138,17 +1150,17 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * the same color than the percentage. Don't print it * twice for close colored addr with the same filename:line */ - if (dl->al.path) { - if (!prev_line || strcmp(prev_line, dl->al.path) + if (al->path) { + if (!prev_line || strcmp(prev_line, al->path) || color != prev_color) { - color_fprintf(stdout, color, " %s", dl->al.path); - prev_line = dl->al.path; + color_fprintf(stdout, color, " %s", al->path); + prev_line = al->path; prev_color = color; } } for (i = 0; i < nr_percent; i++) { - struct annotation_data *sample = &dl->al.samples[i]; + struct annotation_data *sample = &al->samples[i]; color = get_percent_color(sample->percent); @@ -1164,10 +1176,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st printf(" : "); - br = block_range__find(addr); - color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr); - color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); - annotate__branch_printf(br, addr); + disasm_line__print(dl, start); printf("\n"); } else if (max_lines && printed >= max_lines) return 1; @@ -1180,27 +1189,15 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (perf_evsel__is_group_event(evsel)) width *= evsel->nr_members; - if (!*dl->al.line) + if (!*al->line) printf(" %*s:\n", width, " "); else - printf(" %*s: %s\n", width, " ", dl->al.line); + printf(" %*s: %s\n", width, " ", al->line); } return 0; } -static int -annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, - struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, - int max_lines, struct annotation_line *aq) -{ - struct disasm_line *dl = container_of(al, struct disasm_line, al); - struct disasm_line *queue = container_of(aq, struct disasm_line, al); - - return disasm_line__print(dl, sym, start, evsel, len, min_pcnt, printed, - max_lines, queue); -} - /* * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw) * which looks like following From 3ab6db8d0f3b19b93a8de25e3b7ab5fdaac47679 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:48 +0200 Subject: [PATCH 26/83] perf annotate browser: Use samples data from struct annotation_line We now carry the data in 'struct annotation_line', so using it instead of samples from 'struct browser_disasm_line' and removing it and its setup. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-26-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 57 +++++++++++-------------------- 1 file changed, 20 insertions(+), 37 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 613682432940b..5d99429a03bc9 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -29,11 +29,6 @@ struct browser_disasm_line { u32 idx; int idx_asm; int jump_sources; - /* - * actual length of this array is saved on the nr_events field - * of the struct annotate_browser - */ - struct disasm_line_samples samples[1]; }; static struct annotate_browser_opt { @@ -76,9 +71,7 @@ struct annotate_browser { static inline struct browser_disasm_line *disasm_line__browser(struct disasm_line *dl) { - struct annotation_line *al = &dl->al; - - return (void *) al - al->privsize; + return (void *) dl - sizeof(struct browser_disasm_line); } static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, @@ -139,8 +132,8 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int bool show_title = false; for (i = 0; i < ab->nr_events; i++) { - if (bdl->samples[i].percent > percent_max) - percent_max = bdl->samples[i].percent; + if (dl->al.samples[i].percent > percent_max) + percent_max = dl->al.samples[i].percent; } if ((row == 0) && (dl->al.offset == -1 || percent_max == 0.0)) { @@ -154,17 +147,17 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (dl->al.offset != -1 && percent_max != 0.0) { for (i = 0; i < ab->nr_events; i++) { ui_browser__set_percent_color(browser, - bdl->samples[i].percent, + dl->al.samples[i].percent, current_entry); if (annotate_browser__opts.show_total_period) { ui_browser__printf(browser, "%11" PRIu64 " ", - bdl->samples[i].he.period); + dl->al.samples[i].he.period); } else if (annotate_browser__opts.show_nr_samples) { ui_browser__printf(browser, "%6" PRIu64 " ", - bdl->samples[i].he.nr_samples); + dl->al.samples[i].he.nr_samples); } else { ui_browser__printf(browser, "%6.2f ", - bdl->samples[i].percent); + dl->al.samples[i].percent); } } } else { @@ -363,11 +356,9 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) return ret; } -static int disasm__cmp(struct disasm_line *da, - struct disasm_line *db, int nr_pcnt) +static int disasm__cmp(struct annotation_line *a, + struct annotation_line *b, int nr_pcnt) { - struct browser_disasm_line *a = disasm_line__browser(da); - struct browser_disasm_line *b = disasm_line__browser(db); int i; for (i = 0; i < nr_pcnt; i++) { @@ -378,24 +369,24 @@ static int disasm__cmp(struct disasm_line *da, return 0; } -static void disasm_rb_tree__insert(struct rb_root *root, struct disasm_line *dl, +static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line *al, int nr_events) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; - struct disasm_line *l; + struct annotation_line *l; while (*p != NULL) { parent = *p; - l = rb_entry(parent, struct disasm_line, al.rb_node); + l = rb_entry(parent, struct annotation_line, rb_node); - if (disasm__cmp(dl, l, nr_events)) + if (disasm__cmp(al, l, nr_events)) p = &(*p)->rb_left; else p = &(*p)->rb_right; } - rb_link_node(&dl->al.rb_node, parent, p); - rb_insert_color(&dl->al.rb_node, root); + rb_link_node(&al->rb_node, parent, p); + rb_insert_color(&al->rb_node, root); } static void annotate_browser__set_top(struct annotate_browser *browser, @@ -453,7 +444,6 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, symbol__calc_percent(sym, evsel); list_for_each_entry(pos, ¬es->src->source, al.node) { - struct browser_disasm_line *bpos = disasm_line__browser(pos); double max_percent = 0.0; int i; @@ -465,18 +455,15 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, for (i = 0; i < browser->nr_events; i++) { struct annotation_data *sample = &pos->al.samples[i]; - bpos->samples[i].percent = sample->percent; - bpos->samples[i].he = sample->he; - - if (max_percent < bpos->samples[i].percent) - max_percent = bpos->samples[i].percent; + if (max_percent < sample->percent) + max_percent = sample->percent; } if (max_percent < 0.01 && pos->al.ipc == 0) { RB_CLEAR_NODE(&pos->al.rb_node); continue; } - disasm_rb_tree__insert(&browser->entries, pos, + disasm_rb_tree__insert(&browser->entries, &pos->al, browser->nr_events); } pthread_mutex_unlock(¬es->lock); @@ -1096,7 +1083,6 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, }; int ret = -1, err; int nr_pcnt = 1; - size_t sizeof_bdl = sizeof(struct browser_disasm_line); if (sym == NULL) return -1; @@ -1112,14 +1098,11 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, return -1; } - if (perf_evsel__is_group_event(evsel)) { + if (perf_evsel__is_group_event(evsel)) nr_pcnt = evsel->nr_members; - sizeof_bdl += sizeof(struct disasm_line_samples) * - (nr_pcnt - 1); - } err = symbol__annotate(sym, map, evsel, - sizeof_bdl, &browser.arch, + sizeof(struct browser_disasm_line), &browser.arch, perf_evsel__env_cpuid(evsel)); if (err) { char msg[BUFSIZ]; From b15636c62f3a32a8560ea6a32245ec49edd60c6b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:49 +0200 Subject: [PATCH 27/83] perf annotate browser: Do not pass nr_events in disasm_rb_tree__insert We now keep samples_nr in struct annotation_line, so there's no need to pass nr_events to disasm_rb_tree__insert function. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-27-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 5d99429a03bc9..67e5955b3c6fc 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -356,12 +356,11 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) return ret; } -static int disasm__cmp(struct annotation_line *a, - struct annotation_line *b, int nr_pcnt) +static int disasm__cmp(struct annotation_line *a, struct annotation_line *b) { int i; - for (i = 0; i < nr_pcnt; i++) { + for (i = 0; i < a->samples_nr; i++) { if (a->samples[i].percent == b->samples[i].percent) continue; return a->samples[i].percent < b->samples[i].percent; @@ -369,8 +368,7 @@ static int disasm__cmp(struct annotation_line *a, return 0; } -static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line *al, - int nr_events) +static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line *al) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -380,7 +378,7 @@ static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line parent = *p; l = rb_entry(parent, struct annotation_line, rb_node); - if (disasm__cmp(al, l, nr_events)) + if (disasm__cmp(al, l)) p = &(*p)->rb_left; else p = &(*p)->rb_right; @@ -452,7 +450,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, continue; } - for (i = 0; i < browser->nr_events; i++) { + for (i = 0; i < pos->al.samples_nr; i++) { struct annotation_data *sample = &pos->al.samples[i]; if (max_percent < sample->percent) @@ -463,8 +461,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, RB_CLEAR_NODE(&pos->al.rb_node); continue; } - disasm_rb_tree__insert(&browser->entries, &pos->al, - browser->nr_events); + disasm_rb_tree__insert(&browser->entries, &pos->al); } pthread_mutex_unlock(¬es->lock); From 0d9579701fee0a482185ab4e8ee7f5ae86f8ae19 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 6 Nov 2017 11:55:36 +0100 Subject: [PATCH 28/83] perf annotate browser: Rename struct browser_disasm_line to browser_line Rename struct browser_disasm_line to browser_line, because the browser operates now on generic lines and no longer on disasm lines. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171106105536.GA20858@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 67e5955b3c6fc..5ed6c158af403 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -25,10 +25,10 @@ struct disasm_line_samples { #define IPC_WIDTH 6 #define CYCLES_WIDTH 6 -struct browser_disasm_line { - u32 idx; - int idx_asm; - int jump_sources; +struct browser_line { + u32 idx; + int idx_asm; + int jump_sources; }; static struct annotate_browser_opt { @@ -69,9 +69,9 @@ struct annotate_browser { char search_bf[128]; }; -static inline struct browser_disasm_line *disasm_line__browser(struct disasm_line *dl) +static inline struct browser_line *disasm_line__browser(struct disasm_line *dl) { - return (void *) dl - sizeof(struct browser_disasm_line); + return (void *) dl - sizeof(struct browser_line); } static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, @@ -119,7 +119,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); - struct browser_disasm_line *bdl = disasm_line__browser(dl); + struct browser_line *bdl = disasm_line__browser(dl); bool current_entry = ui_browser__is_current_entry(browser, row); bool change_color = (!annotate_browser__opts.hide_src_code && (!current_entry || (browser->use_navkeypressed && @@ -302,7 +302,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct disasm_line *cursor = ab->selection, *target; - struct browser_disasm_line *btarget, *bcursor; + struct browser_line *btarget, *bcursor; unsigned int from, to; struct map_symbol *ms = ab->b.priv; struct symbol *sym = ms->sym; @@ -413,7 +413,7 @@ static void annotate_browser__set_top(struct annotate_browser *browser, static void annotate_browser__set_rb_top(struct annotate_browser *browser, struct rb_node *nd) { - struct browser_disasm_line *bpos; + struct browser_line *bpos; struct disasm_line *pos; u32 idx; @@ -471,7 +471,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, static bool annotate_browser__toggle_source(struct annotate_browser *browser) { struct disasm_line *dl; - struct browser_disasm_line *bdl; + struct browser_line *bdl; off_t offset = browser->b.index - browser->b.top_idx; browser->b.seek(&browser->b, offset, SEEK_CUR); @@ -1027,7 +1027,7 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser for (offset = 0; offset < size; ++offset) { struct disasm_line *dl = browser->offsets[offset], *dlt; - struct browser_disasm_line *bdlt; + struct browser_line *bdlt; if (!disasm_line__is_valid_jump(dl, sym)) continue; @@ -1099,7 +1099,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, nr_pcnt = evsel->nr_members; err = symbol__annotate(sym, map, evsel, - sizeof(struct browser_disasm_line), &browser.arch, + sizeof(struct browser_line), &browser.arch, perf_evsel__env_cpuid(evsel)); if (err) { char msg[BUFSIZ]; @@ -1114,7 +1114,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, browser.start = map__rip_2objdump(map, sym->start); list_for_each_entry(pos, ¬es->src->source, al.node) { - struct browser_disasm_line *bpos; + struct browser_line *bpos; size_t line_len = strlen(pos->al.line); if (browser.b.width < line_len) From daf25d4303cbf1795535b6c0b7172ba6f12aa2bd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 6 Nov 2017 11:55:52 +0100 Subject: [PATCH 29/83] perf annotate browser: Rename disasm_line__browser to browser_line Rename disasm_line__browser function to browser_line, because the browser got generic and is no longer disasm specific. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171106105552.GB20858@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 5ed6c158af403..3691dc8cef4c4 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -69,7 +69,7 @@ struct annotate_browser { char search_bf[128]; }; -static inline struct browser_line *disasm_line__browser(struct disasm_line *dl) +static inline struct browser_line *browser_line(struct disasm_line *dl) { return (void *) dl - sizeof(struct browser_line); } @@ -119,7 +119,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); - struct browser_line *bdl = disasm_line__browser(dl); + struct browser_line *bdl = browser_line(dl); bool current_entry = ui_browser__is_current_entry(browser, row); bool change_color = (!annotate_browser__opts.hide_src_code && (!current_entry || (browser->use_navkeypressed && @@ -319,8 +319,8 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) if (!target) return; - bcursor = disasm_line__browser(cursor); - btarget = disasm_line__browser(target); + bcursor = browser_line(cursor); + btarget = browser_line(target); if (annotate_browser__opts.hide_src_code) { from = bcursor->idx_asm; @@ -418,7 +418,7 @@ static void annotate_browser__set_rb_top(struct annotate_browser *browser, u32 idx; pos = rb_entry(nd, struct disasm_line, al.rb_node); - bpos = disasm_line__browser(pos); + bpos = browser_line(pos); idx = bpos->idx; if (annotate_browser__opts.hide_src_code) @@ -476,7 +476,7 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) browser->b.seek(&browser->b, offset, SEEK_CUR); dl = list_entry(browser->b.top, struct disasm_line, al.node); - bdl = disasm_line__browser(dl); + bdl = browser_line(dl); if (annotate_browser__opts.hide_src_code) { if (bdl->idx_asm < offset) @@ -1040,7 +1040,7 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser if (dlt == NULL) continue; - bdlt = disasm_line__browser(dlt); + bdlt = browser_line(dlt); if (++bdlt->jump_sources > browser->max_jump_sources) browser->max_jump_sources = bdlt->jump_sources; @@ -1119,7 +1119,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, if (browser.b.width < line_len) browser.b.width = line_len; - bpos = disasm_line__browser(pos); + bpos = browser_line(pos); bpos->idx = browser.nr_entries++; if (pos->al.offset != -1) { bpos->idx_asm = browser.nr_asm_entries++; From 7bcbcd589b15eae849d45540832ba4f9530c778e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 6 Nov 2017 11:56:17 +0100 Subject: [PATCH 30/83] perf annotate browser: Change selection to struct annotation_line Use struct annotation_line as a browser::selection. We want to be able to use the annotate_browser for all sorts of source data, so it needs to be able to work over the generic struct annotation_line. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171106105617.GC20858@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 63 +++++++++++++++++-------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 3691dc8cef4c4..657811669a6cc 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -47,26 +47,26 @@ static struct annotate_browser_opt { struct arch; struct annotate_browser { - struct ui_browser b; - struct rb_root entries; - struct rb_node *curr_hot; - struct disasm_line *selection; - struct disasm_line **offsets; - struct arch *arch; - int nr_events; - u64 start; - int nr_asm_entries; - int nr_entries; - int max_jump_sources; - int nr_jumps; - bool searching_backwards; - bool have_cycles; - u8 addr_width; - u8 jumps_width; - u8 target_width; - u8 min_addr_width; - u8 max_addr_width; - char search_bf[128]; + struct ui_browser b; + struct rb_root entries; + struct rb_node *curr_hot; + struct annotation_line *selection; + struct disasm_line **offsets; + struct arch *arch; + int nr_events; + u64 start; + int nr_asm_entries; + int nr_entries; + int max_jump_sources; + int nr_jumps; + bool searching_backwards; + bool have_cycles; + u8 addr_width; + u8 jumps_width; + u8 target_width; + u8 min_addr_width; + u8 max_addr_width; + char search_bf[128]; }; static inline struct browser_line *browser_line(struct disasm_line *dl) @@ -265,7 +265,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } if (current_entry) - ab->selection = dl; + ab->selection = &dl->al; } static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sym) @@ -301,7 +301,8 @@ static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor) static void annotate_browser__draw_current_jump(struct ui_browser *browser) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); - struct disasm_line *cursor = ab->selection, *target; + struct disasm_line *cursor = disasm_line(ab->selection); + struct disasm_line *target; struct browser_line *btarget, *bcursor; unsigned int from, to; struct map_symbol *ms = ab->b.priv; @@ -526,7 +527,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser, struct hist_browser_timer *hbt) { struct map_symbol *ms = browser->b.priv; - struct disasm_line *dl = browser->selection; + struct disasm_line *dl = disasm_line(browser->selection); struct annotation *notes; struct addr_map_symbol target = { .map = ms->map, @@ -584,7 +585,7 @@ struct disasm_line *annotate_browser__find_offset(struct annotate_browser *brows static bool annotate_browser__jump(struct annotate_browser *browser) { - struct disasm_line *dl = browser->selection; + struct disasm_line *dl = disasm_line(browser->selection); u64 offset; s64 idx; @@ -610,7 +611,7 @@ struct disasm_line *annotate_browser__find_string(struct annotate_browser *brows struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct disasm_line *pos = browser->selection; + struct disasm_line *pos = disasm_line(browser->selection); *idx = browser->b.index; list_for_each_entry_continue(pos, ¬es->src->source, al.node) { @@ -649,7 +650,7 @@ struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browse struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct disasm_line *pos = browser->selection; + struct disasm_line *pos = disasm_line(browser->selection); *idx = browser->b.index; list_for_each_entry_continue_reverse(pos, ¬es->src->source, al.node) { @@ -882,13 +883,16 @@ static int annotate_browser__run(struct annotate_browser *browser, continue; case K_ENTER: case K_RIGHT: + { + struct disasm_line *dl = disasm_line(browser->selection); + if (browser->selection == NULL) ui_helpline__puts("Huh? No selection. Report to linux-kernel@vger.kernel.org"); - else if (browser->selection->al.offset == -1) + else if (browser->selection->offset == -1) ui_helpline__puts("Actions are only available for assembly lines."); - else if (!browser->selection->ins.ops) + else if (!dl->ins.ops) goto show_sup_ins; - else if (ins__is_ret(&browser->selection->ins)) + else if (ins__is_ret(&dl->ins)) goto out; else if (!(annotate_browser__jump(browser) || annotate_browser__callq(browser, evsel, hbt))) { @@ -896,6 +900,7 @@ static int annotate_browser__run(struct annotate_browser *browser, ui_helpline__puts("Actions are only available for function call/return & jump/branch instructions."); } continue; + } case 't': if (annotate_browser__opts.show_total_period) { annotate_browser__opts.show_total_period = false; From e1b60b5bd3c7a3f215e79fa911122aba59b3d984 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:53 +0200 Subject: [PATCH 31/83] perf annotate browser: Change offsets to struct annotation_line Use struct annotation_line as a browser::offsets array entry. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-31-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 49 ++++++++++++++++++------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 657811669a6cc..911f06ce0f1be 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -51,7 +51,7 @@ struct annotate_browser { struct rb_root entries; struct rb_node *curr_hot; struct annotation_line *selection; - struct disasm_line **offsets; + struct annotation_line **offsets; struct arch *arch; int nr_events; u64 start; @@ -303,6 +303,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct disasm_line *cursor = disasm_line(ab->selection); struct disasm_line *target; + struct annotation_line *al; struct browser_line *btarget, *bcursor; unsigned int from, to; struct map_symbol *ms = ab->b.priv; @@ -316,10 +317,12 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) if (!disasm_line__is_valid_jump(cursor, sym)) return; - target = ab->offsets[cursor->ops.target.offset]; - if (!target) + al = ab->offsets[cursor->ops.target.offset]; + if (!al) return; + target = disasm_line(al); + bcursor = browser_line(cursor); btarget = browser_line(target); @@ -978,10 +981,10 @@ static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end, return; for (offset = start; offset <= end; offset++) { - struct disasm_line *dl = browser->offsets[offset]; + struct annotation_line *al = browser->offsets[offset]; - if (dl) - dl->al.ipc = ipc; + if (al) + al->ipc = ipc; } } } @@ -1006,13 +1009,13 @@ static void annotate__compute_ipc(struct annotate_browser *browser, size_t size, ch = ¬es->src->cycles_hist[offset]; if (ch && ch->cycles) { - struct disasm_line *dl; + struct annotation_line *al; if (ch->have_start) count_and_fill(browser, ch->start, offset, ch); - dl = browser->offsets[offset]; - if (dl && ch->num_aggr) - dl->al.cycles = ch->cycles_aggr / ch->num_aggr; + al = browser->offsets[offset]; + if (al && ch->num_aggr) + al->cycles = ch->cycles_aggr / ch->num_aggr; browser->have_cycles = true; } } @@ -1031,13 +1034,18 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser return; for (offset = 0; offset < size; ++offset) { - struct disasm_line *dl = browser->offsets[offset], *dlt; + struct annotation_line *al = browser->offsets[offset]; + struct disasm_line *dl, *dlt; struct browser_line *bdlt; + dl = disasm_line(al); + if (!disasm_line__is_valid_jump(dl, sym)) continue; - dlt = browser->offsets[dl->ops.target.offset]; + al = browser->offsets[dl->ops.target.offset]; + dlt = disasm_line(al); + /* * FIXME: Oops, no jump target? Buggy disassembler? Or do we * have to adjust to the previous offset? @@ -1066,7 +1074,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, struct hist_browser_timer *hbt) { - struct disasm_line *pos; + struct annotation_line *al; struct annotation *notes; size_t size; struct map_symbol ms = { @@ -1094,7 +1102,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - browser.offsets = zalloc(size * sizeof(struct disasm_line *)); + browser.offsets = zalloc(size * sizeof(struct annotation_line *)); if (browser.offsets == NULL) { ui__error("Not enough memory!"); return -1; @@ -1118,15 +1126,16 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, notes = symbol__annotation(sym); browser.start = map__rip_2objdump(map, sym->start); - list_for_each_entry(pos, ¬es->src->source, al.node) { + list_for_each_entry(al, ¬es->src->source, node) { + struct disasm_line *dl = disasm_line(al); struct browser_line *bpos; - size_t line_len = strlen(pos->al.line); + size_t line_len = strlen(al->line); if (browser.b.width < line_len) browser.b.width = line_len; - bpos = browser_line(pos); + bpos = browser_line(dl); bpos->idx = browser.nr_entries++; - if (pos->al.offset != -1) { + if (al->offset != -1) { bpos->idx_asm = browser.nr_asm_entries++; /* * FIXME: short term bandaid to cope with assembly @@ -1135,8 +1144,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, * * E.g. copy_user_generic_unrolled */ - if (pos->al.offset < (s64)size) - browser.offsets[pos->al.offset] = pos; + if (al->offset < (s64)size) + browser.offsets[al->offset] = al; } else bpos->idx_asm = -1; } From a5ef27020b4bc0785fabb2591eb670d3bc641257 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:54 +0200 Subject: [PATCH 32/83] perf annotate browser: Use struct annotation_line in browser_line Using struct annotation_line arg in browser_line function to make it generic. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-32-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 60 +++++++++++++++---------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 911f06ce0f1be..fb83deb8c9092 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -69,9 +69,12 @@ struct annotate_browser { char search_bf[128]; }; -static inline struct browser_line *browser_line(struct disasm_line *dl) +static inline struct browser_line *browser_line(struct annotation_line *al) { - return (void *) dl - sizeof(struct browser_line); + void *ptr = al; + + ptr = container_of(al, struct disasm_line, al); + return ptr - sizeof(struct browser_line); } static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, @@ -119,7 +122,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); - struct browser_line *bdl = browser_line(dl); + struct browser_line *bdl = browser_line(&dl->al); bool current_entry = ui_browser__is_current_entry(browser, row); bool change_color = (!annotate_browser__opts.hide_src_code && (!current_entry || (browser->use_navkeypressed && @@ -302,8 +305,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct disasm_line *cursor = disasm_line(ab->selection); - struct disasm_line *target; - struct annotation_line *al; + struct annotation_line *target; struct browser_line *btarget, *bcursor; unsigned int from, to; struct map_symbol *ms = ab->b.priv; @@ -317,13 +319,9 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) if (!disasm_line__is_valid_jump(cursor, sym)) return; - al = ab->offsets[cursor->ops.target.offset]; - if (!al) - return; - - target = disasm_line(al); + target = ab->offsets[cursor->ops.target.offset]; - bcursor = browser_line(cursor); + bcursor = browser_line(&cursor->al); btarget = browser_line(target); if (annotate_browser__opts.hide_src_code) { @@ -422,7 +420,7 @@ static void annotate_browser__set_rb_top(struct annotate_browser *browser, u32 idx; pos = rb_entry(nd, struct disasm_line, al.rb_node); - bpos = browser_line(pos); + bpos = browser_line(&pos->al); idx = bpos->idx; if (annotate_browser__opts.hide_src_code) @@ -475,37 +473,37 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, static bool annotate_browser__toggle_source(struct annotate_browser *browser) { struct disasm_line *dl; - struct browser_line *bdl; + struct browser_line *bl; off_t offset = browser->b.index - browser->b.top_idx; browser->b.seek(&browser->b, offset, SEEK_CUR); dl = list_entry(browser->b.top, struct disasm_line, al.node); - bdl = browser_line(dl); + bl = browser_line(&dl->al); if (annotate_browser__opts.hide_src_code) { - if (bdl->idx_asm < offset) - offset = bdl->idx; + if (bl->idx_asm < offset) + offset = bl->idx; browser->b.nr_entries = browser->nr_entries; annotate_browser__opts.hide_src_code = false; browser->b.seek(&browser->b, -offset, SEEK_CUR); - browser->b.top_idx = bdl->idx - offset; - browser->b.index = bdl->idx; + browser->b.top_idx = bl->idx - offset; + browser->b.index = bl->idx; } else { - if (bdl->idx_asm < 0) { + if (bl->idx_asm < 0) { ui_helpline__puts("Only available for assembly lines."); browser->b.seek(&browser->b, -offset, SEEK_CUR); return false; } - if (bdl->idx_asm < offset) - offset = bdl->idx_asm; + if (bl->idx_asm < offset) + offset = bl->idx_asm; browser->b.nr_entries = browser->nr_asm_entries; annotate_browser__opts.hide_src_code = true; browser->b.seek(&browser->b, -offset, SEEK_CUR); - browser->b.top_idx = bdl->idx_asm - offset; - browser->b.index = bdl->idx_asm; + browser->b.top_idx = bl->idx_asm - offset; + browser->b.index = bl->idx_asm; } return true; @@ -1035,8 +1033,8 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser for (offset = 0; offset < size; ++offset) { struct annotation_line *al = browser->offsets[offset]; - struct disasm_line *dl, *dlt; - struct browser_line *bdlt; + struct disasm_line *dl; + struct browser_line *blt; dl = disasm_line(al); @@ -1044,18 +1042,17 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser continue; al = browser->offsets[dl->ops.target.offset]; - dlt = disasm_line(al); /* * FIXME: Oops, no jump target? Buggy disassembler? Or do we * have to adjust to the previous offset? */ - if (dlt == NULL) + if (al == NULL) continue; - bdlt = browser_line(dlt); - if (++bdlt->jump_sources > browser->max_jump_sources) - browser->max_jump_sources = bdlt->jump_sources; + blt = browser_line(al); + if (++blt->jump_sources > browser->max_jump_sources) + browser->max_jump_sources = blt->jump_sources; ++browser->nr_jumps; } @@ -1127,13 +1124,12 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, browser.start = map__rip_2objdump(map, sym->start); list_for_each_entry(al, ¬es->src->source, node) { - struct disasm_line *dl = disasm_line(al); struct browser_line *bpos; size_t line_len = strlen(al->line); if (browser.b.width < line_len) browser.b.width = line_len; - bpos = browser_line(dl); + bpos = browser_line(al); bpos->idx = browser.nr_entries++; if (al->offset != -1) { bpos->idx_asm = browser.nr_asm_entries++; From 9213afbdf9562cd108e7ed03bd960d8acdfb49c1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:55 +0200 Subject: [PATCH 33/83] perf annotate browser: Use struct annotation_line in find functions Use struct annotation_line in find functions: annotate_browser__find_string annotate_browser__find_string_reverse Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-33-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 40 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index fb83deb8c9092..8f75e56aedc2c 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -606,23 +606,23 @@ static bool annotate_browser__jump(struct annotate_browser *browser) } static -struct disasm_line *annotate_browser__find_string(struct annotate_browser *browser, +struct annotation_line *annotate_browser__find_string(struct annotate_browser *browser, char *s, s64 *idx) { struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct disasm_line *pos = disasm_line(browser->selection); + struct annotation_line *al = browser->selection; *idx = browser->b.index; - list_for_each_entry_continue(pos, ¬es->src->source, al.node) { - if (disasm_line__filter(&browser->b, &pos->al.node)) + list_for_each_entry_continue(al, ¬es->src->source, node) { + if (disasm_line__filter(&browser->b, &al->node)) continue; ++*idx; - if (pos->al.line && strstr(pos->al.line, s) != NULL) - return pos; + if (al->line && strstr(al->line, s) != NULL) + return al; } return NULL; @@ -630,38 +630,38 @@ struct disasm_line *annotate_browser__find_string(struct annotate_browser *brows static bool __annotate_browser__search(struct annotate_browser *browser) { - struct disasm_line *dl; + struct annotation_line *al; s64 idx; - dl = annotate_browser__find_string(browser, browser->search_bf, &idx); - if (dl == NULL) { + al = annotate_browser__find_string(browser, browser->search_bf, &idx); + if (al == NULL) { ui_helpline__puts("String not found!"); return false; } - annotate_browser__set_top(browser, dl, idx); + annotate_browser__set_top(browser, disasm_line(al), idx); browser->searching_backwards = false; return true; } static -struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browser *browser, +struct annotation_line *annotate_browser__find_string_reverse(struct annotate_browser *browser, char *s, s64 *idx) { struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct disasm_line *pos = disasm_line(browser->selection); + struct annotation_line *al = browser->selection; *idx = browser->b.index; - list_for_each_entry_continue_reverse(pos, ¬es->src->source, al.node) { - if (disasm_line__filter(&browser->b, &pos->al.node)) + list_for_each_entry_continue_reverse(al, ¬es->src->source, node) { + if (disasm_line__filter(&browser->b, &al->node)) continue; --*idx; - if (pos->al.line && strstr(pos->al.line, s) != NULL) - return pos; + if (al->line && strstr(al->line, s) != NULL) + return al; } return NULL; @@ -669,16 +669,16 @@ struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browse static bool __annotate_browser__search_reverse(struct annotate_browser *browser) { - struct disasm_line *dl; + struct annotation_line *al; s64 idx; - dl = annotate_browser__find_string_reverse(browser, browser->search_bf, &idx); - if (dl == NULL) { + al = annotate_browser__find_string_reverse(browser, browser->search_bf, &idx); + if (al == NULL) { ui_helpline__puts("String not found!"); return false; } - annotate_browser__set_top(browser, dl, idx); + annotate_browser__set_top(browser, disasm_line(al), idx); browser->searching_backwards = true; return true; } From ec03a77d7d28a2c2de246f67322c5d916852dd9d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:56 +0200 Subject: [PATCH 34/83] perf annotate browser: Use struct annotation_line in browser top Use struct annotation_line in browser::b::top. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-34-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 8f75e56aedc2c..f0f27cf9db85e 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -390,7 +390,7 @@ static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line } static void annotate_browser__set_top(struct annotate_browser *browser, - struct disasm_line *pos, u32 idx) + struct annotation_line *pos, u32 idx) { unsigned back; @@ -399,16 +399,16 @@ static void annotate_browser__set_top(struct annotate_browser *browser, browser->b.top_idx = browser->b.index = idx; while (browser->b.top_idx != 0 && back != 0) { - pos = list_entry(pos->al.node.prev, struct disasm_line, al.node); + pos = list_entry(pos->node.prev, struct annotation_line, node); - if (disasm_line__filter(&browser->b, &pos->al.node)) + if (disasm_line__filter(&browser->b, &pos->node)) continue; --browser->b.top_idx; --back; } - browser->b.top = &pos->al; + browser->b.top = pos; browser->b.navkeypressed = true; } @@ -416,11 +416,11 @@ static void annotate_browser__set_rb_top(struct annotate_browser *browser, struct rb_node *nd) { struct browser_line *bpos; - struct disasm_line *pos; + struct annotation_line *pos; u32 idx; - pos = rb_entry(nd, struct disasm_line, al.rb_node); - bpos = browser_line(&pos->al); + pos = rb_entry(nd, struct annotation_line, rb_node); + bpos = browser_line(pos); idx = bpos->idx; if (annotate_browser__opts.hide_src_code) @@ -472,13 +472,13 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, static bool annotate_browser__toggle_source(struct annotate_browser *browser) { - struct disasm_line *dl; + struct annotation_line *al; struct browser_line *bl; off_t offset = browser->b.index - browser->b.top_idx; browser->b.seek(&browser->b, offset, SEEK_CUR); - dl = list_entry(browser->b.top, struct disasm_line, al.node); - bl = browser_line(&dl->al); + al = list_entry(browser->b.top, struct annotation_line, node); + bl = browser_line(al); if (annotate_browser__opts.hide_src_code) { if (bl->idx_asm < offset) @@ -600,7 +600,7 @@ static bool annotate_browser__jump(struct annotate_browser *browser) return true; } - annotate_browser__set_top(browser, dl, idx); + annotate_browser__set_top(browser, &dl->al, idx); return true; } @@ -639,7 +639,7 @@ static bool __annotate_browser__search(struct annotate_browser *browser) return false; } - annotate_browser__set_top(browser, disasm_line(al), idx); + annotate_browser__set_top(browser, al, idx); browser->searching_backwards = false; return true; } @@ -678,7 +678,7 @@ static bool __annotate_browser__search_reverse(struct annotate_browser *browser) return false; } - annotate_browser__set_top(browser, disasm_line(al), idx); + annotate_browser__set_top(browser, al, idx); browser->searching_backwards = true; return true; } From a5433b3ec937765a1d7521bc6bb87f6e76497640 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:57 +0200 Subject: [PATCH 35/83] perf annotate browser: Add disasm_line__write function Factor disasm_line__write function from annotate_browser__write, which now keeps only generic display code. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-35-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 98 +++++++++++++++++-------------- 1 file changed, 53 insertions(+), 45 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index f0f27cf9db85e..5a2f37a91feb2 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -118,11 +118,37 @@ static int annotate_browser__cycles_width(struct annotate_browser *ab) return ab->have_cycles ? IPC_WIDTH + CYCLES_WIDTH : 0; } +static void disasm_line__write(struct disasm_line *dl, struct ui_browser *browser, + char *bf, size_t size) +{ + if (dl->ins.ops && dl->ins.ops->scnprintf) { + if (ins__is_jump(&dl->ins)) { + bool fwd = dl->ops.target.offset > dl->al.offset; + + ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR : + SLSMG_UARROW_CHAR); + SLsmg_write_char(' '); + } else if (ins__is_call(&dl->ins)) { + ui_browser__write_graph(browser, SLSMG_RARROW_CHAR); + SLsmg_write_char(' '); + } else if (ins__is_ret(&dl->ins)) { + ui_browser__write_graph(browser, SLSMG_LARROW_CHAR); + SLsmg_write_char(' '); + } else { + ui_browser__write_nstring(browser, " ", 2); + } + } else { + ui_browser__write_nstring(browser, " ", 2); + } + + disasm_line__scnprintf(dl, bf, size, !annotate_browser__opts.use_offset); +} + static void annotate_browser__write(struct ui_browser *browser, void *entry, int row) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); - struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); - struct browser_line *bdl = browser_line(&dl->al); + struct annotation_line *al = list_entry(entry, struct annotation_line, node); + struct browser_line *bl = browser_line(al); bool current_entry = ui_browser__is_current_entry(browser, row); bool change_color = (!annotate_browser__opts.hide_src_code && (!current_entry || (browser->use_navkeypressed && @@ -135,32 +161,32 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int bool show_title = false; for (i = 0; i < ab->nr_events; i++) { - if (dl->al.samples[i].percent > percent_max) - percent_max = dl->al.samples[i].percent; + if (al->samples[i].percent > percent_max) + percent_max = al->samples[i].percent; } - if ((row == 0) && (dl->al.offset == -1 || percent_max == 0.0)) { + if ((row == 0) && (al->offset == -1 || percent_max == 0.0)) { if (ab->have_cycles) { - if (dl->al.ipc == 0.0 && dl->al.cycles == 0) + if (al->ipc == 0.0 && al->cycles == 0) show_title = true; } else show_title = true; } - if (dl->al.offset != -1 && percent_max != 0.0) { + if (al->offset != -1 && percent_max != 0.0) { for (i = 0; i < ab->nr_events; i++) { ui_browser__set_percent_color(browser, - dl->al.samples[i].percent, + al->samples[i].percent, current_entry); if (annotate_browser__opts.show_total_period) { ui_browser__printf(browser, "%11" PRIu64 " ", - dl->al.samples[i].he.period); + al->samples[i].he.period); } else if (annotate_browser__opts.show_nr_samples) { ui_browser__printf(browser, "%6" PRIu64 " ", - dl->al.samples[i].he.nr_samples); + al->samples[i].he.nr_samples); } else { ui_browser__printf(browser, "%6.2f ", - dl->al.samples[i].percent); + al->samples[i].percent); } } } else { @@ -175,16 +201,16 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } } if (ab->have_cycles) { - if (dl->al.ipc) - ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->al.ipc); + if (al->ipc) + ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, al->ipc); else if (!show_title) ui_browser__write_nstring(browser, " ", IPC_WIDTH); else ui_browser__printf(browser, "%*s ", IPC_WIDTH - 1, "IPC"); - if (dl->al.cycles) + if (al->cycles) ui_browser__printf(browser, "%*" PRIu64 " ", - CYCLES_WIDTH - 1, dl->al.cycles); + CYCLES_WIDTH - 1, al->cycles); else if (!show_title) ui_browser__write_nstring(browser, " ", CYCLES_WIDTH); else @@ -197,19 +223,19 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (!browser->navkeypressed) width += 1; - if (!*dl->al.line) + if (!*al->line) ui_browser__write_nstring(browser, " ", width - pcnt_width - cycles_width); - else if (dl->al.offset == -1) { - if (dl->al.line_nr && annotate_browser__opts.show_linenr) + else if (al->offset == -1) { + if (al->line_nr && annotate_browser__opts.show_linenr) printed = scnprintf(bf, sizeof(bf), "%-*d ", - ab->addr_width + 1, dl->al.line_nr); + ab->addr_width + 1, al->line_nr); else printed = scnprintf(bf, sizeof(bf), "%*s ", ab->addr_width, " "); ui_browser__write_nstring(browser, bf, printed); - ui_browser__write_nstring(browser, dl->al.line, width - printed - pcnt_width - cycles_width + 1); + ui_browser__write_nstring(browser, al->line, width - printed - pcnt_width - cycles_width + 1); } else { - u64 addr = dl->al.offset; + u64 addr = al->offset; int color = -1; if (!annotate_browser__opts.use_offset) @@ -218,13 +244,13 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (!annotate_browser__opts.use_offset) { printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr); } else { - if (bdl->jump_sources) { + if (bl->jump_sources) { if (annotate_browser__opts.show_nr_jumps) { int prev; printed = scnprintf(bf, sizeof(bf), "%*d ", ab->jumps_width, - bdl->jump_sources); - prev = annotate_browser__set_jumps_percent_color(ab, bdl->jump_sources, + bl->jump_sources); + prev = annotate_browser__set_jumps_percent_color(ab, bl->jump_sources, current_entry); ui_browser__write_nstring(browser, bf, printed); ui_browser__set_color(browser, prev); @@ -243,32 +269,14 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ui_browser__write_nstring(browser, bf, printed); if (change_color) ui_browser__set_color(browser, color); - if (dl->ins.ops && dl->ins.ops->scnprintf) { - if (ins__is_jump(&dl->ins)) { - bool fwd = dl->ops.target.offset > dl->al.offset; - - ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR : - SLSMG_UARROW_CHAR); - SLsmg_write_char(' '); - } else if (ins__is_call(&dl->ins)) { - ui_browser__write_graph(browser, SLSMG_RARROW_CHAR); - SLsmg_write_char(' '); - } else if (ins__is_ret(&dl->ins)) { - ui_browser__write_graph(browser, SLSMG_LARROW_CHAR); - SLsmg_write_char(' '); - } else { - ui_browser__write_nstring(browser, " ", 2); - } - } else { - ui_browser__write_nstring(browser, " ", 2); - } - disasm_line__scnprintf(dl, bf, sizeof(bf), !annotate_browser__opts.use_offset); + disasm_line__write(disasm_line(al), browser, bf, sizeof(bf)); + ui_browser__write_nstring(browser, bf, width - pcnt_width - cycles_width - 3 - printed); } if (current_entry) - ab->selection = &dl->al; + ab->selection = al; } static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sym) From f48e7c407050e5f5f53a0fa9a266d83b001dd356 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:58 +0200 Subject: [PATCH 36/83] perf annotate: Align source and offset lines Align source with offset lines, which are more advanced, because of the address column. Before: : static void *worker_thread(void *__tdata) : { 0.00 : 48a971: push %rbp 0.00 : 48a972: mov %rsp,%rbp 0.00 : 48a975: sub $0x30,%rsp 0.00 : 48a979: mov %rdi,-0x28(%rbp) 0.00 : 48a97d: mov %fs:0x28,%rax 0.00 : 48a986: mov %rax,-0x8(%rbp) 0.00 : 48a98a: xor %eax,%eax : struct thread_data *td = __tdata; 0.00 : 48a98c: mov -0x28(%rbp),%rax 0.00 : 48a990: mov %rax,-0x10(%rbp) : int m = 0, i; 0.00 : 48a994: movl $0x0,-0x1c(%rbp) : int ret; : : for (i = 0; i < loops; i++) { 0.00 : 48a99b: movl $0x0,-0x18(%rbp) After: : static void *worker_thread(void *__tdata) : { 0.00 : 48a971: push %rbp 0.00 : 48a972: mov %rsp,%rbp 0.00 : 48a975: sub $0x30,%rsp 0.00 : 48a979: mov %rdi,-0x28(%rbp) 0.00 : 48a97d: mov %fs:0x28,%rax 0.00 : 48a986: mov %rax,-0x8(%rbp) 0.00 : 48a98a: xor %eax,%eax : struct thread_data *td = __tdata; 0.00 : 48a98c: mov -0x28(%rbp),%rax 0.00 : 48a990: mov %rax,-0x10(%rbp) : int m = 0, i; 0.00 : 48a994: movl $0x0,-0x1c(%rbp) : int ret; : : for (i = 0; i < loops; i++) { 0.00 : 48a99b: movl $0x0,-0x18(%rbp) It makes bigger different when displaying script sources, where the comment lines looks oddly shifted from the lines which actually hold code. I'll send script support separately. Committer note: Do not use a fixed column width for the addresses, as kernel ones se more than 10 columns, look at the last offset and get the right width. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-36-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index cb065ca431ee5..eab4a8e3c6799 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1092,15 +1092,14 @@ static void annotate__branch_printf(struct block_range *br, u64 addr) } } - -static int disasm_line__print(struct disasm_line *dl, u64 start) +static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_width) { s64 offset = dl->al.offset; const u64 addr = start + offset; struct block_range *br; br = block_range__find(addr); - color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr); + color_fprintf(stdout, annotate__address_color(br), " %*" PRIx64 ":", addr_fmt_width, addr); color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); annotate__branch_printf(br, addr); return 0; @@ -1109,7 +1108,7 @@ static int disasm_line__print(struct disasm_line *dl, u64 start) static int annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, - int max_lines, struct annotation_line *queue) + int max_lines, struct annotation_line *queue, int addr_fmt_width) { struct disasm_line *dl = container_of(al, struct disasm_line, al); static const char *prev_line; @@ -1139,7 +1138,7 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start if (queue == al) break; annotation_line__print(queue, sym, start, evsel, len, - 0, 0, 1, NULL); + 0, 0, 1, NULL, addr_fmt_width); } } @@ -1174,9 +1173,9 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start color_fprintf(stdout, color, " %7.2f", sample->percent); } - printf(" : "); + printf(" : "); - disasm_line__print(dl, start); + disasm_line__print(dl, start, addr_fmt_width); printf("\n"); } else if (max_lines && printed >= max_lines) return 1; @@ -1192,7 +1191,7 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start if (!*al->line) printf(" %*s:\n", width, " "); else - printf(" %*s: %s\n", width, " ", al->line); + printf(" %*s: %*s %s\n", width, " ", addr_fmt_width, " ", al->line); } return 0; @@ -1796,6 +1795,19 @@ static void symbol__annotate_hits(struct symbol *sym, struct perf_evsel *evsel) printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->nr_samples", h->nr_samples); } +static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start) +{ + char bf[32]; + struct annotation_line *line; + + list_for_each_entry_reverse(line, lines, node) { + if (line->offset != -1) + return scnprintf(bf, sizeof(bf), "%" PRIx64, start + line->offset); + } + + return 0; +} + int symbol__annotate_printf(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool full_paths, int min_pcnt, int max_lines, int context) @@ -1808,7 +1820,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, struct sym_hist *h = annotation__histogram(notes, evsel->idx); struct annotation_line *pos, *queue = NULL; u64 start = map__rip_2objdump(map, sym->start); - int printed = 2, queue_len = 0; + int printed = 2, queue_len = 0, addr_fmt_width; int more = 0; u64 len; int width = symbol_conf.show_total_period ? 12 : 8; @@ -1839,6 +1851,8 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (verbose > 0) symbol__annotate_hits(sym, evsel); + addr_fmt_width = annotated_source__addr_fmt_width(¬es->src->source, start); + list_for_each_entry(pos, ¬es->src->source, node) { int err; @@ -1849,7 +1863,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, err = annotation_line__print(pos, sym, start, evsel, len, min_pcnt, printed, max_lines, - queue); + queue, addr_fmt_width); switch (err) { case 0: From 1873b710ace80b9437227a0f8f1a5dab18f49ec1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 7 Nov 2017 13:41:35 -0300 Subject: [PATCH 37/83] tools headers: Synchronize kernel ABI headers wrt SPDX tags Two more, that were just in perf/core and thus weren't covered by Ingo's latest headers synch, kcmp.h and prctl.h, silencing this: Warning: Kernel ABI header at 'tools/include/uapi/linux/kcmp.h' differs from latest version at 'include/uapi/linux/kcmp.h' Warning: Kernel ABI header at 'tools/include/uapi/linux/prctl.h' differs from latest version at 'include/uapi/linux/prctl.h' Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-2a0r7iybyqpkftllyy5t9hfk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kcmp.h | 1 + tools/include/uapi/linux/prctl.h | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/kcmp.h b/tools/include/uapi/linux/kcmp.h index 481e103da78ed..ef13050109253 100644 --- a/tools/include/uapi/linux/kcmp.h +++ b/tools/include/uapi/linux/kcmp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI_LINUX_KCMP_H #define _UAPI_LINUX_KCMP_H diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index a8d0759a9e400..b640071421f70 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _LINUX_PRCTL_H #define _LINUX_PRCTL_H From c2f1cead19b628d7a23d2cfc43e444af669f9eab Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Oct 2017 13:27:55 -0700 Subject: [PATCH 38/83] perf record: Fix -c/-F options for cpu event aliases The Intel PMU event aliases have a implicit period= specifier to set the default period. Unfortunately this breaks overriding these periods with -c or -F, because the alias terms look like they are user specified to the internal parser, and user specified event qualifiers override the command line options. Track that they are coming from aliases by adding a "weak" state to the term. Any weak terms don't override command line options. I only did it for -c/-F for now, I think that's the only case that's broken currently. Before: $ perf record -c 1000 -vv -e uops_issued.any ... { sample_period, sample_freq } 2000003 After: $ perf record -c 1000 -vv -e uops_issued.any ... { sample_period, sample_freq } 1000 Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171020202755.21410-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 12 ++++++++---- tools/perf/util/evsel.h | 1 + tools/perf/util/parse-events.c | 2 ++ tools/perf/util/parse-events.h | 3 +++ tools/perf/util/pmu.c | 5 +++++ 5 files changed, 19 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f894893c203d1..bfde6a7a80f28 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -733,12 +733,16 @@ static void apply_config_terms(struct perf_evsel *evsel, list_for_each_entry(term, config_terms, list) { switch (term->type) { case PERF_EVSEL__CONFIG_TERM_PERIOD: - attr->sample_period = term->val.period; - attr->freq = 0; + if (!(term->weak && opts->user_interval != ULLONG_MAX)) { + attr->sample_period = term->val.period; + attr->freq = 0; + } break; case PERF_EVSEL__CONFIG_TERM_FREQ: - attr->sample_freq = term->val.freq; - attr->freq = 1; + if (!(term->weak && opts->user_freq != UINT_MAX)) { + attr->sample_freq = term->val.freq; + attr->freq = 1; + } break; case PERF_EVSEL__CONFIG_TERM_TIME: if (term->val.time) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 9277df96ffdad..157f49e8a772d 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -67,6 +67,7 @@ struct perf_evsel_config_term { bool overwrite; char *branch; } val; + bool weak; }; struct perf_stat_evsel; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index a7fcd95961ef0..170316795a184 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1116,6 +1116,7 @@ do { \ INIT_LIST_HEAD(&__t->list); \ __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \ __t->val.__name = __val; \ + __t->weak = term->weak; \ list_add_tail(&__t->list, head_terms); \ } while (0) @@ -2410,6 +2411,7 @@ static int new_term(struct parse_events_term **_term, *term = *temp; INIT_LIST_HEAD(&term->list); + term->weak = false; switch (term->type_val) { case PARSE_EVENTS__TERM_TYPE_NUM: diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index be337c266697a..88108cd11b4c8 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -101,6 +101,9 @@ struct parse_events_term { /* error string indexes for within parsed string */ int err_term; int err_val; + + /* Coming from implicit alias */ + bool weak; }; struct parse_events_error { diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 07cb2ac041d7a..80fb1593913a9 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -405,6 +405,11 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, parse_events_terms__purge(&list); return ret; } + /* + * Weak terms don't override command line options, + * which we don't want for implicit terms in aliases. + */ + cloned->weak = true; list_add_tail(&cloned->list, &list); } list_splice(&list, terms); From d0565132605f454179699a1b8a3276fc0f8cc87b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Oct 2017 13:27:54 -0700 Subject: [PATCH 39/83] perf evsel: Enable type checking for perf_evsel_config_term types Use a typed enum for the perf_evsel_config_term type enum. This allows gcc to do much stronger type checks, and also check for missing case statements. I removed the unused _MAX member from the number. It found one missing case. I'm not sure it's a real problem, so I just turned it into a BUG_ON for now. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171020202755.21410-1-andi@firstfloor.org [ Renamed the enum name to term_type as per jolsa's request ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 ++ tools/perf/util/evsel.h | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index bfde6a7a80f28..4376cdfaea499 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -779,6 +779,8 @@ static void apply_config_terms(struct perf_evsel *evsel, case PERF_EVSEL__CONFIG_TERM_OVERWRITE: attr->write_backward = term->val.overwrite ? 1 : 0; break; + case PERF_EVSEL__CONFIG_TERM_DRV_CFG: + BUG_ON(1); default: break; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 157f49e8a772d..0688880227e18 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -38,7 +38,7 @@ struct cgroup_sel; * It is allocated within event parsing and attached to * perf_evsel::config_terms list head. */ -enum { +enum term_type { PERF_EVSEL__CONFIG_TERM_PERIOD, PERF_EVSEL__CONFIG_TERM_FREQ, PERF_EVSEL__CONFIG_TERM_TIME, @@ -49,12 +49,11 @@ enum { PERF_EVSEL__CONFIG_TERM_OVERWRITE, PERF_EVSEL__CONFIG_TERM_DRV_CFG, PERF_EVSEL__CONFIG_TERM_BRANCH, - PERF_EVSEL__CONFIG_TERM_MAX, }; struct perf_evsel_config_term { struct list_head list; - int type; + enum term_type type; union { u64 period; u64 freq; From cbd5c1787bab4643e5959522275b46de94eba5ac Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 7 Nov 2017 16:22:46 -0800 Subject: [PATCH 40/83] perf trace: Fix an exit code of trace__symbols_init Currently if trace_event__register_resolver() fails, we return -errno, but we can't be sure that errno isn't zero in this case. Signed-off-by: Andrei Vagin Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Vasily Averin Link: http://lkml.kernel.org/r/20171108002246.8924-2-avagin@openvz.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f2757d38c7d70..84debdbad3271 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1152,12 +1152,14 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) if (trace->host == NULL) return -ENOMEM; - if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0) - return -errno; + err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr); + if (err < 0) + goto out; err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, evlist->threads, trace__tool_process, false, trace->opts.proc_map_timeout, 1); +out: if (err) symbol__exit(); From 86f5fe01cf3ad42e99e7655dec93e0e36ef65614 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Nov 2017 11:27:37 +0100 Subject: [PATCH 41/83] perf tools: Use shell function for perl cflags retrieval Using the shell function for perl CFLAGS retrieval instead of back quotes (``). Both execute shell with the command, but the latter is more explicit and seems to be the preferred way. Also we don't have any other use of the back quotes in perf Makefiles. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171108102739.30338-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 63f534a0902f2..f6786fa2419fe 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -579,7 +579,7 @@ else PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) - PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` + PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) ifneq ($(feature-libperl), 1) From 82806c3aae7496d6974429f3bd43ebeeef18b9b2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 9 Nov 2017 12:03:40 -0300 Subject: [PATCH 42/83] perf evsel: Fix up leftover perf_evsel_stat usage via evsel->priv I forgot one conversion, which got noticed by Thomas when running: $ perf stat -e '{cpu-clock,instructions}' kill kill: not enough arguments Segmentation fault (core dumped) $ Fix it, those stats are in evsel->stats, not anymore in evsel->priv. Reported-by: Thomas-Mich Richter Tested-by: Thomas-Mich Richter Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: e669e833da8d ("perf evsel: Restore evsel->priv as a tool private area") Link: http://lkml.kernel.org/r/20171109150046.GN4333@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4376cdfaea499..cb9bcdb065ea8 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1377,7 +1377,7 @@ perf_evsel__process_group_data(struct perf_evsel *leader, static int perf_evsel__read_group(struct perf_evsel *leader, int cpu, int thread) { - struct perf_stat_evsel *ps = leader->priv; + struct perf_stat_evsel *ps = leader->stats; u64 read_format = leader->attr.read_format; int size = perf_evsel__read_size(leader); u64 *data = ps->group_data; From 60dbcd2532dd7eec2f1e23a37b80ff85d8fb2953 Mon Sep 17 00:00:00 2001 From: Seonghyun Park Date: Thu, 9 Nov 2017 23:07:04 +0900 Subject: [PATCH 43/83] perf tests: Add missing WRITE_ASS for new fields of perf_event_attr Include newly added fields 'mmap2', 'comm_exec', 'use_clockid', 'namespaces', 'write_backward' and 'context_switch' from perf_event_attr to store_event(). Signed-off-by: Seonghyun Park Cc: Jiri Olsa Cc: Namhyung Kim Cc: Seonghyun Park Link: http://lkml.kernel.org/n/tip-vltn7pqhcv8h5fmo9cthk87q@git.kernel.org [ Fix log message to add 'write_backward', fix the patch to add 'use_clock_id' ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 0e1367f90af53..97f64ad7fa08e 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -124,6 +124,12 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu, WRITE_ASS(exclude_guest, "d"); WRITE_ASS(exclude_callchain_kernel, "d"); WRITE_ASS(exclude_callchain_user, "d"); + WRITE_ASS(mmap2, "d"); + WRITE_ASS(comm_exec, "d"); + WRITE_ASS(context_switch, "d"); + WRITE_ASS(write_backward, "d"); + WRITE_ASS(namespaces, "d"); + WRITE_ASS(use_clockid, "d"); WRITE_ASS(wakeup_events, PRIu32); WRITE_ASS(bp_type, PRIu32); WRITE_ASS(config1, "llu"); From fa48c892645dfd3159e5aa6eb9cefd00d5cb347a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 9 Nov 2017 16:04:26 -0300 Subject: [PATCH 44/83] perf script: Fix --per-event-dump for auxtrace synth evsels When processing PERF_RECORD_AUXTRACE_INFO several perf_evsel entries will be synthesized and inserted into session->evlist, eventually ending in perf_script.tool.sample(), which ends up calling builtin-script.c's process_event(), that expects evsel->priv to be a perf_evsel_script object with a valid FILE pointer in fp. So we need to intercept the processing of PERF_RECORD_AUXTRACE_INFO and then setup evsel->priv for these newly created perf_evsel instances, do it to fix the segfault in process_event() trying to use a NULL for that FILE pointer. Reported-by: Alexander Shishkin Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Wang Nan Cc: yuzhoujian Fixes: a14390fde64e ("perf script: Allow creating per-event dump files") Link: http://lkml.kernel.org/n/tip-bthnur8r8de01gxvn2qayx6e@git.kernel.org [ Merge fix by Ravi Bangoria before pushing upstream to preserv bisectability ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 68f36dc0344f2..9b43bda45a415 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1955,6 +1955,16 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script) struct perf_evsel *evsel; evlist__for_each_entry(script->session->evlist, evsel) { + /* + * Already setup? I.e. we may be called twice in cases like + * Intel PT, one for the intel_pt// and dummy events, then + * for the evsels syntheized from the auxtrace info. + * + * Ses perf_script__process_auxtrace_info. + */ + if (evsel->priv != NULL) + continue; + evsel->priv = perf_evsel_script__new(evsel, script->session->data); if (evsel->priv == NULL) goto out_err_fclose; @@ -2838,6 +2848,25 @@ int process_cpu_map_event(struct perf_tool *tool __maybe_unused, return set_maps(script); } +#ifdef HAVE_AUXTRACE_SUPPORT +static int perf_script__process_auxtrace_info(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session) +{ + int ret = perf_event__process_auxtrace_info(tool, event, session); + + if (ret == 0) { + struct perf_script *script = container_of(tool, struct perf_script, tool); + + ret = perf_script__setup_per_event_dump(script); + } + + return ret; +} +#else +#define perf_script__process_auxtrace_info 0 +#endif + int cmd_script(int argc, const char **argv) { bool show_full_info = false; @@ -2866,7 +2895,7 @@ int cmd_script(int argc, const char **argv) .feature = perf_event__process_feature, .build_id = perf_event__process_build_id, .id_index = perf_event__process_id_index, - .auxtrace_info = perf_event__process_auxtrace_info, + .auxtrace_info = perf_script__process_auxtrace_info, .auxtrace = perf_event__process_auxtrace, .auxtrace_error = perf_event__process_auxtrace_error, .stat = perf_event__process_stat_event, From e795dd42b716ff36ebaa5384fd1be8458d6c9c34 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 8 Nov 2017 18:42:03 -0500 Subject: [PATCH 45/83] perf vendor events powerpc: Update POWER9 events The POWER9 hardware has dropped support for several events, added a few new events and changed the category for a couple of events. Update the POWER9 events in Linux to reflect these changes. Signed-off-by: Sukadev Bhattiprolu Cc: Jiri Olsa Cc: Michael Ellerman Cc: Madhavan Srinivasan Link: http://lkml.kernel.org/r/20171108201938.GA10985@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/powerpc/power9/cache.json | 5 - .../arch/powerpc/power9/frontend.json | 7 +- .../arch/powerpc/power9/marked.json | 27 +- .../pmu-events/arch/powerpc/power9/other.json | 276 +++++------------- .../arch/powerpc/power9/pipeline.json | 14 +- .../pmu-events/arch/powerpc/power9/pmc.json | 2 +- .../arch/powerpc/power9/translation.json | 5 - 7 files changed, 88 insertions(+), 248 deletions(-) diff --git a/tools/perf/pmu-events/arch/powerpc/power9/cache.json b/tools/perf/pmu-events/arch/powerpc/power9/cache.json index 18f6645f28975..7945c5196c439 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/cache.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/cache.json @@ -124,11 +124,6 @@ "EventName": "PM_CMPLU_STALL_LARX", "BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied" }, - {, - "EventCode": "0x3006C", - "EventName": "PM_RUN_CYC_SMT2_MODE", - "BriefDescription": "Cycles in which this thread's run latch is set and the core is in SMT2 mode" - }, {, "EventCode": "0x1C058", "EventName": "PM_DTLB_MISS_16G", diff --git a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json index c63a919eda981..bd8361b5fd6a6 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json @@ -1,9 +1,4 @@ [ - {, - "EventCode": "0x3E15C", - "EventName": "PM_MRK_L2_TM_ST_ABORT_SISTER", - "BriefDescription": "TM marked store abort for this thread" - }, {, "EventCode": "0x25044", "EventName": "PM_IPTEG_FROM_L31_MOD", @@ -369,4 +364,4 @@ "EventName": "PM_IPTEG_FROM_L31_ECO_MOD", "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/marked.json b/tools/perf/pmu-events/arch/powerpc/power9/marked.json index b9df54fb37e36..22f9f32060a87 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/marked.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/marked.json @@ -1,9 +1,4 @@ [ - {, - "EventCode": "0x3C052", - "EventName": "PM_DATA_SYS_PUMP_MPRED", - "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for a demand load" - }, {, "EventCode": "0x3013E", "EventName": "PM_MRK_STALL_CMPLU_CYC", @@ -254,6 +249,11 @@ "EventName": "PM_RADIX_PWC_L1_PDE_FROM_L3", "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L3 data cache" }, + {, + "EventCode": "0x3C052", + "EventName": "PM_DATA_SYS_PUMP_MPRED", + "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for a demand load" + }, {, "EventCode": "0x4D142", "EventName": "PM_MRK_DATA_FROM_L3", @@ -434,21 +434,6 @@ "EventName": "PM_ITLB_MISS", "BriefDescription": "ITLB Reloaded. Counts 1 per ITLB miss for HPT but multiple for radix depending on number of levels traveresed" }, - {, - "EventCode": "0x2D024", - "EventName": "PM_RADIX_PWC_L2_HIT", - "BriefDescription": "A radix translation attempt missed in the TLB but hit on both the first and second levels of page walk cache." - }, - {, - "EventCode": "0x3F056", - "EventName": "PM_RADIX_PWC_L3_HIT", - "BriefDescription": "A radix translation attempt missed in the TLB but hit on the first, second, and third levels of page walk cache." - }, - {, - "EventCode": "0x4E014", - "EventName": "PM_TM_TX_PASS_RUN_INST", - "BriefDescription": "Run instructions spent in successful transactions" - }, {, "EventCode": "0x1E044", "EventName": "PM_DPTEG_FROM_L3_NO_CONFLICT", @@ -644,4 +629,4 @@ "EventName": "PM_MRK_BR_MPRED_CMPL", "BriefDescription": "Marked Branch Mispredicted" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json b/tools/perf/pmu-events/arch/powerpc/power9/other.json index 54cc3be00fc2d..5ce312973f1ec 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/other.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json @@ -79,6 +79,11 @@ "EventName": "PM_RADIX_PWC_MISS", "BriefDescription": "A radix translation attempt missed in the TLB and all levels of page walk cache." }, + {, + "EventCode": "0x26882", + "EventName": "PM_L2_DC_INV", + "BriefDescription": "D-cache invalidates sent over the reload bus to the core" + }, {, "EventCode": "0x24048", "EventName": "PM_INST_FROM_LMEM", @@ -94,11 +99,6 @@ "EventName": "PM_TM_PASSED", "BriefDescription": "Number of TM transactions that passed" }, - {, - "EventCode": "0xD1A0", - "EventName": "PM_MRK_LSU_FLUSH_LHS", - "BriefDescription": "Effective Address alias flush : no EA match but Real Address match. If the data has not yet been returned for this load, the instruction will just be rejected, but if it has returned data, it will be flushed" - }, {, "EventCode": "0xF088", "EventName": "PM_LSU0_STORE_REJECT", @@ -127,7 +127,7 @@ {, "EventCode": "0xD08C", "EventName": "PM_LSU2_LDMX_FIN", - "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])" + "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])." }, {, "EventCode": "0x300F8", @@ -204,11 +204,6 @@ "EventName": "PM_MRK_DATA_FROM_L31_ECO_MOD_CYC", "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's ECO L3 on the same chip due to a marked load" }, - {, - "EventCode": "0xF0B4", - "EventName": "PM_DC_PREF_CONS_ALLOC", - "BriefDescription": "Prefetch stream allocated in the conservative phase by either the hardware prefetch mechanism or software prefetch" - }, {, "EventCode": "0xF894", "EventName": "PM_LSU3_L1_CAM_CANCEL", @@ -219,21 +214,11 @@ "EventName": "PM_FLUSH_DISP_TLBIE", "BriefDescription": "Dispatch Flush: TLBIE" }, - {, - "EventCode": "0xD1A4", - "EventName": "PM_MRK_LSU_FLUSH_SAO", - "BriefDescription": "A load-hit-load condition with Strong Address Ordering will have address compare disabled and flush" - }, {, "EventCode": "0x4E11E", "EventName": "PM_MRK_DATA_FROM_DMEM_CYC", "BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group (Distant) due to a marked load" }, - {, - "EventCode": "0x5894", - "EventName": "PM_LWSYNC", - "BriefDescription": "Lwsync instruction decoded and transferred" - }, {, "EventCode": "0x14156", "EventName": "PM_MRK_DATA_FROM_L2_CYC", @@ -244,11 +229,6 @@ "EventName": "PM_RD_CLEARING_SC", "BriefDescription": "Read clearing SC" }, - {, - "EventCode": "0x50A0", - "EventName": "PM_HWSYNC", - "BriefDescription": "Hwsync instruction decoded and transferred" - }, {, "EventCode": "0x168B0", "EventName": "PM_L3_P1_NODE_PUMP", @@ -264,6 +244,11 @@ "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST", "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a marked load" }, + {, + "EventCode": "0x468AE", + "EventName": "PM_L3_P3_CO_RTY", + "BriefDescription": "L3 CO received retry port 3 (memory only), every retry counted" + }, {, "EventCode": "0x460A8", "EventName": "PM_SN_HIT", @@ -279,11 +264,6 @@ "EventName": "PM_DC_PREF_HW_ALLOC", "BriefDescription": "Prefetch stream allocated by the hardware prefetch mechanism" }, - {, - "EventCode": "0xF0BC", - "EventName": "PM_LS2_UNALIGNED_ST", - "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, {, "EventCode": "0xD0AC", "EventName": "PM_SRQ_SYNC_CYC", @@ -379,26 +359,11 @@ "EventName": "PM_RUN_CYC_SMT4_MODE", "BriefDescription": "Cycles in which this thread's run latch is set and the core is in SMT4 mode" }, - {, - "EventCode": "0x5088", - "EventName": "PM_DECODE_FUSION_OP_PRESERV", - "BriefDescription": "Destructive op operand preservation" - }, {, "EventCode": "0x1D14E", "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC", "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load" }, - {, - "EventCode": "0x509C", - "EventName": "PM_FORCED_NOP", - "BriefDescription": "Instruction was forced to execute as a nop because it was found to behave like a nop (have no effect) at decode time" - }, - {, - "EventCode": "0xC098", - "EventName": "PM_LS2_UNALIGNED_LD", - "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, {, "EventCode": "0x20058", "EventName": "PM_DARQ1_10_12_ENTRIES", @@ -434,11 +399,6 @@ "EventName": "PM_LSU1_STORE_REJECT", "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met" }, - {, - "EventCode": "0x4505E", - "EventName": "PM_FLOP_CMPL", - "BriefDescription": "Floating Point Operation Finished" - }, {, "EventCode": "0x1D144", "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT", @@ -480,14 +440,9 @@ "BriefDescription": "XL-form branch was mispredicted due to the predicted target address missing from EAT. The EAT forces a mispredict in this case since there is no predicated target to validate. This is a rare case that may occur when the EAT is full and a branch is issued" }, {, - "EventCode": "0xC094", - "EventName": "PM_LS0_UNALIGNED_LD", - "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, - {, - "EventCode": "0xF8BC", - "EventName": "PM_LS3_UNALIGNED_ST", - "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + "EventCode": "0x460AE", + "EventName": "PM_L3_P2_CO_RTY", + "BriefDescription": "L3 CO received retry port 2 (memory only), every retry counted" }, {, "EventCode": "0x58B0", @@ -504,11 +459,6 @@ "EventName": "PM_TM_ST_CONF", "BriefDescription": "TM Store (fav or non-fav) ran into conflict (failed)" }, - {, - "EventCode": "0xD998", - "EventName": "PM_MRK_LSU_FLUSH_EMSH", - "BriefDescription": "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address" - }, {, "EventCode": "0xF8A0", "EventName": "PM_NON_DATA_STORE", @@ -524,11 +474,6 @@ "EventName": "PM_BR_UNCOND", "BriefDescription": "Unconditional Branch Completed. HW branch prediction was not used for this branch. This can be an I-form branch, a B-form branch with BO-field set to branch always, or a B-form branch which was covenrted to a Resolve." }, - {, - "EventCode": "0x1F056", - "EventName": "PM_RADIX_PWC_L1_HIT", - "BriefDescription": "A radix translation attempt missed in the TLB and only the first level page walk cache was a hit." - }, {, "EventCode": "0xF8A8", "EventName": "PM_DC_PREF_FUZZY_CONF", @@ -544,6 +489,11 @@ "EventName": "PM_LSU2_TM_L1_MISS", "BriefDescription": "Load tm L1 miss" }, + {, + "EventCode": "0xC880", + "EventName": "PM_LS1_LD_VECTOR_FIN", + "BriefDescription": "" + }, {, "EventCode": "0x2894", "EventName": "PM_TM_OUTER_TEND", @@ -564,21 +514,11 @@ "EventName": "PM_MRK_LSU_DERAT_MISS", "BriefDescription": "Marked derat reload (miss) for any page size" }, - {, - "EventCode": "0x160A0", - "EventName": "PM_L3_PF_MISS_L3", - "BriefDescription": "L3 PF missed in L3" - }, {, "EventCode": "0x1C04A", "EventName": "PM_DATA_FROM_RL2L3_SHR", "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load" }, - {, - "EventCode": "0xD99C", - "EventName": "PM_MRK_LSU_FLUSH_UE", - "BriefDescription": "Correctable ECC error on reload data, reported at critical data forward time" - }, {, "EventCode": "0x268B0", "EventName": "PM_L3_P1_GRP_PUMP", @@ -629,11 +569,6 @@ "EventName": "PM_TMA_REQ_L2", "BriefDescription": "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding" }, - {, - "EventCode": "0x5884", - "EventName": "PM_DECODE_LANES_NOT_AVAIL", - "BriefDescription": "Decode has something to transmit but dispatch lanes are not available" - }, {, "EventCode": "0x3C042", "EventName": "PM_DATA_FROM_L3_DISP_CONFLICT", @@ -690,9 +625,9 @@ "BriefDescription": "False LHS match detected" }, {, - "EventCode": "0xD9A4", - "EventName": "PM_MRK_LSU_FLUSH_LARX_STCX", - "BriefDescription": "A larx is flushed because an older larx has an LMQ reservation for the same thread. A stcx is flushed because an older stcx is in the LMQ. The flush happens when the older larx/stcx relaunches" + "EventCode": "0xF0B0", + "EventName": "PM_L3_LD_PREF", + "BriefDescription": "L3 load prefetch, sourced from a hardware or software stream, was sent to the nest" }, {, "EventCode": "0x4D012", @@ -715,9 +650,9 @@ "BriefDescription": "All successful Ld/St dispatches for this thread that were an L2 miss (excludes i_l2mru_tch_reqs)" }, {, - "EventCode": "0xF8B8", - "EventName": "PM_LS1_UNALIGNED_ST", - "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + "EventCode": "0x160A0", + "EventName": "PM_L3_PF_MISS_L3", + "BriefDescription": "L3 PF missed in L3" }, {, "EventCode": "0x408C", @@ -764,11 +699,6 @@ "EventName": "PM_TM_NESTED_TEND", "BriefDescription": "Completion time nested tend" }, - {, - "EventCode": "0x36084", - "EventName": "PM_L2_RCST_DISP", - "BriefDescription": "All D-side store dispatch attempts for this thread" - }, {, "EventCode": "0x368A0", "EventName": "PM_L3_PF_OFF_CHIP_CACHE", @@ -829,11 +759,6 @@ "EventName": "PM_L3_SN_USAGE", "BriefDescription": "Rotating sample of 16 snoop valids" }, - {, - "EventCode": "0x16084", - "EventName": "PM_L2_RCLD_DISP", - "BriefDescription": "All I-or-D side load dispatch attempts for this thread (excludes i_l2mru_tch_reqs)" - }, {, "EventCode": "0x1608C", "EventName": "PM_RC0_BUSY", @@ -842,7 +767,7 @@ {, "EventCode": "0x36082", "EventName": "PM_L2_LD_DISP", - "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)." + "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)" }, {, "EventCode": "0xF8B0", @@ -904,11 +829,6 @@ "EventName": "PM_IC_PREF_REQ", "BriefDescription": "Instruction prefetch requests" }, - {, - "EventCode": "0xC898", - "EventName": "PM_LS3_UNALIGNED_LD", - "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, {, "EventCode": "0x488C", "EventName": "PM_IC_PREF_WRITE", @@ -1017,7 +937,7 @@ {, "EventCode": "0x3E05E", "EventName": "PM_L3_CO_MEPF", - "BriefDescription": "L3 castouts in Mepf state for this thread" + "BriefDescription": "L3 CO of line in Mep state (includes casthrough to memory). The Mepf state indicates that a line was brought in to satisfy an L3 prefetch request" }, {, "EventCode": "0x460A2", @@ -1204,11 +1124,6 @@ "EventName": "PM_TM_FAIL_NON_TX_CONFLICT", "BriefDescription": "Non transactional conflict from LSU, gets reported to TEXASR" }, - {, - "EventCode": "0xD198", - "EventName": "PM_MRK_LSU_FLUSH_ATOMIC", - "BriefDescription": "Quad-word loads (lq) are considered atomic because they always span at least 2 slices. If a snoop or store from another thread changes the data the load is accessing between the 2 or 3 pieces of the lq instruction, the lq will be flushed" - }, {, "EventCode": "0x201E0", "EventName": "PM_MRK_DATA_FROM_MEMORY", @@ -1294,11 +1209,6 @@ "EventName": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL", "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)" }, - {, - "EventCode": "0xC894", - "EventName": "PM_LS1_UNALIGNED_LD", - "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, {, "EventCode": "0x360A2", "EventName": "PM_L3_L2_CO_HIT", @@ -1324,11 +1234,6 @@ "EventName": "PM_L2_CASTOUT_SHR", "BriefDescription": "L2 Castouts - Shared (Tx,Sx)" }, - {, - "EventCode": "0xD884", - "EventName": "PM_LSU3_SET_MPRED", - "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table" - }, {, "EventCode": "0x26092", "EventName": "PM_L2_LD_MISS_64B", @@ -1362,12 +1267,12 @@ {, "EventCode": "0xD8A8", "EventName": "PM_ISLB_MISS", - "BriefDescription": "Instruction SLB miss - Total of all segment sizes" + "BriefDescription": "Instruction SLB Miss - Total of all segment sizes" }, {, - "EventCode": "0xD19C", - "EventName": "PM_MRK_LSU_FLUSH_RELAUNCH_MISS", - "BriefDescription": "If a load that has already returned data and has to relaunch for any reason then gets a miss (erat, setp, data cache), it will often be flushed at relaunch time because the data might be inconsistent" + "EventCode": "0x368AE", + "EventName": "PM_L3_P1_CO_RTY", + "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted" }, {, "EventCode": "0x260A2", @@ -1384,6 +1289,11 @@ "EventName": "PM_CMPLU_STALL_NESTED_TBEGIN", "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin. This is a short delay, and it includes ROT" }, + {, + "EventCode": "0xC084", + "EventName": "PM_LS2_LD_VECTOR_FIN", + "BriefDescription": "" + }, {, "EventCode": "0x1608E", "EventName": "PM_ST_CAUSED_FAIL", @@ -1409,11 +1319,6 @@ "EventName": "PM_CO_USAGE", "BriefDescription": "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each CO machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running" }, - {, - "EventCode": "0xD084", - "EventName": "PM_LSU2_SET_MPRED", - "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table" - }, {, "EventCode": "0x48B8", "EventName": "PM_BR_MPRED_TAKEN_TA", @@ -1449,30 +1354,25 @@ "EventName": "PM_DC_PREF_STRIDED_CONF", "BriefDescription": "A demand load referenced a line in an active strided prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software." }, + {, + "EventCode": "0x36084", + "EventName": "PM_L2_RCST_DISP", + "BriefDescription": "All D-side store dispatch attempts for this thread" + }, {, "EventCode": "0x45054", "EventName": "PM_FMA_CMPL", "BriefDescription": "two flops operation completed (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only. " }, - {, - "EventCode": "0x5090", - "EventName": "PM_SHL_ST_DISABLE", - "BriefDescription": "Store-Hit-Load Table Read Hit with entry Disabled (entry was disabled due to the entry shown to not prevent the flush)" - }, {, "EventCode": "0x201E8", "EventName": "PM_THRESH_EXC_512", "BriefDescription": "Threshold counter exceeded a value of 512" }, - {, - "EventCode": "0x5084", - "EventName": "PM_DECODE_FUSION_EXT_ADD", - "BriefDescription": "32-bit extended addition" - }, {, "EventCode": "0x36080", "EventName": "PM_L2_INST", - "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)." + "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)" }, {, "EventCode": "0x3504C", @@ -1554,21 +1454,11 @@ "EventName": "PM_MEM_RWITM", "BriefDescription": "Memory Read With Intent to Modify for this thread" }, - {, - "EventCode": "0x26882", - "EventName": "PM_L2_DC_INV", - "BriefDescription": "D-cache invalidates sent over the reload bus to the core" - }, {, "EventCode": "0xC090", "EventName": "PM_LSU_STCX", "BriefDescription": "STCX sent to nest, i.e. total" }, - {, - "EventCode": "0xD080", - "EventName": "PM_LSU0_SET_MPRED", - "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table" - }, {, "EventCode": "0x2C120", "EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT", @@ -1609,11 +1499,6 @@ "EventName": "PM_IPTEG_FROM_L2_NO_CONFLICT", "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a instruction side request" }, - {, - "EventCode": "0xD9A0", - "EventName": "PM_MRK_LSU_FLUSH_LHL_SHL", - "BriefDescription": "The instruction was flushed because of a sequential load/store consistency. If a load or store hits on an older load that has either been snooped (for loads) or has stale data (for stores)." - }, {, "EventCode": "0x35042", "EventName": "PM_IPTEG_FROM_L3_DISP_CONFLICT", @@ -1692,7 +1577,7 @@ {, "EventCode": "0x2001A", "EventName": "PM_NTC_ALL_FIN", - "BriefDescription": "Cycles after all instructions have finished to group completed" + "BriefDescription": "Cycles after instruction finished to instruction completed." }, {, "EventCode": "0x3005A", @@ -1709,6 +1594,11 @@ "EventName": "PM_LSU1_L1_CAM_CANCEL", "BriefDescription": "ls1 l1 tm cam cancel" }, + {, + "EventCode": "0x268AE", + "EventName": "PM_L3_P3_PF_RTY", + "BriefDescription": "L3 PF received retry port 3, every retry counted" + }, {, "EventCode": "0xE884", "EventName": "PM_LS1_ERAT_MISS_PREF", @@ -1742,7 +1632,7 @@ {, "EventCode": "0x160B6", "EventName": "PM_L3_WI0_BUSY", - "BriefDescription": "Rotating sample of 8 WI valid" + "BriefDescription": "Rotating sample of 8 WI valid (duplicate)" }, {, "EventCode": "0x368AC", @@ -1790,9 +1680,9 @@ "BriefDescription": "L2 guess system (VGS or RNS) and guess was correct (ie data beyond-group)" }, {, - "EventCode": "0x589C", - "EventName": "PM_PTESYNC", - "BriefDescription": "ptesync instruction counted when the instruction is decoded and transmitted" + "EventCode": "0x260AE", + "EventName": "PM_L3_P2_PF_RTY", + "BriefDescription": "L3 PF received retry port 2, every retry counted" }, {, "EventCode": "0x26086", @@ -1824,6 +1714,11 @@ "EventName": "PM_SHL_ST_DEP_CREATED", "BriefDescription": "Store-Hit-Load Table Read Hit with entry Enabled" }, + {, + "EventCode": "0x46882", + "EventName": "PM_L2_ST_HIT", + "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits" + }, {, "EventCode": "0x360AC", "EventName": "PM_L3_SN0_BUSY", @@ -1844,11 +1739,6 @@ "EventName": "PM_L2_ST_MISS", "BriefDescription": "All successful D-Side Store dispatches that were an L2 miss for this thread" }, - {, - "EventCode": "0xF8B4", - "EventName": "PM_DC_PREF_XCONS_ALLOC", - "BriefDescription": "Prefetch stream allocated in the Ultra conservative phase by either the hardware prefetch mechanism or software prefetch" - }, {, "EventCode": "0x35048", "EventName": "PM_IPTEG_FROM_DL2L3_SHR", @@ -1969,11 +1859,6 @@ "EventName": "PM_THRD_PRIO_2_3_CYC", "BriefDescription": "Cycles thread running at priority level 2 or 3" }, - {, - "EventCode": "0x10134", - "EventName": "PM_MRK_ST_DONE_L2", - "BriefDescription": "marked store completed in L2 ( RC machine done)" - }, {, "EventCode": "0x368B2", "EventName": "PM_L3_GRP_GUESS_WRONG_HIGH", @@ -2004,11 +1889,6 @@ "EventName": "PM_L2_GRP_GUESS_WRONG", "BriefDescription": "L2 guess grp (GS or NNS) and guess was not correct (ie data on-chip OR beyond-group)" }, - {, - "EventCode": "0x368AE", - "EventName": "PM_L3_P1_CO_RTY", - "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted" - }, {, "EventCode": "0xC0AC", "EventName": "PM_LSU_FLUSH_EMSH", @@ -2034,11 +1914,6 @@ "EventName": "PM_L2_GROUP_PUMP", "BriefDescription": "RC requests that were on group (aka nodel) pump attempts" }, - {, - "EventCode": "0xF0B0", - "EventName": "PM_L3_LD_PREF", - "BriefDescription": "L3 load prefetch, sourced from a hardware or software stream, was sent to the nest" - }, {, "EventCode": "0x16080", "EventName": "PM_L2_LD", @@ -2049,6 +1924,11 @@ "EventName": "PM_MATH_FLOP_CMPL", "BriefDescription": "Math flop instruction completed" }, + {, + "EventCode": "0xC080", + "EventName": "PM_LS0_LD_VECTOR_FIN", + "BriefDescription": "" + }, {, "EventCode": "0x368B0", "EventName": "PM_L3_P1_SYS_PUMP", @@ -2119,11 +1999,6 @@ "EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL", "BriefDescription": "Conditional Branch Completed in which the HW correctly predicted the direction as taken. Counted at completion time" }, - {, - "EventCode": "0xF0B8", - "EventName": "PM_LS0_UNALIGNED_ST", - "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, {, "EventCode": "0x20132", "EventName": "PM_MRK_DFU_FIN", @@ -2139,6 +2014,11 @@ "EventName": "PM_LSU_FLUSH_LHS", "BriefDescription": "Effective Address alias flush : no EA match but Real Address match. If the data has not yet been returned for this load, the instruction will just be rejected, but if it has returned data, it will be flushed" }, + {, + "EventCode": "0x16084", + "EventName": "PM_L2_RCLD_DISP", + "BriefDescription": "All I-or-D side load dispatch attempts for this thread (excludes i_l2mru_tch_reqs)" + }, {, "EventCode": "0x3F150", "EventName": "PM_MRK_ST_DRAIN_TO_L2DISP_CYC", @@ -2224,11 +2104,6 @@ "EventName": "PM_IC_PREF_CANCEL_PAGE", "BriefDescription": "Prefetch Canceled due to page boundary" }, - {, - "EventCode": "0xF09C", - "EventName": "PM_SLB_TABLEWALK_CYC", - "BriefDescription": "Cycles when a tablewalk is pending on this thread on the SLB table" - }, {, "EventCode": "0x460AA", "EventName": "PM_L3_P0_CO_L31", @@ -2247,10 +2122,10 @@ {, "EventCode": "0x46082", "EventName": "PM_L2_ST_DISP", - "BriefDescription": "All successful D-side store dispatches for this thread " + "BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)" }, {, - "EventCode": "0x4609E", + "EventCode": "0x36880", "EventName": "PM_L2_INST_MISS", "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)" }, @@ -2340,9 +2215,9 @@ "BriefDescription": "All ISU rejects" }, {, - "EventCode": "0x46882", - "EventName": "PM_L2_ST_HIT", - "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits" + "EventCode": "0xC884", + "EventName": "PM_LS3_LD_VECTOR_FIN", + "BriefDescription": "" }, {, "EventCode": "0x360A8", @@ -2359,11 +2234,6 @@ "EventName": "PM_LSU_NCST", "BriefDescription": "Asserts when a i=1 store op is sent to the nest. No record of issue pipe (LS0/LS1) is maintained so this is for both pipes. Probably don't need separate LS0 and LS1" }, - {, - "EventCode": "0xD880", - "EventName": "PM_LSU1_SET_MPRED", - "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table" - }, {, "EventCode": "0xD0B8", "EventName": "PM_LSU_LMQ_FULL_CYC", @@ -2389,4 +2259,4 @@ "EventName": "PM_L3_PF_USAGE", "BriefDescription": "Rotating sample of 32 PF actives" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json index bc2db636dabf1..5af1abbe82c48 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json @@ -124,6 +124,11 @@ "EventName": "PM_PMC5_OVERFLOW", "BriefDescription": "Overflow from counter 5" }, + {, + "EventCode": "0x4505E", + "EventName": "PM_FLOP_CMPL", + "BriefDescription": "Floating Point Operation Finished" + }, {, "EventCode": "0x2C018", "EventName": "PM_CMPLU_STALL_DMISS_L21_L31", @@ -389,11 +394,6 @@ "EventName": "PM_ICT_NOSLOT_BR_MPRED", "BriefDescription": "Ict empty for this thread due to branch mispred" }, - {, - "EventCode": "0x3405E", - "EventName": "PM_IFETCH_THROTTLE", - "BriefDescription": "Cycles in which Instruction fetch throttle was active." - }, {, "EventCode": "0x1F148", "EventName": "PM_MRK_DPTEG_FROM_ON_CHIP_CACHE", @@ -422,7 +422,7 @@ {, "EventCode": "0xD0A8", "EventName": "PM_DSLB_MISS", - "BriefDescription": "Data SLB Miss - Total of all segment sizes" + "BriefDescription": "gate_and(sd_pc_c0_comp_valid AND sd_pc_c0_comp_thread(0:1)=tid,sd_pc_c0_comp_ppc_count(0:3)) + gate_and(sd_pc_c1_comp_valid AND sd_pc_c1_comp_thread(0:1)=tid,sd_pc_c1_comp_ppc_count(0:3))" }, {, "EventCode": "0x4C058", @@ -549,4 +549,4 @@ "EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC", "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json index 3ef8a10aac863..d0b89f9305675 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json @@ -119,4 +119,4 @@ "EventName": "PM_1FLOP_CMPL", "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/translation.json b/tools/perf/pmu-events/arch/powerpc/power9/translation.json index 8c0f12024afaf..bc8e03d7a6b03 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/translation.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/translation.json @@ -89,11 +89,6 @@ "EventName": "PM_STCX_FAIL", "BriefDescription": "stcx failed" }, - {, - "EventCode": "0x20112", - "EventName": "PM_MRK_NTF_FIN", - "BriefDescription": "Marked next to finish instruction finished" - }, {, "EventCode": "0x300F0", "EventName": "PM_ST_MISS_L1", From 4359dd88afb7cac8a98e32f6bdfe0b46c79bc3cd Mon Sep 17 00:00:00 2001 From: Thomas-Mich Richter Date: Tue, 7 Nov 2017 15:48:53 +0100 Subject: [PATCH 46/83] perf buildid-cache: Update help text for purge command Clarify the perf buildid-cache help text for the purge operation. The purge subcommand takes a list of files (binaries) as option parameter. Make the wording the same as for the add and remove operation. Signed-off-by: Thomas-Mich Richter Reviewed-by: Hendrik Brueckner Acked-by: Masami Hiramatsu Cc: Martin Schwidefsky LPU-Reference: 20171107144853.12925-1-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-buildid-cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 3d354ba6e9c59..41db2cba77eb9 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -325,8 +325,8 @@ int cmd_buildid_cache(int argc, const char **argv) "file", "kcore file to add"), OPT_STRING('r', "remove", &remove_name_list_str, "file list", "file(s) to remove"), - OPT_STRING('p', "purge", &purge_name_list_str, "path list", - "path(s) to remove (remove old caches too)"), + OPT_STRING('p', "purge", &purge_name_list_str, "file list", + "file(s) to remove (remove old caches too)"), OPT_STRING('M', "missing", &missing_filename, "file", "to find missing build ids in the cache"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), From 35c0a81a97692cc0afe3d005c9a737bbde06e784 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Nov 2017 06:55:24 -0800 Subject: [PATCH 47/83] perf tools: Document some missing perf.data headers Document STAT and CACHE header entries. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171109145528.23371-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- .../Documentation/perf.data-file-format.txt | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index e90c59c6d8157..15e8b48077ba3 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -238,6 +238,29 @@ struct auxtrace_index { struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT]; }; + HEADER_STAT = 19, + +This is merely a flag signifying that the data section contains data +recorded from perf stat record. + + HEADER_CACHE = 20, + +Description of the cache hierarchy. Based on the Linux sysfs format +in /sys/devices/system/cpu/cpu*/cache/ + + u32 version Currently always 1 + u32 number_of_cache_levels + +struct { + u32 level; + u32 line_size; + u32 sets; + u32 ways; + struct perf_header_string type; + struct perf_header_string size; + struct perf_header_string map; +}[number_of_cache_levels]; + other bits are reserved and should ignored for now HEADER_FEAT_BITS = 256, From 5039c8a28fa97b8dce7b363a5ecd4bee2b87bf03 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Nov 2017 06:55:26 -0800 Subject: [PATCH 48/83] perf script: Allow printing period for non freq mode groups When using leader sampling the values of the not sampled but counted events are shown by perf script in "period". Currently printing period is only allowed when the main event has a period, that is it is in frequency mode. This implies that we cannot dump the values of counted events when the leader event is not in frequency mode. Just remove the check that the period must be set on all events. It will just be printed as 0 instead if it's not available. This fixes the following: $ perf record -c 100000 -e '{cycles,branches}:S' $ perf script -F event,period Further commentary by Jiri Olsa: The period will be the value of configured period, not 0: int perf_evsel__parse_sample(struct ... ... data->period = evsel->attr.sample_period; $ perf record -c 100000 $ perf script -F event,period | head -3 Failed to open /tmp/perf-2048.map, continuing without symbols 100000 cycles:ppp: 100000 cycles:ppp: other than that I think we can remove that check, because we will have always sane number in period Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171109145528.23371-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 9b43bda45a415..ee7c7aaaae72e 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -423,11 +423,6 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, PERF_OUTPUT_CPU, allow_user_set)) return -EINVAL; - if (PRINT_FIELD(PERIOD) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_PERIOD, "PERIOD", - PERF_OUTPUT_PERIOD)) - return -EINVAL; - if (PRINT_FIELD(IREGS) && perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", PERF_OUTPUT_IREGS)) From 958964f803b27baffd238708842b527a1d30e110 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Sun, 12 Nov 2017 10:10:46 +0900 Subject: [PATCH 49/83] perf top: Document missing options Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510449047-12941-2-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 4353262bc462b..8a32cc77bead3 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -268,6 +268,12 @@ INTERACTIVE PROMPTING KEYS [S]:: Stop annotation, return to full profile display. +[K]:: + Hide kernel symbols. + +[U]:: + Hide user symbols. + [z]:: Toggle event count zeroing across display updates. From 8fce3743cea47db86dd13ab4c479158a872271e8 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Sun, 12 Nov 2017 10:10:47 +0900 Subject: [PATCH 50/83] perf top: Remove a duplicate word Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510449047-12941-3-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0789f95ca2f37..68320ac5e9b07 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -412,7 +412,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top) fprintf(stdout, "\t[S] stop annotation.\n"); fprintf(stdout, - "\t[K] hide kernel_symbols symbols. \t(%s)\n", + "\t[K] hide kernel symbols. \t(%s)\n", top->hide_kernel_symbols ? "yes" : "no"); fprintf(stdout, "\t[U] hide user symbols. \t(%s)\n", From d492326f160e44e08fcf132a63163b36dd8e8839 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 13 Nov 2017 01:38:04 +0000 Subject: [PATCH 51/83] perf tests: Set evlist of test__backward_ring_buffer() to !overwrite Setting overwrite in perf_evlist__mmap() is meaningless because the event in this evlist is already have 'overwrite' postfix and goes to backward ring buffer automatically. Pass 'false' to perf_evlist__mmap() to make it similar to others. Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20171113013809.212417-3-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/backward-ring-buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 71b9a0b613d2b..43a8c6ac4070e 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -59,7 +59,7 @@ static int do_test(struct perf_evlist *evlist, int mmap_pages, int err; char sbuf[STRERR_BUFSIZE]; - err = perf_evlist__mmap(evlist, mmap_pages, true); + err = perf_evlist__mmap(evlist, mmap_pages, false); if (err < 0) { pr_debug("perf_evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); From 677b0601768881934f658bebb1713c3c843893fa Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 13 Nov 2017 01:38:05 +0000 Subject: [PATCH 52/83] perf tests: Set evlist of test__sw_clock_freq() to !overwrite Unsetting overwrite when calling perf_evlist__mmap is harmless. This commit passes false to it, makes following commits eliminate the overwrite argument easier. Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20171113013809.212417-4-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sw-clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 725a196991a88..c6937ed12e6bc 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -78,7 +78,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) goto out_delete_evlist; } - err = perf_evlist__mmap(evlist, 128, true); + err = perf_evlist__mmap(evlist, 128, false); if (err < 0) { pr_debug("failed to mmap event: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); From 301d724aa19add1c0cf3ec8cad0d10151d30393f Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 13 Nov 2017 01:38:06 +0000 Subject: [PATCH 53/83] perf tests: Set evlist of test__basic_mmap() to !overwrite In this test, a large ring buffer is required so all events can feed into, so overwrite or not is meaningless. Change to !overwrite so following commits can remove this argument. Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20171113013809.212417-5-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/mmap-basic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 5a8bf318f8a7e..91f10d6d9ae2f 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -94,7 +94,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse expected_nr_events[i] = 1 + rand() % 127; } - if (perf_evlist__mmap(evlist, 128, true) < 0) { + if (perf_evlist__mmap(evlist, 128, false) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; From a0e3dd79cdd8ad838cbcefeff530a15193f8336e Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 13 Nov 2017 01:38:07 +0000 Subject: [PATCH 54/83] perf tests: Set evlist of test__task_exit() to !overwrite Changing ringbuffer to !overwrite in this task is harmless because this test uses a very low frequency (1) and using a very simple program (true). There should have only 3 events in the whole test. Overwriting is impossible to happen. Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20171113013809.212417-6-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/task-exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index bc4a7344e2742..98c098475e71d 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -97,7 +97,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused goto out_delete_evlist; } - if (perf_evlist__mmap(evlist, 128, true) < 0) { + if (perf_evlist__mmap(evlist, 128, false) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; From 19993b82a571893e661afd90f1d77fa698785cee Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 13 Nov 2017 16:06:29 -0300 Subject: [PATCH 55/83] perf machine: Guard against NULL in machine__exit() A recent fix for 'perf trace' introduced a bug where machine__exit(trace->host) could be called while trace->host was still NULL, so make this more robust by guarding against NULL, just like free() does. The problem happens, for instance, when !root users try to run 'perf trace': [acme@jouet linux]$ trace Error: No permissions to read /sys/kernel/debug/tracing/events/raw_syscalls/sys_(enter|exit) Hint: Try 'sudo mount -o remount,mode=755 /sys/kernel/debug/tracing' perf: Segmentation fault Obtained 7 stack frames. [0x4f1b2e] /lib64/libc.so.6(+0x3671f) [0x7f43a1dd971f] [0x4f3fec] [0x47468b] [0x42a2db] /lib64/libc.so.6(__libc_start_main+0xe9) [0x7f43a1dc3509] [0x42a6c9] Segmentation fault (core dumped) [acme@jouet linux]$ Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andrei Vagin Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Vasily Averin Cc: Wang Nan Fixes: 33974a414ce2 ("perf trace: Call machine__exit() at exit") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 6a8d03c3d9b70..270f3223c6df1 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -172,6 +172,9 @@ void machine__exit(struct machine *machine) { int i; + if (machine == NULL) + return; + machine__destroy_kernel_maps(machine); map_groups__exit(&machine->kmaps); dsos__exit(&machine->dsos); From 2f0af8600e82e9f950fc32908386b9c639f88d48 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 14 Nov 2017 09:15:42 +0900 Subject: [PATCH 56/83] perf help: Fix a bug during strstart() conversion The commit 8e99b6d4533c changed prefixcmp() to strstart() but missed to change the return value in some place. It makes perf help print annoying output even for sane config items like below: $ perf help '.root': unsupported man viewer sub key. ... Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Tested-by: Taeung Song Cc: Jiri Olsa Cc: Sihyeon Jang Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20171114001542.GA16464@sejong Fixes: 8e99b6d4533c ("tools include: Adopt strstarts() from the kernel") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-help.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index bd1fedef3d1c5..a0f7ed2b869b0 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -284,7 +284,7 @@ static int perf_help_config(const char *var, const char *value, void *cb) add_man_viewer(value); return 0; } - if (!strstarts(var, "man.")) + if (strstarts(var, "man.")) return add_man_viewer_info(var, value); return 0; @@ -314,7 +314,7 @@ static const char *cmd_to_page(const char *perf_cmd) if (!perf_cmd) return "perf"; - else if (!strstarts(perf_cmd, "perf")) + else if (strstarts(perf_cmd, "perf")) return perf_cmd; return asprintf(&s, "perf-%s", perf_cmd) < 0 ? NULL : s; From 648388ae68e953b312e28eaf869fe6c01e2f70cc Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 14 Nov 2017 08:55:40 +0530 Subject: [PATCH 57/83] perf annotate: Do not truncate instruction names at 6 chars There are many instructions, esp on PowerPC, whose mnemonics are longer than 6 characters. Using precision limit causes truncation of such mnemonics. Fix this by removing precision limit. Note that, 'width' is still 6, so alignment won't get affected for length <= 6. Before: li r11,-1 xscvdp vs1,vs1 add. r10,r10,r11 After: li r11,-1 xscvdpsxds vs1,vs1 add. r10,r10,r11 Reported-by: Donald Stence Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Taeung Song Link: http://lkml.kernel.org/r/20171114032540.4564-1-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index eab4a8e3c6799..30d74dabdc426 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -165,7 +165,7 @@ static void ins__delete(struct ins_operands *ops) static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->raw); + return scnprintf(bf, size, "%-6s %s", ins->name, ops->raw); } int ins__scnprintf(struct ins *ins, char *bf, size_t size, @@ -230,12 +230,12 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { if (ops->target.name) - return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->target.name); + return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name); if (ops->target.addr == 0) return ins__raw_scnprintf(ins, bf, size, ops); - return scnprintf(bf, size, "%-6.6s *%" PRIx64, ins->name, ops->target.addr); + return scnprintf(bf, size, "%-6s *%" PRIx64, ins->name, ops->target.addr); } static struct ins_ops call_ops = { @@ -299,7 +299,7 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size, c++; } - return scnprintf(bf, size, "%-6.6s %.*s%" PRIx64, + return scnprintf(bf, size, "%-6s %.*s%" PRIx64, ins->name, c ? c - ops->raw : 0, ops->raw, ops->target.offset); } @@ -372,7 +372,7 @@ static int lock__scnprintf(struct ins *ins, char *bf, size_t size, if (ops->locked.ins.ops == NULL) return ins__raw_scnprintf(ins, bf, size, ops); - printed = scnprintf(bf, size, "%-6.6s ", ins->name); + printed = scnprintf(bf, size, "%-6s ", ins->name); return printed + ins__scnprintf(&ops->locked.ins, bf + printed, size - printed, ops->locked.ops); } @@ -448,7 +448,7 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *m static int mov__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - return scnprintf(bf, size, "%-6.6s %s,%s", ins->name, + return scnprintf(bf, size, "%-6s %s,%s", ins->name, ops->source.name ?: ops->source.raw, ops->target.name ?: ops->target.raw); } @@ -488,7 +488,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops static int dec__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - return scnprintf(bf, size, "%-6.6s %s", ins->name, + return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name ?: ops->target.raw); } @@ -500,7 +500,7 @@ static struct ins_ops dec_ops = { static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size, struct ins_operands *ops __maybe_unused) { - return scnprintf(bf, size, "%-6.6s", "nop"); + return scnprintf(bf, size, "%-6s", "nop"); } static struct ins_ops nop_ops = { @@ -990,7 +990,7 @@ void disasm_line__free(struct disasm_line *dl) int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw) { if (raw || !dl->ins.ops) - return scnprintf(bf, size, "%-6.6s %s", dl->ins.name, dl->ops.raw); + return scnprintf(bf, size, "%-6s %s", dl->ins.name, dl->ops.raw); return ins__scnprintf(&dl->ins, bf, size, &dl->ops); } From f231af789b11a2f1a3795acc3228a3e178a80c21 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 14 Nov 2017 08:18:46 +0100 Subject: [PATCH 58/83] perf test shell: Fix check open filename arg using 'perf trace' on s390x This 'perf test' case fails on s390x. The 'touch' command on s390x uses the 'openat' system call to open the file named on the command line: [root@s35lp76 perf]# perf probe -l probe:vfs_getname (on getname_flags:72@fs/namei.c with pathname) [root@s35lp76 perf]# perf trace -e open touch /tmp/abc 0.400 ( 0.015 ms): touch/27542 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) = 3 [root@s35lp76 perf]# There is no 'open' system call for file '/tmp/abc'. Instead the 'openat' system call is used: [root@s35lp76 perf]# strace touch /tmp/abc execve("/usr/bin/touch", ["touch", "/tmp/abc"], 0x3ffd547ec98 /* 30 vars */) = 0 [...] openat(AT_FDCWD, "/tmp/abc", O_WRONLY|O_CREAT|O_NOCTTY|O_NONBLOCK, 0666) = 3 [...] On s390x the 'egrep' command does not find a matching pattern and returns an error. Fix this for s390x create a platform dependent command line to enable the 'perf probe' call to listen to the 'openat' system call and get the expected output. Signed-off-by: Thomas-Mich Richter Tested-by: Arnaldo Carvalho de Melo Cc: Hendrik Brueckner Cc: Thomas-Mich Richter LPU-Reference: 20171114071847.2381-1-tmricht@linux.vnet.ibm.com Link: http://lkml.kernel.org/n/tip-3qf38jk0prz54rhmhyu871my@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_vfs_getname.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 2e68c5f120da8..2a9ef080efd02 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -17,8 +17,10 @@ skip_if_no_perf_probe || exit 2 file=$(mktemp /tmp/temporary_file.XXXXX) trace_open_vfs_getname() { - perf trace -e open touch $file 2>&1 | \ - egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open\(filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" + test "$(uname -m)" = s390x && { svc="openat"; txt="dfd: +CWD, +"; } + + perf trace -e ${svc:-open} touch $file 2>&1 | \ + egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ ${svc:-open}\(${txt}filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } From 0879e5e5f33c8a1eb01281ad920173664e68b266 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 14 Nov 2017 08:18:47 +0100 Subject: [PATCH 59/83] perf test shell: Fix test case probe libc's inet_pton on s390x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'perf test' case "probe libc's inet_pton & backtrace it with ping" fails on s390x. The reason is the 'realpath /lib64/ld*.so.* | uniq' line which returns 2 libraries: root@s35lp76 shell]# realpath /lib64/ld*.so.* | uniq /usr/lib64/ld-2.26.so /usr/lib64/ld_pre_smc.so.1.0.1 [root@s35lp76 shell] This output makes the "perf probe" command lines invalid. Use ldd tool to find out the libraries required by "bash" and check if symbol "inet_pton" is part of the "libc" library. Some distros do not have a /lib64 directory. I have also added a check for the existence of an IPv6 network interface before it is being used. Committer changes: We can't really use ldd for libc, as in some systems, such as x86_64, it has hardlinks and then ldd sees one and the kernel the other, so grep for libc in /proc/self/maps to get the one we'll receive from PERF_RECORD_MMAP. Thomas checked this change and acked it. Signed-off-by: Thomas-Mich Richter Tested-by: Arnaldo Carvalho de Melo Suggested-by: Hendrik Brückner Reviewed-by: Hendrik Brückner Link: http://lkml.kernel.org/r/20171114133409.GN8836@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_libc_inet_pton.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh index 7a84d73324e3c..8b3da21a08f19 100755 --- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh @@ -10,8 +10,8 @@ . $(dirname $0)/lib/probe.sh -ld=$(realpath /lib64/ld*.so.* | uniq) -libc=$(echo $ld | sed 's/ld/libc/g') +libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1/g') +nm -g $libc 2>/dev/null | fgrep -q inet_pton || exit 254 trace_libc_inet_pton_backtrace() { idx=0 @@ -37,6 +37,9 @@ trace_libc_inet_pton_backtrace() { done } +# Check for IPv6 interface existence +ip a sh lo | fgrep -q inet6 || exit 2 + skip_if_no_perf_probe && \ perf probe -q $libc inet_pton && \ trace_libc_inet_pton_backtrace From 07d6f446a9e45b7e6e7438f8560e40d4dcfa0321 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Nov 2017 11:01:06 -0300 Subject: [PATCH 60/83] perf evlist: Add helper to check if attr.exclude_kernel is set in all evsels The warning about kptr_restrict needs to be emitted only when it is set and we ask for kernel space samples, so add a helper to help with that. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-fh7drty6yljei9gxxzer6eup@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 12 ++++++++++++ tools/perf/util/evlist.h | 2 ++ 2 files changed, 14 insertions(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ccb749f9a83f9..b62e523a70352 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1786,3 +1786,15 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, state_err: return; } + +bool perf_evlist__exclude_kernel(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (!evsel->attr.exclude_kernel) + return false; + } + + return true; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e72ae64c11acb..491f695429209 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -312,4 +312,6 @@ perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str); struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, union perf_event *event); + +bool perf_evlist__exclude_kernel(struct perf_evlist *evlist); #endif /* __PERF_EVLIST_H */ From 9c39ed90153d95d362004ed0d5e259ec46af3803 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Nov 2017 11:12:11 -0300 Subject: [PATCH 61/83] perf report: Ignore kptr_restrict when not sampling the kernel If none of the evsels has attr.exclude_kernel set to zero, no kernel samples, so no point in warning the user about problems in processing kernel samples, as there will be none. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-7dn926v3at8txxkky92aesz2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1394cd8d96f7b..af5dd038195e3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -441,6 +441,9 @@ static void report__warn_kptr_restrict(const struct report *rep) struct map *kernel_map = machine__kernel_map(&rep->session->machines.host); struct kmap *kernel_kmap = kernel_map ? map__kmap(kernel_map) : NULL; + if (perf_evlist__exclude_kernel(rep->session->evlist)) + return; + if (kernel_map == NULL || (kernel_map->dso->hit && (kernel_kmap->ref_reloc_sym == NULL || From 6c4439545517c9d6155e85f1a508be38408fb0b4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Nov 2017 11:03:19 -0300 Subject: [PATCH 62/83] perf record: Ignore kptr_restrict when not sampling the kernel If we're not sampling the kernel, we shouldn't care about kptr_restrict neither synthesize anything for assisting in resolving kernel samples, like the reference relocation symbol or kernel modules information. Before: $ cat /proc/sys/kernel/kptr_restrict /proc/sys/kernel/perf_event_paranoid 2 2 $ perf record sleep 1 WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted, check /proc/sys/kernel/kptr_restrict. Samples in kernel functions may not be resolved if a suitable vmlinux file is not found in the buildid cache or in the vmlinux path. Samples in kernel modules won't be resolved at all. If some relocation was applied (e.g. kexec) symbols may be misresolved even with a suitable vmlinux or kallsyms file. Couldn't record kernel reference relocation symbol Symbol resolution may be skewed if relocation was used (e.g. kexec). Check /proc/kallsyms permission or run as root. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (8 samples) ] $ perf evlist -v cycles:uppp: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, exclude_kernel: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 $ After: $ perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (10 samples) ] $ Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-t025e9zftbx2b8cq2w01g5e5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 5f78ce9434078..003255910c05d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -765,17 +765,19 @@ static int record__synthesize(struct record *rec, bool tail) goto out; } - err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine); - WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/kallsyms permission or run as root.\n"); - - err = perf_event__synthesize_modules(tool, process_synthesized_event, - machine); - WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/modules permission or run as root.\n"); + if (!perf_evlist__exclude_kernel(rec->evlist)) { + err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, + machine); + WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/kallsyms permission or run as root.\n"); + + err = perf_event__synthesize_modules(tool, process_synthesized_event, + machine); + WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/modules permission or run as root.\n"); + } if (perf_guest) { machines__process_guests(&session->machines, @@ -1709,7 +1711,7 @@ int cmd_record(int argc, const char **argv) err = -ENOMEM; - if (symbol_conf.kptr_restrict) + if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist)) pr_warning( "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" "check /proc/sys/kernel/kptr_restrict.\n\n" From b89a5124d2089eec8f090dcd05dd88abaec0cbd2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Nov 2017 13:30:19 -0300 Subject: [PATCH 63/83] perf top: Ignore kptr_restrict when not sampling the kernel If all events have attr.exclude_kernel set, no need to look at kptr_restrict. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-yegpzg5bf2im69g0tfizqaqz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 68320ac5e9b07..865191281591a 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -735,14 +735,16 @@ static void perf_event__process_sample(struct perf_tool *tool, if (!machine->kptr_restrict_warned && symbol_conf.kptr_restrict && al.cpumode == PERF_RECORD_MISC_KERNEL) { - ui__warning( + if (!perf_evlist__exclude_kernel(top->session->evlist)) { + ui__warning( "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" "Check /proc/sys/kernel/kptr_restrict.\n\n" "Kernel%s samples will not be resolved.\n", al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ? " modules" : ""); - if (use_browser <= 0) - sleep(5); + if (use_browser <= 0) + sleep(5); + } machine->kptr_restrict_warned = true; } From 239fb4fed6c49110ebebe7378a84d96e3f0cf55d Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 14 Nov 2017 15:04:47 -0600 Subject: [PATCH 64/83] perf c2c: Fix spelling mistakes in browser help text Togle -> Toggle, lenght -> length. Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171114150447.f4b63bc5d97c83cdaa8bf7dc@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 17855c4626a08..f1da9b0833c07 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2224,9 +2224,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) struct hist_browser *browser; int key = -1; const char help[] = - " ENTER Togle callchains (if present) \n" - " n Togle Node details info \n" - " s Togle full lenght of symbol and source line columns \n" + " ENTER Toggle callchains (if present) \n" + " n Toggle Node details info \n" + " s Toggle full length of symbol and source line columns \n" " q Return back to cacheline list \n"; /* Display compact version first. */ @@ -2303,7 +2303,7 @@ static int perf_c2c__hists_browse(struct hists *hists) int key = -1; const char help[] = " d Display cacheline details \n" - " ENTER Togle callchains (if present) \n" + " ENTER Toggle callchains (if present) \n" " q Quit \n"; browser = perf_c2c_browser__new(hists); From 114bc191c37028d87a540251d93e7b328f4de3fe Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 14 Nov 2017 15:04:52 -0600 Subject: [PATCH 65/83] perf evsel: Say which PMU Hardware event doesn't support sampling/overflow-interrupts Help identify to the user the event with the unsupported sampling error. Also suggest a corrective action. BEFORE: $ sudo ./oldperf record -e armv8_pmuv3/mem_access/,ccn/cycles/,armv8_pmuv3/l2d_cache/ true Error: PMU Hardware doesn't support sampling/overflow-interrupts. AFTER: $ sudo ./newperf record -e armv8_pmuv3/mem_access/,ccn/cycles/,armv8_pmuv3/l2d_cache/ true Error: ccn/cycles/: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat' Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171114150452.e846f2e23684c7d7d8ee706f@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index cb9bcdb065ea8..b8e9def77f447 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2745,8 +2745,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, break; case EOPNOTSUPP: if (evsel->attr.sample_period != 0) - return scnprintf(msg, size, "%s", - "PMU Hardware doesn't support sampling/overflow-interrupts."); + return scnprintf(msg, size, + "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'", + perf_evsel__name(evsel)); if (evsel->attr.precise_ip) return scnprintf(msg, size, "%s", "\'precise\' request may not be supported. Try removing 'p' modifier."); From 38ba1daf8164d43d48b45c8e8deee4b20c21484d Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 22:06:49 +0900 Subject: [PATCH 66/83] perf lock: Document missing options Add man page entry for --force. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510837609-6277-1-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-lock.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index ab25be28c9dca..74d7745921968 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -42,6 +42,10 @@ COMMON OPTIONS --dump-raw-trace:: Dump raw trace in ASCII. +-f:: +--force:: + Don't complan, do it. + REPORT OPTIONS -------------- From 742015ff12ae27324b8ad2d28e43da6743529bad Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 9 Aug 2017 18:14:06 +0200 Subject: [PATCH 67/83] perf: Fix header.size for namespace events Reset header size for namespace events, otherwise it only gets bigger in ctx iterations. Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra (Intel) Fixes: e422267322cd ("perf: Add PERF_RECORD_NAMESPACES to include namespaces related info") Link: http://lkml.kernel.org/n/tip-nlo4gonz9d4guyb8153ukzt0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 81dd57b9e5e39..aa21555972aad 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6640,6 +6640,7 @@ static void perf_event_namespaces_output(struct perf_event *event, struct perf_namespaces_event *namespaces_event = data; struct perf_output_handle handle; struct perf_sample_data sample; + u16 header_size = namespaces_event->event_id.header.size; int ret; if (!perf_event_namespaces_match(event)) @@ -6650,7 +6651,7 @@ static void perf_event_namespaces_output(struct perf_event *event, ret = perf_output_begin(&handle, event, namespaces_event->event_id.header.size); if (ret) - return; + goto out; namespaces_event->event_id.pid = perf_event_pid(event, namespaces_event->task); @@ -6662,6 +6663,8 @@ static void perf_event_namespaces_output(struct perf_event *event, perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); +out: + namespaces_event->event_id.header.size = header_size; } static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info, From 52186b8aa40f06350b33f8e4031879d389e2b9f2 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:02 +0900 Subject: [PATCH 68/83] perf inject: Document missing options Add the missing --force option to the man page. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-1-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-inject.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index 87b2588d1cbde..a64d6588470e6 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -60,6 +60,10 @@ include::itrace.txt[] found in the jitdumps files captured in the input perf.data file. Use this option if you are monitoring environment using JIT runtimes, such as Java, DART or V8. +-f:: +--force:: + Don't complain, do it. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1] From 9b9d28a0087608052b39e7d9ee2f07b4e0fd6dca Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:03 +0900 Subject: [PATCH 69/83] perf trace: Document missing option, colons Add missing --force option to the man page. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-2-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index d53bea6bd5710..6909cf1e0eea2 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -86,18 +86,18 @@ comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0- In per-thread mode with inheritance mode on (default), Events are captured only when the thread executes on the designated CPUs. Default is to monitor all CPUs. ---duration: +--duration:: Show only events that had a duration greater than N.M ms. ---sched: +--sched:: Accrue thread runtime and provide a summary at the end of the session. --i ---input +-i:: +--input:: Process events from a given perf data file. --T ---time +-T:: +--time:: Print full timestamp rather time relative to first sample. --comm:: @@ -117,6 +117,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. Show tool stats such as number of times fd->pathname was discovered thru hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc. +-f:: +--force:: + Don't complain, do it. + -F=[all|min|maj]:: --pf=[all|min|maj]:: Trace pagefaults. Optionally, you can specify whether you want minor, From f4a30d2bee25b92f25086c81e33c80d767500097 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:04 +0900 Subject: [PATCH 70/83] perf timechart: Document missing --force option Add --force to the man page. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-3-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-timechart.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt index df98d1c82688a..ef0c7565bd5c6 100644 --- a/tools/perf/Documentation/perf-timechart.txt +++ b/tools/perf/Documentation/perf-timechart.txt @@ -50,7 +50,9 @@ TIMECHART OPTIONS -p:: --process:: Select the processes to display, by name or PID - +-f:: +--force:: + Don't complain, do it. --symfs=:: Look for files with symbols relative to this directory. -n:: From e9b61e52c384f4af13404ad95161af58af08c908 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:05 +0900 Subject: [PATCH 71/83] perf sched: Document missing --force option Add --force to the man page. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-4-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-sched.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 55b67338548e1..c7e50f2638873 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -74,6 +74,10 @@ OPTIONS --dump-raw-trace=:: Display verbose dump of the sched data. +-f:: +--force:: + Don't complain, do it. + OPTIONS for 'perf sched map' ---------------------------- From deb368acf1731bf89c34b171094c4f8eff66ebd9 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:06 +0900 Subject: [PATCH 72/83] perf evlist: Document missing --force option Add --force to the man page. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-5-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-evlist.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt index 6f7200fb85cf2..c0a66400a960d 100644 --- a/tools/perf/Documentation/perf-evlist.txt +++ b/tools/perf/Documentation/perf-evlist.txt @@ -20,6 +20,10 @@ OPTIONS --input=:: Input file name. (default: perf.data unless stdin is a fifo) +-f:: +--force:: + Don't complain, do it. + -F:: --freq=:: Show just the sample frequency used for each event. From 5a79eef4eccf0571e856eb13c0ffe19083d27474 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:07 +0900 Subject: [PATCH 73/83] perf buildid-cache: Document missing --force option Add --force to the man page. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-6-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-buildid-cache.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index 84681007f80f4..73c2650bd0db5 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -24,6 +24,9 @@ OPTIONS -a:: --add=:: Add specified file to the cache. +-f:: +--force:: + Don't complain, do it. -k:: --kcore:: Add specified kcore file to the cache. For the current host that is From 914eb9ca51117776d83e6761a1c555fb76f0ded2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 6 Aug 2017 16:39:39 +0200 Subject: [PATCH 74/83] perf callchain: Reset cursor arg instead of callchain_cursor We already pass cursor into thread__resolve_callchain function, so there's no point in resetting the global instance. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-puk015qvuppao9m1xtdy9v7j@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 270f3223c6df1..64d255f6a5374 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2204,7 +2204,7 @@ int thread__resolve_callchain(struct thread *thread, { int ret = 0; - callchain_cursor_reset(&callchain_cursor); + callchain_cursor_reset(cursor); if (callchain_param.order == ORDER_CALLEE) { ret = thread__resolve_callchain_sample(thread, cursor, From 3ad31d8a0df257c3f18c989119359c1f25cd009d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 3 Aug 2017 16:07:05 +0200 Subject: [PATCH 75/83] perf evsel: Centralize perf_sample initialization Move the initialization bits into common place at the beginning of the function. Also removing some superfluous zero initialization for addr and transaction, because we zero all the data at the top. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-1gv5t6fvv735t1rt3mxpy1h9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index b8e9def77f447..03d7abcdc6b78 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1983,6 +1983,8 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, data->stream_id = data->id = data->time = -1ULL; data->period = evsel->attr.sample_period; data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + data->id = -1ULL; + data->data_src = PERF_MEM_DATA_SRC_NONE; if (event->header.type != PERF_RECORD_SAMPLE) { if (!evsel->attr.sample_id_all) @@ -2000,7 +2002,6 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (evsel->sample_size + sizeof(event->header) > event->header.size) return -EFAULT; - data->id = -1ULL; if (type & PERF_SAMPLE_IDENTIFIER) { data->id = *array; array++; @@ -2030,7 +2031,6 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array++; } - data->addr = 0; if (type & PERF_SAMPLE_ADDR) { data->addr = *array; array++; @@ -2194,14 +2194,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array++; } - data->data_src = PERF_MEM_DATA_SRC_NONE; if (type & PERF_SAMPLE_DATA_SRC) { OVERFLOW_CHECK_u64(array); data->data_src = *array; array++; } - data->transaction = 0; if (type & PERF_SAMPLE_TRANSACTION) { OVERFLOW_CHECK_u64(array); data->transaction = *array; From 014681208ea0d1a7e5ea2f014242e7d196d04c34 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 3 Aug 2017 13:10:28 +0200 Subject: [PATCH 76/83] perf evlist: Add perf_evlist__parse_sample_timestamp function Add perf_evlist__parse_sample_timestamp to retrieve the timestamp of the sample. The idea is to use this function instead of the full sample parsing before we queue the sample. At that time only the timestamp is needed and we parse the sample once again later on delivery. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-o7syqo8lipj4or7renpu8e8y@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 11 +++++++ tools/perf/util/evlist.h | 4 +++ tools/perf/util/evsel.c | 65 ++++++++++++++++++++++++++++++++++++---- tools/perf/util/evsel.h | 4 +++ 4 files changed, 78 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index b62e523a70352..199bb82efbcdf 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1582,6 +1582,17 @@ int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *even return perf_evsel__parse_sample(evsel, event, sample); } +int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist, + union perf_event *event, + u64 *timestamp) +{ + struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); + + if (!evsel) + return -EFAULT; + return perf_evsel__parse_sample_timestamp(evsel, event, timestamp); +} + size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) { struct perf_evsel *evsel; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 491f695429209..4e8131dacbd7d 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -205,6 +205,10 @@ u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample); +int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist, + union perf_event *event, + u64 *timestamp); + bool perf_evlist__valid_sample_type(struct perf_evlist *evlist); bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist); bool perf_evlist__valid_read_format(struct perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 03d7abcdc6b78..95853c51c0ca4 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1962,6 +1962,20 @@ static inline bool overflow(const void *endp, u16 max_size, const void *offset, #define OVERFLOW_CHECK_u64(offset) \ OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64)) +static int +perf_event__check_size(union perf_event *event, unsigned int sample_size) +{ + /* + * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes + * up to PERF_SAMPLE_PERIOD. After that overflow() must be used to + * check the format does not go past the end of the event. + */ + if (sample_size + sizeof(event->header) > event->header.size) + return -EFAULT; + + return 0; +} + int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -1994,12 +2008,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array = event->sample.array; - /* - * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes - * up to PERF_SAMPLE_PERIOD. After that overflow() must be used to - * check the format does not go past the end of the event. - */ - if (evsel->sample_size + sizeof(event->header) > event->header.size) + if (perf_event__check_size(event, evsel->sample_size)) return -EFAULT; if (type & PERF_SAMPLE_IDENTIFIER) { @@ -2232,6 +2241,50 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, return 0; } +int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel, + union perf_event *event, + u64 *timestamp) +{ + u64 type = evsel->attr.sample_type; + const u64 *array; + + if (!(type & PERF_SAMPLE_TIME)) + return -1; + + if (event->header.type != PERF_RECORD_SAMPLE) { + struct perf_sample data = { + .time = -1ULL, + }; + + if (!evsel->attr.sample_id_all) + return -1; + if (perf_evsel__parse_id_sample(evsel, event, &data)) + return -1; + + *timestamp = data.time; + return 0; + } + + array = event->sample.array; + + if (perf_event__check_size(event, evsel->sample_size)) + return -EFAULT; + + if (type & PERF_SAMPLE_IDENTIFIER) + array++; + + if (type & PERF_SAMPLE_IP) + array++; + + if (type & PERF_SAMPLE_TID) + array++; + + if (type & PERF_SAMPLE_TIME) + *timestamp = *array; + + return 0; +} + size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 0688880227e18..c3663a70c9b9b 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -338,6 +338,10 @@ static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel, int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *sample); +int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel, + union perf_event *event, + u64 *timestamp); + static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel) { return list_entry(evsel->node.next, struct perf_evsel, node); From dc83e1394083d6e12625a3158bf88396dfaec633 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 3 Aug 2017 13:24:33 +0200 Subject: [PATCH 77/83] perf ordered_events: Pass timestamp arg in perf_session__queue_event There's no need to pass whole sample data, because it's only timestamp that is used. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-xd1hpoze3kgb1rb639o3vehb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 2 +- tools/perf/util/ordered-events.c | 3 +-- tools/perf/util/ordered-events.h | 2 +- tools/perf/util/session.c | 6 +++--- tools/perf/util/session.h | 2 +- 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 0c36f2ac6a0e0..cd253db6917f9 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -754,7 +754,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, return -1; } - err = perf_session__queue_event(kvm->session, event, &sample, 0); + err = perf_session__queue_event(kvm->session, event, sample.time, 0); /* * FIXME: Here we can't consume the event, as perf_session__queue_event will * point to it, and it'll get possibly overwritten by the kernel. diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 8e09fd2d842f4..bad9e0296e9ab 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -157,9 +157,8 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve } int ordered_events__queue(struct ordered_events *oe, union perf_event *event, - struct perf_sample *sample, u64 file_offset) + u64 timestamp, u64 file_offset) { - u64 timestamp = sample->time; struct ordered_event *oevent; if (!timestamp || timestamp == ~0ULL) diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index 96e5292d88e25..8c7a2948593e9 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -45,7 +45,7 @@ struct ordered_events { }; int ordered_events__queue(struct ordered_events *oe, union perf_event *event, - struct perf_sample *sample, u64 file_offset); + u64 timestamp, u64 file_offset); void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event); int ordered_events__flush(struct ordered_events *oe, enum oe_flush how); void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5c412310f2664..8976e417eab29 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -873,9 +873,9 @@ static int process_finished_round(struct perf_tool *tool __maybe_unused, } int perf_session__queue_event(struct perf_session *s, union perf_event *event, - struct perf_sample *sample, u64 file_offset) + u64 timestamp, u64 file_offset) { - return ordered_events__queue(&s->ordered_events, event, sample, file_offset); + return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset); } static void callchain__lbr_callstack_printf(struct perf_sample *sample) @@ -1517,7 +1517,7 @@ static s64 perf_session__process_event(struct perf_session *session, return ret; if (tool->ordered_events) { - ret = perf_session__queue_event(session, event, &sample, file_offset); + ret = perf_session__queue_event(session, event, sample.time, file_offset); if (ret != -ETIME) return ret; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 80bc80de8362c..5b1c32b3694a3 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -53,7 +53,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, int perf_session__process_events(struct perf_session *session); int perf_session__queue_event(struct perf_session *s, union perf_event *event, - struct perf_sample *sample, u64 file_offset); + u64 timestamp, u64 file_offset); void perf_tool__fill_defaults(struct perf_tool *tool); From 93d10af26bb7159349158b721ba2e258291d53c3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 3 Aug 2017 13:21:14 +0200 Subject: [PATCH 78/83] perf tools: Optimize sample parsing for ordered events Currently when using ordered events we parse the sample twice (the perf_evlist__parse_sample function). Once before we queue the sample for sorting: perf_session__process_event perf_evlist__parse_sample(sample) perf_session__queue_event(sample.time) And then when we deliver the sorted sample: ordered_events__deliver_event perf_evlist__parse_sample perf_session__deliver_event We can skip the initial full sample parsing by using perf_evlist__parse_sample_timestamp function, which got introduced earlier. The new path looks like: perf_session__process_event perf_evlist__parse_sample_timestamp perf_session__queue_event ordered_events__deliver_event perf_session__deliver_event perf_evlist__parse_sample It saves some instructions and is slightly faster: Before: Performance counter stats for './perf.old report --stdio' (5 runs): 64,396,007,225 cycles:u ( +- 0.97% ) 105,882,112,735 instructions:u # 1.64 insn per cycle ( +- 0.00% ) 21.618103465 seconds time elapsed ( +- 1.12% ) After: Performance counter stats for './perf report --stdio' (5 runs): 60,567,807,182 cycles:u ( +- 0.40% ) 104,853,333,514 instructions:u # 1.73 insn per cycle ( +- 0.00% ) 20.168895243 seconds time elapsed ( +- 0.32% ) Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-cjp2tuk0qkjs9dxzlpmm34ua@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 8 ++++---- tools/perf/util/session.c | 41 +++++++++++++++++---------------------- 2 files changed, 22 insertions(+), 27 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index cd253db6917f9..597c7de9bec94 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -741,20 +741,20 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, u64 *mmap_time) { union perf_event *event; - struct perf_sample sample; + u64 timestamp; s64 n = 0; int err; *mmap_time = ULLONG_MAX; while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) { - err = perf_evlist__parse_sample(kvm->evlist, event, &sample); + err = perf_evlist__parse_sample_timestamp(kvm->evlist, event, ×tamp); if (err) { perf_evlist__mmap_consume(kvm->evlist, idx); pr_err("Failed to parse sample\n"); return -1; } - err = perf_session__queue_event(kvm->session, event, sample.time, 0); + err = perf_session__queue_event(kvm->session, event, timestamp, 0); /* * FIXME: Here we can't consume the event, as perf_session__queue_event will * point to it, and it'll get possibly overwritten by the kernel. @@ -768,7 +768,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, /* save time stamp of our first sample for this mmap */ if (n == 0) - *mmap_time = sample.time; + *mmap_time = timestamp; /* limit events per mmap handled all at once */ n++; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 8976e417eab29..df28571379082 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -27,7 +27,6 @@ static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, struct perf_tool *tool, u64 file_offset); @@ -107,17 +106,10 @@ static void perf_session__set_comm_exec(struct perf_session *session) static int ordered_events__deliver_event(struct ordered_events *oe, struct ordered_event *event) { - struct perf_sample sample; struct perf_session *session = container_of(oe, struct perf_session, ordered_events); - int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample); - - if (ret) { - pr_err("Can't parse sample, err = %d\n", ret); - return ret; - } - return perf_session__deliver_event(session, event->event, &sample, + return perf_session__deliver_event(session, event->event, session->tool, event->file_offset); } @@ -1328,20 +1320,26 @@ static int machines__deliver_event(struct machines *machines, static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, struct perf_tool *tool, u64 file_offset) { + struct perf_sample sample; int ret; - ret = auxtrace__process_event(session, event, sample, tool); + ret = perf_evlist__parse_sample(session->evlist, event, &sample); + if (ret) { + pr_err("Can't parse sample, err = %d\n", ret); + return ret; + } + + ret = auxtrace__process_event(session, event, &sample, tool); if (ret < 0) return ret; if (ret > 0) return 0; return machines__deliver_event(&session->machines, session->evlist, - event, sample, tool, file_offset); + event, &sample, tool, file_offset); } static s64 perf_session__process_user_event(struct perf_session *session, @@ -1495,7 +1493,6 @@ static s64 perf_session__process_event(struct perf_session *session, { struct perf_evlist *evlist = session->evlist; struct perf_tool *tool = session->tool; - struct perf_sample sample; int ret; if (session->header.needs_swap) @@ -1509,21 +1506,19 @@ static s64 perf_session__process_event(struct perf_session *session, if (event->header.type >= PERF_RECORD_USER_TYPE_START) return perf_session__process_user_event(session, event, file_offset); - /* - * For all kernel events we get the sample data - */ - ret = perf_evlist__parse_sample(evlist, event, &sample); - if (ret) - return ret; - if (tool->ordered_events) { - ret = perf_session__queue_event(session, event, sample.time, file_offset); + u64 timestamp; + + ret = perf_evlist__parse_sample_timestamp(evlist, event, ×tamp); + if (ret) + return ret; + + ret = perf_session__queue_event(session, event, timestamp, file_offset); if (ret != -ETIME) return ret; } - return perf_session__deliver_event(session, event, &sample, tool, - file_offset); + return perf_session__deliver_event(session, event, tool, file_offset); } void perf_event_header__bswap(struct perf_event_header *hdr) From b135e5ee1a0e325166c30b16cf5493fea44ede45 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 14 Nov 2017 10:23:39 +0100 Subject: [PATCH 79/83] perf top: Fix window dimensions change handling The stdio perf top crashes when we change the terminal window size. The reason is that we assumed we get the perf_top pointer as a signal handler argument which is not the case. Changing the SIGWINCH handler logic to change global resize variable, which is checked in the main thread loop. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Ravi Bangoria Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ysuzwz77oev1ftgvdscn9bpu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 865191281591a..4cbd3dd14a335 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -77,6 +77,7 @@ #include "sane_ctype.h" static volatile int done; +static volatile int resize; #define HEADER_LINE_NR 5 @@ -86,10 +87,13 @@ static void perf_top__update_print_entries(struct perf_top *top) } static void perf_top__sig_winch(int sig __maybe_unused, - siginfo_t *info __maybe_unused, void *arg) + siginfo_t *info __maybe_unused, void *arg __maybe_unused) { - struct perf_top *top = arg; + resize = 1; +} +static void perf_top__resize(struct perf_top *top) +{ get_term_dimensions(&top->winsize); perf_top__update_print_entries(top); } @@ -480,7 +484,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) .sa_sigaction = perf_top__sig_winch, .sa_flags = SA_SIGINFO, }; - perf_top__sig_winch(SIGWINCH, NULL, top); + perf_top__resize(top); sigaction(SIGWINCH, &act, NULL); } else { signal(SIGWINCH, SIG_DFL); @@ -1035,6 +1039,11 @@ static int __cmd_top(struct perf_top *top) if (hits == top->samples) ret = perf_evlist__poll(top->evlist, 100); + + if (resize) { + perf_top__resize(top); + resize = 0; + } } ret = 0; From 244a1086aba97a6b673162fd6684c5c024b724db Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 15 Nov 2017 14:30:57 +0100 Subject: [PATCH 80/83] perf top: Use signal interface for SIGWINCH handler There's no need for SA_SIGINFO data in SIGWINCH handler, switching it to register the handler via signal interface as we do for the rest of the signals in perf top. Signed-off-by: Jiri Olsa Tested-by: Ravi Bangoria Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-elxp1vdnaog1scaj13cx7cu0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 4cbd3dd14a335..a29a98334f330 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -86,8 +86,7 @@ static void perf_top__update_print_entries(struct perf_top *top) top->print_entries = top->winsize.ws_row - HEADER_LINE_NR; } -static void perf_top__sig_winch(int sig __maybe_unused, - siginfo_t *info __maybe_unused, void *arg __maybe_unused) +static void winch_sig(int sig __maybe_unused) { resize = 1; } @@ -480,12 +479,8 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) case 'e': prompt_integer(&top->print_entries, "Enter display entries (lines)"); if (top->print_entries == 0) { - struct sigaction act = { - .sa_sigaction = perf_top__sig_winch, - .sa_flags = SA_SIGINFO, - }; perf_top__resize(top); - sigaction(SIGWINCH, &act, NULL); + signal(SIGWINCH, winch_sig); } else { signal(SIGWINCH, SIG_DFL); } @@ -1366,12 +1361,8 @@ int cmd_top(int argc, const char **argv) get_term_dimensions(&top.winsize); if (top.print_entries == 0) { - struct sigaction act = { - .sa_sigaction = perf_top__sig_winch, - .sa_flags = SA_SIGINFO, - }; perf_top__update_print_entries(&top); - sigaction(SIGWINCH, &act, NULL); + signal(SIGWINCH, winch_sig); } status = __cmd_top(&top); From a7eec4c677fe60c8760fa9054b578c743ff6a3ec Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 15 Nov 2017 11:53:21 +0100 Subject: [PATCH 81/83] perf top: Fix crash when annotating symbol Ravi reported crash in perf top --stdio when annotating a function [1]. The issue was, that we don't pass evsel pointer into symbol__annotate() function, which got over looked in the last annotation changes. [1] https://marc.info/?l=linux-kernel&m=151060884412702&w=2 Committer note: This fixes the crash, but makes it stumble into another bug, double locking the annotation data structures, that is in turn fixed by the next patch in this series. Signed-off-by: Jiri Olsa Tested-by: Ravi Bangoria Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-6eol035redpoqvxqnuiqudtc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a29a98334f330..0077724fb24fe 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -99,6 +99,7 @@ static void perf_top__resize(struct perf_top *top) static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) { + struct perf_evsel *evsel = hists_to_evsel(he->hists); struct symbol *sym; struct annotation *notes; struct map *map; @@ -137,7 +138,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__annotate(sym, map, NULL, 0, NULL, NULL); + err = symbol__annotate(sym, map, evsel, 0, NULL, NULL); if (err == 0) { out_assign: top->sym_filter_entry = he; From 9e4e0a9d2ef37c7bc60c32e2a3189bd1f04067a5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 15 Nov 2017 12:05:59 +0100 Subject: [PATCH 82/83] perf tools: Change (symbol|annotation)__calc_percent return type to void There's no need for symbol__calc_percent and annotation__calc_percent functions to return any value, since it's always zero. Changing both function to return void. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-z0gs28hh24m4gia1t1ctraye@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 17 ++++++++--------- tools/perf/util/annotate.h | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 30d74dabdc426..846abb4955acc 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1584,8 +1584,8 @@ static void calc_percent(struct sym_hist *hist, } } -static int annotation__calc_percent(struct annotation *notes, - struct perf_evsel *evsel, s64 len) +static void annotation__calc_percent(struct annotation *notes, + struct perf_evsel *evsel, s64 len) { struct annotation_line *al, *next; @@ -1609,15 +1609,13 @@ static int annotation__calc_percent(struct annotation *notes, calc_percent(hist, sample, al->offset, end); } } - - return 0; } -int symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) +void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) { struct annotation *notes = symbol__annotation(sym); - return annotation__calc_percent(notes, evsel, symbol__size(sym)); + annotation__calc_percent(notes, evsel, symbol__size(sym)); } int symbol__annotate(struct symbol *sym, struct map *map, @@ -1656,10 +1654,11 @@ int symbol__annotate(struct symbol *sym, struct map *map, } err = symbol__disassemble(sym, &args); - if (err) - return err; + if (!err) + symbol__calc_percent(sym, evsel); + + return err; - return symbol__calc_percent(sym, evsel); } static void insert_source_line(struct rb_root *root, struct annotation_line *al) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 4fc805a271d2d..6d7289e88fa37 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -107,7 +107,7 @@ struct annotation_line * annotation_line__next(struct annotation_line *pos, struct list_head *head); int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); size_t disasm__fprintf(struct list_head *head, FILE *fp); -int symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel); +void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel); struct sym_hist { u64 nr_samples; From 05d3f1a1d5a3d37ca4b591d5524f5a5b159d0564 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 15 Nov 2017 12:20:08 +0100 Subject: [PATCH 83/83] perf tools: Move symbol__calc_percent() call to outside symbol__disassemble() We need to call symbol__calc_percent() periodicaly for top, so it's no longer convenient to keep it in symbol__disassemble(). Let's separate the symbol__disassemble() to allocate and init the symbol annotation structs and symbol__calc_percent() to compute the lines percentages based on symbol hists data. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-gtnp8t4tb00q6lag07psn5nq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 2 ++ tools/perf/ui/gtk/annotate.c | 2 ++ tools/perf/util/annotate.c | 9 +++------ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 5a2f37a91feb2..03b7363a49c9a 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1126,6 +1126,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, goto out_free_offsets; } + symbol__calc_percent(sym, evsel); + ui_helpline__push("Press ESC to exit"); notes = symbol__annotation(sym); diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 5e0a56df0b4c7..cdb5ecf91666b 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -177,6 +177,8 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, return -1; } + symbol__calc_percent(sym, evsel); + if (perf_gtk__is_active_context(pgctx)) { window = pgctx->main_window; notebook = pgctx->notebook; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 846abb4955acc..22ea7936d92f4 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1653,12 +1653,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, } } - err = symbol__disassemble(sym, &args); - if (!err) - symbol__calc_percent(sym, evsel); - - return err; - + return symbol__disassemble(sym, &args); } static void insert_source_line(struct rb_root *root, struct annotation_line *al) @@ -2005,6 +2000,8 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, if (symbol__annotate(sym, map, evsel, 0, NULL, NULL) < 0) return -1; + symbol__calc_percent(sym, evsel); + if (print_lines) { srcline_full_filename = full_paths; symbol__calc_lines(sym, map, &source_line);