From 6703c9771d83ebe092b0d49cb0609a3f9d8b4ff7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Jan 2018 14:59:21 -0300 Subject: [PATCH 01/27] perf test bpf: Improve message about expected samples When failing on one of the BPF tests we were just stating: BPF filter result incorrect Add some more info to help figuring out the problem: BPF filter result incorrect, expected 56, got 0 samples This came out while investigating this failure, first seen after updating the kernel to the 4.15.0-rc6 tag: [root@jouet ~]# perf test bpf 39: BPF filter : 39.1: Basic BPF filtering : FAILED! 39.2: BPF pinning : Skip 39.3: BPF prologue generation: Skip 39.4: BPF relocation checker : Skip [root@jouet ~]# Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-403npu7daupv6b2bmxliv5pk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index c433dd30975a3..057c6b8fdb53e 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -190,7 +190,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), } if (count != expect) { - pr_debug("BPF filter result incorrect\n"); + pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count); goto out_delete_evlist; } From 13cb2d0f513cf35bf74484a392f745f9e9c5a3f2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Jan 2018 15:18:07 -0300 Subject: [PATCH 02/27] perf test bpf: Use designated struct field initializers To follow standard practice in the kernel sources, documenting the initialization better and helping quickly finding the value for some field in a struct with many entries. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-syn3hz9hz7ukxlxbx5x6hv20@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 59 +++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 057c6b8fdb53e..0512f1b5bfdba 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -63,46 +63,41 @@ static struct { bool pin; } bpf_testcase_table[] = { { - LLVM_TESTCASE_BASE, - "Basic BPF filtering", - "[basic_bpf_test]", - "fix 'perf test LLVM' first", - "load bpf object failed", - &epoll_wait_loop, - (NR_ITERS + 1) / 2, - false, + .prog_id = LLVM_TESTCASE_BASE, + .desc = "Basic BPF filtering", + .name = "[basic_bpf_test]", + .msg_compile_fail = "fix 'perf test LLVM' first", + .msg_load_fail = "load bpf object failed", + .target_func = &epoll_wait_loop, + .expect_result = (NR_ITERS + 1) / 2, }, { - LLVM_TESTCASE_BASE, - "BPF pinning", - "[bpf_pinning]", - "fix kbuild first", - "check your vmlinux setting?", - &epoll_wait_loop, - (NR_ITERS + 1) / 2, - true, + .prog_id = LLVM_TESTCASE_BASE, + .desc = "BPF pinning", + .name = "[bpf_pinning]", + .msg_compile_fail = "fix kbuild first", + .msg_load_fail = "check your vmlinux setting?", + .target_func = &epoll_wait_loop, + .expect_result = (NR_ITERS + 1) / 2, + .pin = true, }, #ifdef HAVE_BPF_PROLOGUE { - LLVM_TESTCASE_BPF_PROLOGUE, - "BPF prologue generation", - "[bpf_prologue_test]", - "fix kbuild first", - "check your vmlinux setting?", - &llseek_loop, - (NR_ITERS + 1) / 4, - false, + .prog_id = LLVM_TESTCASE_BPF_PROLOGUE, + .desc = "BPF prologue generation", + .name = "[bpf_prologue_test]", + .msg_compile_fail = "fix kbuild first", + .msg_load_fail = "check your vmlinux setting?", + .target_func = &llseek_loop, + .expect_result = (NR_ITERS + 1) / 4, }, #endif { - LLVM_TESTCASE_BPF_RELOCATION, - "BPF relocation checker", - "[bpf_relocation_test]", - "fix 'perf test LLVM' first", - "libbpf error when dealing with relocation", - NULL, - 0, - false, + .prog_id = LLVM_TESTCASE_BPF_RELOCATION, + .desc = "BPF relocation checker", + .name = "[bpf_relocation_test]", + .msg_compile_fail = "fix 'perf test LLVM' first", + .msg_load_fail = "libbpf error when dealing with relocation", }, }; From e0337f4f9aff60a19079b0f224136bb03877db58 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 4 Jan 2018 12:43:32 -0300 Subject: [PATCH 03/27] perf test bpf: Hook on epoll_pwait() The 'perf test bpf' was hooking a eBPF program on the SyS_epoll_wait() kernel function, that was what the epoll_wait() glibc function ended up calling, but since at least glibc 2.26, the one that comes with, for instance, Fedora 27, glibc ends up calling SyS_epoll_pwait() when epoll_wait() is used, causing this 'perf test' entry to fail. So switch to using epoll_pwait() and hook the eBPF program to the SyS_epoll_pwait() kernel function to make it work on a wider range of glibc and kernel versions. Tested-by: Wang Nan Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-zynvquy63er8s5mrgsz65pto@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf-script-example.c | 4 ++-- tools/perf/tests/bpf.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c index 268e5f8e4aa27..e4123c1b0e883 100644 --- a/tools/perf/tests/bpf-script-example.c +++ b/tools/perf/tests/bpf-script-example.c @@ -31,8 +31,8 @@ struct bpf_map_def SEC("maps") flip_table = { .max_entries = 1, }; -SEC("func=SyS_epoll_wait") -int bpf_func__SyS_epoll_wait(void *ctx) +SEC("func=SyS_epoll_pwait") +int bpf_func__SyS_epoll_pwait(void *ctx) { int ind =0; int *flag = bpf_map_lookup_elem(&flip_table, &ind); diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 0512f1b5bfdba..8e709c9d512c4 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -19,13 +19,13 @@ #ifdef HAVE_LIBBPF_SUPPORT -static int epoll_wait_loop(void) +static int epoll_pwait_loop(void) { int i; /* Should fail NR_ITERS times */ for (i = 0; i < NR_ITERS; i++) - epoll_wait(-(i + 1), NULL, 0, 0); + epoll_pwait(-(i + 1), NULL, 0, 0, NULL); return 0; } @@ -68,7 +68,7 @@ static struct { .name = "[basic_bpf_test]", .msg_compile_fail = "fix 'perf test LLVM' first", .msg_load_fail = "load bpf object failed", - .target_func = &epoll_wait_loop, + .target_func = &epoll_pwait_loop, .expect_result = (NR_ITERS + 1) / 2, }, { @@ -77,7 +77,7 @@ static struct { .name = "[bpf_pinning]", .msg_compile_fail = "fix kbuild first", .msg_load_fail = "check your vmlinux setting?", - .target_func = &epoll_wait_loop, + .target_func = &epoll_pwait_loop, .expect_result = (NR_ITERS + 1) / 2, .pin = true, }, From 44df1afdb174fd6038e419f80efd914c0b5f2f85 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 6 Dec 2017 01:50:40 +0000 Subject: [PATCH 04/27] perf tools: Fix compile error with libunwind x86 Fix a compile error: ... CC util/libunwind/x86_32.o In file included from util/libunwind/x86_32.c:33:0: util/libunwind/../../arch/x86/util/unwind-libunwind.c: In function 'libunwind__x86_reg_id': util/libunwind/../../arch/x86/util/unwind-libunwind.c:110:11: error: 'EINVAL' undeclared (first use in this function) return -EINVAL; ^ util/libunwind/../../arch/x86/util/unwind-libunwind.c:110:11: note: each undeclared identifier is reported only once for each function it appears in mv: cannot stat 'util/libunwind/.x86_32.o.tmp': No such file or directory make[4]: *** [util/libunwind/x86_32.o] Error 1 make[3]: *** [util] Error 2 make[2]: *** [libperf-in.o] Error 2 make[1]: *** [sub-make] Error 2 make: *** [all] Error 2 It happens when libunwind-x86 feature is detected. Signed-off-by: Wang Nan Link: http://lkml.kernel.org/r/20171206015040.114574-1-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/unwind-libunwind.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c index 9c917f80c9065..05920e3edf7a7 100644 --- a/tools/perf/arch/x86/util/unwind-libunwind.c +++ b/tools/perf/arch/x86/util/unwind-libunwind.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 -#ifndef REMOTE_UNWIND_LIBUNWIND #include +#ifndef REMOTE_UNWIND_LIBUNWIND #include #include "perf_regs.h" #include "../../util/unwind.h" From 935f5a9d4500020879858c9224c98dfabf16101d Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sat, 30 Dec 2017 00:26:52 +0800 Subject: [PATCH 05/27] perf report: Fix a wrong offset issue when using /proc/kcore When a valid vmlinux is not found, 'perf report' falls back to look at /proc/kcore. In this case, it will report the impossible large offset. For example: # perf record -b -e cycles:k find /etc/ > /dev/null # perf report --stdio --branch-history 22.77% _vm_normal_page+18446603336221188162 | ---page_remove_rmap +18446603336221188324 page_remove_rmap +18446603336221188487 (cycles:5) unlock_page_memcg +18446603336221188096 page_remove_rmap +18446603336221188327 (cycles:1) The issue is the value which is passed to parameter 'addr' in __get_srcline() is the objdump address. It's not correct if we calculate the offset by using 'addr - sym->start'. This patch creates a new parameter 'ip' in __get_srcline(). It is not converted to objdump address. With this patch, the perf report output is: 22.77% _vm_normal_page+66 | ---page_remove_rmap +228 page_remove_rmap +391 (cycles:5) unlock_page_memcg +0 page_remove_rmap +231 (cycles:1) page_remove_rmap +236 Committer testing: Make sure you get any valid vmlinux out of the way, using '-v' on the 'perf report' case and deleting it from places where perf searches them, like your kernel build dir and the build-id cache, in ~/.debug/. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1514564812-17344-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 3 ++- tools/perf/util/machine.c | 2 +- tools/perf/util/map.c | 2 +- tools/perf/util/sort.c | 16 ++++++++++------ tools/perf/util/srcline.c | 9 +++++---- tools/perf/util/srcline.h | 5 +++-- 6 files changed, 22 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 68e687d1bf99a..28b233c3dcbe3 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1960,7 +1960,8 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, if (percent_max <= 0.5) continue; - al->path = get_srcline(map->dso, start + al->offset, NULL, false, true); + al->path = get_srcline(map->dso, start + al->offset, NULL, + false, true, start + al->offset); insert_source_line(&tmp_root, al); } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 64d255f6a5374..b05a67464c03c 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1726,7 +1726,7 @@ static char *callchain_srcline(struct map *map, struct symbol *sym, u64 ip) bool show_addr = callchain_param.key == CCKEY_ADDRESS; srcline = get_srcline(map->dso, map__rip_2objdump(map, ip), - sym, show_sym, show_addr); + sym, show_sym, show_addr, ip); srcline__tree_insert(&map->dso->srclines, ip, srcline); } diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 6d40efd744027..8fe57031e1a85 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -419,7 +419,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, if (map && map->dso) { srcline = get_srcline(map->dso, map__rip_2objdump(map, addr), NULL, - true, true); + true, true, addr); if (srcline != SRCLINE_UNKNOWN) ret = fprintf(fp, "%s%s", prefix, srcline); free_srcline(srcline); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index a00eacdf02ed9..211e7f326b9f2 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -336,7 +336,7 @@ char *hist_entry__get_srcline(struct hist_entry *he) return SRCLINE_UNKNOWN; return get_srcline(map->dso, map__rip_2objdump(map, he->ip), - he->ms.sym, true, true); + he->ms.sym, true, true, he->ip); } static int64_t @@ -380,7 +380,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, left->branch_info->from.al_addr), left->branch_info->from.sym, - true, true); + true, true, + left->branch_info->from.al_addr); } if (!right->branch_info->srcline_from) { struct map *map = right->branch_info->from.map; @@ -391,7 +392,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, right->branch_info->from.al_addr), right->branch_info->from.sym, - true, true); + true, true, + right->branch_info->from.al_addr); } return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from); } @@ -423,7 +425,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, left->branch_info->to.al_addr), left->branch_info->from.sym, - true, true); + true, true, + left->branch_info->to.al_addr); } if (!right->branch_info->srcline_to) { struct map *map = right->branch_info->to.map; @@ -434,7 +437,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, right->branch_info->to.al_addr), right->branch_info->to.sym, - true, true); + true, true, + right->branch_info->to.al_addr); } return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to); } @@ -465,7 +469,7 @@ static char *hist_entry__get_srcfile(struct hist_entry *e) return no_srcfile; sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip), - e->ms.sym, false, true, true); + e->ms.sym, false, true, true, e->ip); if (!strcmp(sf, SRCLINE_UNKNOWN)) return no_srcfile; p = strchr(sf, ':'); diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index d19f05c56de61..3c21fd059b649 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -496,7 +496,8 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr, #define A2L_FAIL_LIMIT 123 char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr, bool unwind_inlines) + bool show_sym, bool show_addr, bool unwind_inlines, + u64 ip) { char *file = NULL; unsigned line = 0; @@ -536,7 +537,7 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, if (sym) { if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? sym->name : "", - addr - sym->start) < 0) + ip - sym->start) < 0) return SRCLINE_UNKNOWN; } else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0) return SRCLINE_UNKNOWN; @@ -550,9 +551,9 @@ void free_srcline(char *srcline) } char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr) + bool show_sym, bool show_addr, u64 ip) { - return __get_srcline(dso, addr, sym, show_sym, show_addr, false); + return __get_srcline(dso, addr, sym, show_sym, show_addr, false, ip); } struct srcline_node { diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h index 847b7086182c1..b2bb5502fd628 100644 --- a/tools/perf/util/srcline.h +++ b/tools/perf/util/srcline.h @@ -11,9 +11,10 @@ struct symbol; extern bool srcline_full_filename; char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr); + bool show_sym, bool show_addr, u64 ip); char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr, bool unwind_inlines); + bool show_sym, bool show_addr, bool unwind_inlines, + u64 ip); void free_srcline(char *srcline); /* insert the srcline into the DSO, which will take ownership */ From 40c39e3046411f84bab82f66783ff3593e2bcd9b Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 26 Dec 2017 18:42:43 +0800 Subject: [PATCH 06/27] perf report: Fix a no annotate browser displayed issue When enabling '-b' option in perf record, for example, perf record -b ... perf report and then browsing the annotate browser from perf report (press 'A'), it would fail (annotate browser can't be displayed). It's because the '.add_entry_cb' op of struct report is overwritten by hist_iter__branch_callback() in builtin-report.c. But this function doesn't do something like mapping symbols and sources. So next, do_annotate() will return directly. notes = symbol__annotation(act->ms.sym); if (!notes->src) return 0; This patch adds the lost code to hist_iter__branch_callback (refer to hist_iter__report_callback). v2: Fix a crash bug when perform 'perf report --stdio'. The reason is that we init the symbol annotation only in browser mode, it doesn't allocate/init resources for stdio mode. So now in hist_iter__branch_callback(), it will return directly if it's not in browser mode. Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1514284963-18587-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index eb9ce6327e71b..07827cd514803 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -162,12 +162,28 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter, struct hist_entry *he = iter->he; struct report *rep = arg; struct branch_info *bi; + struct perf_sample *sample = iter->sample; + struct perf_evsel *evsel = iter->evsel; + int err; + + if (!ui__has_annotation()) + return 0; + + hist__account_cycles(sample->branch_stack, al, sample, + rep->nonany_branch_mode); bi = he->branch_info; + err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx); + if (err) + goto out; + + err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx); + branch_type_count(&rep->brtype_stat, &bi->flags, bi->from.addr, bi->to.addr); - return 0; +out: + return err; } static int process_sample_event(struct perf_tool *tool, From 6011518db3bd04c80cd3ce3e6aea1c399739adb4 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:41 +0800 Subject: [PATCH 07/27] perf header: Add infrastructure to record first and last sample time perf report/script/... have a --time option to limit the time range of output. That's very useful to slice large traces, e.g. when processing the output of perf script for some analysis. But right now --time only supports absolute time. Also there is no fast way to get the start/end times of a given trace except for looking at it. This makes it hard to e.g. only decode the first half of the trace, which is useful for parallelization of scripts Another problem is that perf records are variable size and there is no synchronization mechanism. So the only way to find the last sample reliably would be to walk all samples. But we want to avoid that in perf report/... because it is already quite expensive. That is why storing the first sample time and last sample time in perf record is better. This patch creates a new header feature type HEADER_SAMPLE_TIME and related ops. Save the first sample time and the last sample time to the feature section in perf file header. That will be done when, for instance, processing build-ids, where we already have to process all samples to create the build-id table, take advantage of that to further amortize that processing by storing HEADER_SAMPLE_TIME to make 'perf report/script' faster when using --time. Committer testing: After this patch is applied the header is written with zeroes, we need the next patch, for "perf record" to actually write the timestamps: # perf report -D | grep PERF_RECORD_SAMPLE\( 22501155244406 0x44f0 [0x28]: PERF_RECORD_SAMPLE(IP, 0x4001): 25016/25016: 0xffffffffa21be8c5 period: 1 addr: 0 22501155793625 0x4a30 [0x28]: PERF_RECORD_SAMPLE(IP, 0x4001): 25016/25016: 0xffffffffa21ffd50 period: 2828043 addr: 0 # perf report --header | grep "time of " # time of first sample : 0.000000 # time of last sample : 0.000000 # Changelog: v7: 1. Rebase to latest perf/core branch. 2. Add following clarification in patch description according to Arnaldo's suggestion. "That will be done when, for instance, processing build-ids, where we already have to process all samples to create the build-id table, take advantage of that to further amortize that processing by storing HEADER_SAMPLE_TIME to make 'perf report/script' faster when using --time." v4: Use perf script time style for timestamp printing. Also add with the printing of sample duration. v3: Remove the definitions of first_sample_time/last_sample_time from perf_session. Just define them in perf_evlist Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../Documentation/perf.data-file-format.txt | 4 ++ tools/perf/util/evlist.h | 2 + tools/perf/util/header.c | 60 +++++++++++++++++++ tools/perf/util/header.h | 1 + 4 files changed, 67 insertions(+) diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index 15e8b48077ba3..f7d85e89a98a6 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -261,6 +261,10 @@ struct { struct perf_header_string map; }[number_of_cache_levels]; + HEADER_SAMPLE_TIME = 21, + +Two uint64_t for the time of first sample and the time of last sample. + other bits are reserved and should ignored for now HEADER_FEAT_BITS = 256, diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 75160666d3051..e7fbca69cbac4 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -50,6 +50,8 @@ struct perf_evlist { struct perf_evsel *selected; struct events_stats stats; struct perf_env *env; + u64 first_sample_time; + u64 last_sample_time; }; struct perf_evsel_str_handler { diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index ca73aa7be7082..a326e0d8b5b6b 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "evlist.h" #include "evsel.h" @@ -35,6 +36,7 @@ #include #include "asm/bug.h" #include "tool.h" +#include "time-utils.h" #include "sane_ctype.h" @@ -1180,6 +1182,20 @@ static int write_stat(struct feat_fd *ff __maybe_unused, return 0; } +static int write_sample_time(struct feat_fd *ff, + struct perf_evlist *evlist) +{ + int ret; + + ret = do_write(ff, &evlist->first_sample_time, + sizeof(evlist->first_sample_time)); + if (ret < 0) + return ret; + + return do_write(ff, &evlist->last_sample_time, + sizeof(evlist->last_sample_time)); +} + static void print_hostname(struct feat_fd *ff, FILE *fp) { fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); @@ -1505,6 +1521,28 @@ static void print_group_desc(struct feat_fd *ff, FILE *fp) } } +static void print_sample_time(struct feat_fd *ff, FILE *fp) +{ + struct perf_session *session; + char time_buf[32]; + double d; + + session = container_of(ff->ph, struct perf_session, header); + + timestamp__scnprintf_usec(session->evlist->first_sample_time, + time_buf, sizeof(time_buf)); + fprintf(fp, "# time of first sample : %s\n", time_buf); + + timestamp__scnprintf_usec(session->evlist->last_sample_time, + time_buf, sizeof(time_buf)); + fprintf(fp, "# time of last sample : %s\n", time_buf); + + d = (double)(session->evlist->last_sample_time - + session->evlist->first_sample_time) / NSEC_PER_MSEC; + + fprintf(fp, "# sample duration : %10.3f ms\n", d); +} + static int __event_process_build_id(struct build_id_event *bev, char *filename, struct perf_session *session) @@ -2146,6 +2184,27 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused) return -1; } +static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused) +{ + struct perf_session *session; + u64 first_sample_time, last_sample_time; + int ret; + + session = container_of(ff->ph, struct perf_session, header); + + ret = do_read_u64(ff, &first_sample_time); + if (ret) + return -1; + + ret = do_read_u64(ff, &last_sample_time); + if (ret) + return -1; + + session->evlist->first_sample_time = first_sample_time; + session->evlist->last_sample_time = last_sample_time; + return 0; +} + struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); @@ -2203,6 +2262,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPN(AUXTRACE, auxtrace, false), FEAT_OPN(STAT, stat, false), FEAT_OPN(CACHE, cache, true), + FEAT_OPR(SAMPLE_TIME, sample_time, false), }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 317fb901e47fc..f28aaaa3a4405 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -35,6 +35,7 @@ enum { HEADER_AUXTRACE, HEADER_STAT, HEADER_CACHE, + HEADER_SAMPLE_TIME, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; From 68588baf8d01826673f2874f434123029e519052 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:42 +0800 Subject: [PATCH 08/27] perf record: Record the first and last sample time in the header In the default 'perf record' configuration, all samples are processed, to create the HEADER_BUILD_ID table. So it's very easy to get the first/last samples and save the time to perf file header via the function write_sample_time(). Later, at post processing time, perf report/script will fetch the time from perf file header. Committer testing: # perf record -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.099 MB perf.data (1101 samples) ] [root@jouet home]# perf report --header | grep "time of " # time of first sample : 22947.909226 # time of last sample : 22948.910704 # # perf report -D | grep PERF_RECORD_SAMPLE\( 0 22947909226101 0x20bb68 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 0/0: 0xffffffffa21b1af3 period: 1 addr: 0 0 22947909229928 0x20bb98 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 0/0: 0xffffffffa200d204 period: 1 addr: 0 3 22948910397351 0x219360 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 28251/28251: 0xffffffffa22071d8 period: 169518 addr: 0 0 22948910652380 0x20f120 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 0/0: 0xffffffffa2856816 period: 198807 addr: 0 2 22948910704034 0x2172d0 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 0/0: 0xffffffffa2856816 period: 88111 addr: 0 # Changelog: v7: Just update the patch description according to Arnaldo's suggestion. v6: Currently '--buildid-all' is not enabled at default. So the walking on all samples is the default operation. There is no big overhead to calculate the timestamp boundary in process_sample_event handler once we already go through all samples. So the timestamp boundary calculation is enabled by default when '--buildid-all' is not enabled. While if '--buildid-all' is enabled, we creates a new option "--timestamp-boundary" for user to decide if it enables the timestamp boundary calculation. v5: There is an issue that the sample walking can only work when '--buildid-all' is not enabled. So we need to let the walking be able to work even if '--buildid-all' is enabled and let the processing skips the dso hit marking for this case. At first, I want to provide a new option "--record-time-boundaries". While after consideration, I think a new option is not very necessary. v3: Remove the definitions of first_sample_time and last_sample_time from struct record and directly save them in perf_evlist. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-3-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 3 +++ tools/perf/builtin-record.c | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 5a626ef666c29..3eea6de35a38d 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -430,6 +430,9 @@ Configure all used events to run in user space. --timestamp-filename Append timestamp to output file name. +--timestamp-boundary:: +Record timestamp boundary (time of first/last samples). + --switch-output[=mode]:: Generate multiple perf.data files, timestamp prefixed, switching to a new one based on 'mode' value: diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 50385d89c4970..65681a1a292a5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -78,6 +78,7 @@ struct record { bool no_buildid_cache_set; bool buildid_all; bool timestamp_filename; + bool timestamp_boundary; struct switch_output switch_output; unsigned long long samples; }; @@ -409,8 +410,15 @@ static int process_sample_event(struct perf_tool *tool, { struct record *rec = container_of(tool, struct record, tool); - rec->samples++; + if (rec->evlist->first_sample_time == 0) + rec->evlist->first_sample_time = sample->time; + + rec->evlist->last_sample_time = sample->time; + if (rec->buildid_all) + return 0; + + rec->samples++; return build_id__mark_dso_hit(tool, event, sample, evsel, machine); } @@ -435,9 +443,11 @@ static int process_buildids(struct record *rec) /* * If --buildid-all is given, it marks all DSO regardless of hits, - * so no need to process samples. + * so no need to process samples. But if timestamp_boundary is enabled, + * it still needs to walk on all samples to get the timestamps of + * first/last samples. */ - if (rec->buildid_all) + if (rec->buildid_all && !rec->timestamp_boundary) rec->tool.sample = NULL; return perf_session__process_events(session); @@ -1621,6 +1631,8 @@ static struct option __record_options[] = { "Record build-id of all DSOs regardless of hits"), OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, "append timestamp to output filename"), + OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, + "Record timestamp boundary (time of first/last samples)"), OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, &record.switch_output.set, "signal,size,time", "Switch output when receive SIGUSR2 or cross size,time threshold", From 13a70f350665580708ab11f725d3578eaacbf2d0 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:43 +0800 Subject: [PATCH 09/27] perf tools: Create function to parse time percent Current perf report/script/... have a --time option to limit the time range of output. But right now it only supports absolute time, add support for time percentage. For example: 1. Select the second 10% time slice perf report --time 10%/2 2. Select from 0% to 10% time slice perf report --time 0%-10% It also support the multiple time ranges. 3. Select the first and second 10% time slices perf report --time 10%/1,10%/2 4. Select from 0% to 10% and 30% to 40% slices perf report --time 0%-10%,30%-40% Changelog: v4: An issue is found. Following passes. perf script --time 10%/10x12321xsdfdasfdsafdsafdsa Now it uses strtol to replace atoi. Committer notes: This just puts in place the infrastructure, so the examples in this cset comment will only work later, after more patches in this series are applied. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-4-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 205 +++++++++++++++++++++++++++++++++-- tools/perf/util/time-utils.h | 3 + 2 files changed, 196 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 81927d0274176..61c46022de0bf 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "perf.h" #include "debug.h" @@ -60,11 +61,10 @@ static int parse_timestr_sec_nsec(struct perf_time_interval *ptime, return 0; } -int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) +static int split_start_end(char **start, char **end, const char *ostr, char ch) { char *start_str, *end_str; char *d, *str; - int rc = 0; if (ostr == NULL || *ostr == '\0') return 0; @@ -74,25 +74,35 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) if (str == NULL) return -ENOMEM; - ptime->start = 0; - ptime->end = 0; - - /* str has the format: , - * variations: , - * , - * , - */ start_str = str; - d = strchr(start_str, ','); + d = strchr(start_str, ch); if (d) { *d = '\0'; ++d; } end_str = d; + *start = start_str; + *end = end_str; + + return 0; +} + +int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) +{ + char *start_str = NULL, *end_str; + int rc; + + rc = split_start_end(&start_str, &end_str, ostr, ','); + if (rc || !start_str) + return rc; + + ptime->start = 0; + ptime->end = 0; + rc = parse_timestr_sec_nsec(ptime, start_str, end_str); - free(str); + free(start_str); /* make sure end time is after start time if it was given */ if (rc == 0 && ptime->end && ptime->end < ptime->start) @@ -104,6 +114,177 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) return rc; } +static int parse_percent(double *pcnt, char *str) +{ + char *c; + + c = strchr(str, '%'); + if (c) + *c = '\0'; + else + return -1; + + *pcnt = atof(str) / 100.0; + + return 0; +} + +static int percent_slash_split(char *str, struct perf_time_interval *ptime, + u64 start, u64 end) +{ + char *p, *end_str; + double pcnt, start_pcnt, end_pcnt; + u64 total = end - start; + int i; + + /* + * Example: + * 10%/2: select the second 10% slice and the third 10% slice + */ + + /* We can modify this string since the original one is copied */ + p = strchr(str, '/'); + if (!p) + return -1; + + *p = '\0'; + if (parse_percent(&pcnt, str) < 0) + return -1; + + p++; + i = (int)strtol(p, &end_str, 10); + if (*end_str) + return -1; + + if (pcnt <= 0.0) + return -1; + + start_pcnt = pcnt * (i - 1); + end_pcnt = pcnt * i; + + if (start_pcnt < 0.0 || start_pcnt > 1.0 || + end_pcnt < 0.0 || end_pcnt > 1.0) { + return -1; + } + + ptime->start = start + round(start_pcnt * total); + ptime->end = start + round(end_pcnt * total); + + return 0; +} + +static int percent_dash_split(char *str, struct perf_time_interval *ptime, + u64 start, u64 end) +{ + char *start_str = NULL, *end_str; + double start_pcnt, end_pcnt; + u64 total = end - start; + int ret; + + /* + * Example: 0%-10% + */ + + ret = split_start_end(&start_str, &end_str, str, '-'); + if (ret || !start_str) + return ret; + + if ((parse_percent(&start_pcnt, start_str) != 0) || + (parse_percent(&end_pcnt, end_str) != 0)) { + free(start_str); + return -1; + } + + free(start_str); + + if (start_pcnt < 0.0 || start_pcnt > 1.0 || + end_pcnt < 0.0 || end_pcnt > 1.0 || + start_pcnt > end_pcnt) { + return -1; + } + + ptime->start = start + round(start_pcnt * total); + ptime->end = start + round(end_pcnt * total); + + return 0; +} + +typedef int (*time_pecent_split)(char *, struct perf_time_interval *, + u64 start, u64 end); + +static int percent_comma_split(struct perf_time_interval *ptime_buf, int num, + const char *ostr, u64 start, u64 end, + time_pecent_split func) +{ + char *str, *p1, *p2; + int len, ret, i = 0; + + str = strdup(ostr); + if (str == NULL) + return -ENOMEM; + + len = strlen(str); + p1 = str; + + while (p1 < str + len) { + if (i >= num) { + free(str); + return -1; + } + + p2 = strchr(p1, ','); + if (p2) + *p2 = '\0'; + + ret = (func)(p1, &ptime_buf[i], start, end); + if (ret < 0) { + free(str); + return -1; + } + + pr_debug("start time %d: %" PRIu64 ", ", i, ptime_buf[i].start); + pr_debug("end time %d: %" PRIu64 "\n", i, ptime_buf[i].end); + + i++; + + if (p2) + p1 = p2 + 1; + else + break; + } + + free(str); + return i; +} + +int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, + const char *ostr, u64 start, u64 end) +{ + char *c; + + /* + * ostr example: + * 10%/2,10%/3: select the second 10% slice and the third 10% slice + * 0%-10%,30%-40%: multiple time range + */ + + memset(ptime_buf, 0, sizeof(*ptime_buf) * num); + + c = strchr(ostr, '/'); + if (c) { + return percent_comma_split(ptime_buf, num, ostr, start, + end, percent_slash_split); + } + + c = strchr(ostr, '-'); + if (c) { + return percent_comma_split(ptime_buf, num, ostr, start, + end, percent_dash_split); + } + + return -1; +} + bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp) { /* if time is not set don't drop sample */ diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h index 15b475c50ccf7..23087231785ad 100644 --- a/tools/perf/util/time-utils.h +++ b/tools/perf/util/time-utils.h @@ -13,6 +13,9 @@ int parse_nsec_time(const char *str, u64 *ptime); int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr); +int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, + const char *ostr, u64 start, u64 end); + bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp); int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); From 9a9b8b4b2271e763c1600311a3d4ecc2ac359b55 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:44 +0800 Subject: [PATCH 10/27] perf tools: Create function to perform multiple time range checking Previous patch supports the multiple time range. For example, select the first and second 10% time slices. perf report --time 10%/1,10%/2 We need a function to check if a timestamp is in the ranges of [0, 10%) and [10%, 20%]. Note that it includes the last element in [10%, 20%] but it doesn't include the last element in [0, 10%). It's to avoid the overlap. This patch implments a new function perf_time__ranges_skip_sample for this checking. Change log: v4: Let perf_time__ranges_skip_sample be compatible with perf_time__skip_sample when only one time range. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-5-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 28 ++++++++++++++++++++++++++++ tools/perf/util/time-utils.h | 3 +++ 2 files changed, 31 insertions(+) diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 61c46022de0bf..3f7f18f06982e 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -300,6 +300,34 @@ bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp) return false; } +bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, + int num, u64 timestamp) +{ + struct perf_time_interval *ptime; + int i; + + if ((timestamp == 0) || (num == 0)) + return false; + + if (num == 1) + return perf_time__skip_sample(&ptime_buf[0], timestamp); + + /* + * start/end of multiple time ranges must be valid. + */ + for (i = 0; i < num; i++) { + ptime = &ptime_buf[i]; + + if (timestamp >= ptime->start && + ((timestamp < ptime->end && i < num - 1) || + (timestamp <= ptime->end && i == num - 1))) { + break; + } + } + + return (i == num) ? true : false; +} + int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz) { u64 sec = timestamp / NSEC_PER_SEC; diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h index 23087231785ad..34d5eba26bf5b 100644 --- a/tools/perf/util/time-utils.h +++ b/tools/perf/util/time-utils.h @@ -18,6 +18,9 @@ int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp); +bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, + int num, u64 timestamp); + int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); int fetch_current_timestamp(char *buf, size_t sz); From 5b969bc766807e5c2f184d1d6f97b8471de946f1 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:45 +0800 Subject: [PATCH 11/27] perf report: Support time percent and multiple time ranges perf report has a --time option to limit the time range of output. It only supports absolute time. Now this option is extended to support multiple time ranges and support the percent of time. For example: 1. Select the first and second 10% time slices: perf report --time 10%/1,10%/2 2. Select from 0% to 10% and 30% to 40% slices: perf report --time 0%-10%,30%-40% Changelog: v6: Fix the merge issue with latest perf/core branch. No functional changes. v5: Add checking of first/last sample time to detect if it's recorded in perf.data. If it's not recorded, returns error message to user. v4: Remove perf_time__skip_sample, only uses perf_time__ranges_skip_sample v3: Since the definitions of first_sample_time/last_sample_time are moved from perf_session to perf_evlist so change the related code. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-6-git-send-email-yao.jin@linux.intel.com [ Add missing colons at end of examples in the man page ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 20 +++++++++++++++ tools/perf/builtin-report.c | 31 ++++++++++++++++++++---- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index ddde2b54af572..1e02c4e1a81f0 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -402,6 +402,26 @@ OPTIONS stop time is not given (i.e, time string is 'x.y,') then analysis goes to end of file. + Also support time percent with multiple time range. Time string is + 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10. + + For example: + Select the second 10% time slice: + + perf report --time 10%/2 + + Select from 0% to 10% time slice: + + perf report --time 0%-10% + + Select the first and second 10% time slices: + + perf report --time 10%/1,10%/2 + + Select from 0% to 10% and 30% to 40% slices: + + perf report --time 0%-10%,30%-40% + --itrace:: Options for decoding instruction tracing data. The options are: diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 07827cd514803..770bf8a614f22 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -52,6 +52,8 @@ #include #include +#define PTIME_RANGE_MAX 10 + struct report { struct perf_tool tool; struct perf_session *session; @@ -69,7 +71,8 @@ struct report { const char *cpu_list; const char *symbol_filter_str; const char *time_str; - struct perf_time_interval ptime; + struct perf_time_interval ptime_range[PTIME_RANGE_MAX]; + int range_num; float min_percent; u64 nr_entries; u64 queue_size; @@ -202,8 +205,10 @@ static int process_sample_event(struct perf_tool *tool, }; int ret = 0; - if (perf_time__skip_sample(&rep->ptime, sample->time)) + if (perf_time__ranges_skip_sample(rep->ptime_range, rep->range_num, + sample->time)) { return 0; + } if (machine__resolve(machine, &al, sample) < 0) { pr_debug("problem processing %d event, skipping it.\n", @@ -1093,9 +1098,25 @@ int cmd_report(int argc, const char **argv) if (symbol__init(&session->header.env) < 0) goto error; - if (perf_time__parse_str(&report.ptime, report.time_str) != 0) { - pr_err("Invalid time string\n"); - return -EINVAL; + if (perf_time__parse_str(report.ptime_range, report.time_str) != 0) { + if (session->evlist->first_sample_time == 0 && + session->evlist->last_sample_time == 0) { + pr_err("No first/last sample time in perf data\n"); + return -EINVAL; + } + + report.range_num = perf_time__percent_parse_str( + report.ptime_range, PTIME_RANGE_MAX, + report.time_str, + session->evlist->first_sample_time, + session->evlist->last_sample_time); + + if (report.range_num < 0) { + pr_err("Invalid time string\n"); + return -EINVAL; + } + } else { + report.range_num = 1; } sort__setup_elide(stdout); From 2ab046cd01e33a854798a3e245c9e3f32b950a7d Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:46 +0800 Subject: [PATCH 12/27] perf script: Support time percent and multiple time ranges perf script has a --time option to limit the time range of output. It only supports absolute time. Now this option is extended to support multiple time ranges and support the percent of time. For example: 1. Select the first and second 10% time slices: perf script --time 10%/1,10%/2 2. Select from 0% to 10% and 30% to 40% slices: perf script --time 0%-10%,30%-40% Changelog: v6: Fix the merge issue with latest perf/core branch. No functional changes. v5: Add checking of first/last sample time to detect if it's recorded in perf.data. If it's not recorded, returns error message to user. v4: Remove perf_time__skip_sample, only uses perf_time__ranges_skip_sample v3: Since the definitions of first_sample_time/last_sample_time are moved from perf_session to perf_evlist so change the related code. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-7-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 16 +++++++++++ tools/perf/builtin-script.c | 34 +++++++++++++++++++----- 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 974ceb12c7f3e..7b622a812a725 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -329,6 +329,22 @@ include::itrace.txt[] stop time is not given (i.e, time string is 'x.y,') then analysis goes to end of file. + Also support time percent with multipe time range. Time string is + 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10. + + For example: + Select the second 10% time slice + perf script --time 10%/2 + + Select from 0% to 10% time slice + perf script --time 0%-10% + + Select the first and second 10% time slices + perf script --time 10%/1,10%/2 + + Select from 0% to 10% and 30% to 40% slices + perf script --time 0%-10%,30%-40% + --max-blocks:: Set the maximum number of program blocks to print with brstackasm for each sample. diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 77e47cf39f2ca..330dcd9b9b8ff 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1436,6 +1436,8 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample, return 0; } +#define PTIME_RANGE_MAX 10 + struct perf_script { struct perf_tool tool; struct perf_session *session; @@ -1449,7 +1451,8 @@ struct perf_script { struct thread_map *threads; int name_width; const char *time_str; - struct perf_time_interval ptime; + struct perf_time_interval ptime_range[PTIME_RANGE_MAX]; + int range_num; }; static int perf_evlist__max_name_len(struct perf_evlist *evlist) @@ -1734,8 +1737,10 @@ static int process_sample_event(struct perf_tool *tool, struct perf_script *scr = container_of(tool, struct perf_script, tool); struct addr_location al; - if (perf_time__skip_sample(&scr->ptime, sample->time)) + if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num, + sample->time)) { return 0; + } if (debug_mode) { if (sample->time < last_timestamp) { @@ -3360,10 +3365,27 @@ int cmd_script(int argc, const char **argv) goto out_delete; /* needs to be parsed after looking up reference time */ - if (perf_time__parse_str(&script.ptime, script.time_str) != 0) { - pr_err("Invalid time string\n"); - err = -EINVAL; - goto out_delete; + if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) { + if (session->evlist->first_sample_time == 0 && + session->evlist->last_sample_time == 0) { + pr_err("No first/last sample time in perf data\n"); + err = -EINVAL; + goto out_delete; + } + + script.range_num = perf_time__percent_parse_str( + script.ptime_range, PTIME_RANGE_MAX, + script.time_str, + session->evlist->first_sample_time, + session->evlist->last_sample_time); + + if (script.range_num < 0) { + pr_err("Invalid time string\n"); + err = -EINVAL; + goto out_delete; + } + } else { + script.range_num = 1; } err = __cmd_script(&script); From 24787afbcd0127859394eb9230659ee6d5dc4644 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:45 +0100 Subject: [PATCH 13/27] perf tools: Enable LIBBABELTRACE by default There's no reason anymore to treat babel trace in a special way, because a) we no longer display its state b) the needed babeltrace library is now out and well adopted among distros. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- tools/perf/Makefile.perf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index f050f38d8fa3a..12dec6ea5ed21 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -780,7 +780,7 @@ else NO_PERF_READ_VDSOX32 := 1 endif -ifdef LIBBABELTRACE +ifndef NO_LIBBABELTRACE $(call feature_check,libbabeltrace) ifeq ($(feature-libbabeltrace), 1) CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 68cf1360a3f39..9fdefd748e2e3 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -77,7 +77,7 @@ include ../scripts/utilities.mak # # Define NO_ZLIB if you do not want to support compressed kernel modules # -# Define LIBBABELTRACE if you DO want libbabeltrace support +# Define NO_LIBBABELTRACE if you do not want libbabeltrace support # for CTF data format. # # Define NO_LZMA if you do not want to support compressed (xz) kernel modules From db9fc765e8f4d0144d13cdfa4be32d81eae01710 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:46 +0100 Subject: [PATCH 14/27] perf tools: Display perf_event_attr::namespaces debug info Display namespaces bit in -vv debug display: $ perf record -vv --namespaces ... ... perf_event_attr: size 112 ... namespaces 1 Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a4d256ea0dc42..c435b2444153e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1577,6 +1577,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(use_clockid, p_unsigned); PRINT_ATTRf(context_switch, p_unsigned); PRINT_ATTRf(write_backward, p_unsigned); + PRINT_ATTRf(namespaces, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); From 313ccb96159489eabdbdcf4deb34e7fbac17557d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:47 +0100 Subject: [PATCH 15/27] perf: Allocate context task_ctx_data for child event Currently we use perf_event_context::task_ctx_data to save and restore the LBR status when the task is scheduled out and in. We don't allocate it for child contexts, which results in shorter task's LBR stack, because we don't save the history from previous run and start over every time we schedule the task in. I made a test to generate samples with LBR call stack and got higher numbers on bigger chain depths: before: after: LBR call chain: nr: 1 60561 498127 LBR call chain: nr: 2 0 0 LBR call chain: nr: 3 107030 2172 LBR call chain: nr: 4 466685 62758 LBR call chain: nr: 5 2307319 878046 LBR call chain: nr: 6 48713 495218 LBR call chain: nr: 7 1040 4551 LBR call chain: nr: 8 481 172 LBR call chain: nr: 9 878 120 LBR call chain: nr: 10 2377 6698 LBR call chain: nr: 11 28830 151487 LBR call chain: nr: 12 29347 339867 LBR call chain: nr: 13 4 22 LBR call chain: nr: 14 3 53 Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Fixes: 4af57ef28c2c ("perf: Add pmu specific data for perf task context") Link: http://lkml.kernel.org/r/20180107160356.28203-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4df5b695bf0db..55fb648a32b07 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10703,6 +10703,19 @@ inherit_event(struct perf_event *parent_event, if (IS_ERR(child_event)) return child_event; + + if ((child_event->attach_state & PERF_ATTACH_TASK_DATA) && + !child_ctx->task_ctx_data) { + struct pmu *pmu = child_event->pmu; + + child_ctx->task_ctx_data = kzalloc(pmu->task_ctx_size, + GFP_KERNEL); + if (!child_ctx->task_ctx_data) { + free_event(child_event); + return NULL; + } + } + /* * is_orphaned_event() and list_add_tail(&parent_event->child_list) * must be under the same lock in order to serialize against @@ -10713,6 +10726,7 @@ inherit_event(struct perf_event *parent_event, if (is_orphaned_event(parent_event) || !atomic_long_inc_not_zero(&parent_event->refcount)) { mutex_unlock(&parent_event->child_mutex); + /* task_ctx_data is freed with child_ctx */ free_event(child_event); return NULL; } From 81df978c49379481716aef591de77313c286d747 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:48 +0100 Subject: [PATCH 16/27] perf: Add sample_id to PERF_RECORD_ITRACE_START event comment Adding missing sample_id line into PERF_RECORD_ITRACE_START event comment. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-5-jolsa@kernel.org [ Update the tools/include/uapi/linux copy ] Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 1 + tools/include/uapi/linux/perf_event.h | 1 + 2 files changed, 2 insertions(+) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b9a4953018ede..8bb66e8da945b 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -864,6 +864,7 @@ enum perf_event_type { * struct perf_event_header header; * u32 pid; * u32 tid; + * struct sample_id sample_id; * }; */ PERF_RECORD_ITRACE_START = 12, diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index b9a4953018ede..8bb66e8da945b 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -864,6 +864,7 @@ enum perf_event_type { * struct perf_event_header header; * u32 pid; * u32 tid; + * struct sample_id sample_id; * }; */ PERF_RECORD_ITRACE_START = 12, From 8cf7e0e22414f5acf85ecb7cd0d4482e6c9696ae Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:49 +0100 Subject: [PATCH 17/27] perf: Make perf_callchain function static And move it to core.c, because there's no caller of this function other than the one in core.c Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/callchain.c | 15 --------------- kernel/events/core.c | 16 ++++++++++++++++ kernel/events/internal.h | 4 ---- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 1b2be63c85282..772a43fea825c 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -178,21 +178,6 @@ put_callchain_entry(int rctx) put_recursion_context(this_cpu_ptr(callchain_recursion), rctx); } -struct perf_callchain_entry * -perf_callchain(struct perf_event *event, struct pt_regs *regs) -{ - bool kernel = !event->attr.exclude_callchain_kernel; - bool user = !event->attr.exclude_callchain_user; - /* Disallow cross-task user callchains. */ - bool crosstask = event->ctx->task && event->ctx->task != current; - const u32 max_stack = event->attr.sample_max_stack; - - if (!kernel && !user) - return NULL; - - return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true); -} - struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, u32 max_stack, bool crosstask, bool add_mark) diff --git a/kernel/events/core.c b/kernel/events/core.c index 55fb648a32b07..5fc1ded4b450d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5980,6 +5980,22 @@ static u64 perf_virt_to_phys(u64 virt) return phys_addr; } +static struct perf_callchain_entry * +perf_callchain(struct perf_event *event, struct pt_regs *regs) +{ + bool kernel = !event->attr.exclude_callchain_kernel; + bool user = !event->attr.exclude_callchain_user; + /* Disallow cross-task user callchains. */ + bool crosstask = event->ctx->task && event->ctx->task != current; + const u32 max_stack = event->attr.sample_max_stack; + + if (!kernel && !user) + return NULL; + + return get_perf_callchain(regs, 0, kernel, user, + max_stack, crosstask, true); +} + void perf_prepare_sample(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event, diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 09b1537ae06cd..6dc725a7e7bc9 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -201,10 +201,6 @@ arch_perf_out_copy_user(void *dst, const void *src, unsigned long n) DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) -/* Callchain handling */ -extern struct perf_callchain_entry * -perf_callchain(struct perf_event *event, struct pt_regs *regs); - static inline int get_recursion_context(int *recursion) { int rctx; From 99e818cc88889a2fa2f483b91b372c47b94b7c98 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:50 +0100 Subject: [PATCH 18/27] perf: Return empty callchain instead of NULL It simplifies the code a bit, because we dump the callchain Link: http://lkml.kernel.org/n/tip-uqp7qd6aif47g39glnbu95yl@git.kernel.org even if it's empty. With 'empty' callchain we can remove all the NULL-checking code paths. Original-patch-from: Peter Zijlstra Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20180107160356.28203-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/core.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 5fc1ded4b450d..4e1a1bf8d8670 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5815,19 +5815,11 @@ void perf_output_sample(struct perf_output_handle *handle, perf_output_read(handle, event); if (sample_type & PERF_SAMPLE_CALLCHAIN) { - if (data->callchain) { - int size = 1; - - if (data->callchain) - size += data->callchain->nr; - - size *= sizeof(u64); + int size = 1; - __output_copy(handle, data->callchain, size); - } else { - u64 nr = 0; - perf_output_put(handle, nr); - } + size += data->callchain->nr; + size *= sizeof(u64); + __output_copy(handle, data->callchain, size); } if (sample_type & PERF_SAMPLE_RAW) { @@ -5980,6 +5972,8 @@ static u64 perf_virt_to_phys(u64 virt) return phys_addr; } +static struct perf_callchain_entry __empty_callchain = { .nr = 0, }; + static struct perf_callchain_entry * perf_callchain(struct perf_event *event, struct pt_regs *regs) { @@ -5988,12 +5982,14 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) /* Disallow cross-task user callchains. */ bool crosstask = event->ctx->task && event->ctx->task != current; const u32 max_stack = event->attr.sample_max_stack; + struct perf_callchain_entry *callchain; if (!kernel && !user) - return NULL; + return &__empty_callchain; - return get_perf_callchain(regs, 0, kernel, user, - max_stack, crosstask, true); + callchain = get_perf_callchain(regs, 0, kernel, user, + max_stack, crosstask, true); + return callchain ?: &__empty_callchain; } void perf_prepare_sample(struct perf_event_header *header, @@ -6018,9 +6014,7 @@ void perf_prepare_sample(struct perf_event_header *header, int size = 1; data->callchain = perf_callchain(event, regs); - - if (data->callchain) - size += data->callchain->nr; + size += data->callchain->nr; header->size += size * sizeof(u64); } From 972c14884728bf5f69ec69cfb1beeec1a9cd29ee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:51 +0100 Subject: [PATCH 19/27] perf: Update PERF_RECORD_MISC_* comment for perf_event_header::misc bit 13 The perf_event_header::misc bit 13 is shared on different events and next patch is adding yet another bit 13 user. Updating the comment to make it more structured and clear which events use bit 13. Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20180107160356.28203-8-jolsa@kernel.org [ Update the tools/include/uapi/linux copy ] Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 9 ++++++--- tools/include/uapi/linux/perf_event.h | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 8bb66e8da945b..c77c9a2ebbbbc 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -612,9 +612,12 @@ struct perf_event_mmap_page { */ #define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) /* - * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on - * different events so can reuse the same bit position. - * Ditto PERF_RECORD_MISC_SWITCH_OUT. + * Following PERF_RECORD_MISC_* are used on different + * events, so can reuse the same bit position: + * + * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events + * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 8bb66e8da945b..c77c9a2ebbbbc 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -612,9 +612,12 @@ struct perf_event_mmap_page { */ #define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) /* - * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on - * different events so can reuse the same bit position. - * Ditto PERF_RECORD_MISC_SWITCH_OUT. + * Following PERF_RECORD_MISC_* are used on different + * events, so can reuse the same bit position: + * + * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events + * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) From 28a0b39877f5ed64ae9fadf95dddb90999309dee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:52 +0100 Subject: [PATCH 20/27] perf script: Add support to display sample misc field Adding support to display sample misc field in form of letter for each bit: # perf script -F +misc ... sched-messaging 1414 K 28690.636582: 4590 cycles ... sched-messaging 1407 U 28690.636600: 325620 cycles ... sched-messaging 1414 K 28690.636608: 19473 cycles ... misc field __________/ The misc bits are assigned to following letters: PERF_RECORD_MISC_KERNEL K PERF_RECORD_MISC_USER U PERF_RECORD_MISC_HYPERVISOR H PERF_RECORD_MISC_GUEST_KERNEL G PERF_RECORD_MISC_GUEST_USER g PERF_RECORD_MISC_MMAP_DATA* M PERF_RECORD_MISC_COMM_EXEC E PERF_RECORD_MISC_SWITCH_OUT S Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 20 ++++++- tools/perf/builtin-script.c | 74 ++++++++++++++++++++---- tools/perf/util/event.h | 1 + tools/perf/util/evsel.c | 1 + 4 files changed, 84 insertions(+), 12 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 7b622a812a725..93ae8d60e3d33 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -117,7 +117,7 @@ OPTIONS Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, - brstackoff, callindent, insn, insnlen, synth, phys_addr, metric. + brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -225,6 +225,24 @@ OPTIONS that the metric computed is averaged over the whole sampling period, not just for the sample point. + For sample events it's possible to display misc field with -F +misc option, + following letters are displayed for each bit: + + PERF_RECORD_MISC_KERNEL K + PERF_RECORD_MISC_USER U + PERF_RECORD_MISC_HYPERVISOR H + PERF_RECORD_MISC_GUEST_KERNEL G + PERF_RECORD_MISC_GUEST_USER g + PERF_RECORD_MISC_MMAP_DATA* M + PERF_RECORD_MISC_COMM_EXEC E + PERF_RECORD_MISC_SWITCH_OUT S + + $ perf script -F +misc ... + sched-messaging 1414 K 28690.636582: 4590 cycles ... + sched-messaging 1407 U 28690.636600: 325620 cycles ... + sched-messaging 1414 K 28690.636608: 19473 cycles ... + misc field ___________/ + -k:: --vmlinux=:: vmlinux pathname diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 330dcd9b9b8ff..bb603495cf4a5 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -93,6 +93,7 @@ enum perf_output_field { PERF_OUTPUT_PHYS_ADDR = 1U << 26, PERF_OUTPUT_UREGS = 1U << 27, PERF_OUTPUT_METRIC = 1U << 28, + PERF_OUTPUT_MISC = 1U << 29, }; struct output_option { @@ -128,6 +129,7 @@ struct output_option { {.str = "synth", .field = PERF_OUTPUT_SYNTH}, {.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR}, {.str = "metric", .field = PERF_OUTPUT_METRIC}, + {.str = "misc", .field = PERF_OUTPUT_MISC}, }; enum { @@ -594,7 +596,8 @@ static int perf_sample__fprintf_uregs(struct perf_sample *sample, static int perf_sample__fprintf_start(struct perf_sample *sample, struct thread *thread, - struct perf_evsel *evsel, FILE *fp) + struct perf_evsel *evsel, + u32 type, FILE *fp) { struct perf_event_attr *attr = &evsel->attr; unsigned long secs; @@ -624,6 +627,47 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, printed += fprintf(fp, "[%03d] ", sample->cpu); } + if (PRINT_FIELD(MISC)) { + int ret = 0; + + #define has(m) \ + (sample->misc & PERF_RECORD_MISC_##m) == PERF_RECORD_MISC_##m + + if (has(KERNEL)) + ret += fprintf(fp, "K"); + if (has(USER)) + ret += fprintf(fp, "U"); + if (has(HYPERVISOR)) + ret += fprintf(fp, "H"); + if (has(GUEST_KERNEL)) + ret += fprintf(fp, "G"); + if (has(GUEST_USER)) + ret += fprintf(fp, "g"); + + switch (type) { + case PERF_RECORD_MMAP: + case PERF_RECORD_MMAP2: + if (has(MMAP_DATA)) + ret += fprintf(fp, "M"); + break; + case PERF_RECORD_COMM: + if (has(COMM_EXEC)) + ret += fprintf(fp, "E"); + break; + case PERF_RECORD_SWITCH: + case PERF_RECORD_SWITCH_CPU_WIDE: + if (has(SWITCH_OUT)) + ret += fprintf(fp, "S"); + default: + break; + } + + #undef has + + ret += fprintf(fp, "%*s", 6 - ret, " "); + printed += ret; + } + if (PRINT_FIELD(TIME)) { nsecs = sample->time; secs = nsecs / NSEC_PER_SEC; @@ -1502,7 +1546,7 @@ static void script_print_metric(void *ctx, const char *color, if (!fmt) return; perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, - mctx->fp); + PERF_RECORD_SAMPLE, mctx->fp); fputs("\tmetric: ", mctx->fp); if (color) color_fprintf(mctx->fp, color, fmt, val); @@ -1516,7 +1560,7 @@ static void script_new_line(void *ctx) struct metric_ctx *mctx = ctx; perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, - mctx->fp); + PERF_RECORD_SAMPLE, mctx->fp); fputs("\tmetric: ", mctx->fp); } @@ -1584,7 +1628,8 @@ static void process_event(struct perf_script *script, ++es->samples; - perf_sample__fprintf_start(sample, thread, evsel, fp); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_SAMPLE, fp); if (PRINT_FIELD(PERIOD)) fprintf(fp, "%10" PRIu64 " ", sample->period); @@ -1833,7 +1878,8 @@ static int process_comm_event(struct perf_tool *tool, sample->tid = event->comm.tid; sample->pid = event->comm.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_COMM, stdout); perf_event__fprintf(event, stdout); ret = 0; out: @@ -1868,7 +1914,8 @@ static int process_namespaces_event(struct perf_tool *tool, sample->tid = event->namespaces.tid; sample->pid = event->namespaces.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_NAMESPACES, stdout); perf_event__fprintf(event, stdout); ret = 0; out: @@ -1901,7 +1948,8 @@ static int process_fork_event(struct perf_tool *tool, sample->tid = event->fork.tid; sample->pid = event->fork.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_FORK, stdout); perf_event__fprintf(event, stdout); thread__put(thread); @@ -1930,7 +1978,8 @@ static int process_exit_event(struct perf_tool *tool, sample->tid = event->fork.tid; sample->pid = event->fork.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_EXIT, stdout); perf_event__fprintf(event, stdout); if (perf_event__process_exit(tool, event, sample, machine) < 0) @@ -1965,7 +2014,8 @@ static int process_mmap_event(struct perf_tool *tool, sample->tid = event->mmap.tid; sample->pid = event->mmap.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_MMAP, stdout); perf_event__fprintf(event, stdout); thread__put(thread); return 0; @@ -1996,7 +2046,8 @@ static int process_mmap2_event(struct perf_tool *tool, sample->tid = event->mmap2.tid; sample->pid = event->mmap2.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_MMAP2, stdout); perf_event__fprintf(event, stdout); thread__put(thread); return 0; @@ -2022,7 +2073,8 @@ static int process_switch_event(struct perf_tool *tool, return -1; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_SWITCH, stdout); perf_event__fprintf(event, stdout); thread__put(thread); return 0; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 1ae95efbfb95d..e5fbd6dd1b01c 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -205,6 +205,7 @@ struct perf_sample { u32 flags; u16 insn_len; u8 cpumode; + u16 misc; char insn[MAX_INSN]; void *raw_data; struct ip_callchain *callchain; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c435b2444153e..d934f04e3110e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2042,6 +2042,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, data->stream_id = data->id = data->time = -1ULL; data->period = evsel->attr.sample_period; data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + data->misc = event->header.misc; data->id = -1ULL; data->data_src = PERF_MEM_DATA_SRC_NONE; From 3d7c27b6dbca4c90e7d921b45c2240e7c3cb92a2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:53 +0100 Subject: [PATCH 21/27] perf script: Add support to display lost events Adding option to display lost events: $ perf script --show-lost-events ... mplayer 13810 [002] 468011.402396: 100 cycles:ppp: ff.. mplayer 13810 [002] 468011.402396: PERF_RECORD_LOST lost 3880 mplayer 13810 [002] 468011.402397: 100 cycles:ppp: ff.. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-10-jolsa@kernel.org [ Use PRIu64 when printing u64 values, fixing the build in some arches ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 3 +++ tools/perf/builtin-script.c | 28 ++++++++++++++++++++++++ tools/perf/util/event.c | 8 +++++++ 3 files changed, 39 insertions(+) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 93ae8d60e3d33..806ec6391fd6d 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -300,6 +300,9 @@ OPTIONS Display context switch events i.e. events of type PERF_RECORD_SWITCH or PERF_RECORD_SWITCH_CPU_WIDE. +--show-lost-events + Display lost events i.e. events of type PERF_RECORD_LOST. + --demangle:: Demangle symbol names to human readable form. It's enabled by default, disable with --no-demangle. diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index bb603495cf4a5..c1cce474c0f10 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1489,6 +1489,7 @@ struct perf_script { bool show_mmap_events; bool show_switch_events; bool show_namespace_events; + bool show_lost_events; bool allocated; bool per_event_dump; struct cpu_map *cpus; @@ -2080,6 +2081,29 @@ static int process_switch_event(struct perf_tool *tool, return 0; } +static int +process_lost_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct perf_script *script = container_of(tool, struct perf_script, tool); + struct perf_session *session = script->session; + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); + struct thread *thread; + + thread = machine__findnew_thread(machine, sample->pid, + sample->tid); + if (thread == NULL) + return -1; + + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_LOST, stdout); + perf_event__fprintf(event, stdout); + thread__put(thread); + return 0; +} + static void sig_handler(int sig __maybe_unused) { session_done = 1; @@ -2174,6 +2198,8 @@ static int __cmd_script(struct perf_script *script) script->tool.context_switch = process_switch_event; if (script->show_namespace_events) script->tool.namespaces = process_namespaces_event; + if (script->show_lost_events) + script->tool.lost = process_lost_event; if (perf_script__setup_per_event_dump(script)) { pr_err("Couldn't create the per event dump files\n"); @@ -3110,6 +3136,8 @@ int cmd_script(int argc, const char **argv) "Show context switch events (if recorded)"), OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, "Show namespace events (if recorded)"), + OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events, + "Show lost events (if recorded)"), OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump, "Dump trace output to files named by the monitored events"), OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 97a8ef9980db7..44e603c279449 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1435,6 +1435,11 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) event->context_switch.next_prev_tid); } +static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost); +} + size_t perf_event__fprintf(union perf_event *event, FILE *fp) { size_t ret = fprintf(fp, "PERF_RECORD_%s", @@ -1467,6 +1472,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_SWITCH_CPU_WIDE: ret += perf_event__fprintf_switch(event, fp); break; + case PERF_RECORD_LOST: + ret += perf_event__fprintf_lost(event, fp); + break; default: ret += fprintf(fp, "\n"); } From 075ca1ebb25e798e4072a1e3a482b829bb51afb2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:54 +0100 Subject: [PATCH 22/27] perf tools: Make the tool's warning messages optional I want to display the pure events status coming in the next patch and the tool's warnings are superfluous in the output. Making it optional, enabled by default. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-11-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 6 ++++-- tools/perf/util/tool.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 54e30f1bcbd7a..8d0fa2f8da169 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1773,7 +1773,8 @@ static int __perf_session__process_pipe_events(struct perf_session *session) err = perf_session__flush_thread_stacks(session); out_err: free(buf); - perf_session__warn_about_errors(session); + if (!tool->no_warn) + perf_session__warn_about_errors(session); ordered_events__free(&session->ordered_events); auxtrace__free_events(session); return err; @@ -1929,7 +1930,8 @@ static int __perf_session__process_events(struct perf_session *session, err = perf_session__flush_thread_stacks(session); out_err: ui_progress__finish(); - perf_session__warn_about_errors(session); + if (!tool->no_warn) + perf_session__warn_about_errors(session); /* * We may switching perf.data output, make ordered_events * reusable. diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 2532b558099b7..183c914535228 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -76,6 +76,7 @@ struct perf_tool { bool ordered_events; bool ordering_requires_timestamps; bool namespace_events; + bool no_warn; enum show_feature_header show_feat_hdr; }; From a4a4d0a7a2b20f7880262de4f51685baaf693476 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:55 +0100 Subject: [PATCH 23/27] perf report: Add --stats option to display quick data statistics Add --stats option to display quick data statistics of event numbers, without any further processing, like the one at the end of the perf report -D command. $ perf report --stat Aggregated stats: TOTAL events: 4566 MMAP events: 113 LOST events: 19 COMM events: 3 FORK events: 400 SAMPLE events: 3315 MMAP2 events: 32 FINISHED_ROUND events: 681 THREAD_MAP events: 1 CPU_MAP events: 1 TIME_CONV events: 1 I found this useful when hunting lost events for another change. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-12-jolsa@kernel.org [ Rename it to --stats, plural ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 4 ++++ tools/perf/builtin-report.c | 26 +++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 1e02c4e1a81f0..a7d11ef2fe258 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -457,6 +457,10 @@ include::itrace.txt[] will be printed. Each entry is function name or file/line. Enabled by default, disable with --no-inline. +--stats:: + Display overall events statistics without any further processing. + (like the one at the end of the perf report -D command) + include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 770bf8a614f22..8e67a8c25ab1c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -62,6 +62,7 @@ struct report { bool show_threads; bool inverted_callchain; bool mem_mode; + bool stats_mode; bool header; bool header_only; bool nonany_branch_mode; @@ -588,6 +589,20 @@ static void report__output_resort(struct report *rep) ui_progress__finish(); } +static void stats_setup(struct report *rep) +{ + memset(&rep->tool, 0, sizeof(rep->tool)); + rep->tool.no_warn = true; +} + +static int stats_print(struct report *rep) +{ + struct perf_session *session = rep->session; + + perf_session__fprintf_nr_events(session, stdout); + return 0; +} + static int __cmd_report(struct report *rep) { int ret; @@ -619,12 +634,18 @@ static int __cmd_report(struct report *rep) return ret; } + if (rep->stats_mode) + stats_setup(rep); + ret = perf_session__process_events(session); if (ret) { ui__error("failed to process sample\n"); return ret; } + if (rep->stats_mode) + return stats_print(rep); + report__warn_kptr_restrict(rep); evlist__for_each_entry(session->evlist, pos) @@ -781,6 +802,7 @@ int cmd_report(int argc, const char **argv) OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), + OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, @@ -1042,6 +1064,8 @@ int cmd_report(int argc, const char **argv) report.tool.show_feat_hdr = SHOW_FEAT_HEADER; if (report.show_full_info) report.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO; + if (report.stats_mode) + use_browser = 0; if (strcmp(input_name, "-") != 0) setup_browser(true); @@ -1064,7 +1088,7 @@ int cmd_report(int argc, const char **argv) ret = 0; goto error; } - } else if (use_browser == 0 && !quiet) { + } else if (use_browser == 0 && !quiet && !report.stats_mode) { fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n", stdout); } From 2d1073def3cb69aa44f99be7ef42da7cc561be1f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Jan 2018 12:03:47 -0300 Subject: [PATCH 24/27] perf trace: Beautify 'gettid' syscall result Before: # trace -a -e gettid sleep 0.01 4.863 ( 0.005 ms): Chrome_ChildIO/26241 gettid() = 26241 4.931 ( 0.004 ms): Chrome_IOThrea/26154 gettid() = 26154 4.942 ( 0.001 ms): Chrome_IOThrea/26154 gettid() = 26154 4.946 ( 0.001 ms): Chrome_IOThrea/26154 gettid() = 26154 4.970 ( 0.002 ms): Chrome_IOThrea/26154 gettid() = 26154 # After: # trace -a -e gettid sleep 0.01 0.000 ( 0.009 ms): Chrome_IOThrea/26154 gettid() = 26154 (Chrome_IOThread) 3.416 ( 0.002 ms): Chrome_ChildIO/26241 gettid() = 26241 (Chrome_ChildIOT) 3.424 ( 0.001 ms): Chrome_ChildIO/26241 gettid() = 26241 (Chrome_ChildIOT) 3.343 ( 0.002 ms): chrome/26116 gettid() = 26116 (chrome) 3.386 ( 0.002 ms): Chrome_IOThrea/26154 gettid() = 26154 (Chrome_IOThread) 4.003 ( 0.003 ms): Chrome_ChildIO/26241 gettid() = 26241 (Chrome_ChildIOT) 4.031 ( 0.002 ms): Chrome_IOThrea/26154 gettid() = 26154 (Chrome_IOThread) # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-kyg4gz2yy0vkrrh2vtq29u71@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7c57898095ea3..71e64bdca86fc 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -622,6 +622,7 @@ static struct syscall_fmt { .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, }, { .name = "getrlimit", .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, + { .name = "gettid", .errpid = true, }, { .name = "ioctl", .arg = { #if defined(__i386__) || defined(__x86_64__) From 930f8b3479444d264aa33e008c4b00b86e8c62cc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:56 +0100 Subject: [PATCH 25/27] perf report: Add --tasks option to display monitored tasks Add --tasks option to display monitored tasks stored in perf.data. Displaying pid/tid/ppid plus the command string aligned to distinguish parent and child tasks. $ perf record -a ... $ perf report --tasks # pid tid ppid comm 0 0 -1 |swapper 2 2 0 | kthreadd 14080 14080 2 | kworker/u17:1 4 4 2 | kworker/0:0H 6 6 2 | mm_percpu_wq ... 1 1 0 | systemd 23242 23242 1 | firefox 23242 23298 23242 | Cache2 I/O 23242 23304 23242 | GMPThread ... 1195 1195 1 | login 1611 1611 1195 | bash 1639 1639 1611 | startx 1663 1663 1639 | xinit 1673 1673 1663 | xmonad-x86_64-l 23939 23939 1673 | xterm 23941 23941 23939 | bash 23963 23963 23941 | mutt 24954 24954 23963 | offlineimap Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-13-jolsa@kernel.org [ Make it --tasks, plural, --task works as well, as its unambiguous ] [ Use machine__find_thread(), not findnew(), as pointed out by Namhyung ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 4 + tools/perf/builtin-report.c | 136 ++++++++++++++++++++++- 2 files changed, 138 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index a7d11ef2fe258..856c3c7e94fae 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -461,6 +461,10 @@ include::itrace.txt[] Display overall events statistics without any further processing. (like the one at the end of the perf report -D command) +--tasks:: + Display monitored tasks stored in perf data. Displaying pid/tid/ppid + plus the command string aligned to distinguish parent and child tasks. + include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8e67a8c25ab1c..2c7bd85651dca 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -15,6 +15,7 @@ #include "util/color.h" #include #include +#include #include "util/symbol.h" #include "util/callchain.h" #include "util/values.h" @@ -63,6 +64,7 @@ struct report { bool inverted_callchain; bool mem_mode; bool stats_mode; + bool tasks_mode; bool header; bool header_only; bool nonany_branch_mode; @@ -603,6 +605,124 @@ static int stats_print(struct report *rep) return 0; } +static void tasks_setup(struct report *rep) +{ + memset(&rep->tool, 0, sizeof(rep->tool)); + rep->tool.comm = perf_event__process_comm; + rep->tool.exit = perf_event__process_exit; + rep->tool.fork = perf_event__process_fork; + rep->tool.no_warn = true; +} + +struct task { + struct thread *thread; + struct list_head list; + struct list_head children; +}; + +static struct task *tasks_list(struct task *task, struct machine *machine) +{ + struct thread *parent_thread, *thread = task->thread; + struct task *parent_task; + + /* Already listed. */ + if (!list_empty(&task->list)) + return NULL; + + /* Last one in the chain. */ + if (thread->ppid == -1) + return task; + + parent_thread = machine__find_thread(machine, -1, thread->ppid); + if (!parent_thread) + return ERR_PTR(-ENOENT); + + parent_task = thread__priv(parent_thread); + list_add_tail(&task->list, &parent_task->children); + return tasks_list(parent_task, machine); +} + +static void task__print_level(struct task *task, FILE *fp, int level) +{ + struct thread *thread = task->thread; + struct task *child; + + fprintf(fp, " %8d %8d %8d |%*s%s\n", + thread->pid_, thread->tid, thread->ppid, + level, "", thread__comm_str(thread)); + + if (!list_empty(&task->children)) { + list_for_each_entry(child, &task->children, list) + task__print_level(child, fp, level + 1); + } +} + +static int tasks_print(struct report *rep, FILE *fp) +{ + struct perf_session *session = rep->session; + struct machine *machine = &session->machines.host; + struct task *tasks, *task; + unsigned int nr = 0, itask = 0, i; + struct rb_node *nd; + LIST_HEAD(list); + + /* + * No locking needed while accessing machine->threads, + * because --tasks is single threaded command. + */ + + /* Count all the threads. */ + for (i = 0; i < THREADS__TABLE_SIZE; i++) + nr += machine->threads[i].nr; + + tasks = malloc(sizeof(*tasks) * nr); + if (!tasks) + return -ENOMEM; + + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; + + for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { + task = tasks + itask++; + + task->thread = rb_entry(nd, struct thread, rb_node); + INIT_LIST_HEAD(&task->children); + INIT_LIST_HEAD(&task->list); + thread__set_priv(task->thread, task); + } + } + + /* + * Iterate every task down to the unprocessed parent + * and link all in task children list. Task with no + * parent is added into 'list'. + */ + for (itask = 0; itask < nr; itask++) { + task = tasks + itask; + + if (!list_empty(&task->list)) + continue; + + task = tasks_list(task, machine); + if (IS_ERR(task)) { + pr_err("Error: failed to process tasks\n"); + free(tasks); + return PTR_ERR(task); + } + + if (task) + list_add_tail(&task->list, &list); + } + + fprintf(fp, "# %8s %8s %8s %s\n", "pid", "tid", "ppid", "comm"); + + list_for_each_entry(task, &list, list) + task__print_level(task, fp, 0); + + free(tasks); + return 0; +} + static int __cmd_report(struct report *rep) { int ret; @@ -637,6 +757,9 @@ static int __cmd_report(struct report *rep) if (rep->stats_mode) stats_setup(rep); + if (rep->tasks_mode) + tasks_setup(rep); + ret = perf_session__process_events(session); if (ret) { ui__error("failed to process sample\n"); @@ -646,6 +769,9 @@ static int __cmd_report(struct report *rep) if (rep->stats_mode) return stats_print(rep); + if (rep->tasks_mode) + return tasks_print(rep, stdout); + report__warn_kptr_restrict(rep); evlist__for_each_entry(session->evlist, pos) @@ -803,6 +929,7 @@ int cmd_report(int argc, const char **argv) OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"), + OPT_BOOLEAN(0, "tasks", &report.tasks_mode, "Display recorded tasks"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, @@ -1064,8 +1191,12 @@ int cmd_report(int argc, const char **argv) report.tool.show_feat_hdr = SHOW_FEAT_HEADER; if (report.show_full_info) report.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO; - if (report.stats_mode) + if (report.stats_mode || report.tasks_mode) use_browser = 0; + if (report.stats_mode && report.tasks_mode) { + pr_err("Error: --tasks and --stats options cannot be used together\n"); + goto error; + } if (strcmp(input_name, "-") != 0) setup_browser(true); @@ -1088,7 +1219,8 @@ int cmd_report(int argc, const char **argv) ret = 0; goto error; } - } else if (use_browser == 0 && !quiet && !report.stats_mode) { + } else if (use_browser == 0 && !quiet && + !report.stats_mode && !report.tasks_mode) { fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n", stdout); } From 6439d7d16c94324300eb392ed85e3632e489e197 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Jan 2018 15:25:03 -0300 Subject: [PATCH 26/27] perf report: Introduce --mmaps Similar to --tasks, producing the same output plus /proc//maps similar lines for each mmap record present in a perf.data file. Please note that not all mmaps are stored, for instance, some of the non-executable mmaps are only stored when 'perf record --data' is used, when the user wants to resolve data accesses in addition to asking for executable mmaps to get the DSO with symtabs. E.g.: # perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.018 MB perf.data (7 samples) ] [root@jouet ~]# perf report --mmaps # pid tid ppid comm 0 0 -1 |swapper 4137 4137 -1 |sleep 5628a35a1000-5628a37aa000 r-xp 00000000 3147148 /usr/bin/sleep 7fb65ad51000-7fb65b134000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so 7fb65b134000-7fb65b35e000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so 7ffd94b9f000-7ffd94ba1000 r-xp 00000000 0 [vdso] # # perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.019 MB perf.data (8 samples) ] # perf report --mmaps # pid tid ppid comm 0 0 -1 |swapper 4161 4161 -1 |sleep 55afae69a000-55afae8a3000 r-xp 00000000 3147148 /usr/bin/sleep 7f569f00d000-7f569f3f0000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so 7f569f3f0000-7f569f61a000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so 7fff6fffe000-7fff70000000 r-xp 00000000 0 [vdso] # # perf record time sleep 1 0.00user 0.00system 0:01.00elapsed 0%CPU (0avgtext+0avgdata 2156maxresident)k 0inputs+0outputs (0major+73minor)pagefaults 0swaps [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.019 MB perf.data (14 samples) ] # perf report --mmaps # pid tid ppid comm 0 0 -1 |swapper 4281 4281 -1 |time 560560dca000-560560fcf000 r-xp 00000000 3190458 /usr/bin/time 7fc175196000-7fc175579000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so 7fc175579000-7fc1757a3000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so 7ffc924f6000-7ffc924f8000 r-xp 00000000 0 [vdso] 4282 4282 4281 | sleep 560560dca000-560560fcf000 r-xp 00000000 3190458 /usr/bin/time 564b4de3c000-564b4e045000 r-xp 00000000 3147148 /usr/bin/sleep 7f6a5a716000-7f6a5aaf9000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so 7f6a5aaf9000-7f6a5ad23000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so 7fc175196000-7fc175579000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so 7fc175579000-7fc1757a3000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so 7ffc924f6000-7ffc924f8000 r-xp 00000000 0 [vdso] 7ffcec7e6000-7ffcec7e8000 r-xp 00000000 0 [vdso] # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-zulwdlg5rfowogr1qznorvvc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 9 ++++- tools/perf/builtin-report.c | 50 ++++++++++++++++++++++-- 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 856c3c7e94fae..63d0db3184c90 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -457,6 +457,13 @@ include::itrace.txt[] will be printed. Each entry is function name or file/line. Enabled by default, disable with --no-inline. +--mmaps:: + Show --tasks output plus mmap information in a format similar to + /proc//maps. + + Please note that not all mmaps are stored, options affecting which ones + are include 'perf record --data', for instance. + --stats:: Display overall events statistics without any further processing. (like the one at the end of the perf report -D command) @@ -469,4 +476,4 @@ include::callchain-overhead-calculation.txt[] SEE ALSO -------- -linkperf:perf-stat[1], linkperf:perf-annotate[1] +linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1] diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2c7bd85651dca..dd4df9a5cd061 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -52,6 +52,7 @@ #include #include #include +#include #define PTIME_RANGE_MAX 10 @@ -65,6 +66,7 @@ struct report { bool mem_mode; bool stats_mode; bool tasks_mode; + bool mmaps_mode; bool header; bool header_only; bool nonany_branch_mode; @@ -608,6 +610,10 @@ static int stats_print(struct report *rep) static void tasks_setup(struct report *rep) { memset(&rep->tool, 0, sizeof(rep->tool)); + if (rep->mmaps_mode) { + rep->tool.mmap = perf_event__process_mmap; + rep->tool.mmap2 = perf_event__process_mmap2; + } rep->tool.comm = perf_event__process_comm; rep->tool.exit = perf_event__process_exit; rep->tool.fork = perf_event__process_fork; @@ -642,14 +648,46 @@ static struct task *tasks_list(struct task *task, struct machine *machine) return tasks_list(parent_task, machine); } +static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) +{ + size_t printed = 0; + struct rb_node *nd; + + for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) { + struct map *map = rb_entry(nd, struct map, rb_node); + + printed += fprintf(fp, "%*s %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n", + indent, "", map->start, map->end, + map->prot & PROT_READ ? 'r' : '-', + map->prot & PROT_WRITE ? 'w' : '-', + map->prot & PROT_EXEC ? 'x' : '-', + map->flags & MAP_SHARED ? 's' : 'p', + map->pgoff, + map->ino, map->dso->name); + } + + return printed; +} + +static int map_groups__fprintf_task(struct map_groups *mg, int indent, FILE *fp) +{ + int printed = 0, i; + for (i = 0; i < MAP__NR_TYPES; ++i) + printed += maps__fprintf_task(&mg->maps[i], indent, fp); + return printed; +} + static void task__print_level(struct task *task, FILE *fp, int level) { struct thread *thread = task->thread; struct task *child; + int comm_indent = fprintf(fp, " %8d %8d %8d |%*s", + thread->pid_, thread->tid, thread->ppid, + level, ""); + + fprintf(fp, "%s\n", thread__comm_str(thread)); - fprintf(fp, " %8d %8d %8d |%*s%s\n", - thread->pid_, thread->tid, thread->ppid, - level, "", thread__comm_str(thread)); + map_groups__fprintf_task(thread->mg, comm_indent, fp); if (!list_empty(&task->children)) { list_for_each_entry(child, &task->children, list) @@ -930,6 +968,7 @@ int cmd_report(int argc, const char **argv) "dump raw trace in ASCII"), OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"), OPT_BOOLEAN(0, "tasks", &report.tasks_mode, "Display recorded tasks"), + OPT_BOOLEAN(0, "mmaps", &report.mmaps_mode, "Display recorded tasks memory maps"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, @@ -1077,6 +1116,9 @@ int cmd_report(int argc, const char **argv) report.symbol_filter_str = argv[0]; } + if (report.mmaps_mode) + report.tasks_mode = true; + if (quiet) perf_quiet_option(); @@ -1194,7 +1236,7 @@ int cmd_report(int argc, const char **argv) if (report.stats_mode || report.tasks_mode) use_browser = 0; if (report.stats_mode && report.tasks_mode) { - pr_err("Error: --tasks and --stats options cannot be used together\n"); + pr_err("Error: --tasks and --mmaps can't be used together with --stats\n"); goto error; } From 5d64db2966e38bfd99114ecf0b54f97d33023dcd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Jan 2018 11:36:07 -0300 Subject: [PATCH 27/27] tools headers: Synchronize kernel <-> tooling headers Two kernel headers got modified recently due to meltdown/spectre, in: a89f040fa34e ("x86/cpufeatures: Add X86_BUG_CPU_INSECURE") which are used by tooling as well: arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/disabled-features.h None of those changes have an effect on tooling, so do a plain copy. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-qqzcs8ri3vks8cypg0puk0ae@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 4 +++- tools/arch/x86/include/asm/disabled-features.h | 8 +++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 800104c8a3edf..21ac898df2d8e 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -197,11 +197,12 @@ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ +#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ - +#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ @@ -340,5 +341,6 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 14d6d50073142..b027633e73003 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -50,6 +50,12 @@ # define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) #endif +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define DISABLE_PTI 0 +#else +# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -60,7 +66,7 @@ #define DISABLED_MASK4 (DISABLE_PCID) #define DISABLED_MASK5 0 #define DISABLED_MASK6 0 -#define DISABLED_MASK7 0 +#define DISABLED_MASK7 (DISABLE_PTI) #define DISABLED_MASK8 0 #define DISABLED_MASK9 (DISABLE_MPX) #define DISABLED_MASK10 0