From 6703c9771d83ebe092b0d49cb0609a3f9d8b4ff7 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 2 Jan 2018 14:59:21 -0300
Subject: [PATCH 01/27] perf test bpf: Improve message about expected samples

When failing on one of the BPF tests we were just stating:

  BPF filter result incorrect

Add some more info to help figuring out the problem:

 BPF filter result incorrect, expected 56, got 0 samples

This came out while investigating this failure, first seen after
updating the kernel to the 4.15.0-rc6 tag:

  [root@jouet ~]# perf test bpf
  39: BPF filter               :
  39.1: Basic BPF filtering    : FAILED!
  39.2: BPF pinning            : Skip
  39.3: BPF prologue generation: Skip
  39.4: BPF relocation checker : Skip
  [root@jouet ~]#

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-403npu7daupv6b2bmxliv5pk@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/bpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index c433dd30975a3..057c6b8fdb53e 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -190,7 +190,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
 	}
 
 	if (count != expect) {
-		pr_debug("BPF filter result incorrect\n");
+		pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count);
 		goto out_delete_evlist;
 	}
 

From 13cb2d0f513cf35bf74484a392f745f9e9c5a3f2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 2 Jan 2018 15:18:07 -0300
Subject: [PATCH 02/27] perf test bpf: Use designated struct field initializers

To follow standard practice in the kernel sources, documenting the
initialization better and helping quickly finding the value for some
field in a struct with many entries.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-syn3hz9hz7ukxlxbx5x6hv20@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/bpf.c | 59 +++++++++++++++++++-----------------------
 1 file changed, 27 insertions(+), 32 deletions(-)

diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 057c6b8fdb53e..0512f1b5bfdba 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -63,46 +63,41 @@ static struct {
 	bool	pin;
 } bpf_testcase_table[] = {
 	{
-		LLVM_TESTCASE_BASE,
-		"Basic BPF filtering",
-		"[basic_bpf_test]",
-		"fix 'perf test LLVM' first",
-		"load bpf object failed",
-		&epoll_wait_loop,
-		(NR_ITERS + 1) / 2,
-		false,
+		.prog_id	  = LLVM_TESTCASE_BASE,
+		.desc		  = "Basic BPF filtering",
+		.name		  = "[basic_bpf_test]",
+		.msg_compile_fail = "fix 'perf test LLVM' first",
+		.msg_load_fail	  = "load bpf object failed",
+		.target_func	  = &epoll_wait_loop,
+		.expect_result	  = (NR_ITERS + 1) / 2,
 	},
 	{
-		LLVM_TESTCASE_BASE,
-		"BPF pinning",
-		"[bpf_pinning]",
-		"fix kbuild first",
-		"check your vmlinux setting?",
-		&epoll_wait_loop,
-		(NR_ITERS + 1) / 2,
-		true,
+		.prog_id	  = LLVM_TESTCASE_BASE,
+		.desc		  = "BPF pinning",
+		.name		  = "[bpf_pinning]",
+		.msg_compile_fail = "fix kbuild first",
+		.msg_load_fail	  = "check your vmlinux setting?",
+		.target_func	  = &epoll_wait_loop,
+		.expect_result	  = (NR_ITERS + 1) / 2,
+		.pin 		  = true,
 	},
 #ifdef HAVE_BPF_PROLOGUE
 	{
-		LLVM_TESTCASE_BPF_PROLOGUE,
-		"BPF prologue generation",
-		"[bpf_prologue_test]",
-		"fix kbuild first",
-		"check your vmlinux setting?",
-		&llseek_loop,
-		(NR_ITERS + 1) / 4,
-		false,
+		.prog_id	  = LLVM_TESTCASE_BPF_PROLOGUE,
+		.desc		  = "BPF prologue generation",
+		.name		  = "[bpf_prologue_test]",
+		.msg_compile_fail = "fix kbuild first",
+		.msg_load_fail	  = "check your vmlinux setting?",
+		.target_func	  = &llseek_loop,
+		.expect_result	  = (NR_ITERS + 1) / 4,
 	},
 #endif
 	{
-		LLVM_TESTCASE_BPF_RELOCATION,
-		"BPF relocation checker",
-		"[bpf_relocation_test]",
-		"fix 'perf test LLVM' first",
-		"libbpf error when dealing with relocation",
-		NULL,
-		0,
-		false,
+		.prog_id	  = LLVM_TESTCASE_BPF_RELOCATION,
+		.desc		  = "BPF relocation checker",
+		.name		  = "[bpf_relocation_test]",
+		.msg_compile_fail = "fix 'perf test LLVM' first",
+		.msg_load_fail	  = "libbpf error when dealing with relocation",
 	},
 };
 

From e0337f4f9aff60a19079b0f224136bb03877db58 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 4 Jan 2018 12:43:32 -0300
Subject: [PATCH 03/27] perf test bpf: Hook on epoll_pwait()

The 'perf test bpf' was hooking a eBPF program on the SyS_epoll_wait()
kernel function, that was what the epoll_wait() glibc function ended up
calling, but since at least glibc 2.26, the one that comes with, for
instance, Fedora 27, glibc ends up calling SyS_epoll_pwait() when
epoll_wait() is used, causing this 'perf test' entry to fail.

So switch to using epoll_pwait() and hook the eBPF program to the
SyS_epoll_pwait() kernel function to make it work on a wider range of
glibc and kernel versions.

Tested-by: Wang Nan <wangnan0@huawei.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lkml.kernel.org/n/tip-zynvquy63er8s5mrgsz65pto@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/bpf-script-example.c | 4 ++--
 tools/perf/tests/bpf.c                | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c
index 268e5f8e4aa27..e4123c1b0e883 100644
--- a/tools/perf/tests/bpf-script-example.c
+++ b/tools/perf/tests/bpf-script-example.c
@@ -31,8 +31,8 @@ struct bpf_map_def SEC("maps") flip_table = {
 	.max_entries = 1,
 };
 
-SEC("func=SyS_epoll_wait")
-int bpf_func__SyS_epoll_wait(void *ctx)
+SEC("func=SyS_epoll_pwait")
+int bpf_func__SyS_epoll_pwait(void *ctx)
 {
 	int ind =0;
 	int *flag = bpf_map_lookup_elem(&flip_table, &ind);
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 0512f1b5bfdba..8e709c9d512c4 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -19,13 +19,13 @@
 
 #ifdef HAVE_LIBBPF_SUPPORT
 
-static int epoll_wait_loop(void)
+static int epoll_pwait_loop(void)
 {
 	int i;
 
 	/* Should fail NR_ITERS times */
 	for (i = 0; i < NR_ITERS; i++)
-		epoll_wait(-(i + 1), NULL, 0, 0);
+		epoll_pwait(-(i + 1), NULL, 0, 0, NULL);
 	return 0;
 }
 
@@ -68,7 +68,7 @@ static struct {
 		.name		  = "[basic_bpf_test]",
 		.msg_compile_fail = "fix 'perf test LLVM' first",
 		.msg_load_fail	  = "load bpf object failed",
-		.target_func	  = &epoll_wait_loop,
+		.target_func	  = &epoll_pwait_loop,
 		.expect_result	  = (NR_ITERS + 1) / 2,
 	},
 	{
@@ -77,7 +77,7 @@ static struct {
 		.name		  = "[bpf_pinning]",
 		.msg_compile_fail = "fix kbuild first",
 		.msg_load_fail	  = "check your vmlinux setting?",
-		.target_func	  = &epoll_wait_loop,
+		.target_func	  = &epoll_pwait_loop,
 		.expect_result	  = (NR_ITERS + 1) / 2,
 		.pin 		  = true,
 	},

From 44df1afdb174fd6038e419f80efd914c0b5f2f85 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Wed, 6 Dec 2017 01:50:40 +0000
Subject: [PATCH 04/27] perf tools: Fix compile error with libunwind x86

Fix a compile error:

 ...
   CC       util/libunwind/x86_32.o
 In file included from util/libunwind/x86_32.c:33:0:
 util/libunwind/../../arch/x86/util/unwind-libunwind.c: In function 'libunwind__x86_reg_id':
 util/libunwind/../../arch/x86/util/unwind-libunwind.c:110:11: error: 'EINVAL' undeclared (first use in this function)
    return -EINVAL;
            ^
 util/libunwind/../../arch/x86/util/unwind-libunwind.c:110:11: note: each undeclared identifier is reported only once for each function it appears in
 mv: cannot stat 'util/libunwind/.x86_32.o.tmp': No such file or directory
 make[4]: *** [util/libunwind/x86_32.o] Error 1
 make[3]: *** [util] Error 2
 make[2]: *** [libperf-in.o] Error 2
 make[1]: *** [sub-make] Error 2
 make: *** [all] Error 2

It happens when libunwind-x86 feature is detected.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/20171206015040.114574-1-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/x86/util/unwind-libunwind.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c
index 9c917f80c9065..05920e3edf7a7 100644
--- a/tools/perf/arch/x86/util/unwind-libunwind.c
+++ b/tools/perf/arch/x86/util/unwind-libunwind.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
-#ifndef REMOTE_UNWIND_LIBUNWIND
 #include <errno.h>
+#ifndef REMOTE_UNWIND_LIBUNWIND
 #include <libunwind.h>
 #include "perf_regs.h"
 #include "../../util/unwind.h"

From 935f5a9d4500020879858c9224c98dfabf16101d Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Sat, 30 Dec 2017 00:26:52 +0800
Subject: [PATCH 05/27] perf report: Fix a wrong offset issue when using
 /proc/kcore

When a valid vmlinux is not found, 'perf report' falls back to look at
/proc/kcore. In this case, it will report the impossible large offset.

For example:

  # perf record -b -e cycles:k find /etc/ > /dev/null
  # perf report --stdio --branch-history

    22.77%  _vm_normal_page+18446603336221188162
            |
            ---page_remove_rmap +18446603336221188324
               page_remove_rmap +18446603336221188487 (cycles:5)
               unlock_page_memcg +18446603336221188096
               page_remove_rmap +18446603336221188327 (cycles:1)

The issue is the value which is passed to parameter 'addr' in
__get_srcline() is the objdump address. It's not correct if we calculate
the offset by using 'addr - sym->start'.

This patch creates a new parameter 'ip' in __get_srcline(). It is not
converted to objdump address.

With this patch, the perf report output is:

    22.77%  _vm_normal_page+66
            |
            ---page_remove_rmap +228
               page_remove_rmap +391 (cycles:5)
               unlock_page_memcg +0
               page_remove_rmap +231 (cycles:1)
               page_remove_rmap +236

Committer testing:

Make sure you get any valid vmlinux out of the way, using '-v' on the
'perf report' case and deleting it from places where perf searches them,
like your kernel build dir and the build-id cache, in ~/.debug/.

Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1514564812-17344-1-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/annotate.c |  3 ++-
 tools/perf/util/machine.c  |  2 +-
 tools/perf/util/map.c      |  2 +-
 tools/perf/util/sort.c     | 16 ++++++++++------
 tools/perf/util/srcline.c  |  9 +++++----
 tools/perf/util/srcline.h  |  5 +++--
 6 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 68e687d1bf99a..28b233c3dcbe3 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1960,7 +1960,8 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map,
 		if (percent_max <= 0.5)
 			continue;
 
-		al->path = get_srcline(map->dso, start + al->offset, NULL, false, true);
+		al->path = get_srcline(map->dso, start + al->offset, NULL,
+				       false, true, start + al->offset);
 		insert_source_line(&tmp_root, al);
 	}
 
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 64d255f6a5374..b05a67464c03c 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1726,7 +1726,7 @@ static char *callchain_srcline(struct map *map, struct symbol *sym, u64 ip)
 		bool show_addr = callchain_param.key == CCKEY_ADDRESS;
 
 		srcline = get_srcline(map->dso, map__rip_2objdump(map, ip),
-				      sym, show_sym, show_addr);
+				      sym, show_sym, show_addr, ip);
 		srcline__tree_insert(&map->dso->srclines, ip, srcline);
 	}
 
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 6d40efd744027..8fe57031e1a85 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -419,7 +419,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
 	if (map && map->dso) {
 		srcline = get_srcline(map->dso,
 				      map__rip_2objdump(map, addr), NULL,
-				      true, true);
+				      true, true, addr);
 		if (srcline != SRCLINE_UNKNOWN)
 			ret = fprintf(fp, "%s%s", prefix, srcline);
 		free_srcline(srcline);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index a00eacdf02ed9..211e7f326b9f2 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -336,7 +336,7 @@ char *hist_entry__get_srcline(struct hist_entry *he)
 		return SRCLINE_UNKNOWN;
 
 	return get_srcline(map->dso, map__rip_2objdump(map, he->ip),
-			   he->ms.sym, true, true);
+			   he->ms.sym, true, true, he->ip);
 }
 
 static int64_t
@@ -380,7 +380,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right)
 					   map__rip_2objdump(map,
 							     left->branch_info->from.al_addr),
 							 left->branch_info->from.sym,
-							 true, true);
+							 true, true,
+							 left->branch_info->from.al_addr);
 	}
 	if (!right->branch_info->srcline_from) {
 		struct map *map = right->branch_info->from.map;
@@ -391,7 +392,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right)
 					     map__rip_2objdump(map,
 							       right->branch_info->from.al_addr),
 						     right->branch_info->from.sym,
-						     true, true);
+						     true, true,
+						     right->branch_info->from.al_addr);
 	}
 	return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from);
 }
@@ -423,7 +425,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right)
 					   map__rip_2objdump(map,
 							     left->branch_info->to.al_addr),
 							 left->branch_info->from.sym,
-							 true, true);
+							 true, true,
+							 left->branch_info->to.al_addr);
 	}
 	if (!right->branch_info->srcline_to) {
 		struct map *map = right->branch_info->to.map;
@@ -434,7 +437,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right)
 					     map__rip_2objdump(map,
 							       right->branch_info->to.al_addr),
 						     right->branch_info->to.sym,
-						     true, true);
+						     true, true,
+						     right->branch_info->to.al_addr);
 	}
 	return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to);
 }
@@ -465,7 +469,7 @@ static char *hist_entry__get_srcfile(struct hist_entry *e)
 		return no_srcfile;
 
 	sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip),
-			 e->ms.sym, false, true, true);
+			 e->ms.sym, false, true, true, e->ip);
 	if (!strcmp(sf, SRCLINE_UNKNOWN))
 		return no_srcfile;
 	p = strchr(sf, ':');
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index d19f05c56de61..3c21fd059b649 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -496,7 +496,8 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
 #define A2L_FAIL_LIMIT 123
 
 char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
-		  bool show_sym, bool show_addr, bool unwind_inlines)
+		  bool show_sym, bool show_addr, bool unwind_inlines,
+		  u64 ip)
 {
 	char *file = NULL;
 	unsigned line = 0;
@@ -536,7 +537,7 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
 
 	if (sym) {
 		if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? sym->name : "",
-					addr - sym->start) < 0)
+					ip - sym->start) < 0)
 			return SRCLINE_UNKNOWN;
 	} else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0)
 		return SRCLINE_UNKNOWN;
@@ -550,9 +551,9 @@ void free_srcline(char *srcline)
 }
 
 char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
-		  bool show_sym, bool show_addr)
+		  bool show_sym, bool show_addr, u64 ip)
 {
-	return __get_srcline(dso, addr, sym, show_sym, show_addr, false);
+	return __get_srcline(dso, addr, sym, show_sym, show_addr, false, ip);
 }
 
 struct srcline_node {
diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h
index 847b7086182c1..b2bb5502fd628 100644
--- a/tools/perf/util/srcline.h
+++ b/tools/perf/util/srcline.h
@@ -11,9 +11,10 @@ struct symbol;
 
 extern bool srcline_full_filename;
 char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
-		  bool show_sym, bool show_addr);
+		  bool show_sym, bool show_addr, u64 ip);
 char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
-		  bool show_sym, bool show_addr, bool unwind_inlines);
+		  bool show_sym, bool show_addr, bool unwind_inlines,
+		  u64 ip);
 void free_srcline(char *srcline);
 
 /* insert the srcline into the DSO, which will take ownership */

From 40c39e3046411f84bab82f66783ff3593e2bcd9b Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Tue, 26 Dec 2017 18:42:43 +0800
Subject: [PATCH 06/27] perf report: Fix a no annotate browser displayed issue

When enabling '-b' option in perf record, for example,

  perf record -b ...
  perf report

and then browsing the annotate browser from perf report (press 'A'), it
would fail (annotate browser can't be displayed).

It's because the '.add_entry_cb' op of struct report is overwritten by
hist_iter__branch_callback() in builtin-report.c. But this function doesn't do
something like mapping symbols and sources. So next, do_annotate() will return
directly.

        notes = symbol__annotation(act->ms.sym);
        if (!notes->src)
                return 0;

This patch adds the lost code to hist_iter__branch_callback (refer to
hist_iter__report_callback).

v2:

Fix a crash bug when perform 'perf report --stdio'.

The reason is that we init the symbol annotation only in browser mode, it
doesn't allocate/init resources for stdio mode.

So now in hist_iter__branch_callback(), it will return directly if it's not in
browser mode.

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1514284963-18587-1-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-report.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index eb9ce6327e71b..07827cd514803 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -162,12 +162,28 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter,
 	struct hist_entry *he = iter->he;
 	struct report *rep = arg;
 	struct branch_info *bi;
+	struct perf_sample *sample = iter->sample;
+	struct perf_evsel *evsel = iter->evsel;
+	int err;
+
+	if (!ui__has_annotation())
+		return 0;
+
+	hist__account_cycles(sample->branch_stack, al, sample,
+			     rep->nonany_branch_mode);
 
 	bi = he->branch_info;
+	err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx);
+	if (err)
+		goto out;
+
+	err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx);
+
 	branch_type_count(&rep->brtype_stat, &bi->flags,
 			  bi->from.addr, bi->to.addr);
 
-	return 0;
+out:
+	return err;
 }
 
 static int process_sample_event(struct perf_tool *tool,

From 6011518db3bd04c80cd3ce3e6aea1c399739adb4 Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Fri, 8 Dec 2017 21:13:41 +0800
Subject: [PATCH 07/27] perf header: Add infrastructure to record first and
 last sample time

perf report/script/... have a --time option to limit the time range of
output. That's very useful to slice large traces, e.g. when processing
the output of perf script for some analysis.

But right now --time only supports absolute time. Also there is no fast
way to get the start/end times of a given trace except for looking at
it.  This makes it hard to e.g. only decode the first half of the trace,
which is useful for parallelization of scripts

Another problem is that perf records are variable size and there is no
synchronization mechanism. So the only way to find the last sample
reliably would be to walk all samples. But we want to avoid that in perf
report/...  because it is already quite expensive. That is why storing
the first sample time and last sample time in perf record is better.

This patch creates a new header feature type HEADER_SAMPLE_TIME and
related ops. Save the first sample time and the last sample time to the
feature section in perf file header. That will be done when, for
instance, processing build-ids, where we already have to process all
samples to create the build-id table, take advantage of that to further
amortize that processing by storing HEADER_SAMPLE_TIME to make 'perf
report/script' faster when using --time.

Committer testing:

After this patch is applied the header is written with zeroes, we need
the next patch, for "perf record" to actually write the timestamps:

  # perf report -D | grep PERF_RECORD_SAMPLE\(
  22501155244406 0x44f0 [0x28]: PERF_RECORD_SAMPLE(IP, 0x4001): 25016/25016: 0xffffffffa21be8c5 period: 1 addr: 0
  <SNIP>
  22501155793625 0x4a30 [0x28]: PERF_RECORD_SAMPLE(IP, 0x4001): 25016/25016: 0xffffffffa21ffd50 period: 2828043 addr: 0
  # perf report --header | grep "time of "
  # time of first sample : 0.000000
  # time of last sample : 0.000000
  #

Changelog:

v7: 1. Rebase to latest perf/core branch.

    2. Add following clarification in patch description according to
       Arnaldo's suggestion.

       "That will be done when, for instance, processing build-ids,
	where we already have to process all samples to create the
	build-id table, take advantage of that to further amortize
	that processing by storing HEADER_SAMPLE_TIME to make
	'perf report/script' faster when using --time."

v4: Use perf script time style for timestamp printing. Also add with
    the printing of sample duration.

v3: Remove the definitions of first_sample_time/last_sample_time from
    perf_session. Just define them in perf_evlist

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1512738826-2628-2-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../Documentation/perf.data-file-format.txt   |  4 ++
 tools/perf/util/evlist.h                      |  2 +
 tools/perf/util/header.c                      | 60 +++++++++++++++++++
 tools/perf/util/header.h                      |  1 +
 4 files changed, 67 insertions(+)

diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index 15e8b48077ba3..f7d85e89a98a6 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -261,6 +261,10 @@ struct {
 	struct perf_header_string map;
 }[number_of_cache_levels];
 
+	HEADER_SAMPLE_TIME = 21,
+
+Two uint64_t for the time of first sample and the time of last sample.
+
 	other bits are reserved and should ignored for now
 	HEADER_FEAT_BITS	= 256,
 
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 75160666d3051..e7fbca69cbac4 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -50,6 +50,8 @@ struct perf_evlist {
 	struct perf_evsel *selected;
 	struct events_stats stats;
 	struct perf_env	*env;
+	u64		first_sample_time;
+	u64		last_sample_time;
 };
 
 struct perf_evsel_str_handler {
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index ca73aa7be7082..a326e0d8b5b6b 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -16,6 +16,7 @@
 #include <linux/stringify.h>
 #include <sys/stat.h>
 #include <sys/utsname.h>
+#include <linux/time64.h>
 
 #include "evlist.h"
 #include "evsel.h"
@@ -35,6 +36,7 @@
 #include <api/fs/fs.h>
 #include "asm/bug.h"
 #include "tool.h"
+#include "time-utils.h"
 
 #include "sane_ctype.h"
 
@@ -1180,6 +1182,20 @@ static int write_stat(struct feat_fd *ff __maybe_unused,
 	return 0;
 }
 
+static int write_sample_time(struct feat_fd *ff,
+			     struct perf_evlist *evlist)
+{
+	int ret;
+
+	ret = do_write(ff, &evlist->first_sample_time,
+		       sizeof(evlist->first_sample_time));
+	if (ret < 0)
+		return ret;
+
+	return do_write(ff, &evlist->last_sample_time,
+			sizeof(evlist->last_sample_time));
+}
+
 static void print_hostname(struct feat_fd *ff, FILE *fp)
 {
 	fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
@@ -1505,6 +1521,28 @@ static void print_group_desc(struct feat_fd *ff, FILE *fp)
 	}
 }
 
+static void print_sample_time(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_session *session;
+	char time_buf[32];
+	double d;
+
+	session = container_of(ff->ph, struct perf_session, header);
+
+	timestamp__scnprintf_usec(session->evlist->first_sample_time,
+				  time_buf, sizeof(time_buf));
+	fprintf(fp, "# time of first sample : %s\n", time_buf);
+
+	timestamp__scnprintf_usec(session->evlist->last_sample_time,
+				  time_buf, sizeof(time_buf));
+	fprintf(fp, "# time of last sample : %s\n", time_buf);
+
+	d = (double)(session->evlist->last_sample_time -
+		session->evlist->first_sample_time) / NSEC_PER_MSEC;
+
+	fprintf(fp, "# sample duration : %10.3f ms\n", d);
+}
+
 static int __event_process_build_id(struct build_id_event *bev,
 				    char *filename,
 				    struct perf_session *session)
@@ -2146,6 +2184,27 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
 	return -1;
 }
 
+static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
+{
+	struct perf_session *session;
+	u64 first_sample_time, last_sample_time;
+	int ret;
+
+	session = container_of(ff->ph, struct perf_session, header);
+
+	ret = do_read_u64(ff, &first_sample_time);
+	if (ret)
+		return -1;
+
+	ret = do_read_u64(ff, &last_sample_time);
+	if (ret)
+		return -1;
+
+	session->evlist->first_sample_time = first_sample_time;
+	session->evlist->last_sample_time = last_sample_time;
+	return 0;
+}
+
 struct feature_ops {
 	int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
 	void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2203,6 +2262,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPN(AUXTRACE,	auxtrace,	false),
 	FEAT_OPN(STAT,		stat,		false),
 	FEAT_OPN(CACHE,		cache,		true),
+	FEAT_OPR(SAMPLE_TIME,	sample_time,	false),
 };
 
 struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 317fb901e47fc..f28aaaa3a4405 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -35,6 +35,7 @@ enum {
 	HEADER_AUXTRACE,
 	HEADER_STAT,
 	HEADER_CACHE,
+	HEADER_SAMPLE_TIME,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS	= 256,
 };

From 68588baf8d01826673f2874f434123029e519052 Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Fri, 8 Dec 2017 21:13:42 +0800
Subject: [PATCH 08/27] perf record: Record the first and last sample time in
 the header

In the default 'perf record' configuration, all samples are processed,
to create the HEADER_BUILD_ID table. So it's very easy to get the
first/last samples and save the time to perf file header via the
function write_sample_time().

Later, at post processing time, perf report/script will fetch the time
from perf file header.

Committer testing:

  # perf record -a sleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 2.099 MB perf.data (1101 samples) ]
  [root@jouet home]# perf report --header | grep "time of "
  # time of first sample : 22947.909226
  # time of last sample : 22948.910704
  #
  # perf report -D | grep PERF_RECORD_SAMPLE\(
  0 22947909226101 0x20bb68 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 0/0: 0xffffffffa21b1af3 period: 1 addr: 0
  0 22947909229928 0x20bb98 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 0/0: 0xffffffffa200d204 period: 1 addr: 0
  <SNIP>
  3 22948910397351 0x219360 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 28251/28251: 0xffffffffa22071d8 period: 169518 addr: 0
  0 22948910652380 0x20f120 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 0/0: 0xffffffffa2856816 period: 198807 addr: 0
  2 22948910704034 0x2172d0 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 0/0: 0xffffffffa2856816 period: 88111 addr: 0
  #

Changelog:

v7: Just update the patch description according to Arnaldo's suggestion.

v6: Currently '--buildid-all' is not enabled at default. So the walking
    on all samples is the default operation. There is no big overhead
    to calculate the timestamp boundary in process_sample_event handler
    once we already go through all samples. So the timestamp boundary
    calculation is enabled by default when '--buildid-all' is not enabled.

    While if '--buildid-all' is enabled, we creates a new option
    "--timestamp-boundary" for user to decide if it enables the
    timestamp boundary calculation.

v5: There is an issue that the sample walking can only work when
    '--buildid-all' is not enabled. So we need to let the walking
    be able to work even if '--buildid-all' is enabled and let the
    processing skips the dso hit marking for this case.

    At first, I want to provide a new option "--record-time-boundaries".
    While after consideration, I think a new option is not very
    necessary.

v3: Remove the definitions of first_sample_time and last_sample_time
    from struct record and directly save them in perf_evlist.

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1512738826-2628-3-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-record.txt |  3 +++
 tools/perf/builtin-record.c              | 18 +++++++++++++++---
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 5a626ef666c29..3eea6de35a38d 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -430,6 +430,9 @@ Configure all used events to run in user space.
 --timestamp-filename
 Append timestamp to output file name.
 
+--timestamp-boundary::
+Record timestamp boundary (time of first/last samples).
+
 --switch-output[=mode]::
 Generate multiple perf.data files, timestamp prefixed, switching to a new one
 based on 'mode' value:
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 50385d89c4970..65681a1a292a5 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -78,6 +78,7 @@ struct record {
 	bool			no_buildid_cache_set;
 	bool			buildid_all;
 	bool			timestamp_filename;
+	bool			timestamp_boundary;
 	struct switch_output	switch_output;
 	unsigned long long	samples;
 };
@@ -409,8 +410,15 @@ static int process_sample_event(struct perf_tool *tool,
 {
 	struct record *rec = container_of(tool, struct record, tool);
 
-	rec->samples++;
+	if (rec->evlist->first_sample_time == 0)
+		rec->evlist->first_sample_time = sample->time;
+
+	rec->evlist->last_sample_time = sample->time;
 
+	if (rec->buildid_all)
+		return 0;
+
+	rec->samples++;
 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
 }
 
@@ -435,9 +443,11 @@ static int process_buildids(struct record *rec)
 
 	/*
 	 * If --buildid-all is given, it marks all DSO regardless of hits,
-	 * so no need to process samples.
+	 * so no need to process samples. But if timestamp_boundary is enabled,
+	 * it still needs to walk on all samples to get the timestamps of
+	 * first/last samples.
 	 */
-	if (rec->buildid_all)
+	if (rec->buildid_all && !rec->timestamp_boundary)
 		rec->tool.sample = NULL;
 
 	return perf_session__process_events(session);
@@ -1621,6 +1631,8 @@ static struct option __record_options[] = {
 		    "Record build-id of all DSOs regardless of hits"),
 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
 		    "append timestamp to output filename"),
+	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
+		    "Record timestamp boundary (time of first/last samples)"),
 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
 			  &record.switch_output.set, "signal,size,time",
 			  "Switch output when receive SIGUSR2 or cross size,time threshold",

From 13a70f350665580708ab11f725d3578eaacbf2d0 Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Fri, 8 Dec 2017 21:13:43 +0800
Subject: [PATCH 09/27] perf tools: Create function to parse time percent

Current perf report/script/... have a --time option to limit the time
range of output. But right now it only supports absolute time, add
support for time percentage.

For example:

1. Select the second 10% time slice
   perf report --time 10%/2

2. Select from 0% to 10% time slice
   perf report --time 0%-10%

It also support the multiple time ranges.

3. Select the first and second 10% time slices
   perf report --time 10%/1,10%/2

4. Select from 0% to 10% and 30% to 40% slices
   perf report --time 0%-10%,30%-40%

Changelog:

v4: An issue is found. Following passes.
    perf script --time 10%/10x12321xsdfdasfdsafdsafdsa

    Now it uses strtol to replace atoi.

Committer notes:

This just puts in place the infrastructure, so the examples in this cset
comment will only work later, after more patches in this series are
applied.

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1512738826-2628-4-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/time-utils.c | 205 +++++++++++++++++++++++++++++++++--
 tools/perf/util/time-utils.h |   3 +
 2 files changed, 196 insertions(+), 12 deletions(-)

diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c
index 81927d0274176..61c46022de0bf 100644
--- a/tools/perf/util/time-utils.c
+++ b/tools/perf/util/time-utils.c
@@ -6,6 +6,7 @@
 #include <time.h>
 #include <errno.h>
 #include <inttypes.h>
+#include <math.h>
 
 #include "perf.h"
 #include "debug.h"
@@ -60,11 +61,10 @@ static int parse_timestr_sec_nsec(struct perf_time_interval *ptime,
 	return 0;
 }
 
-int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr)
+static int split_start_end(char **start, char **end, const char *ostr, char ch)
 {
 	char *start_str, *end_str;
 	char *d, *str;
-	int rc = 0;
 
 	if (ostr == NULL || *ostr == '\0')
 		return 0;
@@ -74,25 +74,35 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr)
 	if (str == NULL)
 		return -ENOMEM;
 
-	ptime->start = 0;
-	ptime->end = 0;
-
-	/* str has the format: <start>,<stop>
-	 * variations: <start>,
-	 *             ,<stop>
-	 *             ,
-	 */
 	start_str = str;
-	d = strchr(start_str, ',');
+	d = strchr(start_str, ch);
 	if (d) {
 		*d = '\0';
 		++d;
 	}
 	end_str = d;
 
+	*start = start_str;
+	*end = end_str;
+
+	return 0;
+}
+
+int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr)
+{
+	char *start_str = NULL, *end_str;
+	int rc;
+
+	rc = split_start_end(&start_str, &end_str, ostr, ',');
+	if (rc || !start_str)
+		return rc;
+
+	ptime->start = 0;
+	ptime->end = 0;
+
 	rc = parse_timestr_sec_nsec(ptime, start_str, end_str);
 
-	free(str);
+	free(start_str);
 
 	/* make sure end time is after start time if it was given */
 	if (rc == 0 && ptime->end && ptime->end < ptime->start)
@@ -104,6 +114,177 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr)
 	return rc;
 }
 
+static int parse_percent(double *pcnt, char *str)
+{
+	char *c;
+
+	c = strchr(str, '%');
+	if (c)
+		*c = '\0';
+	else
+		return -1;
+
+	*pcnt = atof(str) / 100.0;
+
+	return 0;
+}
+
+static int percent_slash_split(char *str, struct perf_time_interval *ptime,
+			       u64 start, u64 end)
+{
+	char *p, *end_str;
+	double pcnt, start_pcnt, end_pcnt;
+	u64 total = end - start;
+	int i;
+
+	/*
+	 * Example:
+	 * 10%/2: select the second 10% slice and the third 10% slice
+	 */
+
+	/* We can modify this string since the original one is copied */
+	p = strchr(str, '/');
+	if (!p)
+		return -1;
+
+	*p = '\0';
+	if (parse_percent(&pcnt, str) < 0)
+		return -1;
+
+	p++;
+	i = (int)strtol(p, &end_str, 10);
+	if (*end_str)
+		return -1;
+
+	if (pcnt <= 0.0)
+		return -1;
+
+	start_pcnt = pcnt * (i - 1);
+	end_pcnt = pcnt * i;
+
+	if (start_pcnt < 0.0 || start_pcnt > 1.0 ||
+	    end_pcnt < 0.0 || end_pcnt > 1.0) {
+		return -1;
+	}
+
+	ptime->start = start + round(start_pcnt * total);
+	ptime->end = start + round(end_pcnt * total);
+
+	return 0;
+}
+
+static int percent_dash_split(char *str, struct perf_time_interval *ptime,
+			      u64 start, u64 end)
+{
+	char *start_str = NULL, *end_str;
+	double start_pcnt, end_pcnt;
+	u64 total = end - start;
+	int ret;
+
+	/*
+	 * Example: 0%-10%
+	 */
+
+	ret = split_start_end(&start_str, &end_str, str, '-');
+	if (ret || !start_str)
+		return ret;
+
+	if ((parse_percent(&start_pcnt, start_str) != 0) ||
+	    (parse_percent(&end_pcnt, end_str) != 0)) {
+		free(start_str);
+		return -1;
+	}
+
+	free(start_str);
+
+	if (start_pcnt < 0.0 || start_pcnt > 1.0 ||
+	    end_pcnt < 0.0 || end_pcnt > 1.0 ||
+	    start_pcnt > end_pcnt) {
+		return -1;
+	}
+
+	ptime->start = start + round(start_pcnt * total);
+	ptime->end = start + round(end_pcnt * total);
+
+	return 0;
+}
+
+typedef int (*time_pecent_split)(char *, struct perf_time_interval *,
+				 u64 start, u64 end);
+
+static int percent_comma_split(struct perf_time_interval *ptime_buf, int num,
+			       const char *ostr, u64 start, u64 end,
+			       time_pecent_split func)
+{
+	char *str, *p1, *p2;
+	int len, ret, i = 0;
+
+	str = strdup(ostr);
+	if (str == NULL)
+		return -ENOMEM;
+
+	len = strlen(str);
+	p1 = str;
+
+	while (p1 < str + len) {
+		if (i >= num) {
+			free(str);
+			return -1;
+		}
+
+		p2 = strchr(p1, ',');
+		if (p2)
+			*p2 = '\0';
+
+		ret = (func)(p1, &ptime_buf[i], start, end);
+		if (ret < 0) {
+			free(str);
+			return -1;
+		}
+
+		pr_debug("start time %d: %" PRIu64 ", ", i, ptime_buf[i].start);
+		pr_debug("end time %d: %" PRIu64 "\n", i, ptime_buf[i].end);
+
+		i++;
+
+		if (p2)
+			p1 = p2 + 1;
+		else
+			break;
+	}
+
+	free(str);
+	return i;
+}
+
+int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
+				 const char *ostr, u64 start, u64 end)
+{
+	char *c;
+
+	/*
+	 * ostr example:
+	 * 10%/2,10%/3: select the second 10% slice and the third 10% slice
+	 * 0%-10%,30%-40%: multiple time range
+	 */
+
+	memset(ptime_buf, 0, sizeof(*ptime_buf) * num);
+
+	c = strchr(ostr, '/');
+	if (c) {
+		return percent_comma_split(ptime_buf, num, ostr, start,
+					   end, percent_slash_split);
+	}
+
+	c = strchr(ostr, '-');
+	if (c) {
+		return percent_comma_split(ptime_buf, num, ostr, start,
+					   end, percent_dash_split);
+	}
+
+	return -1;
+}
+
 bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp)
 {
 	/* if time is not set don't drop sample */
diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h
index 15b475c50ccf7..23087231785ad 100644
--- a/tools/perf/util/time-utils.h
+++ b/tools/perf/util/time-utils.h
@@ -13,6 +13,9 @@ int parse_nsec_time(const char *str, u64 *ptime);
 
 int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr);
 
+int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
+				 const char *ostr, u64 start, u64 end);
+
 bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp);
 
 int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz);

From 9a9b8b4b2271e763c1600311a3d4ecc2ac359b55 Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Fri, 8 Dec 2017 21:13:44 +0800
Subject: [PATCH 10/27] perf tools: Create function to perform multiple time
 range checking

Previous patch supports the multiple time range.

For example, select the first and second 10% time slices.
perf report --time 10%/1,10%/2

We need a function to check if a timestamp is in the ranges of
[0, 10%) and [10%, 20%].

Note that it includes the last element in [10%, 20%] but it doesn't
include the last element in [0, 10%). It's to avoid the overlap.

This patch implments a new function perf_time__ranges_skip_sample
for this checking.

Change log:

v4: Let perf_time__ranges_skip_sample be compatible with
    perf_time__skip_sample when only one time range.

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1512738826-2628-5-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/time-utils.c | 28 ++++++++++++++++++++++++++++
 tools/perf/util/time-utils.h |  3 +++
 2 files changed, 31 insertions(+)

diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c
index 61c46022de0bf..3f7f18f06982e 100644
--- a/tools/perf/util/time-utils.c
+++ b/tools/perf/util/time-utils.c
@@ -300,6 +300,34 @@ bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp)
 	return false;
 }
 
+bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf,
+				   int num, u64 timestamp)
+{
+	struct perf_time_interval *ptime;
+	int i;
+
+	if ((timestamp == 0) || (num == 0))
+		return false;
+
+	if (num == 1)
+		return perf_time__skip_sample(&ptime_buf[0], timestamp);
+
+	/*
+	 * start/end of multiple time ranges must be valid.
+	 */
+	for (i = 0; i < num; i++) {
+		ptime = &ptime_buf[i];
+
+		if (timestamp >= ptime->start &&
+		    ((timestamp < ptime->end && i < num - 1) ||
+		     (timestamp <= ptime->end && i == num - 1))) {
+			break;
+		}
+	}
+
+	return (i == num) ? true : false;
+}
+
 int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz)
 {
 	u64  sec = timestamp / NSEC_PER_SEC;
diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h
index 23087231785ad..34d5eba26bf5b 100644
--- a/tools/perf/util/time-utils.h
+++ b/tools/perf/util/time-utils.h
@@ -18,6 +18,9 @@ int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
 
 bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp);
 
+bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf,
+				   int num, u64 timestamp);
+
 int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz);
 
 int fetch_current_timestamp(char *buf, size_t sz);

From 5b969bc766807e5c2f184d1d6f97b8471de946f1 Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Fri, 8 Dec 2017 21:13:45 +0800
Subject: [PATCH 11/27] perf report: Support time percent and multiple time
 ranges

perf report has a --time option to limit the time range of output.  It
only supports absolute time.

Now this option is extended to support multiple time ranges and support
the percent of time.

For example:

1. Select the first and second 10% time slices:

perf report --time 10%/1,10%/2

2. Select from 0% to 10% and 30% to 40% slices:

perf report --time 0%-10%,30%-40%

Changelog:

v6: Fix the merge issue with latest perf/core branch.
    No functional changes.

v5: Add checking of first/last sample time to detect if it's recorded
    in perf.data. If it's not recorded, returns error message to user.

v4: Remove perf_time__skip_sample, only uses perf_time__ranges_skip_sample

v3: Since the definitions of first_sample_time/last_sample_time
    are moved from perf_session to perf_evlist so change the
    related code.

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1512738826-2628-6-git-send-email-yao.jin@linux.intel.com
[ Add missing colons at end of examples in the man page ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-report.txt | 20 +++++++++++++++
 tools/perf/builtin-report.c              | 31 ++++++++++++++++++++----
 2 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index ddde2b54af572..1e02c4e1a81f0 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -402,6 +402,26 @@ OPTIONS
 	stop time is not given (i.e, time string is 'x.y,') then analysis goes
 	to end of file.
 
+	Also support time percent with multiple time range. Time string is
+	'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10.
+
+	For example:
+	Select the second 10% time slice:
+
+	  perf report --time 10%/2
+
+	Select from 0% to 10% time slice:
+
+	  perf report --time 0%-10%
+
+	Select the first and second 10% time slices:
+
+	  perf report --time 10%/1,10%/2
+
+	Select from 0% to 10% and 30% to 40% slices:
+
+	  perf report --time 0%-10%,30%-40%
+
 --itrace::
 	Options for decoding instruction tracing data. The options are:
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 07827cd514803..770bf8a614f22 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -52,6 +52,8 @@
 #include <sys/stat.h>
 #include <unistd.h>
 
+#define PTIME_RANGE_MAX	10
+
 struct report {
 	struct perf_tool	tool;
 	struct perf_session	*session;
@@ -69,7 +71,8 @@ struct report {
 	const char		*cpu_list;
 	const char		*symbol_filter_str;
 	const char		*time_str;
-	struct perf_time_interval ptime;
+	struct perf_time_interval ptime_range[PTIME_RANGE_MAX];
+	int			range_num;
 	float			min_percent;
 	u64			nr_entries;
 	u64			queue_size;
@@ -202,8 +205,10 @@ static int process_sample_event(struct perf_tool *tool,
 	};
 	int ret = 0;
 
-	if (perf_time__skip_sample(&rep->ptime, sample->time))
+	if (perf_time__ranges_skip_sample(rep->ptime_range, rep->range_num,
+					  sample->time)) {
 		return 0;
+	}
 
 	if (machine__resolve(machine, &al, sample) < 0) {
 		pr_debug("problem processing %d event, skipping it.\n",
@@ -1093,9 +1098,25 @@ int cmd_report(int argc, const char **argv)
 	if (symbol__init(&session->header.env) < 0)
 		goto error;
 
-	if (perf_time__parse_str(&report.ptime, report.time_str) != 0) {
-		pr_err("Invalid time string\n");
-		return -EINVAL;
+	if (perf_time__parse_str(report.ptime_range, report.time_str) != 0) {
+		if (session->evlist->first_sample_time == 0 &&
+		    session->evlist->last_sample_time == 0) {
+			pr_err("No first/last sample time in perf data\n");
+			return -EINVAL;
+		}
+
+		report.range_num = perf_time__percent_parse_str(
+					report.ptime_range, PTIME_RANGE_MAX,
+					report.time_str,
+					session->evlist->first_sample_time,
+					session->evlist->last_sample_time);
+
+		if (report.range_num < 0) {
+			pr_err("Invalid time string\n");
+			return -EINVAL;
+		}
+	} else {
+		report.range_num = 1;
 	}
 
 	sort__setup_elide(stdout);

From 2ab046cd01e33a854798a3e245c9e3f32b950a7d Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Fri, 8 Dec 2017 21:13:46 +0800
Subject: [PATCH 12/27] perf script: Support time percent and multiple time
 ranges

perf script has a --time option to limit the time range of output.  It
only supports absolute time.

Now this option is extended to support multiple time ranges and support
the percent of time.

For example:

1. Select the first and second 10% time slices:

   perf script --time 10%/1,10%/2

2. Select from 0% to 10% and 30% to 40% slices:

   perf script --time 0%-10%,30%-40%

Changelog:

v6: Fix the merge issue with latest perf/core branch.
    No functional changes.

v5: Add checking of first/last sample time to detect if it's recorded
    in perf.data. If it's not recorded, returns error message to user.

v4: Remove perf_time__skip_sample, only uses perf_time__ranges_skip_sample

v3: Since the definitions of first_sample_time/last_sample_time
    are moved from perf_session to perf_evlist so change the
    related code.

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1512738826-2628-7-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-script.txt | 16 +++++++++++
 tools/perf/builtin-script.c              | 34 +++++++++++++++++++-----
 2 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 974ceb12c7f3e..7b622a812a725 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -329,6 +329,22 @@ include::itrace.txt[]
 	stop time is not given (i.e, time string is 'x.y,') then analysis goes
 	to end of file.
 
+	Also support time percent with multipe time range. Time string is
+	'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10.
+
+	For example:
+	Select the second 10% time slice
+	perf script --time 10%/2
+
+	Select from 0% to 10% time slice
+	perf script --time 0%-10%
+
+	Select the first and second 10% time slices
+	perf script --time 10%/1,10%/2
+
+	Select from 0% to 10% and 30% to 40% slices
+	perf script --time 0%-10%,30%-40%
+
 --max-blocks::
 	Set the maximum number of program blocks to print with brstackasm for
 	each sample.
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 77e47cf39f2ca..330dcd9b9b8ff 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1436,6 +1436,8 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample,
 	return 0;
 }
 
+#define PTIME_RANGE_MAX	10
+
 struct perf_script {
 	struct perf_tool	tool;
 	struct perf_session	*session;
@@ -1449,7 +1451,8 @@ struct perf_script {
 	struct thread_map	*threads;
 	int			name_width;
 	const char              *time_str;
-	struct perf_time_interval ptime;
+	struct perf_time_interval ptime_range[PTIME_RANGE_MAX];
+	int			range_num;
 };
 
 static int perf_evlist__max_name_len(struct perf_evlist *evlist)
@@ -1734,8 +1737,10 @@ static int process_sample_event(struct perf_tool *tool,
 	struct perf_script *scr = container_of(tool, struct perf_script, tool);
 	struct addr_location al;
 
-	if (perf_time__skip_sample(&scr->ptime, sample->time))
+	if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num,
+					  sample->time)) {
 		return 0;
+	}
 
 	if (debug_mode) {
 		if (sample->time < last_timestamp) {
@@ -3360,10 +3365,27 @@ int cmd_script(int argc, const char **argv)
 		goto out_delete;
 
 	/* needs to be parsed after looking up reference time */
-	if (perf_time__parse_str(&script.ptime, script.time_str) != 0) {
-		pr_err("Invalid time string\n");
-		err = -EINVAL;
-		goto out_delete;
+	if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) {
+		if (session->evlist->first_sample_time == 0 &&
+		    session->evlist->last_sample_time == 0) {
+			pr_err("No first/last sample time in perf data\n");
+			err = -EINVAL;
+			goto out_delete;
+		}
+
+		script.range_num = perf_time__percent_parse_str(
+					script.ptime_range, PTIME_RANGE_MAX,
+					script.time_str,
+					session->evlist->first_sample_time,
+					session->evlist->last_sample_time);
+
+		if (script.range_num < 0) {
+			pr_err("Invalid time string\n");
+			err = -EINVAL;
+			goto out_delete;
+		}
+	} else {
+		script.range_num = 1;
 	}
 
 	err = __cmd_script(&script);

From 24787afbcd0127859394eb9230659ee6d5dc4644 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 7 Jan 2018 17:03:45 +0100
Subject: [PATCH 13/27] perf tools: Enable LIBBABELTRACE by default

There's no reason anymore to treat babel trace in a special way, because
a) we no longer display its state b) the needed babeltrace library is
now out and well adopted among distros.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180107160356.28203-2-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.config | 2 +-
 tools/perf/Makefile.perf   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index f050f38d8fa3a..12dec6ea5ed21 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -780,7 +780,7 @@ else
   NO_PERF_READ_VDSOX32 := 1
 endif
 
-ifdef LIBBABELTRACE
+ifndef NO_LIBBABELTRACE
   $(call feature_check,libbabeltrace)
   ifeq ($(feature-libbabeltrace), 1)
     CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 68cf1360a3f39..9fdefd748e2e3 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -77,7 +77,7 @@ include ../scripts/utilities.mak
 #
 # Define NO_ZLIB if you do not want to support compressed kernel modules
 #
-# Define LIBBABELTRACE if you DO want libbabeltrace support
+# Define NO_LIBBABELTRACE if you do not want libbabeltrace support
 # for CTF data format.
 #
 # Define NO_LZMA if you do not want to support compressed (xz) kernel modules

From db9fc765e8f4d0144d13cdfa4be32d81eae01710 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 7 Jan 2018 17:03:46 +0100
Subject: [PATCH 14/27] perf tools: Display perf_event_attr::namespaces debug
 info

Display namespaces bit in -vv debug display:

  $ perf record -vv --namespaces ...
  ...
  perf_event_attr:
    size                             112
    ...
    namespaces                       1

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180107160356.28203-3-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index a4d256ea0dc42..c435b2444153e 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1577,6 +1577,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
 	PRINT_ATTRf(use_clockid, p_unsigned);
 	PRINT_ATTRf(context_switch, p_unsigned);
 	PRINT_ATTRf(write_backward, p_unsigned);
+	PRINT_ATTRf(namespaces, p_unsigned);
 
 	PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
 	PRINT_ATTRf(bp_type, p_unsigned);

From 313ccb96159489eabdbdcf4deb34e7fbac17557d Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 7 Jan 2018 17:03:47 +0100
Subject: [PATCH 15/27] perf: Allocate context task_ctx_data for child event

Currently we use perf_event_context::task_ctx_data to save and restore
the LBR status when the task is scheduled out and in.

We don't allocate it for child contexts, which results in shorter task's
LBR stack, because we don't save the history from previous run and start
over every time we schedule the task in.

I made a test to generate samples with LBR call stack and got higher
numbers on bigger chain depths:

                            before:     after:
  LBR call chain: nr: 1       60561     498127
  LBR call chain: nr: 2           0          0
  LBR call chain: nr: 3      107030       2172
  LBR call chain: nr: 4      466685      62758
  LBR call chain: nr: 5     2307319     878046
  LBR call chain: nr: 6       48713     495218
  LBR call chain: nr: 7        1040       4551
  LBR call chain: nr: 8         481        172
  LBR call chain: nr: 9         878        120
  LBR call chain: nr: 10       2377       6698
  LBR call chain: nr: 11      28830     151487
  LBR call chain: nr: 12      29347     339867
  LBR call chain: nr: 13          4         22
  LBR call chain: nr: 14          3         53

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Fixes: 4af57ef28c2c ("perf: Add pmu specific data for perf task context")
Link: http://lkml.kernel.org/r/20180107160356.28203-4-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 kernel/events/core.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4df5b695bf0db..55fb648a32b07 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10703,6 +10703,19 @@ inherit_event(struct perf_event *parent_event,
 	if (IS_ERR(child_event))
 		return child_event;
 
+
+	if ((child_event->attach_state & PERF_ATTACH_TASK_DATA) &&
+	    !child_ctx->task_ctx_data) {
+		struct pmu *pmu = child_event->pmu;
+
+		child_ctx->task_ctx_data = kzalloc(pmu->task_ctx_size,
+						   GFP_KERNEL);
+		if (!child_ctx->task_ctx_data) {
+			free_event(child_event);
+			return NULL;
+		}
+	}
+
 	/*
 	 * is_orphaned_event() and list_add_tail(&parent_event->child_list)
 	 * must be under the same lock in order to serialize against
@@ -10713,6 +10726,7 @@ inherit_event(struct perf_event *parent_event,
 	if (is_orphaned_event(parent_event) ||
 	    !atomic_long_inc_not_zero(&parent_event->refcount)) {
 		mutex_unlock(&parent_event->child_mutex);
+		/* task_ctx_data is freed with child_ctx */
 		free_event(child_event);
 		return NULL;
 	}

From 81df978c49379481716aef591de77313c286d747 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 7 Jan 2018 17:03:48 +0100
Subject: [PATCH 16/27] perf: Add sample_id to PERF_RECORD_ITRACE_START event
 comment

Adding missing sample_id line into PERF_RECORD_ITRACE_START
event comment.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180107160356.28203-5-jolsa@kernel.org
[ Update the tools/include/uapi/linux copy ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/uapi/linux/perf_event.h       | 1 +
 tools/include/uapi/linux/perf_event.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b9a4953018ede..8bb66e8da945b 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -864,6 +864,7 @@ enum perf_event_type {
 	 *	struct perf_event_header	header;
 	 *	u32				pid;
 	 *	u32				tid;
+	 *	struct sample_id		sample_id;
 	 * };
 	 */
 	PERF_RECORD_ITRACE_START		= 12,
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index b9a4953018ede..8bb66e8da945b 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -864,6 +864,7 @@ enum perf_event_type {
 	 *	struct perf_event_header	header;
 	 *	u32				pid;
 	 *	u32				tid;
+	 *	struct sample_id		sample_id;
 	 * };
 	 */
 	PERF_RECORD_ITRACE_START		= 12,

From 8cf7e0e22414f5acf85ecb7cd0d4482e6c9696ae Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 7 Jan 2018 17:03:49 +0100
Subject: [PATCH 17/27] perf: Make perf_callchain function static

And move it to core.c, because there's no caller of this function other
than the one in core.c

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180107160356.28203-6-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 kernel/events/callchain.c | 15 ---------------
 kernel/events/core.c      | 16 ++++++++++++++++
 kernel/events/internal.h  |  4 ----
 3 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 1b2be63c85282..772a43fea825c 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -178,21 +178,6 @@ put_callchain_entry(int rctx)
 	put_recursion_context(this_cpu_ptr(callchain_recursion), rctx);
 }
 
-struct perf_callchain_entry *
-perf_callchain(struct perf_event *event, struct pt_regs *regs)
-{
-	bool kernel = !event->attr.exclude_callchain_kernel;
-	bool user   = !event->attr.exclude_callchain_user;
-	/* Disallow cross-task user callchains. */
-	bool crosstask = event->ctx->task && event->ctx->task != current;
-	const u32 max_stack = event->attr.sample_max_stack;
-
-	if (!kernel && !user)
-		return NULL;
-
-	return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true);
-}
-
 struct perf_callchain_entry *
 get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 		   u32 max_stack, bool crosstask, bool add_mark)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 55fb648a32b07..5fc1ded4b450d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5980,6 +5980,22 @@ static u64 perf_virt_to_phys(u64 virt)
 	return phys_addr;
 }
 
+static struct perf_callchain_entry *
+perf_callchain(struct perf_event *event, struct pt_regs *regs)
+{
+	bool kernel = !event->attr.exclude_callchain_kernel;
+	bool user   = !event->attr.exclude_callchain_user;
+	/* Disallow cross-task user callchains. */
+	bool crosstask = event->ctx->task && event->ctx->task != current;
+	const u32 max_stack = event->attr.sample_max_stack;
+
+	if (!kernel && !user)
+		return NULL;
+
+	return get_perf_callchain(regs, 0, kernel, user,
+				  max_stack, crosstask, true);
+}
+
 void perf_prepare_sample(struct perf_event_header *header,
 			 struct perf_sample_data *data,
 			 struct perf_event *event,
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 09b1537ae06cd..6dc725a7e7bc9 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -201,10 +201,6 @@ arch_perf_out_copy_user(void *dst, const void *src, unsigned long n)
 
 DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
 
-/* Callchain handling */
-extern struct perf_callchain_entry *
-perf_callchain(struct perf_event *event, struct pt_regs *regs);
-
 static inline int get_recursion_context(int *recursion)
 {
 	int rctx;

From 99e818cc88889a2fa2f483b91b372c47b94b7c98 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 7 Jan 2018 17:03:50 +0100
Subject: [PATCH 18/27] perf: Return empty callchain instead of NULL

It simplifies the code a bit, because we dump the callchain
Link: http://lkml.kernel.org/n/tip-uqp7qd6aif47g39glnbu95yl@git.kernel.org
even if it's empty. With 'empty' callchain we can remove
all the NULL-checking code paths.

Original-patch-from: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/20180107160356.28203-7-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 kernel/events/core.c | 30 ++++++++++++------------------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5fc1ded4b450d..4e1a1bf8d8670 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5815,19 +5815,11 @@ void perf_output_sample(struct perf_output_handle *handle,
 		perf_output_read(handle, event);
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
-		if (data->callchain) {
-			int size = 1;
-
-			if (data->callchain)
-				size += data->callchain->nr;
-
-			size *= sizeof(u64);
+		int size = 1;
 
-			__output_copy(handle, data->callchain, size);
-		} else {
-			u64 nr = 0;
-			perf_output_put(handle, nr);
-		}
+		size += data->callchain->nr;
+		size *= sizeof(u64);
+		__output_copy(handle, data->callchain, size);
 	}
 
 	if (sample_type & PERF_SAMPLE_RAW) {
@@ -5980,6 +5972,8 @@ static u64 perf_virt_to_phys(u64 virt)
 	return phys_addr;
 }
 
+static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
+
 static struct perf_callchain_entry *
 perf_callchain(struct perf_event *event, struct pt_regs *regs)
 {
@@ -5988,12 +5982,14 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
 	/* Disallow cross-task user callchains. */
 	bool crosstask = event->ctx->task && event->ctx->task != current;
 	const u32 max_stack = event->attr.sample_max_stack;
+	struct perf_callchain_entry *callchain;
 
 	if (!kernel && !user)
-		return NULL;
+		return &__empty_callchain;
 
-	return get_perf_callchain(regs, 0, kernel, user,
-				  max_stack, crosstask, true);
+	callchain = get_perf_callchain(regs, 0, kernel, user,
+				       max_stack, crosstask, true);
+	return callchain ?: &__empty_callchain;
 }
 
 void perf_prepare_sample(struct perf_event_header *header,
@@ -6018,9 +6014,7 @@ void perf_prepare_sample(struct perf_event_header *header,
 		int size = 1;
 
 		data->callchain = perf_callchain(event, regs);
-
-		if (data->callchain)
-			size += data->callchain->nr;
+		size += data->callchain->nr;
 
 		header->size += size * sizeof(u64);
 	}

From 972c14884728bf5f69ec69cfb1beeec1a9cd29ee Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 7 Jan 2018 17:03:51 +0100
Subject: [PATCH 19/27] perf: Update PERF_RECORD_MISC_* comment for
 perf_event_header::misc bit 13

The perf_event_header::misc bit 13 is shared on different events and
next patch is adding yet another bit 13 user.  Updating the comment to
make it more structured and clear which events use bit 13.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/20180107160356.28203-8-jolsa@kernel.org
[ Update the tools/include/uapi/linux copy ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/uapi/linux/perf_event.h       | 9 ++++++---
 tools/include/uapi/linux/perf_event.h | 9 ++++++---
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 8bb66e8da945b..c77c9a2ebbbbc 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -612,9 +612,12 @@ struct perf_event_mmap_page {
  */
 #define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT	(1 << 12)
 /*
- * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
- * different events so can reuse the same bit position.
- * Ditto PERF_RECORD_MISC_SWITCH_OUT.
+ * Following PERF_RECORD_MISC_* are used on different
+ * events, so can reuse the same bit position:
+ *
+ *   PERF_RECORD_MISC_MMAP_DATA  - PERF_RECORD_MMAP* events
+ *   PERF_RECORD_MISC_COMM_EXEC  - PERF_RECORD_COMM event
+ *   PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events
  */
 #define PERF_RECORD_MISC_MMAP_DATA		(1 << 13)
 #define PERF_RECORD_MISC_COMM_EXEC		(1 << 13)
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 8bb66e8da945b..c77c9a2ebbbbc 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -612,9 +612,12 @@ struct perf_event_mmap_page {
  */
 #define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT	(1 << 12)
 /*
- * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
- * different events so can reuse the same bit position.
- * Ditto PERF_RECORD_MISC_SWITCH_OUT.
+ * Following PERF_RECORD_MISC_* are used on different
+ * events, so can reuse the same bit position:
+ *
+ *   PERF_RECORD_MISC_MMAP_DATA  - PERF_RECORD_MMAP* events
+ *   PERF_RECORD_MISC_COMM_EXEC  - PERF_RECORD_COMM event
+ *   PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events
  */
 #define PERF_RECORD_MISC_MMAP_DATA		(1 << 13)
 #define PERF_RECORD_MISC_COMM_EXEC		(1 << 13)

From 28a0b39877f5ed64ae9fadf95dddb90999309dee Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 7 Jan 2018 17:03:52 +0100
Subject: [PATCH 20/27] perf script: Add support to display sample misc field

Adding support to display sample misc field in form
of letter for each bit:

  # perf script -F +misc ...
   sched-messaging  1414 K     28690.636582:       4590 cycles ...
   sched-messaging  1407 U     28690.636600:     325620 cycles ...
   sched-messaging  1414 K     28690.636608:      19473 cycles ...
  misc field  __________/

The misc bits are assigned to following letters:

  PERF_RECORD_MISC_KERNEL        K
  PERF_RECORD_MISC_USER          U
  PERF_RECORD_MISC_HYPERVISOR    H
  PERF_RECORD_MISC_GUEST_KERNEL  G
  PERF_RECORD_MISC_GUEST_USER    g
  PERF_RECORD_MISC_MMAP_DATA*    M
  PERF_RECORD_MISC_COMM_EXEC     E
  PERF_RECORD_MISC_SWITCH_OUT    S

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180107160356.28203-9-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-script.txt | 20 ++++++-
 tools/perf/builtin-script.c              | 74 ++++++++++++++++++++----
 tools/perf/util/event.h                  |  1 +
 tools/perf/util/evsel.c                  |  1 +
 4 files changed, 84 insertions(+), 12 deletions(-)

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 7b622a812a725..93ae8d60e3d33 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -117,7 +117,7 @@ OPTIONS
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
         srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
-	brstackoff, callindent, insn, insnlen, synth, phys_addr, metric.
+        brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc.
         Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
@@ -225,6 +225,24 @@ OPTIONS
 	that the metric computed is averaged over the whole sampling
 	period, not just for the sample point.
 
+	For sample events it's possible to display misc field with -F +misc option,
+	following letters are displayed for each bit:
+
+	  PERF_RECORD_MISC_KERNEL        K
+	  PERF_RECORD_MISC_USER          U
+	  PERF_RECORD_MISC_HYPERVISOR    H
+	  PERF_RECORD_MISC_GUEST_KERNEL  G
+	  PERF_RECORD_MISC_GUEST_USER    g
+	  PERF_RECORD_MISC_MMAP_DATA*    M
+	  PERF_RECORD_MISC_COMM_EXEC     E
+	  PERF_RECORD_MISC_SWITCH_OUT    S
+
+	  $ perf script -F +misc ...
+	   sched-messaging  1414 K     28690.636582:       4590 cycles ...
+	   sched-messaging  1407 U     28690.636600:     325620 cycles ...
+	   sched-messaging  1414 K     28690.636608:      19473 cycles ...
+	  misc field ___________/
+
 -k::
 --vmlinux=<file>::
         vmlinux pathname
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 330dcd9b9b8ff..bb603495cf4a5 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -93,6 +93,7 @@ enum perf_output_field {
 	PERF_OUTPUT_PHYS_ADDR       = 1U << 26,
 	PERF_OUTPUT_UREGS	    = 1U << 27,
 	PERF_OUTPUT_METRIC	    = 1U << 28,
+	PERF_OUTPUT_MISC            = 1U << 29,
 };
 
 struct output_option {
@@ -128,6 +129,7 @@ struct output_option {
 	{.str = "synth", .field = PERF_OUTPUT_SYNTH},
 	{.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},
 	{.str = "metric", .field = PERF_OUTPUT_METRIC},
+	{.str = "misc", .field = PERF_OUTPUT_MISC},
 };
 
 enum {
@@ -594,7 +596,8 @@ static int perf_sample__fprintf_uregs(struct perf_sample *sample,
 
 static int perf_sample__fprintf_start(struct perf_sample *sample,
 				      struct thread *thread,
-				      struct perf_evsel *evsel, FILE *fp)
+				      struct perf_evsel *evsel,
+				      u32 type, FILE *fp)
 {
 	struct perf_event_attr *attr = &evsel->attr;
 	unsigned long secs;
@@ -624,6 +627,47 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
 			printed += fprintf(fp, "[%03d] ", sample->cpu);
 	}
 
+	if (PRINT_FIELD(MISC)) {
+		int ret = 0;
+
+		#define has(m) \
+			(sample->misc & PERF_RECORD_MISC_##m) == PERF_RECORD_MISC_##m
+
+		if (has(KERNEL))
+			ret += fprintf(fp, "K");
+		if (has(USER))
+			ret += fprintf(fp, "U");
+		if (has(HYPERVISOR))
+			ret += fprintf(fp, "H");
+		if (has(GUEST_KERNEL))
+			ret += fprintf(fp, "G");
+		if (has(GUEST_USER))
+			ret += fprintf(fp, "g");
+
+		switch (type) {
+		case PERF_RECORD_MMAP:
+		case PERF_RECORD_MMAP2:
+			if (has(MMAP_DATA))
+				ret += fprintf(fp, "M");
+			break;
+		case PERF_RECORD_COMM:
+			if (has(COMM_EXEC))
+				ret += fprintf(fp, "E");
+			break;
+		case PERF_RECORD_SWITCH:
+		case PERF_RECORD_SWITCH_CPU_WIDE:
+			if (has(SWITCH_OUT))
+				ret += fprintf(fp, "S");
+		default:
+			break;
+		}
+
+		#undef has
+
+		ret += fprintf(fp, "%*s", 6 - ret, " ");
+		printed += ret;
+	}
+
 	if (PRINT_FIELD(TIME)) {
 		nsecs = sample->time;
 		secs = nsecs / NSEC_PER_SEC;
@@ -1502,7 +1546,7 @@ static void script_print_metric(void *ctx, const char *color,
 	if (!fmt)
 		return;
 	perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
-				   mctx->fp);
+				   PERF_RECORD_SAMPLE, mctx->fp);
 	fputs("\tmetric: ", mctx->fp);
 	if (color)
 		color_fprintf(mctx->fp, color, fmt, val);
@@ -1516,7 +1560,7 @@ static void script_new_line(void *ctx)
 	struct metric_ctx *mctx = ctx;
 
 	perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
-				   mctx->fp);
+				   PERF_RECORD_SAMPLE, mctx->fp);
 	fputs("\tmetric: ", mctx->fp);
 }
 
@@ -1584,7 +1628,8 @@ static void process_event(struct perf_script *script,
 
 	++es->samples;
 
-	perf_sample__fprintf_start(sample, thread, evsel, fp);
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_SAMPLE, fp);
 
 	if (PRINT_FIELD(PERIOD))
 		fprintf(fp, "%10" PRIu64 " ", sample->period);
@@ -1833,7 +1878,8 @@ static int process_comm_event(struct perf_tool *tool,
 		sample->tid = event->comm.tid;
 		sample->pid = event->comm.pid;
 	}
-	perf_sample__fprintf_start(sample, thread, evsel, stdout);
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_COMM, stdout);
 	perf_event__fprintf(event, stdout);
 	ret = 0;
 out:
@@ -1868,7 +1914,8 @@ static int process_namespaces_event(struct perf_tool *tool,
 		sample->tid = event->namespaces.tid;
 		sample->pid = event->namespaces.pid;
 	}
-	perf_sample__fprintf_start(sample, thread, evsel, stdout);
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_NAMESPACES, stdout);
 	perf_event__fprintf(event, stdout);
 	ret = 0;
 out:
@@ -1901,7 +1948,8 @@ static int process_fork_event(struct perf_tool *tool,
 		sample->tid = event->fork.tid;
 		sample->pid = event->fork.pid;
 	}
-	perf_sample__fprintf_start(sample, thread, evsel, stdout);
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_FORK, stdout);
 	perf_event__fprintf(event, stdout);
 	thread__put(thread);
 
@@ -1930,7 +1978,8 @@ static int process_exit_event(struct perf_tool *tool,
 		sample->tid = event->fork.tid;
 		sample->pid = event->fork.pid;
 	}
-	perf_sample__fprintf_start(sample, thread, evsel, stdout);
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_EXIT, stdout);
 	perf_event__fprintf(event, stdout);
 
 	if (perf_event__process_exit(tool, event, sample, machine) < 0)
@@ -1965,7 +2014,8 @@ static int process_mmap_event(struct perf_tool *tool,
 		sample->tid = event->mmap.tid;
 		sample->pid = event->mmap.pid;
 	}
-	perf_sample__fprintf_start(sample, thread, evsel, stdout);
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_MMAP, stdout);
 	perf_event__fprintf(event, stdout);
 	thread__put(thread);
 	return 0;
@@ -1996,7 +2046,8 @@ static int process_mmap2_event(struct perf_tool *tool,
 		sample->tid = event->mmap2.tid;
 		sample->pid = event->mmap2.pid;
 	}
-	perf_sample__fprintf_start(sample, thread, evsel, stdout);
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_MMAP2, stdout);
 	perf_event__fprintf(event, stdout);
 	thread__put(thread);
 	return 0;
@@ -2022,7 +2073,8 @@ static int process_switch_event(struct perf_tool *tool,
 		return -1;
 	}
 
-	perf_sample__fprintf_start(sample, thread, evsel, stdout);
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_SWITCH, stdout);
 	perf_event__fprintf(event, stdout);
 	thread__put(thread);
 	return 0;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 1ae95efbfb95d..e5fbd6dd1b01c 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -205,6 +205,7 @@ struct perf_sample {
 	u32 flags;
 	u16 insn_len;
 	u8  cpumode;
+	u16 misc;
 	char insn[MAX_INSN];
 	void *raw_data;
 	struct ip_callchain *callchain;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c435b2444153e..d934f04e3110e 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2042,6 +2042,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 	data->stream_id = data->id = data->time = -1ULL;
 	data->period = evsel->attr.sample_period;
 	data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	data->misc    = event->header.misc;
 	data->id = -1ULL;
 	data->data_src = PERF_MEM_DATA_SRC_NONE;
 

From 3d7c27b6dbca4c90e7d921b45c2240e7c3cb92a2 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 7 Jan 2018 17:03:53 +0100
Subject: [PATCH 21/27] perf script: Add support to display lost events

Adding option to display lost events:

  $ perf script --show-lost-events ...
   mplayer 13810 [002] 468011.402396:        100 cycles:ppp:  ff..
   mplayer 13810 [002] 468011.402396: PERF_RECORD_LOST lost 3880
   mplayer 13810 [002] 468011.402397:        100 cycles:ppp:  ff..

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180107160356.28203-10-jolsa@kernel.org
[ Use PRIu64 when printing u64 values, fixing the build in some arches ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-script.txt |  3 +++
 tools/perf/builtin-script.c              | 28 ++++++++++++++++++++++++
 tools/perf/util/event.c                  |  8 +++++++
 3 files changed, 39 insertions(+)

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 93ae8d60e3d33..806ec6391fd6d 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -300,6 +300,9 @@ OPTIONS
 	Display context switch events i.e. events of type PERF_RECORD_SWITCH or
 	PERF_RECORD_SWITCH_CPU_WIDE.
 
+--show-lost-events
+	Display lost events i.e. events of type PERF_RECORD_LOST.
+
 --demangle::
 	Demangle symbol names to human readable form. It's enabled by default,
 	disable with --no-demangle.
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index bb603495cf4a5..c1cce474c0f10 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1489,6 +1489,7 @@ struct perf_script {
 	bool			show_mmap_events;
 	bool			show_switch_events;
 	bool			show_namespace_events;
+	bool			show_lost_events;
 	bool			allocated;
 	bool			per_event_dump;
 	struct cpu_map		*cpus;
@@ -2080,6 +2081,29 @@ static int process_switch_event(struct perf_tool *tool,
 	return 0;
 }
 
+static int
+process_lost_event(struct perf_tool *tool,
+		   union perf_event *event,
+		   struct perf_sample *sample,
+		   struct machine *machine)
+{
+	struct perf_script *script = container_of(tool, struct perf_script, tool);
+	struct perf_session *session = script->session;
+	struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+	struct thread *thread;
+
+	thread = machine__findnew_thread(machine, sample->pid,
+					 sample->tid);
+	if (thread == NULL)
+		return -1;
+
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_LOST, stdout);
+	perf_event__fprintf(event, stdout);
+	thread__put(thread);
+	return 0;
+}
+
 static void sig_handler(int sig __maybe_unused)
 {
 	session_done = 1;
@@ -2174,6 +2198,8 @@ static int __cmd_script(struct perf_script *script)
 		script->tool.context_switch = process_switch_event;
 	if (script->show_namespace_events)
 		script->tool.namespaces = process_namespaces_event;
+	if (script->show_lost_events)
+		script->tool.lost = process_lost_event;
 
 	if (perf_script__setup_per_event_dump(script)) {
 		pr_err("Couldn't create the per event dump files\n");
@@ -3110,6 +3136,8 @@ int cmd_script(int argc, const char **argv)
 		    "Show context switch events (if recorded)"),
 	OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events,
 		    "Show namespace events (if recorded)"),
+	OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events,
+		    "Show lost events (if recorded)"),
 	OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump,
 		    "Dump trace output to files named by the monitored events"),
 	OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 97a8ef9980db7..44e603c279449 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1435,6 +1435,11 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp)
 		       event->context_switch.next_prev_tid);
 }
 
+static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp)
+{
+	return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost);
+}
+
 size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 {
 	size_t ret = fprintf(fp, "PERF_RECORD_%s",
@@ -1467,6 +1472,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 	case PERF_RECORD_SWITCH_CPU_WIDE:
 		ret += perf_event__fprintf_switch(event, fp);
 		break;
+	case PERF_RECORD_LOST:
+		ret += perf_event__fprintf_lost(event, fp);
+		break;
 	default:
 		ret += fprintf(fp, "\n");
 	}

From 075ca1ebb25e798e4072a1e3a482b829bb51afb2 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 7 Jan 2018 17:03:54 +0100
Subject: [PATCH 22/27] perf tools: Make the tool's warning messages optional

I want to display the pure events status coming in the next patch and
the tool's warnings are superfluous in the output. Making it optional,
enabled by default.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180107160356.28203-11-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/session.c | 6 ++++--
 tools/perf/util/tool.h    | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 54e30f1bcbd7a..8d0fa2f8da169 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1773,7 +1773,8 @@ static int __perf_session__process_pipe_events(struct perf_session *session)
 	err = perf_session__flush_thread_stacks(session);
 out_err:
 	free(buf);
-	perf_session__warn_about_errors(session);
+	if (!tool->no_warn)
+		perf_session__warn_about_errors(session);
 	ordered_events__free(&session->ordered_events);
 	auxtrace__free_events(session);
 	return err;
@@ -1929,7 +1930,8 @@ static int __perf_session__process_events(struct perf_session *session,
 	err = perf_session__flush_thread_stacks(session);
 out_err:
 	ui_progress__finish();
-	perf_session__warn_about_errors(session);
+	if (!tool->no_warn)
+		perf_session__warn_about_errors(session);
 	/*
 	 * We may switching perf.data output, make ordered_events
 	 * reusable.
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 2532b558099b7..183c914535228 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -76,6 +76,7 @@ struct perf_tool {
 	bool		ordered_events;
 	bool		ordering_requires_timestamps;
 	bool		namespace_events;
+	bool		no_warn;
 	enum show_feature_header show_feat_hdr;
 };
 

From a4a4d0a7a2b20f7880262de4f51685baaf693476 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 7 Jan 2018 17:03:55 +0100
Subject: [PATCH 23/27] perf report: Add --stats option to display quick data
 statistics

Add --stats option to display quick data statistics of event numbers,
without any further processing, like the one at the end of the perf
report -D command.

  $ perf report --stat

  Aggregated stats:
             TOTAL events:       4566
              MMAP events:        113
              LOST events:         19
              COMM events:          3
              FORK events:        400
            SAMPLE events:       3315
             MMAP2 events:         32
    FINISHED_ROUND events:        681
        THREAD_MAP events:          1
           CPU_MAP events:          1
         TIME_CONV events:          1

I found this useful when hunting lost events for another change.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180107160356.28203-12-jolsa@kernel.org
[ Rename it to --stats, plural ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-report.txt |  4 ++++
 tools/perf/builtin-report.c              | 26 +++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 1e02c4e1a81f0..a7d11ef2fe258 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -457,6 +457,10 @@ include::itrace.txt[]
 	will be printed. Each entry is function name or file/line. Enabled by
 	default, disable with --no-inline.
 
+--stats::
+	Display overall events statistics without any further processing.
+	(like the one at the end of the perf report -D command)
+
 include::callchain-overhead-calculation.txt[]
 
 SEE ALSO
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 770bf8a614f22..8e67a8c25ab1c 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -62,6 +62,7 @@ struct report {
 	bool			show_threads;
 	bool			inverted_callchain;
 	bool			mem_mode;
+	bool			stats_mode;
 	bool			header;
 	bool			header_only;
 	bool			nonany_branch_mode;
@@ -588,6 +589,20 @@ static void report__output_resort(struct report *rep)
 	ui_progress__finish();
 }
 
+static void stats_setup(struct report *rep)
+{
+	memset(&rep->tool, 0, sizeof(rep->tool));
+	rep->tool.no_warn = true;
+}
+
+static int stats_print(struct report *rep)
+{
+	struct perf_session *session = rep->session;
+
+	perf_session__fprintf_nr_events(session, stdout);
+	return 0;
+}
+
 static int __cmd_report(struct report *rep)
 {
 	int ret;
@@ -619,12 +634,18 @@ static int __cmd_report(struct report *rep)
 		return ret;
 	}
 
+	if (rep->stats_mode)
+		stats_setup(rep);
+
 	ret = perf_session__process_events(session);
 	if (ret) {
 		ui__error("failed to process sample\n");
 		return ret;
 	}
 
+	if (rep->stats_mode)
+		return stats_print(rep);
+
 	report__warn_kptr_restrict(rep);
 
 	evlist__for_each_entry(session->evlist, pos)
@@ -781,6 +802,7 @@ int cmd_report(int argc, const char **argv)
 	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
+	OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"),
 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
 		   "file", "vmlinux pathname"),
 	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
@@ -1042,6 +1064,8 @@ int cmd_report(int argc, const char **argv)
 		report.tool.show_feat_hdr = SHOW_FEAT_HEADER;
 	if (report.show_full_info)
 		report.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO;
+	if (report.stats_mode)
+		use_browser = 0;
 
 	if (strcmp(input_name, "-") != 0)
 		setup_browser(true);
@@ -1064,7 +1088,7 @@ int cmd_report(int argc, const char **argv)
 			ret = 0;
 			goto error;
 		}
-	} else if (use_browser == 0 && !quiet) {
+	} else if (use_browser == 0 && !quiet && !report.stats_mode) {
 		fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n",
 		      stdout);
 	}

From 2d1073def3cb69aa44f99be7ef42da7cc561be1f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 9 Jan 2018 12:03:47 -0300
Subject: [PATCH 24/27] perf trace: Beautify 'gettid' syscall result

Before:

  # trace -a -e gettid sleep 0.01
<SNIP>
     4.863 ( 0.005 ms): Chrome_ChildIO/26241 gettid() = 26241
     4.931 ( 0.004 ms): Chrome_IOThrea/26154 gettid() = 26154
     4.942 ( 0.001 ms): Chrome_IOThrea/26154 gettid() = 26154
     4.946 ( 0.001 ms): Chrome_IOThrea/26154 gettid() = 26154
     4.970 ( 0.002 ms): Chrome_IOThrea/26154 gettid() = 26154
  #

After:

  # trace -a -e gettid sleep 0.01
     0.000 ( 0.009 ms): Chrome_IOThrea/26154 gettid() = 26154 (Chrome_IOThread)
<SNIP>
     3.416 ( 0.002 ms): Chrome_ChildIO/26241 gettid() = 26241 (Chrome_ChildIOT)
     3.424 ( 0.001 ms): Chrome_ChildIO/26241 gettid() = 26241 (Chrome_ChildIOT)
     3.343 ( 0.002 ms): chrome/26116 gettid() = 26116 (chrome)
     3.386 ( 0.002 ms): Chrome_IOThrea/26154 gettid() = 26154 (Chrome_IOThread)
     4.003 ( 0.003 ms): Chrome_ChildIO/26241 gettid() = 26241 (Chrome_ChildIOT)
     4.031 ( 0.002 ms): Chrome_IOThrea/26154 gettid() = 26154 (Chrome_IOThread)
  #

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-kyg4gz2yy0vkrrh2vtq29u71@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 7c57898095ea3..71e64bdca86fc 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -622,6 +622,7 @@ static struct syscall_fmt {
 	  .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
 	{ .name	    = "getrlimit",
 	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
+	{ .name	    = "gettid",	    .errpid = true, },
 	{ .name	    = "ioctl",
 	  .arg = {
 #if defined(__i386__) || defined(__x86_64__)

From 930f8b3479444d264aa33e008c4b00b86e8c62cc Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 7 Jan 2018 17:03:56 +0100
Subject: [PATCH 25/27] perf report: Add --tasks option to display monitored
 tasks

Add --tasks option to display monitored tasks stored in perf.data.
Displaying pid/tid/ppid plus the command string aligned to distinguish
parent and child tasks.

  $ perf record -a
  ...
  $ perf report --tasks
  #     pid     tid    ppid  comm
          0       0      -1 |swapper
          2       2       0 | kthreadd
      14080   14080       2 |  kworker/u17:1
          4       4       2 |  kworker/0:0H
          6       6       2 |  mm_percpu_wq
  ...
          1       1       0 | systemd
      23242   23242       1 |  firefox
      23242   23298   23242 |   Cache2 I/O
      23242   23304   23242 |   GMPThread
  ...
       1195    1195       1 |  login
       1611    1611    1195 |   bash
       1639    1639    1611 |    startx
       1663    1663    1639 |     xinit
       1673    1673    1663 |      xmonad-x86_64-l
      23939   23939    1673 |       xterm
      23941   23941   23939 |        bash
      23963   23963   23941 |         mutt
      24954   24954   23963 |          offlineimap

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180107160356.28203-13-jolsa@kernel.org
[ Make it --tasks, plural, --task works as well, as its unambiguous ]
[ Use machine__find_thread(), not findnew(), as pointed out by Namhyung ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-report.txt |   4 +
 tools/perf/builtin-report.c              | 136 ++++++++++++++++++++++-
 2 files changed, 138 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index a7d11ef2fe258..856c3c7e94fae 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -461,6 +461,10 @@ include::itrace.txt[]
 	Display overall events statistics without any further processing.
 	(like the one at the end of the perf report -D command)
 
+--tasks::
+	Display monitored tasks stored in perf data. Displaying pid/tid/ppid
+	plus the command string aligned to distinguish parent and child tasks.
+
 include::callchain-overhead-calculation.txt[]
 
 SEE ALSO
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8e67a8c25ab1c..2c7bd85651dca 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -15,6 +15,7 @@
 #include "util/color.h"
 #include <linux/list.h>
 #include <linux/rbtree.h>
+#include <linux/err.h>
 #include "util/symbol.h"
 #include "util/callchain.h"
 #include "util/values.h"
@@ -63,6 +64,7 @@ struct report {
 	bool			inverted_callchain;
 	bool			mem_mode;
 	bool			stats_mode;
+	bool			tasks_mode;
 	bool			header;
 	bool			header_only;
 	bool			nonany_branch_mode;
@@ -603,6 +605,124 @@ static int stats_print(struct report *rep)
 	return 0;
 }
 
+static void tasks_setup(struct report *rep)
+{
+	memset(&rep->tool, 0, sizeof(rep->tool));
+	rep->tool.comm = perf_event__process_comm;
+	rep->tool.exit = perf_event__process_exit;
+	rep->tool.fork = perf_event__process_fork;
+	rep->tool.no_warn = true;
+}
+
+struct task {
+	struct thread		*thread;
+	struct list_head	 list;
+	struct list_head	 children;
+};
+
+static struct task *tasks_list(struct task *task, struct machine *machine)
+{
+	struct thread *parent_thread, *thread = task->thread;
+	struct task   *parent_task;
+
+	/* Already listed. */
+	if (!list_empty(&task->list))
+		return NULL;
+
+	/* Last one in the chain. */
+	if (thread->ppid == -1)
+		return task;
+
+	parent_thread = machine__find_thread(machine, -1, thread->ppid);
+	if (!parent_thread)
+		return ERR_PTR(-ENOENT);
+
+	parent_task = thread__priv(parent_thread);
+	list_add_tail(&task->list, &parent_task->children);
+	return tasks_list(parent_task, machine);
+}
+
+static void task__print_level(struct task *task, FILE *fp, int level)
+{
+	struct thread *thread = task->thread;
+	struct task *child;
+
+	fprintf(fp, "  %8d %8d %8d |%*s%s\n",
+		thread->pid_, thread->tid, thread->ppid,
+		level, "", thread__comm_str(thread));
+
+	if (!list_empty(&task->children)) {
+		list_for_each_entry(child, &task->children, list)
+			task__print_level(child, fp, level + 1);
+	}
+}
+
+static int tasks_print(struct report *rep, FILE *fp)
+{
+	struct perf_session *session = rep->session;
+	struct machine      *machine = &session->machines.host;
+	struct task *tasks, *task;
+	unsigned int nr = 0, itask = 0, i;
+	struct rb_node *nd;
+	LIST_HEAD(list);
+
+	/*
+	 * No locking needed while accessing machine->threads,
+	 * because --tasks is single threaded command.
+	 */
+
+	/* Count all the threads. */
+	for (i = 0; i < THREADS__TABLE_SIZE; i++)
+		nr += machine->threads[i].nr;
+
+	tasks = malloc(sizeof(*tasks) * nr);
+	if (!tasks)
+		return -ENOMEM;
+
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		struct threads *threads = &machine->threads[i];
+
+		for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+			task = tasks + itask++;
+
+			task->thread = rb_entry(nd, struct thread, rb_node);
+			INIT_LIST_HEAD(&task->children);
+			INIT_LIST_HEAD(&task->list);
+			thread__set_priv(task->thread, task);
+		}
+	}
+
+	/*
+	 * Iterate every task down to the unprocessed parent
+	 * and link all in task children list. Task with no
+	 * parent is added into 'list'.
+	 */
+	for (itask = 0; itask < nr; itask++) {
+		task = tasks + itask;
+
+		if (!list_empty(&task->list))
+			continue;
+
+		task = tasks_list(task, machine);
+		if (IS_ERR(task)) {
+			pr_err("Error: failed to process tasks\n");
+			free(tasks);
+			return PTR_ERR(task);
+		}
+
+		if (task)
+			list_add_tail(&task->list, &list);
+	}
+
+	fprintf(fp, "# %8s %8s %8s  %s\n", "pid", "tid", "ppid", "comm");
+
+	list_for_each_entry(task, &list, list)
+		task__print_level(task, fp, 0);
+
+	free(tasks);
+	return 0;
+}
+
 static int __cmd_report(struct report *rep)
 {
 	int ret;
@@ -637,6 +757,9 @@ static int __cmd_report(struct report *rep)
 	if (rep->stats_mode)
 		stats_setup(rep);
 
+	if (rep->tasks_mode)
+		tasks_setup(rep);
+
 	ret = perf_session__process_events(session);
 	if (ret) {
 		ui__error("failed to process sample\n");
@@ -646,6 +769,9 @@ static int __cmd_report(struct report *rep)
 	if (rep->stats_mode)
 		return stats_print(rep);
 
+	if (rep->tasks_mode)
+		return tasks_print(rep, stdout);
+
 	report__warn_kptr_restrict(rep);
 
 	evlist__for_each_entry(session->evlist, pos)
@@ -803,6 +929,7 @@ int cmd_report(int argc, const char **argv)
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
 	OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"),
+	OPT_BOOLEAN(0, "tasks", &report.tasks_mode, "Display recorded tasks"),
 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
 		   "file", "vmlinux pathname"),
 	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
@@ -1064,8 +1191,12 @@ int cmd_report(int argc, const char **argv)
 		report.tool.show_feat_hdr = SHOW_FEAT_HEADER;
 	if (report.show_full_info)
 		report.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO;
-	if (report.stats_mode)
+	if (report.stats_mode || report.tasks_mode)
 		use_browser = 0;
+	if (report.stats_mode && report.tasks_mode) {
+		pr_err("Error: --tasks and --stats options cannot be used together\n");
+		goto error;
+	}
 
 	if (strcmp(input_name, "-") != 0)
 		setup_browser(true);
@@ -1088,7 +1219,8 @@ int cmd_report(int argc, const char **argv)
 			ret = 0;
 			goto error;
 		}
-	} else if (use_browser == 0 && !quiet && !report.stats_mode) {
+	} else if (use_browser == 0 && !quiet &&
+		   !report.stats_mode && !report.tasks_mode) {
 		fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n",
 		      stdout);
 	}

From 6439d7d16c94324300eb392ed85e3632e489e197 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 9 Jan 2018 15:25:03 -0300
Subject: [PATCH 26/27] perf report: Introduce --mmaps

Similar to --tasks, producing the same output plus /proc/<PID>/maps
similar lines for each mmap record present in a perf.data file.

Please note that not all mmaps are stored, for instance, some of the
non-executable mmaps are only stored when 'perf record --data' is used,
when the user wants to resolve data accesses in addition to asking for
executable mmaps to get the DSO with symtabs.

E.g.:

  # perf record sleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.018 MB perf.data (7 samples) ]
  [root@jouet ~]# perf report --mmaps
  #      pid      tid     ppid  comm
           0        0       -1 |swapper
        4137     4137       -1 |sleep
                                  5628a35a1000-5628a37aa000 r-xp 00000000 3147148 /usr/bin/sleep
                                  7fb65ad51000-7fb65b134000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so
                                  7fb65b134000-7fb65b35e000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so
                                  7ffd94b9f000-7ffd94ba1000 r-xp 00000000 0 [vdso]
  #
  # perf record sleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.019 MB perf.data (8 samples) ]
  # perf report --mmaps
  #      pid      tid     ppid  comm
           0        0       -1 |swapper
        4161     4161       -1 |sleep
                                  55afae69a000-55afae8a3000 r-xp 00000000 3147148 /usr/bin/sleep
                                  7f569f00d000-7f569f3f0000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so
                                  7f569f3f0000-7f569f61a000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so
                                  7fff6fffe000-7fff70000000 r-xp 00000000 0 [vdso]
  #
  # perf record time sleep 1
  0.00user 0.00system 0:01.00elapsed 0%CPU (0avgtext+0avgdata 2156maxresident)k
  0inputs+0outputs (0major+73minor)pagefaults 0swaps
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.019 MB perf.data (14 samples) ]
  # perf report --mmaps
  #      pid      tid     ppid  comm
           0        0       -1 |swapper
        4281     4281       -1 |time
                                  560560dca000-560560fcf000 r-xp 00000000 3190458 /usr/bin/time
                                  7fc175196000-7fc175579000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so
                                  7fc175579000-7fc1757a3000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so
                                  7ffc924f6000-7ffc924f8000 r-xp 00000000 0 [vdso]
        4282     4282     4281 | sleep
                                   560560dca000-560560fcf000 r-xp 00000000 3190458 /usr/bin/time
                                   564b4de3c000-564b4e045000 r-xp 00000000 3147148 /usr/bin/sleep
                                   7f6a5a716000-7f6a5aaf9000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so
                                   7f6a5aaf9000-7f6a5ad23000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so
                                   7fc175196000-7fc175579000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so
                                   7fc175579000-7fc1757a3000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so
                                   7ffc924f6000-7ffc924f8000 r-xp 00000000 0 [vdso]
                                   7ffcec7e6000-7ffcec7e8000 r-xp 00000000 0 [vdso]
  #

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-zulwdlg5rfowogr1qznorvvc@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-report.txt |  9 ++++-
 tools/perf/builtin-report.c              | 50 ++++++++++++++++++++++--
 2 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 856c3c7e94fae..63d0db3184c90 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -457,6 +457,13 @@ include::itrace.txt[]
 	will be printed. Each entry is function name or file/line. Enabled by
 	default, disable with --no-inline.
 
+--mmaps::
+	Show --tasks output plus mmap information in a format similar to
+	/proc/<PID>/maps.
+
+	Please note that not all mmaps are stored, options affecting which ones
+	are include 'perf record --data', for instance.
+
 --stats::
 	Display overall events statistics without any further processing.
 	(like the one at the end of the perf report -D command)
@@ -469,4 +476,4 @@ include::callchain-overhead-calculation.txt[]
 
 SEE ALSO
 --------
-linkperf:perf-stat[1], linkperf:perf-annotate[1]
+linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1]
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 2c7bd85651dca..dd4df9a5cd061 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -52,6 +52,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
+#include <linux/mman.h>
 
 #define PTIME_RANGE_MAX	10
 
@@ -65,6 +66,7 @@ struct report {
 	bool			mem_mode;
 	bool			stats_mode;
 	bool			tasks_mode;
+	bool			mmaps_mode;
 	bool			header;
 	bool			header_only;
 	bool			nonany_branch_mode;
@@ -608,6 +610,10 @@ static int stats_print(struct report *rep)
 static void tasks_setup(struct report *rep)
 {
 	memset(&rep->tool, 0, sizeof(rep->tool));
+	if (rep->mmaps_mode) {
+		rep->tool.mmap = perf_event__process_mmap;
+		rep->tool.mmap2 = perf_event__process_mmap2;
+	}
 	rep->tool.comm = perf_event__process_comm;
 	rep->tool.exit = perf_event__process_exit;
 	rep->tool.fork = perf_event__process_fork;
@@ -642,14 +648,46 @@ static struct task *tasks_list(struct task *task, struct machine *machine)
 	return tasks_list(parent_task, machine);
 }
 
+static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp)
+{
+	size_t printed = 0;
+	struct rb_node *nd;
+
+	for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
+		struct map *map = rb_entry(nd, struct map, rb_node);
+
+		printed += fprintf(fp, "%*s  %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n",
+				   indent, "", map->start, map->end,
+				   map->prot & PROT_READ ? 'r' : '-',
+				   map->prot & PROT_WRITE ? 'w' : '-',
+				   map->prot & PROT_EXEC ? 'x' : '-',
+				   map->flags & MAP_SHARED ? 's' : 'p',
+				   map->pgoff,
+				   map->ino, map->dso->name);
+	}
+
+	return printed;
+}
+
+static int map_groups__fprintf_task(struct map_groups *mg, int indent, FILE *fp)
+{
+	int printed = 0, i;
+	for (i = 0; i < MAP__NR_TYPES; ++i)
+		printed += maps__fprintf_task(&mg->maps[i], indent, fp);
+	return printed;
+}
+
 static void task__print_level(struct task *task, FILE *fp, int level)
 {
 	struct thread *thread = task->thread;
 	struct task *child;
+	int comm_indent = fprintf(fp, "  %8d %8d %8d |%*s",
+				  thread->pid_, thread->tid, thread->ppid,
+				  level, "");
+
+	fprintf(fp, "%s\n", thread__comm_str(thread));
 
-	fprintf(fp, "  %8d %8d %8d |%*s%s\n",
-		thread->pid_, thread->tid, thread->ppid,
-		level, "", thread__comm_str(thread));
+	map_groups__fprintf_task(thread->mg, comm_indent, fp);
 
 	if (!list_empty(&task->children)) {
 		list_for_each_entry(child, &task->children, list)
@@ -930,6 +968,7 @@ int cmd_report(int argc, const char **argv)
 		    "dump raw trace in ASCII"),
 	OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"),
 	OPT_BOOLEAN(0, "tasks", &report.tasks_mode, "Display recorded tasks"),
+	OPT_BOOLEAN(0, "mmaps", &report.mmaps_mode, "Display recorded tasks memory maps"),
 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
 		   "file", "vmlinux pathname"),
 	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
@@ -1077,6 +1116,9 @@ int cmd_report(int argc, const char **argv)
 		report.symbol_filter_str = argv[0];
 	}
 
+	if (report.mmaps_mode)
+		report.tasks_mode = true;
+
 	if (quiet)
 		perf_quiet_option();
 
@@ -1194,7 +1236,7 @@ int cmd_report(int argc, const char **argv)
 	if (report.stats_mode || report.tasks_mode)
 		use_browser = 0;
 	if (report.stats_mode && report.tasks_mode) {
-		pr_err("Error: --tasks and --stats options cannot be used together\n");
+		pr_err("Error: --tasks and --mmaps can't be used together with --stats\n");
 		goto error;
 	}
 

From 5d64db2966e38bfd99114ecf0b54f97d33023dcd Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 10 Jan 2018 11:36:07 -0300
Subject: [PATCH 27/27] tools headers: Synchronize kernel <-> tooling headers

Two kernel headers got modified recently due to meltdown/spectre, in:

  a89f040fa34e ("x86/cpufeatures: Add X86_BUG_CPU_INSECURE")

which are used by tooling as well:

  arch/x86/include/asm/cpufeatures.h
  arch/x86/include/asm/disabled-features.h

None of those changes have an effect on tooling, so do a plain copy.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-qqzcs8ri3vks8cypg0puk0ae@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/cpufeatures.h       | 4 +++-
 tools/arch/x86/include/asm/disabled-features.h | 8 +++++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 800104c8a3edf..21ac898df2d8e 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -197,11 +197,12 @@
 #define X86_FEATURE_CAT_L3		( 7*32+ 4) /* Cache Allocation Technology L3 */
 #define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 
 #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
-
+#define X86_FEATURE_PTI			( 7*32+11) /* Kernel Page Table Isolation enabled */
 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT		( 7*32+15) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512_4VNNIW	( 7*32+16) /* AVX-512 Neural Network Instructions */
@@ -340,5 +341,6 @@
 #define X86_BUG_SWAPGS_FENCE		X86_BUG(11) /* SWAPGS without input dep on GS */
 #define X86_BUG_MONITOR			X86_BUG(12) /* IPI required to wake up remote CPU */
 #define X86_BUG_AMD_E400		X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_MELTDOWN		X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 14d6d50073142..b027633e73003 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -50,6 +50,12 @@
 # define DISABLE_LA57	(1<<(X86_FEATURE_LA57 & 31))
 #endif
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define DISABLE_PTI		0
+#else
+# define DISABLE_PTI		(1 << (X86_FEATURE_PTI & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -60,7 +66,7 @@
 #define DISABLED_MASK4	(DISABLE_PCID)
 #define DISABLED_MASK5	0
 #define DISABLED_MASK6	0
-#define DISABLED_MASK7	0
+#define DISABLED_MASK7	(DISABLE_PTI)
 #define DISABLED_MASK8	0
 #define DISABLED_MASK9	(DISABLE_MPX)
 #define DISABLED_MASK10	0