From 726647d0526c5c2f3472010677122b89d9e4ef88 Mon Sep 17 00:00:00 2001 From: Jack Henschel Date: Thu, 24 Aug 2017 15:20:22 +0200 Subject: [PATCH 01/18] perf stat: Fix path to PMU formats in documentation As defined in tools/perf/util/pmu.c, the EVENT_SOURCE_DEVICE_PATH is /sys/bus/event_source/devices/ (no trailing 's' in event_source) This patch corrects the path in the perf stat documentation Signed-off-by: Jack Henschel Cc: Alexander Shishkin Cc: Jack Henschel Cc: Peter Zijlstra Cc: trivial@kernel.org Link: http://lkml.kernel.org/r/20170824132022.10934-1-jackdev@mailbox.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 698076313606a..c37d61682dfb1 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -41,13 +41,13 @@ report:: - a symbolically formed event like 'pmu/param1=0x3,param2/' where param1 and param2 are defined as formats for the PMU in - /sys/bus/event_sources/devices//format/* + /sys/bus/event_source/devices//format/* - a symbolically formed event like 'pmu/config=M,config1=N,config2=K/' where M, N, K are numbers (in decimal, hex, octal format). Acceptable values for each of 'config', 'config1' and 'config2' parameters are defined by corresponding entries in - /sys/bus/event_sources/devices//format/* + /sys/bus/event_source/devices//format/* -i:: --no-inherit:: From 6bd76b8fabe157233e498931c3f9298ee7128a28 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 25 Aug 2017 15:45:10 -0300 Subject: [PATCH 02/18] perf tools: Fix static build with newer toolchains We can't pass --dynamic-list list into static build anymore, because compilers start to scream about that. Fedora 26 started to fail build with following error: $ make LDFLAGS=-static ... 
/usr/bin/ld: dynamic STT_GNU_IFUNC symbol `strcmp' with pointer equality in `/usr/lib/gcc/x86_64-redhat-linux/7/../../../../lib64/libc.a(strcmp.o +)' can not be used when making an executable; recompile with -fPIE and relink with -pie There's no sense for --dynamic-list in static build, because there's no .dynsym table in static binary. Consequently the traceevent plugins have never worked with static build, but it was quietly passed by. To fix this in future I think we should add support to compile plugins within the perf binary directly for static build. Reported-and-Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Link: http://lkml.kernel.org/n/tip-jeg6a7ff9j9hlqn8k4gllzvv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 70ddc65f898dd..a700a079a2181 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -279,7 +279,13 @@ LIBTRACEEVENT = $(TE_PATH)libtraceevent.a export LIBTRACEEVENT LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list -LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST) + +# +# The static build has no dynsym table, so this does not work for +# static build. Looks like linker starts to scream about that now +# (in Fedora 26) so we need to switch it off for static build. +DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST) +LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DYNAMIC_LIST_LDFLAGS)) LIBAPI = $(API_PATH)libapi.a export LIBAPI From 12c15302dd4b768105d4b7a487ed4858ccab94fc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 10:57:32 +0200 Subject: [PATCH 03/18] perf c2c: Fix remote HITM detection for Skylake Skylake introduced new mem_remote bit in union perf_mem_data_src [1]. 
It applies to any other memory level to express Remote unknown level, as is reported by Skylake. Adding this extra check to c2c_decode_stats to properly decode remote HITMs on Skylake. [1] http://lkml.kernel.org/r/20170816222156.19953-4-andi@firstfloor.org Signed-off-by: Jiri Olsa Acked-by: Andi Kleen Cc: Alexander Shishkin Cc: David Ahern Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824085732.28481-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index ced4f3fff0357..28afe5fa84d61 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -316,6 +316,11 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) u64 lvl = data_src->mem_lvl; u64 snoop = data_src->mem_snoop; u64 lock = data_src->mem_lock; + /* + * Skylake might report unknown remote level via this + * bit, consider it when evaluating remote HITMs. + */ + bool mrem = data_src->mem_remote; int err = 0; #define HITM_INC(__f) \ @@ -361,7 +366,8 @@ do { \ } if ((lvl & P(LVL, REM_RAM1)) || - (lvl & P(LVL, REM_RAM2))) { + (lvl & P(LVL, REM_RAM2)) || + mrem) { stats->rmt_dram++; if (snoop & P(SNOOP, HIT)) stats->ld_shared++; @@ -371,7 +377,8 @@ do { \ } if ((lvl & P(LVL, REM_CCE1)) || - (lvl & P(LVL, REM_CCE2))) { + (lvl & P(LVL, REM_CCE2)) || + mrem) { if (snoop & P(SNOOP, HIT)) stats->rmt_hit++; else if (snoop & P(SNOOP, HITM)) From a17f06978769735ab5c7598c46881fa201e9b1a2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:31 +0200 Subject: [PATCH 04/18] perf record: Set read_format for inherit_stat Set read_format for what we expect to get from read event generated by perf_event_attr::inherit_stat. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a5888c704e017..d9bd632ed7db3 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -902,8 +902,13 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, if (opts->no_samples) attr->sample_freq = 0; - if (opts->inherit_stat) + if (opts->inherit_stat) { + evsel->attr.read_format |= + PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING | + PERF_FORMAT_ID; attr->inherit_stat = 1; + } if (opts->sample_address) { perf_evsel__set_sample_bit(evsel, ADDR); From dac7f6b7ed1c8601358357f60e9764a4c6a68d71 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:32 +0200 Subject: [PATCH 05/18] perf report: Add dump_read function Adding dump_read function to gather all the dump output of read function. Adding output of enabled and running times and id if enabled (3 new lines with '...' prefix below). $ perf record -s ... $ perf report -D 958358311769 0x91f8 [0x40]: PERF_RECORD_READ: 3339 3339 cycles:u 0 ... time enabled : 958358313731 ... time running : 958358313731 ... 
id : 80 Committer note: Do not use 'read' as a variable name as it breaks the build on older systems, such as RHEL6: CC /tmp/build/perf/util/session.o cc1: warnings being treated as errors util/session.c: In function 'dump_read': util/session.c:1132: error: declaration of 'read' shadows a global declaration /usr/include/bits/unistd.h:35: error: shadowed declaration is here mv: cannot stat `/tmp/build/perf/util/.session.o.tmp': No such file or directory Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 4 ---- tools/perf/util/session.c | 25 +++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index bace3429c0309..9e4004b08f559 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -249,10 +249,6 @@ static int process_read_event(struct perf_tool *tool, return err; } - dump_printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid, - evsel ? perf_evsel__name(evsel) : "FAIL", - event->read.value); - return 0; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index dc453f84a14c2..ac863691605f3 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1127,6 +1127,30 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, sample_read__printf(sample, evsel->attr.read_format); } +static void dump_read(struct perf_evsel *evsel, union perf_event *event) +{ + struct read_event *read_event = &event->read; + u64 read_format; + + if (!dump_trace) + return; + + printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid, + evsel ? 
perf_evsel__name(evsel) : "FAIL", + event->read.value); + + read_format = evsel->attr.read_format; + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + printf("... time enabled : %" PRIu64 "\n", read_event->time_enabled); + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + printf("... time running : %" PRIu64 "\n", read_event->time_running); + + if (read_format & PERF_FORMAT_ID) + printf("... id : %" PRIu64 "\n", read_event->id); +} + static struct machine *machines__find_for_cpumode(struct machines *machines, union perf_event *event, struct perf_sample *sample) @@ -1271,6 +1295,7 @@ static int machines__deliver_event(struct machines *machines, evlist->stats.total_lost_samples += event->lost_samples.lost; return tool->lost_samples(tool, event, sample, machine); case PERF_RECORD_READ: + dump_read(evsel, event); return tool->read(tool, event, sample, evsel, machine); case PERF_RECORD_THROTTLE: return tool->throttle(tool, event, sample, machine); From 64eed1deb6d87f4c0efe03297f50367a3689eb56 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:33 +0200 Subject: [PATCH 06/18] perf values: Fix thread index bug We are taking wrong index (+1) for first thread, which leaves thread with index 0 unused and uninitialized. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/values.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 5de2e15e2eda9..9ac36bf2c4389 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -98,7 +98,7 @@ static int perf_read_values__findnew_thread(struct perf_read_values *values, return i; } - i = values->threads + 1; + i = values->threads; values->value[i] = malloc(values->counters_max * sizeof(**values->value)); if (!values->value[i]) { pr_debug("failed to allocate read_values counters array"); @@ -106,7 +106,7 @@ static int perf_read_values__findnew_thread(struct perf_read_values *values, } values->pid[i] = pid; values->tid[i] = tid; - values->threads = i; + values->threads = i + 1; return i; } From f4ef3b7c184c4c269f953f226f7158347d007622 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:34 +0200 Subject: [PATCH 07/18] perf values: Fix allocation check Bailing out in case the allocation failed, not the other way round. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/values.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 9ac36bf2c4389..2c4af02f08cde 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -131,7 +131,7 @@ static int perf_read_values__enlarge_counters(struct perf_read_values *values) for (i = 0; i < values->threads; i++) { u64 *value = realloc(values->value[i], counters_max * sizeof(**values->value)); - if (value) { + if (!value) { pr_debug("failed to enlarge read_values ->values array"); goto out_free_name; } From a1834fc938344dd3015a1df64ee7f2af70ded147 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:35 +0200 Subject: [PATCH 08/18] perf values: Zero value buffers We need to make sure the array of value pointers are zero initialized, because we use them in realloc later on and uninitialized non zero value will cause allocation error and aborted execution. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/values.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 2c4af02f08cde..3b56aeaa8cbb5 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -12,7 +12,7 @@ int perf_read_values_init(struct perf_read_values *values) values->threads_max = 16; values->pid = malloc(values->threads_max * sizeof(*values->pid)); values->tid = malloc(values->threads_max * sizeof(*values->tid)); - values->value = malloc(values->threads_max * sizeof(*values->value)); + values->value = zalloc(values->threads_max * sizeof(*values->value)); if (!values->pid || !values->tid || !values->value) { pr_debug("failed to allocate read_values threads arrays"); goto out_free_pid; @@ -99,7 +99,8 @@ static int perf_read_values__findnew_thread(struct perf_read_values *values, } i = values->threads; - values->value[i] = malloc(values->counters_max * sizeof(**values->value)); + + values->value[i] = zalloc(values->counters_max * sizeof(**values->value)); if (!values->value[i]) { pr_debug("failed to allocate read_values counters array"); return -ENOMEM; @@ -130,12 +131,16 @@ static int perf_read_values__enlarge_counters(struct perf_read_values *values) for (i = 0; i < values->threads; i++) { u64 *value = realloc(values->value[i], counters_max * sizeof(**values->value)); + int j; if (!value) { pr_debug("failed to enlarge read_values ->values array"); goto out_free_name; } + for (j = values->counters_max; j < counters_max; j++) + value[j] = 0; + values->value[i] = value; } From 9933183e365f7dd3a79507f1ffb4bcf9433a73ee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:36 +0200 Subject: [PATCH 09/18] perf report: Group stat values on 
global event id There's no big value on displaying counts for every event ID, which is one per every CPU. Rather than that, display the whole sum for the event. $ perf record -c 100000 -e cycles:u -s test $ perf report -T Before: # PID TID cycles:u cycles:u cycles:u cycles:u ... [20 more columns of 'cycles:u'] 3339 3339 0 0 0 0 3340 3340 0 0 0 0 3341 3341 0 0 0 0 3342 3342 0 0 0 0 Now: # PID TID cycles:u 3339 3339 19678 3340 3340 18744 3341 3341 17335 3342 3342 26414 Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 +- tools/perf/util/values.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 9e4004b08f559..f9dff652dcbdb 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -241,7 +241,7 @@ static int process_read_event(struct perf_tool *tool, const char *name = evsel ? perf_evsel__name(evsel) : "unknown"; int err = perf_read_values_add_value(&rep->show_threads_values, event->read.pid, event->read.tid, - event->read.id, + evsel->idx, name, event->read.value); diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 3b56aeaa8cbb5..8a32bb0095e5e 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -192,7 +192,7 @@ int perf_read_values_add_value(struct perf_read_values *values, if (cindex < 0) return cindex; - values->value[tindex][cindex] = value; + values->value[tindex][cindex] += value; return 0; } From ba5d1a48aab56a2677113d071b5b1446877b9a1a Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Sun, 27 Aug 2017 00:54:37 -0700 Subject: [PATCH 10/18] tools build tests: Don't hardcode gcc name Use $(CC) instead of hardcoded gcc binary name. 
Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Paul Turner Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170827075442.108534-2-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/tests/ex/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/build/tests/ex/Makefile b/tools/build/tests/ex/Makefile index c50d5782ad5a9..027d6c8a58a74 100644 --- a/tools/build/tests/ex/Makefile +++ b/tools/build/tests/ex/Makefile @@ -8,7 +8,7 @@ ex: include $(srctree)/tools/build/Makefile.include ex: ex-in.o libex-in.o - gcc -o $@ $^ + $(CC) -o $@ $^ ex.%: fixdep FORCE make -f $(srctree)/tools/build/Makefile.build dir=. $@ From 39a59f1e3ea541035637432db39158a461f29146 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Sun, 27 Aug 2017 00:54:38 -0700 Subject: [PATCH 11/18] perf tools: Allow external definition of flex and bison binary names Allow user to define flex and bison binary names by passing FLEX and BISON variables. Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Paul Turner Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170827075442.108534-3-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a700a079a2181..58924eb0f40b5 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -164,8 +164,8 @@ LN = ln -f MKDIR = mkdir FIND = find INSTALL = install -FLEX = flex -BISON = bison +FLEX ?= flex +BISON ?= bison STRIP = strip AWK = awk From 12024aacb0170779cd0b976b06d2e9b1767cf142 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Sun, 27 Aug 2017 00:54:39 -0700 Subject: [PATCH 12/18] tools lib: Allow external definition of CC, AR and LD Use already defined values for CC, AR and LD when available. 
Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Paul Turner Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170827075442.108534-4-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index eb6e0b36bfc19..2538675731c7a 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -8,9 +8,9 @@ srctree := $(patsubst %/,%,$(dir $(srctree))) #$(info Determined 'srctree' to be $(srctree)) endif -CC = $(CROSS_COMPILE)gcc -AR = $(CROSS_COMPILE)ar -LD = $(CROSS_COMPILE)ld +CC ?= $(CROSS_COMPILE)gcc +AR ?= $(CROSS_COMPILE)ar +LD ?= $(CROSS_COMPILE)ld MAKEFLAGS += --no-print-directory From 3866058ef15b6ae6f4ff48e088428b46bcc43fa1 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Sun, 27 Aug 2017 00:54:40 -0700 Subject: [PATCH 13/18] perf tools: Robustify detection of clang binary Prior to this patch, make scripts tested for CLANG with ifeq ($(CC), clang), failing to detect CLANG binaries with different names. Fix it by testing for the existence of __clang__ macro in the list of compiler defined macros. 
Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Paul Turner Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170827075442.108534-5-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/Makefile | 2 +- tools/lib/subcmd/Makefile | 2 +- tools/perf/Makefile.config | 4 ++-- tools/perf/util/intel-pt-decoder/Build | 2 +- tools/scripts/Makefile.include | 4 +++- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 2538675731c7a..4563ba7ede6f7 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -19,7 +19,7 @@ LIBFILE = $(OUTPUT)libapi.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC -ifeq ($(CC), clang) +ifeq ($(CC_NO_CLANG), 0) CFLAGS += -O3 else CFLAGS += -O6 diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index 3d1c3b5b51504..7e9f03c97e4c6 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -21,7 +21,7 @@ LIBFILE = $(OUTPUT)libsubcmd.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC -ifeq ($(CC), clang) +ifeq ($(CC_NO_CLANG), 0) CFLAGS += -O3 else CFLAGS += -O6 diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 6a64c6bbd9a50..63f534a0902f2 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -148,7 +148,7 @@ ifndef DEBUG endif ifeq ($(DEBUG),0) -ifeq ($(CC), clang) +ifeq ($(CC_NO_CLANG), 0) CFLAGS += -O3 else CFLAGS += -O6 @@ -184,7 +184,7 @@ ifdef PYTHON_CONFIG PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) - ifeq ($(CC), clang) + ifeq ($(CC_NO_CLANG), 1) PYTHON_EMBED_CCOPTS := $(filter-out 
-specs=%,$(PYTHON_EMBED_CCOPTS)) endif FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 7aca5d6d7e1f0..10e0814bb8d2c 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -25,6 +25,6 @@ $(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/in CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder -ifneq ($(CC), clang) +ifeq ($(CC_NO_CLANG), 1) CFLAGS_intel-pt-insn-decoder.o += -Wno-override-init endif diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index ccad8ce925e42..1e8b6116ba3c4 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -39,7 +39,9 @@ EXTRA_WARNINGS += -Wundef EXTRA_WARNINGS += -Wwrite-strings EXTRA_WARNINGS += -Wformat -ifneq ($(CC), clang) +CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?) + +ifeq ($(CC_NO_CLANG), 1) EXTRA_WARNINGS += -Wstrict-aliasing=3 endif From 70ff7c6caa2f2cee4a7621f5cb3b73b0a38327f1 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Sun, 27 Aug 2017 00:54:42 -0700 Subject: [PATCH 14/18] perf tools: Pass full path of FEATURES_DUMP When building with an external FEATURES_DUMP, bpf complains that features dump file is not found. Fix it by passing full file path. 
Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Paul Turner Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170827075442.108534-7-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 58924eb0f40b5..a5bf3100f812f 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -240,7 +240,7 @@ endif ifeq ($(FEATURES_DUMP),) FEATURE_DUMP_EXPORT := $(realpath $(OUTPUT)FEATURE-DUMP) else -FEATURE_DUMP_EXPORT := $(FEATURES_DUMP) +FEATURE_DUMP_EXPORT := $(realpath $(FEATURES_DUMP)) endif export prefix bindir sharedir sysconfdir DESTDIR From a2105f8a9c38f8298f501cf1cd3218407ecdb1a1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Aug 2017 11:26:14 -0300 Subject: [PATCH 15/18] tools headers: Sync cpu features kernel ABI headers with tooling headers These changes made the tools/arch/x86/include/ headers to drift from its kernel origins: 910448bbed06 ("perf/x86/amd/uncore: Rename cpufeatures macro for cache counters") 5442c2699552 ("x86/cpufeature, kvm/svm: Rename (shorten) the new "virtualized VMSAVE/VMLOAD" CPUID flag") cba4671af755 ("x86/mm: Disable PCID on 32-bit kernels") Which was detected while building perf: make: Entering directory '/home/acme/git/linux/tools/perf' BUILD: Doing 'make -j4' parallel build Warning: Kernel ABI header at 'tools/arch/x86/include/asm/disabled-features.h' differs from latest version at 'arch/x86/include/asm/disabled-features.h' Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' This sync causes just these perf object files to be rebuilt: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And the changes in the above changesets don't entail any need for change in the above 'perf 
bench' files. Cc: Adrian Hunter Cc: Andy Lutomirski Cc: Borislav Petkov Cc: David Ahern Cc: Janakarajan Natarajan Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-456aafouj911a4x4zwt8stkm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 5 ++--- tools/arch/x86/include/asm/disabled-features.h | 4 +--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 14f0f29133640..8ea315a11fe0d 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -177,7 +177,7 @@ #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ #define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ #define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */ -#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ #define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ /* @@ -196,7 +196,6 @@ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ @@ -287,7 +286,7 @@ #define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ -#define X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE (15*32+15) /* Virtual VMLOAD VMSAVE */ +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ /* Intel-defined CPU features, CPUID level 0x00000007:0 
(ecx), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index c10c9128f54e6..5dff775af7cd6 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -21,13 +21,11 @@ # define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) # define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) # define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) -# define DISABLE_PCID 0 #else # define DISABLE_VME 0 # define DISABLE_K6_MTRR 0 # define DISABLE_CYRIX_ARR 0 # define DISABLE_CENTAUR_MCR 0 -# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31)) #endif /* CONFIG_X86_64 */ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS @@ -51,7 +49,7 @@ #define DISABLED_MASK1 0 #define DISABLED_MASK2 0 #define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) -#define DISABLED_MASK4 (DISABLE_PCID) +#define DISABLED_MASK4 0 #define DISABLED_MASK5 0 #define DISABLED_MASK6 0 #define DISABLED_MASK7 0 From 83bc9c371eaf21466f43b12d942b66c3f0d60ae5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Aug 2017 11:47:11 -0300 Subject: [PATCH 16/18] perf trace beauty: Beautify pkey_{alloc,free,mprotect} arguments Reuse 'mprotect' beautifiers for 'pkey_mprotect'. 
System wide tracing pkey_alloc, pkey_free and pkey_mprotect calls, with backtraces: # perf trace -e pkey_alloc,pkey_mprotect,pkey_free --max-stack=5 0.000 ( 0.011 ms): pkey/7818 pkey_alloc(init_val: DISABLE_ACCESS|DISABLE_WRITE) = -1 EINVAL Invalid argument syscall (/usr/lib64/libc-2.25.so) pkey_alloc (/home/acme/c/pkey) 0.022 ( 0.003 ms): pkey/7818 pkey_mprotect(start: 0x7f28c3890000, len: 4096, prot: READ|WRITE, pkey: -1) = 0 syscall (/usr/lib64/libc-2.25.so) pkey_mprotect (/home/acme/c/pkey) 0.030 ( 0.002 ms): pkey/7818 pkey_free(pkey: -1 ) = -1 EINVAL Invalid argument syscall (/usr/lib64/libc-2.25.so) pkey_free (/home/acme/c/pkey) The tools/include/uapi/asm-generic/mman-common.h file is used to find the access rights defines for the pkey_alloc syscall second argument. Since we have the detector of changes for the tools/include header files versus its kernel origin (include/uapi/asm-generic/mman-common.h), we'll get whatever new flag appears for that argument automatically. This method should be used in other cases where it is easy to generate those flags tables because the header has properly namespaced defines like PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE. 
Cc: Adrian Hunter Cc: Arnd Bergmann Cc: Dave Hansen Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-3xq5312qlks7wtfzv2sk3nct@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 12 ++++- tools/perf/builtin-trace.c | 8 +++ tools/perf/trace/beauty/Build | 1 + tools/perf/trace/beauty/beauty.h | 3 ++ tools/perf/trace/beauty/pkey_alloc.c | 50 +++++++++++++++++++ .../trace/beauty/pkey_alloc_access_rights.sh | 10 ++++ 6 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 tools/perf/trace/beauty/pkey_alloc.c create mode 100755 tools/perf/trace/beauty/pkey_alloc_access_rights.sh diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a5bf3100f812f..91ef44bfaf3e3 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -387,7 +387,8 @@ export INSTALL SHELL_PATH SHELL = $(SHELL_PATH) -beauty_ioctl_outdir := $(OUTPUT)trace/beauty/generated/ioctl +beauty_outdir := $(OUTPUT)trace/beauty/generated +beauty_ioctl_outdir := $(beauty_outdir)/ioctl drm_ioctl_array := $(beauty_ioctl_outdir)/drm_ioctl_array.c drm_hdr_dir := $(srctree)/tools/include/uapi/drm drm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/drm_ioctl.sh @@ -398,6 +399,13 @@ _dummy := $(shell [ -d '$(beauty_ioctl_outdir)' ] || mkdir -p '$(beauty_ioctl_ou $(drm_ioctl_array): $(drm_hdr_dir)/drm.h $(drm_hdr_dir)/i915_drm.h $(drm_ioctl_tbl) $(Q)$(SHELL) '$(drm_ioctl_tbl)' $(drm_hdr_dir) > $@ +pkey_alloc_access_rights_array := $(beauty_outdir)/pkey_alloc_access_rights_array.c +asm_generic_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/ +pkey_alloc_access_rights_tbl := $(srctree)/tools/perf/trace/beauty/pkey_alloc_access_rights.sh + +$(pkey_alloc_access_rights_array): $(asm_generic_hdr_dir)/mman-common.h $(pkey_alloc_access_rights_tbl) + $(Q)$(SHELL) '$(pkey_alloc_access_rights_tbl)' $(asm_generic_hdr_dir) > $@ + sndrv_ctl_ioctl_array := $(beauty_ioctl_outdir)/sndrv_ctl_ioctl_array.c 
sndrv_ctl_hdr_dir := $(srctree)/tools/include/uapi/sound sndrv_ctl_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh @@ -528,6 +536,7 @@ __build-dir = $(subst $(OUTPUT),,$(dir $@)) build-dir = $(if $(__build-dir),$(__build-dir),.) prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) \ + $(pkey_alloc_access_rights_array) \ $(sndrv_pcm_ioctl_array) \ $(sndrv_ctl_ioctl_array) \ $(kvm_ioctl_array) \ @@ -803,6 +812,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ $(OUTPUT)pmu-events/pmu-events.c \ $(OUTPUT)$(drm_ioctl_array) \ + $(OUTPUT)$(pkey_alloc_access_rights_array) \ $(OUTPUT)$(sndrv_ctl_ioctl_array) \ $(OUTPUT)$(sndrv_pcm_ioctl_array) \ $(OUTPUT)$(kvm_ioctl_array) \ diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 91905839e386d..d59cdadf3a791 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -693,6 +693,14 @@ static struct syscall_fmt { [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, }, { .name = "pipe2", .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, }, + { .name = "pkey_alloc", + .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, /* access_rights */ }, }, }, + { .name = "pkey_free", + .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, }, + { .name = "pkey_mprotect", + .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, + [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, + [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, }, { .name = "poll", .timeout = true, }, { .name = "ppoll", .timeout = true, }, { .name = "pread", .alias = "pread64", }, diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index 6f3f159f97e0d..175d633c6b491 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -3,4 +3,5 @@ libperf-y += fcntl.o ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) libperf-y += ioctl.o 
endif +libperf-y += pkey_alloc.o libperf-y += statx.o diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 47a36a8eb8422..4b58581a60530 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -78,6 +78,9 @@ size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_ar size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg); #define SCA_IOCTL_CMD syscall_arg__scnprintf_ioctl_cmd +size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights + size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags diff --git a/tools/perf/trace/beauty/pkey_alloc.c b/tools/perf/trace/beauty/pkey_alloc.c new file mode 100644 index 0000000000000..2ba784a3734ad --- /dev/null +++ b/tools/perf/trace/beauty/pkey_alloc.c @@ -0,0 +1,50 @@ +/* + * trace/beauty/pkey_alloc.c + * + * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Released under the GPL v2. 
(and only v2, not any later version) + */ + +#include "trace/beauty/beauty.h" +#include <linux/kernel.h> +#include <linux/log2.h> + +static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size) +{ + int i, printed = 0; + +#include "trace/beauty/generated/pkey_alloc_access_rights_array.c" + static DEFINE_STRARRAY(pkey_alloc_access_rights); + + if (access_rights == 0) { + const char *s = strarray__pkey_alloc_access_rights.entries[0]; + if (s) + return scnprintf(bf, size, "%s", s); + return scnprintf(bf, size, "%d", 0); + } + + for (i = 1; i < strarray__pkey_alloc_access_rights.nr_entries; ++i) { + int bit = 1 << (i - 1); + + if (!(access_rights & bit)) + continue; + + if (printed != 0) + printed += scnprintf(bf + printed, size - printed, "|"); + + if (strarray__pkey_alloc_access_rights.entries[i] != NULL) + printed += scnprintf(bf + printed, size - printed, "%s", strarray__pkey_alloc_access_rights.entries[i]); + else + printed += scnprintf(bf + printed, size - printed, "%#x", bit); /* bit has no name in the table: print its value in hex */ + } + + return printed; +} + +size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long cmd = arg->val; + + return pkey_alloc__scnprintf_access_rights(cmd, bf, size); +} diff --git a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh new file mode 100755 index 0000000000000..62e51a02b8398 --- /dev/null +++ b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +header_dir=$1 + +printf "static const char *pkey_alloc_access_rights[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+PKEY_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*' +egrep $regex ${header_dir}/mman-common.h | \ + sed -r "s/$regex/\2 \2 \1/g" | \ + sort | xargs printf "\t[%s ? 
(ilog2(%s) + 1) : 0] = \"%s\",\n" +printf "};\n" From 2c29461e273abaf149cf8220c3403e9d67dd8b61 Mon Sep 17 00:00:00 2001 From: Li Bin Date: Tue, 29 Aug 2017 20:57:23 +0800 Subject: [PATCH 17/18] perf probe: Fix kprobe blacklist checking condition Since commit 9aaf5a5f479b ("perf probe: Check kprobes blacklist when adding new events"), 'perf probe' supports checking the blacklist of the functions which can not be probed. But the checking condition is wrong: the end_addr of a symbol, which is the start_addr of the next symbol, must not be included. Committer notes: IOW make it match its kernel counterpart in kernel/kprobes.c: bool within_kprobe_blacklist(unsigned long addr) Each entry has as its end address not the last address of the symbol, but the first address _outside_ that symbol, which for related functions, is the first address of the next symbol, like these from kernel/trace/trace_probe.c: 0xffffffffbd198df0-0xffffffffbd198e40 print_type_u8 0xffffffffbd198e40-0xffffffffbd198e90 print_type_u16 0xffffffffbd198e90-0xffffffffbd198ee0 print_type_u32 0xffffffffbd198ee0-0xffffffffbd198f30 print_type_u64 0xffffffffbd198f30-0xffffffffbd198f80 print_type_s8 0xffffffffbd198f80-0xffffffffbd198fd0 print_type_s16 0xffffffffbd198fd0-0xffffffffbd199020 print_type_s32 0xffffffffbd199020-0xffffffffbd199070 print_type_s64 0xffffffffbd199070-0xffffffffbd1990c0 print_type_x8 0xffffffffbd1990c0-0xffffffffbd199110 print_type_x16 0xffffffffbd199110-0xffffffffbd199160 print_type_x32 0xffffffffbd199160-0xffffffffbd1991b0 print_type_x64 But not always: 0xffffffffbd1997b0-0xffffffffbd1997c0 fetch_kernel_stack_address (kernel/trace/trace_probe.c) 0xffffffffbd1c57f0-0xffffffffbd1c58b0 __context_tracking_enter (kernel/context_tracking.c) Signed-off-by: Li Bin Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: zhangmengting@huawei.com Fixes: 9aaf5a5f479b ("perf probe: Check kprobes blacklist when adding new events") Link: 
http://lkml.kernel.org/r/1504011443-7269-1-git-send-email-huawei.libin@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index d7cd1142f4c6a..b7aaf9b2294d8 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2395,7 +2395,7 @@ kprobe_blacklist__find_by_address(struct list_head *blacklist, struct kprobe_blacklist_node *node; list_for_each_entry(node, blacklist, list) { - if (node->start <= address && address <= node->end) + if (node->start <= address && address < node->end) return node; } From b2f7605076d6cdd68162c42c34caadafbbe4c69f Mon Sep 17 00:00:00 2001 From: Li Bin Date: Mon, 5 Jun 2017 08:34:09 +0800 Subject: [PATCH 18/18] perf symbols: Fix plt entry calculation for ARM and AARCH64 On x86, the plt header size is the same as the plt entry size, and can be identified from shdr's sh_entsize of the plt. But we can't assume that the sh_entsize of the plt shdr is always the plt entry size on all architectures, and the plt header size may differ from the plt entry size on some architectures. On ARM, the plt header size is 20 bytes and the plt entry size is 12 bytes (ignoring the FOUR_WORD_PLT case), according to the binutils implementation. The plt section is as follows: Disassembly of section .plt: 000004a0 <__cxa_finalize@plt-0x14>: 4a0: e52de004 push {lr} ; (str lr, [sp, #-4]!) 4a4: e59fe004 ldr lr, [pc, #4] ; 4b0 <_init+0x1c> 4a8: e08fe00e add lr, pc, lr 4ac: e5bef008 ldr pc, [lr, #8]! 4b0: 00008424 .word 0x00008424 000004b4 <__cxa_finalize@plt>: 4b4: e28fc600 add ip, pc, #0, 12 4b8: e28cca08 add ip, ip, #8, 20 ; 0x8000 4bc: e5bcf424 ldr pc, [ip, #1060]! ; 0x424 000004c0 : 4c0: e28fc600 add ip, pc, #0, 12 4c4: e28cca08 add ip, ip, #8, 20 ; 0x8000 4c8: e5bcf41c ldr pc, [ip, #1052]! 
; 0x41c On AARCH64, the plt header size is 32 bytes and the plt entry size is 16 bytes. The plt section is as follows: Disassembly of section .plt: 0000000000000560 <__cxa_finalize@plt-0x20>: 560: a9bf7bf0 stp x16, x30, [sp,#-16]! 564: 90000090 adrp x16, 10000 <__FRAME_END__+0xf8a8> 568: f944be11 ldr x17, [x16,#2424] 56c: 9125e210 add x16, x16, #0x978 570: d61f0220 br x17 574: d503201f nop 578: d503201f nop 57c: d503201f nop 0000000000000580 <__cxa_finalize@plt>: 580: 90000090 adrp x16, 10000 <__FRAME_END__+0xf8a8> 584: f944c211 ldr x17, [x16,#2432] 588: 91260210 add x16, x16, #0x980 58c: d61f0220 br x17 0000000000000590 <__gmon_start__@plt>: 590: 90000090 adrp x16, 10000 <__FRAME_END__+0xf8a8> 594: f944c611 ldr x17, [x16,#2440] 598: 91262210 add x16, x16, #0x988 59c: d61f0220 br x17 NOTES: In addition to ARM and AARCH64, other architectures, such as s390/alpha/mips/parisc/powerpc/sh/sparc/xtensa, also need to be checked for this issue. Signed-off-by: Li Bin Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Alexis Berlemont Cc: David Tolnay Cc: Hanjun Guo Cc: Hemant Kumar Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: zhangmengting@huawei.com Link: http://lkml.kernel.org/r/1496622849-21877-1-git-send-email-huawei.libin@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a70479061fce0..5c39f420111e5 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -259,7 +259,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * { uint32_t nr_rel_entries, idx; GElf_Sym sym; - u64 plt_offset; + u64 plt_offset, plt_header_size, plt_entry_size; GElf_Shdr shdr_plt; struct symbol *f; GElf_Shdr shdr_rel_plt, shdr_dynsym; @@ -326,6 +326,23 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct 
symsrc *ss, struct map * nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; plt_offset = shdr_plt.sh_offset; + switch (ehdr.e_machine) { + case EM_ARM: + plt_header_size = 20; + plt_entry_size = 12; + break; + + case EM_AARCH64: + plt_header_size = 32; + plt_entry_size = 16; + break; + + default: /* FIXME: s390/alpha/mips/parisc/powerpc/sh/sparc/xtensa need to be checked */ + plt_header_size = shdr_plt.sh_entsize; + plt_entry_size = shdr_plt.sh_entsize; + break; + } + plt_offset += plt_header_size; if (shdr_rel_plt.sh_type == SHT_RELA) { GElf_Rela pos_mem, *pos; @@ -335,7 +352,6 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * const char *elf_name = NULL; char *demangled = NULL; symidx = GELF_R_SYM(pos->r_info); - plt_offset += shdr_plt.sh_entsize; gelf_getsym(syms, symidx, &sym); elf_name = elf_sym__name(&sym, symstrs); @@ -346,11 +362,12 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * "%s@plt", elf_name); free(demangled); - f = symbol__new(plt_offset, shdr_plt.sh_entsize, + f = symbol__new(plt_offset, plt_entry_size, STB_GLOBAL, sympltname); if (!f) goto out_elf_end; + plt_offset += plt_entry_size; symbols__insert(&dso->symbols[map->type], f); ++nr; } @@ -361,7 +378,6 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * const char *elf_name = NULL; char *demangled = NULL; symidx = GELF_R_SYM(pos->r_info); - plt_offset += shdr_plt.sh_entsize; gelf_getsym(syms, symidx, &sym); elf_name = elf_sym__name(&sym, symstrs); @@ -372,11 +388,12 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * "%s@plt", elf_name); free(demangled); - f = symbol__new(plt_offset, shdr_plt.sh_entsize, + f = symbol__new(plt_offset, plt_entry_size, STB_GLOBAL, sympltname); if (!f) goto out_elf_end; + plt_offset += plt_entry_size; symbols__insert(&dso->symbols[map->type], f); ++nr; }