From c56cb33b56c13493eeb95612f80e4dd6e35cd109 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Wed, 10 Aug 2016 15:52:28 +0200 Subject: [PATCH 01/15] perf callchain: Fixup help/config for no-unwinding Since 841e3558b2d ("perf callchain: Recording 'dwarf' callchains do not need DWARF unwinding support"), --call-graph dwarf is allowed in 'perf record' even without unwind support. A couple of other places don't reflect this yet though: the help text should list dwarf as a valid record mode and the dump_size config should be respected too. Signed-off-by: Rabin Vincent Cc: He Kuang Fixes: 841e3558b2de ("perf callchain: Recording 'dwarf' callchains do not need DWARF unwinding support") Link: http://lkml.kernel.org/r/1470837148-7642-1-git-send-email-rabin.vincent@axis.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 2 -- tools/perf/util/callchain.h | 4 ---- 2 files changed, 6 deletions(-) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 07fd30bc2f816..ae58b493af454 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -193,7 +193,6 @@ int perf_callchain_config(const char *var, const char *value) if (!strcmp(var, "record-mode")) return parse_callchain_record_opt(value, &callchain_param); -#ifdef HAVE_DWARF_UNWIND_SUPPORT if (!strcmp(var, "dump-size")) { unsigned long size = 0; int ret; @@ -203,7 +202,6 @@ int perf_callchain_config(const char *var, const char *value) return ret; } -#endif if (!strcmp(var, "print-type")) return parse_callchain_mode(value); if (!strcmp(var, "order")) diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 13e75549c4407..47cfd10809755 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -11,11 +11,7 @@ #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n" -#ifdef HAVE_DWARF_UNWIND_SUPPORT # define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|dwarf|lbr)\n" -#else 
-# define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|lbr)\n" -#endif #define RECORD_SIZE_HELP \ HELP_PAD "record_size:\tif record_mode is 'dwarf', max size of stack recording ()\n" \ From 699c12a7cca376d16f75672f9047faa37b184eda Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 9 Nov 2016 11:04:05 -0300 Subject: [PATCH 02/15] perf intel-pt: Update documentation about context switch events Since the unprivileged sched switch event was added in perf, PT doesn't need perf_event_paranoid=-1 anymore for per cpu decoding. Add a note stating that that is only needed for kernels < 4.2. Reported-by: Andi Kleen Report-Link: http://lkml.kernel.org/r/http://lkml.kernel.org/n/tip-x2ybghpqxxn3zu0m8o7qi42r@git.kernel.org Acked-by: Adrian Hunter Fixes: 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context switches") Link: http://lkml.kernel.org/n/tip-x2ybghpqxxn3zu0m8o7qi42r@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-pt.txt | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index c6c8318e38a2e..b0b3007d3c9c0 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -550,6 +550,18 @@ Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users have memory limits imposed upon them. That affects what buffer sizes they can have as outlined above. +The v4.2 kernel introduced support for a context switch metadata event, +PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes +are scheduled out and in, just not by whom, which is left for the +PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context, +which in turn requires CAP_SYS_ADMIN. 
+ +Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context +switches") commit, that introduces these metadata events for further info. + +When working with kernels < v4.2, the following considerations must be taken, +as the sched:sched_switch tracepoints will be used to receive such information: + Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are not permitted to use tracepoints which means there is insufficient side-band information to decode Intel PT in per-cpu mode, and potentially workload-only @@ -564,8 +576,11 @@ sched_switch tracepoint ----------------------- The sched_switch tracepoint is used to provide side-band data for Intel PT -decoding. sched_switch events are automatically added. e.g. the second event -shown below +decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't +available. + +The sched_switch events are automatically added. e.g. the second event shown +below: $ perf record -vv -e intel_pt//u uname ------------------------------------------------------------ From 2ec8107d8e0d1d285b2bbf1999e7f4e46b6b535b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 2 Nov 2016 14:35:47 +0100 Subject: [PATCH 03/15] tools build: Add CFLAGS_REMOVE_* support Adding support to remove options from final CFLAGS for both object file and build target. It's now possible to remove CFLAGS options like: CFLAGS_REMOVE_krava.o += -Wstrict-prototypes Committer notes: This comes from the kernel's kbuild infrastructure, the subset that is supported in tools/ is being documented at tools/build/Documentation/Build.txt. 
Signed-off-by: Jiri Olsa Tested-by: Stephane Eranian Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: William Cohen Link: http://lkml.kernel.org/r/1478093749-5602-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Build.include | 4 +++- tools/build/Documentation/Build.txt | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/build/Build.include b/tools/build/Build.include index 1dcb95e76f70f..c4ae12a5d0a52 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -89,7 +89,9 @@ if_changed = $(if $(strip $(any-prereq) $(arg-check)), \ # - per target C flags # - per object C flags # - BUILD_STR macro to allow '-D"$(variable)"' constructs -c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj)) +c_flags_1 = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj)) +c_flags_2 = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(c_flags_1)) +c_flags = $(filter-out $(CFLAGS_REMOVE_$(obj)), $(c_flags_2)) cxx_flags = -Wp,-MD,$(depfile),-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXXFLAGS_$(basetarget).o) $(CXXFLAGS_$(obj)) ### diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt index a47bffbae1595..a22587475dbe6 100644 --- a/tools/build/Documentation/Build.txt +++ b/tools/build/Documentation/Build.txt @@ -135,8 +135,10 @@ CFLAGS It's possible to alter the standard object C flags in the following way: - CFLAGS_perf.o += '...' - alters CFLAGS for perf.o object - CFLAGS_gtk += '...' - alters CFLAGS for gtk build object + CFLAGS_perf.o += '...' - adds CFLAGS for perf.o object + CFLAGS_gtk += '...' - adds CFLAGS for gtk build object + CFLAGS_REMOVE_perf.o += '...' - removes CFLAGS for perf.o object + CFLAGS_REMOVE_gtk += '...' - removes CFLAGS for gtk build object This C flags changes has the scope of the Build makefile they are defined in. 
From 285932a25879602407f207e862bc5b8416711f42 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 2 Nov 2016 14:35:48 +0100 Subject: [PATCH 04/15] tools build: Add jvmti feature detection support Adding support to detect jvmti support. It is not plugged into the FEATURE_TESTS machinery, because it's quite rare and will be used separately from perf via feature_check call. Signed-off-by: Jiri Olsa Tested-by: Stephane Eranian Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: William Cohen Link: http://lkml.kernel.org/r/1478093749-5602-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 6 +++++- tools/build/feature/test-jvmti.c | 13 +++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 tools/build/feature/test-jvmti.c diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index ac9c477a2a485..8f668bce8996d 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -47,7 +47,8 @@ FILES= \ test-bpf.bin \ test-get_cpuid.bin \ test-sdt.bin \ - test-cxx.bin + test-cxx.bin \ + test-jvmti.bin FILES := $(addprefix $(OUTPUT),$(FILES)) @@ -225,6 +226,9 @@ $(OUTPUT)test-sdt.bin: $(OUTPUT)test-cxx.bin: $(BUILDXX) -std=gnu++11 +$(OUTPUT)test-jvmti.bin: + $(BUILD) + -include $(OUTPUT)*.d ############################### diff --git a/tools/build/feature/test-jvmti.c b/tools/build/feature/test-jvmti.c new file mode 100644 index 0000000000000..1c665f09b9d65 --- /dev/null +++ b/tools/build/feature/test-jvmti.c @@ -0,0 +1,13 @@ +#include <jni.h> +#include <jvmti.h> + +int main(void) +{ + JavaVM jvm __attribute__((unused)); + jvmtiEventCallbacks cb __attribute__((unused)); + jvmtiCapabilities caps __attribute__((unused)); + jvmtiJlocationFormat format __attribute__((unused)); + jvmtiEnv jvmti __attribute__((unused)); + + return 0; +} From d4dfdf00d43e017dc57372566ceba0e5e1595eba Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 2 Nov 2016 14:35:49 +0100 Subject: 
[PATCH 05/15] perf jvmti: Plug compilation into perf build Compile jvmti agent as part of the perf build. The agent library is called libperf-jvmti.so and is installed in default place together with other files: $ make libperf-jvmti.so BUILD: Doing 'make -j4' parallel build ... CC jvmti/libjvmti.o CC jvmti/jvmti_agent.o LD jvmti/jvmti-in.o LINK libperf-jvmti.so $ make DESTDIR=/tmp/krava/ install-bin ... $ find /tmp/krava/ | grep libperf /tmp/krava/lib64/libperf-jvmti.so /tmp/krava/lib64/libperf-gtk.so Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Stephane Eranian Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: William Cohen Link: http://lkml.kernel.org/r/1478093749-5602-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 26 ++++++++++++++++++++++++++ tools/perf/Makefile.perf | 24 +++++++++++++++++++++++- tools/perf/jvmti/Build | 8 ++++++++ tools/perf/tests/make | 2 +- 4 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 tools/perf/jvmti/Build diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index cffdd9cf3ebf7..8a493d46fab94 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -758,6 +758,31 @@ ifndef NO_AUXTRACE endif endif +ifndef NO_JVMTI + ifneq (,$(wildcard /usr/sbin/update-java-alternatives)) + JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}') + else + ifneq (,$(wildcard /usr/sbin/alternatives)) + JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') + endif + endif + ifndef JDIR + $(warning No alternatives command found, you need to set JDIR= to point to the root of your Java directory) + NO_JVMTI := 1 + endif +endif + +ifndef NO_JVMTI + FEATURE_CHECK_CFLAGS-jvmti := -I$(JDIR)/include -I$(JDIR)/include/linux + $(call feature_check,jvmti) + ifeq ($(feature-jvmti), 1) + $(call detected_var,JDIR) + else + $(warning No openjdk 
development package found, please install JDK package) + NO_JVMTI := 1 + endif +endif + # Among the variables below, these: # perfexecdir # template_dir @@ -850,6 +875,7 @@ ifeq ($(VF),1) $(call print_var,sysconfdir) $(call print_var,LIBUNWIND_DIR) $(call print_var,LIBDW_DIR) + $(call print_var,JDIR) ifeq ($(dwarf-post-unwind),1) $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text)) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 7de14f470f3ca..3cb1df43ad3e9 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -86,6 +86,8 @@ include ../scripts/utilities.mak # # Define FEATURES_DUMP to provide features detection dump file # and bypass the feature detection +# +# Define NO_JVMTI if you do not want jvmti agent built # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -283,6 +285,12 @@ ifndef NO_PERF_READ_VDSOX32 PROGRAMS += $(OUTPUT)perf-read-vdsox32 endif +LIBJVMTI = libperf-jvmti.so + +ifndef NO_JVMTI +PROGRAMS += $(OUTPUT)$(LIBJVMTI) +endif + # what 'all' will build and 'install' will install, in perfexecdir ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) @@ -551,6 +559,16 @@ $(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-vdso-map.c $(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c endif +ifndef NO_JVMTI +LIBJVMTI_IN := $(OUTPUT)jvmti/jvmti-in.o + +$(LIBJVMTI_IN): FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=jvmti obj=jvmti + +$(OUTPUT)$(LIBJVMTI): $(LIBJVMTI_IN) + $(QUIET_LINK)$(CC) -shared -Wl,-soname -Wl,$(LIBJVMTI) -o $@ $< -lelf -lrt +endif + $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) LIBPERF_IN := $(OUTPUT)libperf-in.o @@ -687,6 +705,10 @@ endif ifndef NO_PERF_READ_VDSOX32 $(call QUIET_INSTALL, perf-read-vdsox32) \ $(INSTALL) $(OUTPUT)perf-read-vdsox32 '$(DESTDIR_SQ)$(bindir_SQ)'; +endif +ifndef NO_JVMTI + $(call QUIET_INSTALL, $(LIBJVMTI)) \ + $(INSTALL) $(OUTPUT)$(LIBJVMTI) 
'$(DESTDIR_SQ)$(libdir_SQ)'; endif $(call QUIET_INSTALL, libexec) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' @@ -754,7 +776,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected - $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents + $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \ $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ diff --git a/tools/perf/jvmti/Build b/tools/perf/jvmti/Build new file mode 100644 index 0000000000000..eaeb8cb5379bd --- /dev/null +++ b/tools/perf/jvmti/Build @@ -0,0 +1,8 @@ +jvmti-y += libjvmti.o +jvmti-y += jvmti_agent.o + +CFLAGS_jvmti = -fPIC -DPIC -I$(JDIR)/include -I$(JDIR)/include/linux +CFLAGS_REMOVE_jvmti = -Wmissing-declarations +CFLAGS_REMOVE_jvmti += -Wstrict-prototypes +CFLAGS_REMOVE_jvmti += -Wextra +CFLAGS_REMOVE_jvmti += -Wwrite-strings diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 143f4d549769a..08ed7f12cc37d 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -106,7 +106,7 @@ make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 
-make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 +make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 # $(run) contains all available tests run := make_pure From 8c9c3d2f950cca57f5fa9330c4d15d8f0dfda092 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 12 Nov 2016 13:10:16 +0100 Subject: [PATCH 06/15] perf kvmti: Remove unused Makefile file Now when jvmti compilation is plugged into Makefile.perf, there's no need for this makefile. Signed-off-by: Jiri Olsa Acked-by: Stephane Eranian Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: William Cohen Link: http://lkml.kernel.org/r/20161112121016.GA17194@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/jvmti/Makefile | 89 --------------------------------------- 1 file changed, 89 deletions(-) delete mode 100644 tools/perf/jvmti/Makefile diff --git a/tools/perf/jvmti/Makefile b/tools/perf/jvmti/Makefile deleted file mode 100644 index df14e6b67b63b..0000000000000 --- a/tools/perf/jvmti/Makefile +++ /dev/null @@ -1,89 +0,0 @@ -ARCH=$(shell uname -m) - -ifeq ($(ARCH), x86_64) -JARCH=amd64 -endif -ifeq ($(ARCH), armv7l) -JARCH=armhf -endif -ifeq ($(ARCH), armv6l) -JARCH=armhf -endif -ifeq ($(ARCH), aarch64) -JARCH=aarch64 -endif -ifeq ($(ARCH), ppc64) -JARCH=powerpc -endif -ifeq ($(ARCH), ppc64le) -JARCH=powerpc -endif - -DESTDIR=/usr/local - -VERSION=1 -REVISION=0 -AGE=0 - -LN=ln -sf -RM=rm - -SLIBJVMTI=libjvmti.so.$(VERSION).$(REVISION).$(AGE) -VLIBJVMTI=libjvmti.so.$(VERSION) -SLDFLAGS=-shared -Wl,-soname -Wl,$(VLIBJVMTI) -SOLIBEXT=so - -# The following works at least on fedora 23, you may need the next -# line for other distros. 
-ifneq (,$(wildcard /usr/sbin/update-java-alternatives)) -JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}') -else - ifneq (,$(wildcard /usr/sbin/alternatives)) - JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') - endif -endif -ifndef JDIR -$(error Could not find alternatives command, you need to set JDIR= to point to the root of your Java directory) -else - ifeq (,$(wildcard $(JDIR)/include/jvmti.h)) - $(error the openjdk development package appears to me missing, install and try again) - endif -endif -$(info Using Java from $(JDIR)) -# -lrt required in 32-bit mode for clock_gettime() -LIBS=-lelf -lrt -INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux - -TARGETS=$(SLIBJVMTI) - -SRCS=libjvmti.c jvmti_agent.c -OBJS=$(SRCS:.c=.o) -SOBJS=$(OBJS:.o=.lo) -OPT=-O2 -g -Werror -Wall - -CFLAGS=$(INCDIR) $(OPT) - -all: $(TARGETS) - -.c.o: - $(CC) $(CFLAGS) -c $*.c -.c.lo: - $(CC) -fPIC -DPIC $(CFLAGS) -c $*.c -o $*.lo - -$(OBJS) $(SOBJS): Makefile jvmti_agent.h ../util/jitdump.h - -$(SLIBJVMTI): $(SOBJS) - $(CC) $(CFLAGS) $(SLDFLAGS) -o $@ $(SOBJS) $(LIBS) - $(LN) $@ libjvmti.$(SOLIBEXT) - -clean: - $(RM) -f *.o *.so.* *.so *.lo - -install: - -mkdir -p $(DESTDIR)/lib - install -m 755 $(SLIBJVMTI) $(DESTDIR)/lib/ - (cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) $(VLIBJVMTI)) - (cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) libjvmti.$(SOLIBEXT)) - ldconfig - -.SUFFIXES: .c .S .o .lo From 909236083ee58399b371d085fef5cfac9bce3ec8 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 4 Nov 2016 15:44:17 +0900 Subject: [PATCH 07/15] perf config: Add support for getting config key-value pairs Add a functionality getting specific config key-value pairs. For the syntax examples, perf config [] [section.name ...] e.g. 
To query config items 'report.queue-size' and 'report.children', do # perf config report.queue-size report.children Signed-off-by: Taeung Song Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Nambong Ha Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: Wookje Kwon Link: http://lkml.kernel.org/r/1478241862-31230-2-git-send-email-treeze.taeung@gmail.com [ Combined patch with docs update with this one ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 18 +++++++++++ tools/perf/builtin-config.c | 40 ++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index cb081ac59fd11..1714b0c8c8e10 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -8,6 +8,8 @@ perf-config - Get and set variables in a configuration file. SYNOPSIS -------- [verse] +'perf config' [] [section.name ...] +or 'perf config' [] -l | --list DESCRIPTION @@ -118,6 +120,22 @@ Given a $HOME/.perfconfig like this: children = true group = true +To query the record mode of call graph, do + + % perf config call-graph.record-mode + +If you want to know multiple config key/value pairs, you can do like + + % perf config report.queue-size call-graph.order report.children + +To query the config value of sort order of call graph in user config file (i.e. `~/.perfconfig`), do + + % perf config --user call-graph.sort-order + +To query the config value of buildid directory in system config file (i.e. 
`$(sysconf)/perfconfig`), do + + % perf config --system buildid.dir + Variables ~~~~~~~~~ diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index e4207a23b52c0..df3fa1c18e551 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -17,7 +17,7 @@ static bool use_system_config, use_user_config; static const char * const config_usage[] = { - "perf config [] [options]", + "perf config [] [options] [section.name ...]", NULL }; @@ -33,6 +33,36 @@ static struct option config_options[] = { OPT_END() }; +static int show_spec_config(struct perf_config_set *set, const char *var) +{ + struct perf_config_section *section; + struct perf_config_item *item; + + if (set == NULL) + return -1; + + perf_config_items__for_each_entry(&set->sections, section) { + if (prefixcmp(var, section->name) != 0) + continue; + + perf_config_items__for_each_entry(&section->items, item) { + const char *name = var + strlen(section->name) + 1; + + if (strcmp(name, item->name) == 0) { + char *value = item->value; + + if (value) { + printf("%s=%s\n", var, value); + return 0; + } + } + + } + } + + return 0; +} + static int show_config(struct perf_config_set *set) { struct perf_config_section *section; @@ -54,7 +84,7 @@ static int show_config(struct perf_config_set *set) int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) { - int ret = 0; + int i, ret = 0; struct perf_config_set *set; char *user_config = mkpath("%s/.perfconfig", getenv("HOME")); @@ -100,7 +130,11 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) } break; default: - usage_with_options(config_usage, config_options); + if (argc) + for (i = 0; argv[i]; i++) + ret = show_spec_config(set, argv[i]); + else + usage_with_options(config_usage, config_options); } perf_config_set__delete(set); From 36662794bb520be828df8e2f3404264f5e7a7973 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 4 Nov 2016 15:44:19 +0900 Subject: [PATCH 08/15] perf config: 
Validate config variable arguments before trying use them You can show the values for several config items as below: # perf config report.queue-size call-graph.record-mode but it is necessary to more precisely check arguments, before passing them to show_spec_config(). This validation function would be also used when parsing config key-value pairs arguments in the near future. Committer notes: Testing it: $ perf config bla. The config variable does not contain a variable name: bla. $ perf config .bla The config variable does not contain a section name: .bla $ perf config bla.bla $ Signed-off-by: Taeung Song Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Nambong Ha Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: Wookje Kwon Link: http://lkml.kernel.org/r/1478241862-31230-4-git-send-email-treeze.taeung@gmail.com [ Fix some spelling errors ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-config.c | 45 +++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index df3fa1c18e551..88a43fe4963c0 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -82,6 +82,27 @@ static int show_config(struct perf_config_set *set) return 0; } +static int parse_config_arg(char *arg, char **var) +{ + const char *last_dot = strchr(arg, '.'); + + /* + * Since "var" actually contains the section name and the real + * config variable name separated by a dot, we have to know where the dot is. 
+ */ + if (last_dot == NULL || last_dot == arg) { + pr_err("The config variable does not contain a section name: %s\n", arg); + return -1; + } + if (!last_dot[1]) { + pr_err("The config variable does not contain a variable name: %s\n", arg); + return -1; + } + + *var = arg; + return 0; +} + int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) { int i, ret = 0; @@ -130,10 +151,26 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) } break; default: - if (argc) - for (i = 0; argv[i]; i++) - ret = show_spec_config(set, argv[i]); - else + if (argc) { + for (i = 0; argv[i]; i++) { + char *var, *arg = strdup(argv[i]); + + if (!arg) { + pr_err("%s: strdup failed\n", __func__); + ret = -1; + break; + } + + if (parse_config_arg(arg, &var) < 0) { + free(arg); + ret = -1; + break; + } + + ret = show_spec_config(set, var); + free(arg); + } + } else usage_with_options(config_usage, config_options); } From c6fc018a7a64c2c3ea56529fd8d0ca0f43408b0f Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 4 Nov 2016 15:44:20 +0900 Subject: [PATCH 09/15] perf config: Add support setting variables in a config file Add setting feature that can add config variables with their values to a config file (i.e. user or system config file) or modify config key-value pairs in a config file. For the syntax examples: perf config [] [section.name[=value] ...] e.g. 
You can set the ui.show-headers to false with # perf config ui.show-headers=false If you want to add or modify several config items, you can do like # perf config annotate.show_nr_jumps=false kmem.default=slab Committer notes: Testing it: $ perf config -l top.children=true report.children=false $ $ perf config top.children=false $ perf config -l top.children=false report.children=false $ $ perf config kmem.default=slab $ perf config -l top.children=false report.children=false kmem.default=slab $ Signed-off-by: Taeung Song Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Nambong Ha Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: Wookje Kwon Link: http://lkml.kernel.org/r/1478241862-31230-5-git-send-email-treeze.taeung@gmail.com [ Combined patch with docs update with this one ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 19 ++++++- tools/perf/builtin-config.c | 68 +++++++++++++++++++++--- tools/perf/util/config.c | 6 +++ tools/perf/util/config.h | 2 + 4 files changed, 88 insertions(+), 7 deletions(-) diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 1714b0c8c8e10..9365b75fd04fd 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -8,7 +8,7 @@ perf-config - Get and set variables in a configuration file. SYNOPSIS -------- [verse] -'perf config' [] [section.name ...] +'perf config' [] [section.name[=value] ...] or 'perf config' [] -l | --list @@ -120,6 +120,23 @@ Given a $HOME/.perfconfig like this: children = true group = true +You can hide source code of annotate feature setting the config to false with + + % perf config annotate.hide_src_code=true + +If you want to add or modify several config items, you can do like + + % perf config ui.show-headers=false kmem.default=slab + +To modify the sort order of report functionality in user config file(i.e. 
`~/.perfconfig`), do + + % perf config --user report.sort-order=srcline + +To change colors of selected line to other foreground and background colors +in system config file (i.e. `$(sysconf)/perfconfig`), do + + % perf config --system colors.selected=yellow,green + To query the record mode of call graph, do % perf config call-graph.record-mode diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index 88a43fe4963c0..7c861b54f3a64 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -17,7 +17,7 @@ static bool use_system_config, use_user_config; static const char * const config_usage[] = { - "perf config [] [options] [section.name ...]", + "perf config [] [options] [section.name[=value] ...]", NULL }; @@ -33,6 +33,39 @@ static struct option config_options[] = { OPT_END() }; +static int set_config(struct perf_config_set *set, const char *file_name, + const char *var, const char *value) +{ + struct perf_config_section *section = NULL; + struct perf_config_item *item = NULL; + const char *first_line = "# this file is auto-generated."; + FILE *fp; + + if (set == NULL) + return -1; + + fp = fopen(file_name, "w"); + if (!fp) + return -1; + + perf_config_set__collect(set, var, value); + fprintf(fp, "%s\n", first_line); + + /* overwrite configvariables */ + perf_config_items__for_each_entry(&set->sections, section) { + fprintf(fp, "[%s]\n", section->name); + + perf_config_items__for_each_entry(&section->items, item) { + if (item->value) + fprintf(fp, "\t%s = %s\n", + item->name, item->value); + } + } + fclose(fp); + + return 0; +} + static int show_spec_config(struct perf_config_set *set, const char *var) { struct perf_config_section *section; @@ -82,7 +115,7 @@ static int show_config(struct perf_config_set *set) return 0; } -static int parse_config_arg(char *arg, char **var) +static int parse_config_arg(char *arg, char **var, char **value) { const char *last_dot = strchr(arg, '.'); @@ -99,7 +132,21 @@ static int parse_config_arg(char 
*arg, char **var) return -1; } - *var = arg; + *value = strchr(arg, '='); + if (*value == NULL) + *var = arg; + else if (!strcmp(*value, "=")) { + pr_err("The config variable does not contain a value: %s\n", arg); + return -1; + } else { + *value = *value + 1; /* excluding a first character '=' */ + *var = strsep(&arg, "="); + if (*var[0] == '\0') { + pr_err("invalid config variable: %s\n", arg); + return -1; + } + } + return 0; } @@ -153,7 +200,8 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) default: if (argc) { for (i = 0; argv[i]; i++) { - char *var, *arg = strdup(argv[i]); + char *var, *value; + char *arg = strdup(argv[i]); if (!arg) { pr_err("%s: strdup failed\n", __func__); @@ -161,13 +209,21 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) break; } - if (parse_config_arg(arg, &var) < 0) { + if (parse_config_arg(arg, &var, &value) < 0) { free(arg); ret = -1; break; } - ret = show_spec_config(set, var); + if (value == NULL) + ret = show_spec_config(set, var); + else { + const char *config_filename = config_exclusive_filename; + + if (!config_exclusive_filename) + config_filename = user_config; + ret = set_config(set, config_filename, var, value); + } free(arg); } } else diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 18dae745034f7..c8fb65d923cb2 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -602,6 +602,12 @@ static int collect_config(const char *var, const char *value, return -1; } +int perf_config_set__collect(struct perf_config_set *set, + const char *var, const char *value) +{ + return collect_config(var, value, set); +} + static int perf_config_set__init(struct perf_config_set *set) { int ret = -1; diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index 6f813d46045e1..0fcdb8c594b09 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -33,6 +33,8 @@ const char *perf_etc_perfconfig(void); struct 
perf_config_set *perf_config_set__new(void); void perf_config_set__delete(struct perf_config_set *set); +int perf_config_set__collect(struct perf_config_set *set, + const char *var, const char *value); void perf_config__init(void); void perf_config__exit(void); void perf_config__refresh(void); From 08d090cfed8cc2ce5821ddb2b91118979e511019 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 4 Nov 2016 15:44:22 +0900 Subject: [PATCH 10/15] perf config: Mark where are config items from (user or system) To write config items to a particular config file, we should know where is each config section and item from. Current setting functionality of perf-config use autogenerating way by overwriting collected config items to a config file. For example, when collecting config items from user and system config files (i.e. ~/.perfconfig and $(sysconf)/perfconfig), perf_config_set can contain both user and system config items. So we should know where each value is from to avoid merging user and system config items on user config file. 
Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Nambong Ha Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: Wookje Kwon Link: http://lkml.kernel.org/r/1478241862-31230-7-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-config.c | 6 +++++- tools/perf/util/config.c | 16 +++++++++++++++- tools/perf/util/config.h | 4 +++- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index 7c861b54f3a64..8c0d93b7c2f03 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -48,14 +48,18 @@ static int set_config(struct perf_config_set *set, const char *file_name, if (!fp) return -1; - perf_config_set__collect(set, var, value); + perf_config_set__collect(set, file_name, var, value); fprintf(fp, "%s\n", first_line); /* overwrite configvariables */ perf_config_items__for_each_entry(&set->sections, section) { + if (!use_system_config && section->from_system_config) + continue; fprintf(fp, "[%s]\n", section->name); perf_config_items__for_each_entry(&section->items, item) { + if (!use_system_config && section->from_system_config) + continue; if (item->value) fprintf(fp, "\t%s = %s\n", item->name, item->value); diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index c8fb65d923cb2..3d906dbbef74b 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -594,6 +594,19 @@ static int collect_config(const char *var, const char *value, goto out_free; } + /* perf_config_set can contain both user and system config items. + * So we should know where each value is from. + * The classification would be needed when a particular config file + * is overwrited by setting feature i.e. set_config().
+ */ + if (strcmp(config_file_name, perf_etc_perfconfig()) == 0) { + section->from_system_config = true; + item->from_system_config = true; + } else { + section->from_system_config = false; + item->from_system_config = false; + } + ret = set_value(item, value); return ret; @@ -602,9 +615,10 @@ static int collect_config(const char *var, const char *value, return -1; } -int perf_config_set__collect(struct perf_config_set *set, +int perf_config_set__collect(struct perf_config_set *set, const char *file_name, const char *var, const char *value) { + config_file_name = file_name; return collect_config(var, value, set); } diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index 0fcdb8c594b09..1a59a6b43f8bd 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -7,12 +7,14 @@ struct perf_config_item { char *name; char *value; + bool from_system_config; struct list_head node; }; struct perf_config_section { char *name; struct list_head items; + bool from_system_config; struct list_head node; }; @@ -33,7 +35,7 @@ const char *perf_etc_perfconfig(void); struct perf_config_set *perf_config_set__new(void); void perf_config_set__delete(struct perf_config_set *set); -int perf_config_set__collect(struct perf_config_set *set, +int perf_config_set__collect(struct perf_config_set *set, const char *file_name, const char *var, const char *value); void perf_config__init(void); void perf_config__exit(void); From 410024dbbcb1df5b8140a812b4f1a4dbd62ef924 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 31 Oct 2016 09:19:49 +0800 Subject: [PATCH 11/15] perf report: Add branch flag to callchain cursor node Since the branch ip has been added to call stack for easier browsing, this patch adds more branch information. For example, add a flag to indicate if this ip is a branch, and also add with the branch flag. Then we can know if the cursor node represents a branch and know what the branch flag it has. 
The branch history code has a loop detection pass that removes loops. It is useful to know how many loops were removed, so that in later steps we can compute the average number of iterations. For example: Before remove_loops(), entry0: from = 0x100, to = 0x200 entry1: from = 0x300, to = 0x250 entry2: from = 0x300, to = 0x250 entry3: from = 0x300, to = 0x250 entry4: from = 0x700, to = 0x800 After remove_loops() entry0: from = 0x100, to = 0x200 entry1: from = 0x300, to = 0x250 entry2: from = 0x700, to = 0x800 The original entry2 and entry3 are removed. So the number of iterations (from = 0x300, to = 0x250) is equal to removed number + 1 (2 + 1). iterations = removed number + 1; average iterations = Sum(iterations) / number of samples This formula ignores other cases, for example iterations that cross multiple buffers or one buffer that contains 2+ loops, because in practice it is good enough. Signed-off-by: Yao Jin Acked-by: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Linux-kernel@vger.kernel.org Cc: Yao Jin Link: http://lkml.kernel.org/n/1477876794-30749-2-git-send-email-yao.jin@linux.intel.com [ Renamed 'iter' to 'nr_loop_iter' for clarity ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 14 ++++++- tools/perf/util/callchain.h | 8 +++- tools/perf/util/machine.c | 82 ++++++++++++++++++++++++++++++------- 3 files changed, 86 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index ae58b493af454..138a415fad0d9 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -728,7 +728,8 @@ merge_chain_branch(struct callchain_cursor *cursor, list_for_each_entry_safe(list, next_list, &src->val, list) { callchain_cursor_append(cursor, list->ip, - list->ms.map, list->ms.sym); + list->ms.map, list->ms.sym, + false, NULL, 0, 0); list_del(&list->list); free(list); } @@ -765,7 +766,9 @@ int callchain_merge(struct callchain_cursor *cursor, } int callchain_cursor_append(struct
callchain_cursor *cursor, - u64 ip, struct map *map, struct symbol *sym) + u64 ip, struct map *map, struct symbol *sym, + bool branch, struct branch_flags *flags, + int nr_loop_iter, int samples) { struct callchain_cursor_node *node = *cursor->last; @@ -780,6 +783,13 @@ int callchain_cursor_append(struct callchain_cursor *cursor, node->ip = ip; node->map = map; node->sym = sym; + node->branch = branch; + node->nr_loop_iter = nr_loop_iter; + node->samples = samples; + + if (flags) + memcpy(&node->branch_flags, flags, + sizeof(struct branch_flags)); cursor->nr++; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 47cfd10809755..df6329d1c3503 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -125,6 +125,10 @@ struct callchain_cursor_node { u64 ip; struct map *map; struct symbol *sym; + bool branch; + struct branch_flags branch_flags; + int nr_loop_iter; + int samples; struct callchain_cursor_node *next; }; @@ -179,7 +183,9 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor) } int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, - struct map *map, struct symbol *sym); + struct map *map, struct symbol *sym, + bool branch, struct branch_flags *flags, + int nr_loop_iter, int samples); /* Close a cursor writing session. 
Initialize for the reader */ static inline void callchain_cursor_commit(struct callchain_cursor *cursor) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index df85b9efd80f4..9b33bef545818 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1616,7 +1616,11 @@ static int add_callchain_ip(struct thread *thread, struct symbol **parent, struct addr_location *root_al, u8 *cpumode, - u64 ip) + u64 ip, + bool branch, + struct branch_flags *flags, + int nr_loop_iter, + int samples) { struct addr_location al; @@ -1668,7 +1672,8 @@ static int add_callchain_ip(struct thread *thread, if (symbol_conf.hide_unresolved && al.sym == NULL) return 0; - return callchain_cursor_append(cursor, al.addr, al.map, al.sym); + return callchain_cursor_append(cursor, al.addr, al.map, al.sym, + branch, flags, nr_loop_iter, samples); } struct branch_info *sample__resolve_bstack(struct perf_sample *sample, @@ -1757,7 +1762,9 @@ static int resolve_lbr_callchain_sample(struct thread *thread, /* LBR only affects the user callchain */ if (i != chain_nr) { struct branch_stack *lbr_stack = sample->branch_stack; - int lbr_nr = lbr_stack->nr, j; + int lbr_nr = lbr_stack->nr, j, k; + bool branch; + struct branch_flags *flags; /* * LBR callstack can only get user call chain. 
* The mix_chain_nr is kernel call chain @@ -1772,23 +1779,41 @@ static int resolve_lbr_callchain_sample(struct thread *thread, for (j = 0; j < mix_chain_nr; j++) { int err; + branch = false; + flags = NULL; + if (callchain_param.order == ORDER_CALLEE) { if (j < i + 1) ip = chain->ips[j]; - else if (j > i + 1) - ip = lbr_stack->entries[j - i - 2].from; - else + else if (j > i + 1) { + k = j - i - 2; + ip = lbr_stack->entries[k].from; + branch = true; + flags = &lbr_stack->entries[k].flags; + } else { ip = lbr_stack->entries[0].to; + branch = true; + flags = &lbr_stack->entries[0].flags; + } } else { - if (j < lbr_nr) - ip = lbr_stack->entries[lbr_nr - j - 1].from; + if (j < lbr_nr) { + k = lbr_nr - j - 1; + ip = lbr_stack->entries[k].from; + branch = true; + flags = &lbr_stack->entries[k].flags; + } else if (j > lbr_nr) ip = chain->ips[i + 1 - (j - lbr_nr)]; - else + else { ip = lbr_stack->entries[0].to; + branch = true; + flags = &lbr_stack->entries[0].flags; + } } - err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip); + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + branch, flags, 0, 0); if (err) return (err < 0) ? err : 0; } @@ -1813,6 +1838,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, int i, j, err, nr_entries; int skip_idx = -1; int first_call = 0; + int nr_loop_iter; if (perf_evsel__has_branch_callstack(evsel)) { err = resolve_lbr_callchain_sample(thread, cursor, sample, parent, @@ -1868,14 +1894,37 @@ static int thread__resolve_callchain_sample(struct thread *thread, be[i] = branch->entries[branch->nr - i - 1]; } + nr_loop_iter = nr; nr = remove_loops(be, nr); + /* + * Get the number of iterations. + * It's only approximation, but good enough in practice. 
+ */ + if (nr_loop_iter > nr) + nr_loop_iter = nr_loop_iter - nr + 1; + else + nr_loop_iter = 0; + for (i = 0; i < nr; i++) { - err = add_callchain_ip(thread, cursor, parent, root_al, - NULL, be[i].to); + if (i == nr - 1) + err = add_callchain_ip(thread, cursor, parent, + root_al, + NULL, be[i].to, + true, &be[i].flags, + nr_loop_iter, 1); + else + err = add_callchain_ip(thread, cursor, parent, + root_al, + NULL, be[i].to, + true, &be[i].flags, + 0, 0); + if (!err) err = add_callchain_ip(thread, cursor, parent, root_al, - NULL, be[i].from); + NULL, be[i].from, + true, &be[i].flags, + 0, 0); if (err == -EINVAL) break; if (err) @@ -1903,7 +1952,9 @@ static int thread__resolve_callchain_sample(struct thread *thread, if (ip < PERF_CONTEXT_MAX) ++nr_entries; - err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip); + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + false, NULL, 0, 0); if (err) return (err < 0) ? err : 0; @@ -1919,7 +1970,8 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) if (symbol_conf.hide_unresolved && entry->sym == NULL) return 0; return callchain_cursor_append(cursor, entry->ip, - entry->map, entry->sym); + entry->map, entry->sym, + false, NULL, 0, 0); } static int thread__resolve_callchain_unwind(struct thread *thread, From f9a7be7c024319423623f58f5233234cad714e6b Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 31 Oct 2016 09:19:50 +0800 Subject: [PATCH 12/15] perf report: Create a symbol_conf flag for showing branch flag counting Create a new flag show_branchflag_count in symbol_conf. The flag is used to control if showing the branch flag counting information. The flag depends on if the perf.data has branch data and if user chooses the "branch-history" option in perf report command line. 
Signed-off-by: Yao Jin Acked-by: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Linux-kernel@vger.kernel.org Cc: Yao Jin Link: http://lkml.kernel.org/r/1477876794-30749-3-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 3 +++ tools/perf/util/symbol.h | 1 + 2 files changed, 4 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8064de8ceedc4..3dfbfffe2ecdb 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -911,6 +911,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) if (itrace_synth_opts.last_branch) has_br_stack = true; + if (has_br_stack && branch_call_mode) + symbol_conf.show_branchflag_count = true; + /* * Branch mode is a tristate: * -1 means default, so decide based on the file having branch data. diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index d964844eb3141..2d0a905c879af 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -100,6 +100,7 @@ struct symbol_conf { show_total_period, use_callchain, cumulate_callchain, + show_branchflag_count, exclude_other, show_cpu_utilization, initialized, From 3dd029ef94018dfa499c05778dd67d03c00b637c Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 31 Oct 2016 09:19:51 +0800 Subject: [PATCH 13/15] perf report: Calculate and return the branch flag counting Create some branch counters in per callchain list entry. Each counter is for a branch flag. For example, predicted_count counts all the *predicted* branches. The counters get updated by processing the callchain cursor nodes. It also provides functions to retrieve or print the values of counters in callchain list. Besides the counting for branch flags, it also counts and returns the average number of iterations. 
Signed-off-by: Yao Jin Acked-by: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Linux-kernel@vger.kernel.org Cc: Yao Jin Link: http://lkml.kernel.org/r/1477876794-30749-4-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 189 +++++++++++++++++++++++++++++++++++- tools/perf/util/callchain.h | 14 +++ 2 files changed, 202 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 138a415fad0d9..823befd8209a4 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -438,6 +438,21 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) call->ip = cursor_node->ip; call->ms.sym = cursor_node->sym; call->ms.map = cursor_node->map; + + if (cursor_node->branch) { + call->branch_count = 1; + + if (cursor_node->branch_flags.predicted) + call->predicted_count = 1; + + if (cursor_node->branch_flags.abort) + call->abort_count = 1; + + call->cycles_count = cursor_node->branch_flags.cycles; + call->iter_count = cursor_node->nr_loop_iter; + call->samples_count = cursor_node->samples; + } + list_add_tail(&call->list, &node->val); callchain_cursor_advance(cursor); @@ -497,8 +512,23 @@ static enum match_result match_chain(struct callchain_cursor_node *node, right = node->ip; } - if (left == right) + if (left == right) { + if (node->branch) { + cnode->branch_count++; + + if (node->branch_flags.predicted) + cnode->predicted_count++; + + if (node->branch_flags.abort) + cnode->abort_count++; + + cnode->cycles_count += node->branch_flags.cycles; + cnode->iter_count += node->nr_loop_iter; + cnode->samples_count += node->samples; + } + return MATCH_EQ; + } return left > right ? 
MATCH_GT : MATCH_LT; } @@ -947,6 +977,163 @@ int callchain_node__fprintf_value(struct callchain_node *node, return 0; } +static void callchain_counts_value(struct callchain_node *node, + u64 *branch_count, u64 *predicted_count, + u64 *abort_count, u64 *cycles_count) +{ + struct callchain_list *clist; + + list_for_each_entry(clist, &node->val, list) { + if (branch_count) + *branch_count += clist->branch_count; + + if (predicted_count) + *predicted_count += clist->predicted_count; + + if (abort_count) + *abort_count += clist->abort_count; + + if (cycles_count) + *cycles_count += clist->cycles_count; + } +} + +static int callchain_node_branch_counts_cumul(struct callchain_node *node, + u64 *branch_count, + u64 *predicted_count, + u64 *abort_count, + u64 *cycles_count) +{ + struct callchain_node *child; + struct rb_node *n; + + n = rb_first(&node->rb_root_in); + while (n) { + child = rb_entry(n, struct callchain_node, rb_node_in); + n = rb_next(n); + + callchain_node_branch_counts_cumul(child, branch_count, + predicted_count, + abort_count, + cycles_count); + + callchain_counts_value(child, branch_count, + predicted_count, abort_count, + cycles_count); + } + + return 0; +} + +int callchain_branch_counts(struct callchain_root *root, + u64 *branch_count, u64 *predicted_count, + u64 *abort_count, u64 *cycles_count) +{ + if (branch_count) + *branch_count = 0; + + if (predicted_count) + *predicted_count = 0; + + if (abort_count) + *abort_count = 0; + + if (cycles_count) + *cycles_count = 0; + + return callchain_node_branch_counts_cumul(&root->node, + branch_count, + predicted_count, + abort_count, + cycles_count); +} + +static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, + u64 branch_count, u64 predicted_count, + u64 abort_count, u64 cycles_count, + u64 iter_count, u64 samples_count) +{ + double predicted_percent = 0.0; + const char *null_str = ""; + char iter_str[32]; + char *str; + u64 cycles = 0; + + if (branch_count == 0) { + if (fp) + return fprintf(fp, 
" (calltrace)"); + + return scnprintf(bf, bfsize, " (calltrace)"); + } + + if (iter_count && samples_count) { + scnprintf(iter_str, sizeof(iter_str), + ", iterations:%" PRId64 "", + iter_count / samples_count); + str = iter_str; + } else + str = (char *)null_str; + + predicted_percent = predicted_count * 100.0 / branch_count; + cycles = cycles_count / branch_count; + + if ((predicted_percent >= 100.0) && (abort_count == 0)) { + if (fp) + return fprintf(fp, " (cycles:%" PRId64 "%s)", + cycles, str); + + return scnprintf(bf, bfsize, " (cycles:%" PRId64 "%s)", + cycles, str); + } + + if ((predicted_percent < 100.0) && (abort_count == 0)) { + if (fp) + return fprintf(fp, + " (predicted:%.1f%%, cycles:%" PRId64 "%s)", + predicted_percent, cycles, str); + + return scnprintf(bf, bfsize, + " (predicted:%.1f%%, cycles:%" PRId64 "%s)", + predicted_percent, cycles, str); + } + + if (fp) + return fprintf(fp, + " (predicted:%.1f%%, abort:%" PRId64 ", cycles:%" PRId64 "%s)", + predicted_percent, abort_count, cycles, str); + + return scnprintf(bf, bfsize, + " (predicted:%.1f%%, abort:%" PRId64 ", cycles:%" PRId64 "%s)", + predicted_percent, abort_count, cycles, str); +} + +int callchain_list_counts__printf_value(struct callchain_node *node, + struct callchain_list *clist, + FILE *fp, char *bf, int bfsize) +{ + u64 branch_count, predicted_count; + u64 abort_count, cycles_count; + u64 iter_count = 0, samples_count = 0; + + branch_count = clist->branch_count; + predicted_count = clist->predicted_count; + abort_count = clist->abort_count; + cycles_count = clist->cycles_count; + + if (node) { + struct callchain_list *call; + + list_for_each_entry(call, &node->val, list) { + iter_count += call->iter_count; + samples_count += call->samples_count; + } + } + + return callchain_counts_printf(fp, bf, bfsize, branch_count, + predicted_count, abort_count, + cycles_count, iter_count, samples_count); +} + static void free_callchain_node(struct callchain_node *node) { struct callchain_list 
*list, *tmp; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index df6329d1c3503..d9c70dccf06ad 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -111,6 +111,12 @@ struct callchain_list { bool unfolded; bool has_children; }; + u64 branch_count; + u64 predicted_count; + u64 abort_count; + u64 cycles_count; + u64 iter_count; + u64 samples_count; char *srcline; struct list_head list; }; @@ -263,8 +269,16 @@ char *callchain_node__scnprintf_value(struct callchain_node *node, int callchain_node__fprintf_value(struct callchain_node *node, FILE *fp, u64 total); +int callchain_list_counts__printf_value(struct callchain_node *node, + struct callchain_list *clist, + FILE *fp, char *bf, int bfsize); + void free_callchain(struct callchain_root *root); void decay_callchain(struct callchain_root *root); int callchain_node__make_parent_list(struct callchain_node *node); +int callchain_branch_counts(struct callchain_root *root, + u64 *branch_count, u64 *predicted_count, + u64 *abort_count, u64 *cycles_count); + #endif /* __PERF_CALLCHAIN_H */ From 8577ae6b040022ed3ecd11dc395df7af59cce503 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 31 Oct 2016 09:19:52 +0800 Subject: [PATCH 14/15] perf report: Show branch info in callchain entry for stdio mode If the branch is 100% predicted then the "predicted" is hidden. Similarly, if there is no branch tsx abort, the "abort" is hidden. There is only cycles shown (cycle is supported on skylake platform, older platform would be 0). If no iterations, the "iterations" is hidden. 
For example: |--29.93%--main div.c:39 (predicted:50.6%, cycles:1, iterations:18) | main div.c:44 (predicted:50.6%, cycles:1) | | | --22.69%--main div.c:42 (cycles:2, iterations:17) | compute_flag div.c:28 (cycles:2) | | | --10.52%--compute_flag div.c:27 (cycles:1) | rand rand.c:28 (cycles:1) | rand rand.c:28 (cycles:1) | __random random.c:298 (cycles:1) | __random random.c:297 (cycles:1) | __random random.c:295 (cycles:1) | __random random.c:295 (cycles:1) | __random random.c:295 (cycles:1) | __random random.c:295 (cycles:6) Signed-off-by: Yao Jin Acked-by: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Linux-kernel@vger.kernel.org Cc: Yao Jin Link: http://lkml.kernel.org/r/1477876794-30749-5-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/stdio/hist.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 89d8441f98905..668f4aecf2e6d 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -41,7 +41,9 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node, { int i; size_t ret = 0; - char bf[1024]; + char bf[1024], *alloc_str = NULL; + char buf[64]; + const char *str; ret += callchain__fprintf_left_margin(fp, left_margin); for (i = 0; i < depth; i++) { @@ -56,8 +58,26 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node, } else ret += fprintf(fp, "%s", " "); } - fputs(callchain_list__sym_name(chain, bf, sizeof(bf), false), fp); + + str = callchain_list__sym_name(chain, bf, sizeof(bf), false); + + if (symbol_conf.show_branchflag_count) { + if (!period) + callchain_list_counts__printf_value(node, chain, NULL, + buf, sizeof(buf)); + else + callchain_list_counts__printf_value(NULL, chain, NULL, + buf, sizeof(buf)); + + if (asprintf(&alloc_str, "%s%s", str, buf) < 0) + str = "Not enough memory!"; + else + str = alloc_str; + } + + fputs(str, fp); 
fputc('\n', fp); + free(alloc_str); return ret; } @@ -219,8 +239,15 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, } else ret += callchain__fprintf_left_margin(fp, left_margin); - ret += fprintf(fp, "%s\n", callchain_list__sym_name(chain, bf, sizeof(bf), - false)); + ret += fprintf(fp, "%s", + callchain_list__sym_name(chain, bf, + sizeof(bf), + false)); + + if (symbol_conf.show_branchflag_count) + ret += callchain_list_counts__printf_value( + NULL, chain, fp, NULL, 0); + ret += fprintf(fp, "\n"); if (++entries_printed == callchain_param.print_limit) break; From fef51ecd1056b5e090c9fb73e0833bd751389572 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 31 Oct 2016 09:19:53 +0800 Subject: [PATCH 15/15] perf report: Show branch info in callchain entry for browser mode If the branch is 100% predicted then the "predicted" is hidden. Similarly, if there is no branch tsx abort, the "abort" is hidden. There is only cycles shown (cycle is supported on skylake platform, older platform would be 0). If no iterations, the "iterations" is hidden. 
Signed-off-by: Yao Jin Acked-by: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Linux-kernel@vger.kernel.org Cc: Yao Jin Link: http://lkml.kernel.org/r/1477876794-30749-6-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 84f5dd2fb59cb..66676cb8effe8 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -738,6 +738,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, struct callchain_print_arg *arg) { char bf[1024], *alloc_str; + char buf[64], *alloc_str2; const char *str; if (arg->row_offset != 0) { @@ -746,12 +747,26 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, } alloc_str = NULL; + alloc_str2 = NULL; + str = callchain_list__sym_name(chain, bf, sizeof(bf), browser->show_dso); - if (need_percent) { - char buf[64]; + if (symbol_conf.show_branchflag_count) { + if (need_percent) + callchain_list_counts__printf_value(node, chain, NULL, + buf, sizeof(buf)); + else + callchain_list_counts__printf_value(NULL, chain, NULL, + buf, sizeof(buf)); + + if (asprintf(&alloc_str2, "%s%s", str, buf) < 0) + str = "Not enough memory!"; + else + str = alloc_str2; + } + if (need_percent) { callchain_node__scnprintf_value(node, buf, sizeof(buf), total); @@ -764,6 +779,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, print(browser, chain, str, offset, row, arg); free(alloc_str); + free(alloc_str2); return 1; }