Skip to content

Commit

Permalink
Merge tag 'perf-tools-fixes-for-v6.0-2022-08-13' of git://git.kernel.…
Browse files Browse the repository at this point in the history
…org/pub/scm/linux/kernel/git/acme/linux

Pull more perf tool updates from Arnaldo Carvalho de Melo:

 - 'perf c2c' now supports ARM64, adjust its output to cope with
   differences with what is in x86_64. Now go find false sharing on
   ARM64 (at least Neoverse) as well!

 - Refactor the JSON processing, making the output more compact and thus
   reducing the size of the resulting perf binary

 - Improvements for 'perf offcpu' profiling, including tracking child
   processes

 - Update Intel JSON metrics and events files for broadwellde,
   broadwellx, cascadelakex, haswellx, icelakex, ivytown, jaketown,
   knightslanding, sapphirerapids, skylakex and snowridgex

 - Add 'perf stat' JSON output and a 'perf test' entry for it

 - Ignore memfd and anonymous mmap events if jitdump present

 - Refactor 'perf test' shell tests allowing subdirs

 - Fix an error handling path in 'parse_perf_probe_command()'

 - Fixes for the guest Intel PT tracing patchkit in the 1st batch of
   this merge window

 - Print debuginfod queries if -v option is used, to explain delays in
   processing when debuginfo servers are enabled to fetch DSOs with
   richer symbol tables

 - Improve error message for 'perf record -p not_existing_pid'

 - Fix openssl and libbpf feature detection

 - Add PMU pai_crypto event description for IBM z16 on 'perf list'

 - Fix typos and duplicated words in comments in various places

* tag 'perf-tools-fixes-for-v6.0-2022-08-13' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (81 commits)
  perf test: Refactor shell tests allowing subdirs
  perf vendor events: Update events for snowridgex
  perf vendor events: Update events and metrics for skylakex
  perf vendor events: Update metrics for sapphirerapids
  perf vendor events: Update events for knightslanding
  perf vendor events: Update metrics for jaketown
  perf vendor events: Update metrics for ivytown
  perf vendor events: Update events and metrics for icelakex
  perf vendor events: Update events and metrics for haswellx
  perf vendor events: Update events and metrics for cascadelakex
  perf vendor events: Update events and metrics for broadwellx
  perf vendor events: Update metrics for broadwellde
  perf jevents: Fold strings optimization
  perf jevents: Compress the pmu_events_table
  perf metrics: Copy entire pmu_event in find metric
  perf pmu-events: Hide the pmu_events
  perf pmu-events: Don't assume pmu_event is an array
  perf pmu-events: Move test events/metrics to JSON
  perf test: Use full metric resolution
  perf pmu-events: Hide pmu_events_map
  ...
  • Loading branch information
Linus Torvalds committed Aug 14, 2022
2 parents d785610 + 7391db6 commit 96f86ff
Show file tree
Hide file tree
Showing 112 changed files with 95,286 additions and 6,479 deletions.
6 changes: 4 additions & 2 deletions tools/bpf/bpftool/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,11 @@ INSTALL ?= install
RM ?= rm -f

FEATURE_USER = .bpftool
FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled libcap \
FEATURE_TESTS = libbfd libbfd-liberty libbfd-liberty-z \
disassembler-four-args disassembler-init-styled libcap \
clang-bpf-co-re
FEATURE_DISPLAY = libbfd libcap clang-bpf-co-re
FEATURE_DISPLAY = libbfd libbfd-liberty libbfd-liberty-z \
libcap clang-bpf-co-re

check_feat := 1
NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
Expand Down
14 changes: 9 additions & 5 deletions tools/build/feature/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,16 @@ all: $(FILES)

__BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
BUILD_BFD = $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
BUILD_ALL = $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap

__BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1

###############################

$(OUTPUT)test-all.bin:
$(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap
$(BUILD_ALL) || $(BUILD_ALL) -lopcodes -liberty

$(OUTPUT)test-hello.bin:
$(BUILD)
Expand Down Expand Up @@ -241,16 +243,18 @@ $(OUTPUT)test-libpython.bin:
$(BUILD) $(FLAGS_PYTHON_EMBED)

$(OUTPUT)test-libbfd.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
$(BUILD_BFD)

$(OUTPUT)test-libbfd-buildid.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
$(BUILD_BFD) || $(BUILD_BFD) -liberty || $(BUILD_BFD) -liberty -lz

$(OUTPUT)test-disassembler-four-args.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes
$(BUILD_BFD) -lopcodes || $(BUILD_BFD) -lopcodes -liberty || \
$(BUILD_BFD) -lopcodes -liberty -lz

$(OUTPUT)test-disassembler-init-styled.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes
$(BUILD_BFD) -lopcodes || $(BUILD_BFD) -lopcodes -liberty || \
$(BUILD_BFD) -lopcodes -liberty -lz

$(OUTPUT)test-reallocarray.bin:
$(BUILD)
Expand Down
21 changes: 11 additions & 10 deletions tools/build/feature/test-libcrypto.c
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
// SPDX-License-Identifier: GPL-2.0
#include <openssl/evp.h>
#include <openssl/sha.h>
#include <openssl/md5.h>

/*
* The MD5_* API have been deprecated since OpenSSL 3.0, which causes the
* feature test to fail silently. This is a workaround.
*/
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"

int main(void)
{
MD5_CTX context;
EVP_MD_CTX *mdctx;
unsigned char md[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH];
unsigned char dat[] = "12345";
unsigned int digest_len;

mdctx = EVP_MD_CTX_new();
if (!mdctx)
return 0;

MD5_Init(&context);
MD5_Update(&context, &dat[0], sizeof(dat));
MD5_Final(&md[0], &context);
EVP_DigestInit_ex(mdctx, EVP_md5(), NULL);
EVP_DigestUpdate(mdctx, &dat[0], sizeof(dat));
EVP_DigestFinal_ex(mdctx, &md[0], &digest_len);
EVP_MD_CTX_free(mdctx);

SHA1(&dat[0], sizeof(dat), &md[0]);

Expand Down
2 changes: 1 addition & 1 deletion tools/include/uapi/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -1310,7 +1310,7 @@ union perf_mem_data_src {
#define PERF_MEM_SNOOP_SHIFT 19

#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
/* 1 free */
#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */
#define PERF_MEM_SNOOPX_SHIFT 38

/* locked instruction */
Expand Down
16 changes: 16 additions & 0 deletions tools/perf/Documentation/guest-files.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
include::guestmount.txt[]

--guestkallsyms=<path>::
Guest OS /proc/kallsyms file copy. perf reads it to get guest
kernel symbols. Users copy it out from guest OS.

--guestmodules=<path>::
Guest OS /proc/modules file copy. perf reads it to get guest
kernel module information. Users copy it out from guest OS.

--guestvmlinux=<path>::
Guest OS kernel vmlinux.

--guest-code::
Indicate that guest code can be found in the hypervisor process,
which is a common case for KVM test programs.
11 changes: 11 additions & 0 deletions tools/perf/Documentation/guestmount.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
--guestmount=<path>::
Guest OS root file system mount directory. Users mount guest OS
root directories under <path> by a specific filesystem access method,
typically, sshfs.
For example, start 2 guest OS, one's pid is 8888 and the other's is 9999:
[verse]
$ mkdir \~/guestmount
$ cd \~/guestmount
$ sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/
$ sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/
$ perf {GMEXAMPLECMD} --guestmount=~/guestmount {GMEXAMPLESUBCMD}
31 changes: 24 additions & 7 deletions tools/perf/Documentation/perf-c2c.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,9 @@ REPORT OPTIONS

-d::
--display::
Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
Switch to HITM type (rmt, lcl) or peer snooping type (peer) to display
and sort on. Total HITMs (tot) as default, except Arm64 uses peer mode
as default.

--stitch-lbr::
Show callgraph with stitched LBRs, which may have more complete
Expand Down Expand Up @@ -174,12 +176,18 @@ For each cacheline in the 1) list we display following data:
Cacheline
- cacheline address (hex number)

Rmt/Lcl Hitm
Rmt/Lcl Hitm (Display with HITM types)
- cacheline percentage of all Remote/Local HITM accesses

LLC Load Hitm - Total, LclHitm, RmtHitm
Peer Snoop (Display with peer type)
- cacheline percentage of all peer accesses

LLC Load Hitm - Total, LclHitm, RmtHitm (For display with HITM types)
- count of Total/Local/Remote load HITMs

Load Peer - Total, Local, Remote (For display with peer type)
- count of Total/Local/Remote load from peer cache or DRAM

Total records
- sum of all cachelines accesses

Expand All @@ -201,16 +209,21 @@ For each cacheline in the 1) list we display following data:
- count of LLC load accesses, includes LLC hits and LLC HITMs

RMT Load Hit - RmtHit, RmtHitm
- count of remote load accesses, includes remote hits and remote HITMs
- count of remote load accesses, includes remote hits and remote HITMs;
on Arm Neoverse cores, RmtHit is used to account for remote accesses,
including remote DRAM or any upward cache level in the remote node

Load Dram - Lcl, Rmt
- count of local and remote DRAM accesses

For each offset in the 2) list we display following data:

HITM - Rmt, Lcl
HITM - Rmt, Lcl (Display with HITM types)
- % of Remote/Local HITM accesses for given offset within cacheline

Peer Snoop - Rmt, Lcl (Display with peer type)
- % of Remote/Local peer accesses for given offset within cacheline

Store Refs - L1 Hit, L1 Miss, N/A
- % of store accesses that hit L1, missed L1 and N/A (not available) memory
level for given offset within cacheline
Expand All @@ -227,9 +240,12 @@ For each offset in the 2) list we display following data:
Code address
- code address responsible for the accesses

cycles - rmt hitm, lcl hitm, load
cycles - rmt hitm, lcl hitm, load (Display with HITM types)
- sum of cycles for given accesses - Remote/Local HITM and generic load

cycles - rmt peer, lcl peer, load (Display with peer type)
- sum of cycles for given accesses - Remote/Local peer load and generic load

cpu cnt
- number of cpus that participated in the access

Expand All @@ -251,7 +267,8 @@ The 'Node' field displays nodes that accesses given cacheline
offset. Its output comes in 3 flavors:
- node IDs separated by ','
- node IDs with stats for each ID, in following format:
Node{cpus %hitms %stores}
Node{cpus %hitms %stores} (Display with HITM types)
Node{cpus %peers %stores} (Display with peer type)
- node IDs with list of affected CPUs in following format:
Node{cpu list}

Expand Down
4 changes: 4 additions & 0 deletions tools/perf/Documentation/perf-inject.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ include::itrace.txt[]
should be used, and also --buildid-all and --switch-events may be
useful.

:GMEXAMPLECMD: inject
:GMEXAMPLESUBCMD:
include::guestmount.txt[]

SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1],
Expand Down
25 changes: 5 additions & 20 deletions tools/perf/Documentation/perf-kvm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,26 +77,11 @@ OPTIONS
Collect host side performance profile.
--guest::
Collect guest side performance profile.
--guestmount=<path>::
Guest os root file system mount directory. Users mounts guest os
root directories under <path> by a specific filesystem access method,
typically, sshfs. For example, start 2 guest os. The one's pid is 8888
and the other's is 9999.
#mkdir ~/guestmount; cd ~/guestmount
#sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/
#sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/
#perf kvm --host --guest --guestmount=~/guestmount top
--guestkallsyms=<path>::
Guest os /proc/kallsyms file copy. 'perf' kvm' reads it to get guest
kernel symbols. Users copy it out from guest os.
--guestmodules=<path>::
Guest os /proc/modules file copy. 'perf' kvm' reads it to get guest
kernel module information. Users copy it out from guest os.
--guestvmlinux=<path>::
Guest os kernel vmlinux.
--guest-code::
Indicate that guest code can be found in the hypervisor process,
which is a common case for KVM test programs.

:GMEXAMPLECMD: kvm --host --guest
:GMEXAMPLESUBCMD: top
include::guest-files.txt[]

-v::
--verbose::
Be more verbose (show counter open errors, etc).
Expand Down
8 changes: 4 additions & 4 deletions tools/perf/Documentation/perf-script.txt
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ OPTIONS
Instruction Trace decoding.

The machine_pid and vcpu fields are derived from data resulting from using
perf insert to insert a perf.data file recorded inside a virtual machine into
perf inject to insert a perf.data file recorded inside a virtual machine into
a perf.data file recorded on the host at the same time.

Finally, a user may not set fields to none for all event types.
Expand Down Expand Up @@ -507,9 +507,9 @@ include::itrace.txt[]
The known limitations include exception handling such as
setjmp/longjmp, which will cause calls/returns not to match.

--guest-code::
Indicate that guest code can be found in the hypervisor process,
which is a common case for KVM test programs.
:GMEXAMPLECMD: script
:GMEXAMPLESUBCMD:
include::guest-files.txt[]

SEE ALSO
--------
Expand Down
21 changes: 21 additions & 0 deletions tools/perf/Documentation/perf-stat.txt
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,27 @@ Additional metrics may be printed with all earlier fields being empty.

include::intel-hybrid.txt[]

JSON FORMAT
-----------

With -j, perf stat is able to print out a JSON format output
that can be used for parsing.

- timestamp : optional usec time stamp in fractions of second (with -I)
- optional aggregate options:
- core : core identifier (with --per-core)
- die : die identifier (with --per-die)
- socket : socket identifier (with --per-socket)
- node : node identifier (with --per-node)
- thread : thread identifier (with --per-thread)
- counter-value : counter value
- unit : unit of the counter value or empty
- event : event name
- variance : optional variance if multiple values are collected (with -r)
- runtime : run time of counter
- metric-value : optional metric value
- metric-unit : optional unit of metric

SEE ALSO
--------
linkperf:perf-top[1], linkperf:perf-list[1]
13 changes: 3 additions & 10 deletions tools/perf/Makefile.config
Original file line number Diff line number Diff line change
Expand Up @@ -297,9 +297,6 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)

FEATURE_CHECK_LDFLAGS-libaio = -lrt

FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl

CORE_CFLAGS += -fno-omit-frame-pointer
CORE_CFLAGS += -ggdb3
CORE_CFLAGS += -funwind-tables
Expand Down Expand Up @@ -329,8 +326,8 @@ ifneq ($(TCMALLOC),)
endif

ifeq ($(FEATURES_DUMP),)
# We will display at the end of this Makefile.config, using $(call feature_display_entries)
# As we may retry some feature detection here, see the disassembler-four-args case, for instance
# We will display at the end of this Makefile.config, using $(call feature_display_entries),
# as we may retry some feature detection here.
FEATURE_DISPLAY_DEFERRED := 1
include $(srctree)/tools/build/Makefile.feature
else
Expand Down Expand Up @@ -924,13 +921,9 @@ ifndef NO_LIBBFD

ifeq ($(feature-libbfd-liberty), 1)
EXTLIBS += -lbfd -lopcodes -liberty
FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl
FEATURE_CHECK_LDFLAGS-disassembler-init-styled += -liberty -ldl
else
ifeq ($(feature-libbfd-liberty-z), 1)
EXTLIBS += -lbfd -lopcodes -liberty -lz
FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl
FEATURE_CHECK_LDFLAGS-disassembler-init-styled += -liberty -lz -ldl
endif
endif
$(call feature_check,disassembler-four-args)
Expand Down Expand Up @@ -1356,7 +1349,7 @@ endif

# re-generate FEATURE-DUMP as we may have called feature_check, found out
# extra libraries to add to LDFLAGS of some other test and then redo those
# tests, see the block about libbfd, disassembler-four-args, for instance.
# tests.
$(shell rm -f $(FEATURE_DUMP_FILENAME))
$(foreach feat,$(FEATURE_TESTS),$(shell echo "$(call feature_assign,$(feat))" >> $(FEATURE_DUMP_FILENAME)))

Expand Down
3 changes: 2 additions & 1 deletion tools/perf/Makefile.perf
Original file line number Diff line number Diff line change
Expand Up @@ -1005,7 +1005,8 @@ install-tests: all install-gtk
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
$(INSTALL) tests/shell/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \
$(INSTALL) tests/shell/lib/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'
$(INSTALL) tests/shell/lib/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \
$(INSTALL) tests/shell/lib/*.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'

install-bin: install-tools install-tests install-traceevent-plugins

Expand Down
2 changes: 1 addition & 1 deletion tools/perf/arch/arm/util/cs-etm.c
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
if (opts->full_auxtrace) {
struct evsel *tracking_evsel;

err = parse_events(evlist, "dummy:u", NULL);
err = parse_event(evlist, "dummy:u");
if (err)
goto out;

Expand Down
2 changes: 1 addition & 1 deletion tools/perf/arch/arm64/util/arm-spe.c
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR);

/* Add dummy event to keep tracking */
err = parse_events(evlist, "dummy:u", NULL);
err = parse_event(evlist, "dummy:u");
if (err)
return err;

Expand Down
4 changes: 2 additions & 2 deletions tools/perf/arch/arm64/util/pmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "../../../util/cpumap.h"
#include "../../../util/pmu.h"

const struct pmu_events_map *pmu_events_map__find(void)
const struct pmu_events_table *pmu_events_table__find(void)
{
struct perf_pmu *pmu = NULL;

Expand All @@ -18,7 +18,7 @@ const struct pmu_events_map *pmu_events_map__find(void)
if (pmu->cpus->nr != cpu__max_cpu().cpu)
return NULL;

return perf_pmu__find_map(pmu);
return perf_pmu__find_table(pmu);
}

return NULL;
Expand Down
2 changes: 1 addition & 1 deletion tools/perf/arch/x86/tests/intel-cqm.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ int test__intel_cqm_count_nmi_context(struct test_suite *test __maybe_unused, in
return TEST_FAIL;
}

ret = parse_events(evlist, "intel_cqm/llc_occupancy/", NULL);
ret = parse_event(evlist, "intel_cqm/llc_occupancy/");
if (ret) {
pr_debug("parse_events failed, is \"intel_cqm/llc_occupancy/\" available?\n");
err = TEST_SKIP;
Expand Down
Loading

0 comments on commit 96f86ff

Please sign in to comment.