diff --git a/Documentation/admin-guide/perf/alibaba_pmu.rst b/Documentation/admin-guide/perf/alibaba_pmu.rst index 11de998bb4806..7d840023903f5 100644 --- a/Documentation/admin-guide/perf/alibaba_pmu.rst +++ b/Documentation/admin-guide/perf/alibaba_pmu.rst @@ -88,6 +88,11 @@ data bandwidth:: -e ali_drw_27080/hif_rmw/ \ -e ali_drw_27080/cycle/ -- sleep 10 +Example usage of counting all memory read/write bandwidth by metric:: + + perf stat -M ddr_read_bandwidth.all -- sleep 10 + perf stat -M ddr_write_bandwidth.all -- sleep 10 + The average DRAM bandwidth can be calculated as follows: - Read Bandwidth = perf_hif_rd * DDRC_WIDTH * DDRC_Freq / DDRC_Cycle diff --git a/MAINTAINERS b/MAINTAINERS index ed37d0466d41b..42459a3259cc1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16763,6 +16763,8 @@ L: linux-kernel@vger.kernel.org S: Supported W: https://perf.wiki.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core +T: git git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools.git perf-tools +T: git git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git perf-tools-next F: arch/*/events/* F: arch/*/events/*/* F: arch/*/include/asm/perf_event.h diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 89430338a3d92..fac42486a8cf0 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -117,6 +117,16 @@ $(OUTPUT)%.s: %.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_s_c) +# bison and flex files are generated in the OUTPUT directory +# so it needs a separate rule to depend on them properly +$(OUTPUT)%-bison.o: $(OUTPUT)%-bison.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,$(host)cc_o_c) + +$(OUTPUT)%-flex.o: $(OUTPUT)%-flex.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,$(host)cc_o_c) + # Gather build data: # obj-y - list of build objects # subdir-y - list of directories to nest diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index f0c5de018a956..dad79ede4e0ae 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -340,7 +340,7 @@ $(OUTPUT)test-jvmti-cmlr.bin: $(BUILD) $(OUTPUT)test-llvm.bin: - $(BUILDXX) -std=gnu++14 \ + $(BUILDXX) -std=gnu++17 \ -I$(shell $(LLVM_CONFIG) --includedir) \ -L$(shell $(LLVM_CONFIG) --libdir) \ $(shell $(LLVM_CONFIG) --libs Core BPF) \ @@ -348,17 +348,15 @@ $(OUTPUT)test-llvm.bin: > $(@:.bin=.make.output) 2>&1 $(OUTPUT)test-llvm-version.bin: - $(BUILDXX) -std=gnu++14 \ + $(BUILDXX) -std=gnu++17 \ -I$(shell $(LLVM_CONFIG) --includedir) \ > $(@:.bin=.make.output) 2>&1 $(OUTPUT)test-clang.bin: - $(BUILDXX) -std=gnu++14 \ + $(BUILDXX) -std=gnu++17 \ -I$(shell $(LLVM_CONFIG) --includedir) \ -L$(shell $(LLVM_CONFIG) --libdir) \ - -Wl,--start-group -lclangBasic -lclangDriver \ - -lclangFrontend -lclangEdit -lclangLex \ - -lclangAST -Wl,--end-group \ + -Wl,--start-group -lclang-cpp -Wl,--end-group \ $(shell $(LLVM_CONFIG) --libs Core option) \ $(shell $(LLVM_CONFIG) --system-libs) \ > $(@:.bin=.make.output) 2>&1 diff --git a/tools/build/feature/test-clang.cpp b/tools/build/feature/test-clang.cpp deleted file mode 100644 index 7d87075cd1c5d..0000000000000 --- a/tools/build/feature/test-clang.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "clang/Basic/Version.h" -#if CLANG_VERSION_MAJOR < 8 -#include "clang/Basic/VirtualFileSystem.h" -#endif -#include "clang/Driver/Driver.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "llvm/ADT/IntrusiveRefCntPtr.h" -#include "llvm/Support/ManagedStatic.h" -#if CLANG_VERSION_MAJOR >= 8 -#include "llvm/Support/VirtualFileSystem.h" -#endif -#include "llvm/Support/raw_ostream.h" - -using namespace clang; -using namespace clang::driver; - -int main() -{ - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - - DiagnosticsEngine Diags(DiagID, &*DiagOpts); - Driver TheDriver("test", "bpf-pc-linux", Diags); - - llvm::llvm_shutdown(); - return 0; -} diff --git a/tools/build/feature/test-cxx.cpp b/tools/build/feature/test-cxx.cpp deleted file mode 100644 index 396aaedd2418d..0000000000000 --- a/tools/build/feature/test-cxx.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include - -static void print_str(std::string s) -{ - std::cout << s << std::endl; -} - -int main() -{ - std::string s("Hello World!"); - print_str(std::move(s)); - std::cout << "|" << s << "|" << std::endl; - return 0; -} diff --git a/tools/build/feature/test-llvm-version.cpp b/tools/build/feature/test-llvm-version.cpp deleted file mode 100644 index 8a091625446af..0000000000000 --- a/tools/build/feature/test-llvm-version.cpp +++ /dev/null @@ -1,12 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include "llvm/Config/llvm-config.h" - -#define NUM_VERSION (((LLVM_VERSION_MAJOR) << 16) + (LLVM_VERSION_MINOR << 8) + LLVM_VERSION_PATCH) -#define pass int main() {printf("%x\n", NUM_VERSION); return 0;} - -#if NUM_VERSION >= 0x030900 -pass -#else -# error This LLVM is not tested yet. -#endif diff --git a/tools/build/feature/test-llvm.cpp b/tools/build/feature/test-llvm.cpp deleted file mode 100644 index 88a3d1bdd9f69..0000000000000 --- a/tools/build/feature/test-llvm.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/raw_ostream.h" -#define NUM_VERSION (((LLVM_VERSION_MAJOR) << 16) + (LLVM_VERSION_MINOR << 8) + LLVM_VERSION_PATCH) - -#if NUM_VERSION < 0x030900 -# error "LLVM version too low" -#endif -int main() -{ - llvm::errs() << "Hello World!\n"; - llvm::llvm_shutdown(); - return 0; -} diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index ba2dcf64f4e63..ae64090184d32 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -148,8 +148,18 @@ struct perf_record_switch { struct perf_record_header_attr { struct perf_event_header header; struct perf_event_attr attr; - __u64 id[]; -}; + /* + * Array of u64 id follows here but we cannot use a flexible array + * because size of attr in the data can be different then current + * version. Please use perf_record_header_attr_id() below. + * + * __u64 id[]; // do not use this + */ +}; + +/* Returns the pointer to id array based on the actual attr size. */ +#define perf_record_header_attr_id(evt) \ + ((void *)&(evt)->attr.attr + (evt)->attr.attr.size) enum { PERF_CPU_MAP__CPUS = 0, diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index f04f0eaded985..ca5789625cd2b 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -67,6 +67,9 @@ SUBSYSTEM 'internals':: Benchmark internal perf functionality. +'uprobe':: + Benchmark overhead of uprobe + BPF. + 'all':: All benchmark subsystems. diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 1478068ad5dd4..0b4e79dbd3f68 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -125,9 +125,6 @@ Given a $HOME/.perfconfig like this: group = true skip-empty = true - [llvm] - dump-obj = true - clang-opt = -g You can hide source code of annotate feature setting the config to false with @@ -657,36 +654,6 @@ ftrace.*:: -F option is not specified. Possible values are 'function' and 'function_graph'. -llvm.*:: - llvm.clang-path:: - Path to clang. If omit, search it from $PATH. - - llvm.clang-bpf-cmd-template:: - Cmdline template. Below lines show its default value. Environment - variable is used to pass options. - "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ - "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \ - "$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \ - "-Wno-unused-value -Wno-pointer-sign " \ - "-working-directory $WORKING_DIR " \ - "-c \"$CLANG_SOURCE\" --target=bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE" - - llvm.clang-opt:: - Options passed to clang. - - llvm.kbuild-dir:: - kbuild directory. If not set, use /lib/modules/`uname -r`/build. - If set to "" deliberately, skip kernel header auto-detector. - - llvm.kbuild-opts:: - Options passed to 'make' when detecting kernel header options. - - llvm.dump-obj:: - Enable perf dump BPF object files compiled by LLVM. - - llvm.opts:: - Options passed to llc. - samples.*:: samples.context:: diff --git a/tools/perf/Documentation/perf-dlfilter.txt b/tools/perf/Documentation/perf-dlfilter.txt index fb22e3b31dc5c..8887cc20a809e 100644 --- a/tools/perf/Documentation/perf-dlfilter.txt +++ b/tools/perf/Documentation/perf-dlfilter.txt @@ -64,6 +64,12 @@ internal filtering. If implemented, 'filter_description' should return a one-line description of the filter, and optionally a longer description. +Do not assume the 'sample' argument is valid (dereferenceable) +after 'filter_event' and 'filter_event_early' return. + +Do not assume data referenced by pointers in struct perf_dlfilter_sample +is valid (dereferenceable) after 'filter_event' and 'filter_event_early' return. + The perf_dlfilter_sample structure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -150,7 +156,8 @@ struct perf_dlfilter_fns { const char *(*srcline)(void *ctx, __u32 *line_number); struct perf_event_attr *(*attr)(void *ctx); __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len); - void *(*reserved[120])(void *); + void (*al_cleanup)(void *ctx, struct perf_dlfilter_al *al); + void *(*reserved[119])(void *); }; ---- @@ -161,7 +168,8 @@ struct perf_dlfilter_fns { 'args' returns arguments from --dlarg options. 'resolve_address' provides information about 'address'. al->size must be set -before calling. Returns 0 on success, -1 otherwise. +before calling. Returns 0 on success, -1 otherwise. Call al_cleanup() (if present, +see below) when 'al' data is no longer needed. 'insn' returns instruction bytes and length. @@ -171,6 +179,12 @@ before calling. Returns 0 on success, -1 otherwise. 'object_code' reads object code and returns the number of bytes read. +'al_cleanup' must be called (if present, so check perf_dlfilter_fns.al_cleanup != NULL) +after resolve_address() to free any associated resources. + +Do not assume pointers obtained via perf_dlfilter_fns are valid (dereferenceable) +after 'filter_event' and 'filter_event_early' return. + The perf_dlfilter_al structure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -197,9 +211,13 @@ struct perf_dlfilter_al { /* Below members are only populated by resolve_ip() */ __u8 filtered; /* true if this sample event will be filtered out */ const char *comm; + void *priv; /* Private data. Do not change */ }; ---- +Do not assume data referenced by pointers in struct perf_dlfilter_al +is valid (dereferenceable) after 'filter_event' and 'filter_event_early' return. + perf_dlfilter_sample flags ~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index df4595563801e..d780b93fcf871 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -96,8 +96,9 @@ OPTIONS for 'perf ftrace trace' --func-opts:: List of options allowed to set: - call-graph - Display kernel stack trace for function tracer. - irq-info - Display irq context info for function tracer. + + - call-graph - Display kernel stack trace for function tracer. + - irq-info - Display irq context info for function tracer. -G:: --graph-funcs=:: @@ -118,11 +119,12 @@ OPTIONS for 'perf ftrace trace' --graph-opts:: List of options allowed to set: - nosleep-time - Measure on-CPU time only for function_graph tracer. - noirqs - Ignore functions that happen inside interrupt. - verbose - Show process names, PIDs, timestamps, etc. - thresh= - Setup trace duration threshold in microseconds. - depth= - Set max depth for function graph tracer to follow. + + - nosleep-time - Measure on-CPU time only for function_graph tracer. + - noirqs - Ignore functions that happen inside interrupt. + - verbose - Show process names, PIDs, timestamps, etc. + - thresh= - Setup trace duration threshold in microseconds. + - depth= - Set max depth for function graph tracer to follow. OPTIONS for 'perf ftrace latency' diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 680396c56bd10..d5217be012d79 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -99,20 +99,6 @@ OPTIONS If you want to profile write accesses in [0x1000~1008), just set 'mem:0x1000/8:w'. - - a BPF source file (ending in .c) or a precompiled object file (ending - in .o) selects one or more BPF events. - The BPF program can attach to various perf events based on the ELF section - names. - - When processing a '.c' file, perf searches an installed LLVM to compile it - into an object file first. Optional clang options can be passed via the - '--clang-opt' command line option, e.g.: - - perf record --clang-opt "-DLINUX_VERSION_CODE=0x50000" \ - -e tests/bpf-script-example.c - - Note: '--clang-opt' must be placed before '--event/-e'. - - a group of events surrounded by a pair of brace ("{event1,event2,...}"). Each event is separated by commas and the group should be quoted to prevent the shell interpretation. You also need to use --group on @@ -523,9 +509,10 @@ CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI. Select AUX area tracing Snapshot Mode. This option is valid only with an AUX area tracing event. Optionally, certain snapshot capturing parameters can be specified in a string that follows this option: - 'e': take one last snapshot on exit; guarantees that there is at least one + + - 'e': take one last snapshot on exit; guarantees that there is at least one snapshot in the output file; - : if the PMU supports this, specify the desired snapshot size. + - : if the PMU supports this, specify the desired snapshot size. In Snapshot Mode trace data is captured only when signal SIGUSR2 is received and on exit if the above 'e' option is given. @@ -547,14 +534,6 @@ PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT, CoreSight or Arm SPE) switch events will be enabled automatically, which can be suppressed by by the option --no-switch-events. ---clang-path=PATH:: -Path to clang binary to use for compiling BPF scriptlets. -(enabled when BPF support is on) - ---clang-opt=OPTIONS:: -Options passed to clang when compiling BPF scriptlets. -(enabled when BPF support is on) - --vmlinux=PATH:: Specify vmlinux path which has debuginfo. (enabled when BPF prologue is on) @@ -572,8 +551,9 @@ providing implementation for Posix AIO API. --affinity=mode:: Set affinity mask of trace reading thread according to the policy defined by 'mode' value: - node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer - cpu - thread affinity mask is set to cpu of the processed mmap buffer + + - node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer + - cpu - thread affinity mask is set to cpu of the processed mmap buffer --mmap-flush=number:: @@ -625,16 +605,17 @@ Record timestamp boundary (time of first/last samples). --switch-output[=mode]:: Generate multiple perf.data files, timestamp prefixed, switching to a new one based on 'mode' value: - "signal" - when receiving a SIGUSR2 (default value) or - - when reaching the size threshold, size is expected to - be a number with appended unit character - B/K/M/G -