diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index bb7b271397a66..377d794d3105c 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -141,8 +141,9 @@ enum perf_event_sample_format { PERF_SAMPLE_TRANSACTION = 1U << 17, PERF_SAMPLE_REGS_INTR = 1U << 18, PERF_SAMPLE_PHYS_ADDR = 1U << 19, + PERF_SAMPLE_AUX = 1U << 20, - PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; @@ -300,6 +301,7 @@ enum perf_event_read_format { /* add: sample_stack_user */ #define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ #define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ +#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -424,7 +426,9 @@ struct perf_event_attr { */ __u32 aux_watermark; __u16 sample_max_stack; - __u16 __reserved_2; /* align to __u64 */ + __u16 __reserved_2; + __u32 aux_sample_size; + __u32 __reserved_3; }; /* @@ -864,6 +868,8 @@ enum perf_event_type { * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR + * { u64 size; + * char data[size]; } && PERF_SAMPLE_AUX * }; */ PERF_RECORD_SAMPLE = 9, diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 5315f3787f8d6..cbb429f550625 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -232,10 +232,10 @@ install_pkgconfig: install_headers: $(call QUIET_INSTALL, headers) \ - $(call do_install,event-parse.h,$(DESTDIR)$(includedir_SQ),644); \ - $(call do_install,event-utils.h,$(DESTDIR)$(includedir_SQ),644); \ - $(call do_install,trace-seq.h,$(DESTDIR)$(includedir_SQ),644); \ - $(call do_install,kbuffer.h,$(DESTDIR)$(includedir_SQ),644) + $(call do_install,event-parse.h,$(includedir_SQ),644); \ + $(call do_install,event-utils.h,$(includedir_SQ),644); \ + $(call do_install,trace-seq.h,$(includedir_SQ),644); \ + $(call do_install,kbuffer.h,$(includedir_SQ),644) install: install_lib diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 552592d153fb8..f3cbf86e51acf 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1473,8 +1473,10 @@ static int copy_filter_type(struct tep_event_filter *filter, if (strcmp(str, "TRUE") == 0 || strcmp(str, "FALSE") == 0) { /* Add trivial event */ arg = allocate_arg(); - if (arg == NULL) + if (arg == NULL) { + free(str); return -1; + } arg->type = TEP_FILTER_ARG_BOOLEAN; if (strcmp(str, "TRUE") == 0) @@ -1483,8 +1485,11 @@ static int copy_filter_type(struct tep_event_filter *filter, arg->boolean.value = 0; filter_type = add_filter_type(filter, event->id); - if (filter_type == NULL) + if (filter_type == NULL) { + free(str); + free_arg(arg); return -1; + } filter_type->filter = arg; diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index e0d9e7dd4f176..2cf2d9e9d0da1 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -434,6 +434,56 @@ pwr_evt Enable power events. The power events provide information about "0" otherwise. +AUX area sampling option +------------------------ + +To select Intel PT "sampling" the AUX area sampling option can be used: + + --aux-sample + +Optionally it can be followed by the sample size in bytes e.g. + + --aux-sample=8192 + +In addition, the Intel PT event to sample must be defined e.g. + + -e intel_pt//u + +Samples on other events will be created containing Intel PT data e.g. the +following will create Intel PT samples on the branch-misses event, note the +events must be grouped using {}: + + perf record --aux-sample -e '{intel_pt//u,branch-misses:u}' + +An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to +events. In this case, the grouping is implied e.g. + + perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u + +is the same as: + + perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}' + +but allows for also using an address filter e.g.: + + perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls + +It is important to select a sample size that is big enough to contain at least +one PSB packet. If not a warning will be displayed: + + Intel PT sample size (%zu) may be too small for PSB period (%zu) + +The calculation used for that is: if sample_size <= psb_period + 256 display the +warning. When sampling is used, psb_period defaults to 0 (2KiB). + +The default sample size is 4KiB. + +The sample size is passed in aux_sample_size in struct perf_event_attr. The +sample size is limited by the maximum event size which is 64KiB. It is +difficult to know how big the event might be without the trace sample attached, +but the tool validates that the sample size is not greater than 60KiB. + + new snapshot option ------------------- @@ -487,8 +537,8 @@ their mlock limit (which defaults to 64KiB but is not multiplied by the number of cpus). In full-trace mode, powers of two are allowed for buffer size, with a minimum -size of 2 pages. In snapshot mode, it is the same but the minimum size is -1 page. +size of 2 pages. In snapshot mode or sampling mode, it is the same but the +minimum size is 1 page. The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g. @@ -501,12 +551,17 @@ Intel PT modes of operation Intel PT can be used in 2 modes: full-trace mode + sample mode snapshot mode Full-trace mode traces continuously e.g. perf record -e intel_pt//u uname +Sample mode attaches a Intel PT sample to other events e.g. + + perf record --aux-sample -e intel_pt//u -e branch-misses:u + Snapshot mode captures the available data when a signal is sent e.g. perf record -v -e intel_pt//u -S ./loopy 1000000000 & diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index ebcba1f95513d..b23a4012a6064 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -62,6 +62,9 @@ OPTIONS like this: name=\'CPU_CLK_UNHALTED.THREAD:cmask=0x1\'. - 'aux-output': Generate AUX records instead of events. This requires that an AUX area event is also provided. + - 'aux-sample-size': Set sample size for AUX area sampling. If the + '--aux-sample' option has been used, set aux-sample-size=0 to disable + AUX area sampling for the event. See the linkperf:perf-list[1] man page for more parameters. @@ -433,6 +436,12 @@ can be specified in a string that follows this option: In Snapshot Mode trace data is captured only when signal SIGUSR2 is received and on exit if the above 'e' option is given. +--aux-sample[=OPTIONS]:: +Select AUX area sampling. At least one of the events selected by the -e option +must be an AUX area event. Samples on other events will be created containing +data from the AUX area. Optionally sample size may be specified, otherwise it +defaults to 4KiB. + --proc-map-timeout:: When processing pre-existing threads /proc/XXX/mmap, it may take a long time, because the file may be huge. A time out is needed in such cases. diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c index 96f4a2c118937..7abc9fd4cbec4 100644 --- a/tools/perf/arch/x86/util/auxtrace.c +++ b/tools/perf/arch/x86/util/auxtrace.c @@ -26,7 +26,11 @@ struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist, bool found_bts = false; intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); + if (intel_pt_pmu) + intel_pt_pmu->auxtrace = true; intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME); + if (intel_bts_pmu) + intel_bts_pmu->auxtrace = true; evlist__for_each_entry(evlist, evsel) { if (intel_pt_pmu && evsel->core.attr.type == intel_pt_pmu->type) diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index f7f68a50a5cd5..27d9e214d0680 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -113,6 +113,11 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, const struct perf_cpu_map *cpus = evlist->core.cpus; bool privileged = perf_event_paranoid_check(-1); + if (opts->auxtrace_sample_mode) { + pr_err("Intel BTS does not support AUX area sampling\n"); + return -EINVAL; + } + btsr->evlist = evlist; btsr->snapshot_mode = opts->auxtrace_snapshot_mode; diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index d6d26256915f8..20df442fdf36d 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -17,6 +17,7 @@ #include "../../util/event.h" #include "../../util/evlist.h" #include "../../util/evsel.h" +#include "../../util/evsel_config.h" #include "../../util/cpumap.h" #include "../../util/mmap.h" #include @@ -551,6 +552,43 @@ static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, evsel->core.attr.config); } +static void intel_pt_config_sample_mode(struct perf_pmu *intel_pt_pmu, + struct evsel *evsel) +{ + struct perf_evsel_config_term *term; + u64 user_bits = 0, bits; + + term = perf_evsel__get_config_term(evsel, CFG_CHG); + if (term) + user_bits = term->val.cfg_chg; + + bits = perf_pmu__format_bits(&intel_pt_pmu->format, "psb_period"); + + /* Did user change psb_period */ + if (bits & user_bits) + return; + + /* Set psb_period to 0 */ + evsel->core.attr.config &= ~bits; +} + +static void intel_pt_min_max_sample_sz(struct evlist *evlist, + size_t *min_sz, size_t *max_sz) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + size_t sz = evsel->core.attr.aux_sample_size; + + if (!sz) + continue; + if (min_sz && (sz < *min_sz || !*min_sz)) + *min_sz = sz; + if (max_sz && sz > *max_sz) + *max_sz = sz; + } +} + /* * Currently, there is not enough information to disambiguate different PEBS * events, so only allow one. @@ -606,6 +644,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, return -EINVAL; } + if (opts->auxtrace_snapshot_mode && opts->auxtrace_sample_mode) { + pr_err("Snapshot mode (" INTEL_PT_PMU_NAME " PMU) and sample trace cannot be used together\n"); + return -EINVAL; + } + if (opts->use_clockid) { pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n"); return -EINVAL; @@ -617,6 +660,9 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, if (!opts->full_auxtrace) return 0; + if (opts->auxtrace_sample_mode) + intel_pt_config_sample_mode(intel_pt_pmu, intel_pt_evsel); + err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel); if (err) return err; @@ -666,6 +712,34 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, opts->auxtrace_snapshot_size, psb_period); } + /* Set default sizes for sample mode */ + if (opts->auxtrace_sample_mode) { + size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist); + size_t min_sz = 0, max_sz = 0; + + intel_pt_min_max_sample_sz(evlist, &min_sz, &max_sz); + if (!opts->auxtrace_mmap_pages && !privileged && + opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + if (!opts->auxtrace_mmap_pages) { + size_t sz = round_up(max_sz, page_size) / page_size; + + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); + } + if (max_sz > opts->auxtrace_mmap_pages * (size_t)page_size) { + pr_err("Sample size %zu must not be greater than AUX area tracing mmap size %zu\n", + max_sz, + opts->auxtrace_mmap_pages * (size_t)page_size); + return -EINVAL; + } + pr_debug2("Intel PT min. sample size: %zu max. sample size: %zu\n", + min_sz, max_sz); + if (psb_period && + min_sz <= psb_period + INTEL_PT_PSB_PERIOD_NEAR) + ui__warning("Intel PT sample size (%zu) may be too small for PSB period (%zu)\n", + min_sz, psb_period); + } + /* Set default sizes for full trace mode */ if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { if (privileged) { @@ -682,7 +756,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; size_t min_sz; - if (opts->auxtrace_snapshot_mode) + if (opts->auxtrace_snapshot_mode || opts->auxtrace_sample_mode) min_sz = KiB(4); else min_sz = KiB(8); @@ -1136,5 +1210,10 @@ struct auxtrace_record *intel_pt_recording_init(int *err) ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options; ptr->itr.reference = intel_pt_reference; ptr->itr.read_finish = intel_pt_read_finish; + /* + * Decoding starts at a PSB packet. Minimum PSB period is 2K so 4K + * should give at least 1 PSB per sample. + */ + ptr->itr.default_aux_sample_size = 4096; return &ptr->itr; } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 1e5d28311e143..9664a72a089da 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -45,6 +45,7 @@ struct perf_inject { u64 aux_id; struct list_head samples; struct itrace_synth_opts itrace_synth_opts; + char event_copy[PERF_SAMPLE_MAX_SIZE]; }; struct event_entry { @@ -214,6 +215,28 @@ static int perf_event__drop_aux(struct perf_tool *tool, return 0; } +static union perf_event * +perf_inject__cut_auxtrace_sample(struct perf_inject *inject, + union perf_event *event, + struct perf_sample *sample) +{ + size_t sz1 = sample->aux_sample.data - (void *)event; + size_t sz2 = event->header.size - sample->aux_sample.size - sz1; + union perf_event *ev = (union perf_event *)inject->event_copy; + + if (sz1 > event->header.size || sz2 > event->header.size || + sz1 + sz2 > event->header.size || + sz1 < sizeof(struct perf_event_header) + sizeof(u64)) + return event; + + memcpy(ev, event, sz1); + memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2); + ev->header.size = sz1 + sz2; + ((u64 *)((void *)ev + sz1))[-1] = 0; + + return ev; +} + typedef int (*inject_handler)(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -226,6 +249,9 @@ static int perf_event__repipe_sample(struct perf_tool *tool, struct evsel *evsel, struct machine *machine) { + struct perf_inject *inject = container_of(tool, struct perf_inject, + tool); + if (evsel && evsel->handler) { inject_handler f = evsel->handler; return f(tool, event, sample, evsel, machine); @@ -233,6 +259,9 @@ static int perf_event__repipe_sample(struct perf_tool *tool, build_id__mark_dso_hit(tool, event, sample, evsel, machine); + if (inject->itrace_synth_opts.set && sample->aux_sample.size) + event = perf_inject__cut_auxtrace_sample(inject, event, sample); + return perf_event__repipe_synth(tool, event); } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 7ab3110b40351..b5063d3b6fd07 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -680,6 +680,11 @@ static int record__auxtrace_init(struct record *rec) if (err) return err; + err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts, + rec->opts.auxtrace_sample_opts); + if (err) + return err; + return auxtrace_parse_filters(rec->evlist); } @@ -752,6 +757,8 @@ static int record__mmap_evlist(struct record *rec, struct evlist *evlist) { struct record_opts *opts = &rec->opts; + bool auxtrace_overwrite = opts->auxtrace_snapshot_mode || + opts->auxtrace_sample_mode; char msg[512]; if (opts->affinity != PERF_AFFINITY_SYS) @@ -759,7 +766,7 @@ static int record__mmap_evlist(struct record *rec, if (evlist__mmap_ex(evlist, opts->mmap_pages, opts->auxtrace_mmap_pages, - opts->auxtrace_snapshot_mode, + auxtrace_overwrite, opts->nr_cblocks, opts->affinity, opts->mmap_flush, opts->comp_level) < 0) { if (errno == EPERM) { @@ -1046,6 +1053,7 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, } if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && + !rec->opts.auxtrace_sample_mode && record__auxtrace_mmap_read(rec, map) != 0) { rc = -1; goto out; @@ -1321,6 +1329,15 @@ static int record__synthesize(struct record *rec, bool tail) if (err) goto out; + /* Synthesize id_index before auxtrace_info */ + if (rec->opts.auxtrace_sample_mode) { + err = perf_event__synthesize_id_index(tool, + process_synthesized_event, + session->evlist, machine); + if (err) + goto out; + } + if (rec->opts.full_auxtrace) { err = perf_event__synthesize_auxtrace_info(rec->itr, tool, session, process_synthesized_event); @@ -2329,6 +2346,8 @@ static struct option __record_options[] = { parse_clockid), OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, "opts", "AUX area tracing Snapshot Mode", ""), + OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, + "opts", "sample AUX area", ""), OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, "per thread proc mmap processing timeout in ms"), OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 585805f51f155..ab0f6e516b03c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -493,7 +493,9 @@ static int perf_evlist__tui_block_hists_browse(struct evlist *evlist, evlist__for_each_entry(evlist, pos) { ret = report__browse_block_hists(&rep->block_reports[i++].hist, - rep->min_percent, pos); + rep->min_percent, pos, + &rep->session->header.env, + &rep->annotation_opts); if (ret != 0) return ret; } @@ -525,7 +527,8 @@ static int perf_evlist__tty_browse_hists(struct evlist *evlist, if (rep->total_cycles_mode) { report__browse_block_hists(&rep->block_reports[i++].hist, - rep->min_percent, pos); + rep->min_percent, pos, + NULL, NULL); continue; } @@ -771,7 +774,7 @@ static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) map->prot & PROT_EXEC ? 'x' : '-', map->flags & MAP_SHARED ? 's' : 'p', map->pgoff, - map->ino, map->dso->name); + map->dso->id.ino, map->dso->name); } return printed; @@ -1418,7 +1421,7 @@ int cmd_report(int argc, const char **argv) if (sort__mode != SORT_MODE__BRANCH) report.total_cycles_mode = false; else - sort_order = "sym"; + sort_order = NULL; } if (strcmp(input_name, "-") != 0) diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index efd0157b9d223..645009c08b3cb 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0|1 -size=112 +size=120 config=0 sample_period=* sample_type=263 diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat index 4d0c2e42b64e8..b0f42c34882e8 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/attr/base-stat @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0 -size=112 +size=120 config=0 sample_period=0 sample_type=65536 diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 3a02426db9a63..2762e11552380 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -150,6 +150,15 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_PHYS_ADDR) COMP(phys_addr); + if (type & PERF_SAMPLE_AUX) { + COMP(aux_sample.size); + if (memcmp(s1->aux_sample.data, s2->aux_sample.data, + s1->aux_sample.size)) { + pr_debug("Samples differ at 'aux_sample'\n"); + return false; + } + } + return true; } @@ -182,6 +191,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) u64 regs[64]; const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL}; const u64 data[] = {0x2211443366558877ULL, 0, 0xaabbccddeeff4321ULL}; + const u64 aux_data[] = {0xa55a, 0, 0xeeddee, 0x0282028202820282}; struct perf_sample sample = { .ip = 101, .pid = 102, @@ -218,6 +228,10 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .regs = regs, }, .phys_addr = 113, + .aux_sample = { + .size = sizeof(aux_data), + .data = (void *)aux_data, + }, }; struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},}; struct perf_sample sample_out; @@ -317,7 +331,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u * were added. Please actually update the test rather than just change * the condition below. */ - if (PERF_SAMPLE_MAX > PERF_SAMPLE_PHYS_ADDR << 1) { + if (PERF_SAMPLE_MAX > PERF_SAMPLE_AUX << 1) { pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n"); return -1; } diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 4d2d0acfd41a2..d4d3558fdef42 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2385,7 +2385,11 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) if (!notes->src) return 0; - evsel = hists_to_evsel(browser->hists); + if (browser->block_evsel) + evsel = browser->block_evsel; + else + evsel = hists_to_evsel(browser->hists); + err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt, browser->annotation_opts); he = hist_browser__selected_entry(browser); @@ -3444,3 +3448,75 @@ int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help, warn_lost_event, annotation_opts); } + +static int block_hists_browser__title(struct hist_browser *browser, char *bf, + size_t size) +{ + struct hists *hists = evsel__hists(browser->block_evsel); + const char *evname = perf_evsel__name(browser->block_evsel); + unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; + int ret; + + ret = scnprintf(bf, size, "# Samples: %lu", nr_samples); + if (evname) + scnprintf(bf + ret, size - ret, " of event '%s'", evname); + + return 0; +} + +int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, + float min_percent, struct perf_env *env, + struct annotation_options *annotation_opts) +{ + struct hists *hists = &bh->block_hists; + struct hist_browser *browser; + int key = -1; + struct popup_action action; + static const char help[] = + " q Quit \n"; + + browser = hist_browser__new(hists); + if (!browser) + return -1; + + browser->block_evsel = evsel; + browser->title = block_hists_browser__title; + browser->min_pcnt = min_percent; + browser->env = env; + browser->annotation_opts = annotation_opts; + + /* reset abort key so that it can get Ctrl-C as a key */ + SLang_reset_tty(); + SLang_init_tty(0, 0, 0); + + memset(&action, 0, sizeof(action)); + + while (1) { + key = hist_browser__run(browser, "? - help", true); + + switch (key) { + case 'q': + goto out; + case '?': + ui_browser__help_window(&browser->b, help); + break; + case 'a': + case K_ENTER: + if (!browser->selection || + !browser->selection->sym) { + continue; + } + + action.ms.map = browser->selection->map; + action.ms.sym = browser->selection->sym; + do_annotate(browser, &action); + continue; + default: + break; + } + } + +out: + hist_browser__delete(browser); + return 0; +} diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index c555c3ccd79d6..eb087e7df6f4b 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -31,6 +31,7 @@ #include "map.h" #include "pmu.h" #include "evsel.h" +#include "evsel_config.h" #include "symbol.h" #include "util/synthetic-events.h" #include "thread_map.h" @@ -57,6 +58,72 @@ #include "symbol/kallsyms.h" #include +static struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel) +{ + struct perf_pmu *pmu = NULL; + + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + if (pmu->type == evsel->core.attr.type) + break; + } + + return pmu; +} + +static bool perf_evsel__is_aux_event(struct evsel *evsel) +{ + struct perf_pmu *pmu = perf_evsel__find_pmu(evsel); + + return pmu && pmu->auxtrace; +} + +/* + * Make a group from 'leader' to 'last', requiring that the events were not + * already grouped to a different leader. + */ +static int perf_evlist__regroup(struct evlist *evlist, + struct evsel *leader, + struct evsel *last) +{ + struct evsel *evsel; + bool grp; + + if (!perf_evsel__is_group_leader(leader)) + return -EINVAL; + + grp = false; + evlist__for_each_entry(evlist, evsel) { + if (grp) { + if (!(evsel->leader == leader || + (evsel->leader == evsel && + evsel->core.nr_members <= 1))) + return -EINVAL; + } else if (evsel == leader) { + grp = true; + } + if (evsel == last) + break; + } + + grp = false; + evlist__for_each_entry(evlist, evsel) { + if (grp) { + if (evsel->leader != leader) { + evsel->leader = leader; + if (leader->core.nr_members < 1) + leader->core.nr_members = 1; + leader->core.nr_members += 1; + } + } else if (evsel == leader) { + grp = true; + } + if (evsel == last) + break; + } + + return 0; +} + static bool auxtrace__dont_decode(struct perf_session *session) { return !session->itrace_synth_opts || @@ -597,6 +664,132 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, return -EINVAL; } +/* + * Event record size is 16-bit which results in a maximum size of about 64KiB. + * Allow about 4KiB for the rest of the sample record, to give a maximum + * AUX area sample size of 60KiB. + */ +#define MAX_AUX_SAMPLE_SIZE (60 * 1024) + +/* Arbitrary default size if no other default provided */ +#define DEFAULT_AUX_SAMPLE_SIZE (4 * 1024) + +static int auxtrace_validate_aux_sample_size(struct evlist *evlist, + struct record_opts *opts) +{ + struct evsel *evsel; + bool has_aux_leader = false; + u32 sz; + + evlist__for_each_entry(evlist, evsel) { + sz = evsel->core.attr.aux_sample_size; + if (perf_evsel__is_group_leader(evsel)) { + has_aux_leader = perf_evsel__is_aux_event(evsel); + if (sz) { + if (has_aux_leader) + pr_err("Cannot add AUX area sampling to an AUX area event\n"); + else + pr_err("Cannot add AUX area sampling to a group leader\n"); + return -EINVAL; + } + } + if (sz > MAX_AUX_SAMPLE_SIZE) { + pr_err("AUX area sample size %u too big, max. %d\n", + sz, MAX_AUX_SAMPLE_SIZE); + return -EINVAL; + } + if (sz) { + if (!has_aux_leader) { + pr_err("Cannot add AUX area sampling because group leader is not an AUX area event\n"); + return -EINVAL; + } + perf_evsel__set_sample_bit(evsel, AUX); + opts->auxtrace_sample_mode = true; + } else { + perf_evsel__reset_sample_bit(evsel, AUX); + } + } + + if (!opts->auxtrace_sample_mode) { + pr_err("AUX area sampling requires an AUX area event group leader plus other events to which to add samples\n"); + return -EINVAL; + } + + if (!perf_can_aux_sample()) { + pr_err("AUX area sampling is not supported by kernel\n"); + return -EINVAL; + } + + return 0; +} + +int auxtrace_parse_sample_options(struct auxtrace_record *itr, + struct evlist *evlist, + struct record_opts *opts, const char *str) +{ + struct perf_evsel_config_term *term; + struct evsel *aux_evsel; + bool has_aux_sample_size = false; + bool has_aux_leader = false; + struct evsel *evsel; + char *endptr; + unsigned long sz; + + if (!str) + goto no_opt; + + if (!itr) { + pr_err("No AUX area event to sample\n"); + return -EINVAL; + } + + sz = strtoul(str, &endptr, 0); + if (*endptr || sz > UINT_MAX) { + pr_err("Bad AUX area sampling option: '%s'\n", str); + return -EINVAL; + } + + if (!sz) + sz = itr->default_aux_sample_size; + + if (!sz) + sz = DEFAULT_AUX_SAMPLE_SIZE; + + /* Set aux_sample_size based on --aux-sample option */ + evlist__for_each_entry(evlist, evsel) { + if (perf_evsel__is_group_leader(evsel)) { + has_aux_leader = perf_evsel__is_aux_event(evsel); + } else if (has_aux_leader) { + evsel->core.attr.aux_sample_size = sz; + } + } +no_opt: + aux_evsel = NULL; + /* Override with aux_sample_size from config term */ + evlist__for_each_entry(evlist, evsel) { + if (perf_evsel__is_aux_event(evsel)) + aux_evsel = evsel; + term = perf_evsel__get_config_term(evsel, AUX_SAMPLE_SIZE); + if (term) { + has_aux_sample_size = true; + evsel->core.attr.aux_sample_size = term->val.aux_sample_size; + /* If possible, group with the AUX event */ + if (aux_evsel && evsel->core.attr.aux_sample_size) + perf_evlist__regroup(evlist, aux_evsel, evsel); + } + } + + if (!str && !has_aux_sample_size) + return 0; + + if (!itr) { + pr_err("No AUX area event to sample\n"); + return -EINVAL; + } + + return auxtrace_validate_aux_sample_size(evlist, opts); +} + struct auxtrace_record *__weak auxtrace_record__init(struct evlist *evlist __maybe_unused, int *err) { @@ -811,6 +1004,113 @@ struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue, } } +struct auxtrace_queue *auxtrace_queues__sample_queue(struct auxtrace_queues *queues, + struct perf_sample *sample, + struct perf_session *session) +{ + struct perf_sample_id *sid; + unsigned int idx; + u64 id; + + id = sample->id; + if (!id) + return NULL; + + sid = perf_evlist__id2sid(session->evlist, id); + if (!sid) + return NULL; + + idx = sid->idx; + + if (idx >= queues->nr_queues) + return NULL; + + return &queues->queue_array[idx]; +} + +int auxtrace_queues__add_sample(struct auxtrace_queues *queues, + struct perf_session *session, + struct perf_sample *sample, u64 data_offset, + u64 reference) +{ + struct auxtrace_buffer buffer = { + .pid = -1, + .data_offset = data_offset, + .reference = reference, + .size = sample->aux_sample.size, + }; + struct perf_sample_id *sid; + u64 id = sample->id; + unsigned int idx; + + if (!id) + return -EINVAL; + + sid = perf_evlist__id2sid(session->evlist, id); + if (!sid) + return -ENOENT; + + idx = sid->idx; + buffer.tid = sid->tid; + buffer.cpu = sid->cpu; + + return auxtrace_queues__add_buffer(queues, session, idx, &buffer, NULL); +} + +struct queue_data { + bool samples; + bool events; +}; + +static int auxtrace_queue_data_cb(struct perf_session *session, + union perf_event *event, u64 offset, + void *data) +{ + struct queue_data *qd = data; + struct perf_sample sample; + int err; + + if (qd->events && event->header.type == PERF_RECORD_AUXTRACE) { + if (event->header.size < sizeof(struct perf_record_auxtrace)) + return -EINVAL; + offset += event->header.size; + return session->auxtrace->queue_data(session, NULL, event, + offset); + } + + if (!qd->samples || event->header.type != PERF_RECORD_SAMPLE) + return 0; + + err = perf_evlist__parse_sample(session->evlist, event, &sample); + if (err) + return err; + + if (!sample.aux_sample.size) + return 0; + + offset += sample.aux_sample.data - (void *)event; + + return session->auxtrace->queue_data(session, &sample, NULL, offset); +} + +int auxtrace_queue_data(struct perf_session *session, bool samples, bool events) +{ + struct queue_data qd = { + .samples = samples, + .events = events, + }; + + if (auxtrace__dont_decode(session)) + return 0; + + if (!session->auxtrace || !session->auxtrace->queue_data) + return -EINVAL; + + return perf_session__peek_events(session, session->header.data_offset, + session->header.data_size, + auxtrace_queue_data_cb, &qd); +} + void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd) { size_t adj = buffer->data_offset & (page_size - 1); @@ -2180,18 +2480,6 @@ static int parse_addr_filter(struct evsel *evsel, const char *filter, return err; } -static struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel) -{ - struct perf_pmu *pmu = NULL; - - while ((pmu = perf_pmu__scan(pmu)) != NULL) { - if (pmu->type == evsel->core.attr.type) - break; - } - - return pmu; -} - static int perf_evsel__nr_addr_filter(struct evsel *evsel) { struct perf_pmu *pmu = perf_evsel__find_pmu(evsel); @@ -2236,6 +2524,16 @@ int auxtrace__process_event(struct perf_session *session, union perf_event *even return session->auxtrace->process_event(session, event, sample, tool); } +void auxtrace__dump_auxtrace_sample(struct perf_session *session, + struct perf_sample *sample) +{ + if (!session->auxtrace || !session->auxtrace->dump_auxtrace_sample || + auxtrace__dont_decode(session)) + return; + + session->auxtrace->dump_auxtrace_sample(session, sample); +} + int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool) { if (!session->auxtrace) diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 3f4aa5427d76b..749d72cd9c7b0 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -141,6 +141,9 @@ struct auxtrace_index { * struct auxtrace - session callbacks to allow AUX area data decoding. * @process_event: lets the decoder see all session events * @process_auxtrace_event: process a PERF_RECORD_AUXTRACE event + * @queue_data: queue an AUX sample or PERF_RECORD_AUXTRACE event for later + * processing + * @dump_auxtrace_sample: dump AUX area sample data * @flush_events: process any remaining data * @free_events: free resources associated with event processing * @free: free resources associated with the session @@ -153,6 +156,11 @@ struct auxtrace { int (*process_auxtrace_event)(struct perf_session *session, union perf_event *event, struct perf_tool *tool); + int (*queue_data)(struct perf_session *session, + struct perf_sample *sample, union perf_event *event, + u64 data_offset); + void (*dump_auxtrace_sample)(struct perf_session *session, + struct perf_sample *sample); int (*flush_events)(struct perf_session *session, struct perf_tool *tool); void (*free_events)(struct perf_session *session); @@ -313,6 +321,7 @@ struct auxtrace_mmap_params { * @reference: provide a 64-bit reference number for auxtrace_event * @read_finish: called after reading from an auxtrace mmap * @alignment: alignment (if any) for AUX area data + * @default_aux_sample_size: default sample size for --aux sample option */ struct auxtrace_record { int (*recording_options)(struct auxtrace_record *itr, @@ -336,6 +345,7 @@ struct auxtrace_record { u64 (*reference)(struct auxtrace_record *itr); int (*read_finish)(struct auxtrace_record *itr, int idx); unsigned int alignment; + unsigned int default_aux_sample_size; }; /** @@ -462,9 +472,19 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues, struct perf_session *session, union perf_event *event, off_t data_offset, struct auxtrace_buffer **buffer_ptr); +struct auxtrace_queue * +auxtrace_queues__sample_queue(struct auxtrace_queues *queues, + struct perf_sample *sample, + struct perf_session *session); +int auxtrace_queues__add_sample(struct auxtrace_queues *queues, + struct perf_session *session, + struct perf_sample *sample, u64 data_offset, + u64 reference); void auxtrace_queues__free(struct auxtrace_queues *queues); int auxtrace_queues__process_index(struct auxtrace_queues *queues, struct perf_session *session); +int auxtrace_queue_data(struct perf_session *session, bool samples, + bool events); struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue, struct auxtrace_buffer *buffer); void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd); @@ -498,6 +518,9 @@ struct auxtrace_record *auxtrace_record__init(struct evlist *evlist, int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, struct record_opts *opts, const char *str); +int auxtrace_parse_sample_options(struct auxtrace_record *itr, + struct evlist *evlist, + struct record_opts *opts, const char *str); int auxtrace_record__options(struct auxtrace_record *itr, struct evlist *evlist, struct record_opts *opts); @@ -550,6 +573,8 @@ int auxtrace_parse_filters(struct evlist *evlist); int auxtrace__process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, struct perf_tool *tool); +void auxtrace__dump_auxtrace_sample(struct perf_session *session, + struct perf_sample *sample); int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool); void auxtrace__free_events(struct perf_session *session); void auxtrace__free(struct perf_session *session); @@ -648,6 +673,18 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused, return -EINVAL; } +static inline +int auxtrace_parse_sample_options(struct auxtrace_record *itr __maybe_unused, + struct evlist *evlist __maybe_unused, + struct record_opts *opts __maybe_unused, + const char *str) +{ + if (!str) + return 0; + pr_err("AUX area tracing not supported\n"); + return -EINVAL; +} + static inline int auxtrace__process_event(struct perf_session *session __maybe_unused, union perf_event *event __maybe_unused, @@ -657,6 +694,12 @@ int auxtrace__process_event(struct perf_session *session __maybe_unused, return 0; } +static inline +void auxtrace__dump_auxtrace_sample(struct perf_session *session __maybe_unused, + struct perf_sample *sample __maybe_unused) +{ +} + static inline int auxtrace__flush_events(struct perf_session *session __maybe_unused, struct perf_tool *tool __maybe_unused) diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 9abc201ebe639..c4b030bf6ec2d 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -10,6 +10,7 @@ #include "map.h" #include "srcline.h" #include "evlist.h" +#include "hist.h" #include "ui/browsers/hists.h" static struct block_header_column { @@ -439,72 +440,9 @@ struct block_report *block_info__create_report(struct evlist *evlist, return block_reports; } -#ifdef HAVE_SLANG_SUPPORT -static int block_hists_browser__title(struct hist_browser *browser, char *bf, - size_t size) -{ - struct hists *hists = evsel__hists(browser->block_evsel); - const char *evname = perf_evsel__name(browser->block_evsel); - unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; - int ret; - - ret = scnprintf(bf, size, "# Samples: %lu", nr_samples); - if (evname) - scnprintf(bf + ret, size - ret, " of event '%s'", evname); - - return 0; -} - -static int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, - float min_percent) -{ - struct hists *hists = &bh->block_hists; - struct hist_browser *browser; - int key = -1; - static const char help[] = - " q Quit \n"; - - browser = hist_browser__new(hists); - if (!browser) - return -1; - - browser->block_evsel = evsel; - browser->title = block_hists_browser__title; - browser->min_pcnt = min_percent; - - /* reset abort key so that it can get Ctrl-C as a key */ - SLang_reset_tty(); - SLang_init_tty(0, 0, 0); - - while (1) { - key = hist_browser__run(browser, "? - help", true); - - switch (key) { - case 'q': - goto out; - case '?': - ui_browser__help_window(&browser->b, help); - break; - default: - break; - } - } - -out: - hist_browser__delete(browser); - return 0; -} -#else -static int block_hists_tui_browse(struct block_hist *bh __maybe_unused, - struct evsel *evsel __maybe_unused, - float min_percent __maybe_unused) -{ - return 0; -} -#endif - int report__browse_block_hists(struct block_hist *bh, float min_percent, - struct evsel *evsel) + struct evsel *evsel, struct perf_env *env, + struct annotation_options *annotation_opts) { int ret; @@ -517,7 +455,8 @@ int report__browse_block_hists(struct block_hist *bh, float min_percent, return 0; case 1: symbol_conf.report_individual_block = true; - ret = block_hists_tui_browse(bh, evsel, min_percent); + ret = block_hists_tui_browse(bh, evsel, min_percent, + env, annotation_opts); hists__delete_entries(&bh->block_hists); return ret; default: diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h index e4d20bccd9b6e..bef0d75e98195 100644 --- a/tools/perf/util/block-info.h +++ b/tools/perf/util/block-info.h @@ -71,7 +71,8 @@ struct block_report *block_info__create_report(struct evlist *evlist, u64 total_cycles); int report__browse_block_hists(struct block_hist *bh, float min_percent, - struct evsel *evsel); + struct evsel *evsel, struct perf_env *env, + struct annotation_options *annotation_opts); float block_info__total_cycles_percent(struct hist_entry *he); diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 0f1b77275a86d..91f21239608bd 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1149,7 +1149,7 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name, return dso; } -void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) +static void dso__set_long_name_id(struct dso *dso, const char *name, struct dso_id *id, bool name_allocated) { struct rb_root *root = dso->root; @@ -1162,8 +1162,8 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) if (root) { rb_erase(&dso->rb_node, root); /* - * __dsos__findnew_link_by_longname() isn't guaranteed to add it - * back, so a clean removal is required here. + * __dsos__findnew_link_by_longname_id() isn't guaranteed to + * add it back, so a clean removal is required here. */ RB_CLEAR_NODE(&dso->rb_node); dso->root = NULL; @@ -1174,7 +1174,12 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) dso->long_name_allocated = name_allocated; if (root) - __dsos__findnew_link_by_longname(root, dso, NULL); + __dsos__findnew_link_by_longname_id(root, dso, NULL, id); +} + +void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) +{ + dso__set_long_name_id(dso, name, NULL, name_allocated); } void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) @@ -1215,13 +1220,15 @@ void dso__set_sorted_by_name(struct dso *dso) dso->sorted_by_name = true; } -struct dso *dso__new(const char *name) +struct dso *dso__new_id(const char *name, struct dso_id *id) { struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1); if (dso != NULL) { strcpy(dso->name, name); - dso__set_long_name(dso, dso->name, false); + if (id) + dso->id = *id; + dso__set_long_name_id(dso, dso->name, id, false); dso__set_short_name(dso, dso->name, false); dso->symbols = dso->symbol_names = RB_ROOT_CACHED; dso->data.cache = RB_ROOT; @@ -1252,6 +1259,11 @@ struct dso *dso__new(const char *name) return dso; } +struct dso *dso__new(const char *name) +{ + return dso__new_id(name, NULL); +} + void dso__delete(struct dso *dso) { if (!RB_EMPTY_NODE(&dso->rb_node)) diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 2f1fcbc6fead1..2db64b79617ae 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -122,6 +122,16 @@ enum dso_load_errno { #define DSO__DATA_CACHE_SIZE 4096 #define DSO__DATA_CACHE_MASK ~(DSO__DATA_CACHE_SIZE - 1) +/* + * Data about backing storage DSO, comes from PERF_RECORD_MMAP2 meta events + */ +struct dso_id { + u32 maj; + u32 min; + u64 ino; + u64 ino_generation; +}; + struct dso_cache { struct rb_node rb_node; u64 offset; @@ -196,6 +206,7 @@ struct dso { u64 db_id; }; struct nsinfo *nsinfo; + struct dso_id id; refcount_t refcnt; char name[0]; }; @@ -214,9 +225,11 @@ static inline void dso__set_loaded(struct dso *dso) dso->loaded = true; } +struct dso *dso__new_id(const char *name, struct dso_id *id); struct dso *dso__new(const char *name); void dso__delete(struct dso *dso); +int dso__cmp_id(struct dso *a, struct dso *b); void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated); void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated); diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 3ea80d203587a..591707c69c39a 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -9,6 +9,40 @@ #include #include // filename__read_build_id +static int __dso_id__cmp(struct dso_id *a, struct dso_id *b) +{ + if (a->maj > b->maj) return -1; + if (a->maj < b->maj) return 1; + + if (a->min > b->min) return -1; + if (a->min < b->min) return 1; + + if (a->ino > b->ino) return -1; + if (a->ino < b->ino) return 1; + + if (a->ino_generation > b->ino_generation) return -1; + if (a->ino_generation < b->ino_generation) return 1; + + return 0; +} + +static int dso_id__cmp(struct dso_id *a, struct dso_id *b) +{ + /* + * The second is always dso->id, so zeroes if not set, assume passing + * NULL for a means a zeroed id + */ + if (a == NULL) + return 0; + + return __dso_id__cmp(a, b); +} + +int dso__cmp_id(struct dso *a, struct dso *b) +{ + return __dso_id__cmp(&a->id, &b->id); +} + bool __dsos__read_build_ids(struct list_head *head, bool with_hits) { bool have_build_id = false; @@ -34,12 +68,30 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) return have_build_id; } +static int __dso__cmp_long_name(const char *long_name, struct dso_id *id, struct dso *b) +{ + int rc = strcmp(long_name, b->long_name); + return rc ?: dso_id__cmp(id, &b->id); +} + +static int __dso__cmp_short_name(const char *short_name, struct dso_id *id, struct dso *b) +{ + int rc = strcmp(short_name, b->short_name); + return rc ?: dso_id__cmp(id, &b->id); +} + +static int dso__cmp_short_name(struct dso *a, struct dso *b) +{ + return __dso__cmp_short_name(a->short_name, &a->id, b); +} + /* * Find a matching entry and/or link current entry to RB tree. * Either one of the dso or name parameter must be non-NULL or the * function will not work. */ -struct dso *__dsos__findnew_link_by_longname(struct rb_root *root, struct dso *dso, const char *name) +struct dso *__dsos__findnew_link_by_longname_id(struct rb_root *root, struct dso *dso, + const char *name, struct dso_id *id) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -51,7 +103,7 @@ struct dso *__dsos__findnew_link_by_longname(struct rb_root *root, struct dso *d */ while (*p) { struct dso *this = rb_entry(*p, struct dso, rb_node); - int rc = strcmp(name, this->long_name); + int rc = __dso__cmp_long_name(name, id, this); parent = *p; if (rc == 0) { @@ -67,7 +119,7 @@ struct dso *__dsos__findnew_link_by_longname(struct rb_root *root, struct dso *d * In this case, the short name should be different. * Comparing the short names to differentiate the DSOs. */ - rc = strcmp(dso->short_name, this->short_name); + rc = dso__cmp_short_name(dso, this); if (rc == 0) { pr_err("Duplicated dso name: %s\n", name); return NULL; @@ -90,7 +142,7 @@ struct dso *__dsos__findnew_link_by_longname(struct rb_root *root, struct dso *d void __dsos__add(struct dsos *dsos, struct dso *dso) { list_add_tail(&dso->node, &dsos->head); - __dsos__findnew_link_by_longname(&dsos->root, dso, NULL); + __dsos__findnew_link_by_longname_id(&dsos->root, dso, NULL, &dso->id); /* * It is now in the linked list, grab a reference, then garbage collect * this when needing memory, by looking at LRU dso instances in the @@ -121,26 +173,27 @@ void dsos__add(struct dsos *dsos, struct dso *dso) up_write(&dsos->lock); } -struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) +static struct dso *__dsos__findnew_by_longname_id(struct rb_root *root, const char *name, struct dso_id *id) +{ + return __dsos__findnew_link_by_longname_id(root, NULL, name, id); +} + +static struct dso *__dsos__find_id(struct dsos *dsos, const char *name, struct dso_id *id, bool cmp_short) { struct dso *pos; if (cmp_short) { list_for_each_entry(pos, &dsos->head, node) - if (strcmp(pos->short_name, name) == 0) + if (__dso__cmp_short_name(name, id, pos) == 0) return pos; return NULL; } - return __dsos__findnew_by_longname(&dsos->root, name); + return __dsos__findnew_by_longname_id(&dsos->root, name, id); } -struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short) +struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) { - struct dso *dso; - down_read(&dsos->lock); - dso = __dsos__find(dsos, name, cmp_short); - up_read(&dsos->lock); - return dso; + return __dsos__find_id(dsos, name, NULL, cmp_short); } static void dso__set_basename(struct dso *dso) @@ -175,9 +228,9 @@ static void dso__set_basename(struct dso *dso) dso__set_short_name(dso, base, true); } -struct dso *__dsos__addnew(struct dsos *dsos, const char *name) +static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, struct dso_id *id) { - struct dso *dso = dso__new(name); + struct dso *dso = dso__new_id(name, id); if (dso != NULL) { __dsos__add(dsos, dso); @@ -188,18 +241,22 @@ struct dso *__dsos__addnew(struct dsos *dsos, const char *name) return dso; } -struct dso *__dsos__findnew(struct dsos *dsos, const char *name) +struct dso *__dsos__addnew(struct dsos *dsos, const char *name) { - struct dso *dso = __dsos__find(dsos, name, false); + return __dsos__addnew_id(dsos, name, NULL); +} - return dso ? dso : __dsos__addnew(dsos, name); +static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id) +{ + struct dso *dso = __dsos__find_id(dsos, name, id, false); + return dso ? dso : __dsos__addnew_id(dsos, name, id); } -struct dso *dsos__findnew(struct dsos *dsos, const char *name) +struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id) { struct dso *dso; down_write(&dsos->lock); - dso = dso__get(__dsos__findnew(dsos, name)); + dso = dso__get(__dsos__findnew_id(dsos, name, id)); up_write(&dsos->lock); return dso; } diff --git a/tools/perf/util/dsos.h b/tools/perf/util/dsos.h index 32f1fbee0feb2..5dbec2bc6966d 100644 --- a/tools/perf/util/dsos.h +++ b/tools/perf/util/dsos.h @@ -9,6 +9,7 @@ #include "rwsem.h" struct dso; +struct dso_id; /* * DSOs are put into both a list for fast iteration and rbtree for fast @@ -24,16 +25,11 @@ void __dsos__add(struct dsos *dsos, struct dso *dso); void dsos__add(struct dsos *dsos, struct dso *dso); struct dso *__dsos__addnew(struct dsos *dsos, const char *name); struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short); -struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short); -struct dso *__dsos__findnew(struct dsos *dsos, const char *name); -struct dso *dsos__findnew(struct dsos *dsos, const char *name); -struct dso *__dsos__findnew_link_by_longname(struct rb_root *root, struct dso *dso, const char *name); - -static inline struct dso *__dsos__findnew_by_longname(struct rb_root *root, const char *name) -{ - return __dsos__findnew_link_by_longname(root, NULL, name); -} +struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id); + +struct dso *__dsos__findnew_link_by_longname_id(struct rb_root *root, struct dso *dso, + const char *name, struct dso_id *id); bool __dsos__read_build_ids(struct list_head *head, bool with_hits); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index a0a0c91cde4a6..85223159737c1 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -114,6 +114,11 @@ enum { #define MAX_INSN 16 +struct aux_sample { + u64 size; + void *data; +}; + struct perf_sample { u64 ip; u32 pid, tid; @@ -142,6 +147,7 @@ struct perf_sample { struct regs_dump intr_regs; struct stack_dump user_stack; struct sample_read read; + struct aux_sample aux_sample; }; #define PERF_MEM_DATA_SRC_NONE \ diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 13051409fd225..3655b9ebb1473 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -176,6 +176,7 @@ void perf_evlist__set_id_pos(struct evlist *evlist); bool perf_can_sample_identifier(void); bool perf_can_record_switch_events(void); bool perf_can_record_cpu_wide(void); +bool perf_can_aux_sample(void); void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain); int record_opts__config(struct record_opts *opts); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1bf60f3256088..f4dea055b0808 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -846,6 +846,11 @@ static void apply_config_terms(struct evsel *evsel, case PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT: attr->aux_output = term->val.aux_output ? 1 : 0; break; + case PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE: + /* Already applied by auxtrace */ + break; + case PERF_EVSEL__CONFIG_TERM_CFG_CHG: + break; default: break; } @@ -905,6 +910,19 @@ static bool is_dummy_event(struct evsel *evsel) (evsel->core.attr.config == PERF_COUNT_SW_DUMMY); } +struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel, + enum evsel_term_type type) +{ + struct perf_evsel_config_term *term, *found_term = NULL; + + list_for_each_entry(term, &evsel->config_terms, list) { + if (term->type == type) + found_term = term; + } + + return found_term; +} + /* * The enable_on_exec/disabled value strategy: * @@ -2209,6 +2227,19 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, array++; } + if (type & PERF_SAMPLE_AUX) { + OVERFLOW_CHECK_u64(array); + sz = *array++; + + OVERFLOW_CHECK(array, sz, max_size); + /* Undo swap of data */ + if (swapped) + mem_bswap_64((char *)array, sz); + data->aux_sample.size = sz; + data->aux_sample.data = (char *)array; + array = (void *)array + sz; + } + return 0; } diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h index 8a7648037c18e..1f8d2fe0b66ed 100644 --- a/tools/perf/util/evsel_config.h +++ b/tools/perf/util/evsel_config.h @@ -25,6 +25,8 @@ enum evsel_term_type { PERF_EVSEL__CONFIG_TERM_BRANCH, PERF_EVSEL__CONFIG_TERM_PERCORE, PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT, + PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE, + PERF_EVSEL__CONFIG_TERM_CFG_CHG, }; struct perf_evsel_config_term { @@ -44,7 +46,18 @@ struct perf_evsel_config_term { unsigned long max_events; bool percore; bool aux_output; + u32 aux_sample_size; + u64 cfg_chg; } val; bool weak; }; + +struct evsel; + +struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel, + enum evsel_term_type type); + +#define perf_evsel__get_config_term(evsel, type) \ + __perf_evsel__get_config_term(evsel, PERF_EVSEL__CONFIG_TERM_ ## type) + #endif // __PERF_EVSEL_CONFIG_H diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 4d87c7b4c1b26..45286900aacbf 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -449,6 +449,8 @@ enum rstype { A_SOURCE }; +struct block_hist; + #ifdef HAVE_SLANG_SUPPORT #include "../ui/keysyms.h" void attr_to_script(char *buf, struct perf_event_attr *attr); @@ -474,6 +476,10 @@ void run_script(char *cmd); int res_sample_browse(struct res_sample *res_samples, int num_res, struct evsel *evsel, enum rstype rstype); void res_sample_init(void); + +int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, + float min_percent, struct perf_env *env, + struct annotation_options *annotation_opts); #else static inline int perf_evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, @@ -518,6 +524,15 @@ static inline int res_sample_browse(struct res_sample *res_samples __maybe_unuse static inline void res_sample_init(void) {} +static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused, + struct evsel *evsel __maybe_unused, + float min_percent __maybe_unused, + struct perf_env *env __maybe_unused, + struct annotation_options *annotation_opts __maybe_unused) +{ + return 0; +} + #define K_LEFT -1000 #define K_RIGHT -2000 #define K_SWITCH_INPUT_DATA -3000 diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index a1c9eb6d4f40d..409afc611be98 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -233,6 +233,16 @@ static void intel_pt_log_event(union perf_event *event) perf_event__fprintf(event, f); } +static void intel_pt_dump_sample(struct perf_session *session, + struct perf_sample *sample) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + + printf("\n"); + intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size); +} + static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, struct auxtrace_buffer *b) { @@ -836,6 +846,18 @@ static bool intel_pt_have_tsc(struct intel_pt *pt) return have_tsc; } +static bool intel_pt_sampling_mode(struct intel_pt *pt) +{ + struct evsel *evsel; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if ((evsel->core.attr.sample_type & PERF_SAMPLE_AUX) && + evsel->core.attr.aux_sample_size) + return true; + } + return false; +} + static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) { u64 quot, rem; @@ -2320,6 +2342,56 @@ static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid, return 0; } +static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue *ptq, + struct auxtrace_queue *queue, + struct perf_sample *sample) +{ + struct machine *m = ptq->pt->machine; + + ptq->pid = sample->pid; + ptq->tid = sample->tid; + ptq->cpu = queue->cpu; + + intel_pt_log("queue %u cpu %d pid %d tid %d\n", + ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); + + thread__zput(ptq->thread); + + if (ptq->tid == -1) + return; + + if (ptq->pid == -1) { + ptq->thread = machine__find_thread(m, -1, ptq->tid); + if (ptq->thread) + ptq->pid = ptq->thread->pid_; + return; + } + + ptq->thread = machine__findnew_thread(m, ptq->pid, ptq->tid); +} + +static int intel_pt_process_timeless_sample(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct auxtrace_queue *queue; + struct intel_pt_queue *ptq; + u64 ts = 0; + + queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session); + if (!queue) + return -EINVAL; + + ptq = queue->priv; + if (!ptq) + return 0; + + ptq->stop = false; + ptq->time = sample->time; + intel_pt_sample_set_pid_tid_cpu(ptq, queue, sample); + intel_pt_run_decoder(ptq, &ts); + return 0; +} + static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) { return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, @@ -2550,7 +2622,11 @@ static int intel_pt_process_event(struct perf_session *session, } if (pt->timeless_decoding) { - if (event->header.type == PERF_RECORD_EXIT) { + if (pt->sampling_mode) { + if (sample->aux_sample.size) + err = intel_pt_process_timeless_sample(pt, + sample); + } else if (event->header.type == PERF_RECORD_EXIT) { err = intel_pt_process_timeless_queues(pt, event->fork.tid, sample->time); @@ -2676,6 +2752,28 @@ static int intel_pt_process_auxtrace_event(struct perf_session *session, return 0; } +static int intel_pt_queue_data(struct perf_session *session, + struct perf_sample *sample, + union perf_event *event, u64 data_offset) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + u64 timestamp; + + if (event) { + return auxtrace_queues__add_event(&pt->queues, session, event, + data_offset, NULL); + } + + if (sample->time && sample->time != (u64)-1) + timestamp = perf_time_to_tsc(sample->time, &pt->tc); + else + timestamp = 0; + + return auxtrace_queues__add_sample(&pt->queues, session, sample, + data_offset, timestamp); +} + struct intel_pt_synth { struct perf_tool dummy_tool; struct perf_session *session; @@ -3178,7 +3276,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (pt->timeless_decoding && !pt->tc.time_mult) pt->tc.time_mult = 1; pt->have_tsc = intel_pt_have_tsc(pt); - pt->sampling_mode = false; + pt->sampling_mode = intel_pt_sampling_mode(pt); pt->est_tsc = !pt->timeless_decoding; pt->unknown_thread = thread__new(999999999, 999999999); @@ -3205,6 +3303,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->auxtrace.process_event = intel_pt_process_event; pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event; + pt->auxtrace.queue_data = intel_pt_queue_data; + pt->auxtrace.dump_auxtrace_sample = intel_pt_dump_sample; pt->auxtrace.flush_events = intel_pt_flush; pt->auxtrace.free_events = intel_pt_free_events; pt->auxtrace.free = intel_pt_free; @@ -3282,7 +3382,10 @@ int intel_pt_process_auxtrace_info(union perf_event *event, intel_pt_setup_pebs_events(pt); - err = auxtrace_queues__process_index(&pt->queues, session); + if (pt->sampling_mode || list_empty(&session->auxtrace_index)) + err = auxtrace_queue_data(session, true, true); + else + err = auxtrace_queues__process_index(&pt->queues, session); if (err) goto err_delete_thread; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 71ee078d30f4b..e2a312c649f07 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1651,6 +1651,12 @@ int machine__process_mmap2_event(struct machine *machine, { struct thread *thread; struct map *map; + struct dso_id dso_id = { + .maj = event->mmap2.maj, + .min = event->mmap2.min, + .ino = event->mmap2.ino, + .ino_generation = event->mmap2.ino_generation, + }; int ret = 0; if (dump_trace) @@ -1671,10 +1677,7 @@ int machine__process_mmap2_event(struct machine *machine, map = map__new(machine, event->mmap2.start, event->mmap2.len, event->mmap2.pgoff, - event->mmap2.maj, - event->mmap2.min, event->mmap2.ino, - event->mmap2.ino_generation, - event->mmap2.prot, + &dso_id, event->mmap2.prot, event->mmap2.flags, event->mmap2.filename, thread); @@ -1727,9 +1730,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event map = map__new(machine, event->mmap.start, event->mmap.len, event->mmap.pgoff, - 0, 0, 0, 0, prot, 0, - event->mmap.filename, - thread); + NULL, prot, 0, event->mmap.filename, thread); if (map == NULL) goto out_problem_map; @@ -2710,9 +2711,14 @@ u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr) return addr_cpumode; } +struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id) +{ + return dsos__findnew_id(&machine->dsos, filename, id); +} + struct dso *machine__findnew_dso(struct machine *machine, const char *filename) { - return dsos__findnew(&machine->dsos, filename); + return machine__findnew_dso_id(machine, filename, NULL); } char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp) diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 1016978f575a9..499be204830d9 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -11,6 +11,7 @@ struct addr_location; struct branch_stack; struct dso; +struct dso_id; struct evsel; struct perf_sample; struct symbol; @@ -202,6 +203,7 @@ int machine__nr_cpus_avail(struct machine *machine); struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); +struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id); struct dso *machine__findnew_dso(struct machine *machine, const char *filename); size_t machine__fprintf(struct machine *machine, FILE *fp); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 67e0f81416cba..744bfbaf35cfc 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -144,8 +144,8 @@ void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso) } struct map *map__new(struct machine *machine, u64 start, u64 len, - u64 pgoff, u32 d_maj, u32 d_min, u64 ino, - u64 ino_gen, u32 prot, u32 flags, char *filename, + u64 pgoff, struct dso_id *id, + u32 prot, u32 flags, char *filename, struct thread *thread) { struct map *map = malloc(sizeof(*map)); @@ -161,11 +161,6 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, anon = is_anon_memory(filename, flags); vdso = is_vdso_map(filename); no_dso = is_no_dso_memory(filename); - - map->maj = d_maj; - map->min = d_min; - map->ino = ino; - map->ino_generation = ino_gen; map->prot = prot; map->flags = flags; nsi = nsinfo__get(thread->nsinfo); @@ -195,7 +190,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, pgoff = 0; dso = machine__findnew_vdso(machine, thread); } else - dso = machine__findnew_dso(machine, filename); + dso = machine__findnew_dso_id(machine, filename, id); if (dso == NULL) goto out_delete; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 0a6c45f85cd93..5e8899883231c 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -30,9 +30,6 @@ struct map { u32 prot; u64 pgoff; u64 reloc; - u32 maj, min; /* only valid for MMAP2 record */ - u64 ino; /* only valid for MMAP2 record */ - u64 ino_generation;/* only valid for MMAP2 record */ /* ip -> dso rip */ u64 (*map_ip)(struct map *, u64); @@ -108,9 +105,11 @@ struct thread; void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso); + +struct dso_id; + struct map *map__new(struct machine *machine, u64 start, u64 len, - u64 pgoff, u32 d_maj, u32 d_min, u64 ino, - u64 ino_gen, u32 prot, u32 flags, + u64 pgoff, struct dso_id *id, u32 prot, u32 flags, char *filename, struct thread *thread); struct map *map__new2(u64 start, struct dso *dso); void map__delete(struct map *map); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 6bae9d6edc121..ed7c008b9c8bd 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -511,6 +511,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, static void tracepoint_error(struct parse_events_error *e, int err, const char *sys, const char *name) { + const char *str; char help[BUFSIZ]; if (!e) @@ -524,18 +525,18 @@ static void tracepoint_error(struct parse_events_error *e, int err, switch (err) { case EACCES: - e->str = strdup("can't access trace events"); + str = "can't access trace events"; break; case ENOENT: - e->str = strdup("unknown tracepoint"); + str = "unknown tracepoint"; break; default: - e->str = strdup("failed to add tracepoint"); + str = "failed to add tracepoint"; break; } tracing_path__strerror_open_tp(err, help, sizeof(help), sys, name); - e->help = strdup(help); + parse_events__handle_error(e, 0, strdup(str), strdup(help)); } static int add_tracepoint(struct list_head *list, int *idx, @@ -996,6 +997,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config", [PARSE_EVENTS__TERM_TYPE_PERCORE] = "percore", [PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT] = "aux-output", + [PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE] = "aux-sample-size", }; static bool config_term_shrinked; @@ -1126,6 +1128,15 @@ do { \ case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: CHECK_TYPE_VAL(NUM); break; + case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: + CHECK_TYPE_VAL(NUM); + if (term->val.num > UINT_MAX) { + parse_events__handle_error(err, term->err_val, + strdup("too big"), + NULL); + return -EINVAL; + } + break; default: parse_events__handle_error(err, term->err_term, strdup("unknown term"), @@ -1177,6 +1188,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_OVERWRITE: case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: + case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: return config_term_common(attr, term, err); default: if (err) { @@ -1272,11 +1284,47 @@ do { \ case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: ADD_CONFIG_TERM(AUX_OUTPUT, aux_output, term->val.num ? 1 : 0); break; + case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: + ADD_CONFIG_TERM(AUX_SAMPLE_SIZE, aux_sample_size, term->val.num); + break; + default: + break; + } + } + return 0; +} + +/* + * Add PERF_EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for + * each bit of attr->config that the user has changed. + */ +static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config, + struct list_head *head_terms) +{ + struct parse_events_term *term; + u64 bits = 0; + int type; + + list_for_each_entry(term, head_config, list) { + switch (term->type_term) { + case PARSE_EVENTS__TERM_TYPE_USER: + type = perf_pmu__format_type(&pmu->format, term->config); + if (type != PERF_PMU_FORMAT_VALUE_CONFIG) + continue; + bits |= perf_pmu__format_bits(&pmu->format, term->config); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG: + bits = ~(u64)0; + break; default: break; } } -#undef ADD_EVSEL_CONFIG + + if (bits) + ADD_CONFIG_TERM(CFG_CHG, cfg_chg, bits); + +#undef ADD_CONFIG_TERM return 0; } @@ -1405,6 +1453,13 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (get_config_terms(head_config, &config_terms)) return -ENOMEM; + /* + * When using default config, record which bits of attr->config were + * changed by the user. + */ + if (pmu->default_config && get_config_chgs(pmu, head_config, &config_terms)) + return -ENOMEM; + if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { struct perf_evsel_config_term *pos, *tmp; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index ff367f248fe82..27596cbd0ba06 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -77,6 +77,7 @@ enum { PARSE_EVENTS__TERM_TYPE_DRV_CFG, PARSE_EVENTS__TERM_TYPE_PERCORE, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT, + PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE, __PARSE_EVENTS__TERM_TYPE_NR, }; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 7469497cd28e4..7b1c8ee537cf6 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -285,6 +285,7 @@ overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); } no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); } percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); } aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } +aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index d4ad3f04923ac..651203126c71e 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -34,7 +34,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), - bit_name(WEIGHT), bit_name(PHYS_ADDR), + bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX), { .name = NULL, } }; #undef bit_name @@ -143,6 +143,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(sample_regs_intr, p_hex); PRINT_ATTRf(aux_watermark, p_unsigned); PRINT_ATTRf(sample_max_stack, p_unsigned); + PRINT_ATTRf(aux_sample_size, p_unsigned); return ret; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index db1e57113f4ba..e8d3489880260 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -931,6 +931,16 @@ __u64 perf_pmu__format_bits(struct list_head *formats, const char *name) return bits; } +int perf_pmu__format_type(struct list_head *formats, const char *name) +{ + struct perf_pmu_format *format = pmu_find_format(formats, name); + + if (!format) + return -1; + + return format->value; +} + /* * Sets value based on the format definition (format parameter) * and unformated value (value parameter). diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 3e8cd31a89ccb..6737e3d5d568c 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -26,6 +26,7 @@ struct perf_pmu { __u32 type; bool selectable; bool is_uncore; + bool auxtrace; int max_precise; struct perf_event_attr *default_config; struct perf_cpu_map *cpus; @@ -71,6 +72,7 @@ int perf_pmu__config_terms(struct list_head *formats, struct list_head *head_terms, bool zero, struct parse_events_error *error); __u64 perf_pmu__format_bits(struct list_head *formats, const char *name); +int perf_pmu__format_type(struct list_head *formats, const char *name); int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, struct perf_pmu_info *info); struct list_head *perf_pmu__alias(struct perf_pmu *pmu, diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 38d6cd22779f0..c470c49a804fd 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -812,7 +812,7 @@ static int verify_representive_line(struct probe_finder *pf, const char *fname, if (strcmp(fname, __fname) || lineno == __lineno) return 0; - pr_warning("This line is sharing the addrees with other lines.\n"); + pr_warning("This line is sharing the address with other lines.\n"); if (pf->pev->point.function) { /* Find best match function name and lines */ diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 8579505c29a4d..7def661685032 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -136,6 +136,37 @@ bool perf_can_record_cpu_wide(void) return true; } +/* + * Architectures are expected to know if AUX area sampling is supported by the + * hardware. Here we check for kernel support. + */ +bool perf_can_aux_sample(void) +{ + struct perf_event_attr attr = { + .size = sizeof(struct perf_event_attr), + .exclude_kernel = 1, + /* + * Non-zero value causes the kernel to calculate the effective + * attribute size up to that byte. + */ + .aux_sample_size = 1, + }; + int fd; + + fd = sys_perf_event_open(&attr, -1, 0, -1, 0); + /* + * If the kernel attribute is big enough to contain aux_sample_size + * then we assume that it is supported. We are relying on the kernel to + * validate the attribute size before anything else that could be wrong. + */ + if (fd < 0 && errno == E2BIG) + return false; + if (fd >= 0) + close(fd); + + return true; +} + void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain) { diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 948bbcf9aef3f..5421fd2ad3831 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -32,6 +32,7 @@ struct record_opts { bool full_auxtrace; bool auxtrace_snapshot_mode; bool auxtrace_snapshot_on_exit; + bool auxtrace_sample_mode; bool record_namespaces; bool record_switch_events; bool all_kernel; @@ -56,6 +57,7 @@ struct record_opts { u64 user_interval; size_t auxtrace_snapshot_size; const char *auxtrace_snapshot_opts; + const char *auxtrace_sample_opts; bool sample_transaction; unsigned initial_delay; bool use_clockid; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f07b8ecb91bcd..d0d7d25b23e3b 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -752,6 +752,7 @@ do { \ bswap_field_32(sample_stack_user); bswap_field_32(aux_watermark); bswap_field_16(sample_max_stack); + bswap_field_32(aux_sample_size); /* * After read_format are bitfields. Check read_format because @@ -1495,8 +1496,13 @@ static int perf_session__deliver_event(struct perf_session *session, if (ret > 0) return 0; - return machines__deliver_event(&session->machines, session->evlist, - event, &sample, tool, file_offset); + ret = machines__deliver_event(&session->machines, session->evlist, + event, &sample, tool, file_offset); + + if (dump_trace && sample.aux_sample.size) + auxtrace__dump_auxtrace_sample(session, &sample); + + return ret; } static s64 perf_session__process_user_event(struct perf_session *session, @@ -1653,6 +1659,34 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, return 0; } +int perf_session__peek_events(struct perf_session *session, u64 offset, + u64 size, peek_events_cb_t cb, void *data) +{ + u64 max_offset = offset + size; + char buf[PERF_SAMPLE_MAX_SIZE]; + union perf_event *event; + int err; + + do { + err = perf_session__peek_event(session, offset, buf, + PERF_SAMPLE_MAX_SIZE, &event, + NULL); + if (err) + return err; + + err = cb(session, event, offset, data); + if (err) + return err; + + offset += event->header.size; + if (event->header.type == PERF_RECORD_AUXTRACE) + offset += event->auxtrace.size; + + } while (offset < max_offset); + + return err; +} + static s64 perf_session__process_event(struct perf_session *session, union perf_event *event, u64 file_offset) { @@ -1958,8 +1992,8 @@ static int __perf_session__process_pipe_events(struct perf_session *session) } static union perf_event * -fetch_mmaped_event(struct perf_session *session, - u64 head, size_t mmap_size, char *buf) +prefetch_event(char *buf, u64 head, size_t mmap_size, + bool needs_swap, union perf_event *error) { union perf_event *event; @@ -1971,20 +2005,32 @@ fetch_mmaped_event(struct perf_session *session, return NULL; event = (union perf_event *)(buf + head); + if (needs_swap) + perf_event_header__bswap(&event->header); - if (session->header.needs_swap) + if (head + event->header.size <= mmap_size) + return event; + + /* We're not fetching the event so swap back again */ + if (needs_swap) perf_event_header__bswap(&event->header); - if (head + event->header.size > mmap_size) { - /* We're not fetching the event so swap back again */ - if (session->header.needs_swap) - perf_event_header__bswap(&event->header); - pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx: fuzzed perf.data?\n", - __func__, head, event->header.size, mmap_size); - return ERR_PTR(-EINVAL); - } + pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx:" + " fuzzed or compressed perf.data?\n",__func__, head, event->header.size, mmap_size); - return event; + return error; +} + +static union perf_event * +fetch_mmaped_event(u64 head, size_t mmap_size, char *buf, bool needs_swap) +{ + return prefetch_event(buf, head, mmap_size, needs_swap, ERR_PTR(-EINVAL)); +} + +static union perf_event * +fetch_decomp_event(u64 head, size_t mmap_size, char *buf, bool needs_swap) +{ + return prefetch_event(buf, head, mmap_size, needs_swap, NULL); } static int __perf_session__process_decomp_events(struct perf_session *session) @@ -1997,10 +2043,8 @@ static int __perf_session__process_decomp_events(struct perf_session *session) return 0; while (decomp->head < decomp->size && !session_done()) { - union perf_event *event = fetch_mmaped_event(session, decomp->head, decomp->size, decomp->data); - - if (IS_ERR(event)) - return PTR_ERR(event); + union perf_event *event = fetch_decomp_event(decomp->head, decomp->size, decomp->data, + session->header.needs_swap); if (!event) break; @@ -2100,7 +2144,7 @@ reader__process_events(struct reader *rd, struct perf_session *session, } more: - event = fetch_mmaped_event(session, head, mmap_size, buf); + event = fetch_mmaped_event(head, mmap_size, buf, session->header.needs_swap); if (IS_ERR(event)) return PTR_ERR(event); diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 8456e1d868fda..f76480166d38e 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -64,6 +64,11 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, void *buf, size_t buf_sz, union perf_event **event_ptr, struct perf_sample *sample); +typedef int (*peek_events_cb_t)(struct perf_session *session, + union perf_event *event, u64 offset, + void *data); +int perf_session__peek_events(struct perf_session *session, u64 offset, + u64 size, peek_events_cb_t cb, void *data); int perf_session__process_events(struct perf_session *session); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 6b626e6b111ed..345b5ccc90f68 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1194,6 +1194,7 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) { u64 l, r; struct map *l_map, *r_map; + int rc; if (!left->mem_info) return -1; if (!right->mem_info) return 1; @@ -1212,18 +1213,9 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) if (!l_map) return -1; if (!r_map) return 1; - if (l_map->maj > r_map->maj) return -1; - if (l_map->maj < r_map->maj) return 1; - - if (l_map->min > r_map->min) return -1; - if (l_map->min < r_map->min) return 1; - - if (l_map->ino > r_map->ino) return -1; - if (l_map->ino < r_map->ino) return 1; - - if (l_map->ino_generation > r_map->ino_generation) return -1; - if (l_map->ino_generation < r_map->ino_generation) return 1; - + rc = dso__cmp_id(l_map->dso, r_map->dso); + if (rc) + return rc; /* * Addresses with no major/minor numbers are assumed to be * anonymous in userspace. Sort those on pid then address. @@ -1234,8 +1226,8 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) if ((left->cpumode != PERF_RECORD_MISC_KERNEL) && (!(l_map->flags & MAP_SHARED)) && - !l_map->maj && !l_map->min && !l_map->ino && - !l_map->ino_generation) { + !l_map->dso->id.maj && !l_map->dso->id.min && + !l_map->dso->id.ino && !l_map->dso->id.ino_generation) { /* userspace anonymous */ if (left->thread->pid_ > right->thread->pid_) return -1; @@ -1271,8 +1263,8 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf, if ((he->cpumode != PERF_RECORD_MISC_KERNEL) && map && !(map->prot & PROT_EXEC) && (map->flags & MAP_SHARED) && - (map->maj || map->min || map->ino || - map->ino_generation)) + (map->dso->id.maj || map->dso->id.min || + map->dso->id.ino || map->dso->id.ino_generation)) level = 's'; else if (!map) level = 'X'; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index cfa3c9f671414..48c3f8b9c8528 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1228,6 +1228,11 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_PHYS_ADDR) result += sizeof(u64); + if (type & PERF_SAMPLE_AUX) { + result += sizeof(u64); + result += sample->aux_sample.size; + } + return result; } @@ -1396,6 +1401,13 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo array++; } + if (type & PERF_SAMPLE_AUX) { + sz = sample->aux_sample.size; + *array++ = sz; + memcpy(array, sample->aux_sample.data, sz); + array = (void *)array + sz; + } + return 0; }