Skip to content

Commit

Permalink
Merge tag 'perf-core-for-mingo-5.5-20191122' of git://git.kernel.org/…
Browse files Browse the repository at this point in the history
…pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

perf report:

  Jin Yao:

  - Allow entering the annotation view (symbol source/assembly +
    overhead/cycles/etc column) from the 'perf report --total-cycles'
    interface.

    E.g.:

      # perf record --all-cpus --branch-any --all-kernel
      ^C[ perf record: Woken up 5 times to write data ]
      #
      # perf evlist -v
      cycles: size: 120, { sample_period, sample_freq }: 4000,
      sample_type: IP|TID|TIME|CPU|PERIOD|BRANCH_STACK,
      read_format: ID, disabled: 1, inherit: 1, exclude_user: 1, mmap: 1, comm: 1, freq: 1, task: 1,
      precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1,
      bpf_event: 1, branch_sample_type: ANY
      #
      # perf report --total-cycles
      #
      # Samples: 78762 of event 'cycles'
      Sampled  Sampled Avg      Avg
      Cycles%  Cycles  Cycles%  Cycles                           [Program Block Range]     Shared Object
        1.72%    95.8K   0.00%     254                        [msr.h:105 -> msr.h:166]  [kernel.vmlinux]
        1.56%   107.6K   0.00%     618                [compiler.h:199 -> common.c:301]  [kernel.vmlinux]
        0.83%    46.3K   0.00%     409              [entry_64.S:153 -> entry_64.S:175]  [kernel.vmlinux]
        0.83%    46.1K   0.00%      83                  [jump_label.h:41 -> tsc.c:230]  [kernel.vmlinux]
        0.64%    36.9K   0.01%    1.4K            [hda_intel.c:904 -> hda_intel.c:916]   [snd_hda_intel]
        0.57%    30.2K   0.00%     282                      [file.c:710 -> file.c:730]  [kernel.vmlinux]
        0.48%    25.8K   0.00%      82              [spinlock.c:158 -> spinlock.c:160]  [kernel.vmlinux]
        0.45%    23.7K   0.00%     369  [tick-broadcast.c:585 -> tick-broadcast.c:586]  [kernel.vmlinux]
        0.44%    24.4K   0.00%      73                       [msr.h:236 -> tsc.c:1088]  [kernel.vmlinux]
        0.43%    22.7K   0.00%     144                [cpuidle.c:229 -> cpuidle.c:232]  [kernel.vmlinux]

    Then press 'A' or Enter on one of those lines, just like with 'perf top', say
    the top one: [msr.h:105 -> msr.h:166], then this shows up:

      Samples: 78K of event 'cycles', 4000 Hz, Event count (approx.): 78762
      native_write_msr  /lib/modules/5.4.0-rc8/build/vmlinux [Percent: local period]
      Percent│ IPC Cycle (Average IPC: 0.02, IPC Coverage: 50.0%)
             │
             │             Disassembly of section .text:
             │
             │             ffffffff8106c480 <native_write_msr>:
             │             __wrmsr():
             │             return EAX_EDX_VAL(val, low, high);
             │             }
             │
             │             static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high)
             │             {
             │             asm volatile("1: wrmsr\n"
       49.16 │0.02           mov   %edi,%ecx
             │0.02           mov   %esi,%eax
             │0.02           wrmsr
             │             arch_static_branch():
             │             #include <linux/stringify.h>
             │             #include <linux/types.h>
             │
             │             static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
             │             {
             │             asm_volatile_goto("1:"
        0.79 │0.02           nop
             │             native_write_msr():
             │             {
             │             __wrmsr(msr, low, high);
             │
             │             if (msr_tracepoint_active(__tracepoint_write_msr))
             │             do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
             │             }
       50.05 │0.02  254    ← retq
             │             do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
             │               shl   $0x20,%rdx
             │               mov   %esi,%esi
             │               or    %rdx,%rsi
             │               xor   %edx,%edx
             │             → jmpq  do_trace_write_msr

    We need to improve this to show the source code line numbers in the
    annotation view, so one can go from that program block to the annotation view
    and see those source code line numbers straight away.

auxtrace/Intel PT:

  Adrian Hunter:

  - Add support for AUX area sampling, requires new functionality that
    will land in 5.5, its already in tip.

    This includes kernel capability querying so that it fails gracefully
    with older kernels, duimping aux area samples in 'perf report -D' and
    'perf script'.

perf.data:

  Alexey Budankov:

  - Fix decompression of PERF_RECORD_COMPRESSED records.

core:

  Arnaldo Carvalho de Melo:

  - Use the 'dcacheline' cmp routine to find the right DSOs taking into
    account the 'maj', 'min', 'ino' and 'ino_generation', that got moved
    from 'struct map' to 'struct dso', where it belongs.

    This further reduces the size of 'struct map', there is still more
    work to do to maybe get it to max one cacheline.

libtraceevent:

  Hewenliang:

  - Fix memory leakage in copy_filter_type().

  Sudip Mukherjee:

  - Fix header installation.

perf parse:

  Ian Rogers :

  - Fix potential memory leak when handling tracepoint errors, found using
    LLVM's libFuzzer.

perf probe:

  Colin Ian King:

  - Fix spelling mistake "addrees" -> "address".

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Ingo Molnar committed Nov 23, 2019
2 parents 8f6ee51 + 4584f08 commit 8cacac6
Show file tree
Hide file tree
Showing 46 changed files with 1,190 additions and 200 deletions.
10 changes: 8 additions & 2 deletions tools/include/uapi/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_TRANSACTION = 1U << 17,
PERF_SAMPLE_REGS_INTR = 1U << 18,
PERF_SAMPLE_PHYS_ADDR = 1U << 19,
PERF_SAMPLE_AUX = 1U << 20,

PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */
PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */

__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
};
Expand Down Expand Up @@ -300,6 +301,7 @@ enum perf_event_read_format {
/* add: sample_stack_user */
#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */

/*
* Hardware event_id to monitor via a performance monitoring event:
Expand Down Expand Up @@ -424,7 +426,9 @@ struct perf_event_attr {
*/
__u32 aux_watermark;
__u16 sample_max_stack;
__u16 __reserved_2; /* align to __u64 */
__u16 __reserved_2;
__u32 aux_sample_size;
__u32 __reserved_3;
};

/*
Expand Down Expand Up @@ -864,6 +868,8 @@ enum perf_event_type {
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
* { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
* { u64 size;
* char data[size]; } && PERF_SAMPLE_AUX
* };
*/
PERF_RECORD_SAMPLE = 9,
Expand Down
8 changes: 4 additions & 4 deletions tools/lib/traceevent/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -232,10 +232,10 @@ install_pkgconfig:

install_headers:
$(call QUIET_INSTALL, headers) \
$(call do_install,event-parse.h,$(DESTDIR)$(includedir_SQ),644); \
$(call do_install,event-utils.h,$(DESTDIR)$(includedir_SQ),644); \
$(call do_install,trace-seq.h,$(DESTDIR)$(includedir_SQ),644); \
$(call do_install,kbuffer.h,$(DESTDIR)$(includedir_SQ),644)
$(call do_install,event-parse.h,$(includedir_SQ),644); \
$(call do_install,event-utils.h,$(includedir_SQ),644); \
$(call do_install,trace-seq.h,$(includedir_SQ),644); \
$(call do_install,kbuffer.h,$(includedir_SQ),644)

install: install_lib

Expand Down
9 changes: 7 additions & 2 deletions tools/lib/traceevent/parse-filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -1473,8 +1473,10 @@ static int copy_filter_type(struct tep_event_filter *filter,
if (strcmp(str, "TRUE") == 0 || strcmp(str, "FALSE") == 0) {
/* Add trivial event */
arg = allocate_arg();
if (arg == NULL)
if (arg == NULL) {
free(str);
return -1;
}

arg->type = TEP_FILTER_ARG_BOOLEAN;
if (strcmp(str, "TRUE") == 0)
Expand All @@ -1483,8 +1485,11 @@ static int copy_filter_type(struct tep_event_filter *filter,
arg->boolean.value = 0;

filter_type = add_filter_type(filter, event->id);
if (filter_type == NULL)
if (filter_type == NULL) {
free(str);
free_arg(arg);
return -1;
}

filter_type->filter = arg;

Expand Down
59 changes: 57 additions & 2 deletions tools/perf/Documentation/intel-pt.txt
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,56 @@ pwr_evt Enable power events. The power events provide information about
"0" otherwise.


AUX area sampling option
------------------------

To select Intel PT "sampling" the AUX area sampling option can be used:

--aux-sample

Optionally it can be followed by the sample size in bytes e.g.

--aux-sample=8192

In addition, the Intel PT event to sample must be defined e.g.

-e intel_pt//u

Samples on other events will be created containing Intel PT data e.g. the
following will create Intel PT samples on the branch-misses event, note the
events must be grouped using {}:

perf record --aux-sample -e '{intel_pt//u,branch-misses:u}'

An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to
events. In this case, the grouping is implied e.g.

perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u

is the same as:

perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}'

but allows for also using an address filter e.g.:

perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls

It is important to select a sample size that is big enough to contain at least
one PSB packet. If not a warning will be displayed:

Intel PT sample size (%zu) may be too small for PSB period (%zu)

The calculation used for that is: if sample_size <= psb_period + 256 display the
warning. When sampling is used, psb_period defaults to 0 (2KiB).

The default sample size is 4KiB.

The sample size is passed in aux_sample_size in struct perf_event_attr. The
sample size is limited by the maximum event size which is 64KiB. It is
difficult to know how big the event might be without the trace sample attached,
but the tool validates that the sample size is not greater than 60KiB.


new snapshot option
-------------------

Expand Down Expand Up @@ -487,8 +537,8 @@ their mlock limit (which defaults to 64KiB but is not multiplied by the number
of cpus).

In full-trace mode, powers of two are allowed for buffer size, with a minimum
size of 2 pages. In snapshot mode, it is the same but the minimum size is
1 page.
size of 2 pages. In snapshot mode or sampling mode, it is the same but the
minimum size is 1 page.

The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g.

Expand All @@ -501,12 +551,17 @@ Intel PT modes of operation

Intel PT can be used in 2 modes:
full-trace mode
sample mode
snapshot mode

Full-trace mode traces continuously e.g.

perf record -e intel_pt//u uname

Sample mode attaches a Intel PT sample to other events e.g.

perf record --aux-sample -e intel_pt//u -e branch-misses:u

Snapshot mode captures the available data when a signal is sent e.g.

perf record -v -e intel_pt//u -S ./loopy 1000000000 &
Expand Down
9 changes: 9 additions & 0 deletions tools/perf/Documentation/perf-record.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ OPTIONS
like this: name=\'CPU_CLK_UNHALTED.THREAD:cmask=0x1\'.
- 'aux-output': Generate AUX records instead of events. This requires
that an AUX area event is also provided.
- 'aux-sample-size': Set sample size for AUX area sampling. If the
'--aux-sample' option has been used, set aux-sample-size=0 to disable
AUX area sampling for the event.

See the linkperf:perf-list[1] man page for more parameters.

Expand Down Expand Up @@ -433,6 +436,12 @@ can be specified in a string that follows this option:
In Snapshot Mode trace data is captured only when signal SIGUSR2 is received
and on exit if the above 'e' option is given.

--aux-sample[=OPTIONS]::
Select AUX area sampling. At least one of the events selected by the -e option
must be an AUX area event. Samples on other events will be created containing
data from the AUX area. Optionally sample size may be specified, otherwise it
defaults to 4KiB.

--proc-map-timeout::
When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
because the file may be huge. A time out is needed in such cases.
Expand Down
4 changes: 4 additions & 0 deletions tools/perf/arch/x86/util/auxtrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist,
bool found_bts = false;

intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
if (intel_pt_pmu)
intel_pt_pmu->auxtrace = true;
intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
if (intel_bts_pmu)
intel_bts_pmu->auxtrace = true;

evlist__for_each_entry(evlist, evsel) {
if (intel_pt_pmu && evsel->core.attr.type == intel_pt_pmu->type)
Expand Down
5 changes: 5 additions & 0 deletions tools/perf/arch/x86/util/intel-bts.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
const struct perf_cpu_map *cpus = evlist->core.cpus;
bool privileged = perf_event_paranoid_check(-1);

if (opts->auxtrace_sample_mode) {
pr_err("Intel BTS does not support AUX area sampling\n");
return -EINVAL;
}

btsr->evlist = evlist;
btsr->snapshot_mode = opts->auxtrace_snapshot_mode;

Expand Down
81 changes: 80 additions & 1 deletion tools/perf/arch/x86/util/intel-pt.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "../../util/event.h"
#include "../../util/evlist.h"
#include "../../util/evsel.h"
#include "../../util/evsel_config.h"
#include "../../util/cpumap.h"
#include "../../util/mmap.h"
#include <subcmd/parse-options.h>
Expand Down Expand Up @@ -551,6 +552,43 @@ static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
evsel->core.attr.config);
}

static void intel_pt_config_sample_mode(struct perf_pmu *intel_pt_pmu,
struct evsel *evsel)
{
struct perf_evsel_config_term *term;
u64 user_bits = 0, bits;

term = perf_evsel__get_config_term(evsel, CFG_CHG);
if (term)
user_bits = term->val.cfg_chg;

bits = perf_pmu__format_bits(&intel_pt_pmu->format, "psb_period");

/* Did user change psb_period */
if (bits & user_bits)
return;

/* Set psb_period to 0 */
evsel->core.attr.config &= ~bits;
}

static void intel_pt_min_max_sample_sz(struct evlist *evlist,
size_t *min_sz, size_t *max_sz)
{
struct evsel *evsel;

evlist__for_each_entry(evlist, evsel) {
size_t sz = evsel->core.attr.aux_sample_size;

if (!sz)
continue;
if (min_sz && (sz < *min_sz || !*min_sz))
*min_sz = sz;
if (max_sz && sz > *max_sz)
*max_sz = sz;
}
}

/*
* Currently, there is not enough information to disambiguate different PEBS
* events, so only allow one.
Expand Down Expand Up @@ -606,6 +644,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
return -EINVAL;
}

if (opts->auxtrace_snapshot_mode && opts->auxtrace_sample_mode) {
pr_err("Snapshot mode (" INTEL_PT_PMU_NAME " PMU) and sample trace cannot be used together\n");
return -EINVAL;
}

if (opts->use_clockid) {
pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
return -EINVAL;
Expand All @@ -617,6 +660,9 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
if (!opts->full_auxtrace)
return 0;

if (opts->auxtrace_sample_mode)
intel_pt_config_sample_mode(intel_pt_pmu, intel_pt_evsel);

err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
if (err)
return err;
Expand Down Expand Up @@ -666,6 +712,34 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
opts->auxtrace_snapshot_size, psb_period);
}

/* Set default sizes for sample mode */
if (opts->auxtrace_sample_mode) {
size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);
size_t min_sz = 0, max_sz = 0;

intel_pt_min_max_sample_sz(evlist, &min_sz, &max_sz);
if (!opts->auxtrace_mmap_pages && !privileged &&
opts->mmap_pages == UINT_MAX)
opts->mmap_pages = KiB(256) / page_size;
if (!opts->auxtrace_mmap_pages) {
size_t sz = round_up(max_sz, page_size) / page_size;

opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
}
if (max_sz > opts->auxtrace_mmap_pages * (size_t)page_size) {
pr_err("Sample size %zu must not be greater than AUX area tracing mmap size %zu\n",
max_sz,
opts->auxtrace_mmap_pages * (size_t)page_size);
return -EINVAL;
}
pr_debug2("Intel PT min. sample size: %zu max. sample size: %zu\n",
min_sz, max_sz);
if (psb_period &&
min_sz <= psb_period + INTEL_PT_PSB_PERIOD_NEAR)
ui__warning("Intel PT sample size (%zu) may be too small for PSB period (%zu)\n",
min_sz, psb_period);
}

/* Set default sizes for full trace mode */
if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
if (privileged) {
Expand All @@ -682,7 +756,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
size_t min_sz;

if (opts->auxtrace_snapshot_mode)
if (opts->auxtrace_snapshot_mode || opts->auxtrace_sample_mode)
min_sz = KiB(4);
else
min_sz = KiB(8);
Expand Down Expand Up @@ -1136,5 +1210,10 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
ptr->itr.reference = intel_pt_reference;
ptr->itr.read_finish = intel_pt_read_finish;
/*
* Decoding starts at a PSB packet. Minimum PSB period is 2K so 4K
* should give at least 1 PSB per sample.
*/
ptr->itr.default_aux_sample_size = 4096;
return &ptr->itr;
}
29 changes: 29 additions & 0 deletions tools/perf/builtin-inject.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ struct perf_inject {
u64 aux_id;
struct list_head samples;
struct itrace_synth_opts itrace_synth_opts;
char event_copy[PERF_SAMPLE_MAX_SIZE];
};

struct event_entry {
Expand Down Expand Up @@ -214,6 +215,28 @@ static int perf_event__drop_aux(struct perf_tool *tool,
return 0;
}

static union perf_event *
perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
union perf_event *event,
struct perf_sample *sample)
{
size_t sz1 = sample->aux_sample.data - (void *)event;
size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
union perf_event *ev = (union perf_event *)inject->event_copy;

if (sz1 > event->header.size || sz2 > event->header.size ||
sz1 + sz2 > event->header.size ||
sz1 < sizeof(struct perf_event_header) + sizeof(u64))
return event;

memcpy(ev, event, sz1);
memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2);
ev->header.size = sz1 + sz2;
((u64 *)((void *)ev + sz1))[-1] = 0;

return ev;
}

typedef int (*inject_handler)(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
Expand All @@ -226,13 +249,19 @@ static int perf_event__repipe_sample(struct perf_tool *tool,
struct evsel *evsel,
struct machine *machine)
{
struct perf_inject *inject = container_of(tool, struct perf_inject,
tool);

if (evsel && evsel->handler) {
inject_handler f = evsel->handler;
return f(tool, event, sample, evsel, machine);
}

build_id__mark_dso_hit(tool, event, sample, evsel, machine);

if (inject->itrace_synth_opts.set && sample->aux_sample.size)
event = perf_inject__cut_auxtrace_sample(inject, event, sample);

return perf_event__repipe_synth(tool, event);
}

Expand Down
Loading

0 comments on commit 8cacac6

Please sign in to comment.