Skip to content

Commit

Permalink
Merge tag 'perf-core-for-mingo-4.13-20170718' of git://git.kernel.org…
Browse files Browse the repository at this point in the history
…/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

- Initial support for namespaces, using setns to access files in
  namespaces, grabbing their build-ids, etc. We still need to work
  more to deal with namespaces that vanish before we can get the
  needed data to do analysis, but this should be as good as what is
  in bcc now (Krister Johansen)

- Add header record types to pipe-mode, now this command:

  $ perf record -o - -e cycles sleep 1 | perf report --stdio --header

  Will show the same as in non-pipe mode, i.e. involving a perf.data
  file (David Carrillo-Cisneros)

- Implement a visual marker for fused x86 instructions in the annotate
  TUI browser, available now in 'perf report', more work needed to have
  it available as well in 'perf top' (Jin Yao)

  Further explanation from one of Jin's patches:

       │   ┌──cmpl   $0x0,argp_program_version_hook
 81.93 │   ├──je     20
       │   │  lock   cmpxchg %esi,0x38a9a4(%rip)
       │   │↓ jne    29
       │   │↓ jmp    43
 11.47 │20:└─→cmpxch %esi,0x38a999(%rip)

  That means the cmpl+je is a fused instruction pair and they should be
  considered together.

- Record the branch type and then show statistics and info about
  in callchain entries (Jin Yao)

  Example from one of Jin's patches:

  # perf record -g -j any,save_type
  # perf report --branch-history --stdio --no-children

  38.50%  div.c:45                [.] main                    div
          |
          ---main div.c:42 (RET CROSS_2M cycles:2)
             compute_flag div.c:28 (cycles:2)
             compute_flag div.c:27 (RET CROSS_2M cycles:1)
             rand rand.c:28 (cycles:1)
             rand rand.c:28 (RET CROSS_2M cycles:1)
             __random random.c:298 (cycles:1)
             __random random.c:297 (COND_BWD CROSS_2M cycles:1)
             __random random.c:295 (cycles:1)
             __random random.c:295 (COND_BWD CROSS_2M cycles:1)
             __random random.c:295 (cycles:1)
             __random random.c:295 (RET CROSS_2M cycles:9)

- Beautify the fcntl syscall, which is an interesting one in the sense
  that infrastructure had to be put in place to change the formatters of
  some arguments according to the value in a previous one, i.e. cmd
  dictates how arg and the syscall return will be formatted.
  (Arnaldo Carvalho de Melo

Infrastructure changes:

- 'perf test attr' fixes (Jiri Olsa)

Vendor events changes:

- Add POWER9 PMU events Sukadev (Bhattiprolu)

- Support additional POWER8+ PVR in PMU mapfile (Shriya)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Ingo Molnar committed Jul 20, 2017
2 parents 3bda69c + b851dd4 commit 510457e
Show file tree
Hide file tree
Showing 126 changed files with 6,339 additions and 1,031 deletions.
52 changes: 51 additions & 1 deletion arch/x86/events/intel/lbr.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ enum {
X86_BR_ZERO_CALL = 1 << 15,/* zero length call */
X86_BR_CALL_STACK = 1 << 16,/* call stack */
X86_BR_IND_JMP = 1 << 17,/* indirect jump */

X86_BR_TYPE_SAVE = 1 << 18,/* indicate to save branch type */

};

#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
Expand Down Expand Up @@ -510,6 +513,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
cpuc->lbr_entries[i].in_tx = 0;
cpuc->lbr_entries[i].abort = 0;
cpuc->lbr_entries[i].cycles = 0;
cpuc->lbr_entries[i].type = 0;
cpuc->lbr_entries[i].reserved = 0;
}
cpuc->lbr_stack.nr = i;
Expand Down Expand Up @@ -596,6 +600,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
cpuc->lbr_entries[out].in_tx = in_tx;
cpuc->lbr_entries[out].abort = abort;
cpuc->lbr_entries[out].cycles = cycles;
cpuc->lbr_entries[out].type = 0;
cpuc->lbr_entries[out].reserved = 0;
out++;
}
Expand Down Expand Up @@ -673,6 +678,10 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)

if (br_type & PERF_SAMPLE_BRANCH_CALL)
mask |= X86_BR_CALL | X86_BR_ZERO_CALL;

if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
mask |= X86_BR_TYPE_SAVE;

/*
* stash actual user request into reg, it may
* be used by fixup code for some CPU
Expand Down Expand Up @@ -926,6 +935,43 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
return ret;
}

#define X86_BR_TYPE_MAP_MAX 16

static int branch_map[X86_BR_TYPE_MAP_MAX] = {
PERF_BR_CALL, /* X86_BR_CALL */
PERF_BR_RET, /* X86_BR_RET */
PERF_BR_SYSCALL, /* X86_BR_SYSCALL */
PERF_BR_SYSRET, /* X86_BR_SYSRET */
PERF_BR_UNKNOWN, /* X86_BR_INT */
PERF_BR_UNKNOWN, /* X86_BR_IRET */
PERF_BR_COND, /* X86_BR_JCC */
PERF_BR_UNCOND, /* X86_BR_JMP */
PERF_BR_UNKNOWN, /* X86_BR_IRQ */
PERF_BR_IND_CALL, /* X86_BR_IND_CALL */
PERF_BR_UNKNOWN, /* X86_BR_ABORT */
PERF_BR_UNKNOWN, /* X86_BR_IN_TX */
PERF_BR_UNKNOWN, /* X86_BR_NO_TX */
PERF_BR_CALL, /* X86_BR_ZERO_CALL */
PERF_BR_UNKNOWN, /* X86_BR_CALL_STACK */
PERF_BR_IND, /* X86_BR_IND_JMP */
};

static int
common_branch_type(int type)
{
int i;

type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */

if (type) {
i = __ffs(type);
if (i < X86_BR_TYPE_MAP_MAX)
return branch_map[i];
}

return PERF_BR_UNKNOWN;
}

/*
* implement actual branch filter based on user demand.
* Hardware may not exactly satisfy that request, thus
Expand All @@ -942,7 +988,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
bool compress = false;

/* if sampling all branches, then nothing to filter */
if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
return;

for (i = 0; i < cpuc->lbr_stack.nr; i++) {
Expand All @@ -963,6 +1010,9 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
cpuc->lbr_entries[i].from = 0;
compress = true;
}

if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
cpuc->lbr_entries[i].type = common_branch_type(type);
}

if (!compress)
Expand Down
27 changes: 26 additions & 1 deletion include/uapi/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ enum perf_branch_sample_type_shift {
PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */
PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */

PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */

PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
};

Expand All @@ -198,9 +200,30 @@ enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,

PERF_SAMPLE_BRANCH_TYPE_SAVE =
1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,

PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};

/*
* Common flow change classification
*/
enum {
PERF_BR_UNKNOWN = 0, /* unknown */
PERF_BR_COND = 1, /* conditional */
PERF_BR_UNCOND = 2, /* unconditional */
PERF_BR_IND = 3, /* indirect */
PERF_BR_CALL = 4, /* function call */
PERF_BR_IND_CALL = 5, /* indirect function call */
PERF_BR_RET = 6, /* function return */
PERF_BR_SYSCALL = 7, /* syscall */
PERF_BR_SYSRET = 8, /* syscall return */
PERF_BR_COND_CALL = 9, /* conditional function call */
PERF_BR_COND_RET = 10, /* conditional function return */
PERF_BR_MAX,
};

#define PERF_SAMPLE_BRANCH_PLM_ALL \
(PERF_SAMPLE_BRANCH_USER|\
PERF_SAMPLE_BRANCH_KERNEL|\
Expand Down Expand Up @@ -1015,6 +1038,7 @@ union perf_mem_data_src {
* in_tx: running in a hardware transaction
* abort: aborting a hardware transaction
* cycles: cycles from last branch (or 0 if not supported)
* type: branch type
*/
struct perf_branch_entry {
__u64 from;
Expand All @@ -1024,7 +1048,8 @@ struct perf_branch_entry {
in_tx:1, /* in transaction */
abort:1, /* transaction abort */
cycles:16, /* cycle count to last branch */
reserved:44;
type:4, /* branch type */
reserved:40;
};

#endif /* _UAPI_LINUX_PERF_EVENT_H */
3 changes: 3 additions & 0 deletions tools/arch/x86/include/asm/unistd_32.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@
#ifndef __NR_getcpu
# define __NR_getcpu 318
#endif
#ifndef __NR_setns
# define __NR_setns 346
#endif
3 changes: 3 additions & 0 deletions tools/arch/x86/include/asm/unistd_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@
#ifndef __NR_getcpu
# define __NR_getcpu 309
#endif
#ifndef __NR_setns
#define __NR_setns 308
#endif
17 changes: 17 additions & 0 deletions tools/arch/x86/include/uapi/asm/unistd.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef _UAPI_ASM_X86_UNISTD_H
#define _UAPI_ASM_X86_UNISTD_H

/* x32 syscall flag bit */
#define __X32_SYSCALL_BIT 0x40000000

#ifndef __KERNEL__
# ifdef __i386__
# include <asm/unistd_32.h>
# elif defined(__ILP32__)
# include <asm/unistd_x32.h>
# else
# include <asm/unistd_64.h>
# endif
#endif

#endif /* _UAPI_ASM_X86_UNISTD_H */
3 changes: 2 additions & 1 deletion tools/build/Makefile.feature
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ FEATURE_TESTS_BASIC := \
get_cpuid \
bpf \
sched_getcpu \
sdt
sdt \
setns

# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
# of all feature tests
Expand Down
6 changes: 5 additions & 1 deletion tools/build/feature/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ FILES= \
test-sdt.bin \
test-cxx.bin \
test-jvmti.bin \
test-sched_getcpu.bin
test-sched_getcpu.bin \
test-setns.bin

FILES := $(addprefix $(OUTPUT),$(FILES))

Expand Down Expand Up @@ -95,6 +96,9 @@ $(OUTPUT)test-glibc.bin:
$(OUTPUT)test-sched_getcpu.bin:
$(BUILD)

$(OUTPUT)test-setns.bin:
$(BUILD)

DWARFLIBS := -ldw
ifeq ($(findstring -static,${LDFLAGS}),-static)
DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
Expand Down
5 changes: 5 additions & 0 deletions tools/build/feature/test-all.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@
# include "test-sdt.c"
#undef main

#define main main_test_setns
# include "test-setns.c"
#undef main

int main(int argc, char *argv[])
{
main_test_libpython();
Expand Down Expand Up @@ -188,6 +192,7 @@ int main(int argc, char *argv[])
main_test_libcrypto();
main_test_sched_getcpu();
main_test_sdt();
main_test_setns();

return 0;
}
7 changes: 7 additions & 0 deletions tools/build/feature/test-setns.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#define _GNU_SOURCE
#include <sched.h>

int main(void)
{
return setns(0, 0);
}
Loading

0 comments on commit 510457e

Please sign in to comment.