Skip to content

Commit

Permalink
Merge tag 'perf-core-for-mingo-4.20-20181025' of git://git.kernel.org…
Browse files Browse the repository at this point in the history
…/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- Introduce 'perf trace --max-events' for stopping 'perf trace' when
  that many syscalls (enter+exit), tracepoints or other events such as
  page faults take place.

  Support that as well on a per-event basis, e.g.:

   perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/

  Will stop when 2 context switches, 4 block plugs, 1 block unplug and
  3 net_dev_queue tracepoints take place. (Arnaldo Carvalho de Melo)

- Poll for monitored tasks being alive in 'perf stat -p/-t', exiting when
  those tasks all terminate (Jiri Olsa)

- Encode -k clockid frequency into perf.data to enable timestamps derived
  metrics conversion into wall clock time on reporting stage. (Alexey Budankov)

- Improve Intel PT call graph from SQL database and GUI python scripts,
  including adopting the Qt MDI interface to allow for multiple subwindows
  for all the tables, helping in better visualizing the data in the SQL
  tables, also uses, when available, the Intel XED disassembler libraries
  to present the Intel PT data as x86 asm mnemonics. This last feature
  is not currently working in some cases; a fix is being discussed (Adrian Hunter)

- Implement a ftrace function_graph view in 'perf script' when processing
  hardware trace data such as Intel PT (Andi Kleen)

- Better integration with the Intel XED disassembler, when available, in
  'perf script' (Andi Kleen)

- Some 'perf trace' drop refcount fixes (Arnaldo Carvalho de Melo)

- Add Sparc support to 'perf annotate', jitdump (David Miller)

- Fix PLT symbols entry/header sizes properly on Sparc (David Miller)

- Fix failure to generate the system call table on arm64 when /tmp is mounted
  with 'noexec' (Hongxu Jia)

- Allow extended console debug output in 'perf script' (Milian Wolff)

- Flush output stream after events in 'perf script' verbose mode (Milian Wolff)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Ingo Molnar committed Oct 26, 2018
2 parents 034bda1 + fe57120 commit efe8eaf
Show file tree
Hide file tree
Showing 38 changed files with 2,778 additions and 396 deletions.
19 changes: 19 additions & 0 deletions tools/lib/subcmd/parse-options.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ static int get_value(struct parse_opt_ctx_t *p,
case OPTION_INTEGER:
case OPTION_UINTEGER:
case OPTION_LONG:
case OPTION_ULONG:
case OPTION_U64:
default:
break;
Expand Down Expand Up @@ -166,6 +167,7 @@ static int get_value(struct parse_opt_ctx_t *p,
case OPTION_INTEGER:
case OPTION_UINTEGER:
case OPTION_LONG:
case OPTION_ULONG:
case OPTION_U64:
default:
break;
Expand Down Expand Up @@ -295,6 +297,22 @@ static int get_value(struct parse_opt_ctx_t *p,
return opterror(opt, "expects a numerical value", flags);
return 0;

case OPTION_ULONG:
if (unset) {
*(unsigned long *)opt->value = 0;
return 0;
}
if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
*(unsigned long *)opt->value = opt->defval;
return 0;
}
if (get_arg(p, opt, flags, &arg))
return -1;
*(unsigned long *)opt->value = strtoul(arg, (char **)&s, 10);
if (*s)
return opterror(opt, "expects a numerical value", flags);
return 0;

case OPTION_U64:
if (unset) {
*(u64 *)opt->value = 0;
Expand Down Expand Up @@ -703,6 +721,7 @@ static void print_option_help(const struct option *opts, int full)
case OPTION_ARGUMENT:
break;
case OPTION_LONG:
case OPTION_ULONG:
case OPTION_U64:
case OPTION_INTEGER:
case OPTION_UINTEGER:
Expand Down
2 changes: 2 additions & 0 deletions tools/lib/subcmd/parse-options.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ enum parse_opt_type {
OPTION_STRING,
OPTION_INTEGER,
OPTION_LONG,
OPTION_ULONG,
OPTION_CALLBACK,
OPTION_U64,
OPTION_UINTEGER,
Expand Down Expand Up @@ -133,6 +134,7 @@ struct option {
#define OPT_INTEGER(s, l, v, h) { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) }
#define OPT_UINTEGER(s, l, v, h) { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) }
#define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
#define OPT_ULONG(s, l, v, h) { .type = OPTION_ULONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned long *), .help = (h) }
#define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h) }
#define OPT_STRING_OPTARG(s, l, v, a, h, d) \
Expand Down
19 changes: 19 additions & 0 deletions tools/perf/Documentation/build-xed.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

For --xed the xed tool is needed. Here is how to install it:

$ git clone https://github.com/intelxed/mbuild.git mbuild
$ git clone https://github.com/intelxed/xed
$ cd xed
$ ./mfile.py --share
$ ./mfile.py examples
$ sudo ./mfile.py --prefix=/usr/local install
$ sudo ldconfig
$ sudo cp obj/examples/xed /usr/local/bin

Basic xed testing:

$ xed | head -3
ERROR: required argument(s) were missing
Copyright (C) 2017, Intel Corporation. All rights reserved.
XED version: [v10.0-328-g7d62c8c49b7b]
$
2 changes: 1 addition & 1 deletion tools/perf/Documentation/intel-pt.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ in transaction, respectively.
While it is possible to create scripts to analyze the data, an alternative
approach is available to export the data to a sqlite or postgresql database.
Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
and to script call-graph-from-sql.py for an example of using the database.
and to script exported-sql-viewer.py for an example of using the database.

There is also script intel-pt-events.py which provides an example of how to
unpack the raw data for power events and PTWRITE.
Expand Down
7 changes: 4 additions & 3 deletions tools/perf/Documentation/itrace.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@
l synthesize last branch entries (use with i or x)
s skip initial number of events

The default is all events i.e. the same as --itrace=ibxwpe
The default is all events i.e. the same as --itrace=ibxwpe,
except for perf script where it is --itrace=ce

In addition, the period (default 100000) for instructions events
can be specified in units of:
In addition, the period (default 100000, except for perf script where it is 1)
for instructions events can be specified in units of:

i instructions
t ticks
Expand Down
18 changes: 18 additions & 0 deletions tools/perf/Documentation/perf-script.txt
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,24 @@ include::itrace.txt[]
will be printed. Each entry has function name and file/line. Enabled by
default, disable with --no-inline.

--insn-trace::
Show instruction stream for intel_pt traces. Combine with --xed to
show disassembly.

--xed::
Run xed disassembler on output. Requires installing the xed disassembler.

--call-trace::
Show call stream for intel_pt traces. The CPUs are interleaved, but
can be filtered with -C.

--call-ret-trace::
Show call and return stream for intel_pt traces.

--graph-function::
For itrace, only show the specified functions and their callees.
Multiple functions can be separated by commas.

SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
Expand Down
67 changes: 67 additions & 0 deletions tools/perf/Documentation/perf-trace.txt
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
--kernel-syscall-graph::
Show the kernel callchains on the syscall exit path.

--max-events=N::
Stop after processing N events. Note that strace-like events are considered
only at exit time or when a syscall is interrupted, i.e. in those cases this
option is equivalent to the number of lines printed.

--max-stack::
Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. Note that at this point
Expand Down Expand Up @@ -238,6 +243,68 @@ Trace syscalls, major and minor pagefaults:
As you can see, there was major pagefault in python process, from
CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so.

Trace the first 4 open, openat or open_by_handle_at syscalls (in the future more syscalls may match here):

$ perf trace -e open* --max-events 4
[root@jouet perf]# trace -e open* --max-events 4
2272.992 ( 0.037 ms): gnome-shell/1370 openat(dfd: CWD, filename: /proc/self/stat) = 31
2277.481 ( 0.139 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
3026.398 ( 0.076 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
4294.665 ( 0.015 ms): sed/15879 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3
$

Trace the first minor page fault when running a workload:

# perf trace -F min --max-stack=7 --max-events 1 sleep 1
0.000 ( 0.000 ms): sleep/18006 minfault [__clear_user+0x1a] => 0x5626efa56080 (?k)
__clear_user ([kernel.kallsyms])
load_elf_binary ([kernel.kallsyms])
search_binary_handler ([kernel.kallsyms])
__do_execve_file.isra.33 ([kernel.kallsyms])
__x64_sys_execve ([kernel.kallsyms])
do_syscall_64 ([kernel.kallsyms])
entry_SYSCALL_64 ([kernel.kallsyms])
#

Trace the next minor page fault to take place on the first CPU:

# perf trace -F min --call-graph=dwarf --max-events 1 --cpu 0
0.000 ( 0.000 ms): Web Content/17136 minfault [js::gc::Chunk::fetchNextDecommittedArena+0x4b] => 0x7fbe6181b000 (?.)
js::gc::FreeSpan::initAsEmpty (inlined)
js::gc::Arena::setAsNotAllocated (inlined)
js::gc::Chunk::fetchNextDecommittedArena (/usr/lib64/firefox/libxul.so)
js::gc::Chunk::allocateArena (/usr/lib64/firefox/libxul.so)
js::gc::GCRuntime::allocateArena (/usr/lib64/firefox/libxul.so)
js::gc::ArenaLists::allocateFromArena (/usr/lib64/firefox/libxul.so)
js::gc::GCRuntime::tryNewTenuredThing<JSString, (js::AllowGC)1> (inlined)
js::AllocateString<JSString, (js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
js::Allocate<JSThinInlineString, (js::AllowGC)1> (inlined)
JSThinInlineString::new_<(js::AllowGC)1> (inlined)
AllocateInlineString<(js::AllowGC)1, unsigned char> (inlined)
js::ConcatStrings<(js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
[0x18b26e6bc2bd] (/tmp/perf-17136.map)
#

Trace the next two sched:sched_switch events, four block:*_plug events, the
next block:*_unplug and the next three net:*dev_queue events, this last one
with a backtrace of at most 16 entries, system wide:

# perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/
0.000 :0/0 sched:sched_switch:swapper/2:0 [120] S ==> rcu_sched:10 [120]
0.015 rcu_sched/10 sched:sched_switch:rcu_sched:10 [120] R ==> swapper/2:0 [120]
254.198 irq/50-iwlwifi/680 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=66
__dev_queue_xmit ([kernel.kallsyms])
273.977 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=78
__dev_queue_xmit ([kernel.kallsyms])
274.007 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051ff00 len=78
__dev_queue_xmit ([kernel.kallsyms])
2930.140 kworker/u16:58/2722 block:block_plug:[kworker/u16:58]
2930.162 kworker/u16:58/2722 block:block_unplug:[kworker/u16:58] 1
4466.094 jbd2/dm-2-8/748 block:block_plug:[jbd2/dm-2-8]
8050.123 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
8050.271 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
#

SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script[1]
2 changes: 1 addition & 1 deletion tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ create_table_from_c()
{
local sc nr last_sc

create_table_exe=`mktemp /tmp/create-table-XXXXXX`
create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX`

{

Expand Down
2 changes: 2 additions & 0 deletions tools/perf/arch/sparc/Makefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
endif

PERF_HAVE_JITDUMP := 1
169 changes: 169 additions & 0 deletions tools/perf/arch/sparc/annotate/instructions.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
// SPDX-License-Identifier: GPL-2.0

/*
 * Tell whether @cond is a recognised integer condition suffix of a SPARC
 * branch mnemonic.
 *
 * @cond is the text that follows the leading 'b' of the mnemonic, which the
 * caller has already consumed: "" stands for a plain "b", "a" for "ba",
 * "pos" for "bpos", "z" for "bz", and so on.
 *
 * Returns 1 when the suffix is recognised, 0 otherwise.
 */
static int is_branch_cond(const char *cond)
{
	/*
	 * Suffixes only, without the leading 'b'.  "pos" and "z" used to be
	 * spelled "bpos" and "bz" here, which could never match a real
	 * mnemonic; "gu" (branch on greater, unsigned) was missing next to
	 * its "lu", "leu" and "geu" siblings.
	 */
	static const char * const suffixes[] = {
		"",
		"a",
		"cc", "cs",
		"e", "eq",
		"g", "gt", "ge", "geu", "gu",
		"l", "lt", "lu", "le", "leu",
		"n", "ne", "nz", "neg",
		"pos",
		"vc", "vs",
		"z",
	};
	size_t i;

	for (i = 0; i < sizeof(suffixes) / sizeof(suffixes[0]); i++) {
		if (!strcmp(cond, suffixes[i]))
			return 1;
	}

	return 0;
}

/*
 * Tell whether @cond is a recognised register-condition suffix.
 *
 * @cond is the text that follows the "br" prefix of the mnemonic, e.g. "z"
 * for "brz" or "gez" for "brgez".
 *
 * Returns 1 when the suffix is recognised, 0 otherwise.
 */
static int is_branch_reg_cond(const char *cond)
{
	static const char * const reg_conds[] = {
		"z", "nz", "lz", "gz", "lez", "gez",
	};
	const char * const *it = reg_conds;
	const char * const *end = reg_conds + sizeof(reg_conds) / sizeof(reg_conds[0]);

	while (it != end) {
		if (strcmp(cond, *it) == 0)
			return 1;
		it++;
	}

	return 0;
}

/*
 * Tell whether @cond is a recognised floating-point condition suffix.
 *
 * @cond is the text that follows the "fb" prefix of the mnemonic; the empty
 * string (a bare "fb") is accepted as well.
 *
 * Returns 1 when the suffix is recognised, 0 otherwise.
 */
static int is_branch_float_cond(const char *cond)
{
	static const char * const float_conds[] = {
		/* bare "fb" */
		"",
		/* one-letter conditions */
		"a", "e", "z", "g", "l", "n", "o", "u",
		/* two-letter conditions */
		"ge", "le", "lg", "ne", "nz", "ue", "ug", "ul",
		/* three-letter conditions */
		"uge", "ule",
	};
	size_t idx;

	for (idx = 0; idx < sizeof(float_conds) / sizeof(float_conds[0]); idx++) {
		if (strcmp(cond, float_conds[idx]) == 0)
			return 1;
	}

	return 0;
}

/*
 * Pick the ins_ops that handle the SPARC mnemonic @name and, when one is
 * found, register the pair on @arch through arch__associate_ins_ops().
 *
 *   "call", "jmp", "jmpl"     -> call_ops
 *   "ret", "retl", "return"   -> ret_ops
 *   "mov"                     -> mov_ops
 *   "b<cond>", "br<rcond>", "fb<fcond>", each optionally preceded by a
 *   "cw" or "cx" prefix       -> jump_ops
 *
 * Returns the selected ops, or NULL when the mnemonic is not recognised (in
 * which case nothing is registered).
 */
static struct ins_ops *sparc__associate_instruction_ops(struct arch *arch, const char *name)
{
	struct ins_ops *ops = NULL;

	if (!strcmp(name, "call") ||
	    !strcmp(name, "jmp") ||
	    !strcmp(name, "jmpl")) {
		ops = &call_ops;
	} else if (!strcmp(name, "ret") ||
		   !strcmp(name, "retl") ||
		   !strcmp(name, "return")) {
		ops = &ret_ops;
	} else if (!strcmp(name, "mov")) {
		ops = &mov_ops;
	} else {
		/*
		 * Classify through a separate cursor so that @name keeps
		 * pointing at the complete mnemonic.
		 */
		const char *insn = name;

		/* Skip the "cw"/"cx" prefix; what follows is classified as a branch. */
		if (insn[0] == 'c' &&
		    (insn[1] == 'w' || insn[1] == 'x'))
			insn += 2;

		if (insn[0] == 'b') {
			const char *cond = insn + 1;

			if (cond[0] == 'r') {
				if (is_branch_reg_cond(cond + 1))
					ops = &jump_ops;
			} else if (is_branch_cond(cond)) {
				ops = &jump_ops;
			}
		} else if (insn[0] == 'f' && insn[1] == 'b') {
			if (is_branch_float_cond(insn + 2))
				ops = &jump_ops;
		}
	}

	/*
	 * Register under the mnemonic we were asked about rather than the
	 * prefix-stripped one, so that e.g. "cwbe" is not recorded as "be".
	 * NOTE(review): presumably later lookups are keyed by the full
	 * mnemonic -- confirm against arch__associate_ins_ops().
	 */
	if (ops)
		arch__associate_ins_ops(arch, name, ops);

	return ops;
}

/*
 * One-time SPARC setup of @arch: installs the mnemonic classifier and sets
 * '#' as the objdump comment character.  Does nothing when @arch is already
 * marked initialized.  @cpuid is unused.
 *
 * Always returns 0.
 */
static int sparc__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
{
	if (arch->initialized)
		return 0;

	arch->objdump.comment_char = '#';
	arch->associate_instruction_ops = sparc__associate_instruction_ops;
	arch->initialized = true;

	return 0;
}
Loading

0 comments on commit efe8eaf

Please sign in to comment.