Skip to content

Commit

Permalink
perf report: Add --max-stack option to limit callchain stack scan
Browse files Browse the repository at this point in the history
When callgraph data is included in the perf data file, it may take a
long time to scan all of that data and merge it together, especially if
the stored callchains are long and the perf data file itself is large,
like a Gbyte or so.

The callchain stack is currently limited to PERF_MAX_STACK_DEPTH (127).
This is a large value. Usually the callgraph data that developers are
most interested in is the first few levels; the rest is usually not
looked at.

This patch adds a new --max-stack option to perf-report to limit the
depth of callchain stack data to look at, reducing the time it takes for
perf-report to finish its processing. It trades the presence of trailing
stack information for faster processing.

The following table shows the elapsed time of doing perf-report on a
perf.data file of size 985,531,828 bytes.

  --max-stack   Elapsed Time    Output data size
  -----------   ------------    ----------------
  not set        88.0s          124,422,651
  64             87.5s          116,303,213
  32             87.2s          112,023,804
  16             86.6s           94,326,380
  8              59.9s           33,697,248
  4              40.7s           10,116,637
  -g none        27.1s            2,555,810

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Aswin Chandramouleeswaran <aswin@hp.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1382107129-2010-4-git-send-email-Waiman.Long@hp.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
  • Loading branch information
Waiman Long authored and Arnaldo Carvalho de Melo committed Oct 21, 2013
1 parent cc9784b commit 91e9561
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 13 deletions.
8 changes: 8 additions & 0 deletions tools/perf/Documentation/perf-report.txt
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,14 @@ OPTIONS

Default: fractal,0.5,callee,function.

--max-stack::
Set the stack depth limit when parsing the callchain; anything
beyond the specified depth will be ignored. This is a trade-off
between information loss and faster processing, especially for
workloads that can have a very long callchain stack.

Default: 127

-G::
--inverted::
alias for inverted caller based call graph.
Expand Down
22 changes: 17 additions & 5 deletions tools/perf/builtin-report.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ struct perf_report {
bool show_threads;
bool inverted_callchain;
bool mem_mode;
int max_stack;
struct perf_read_values show_threads_values;
const char *pretty_printing_style;
const char *cpu_list;
Expand Down Expand Up @@ -90,7 +91,8 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
if ((sort__has_parent || symbol_conf.use_callchain) &&
sample->callchain) {
err = machine__resolve_callchain(machine, evsel, al->thread,
sample, &parent, al);
sample, &parent, al,
rep->max_stack);
if (err)
return err;
}
Expand Down Expand Up @@ -181,7 +183,8 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
if ((sort__has_parent || symbol_conf.use_callchain)
&& sample->callchain) {
err = machine__resolve_callchain(machine, evsel, al->thread,
sample, &parent, al);
sample, &parent, al,
rep->max_stack);
if (err)
return err;
}
Expand Down Expand Up @@ -244,18 +247,21 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
return err;
}

static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
static int perf_evsel__add_hist_entry(struct perf_tool *tool,
struct perf_evsel *evsel,
struct addr_location *al,
struct perf_sample *sample,
struct machine *machine)
{
struct perf_report *rep = container_of(tool, struct perf_report, tool);
struct symbol *parent = NULL;
int err = 0;
struct hist_entry *he;

if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
err = machine__resolve_callchain(machine, evsel, al->thread,
sample, &parent, al);
sample, &parent, al,
rep->max_stack);
if (err)
return err;
}
Expand Down Expand Up @@ -332,7 +338,8 @@ static int process_sample_event(struct perf_tool *tool,
if (al.map != NULL)
al.map->dso->hit = 1;

ret = perf_evsel__add_hist_entry(evsel, &al, sample, machine);
ret = perf_evsel__add_hist_entry(tool, evsel, &al, sample,
machine);
if (ret < 0)
pr_debug("problem incrementing symbol period, skipping event\n");
}
Expand Down Expand Up @@ -772,6 +779,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
.ordered_samples = true,
.ordering_requires_timestamps = true,
},
.max_stack = PERF_MAX_STACK_DEPTH,
.pretty_printing_style = "normal",
};
const struct option options[] = {
Expand Down Expand Up @@ -812,6 +820,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
"Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
"Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt),
OPT_INTEGER(0, "max-stack", &report.max_stack,
"Set the maximum stack depth when parsing the callchain, "
"anything beyond the specified depth will be ignored. "
"Default: " __stringify(PERF_MAX_STACK_DEPTH)),
OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
"alias for inverted call graph"),
OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
Expand Down
3 changes: 2 additions & 1 deletion tools/perf/builtin-top.c
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,8 @@ static void perf_event__process_sample(struct perf_tool *tool,
sample->callchain) {
err = machine__resolve_callchain(machine, evsel,
al.thread, sample,
&parent, &al);
&parent, &al,
PERF_MAX_STACK_DEPTH);
if (err)
return;
}
Expand Down
14 changes: 9 additions & 5 deletions tools/perf/util/machine.c
Original file line number Diff line number Diff line change
Expand Up @@ -1253,10 +1253,12 @@ static int machine__resolve_callchain_sample(struct machine *machine,
struct thread *thread,
struct ip_callchain *chain,
struct symbol **parent,
struct addr_location *root_al)
struct addr_location *root_al,
int max_stack)
{
u8 cpumode = PERF_RECORD_MISC_USER;
unsigned int i;
int chain_nr = min(max_stack, (int)chain->nr);
int i;
int err;

callchain_cursor_reset(&callchain_cursor);
Expand All @@ -1266,7 +1268,7 @@ static int machine__resolve_callchain_sample(struct machine *machine,
return 0;
}

for (i = 0; i < chain->nr; i++) {
for (i = 0; i < chain_nr; i++) {
u64 ip;
struct addr_location al;

Expand Down Expand Up @@ -1338,12 +1340,14 @@ int machine__resolve_callchain(struct machine *machine,
struct thread *thread,
struct perf_sample *sample,
struct symbol **parent,
struct addr_location *root_al)
struct addr_location *root_al,
int max_stack)
{
int ret;

ret = machine__resolve_callchain_sample(machine, thread,
sample->callchain, parent, root_al);
sample->callchain, parent,
root_al, max_stack);
if (ret)
return ret;

Expand Down
3 changes: 2 additions & 1 deletion tools/perf/util/machine.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ int machine__resolve_callchain(struct machine *machine,
struct thread *thread,
struct perf_sample *sample,
struct symbol **parent,
struct addr_location *root_al);
struct addr_location *root_al,
int max_stack);

/*
* Default guest kernel is defined by parameter --guestkallsyms
Expand Down
3 changes: 2 additions & 1 deletion tools/perf/util/session.c
Original file line number Diff line number Diff line change
Expand Up @@ -1512,7 +1512,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
if (symbol_conf.use_callchain && sample->callchain) {

if (machine__resolve_callchain(machine, evsel, al.thread,
sample, NULL, NULL) != 0) {
sample, NULL, NULL,
PERF_MAX_STACK_DEPTH) != 0) {
if (verbose)
error("Failed to resolve callchain. Skipping\n");
return;
Expand Down

0 comments on commit 91e9561

Please sign in to comment.