Skip to content

Commit

Permalink
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux…
Browse files Browse the repository at this point in the history
…/kernel/git/jolsa/perf into perf/core

Pull perf/core improvements and fixes from Jiri Olsa:

  * Add support to accumulate hist periods (Namhyung Kim)

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Ingo Molnar committed Jun 3, 2014
2 parents e450f90 + 0506aec commit 9b26136
Show file tree
Hide file tree
Showing 28 changed files with 1,768 additions and 323 deletions.
7 changes: 6 additions & 1 deletion tools/perf/Documentation/perf-report.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ OPTIONS
--fields=::
Specify output field - multiple keys can be specified in CSV format.
Following fields are available:
overhead, overhead_sys, overhead_us, sample and period.
overhead, overhead_sys, overhead_us, overhead_children, sample and period.
Also it can contain any sort key(s).

By default, every sort keys not specified in -F will be appended
Expand Down Expand Up @@ -163,6 +163,11 @@ OPTIONS

Default: fractal,0.5,callee,function.

--children::
Accumulate callchain of children to parent entry so that then can
show up in the output. The output will have a new "Children" column
and will be sorted on the data. It requires callchains are recorded.

--max-stack::
Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. This is a trade-off
Expand Down
8 changes: 7 additions & 1 deletion tools/perf/Documentation/perf-top.txt
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ Default is to monitor all CPUS.
--fields=::
Specify output field - multiple keys can be specified in CSV format.
Following fields are available:
overhead, overhead_sys, overhead_us, sample and period.
overhead, overhead_sys, overhead_us, overhead_children, sample and period.
Also it can contain any sort key(s).

By default, every sort keys not specified in --field will be appended
Expand Down Expand Up @@ -161,6 +161,12 @@ Default is to monitor all CPUS.
Setup and enable call-graph (stack chain/backtrace) recording,
implies -g.

--children::
Accumulate callchain of children to parent entry so that then can
show up in the output. The output will have a new "Children" column
and will be sorted on the data. It requires -g/--call-graph option
enabled.

--max-stack::
Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. This is a trade-off
Expand Down
1 change: 1 addition & 0 deletions tools/perf/Makefile.perf
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,7 @@ LIB_OBJS += $(OUTPUT)tests/hists_common.o
LIB_OBJS += $(OUTPUT)tests/hists_link.o
LIB_OBJS += $(OUTPUT)tests/hists_filter.o
LIB_OBJS += $(OUTPUT)tests/hists_output.o
LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o
LIB_OBJS += $(OUTPUT)tests/python-use.o
LIB_OBJS += $(OUTPUT)tests/bp_signal.o
LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
Expand Down
5 changes: 3 additions & 2 deletions tools/perf/builtin-annotate.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,13 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
return 0;
}

he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0);
he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0,
true);
if (he == NULL)
return -ENOMEM;

ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
hists__inc_nr_samples(&evsel->hists, true);
return ret;
}

Expand Down
2 changes: 1 addition & 1 deletion tools/perf/builtin-diff.c
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ static int hists__add_entry(struct hists *hists,
u64 weight, u64 transaction)
{
if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight,
transaction) != NULL)
transaction, true) != NULL)
return 0;
return -ENOMEM;
}
Expand Down
210 changes: 64 additions & 146 deletions tools/perf/builtin-report.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ static int report__config(const char *var, const char *value, void *cb)
rep->min_percent = strtof(value, NULL);
return 0;
}
if (!strcmp(var, "report.children")) {
symbol_conf.cumulate_callchain = perf_config_bool(var, value);
return 0;
}

return perf_default_config(var, value, cb);
}
Expand All @@ -85,156 +89,52 @@ static void report__inc_stats(struct report *rep, struct hist_entry *he)
*/
if (he->stat.nr_events == 1)
rep->nr_entries++;

/*
* Only counts number of samples at this stage as it's more
* natural to do it here and non-sample events are also
* counted in perf_session_deliver_event(). The dump_trace
* requires this info is ready before going to the output tree.
*/
hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
if (!he->filtered)
he->hists->stats.nr_non_filtered_samples++;
}

static int report__add_mem_hist_entry(struct report *rep, struct addr_location *al,
struct perf_sample *sample, struct perf_evsel *evsel)
static int hist_iter__report_callback(struct hist_entry_iter *iter,
struct addr_location *al, bool single,
void *arg)
{
struct symbol *parent = NULL;
struct hist_entry *he;
struct mem_info *mi, *mx;
uint64_t cost;
int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);

if (err)
return err;
int err = 0;
struct report *rep = arg;
struct hist_entry *he = iter->he;
struct perf_evsel *evsel = iter->evsel;
struct mem_info *mi;
struct branch_info *bi;

mi = sample__resolve_mem(sample, al);
if (!mi)
return -ENOMEM;
report__inc_stats(rep, he);

if (rep->hide_unresolved && !al->sym)
if (!ui__has_annotation())
return 0;

cost = sample->weight;
if (!cost)
cost = 1;

/*
* must pass period=weight in order to get the correct
* sorting from hists__collapse_resort() which is solely
* based on periods. We want sorting be done on nr_events * weight
* and this is indirectly achieved by passing period=weight here
* and the he_stat__add_period() function.
*/
he = __hists__add_entry(&evsel->hists, al, parent, NULL, mi,
cost, cost, 0);
if (!he)
return -ENOMEM;

if (ui__has_annotation()) {
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
if (err)
goto out;

mx = he->mem_info;
err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx);
if (sort__mode == SORT_MODE__BRANCH) {
bi = he->branch_info;
err = addr_map_symbol__inc_samples(&bi->from, evsel->idx);
if (err)
goto out;
}

report__inc_stats(rep, he);

err = hist_entry__append_callchain(he, sample);
out:
return err;
}

static int report__add_branch_hist_entry(struct report *rep, struct addr_location *al,
struct perf_sample *sample, struct perf_evsel *evsel)
{
struct symbol *parent = NULL;
unsigned i;
struct hist_entry *he;
struct branch_info *bi, *bx;
int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);

if (err)
return err;

bi = sample__resolve_bstack(sample, al);
if (!bi)
return -ENOMEM;

for (i = 0; i < sample->branch_stack->nr; i++) {
if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
continue;
err = addr_map_symbol__inc_samples(&bi->to, evsel->idx);

err = -ENOMEM;

/* overwrite the 'al' to branch-to info */
al->map = bi[i].to.map;
al->sym = bi[i].to.sym;
al->addr = bi[i].to.addr;
/*
* The report shows the percentage of total branches captured
* and not events sampled. Thus we use a pseudo period of 1.
*/
he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL,
1, 1, 0);
if (he) {
if (ui__has_annotation()) {
bx = he->branch_info;
err = addr_map_symbol__inc_samples(&bx->from,
evsel->idx);
if (err)
goto out;

err = addr_map_symbol__inc_samples(&bx->to,
evsel->idx);
if (err)
goto out;
}
report__inc_stats(rep, he);
} else
} else if (rep->mem_mode) {
mi = he->mem_info;
err = addr_map_symbol__inc_samples(&mi->daddr, evsel->idx);
if (err)
goto out;
}
err = 0;
out:
free(bi);
return err;
}

static int report__add_hist_entry(struct report *rep, struct perf_evsel *evsel,
struct addr_location *al, struct perf_sample *sample)
{
struct symbol *parent = NULL;
struct hist_entry *he;
int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);

if (err)
return err;

he = __hists__add_entry(&evsel->hists, al, parent, NULL, NULL,
sample->period, sample->weight,
sample->transaction);
if (he == NULL)
return -ENOMEM;

err = hist_entry__append_callchain(he, sample);
if (err)
goto out;

if (ui__has_annotation())
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);

report__inc_stats(rep, he);
} else if (symbol_conf.cumulate_callchain) {
if (single)
err = hist_entry__inc_addr_samples(he, evsel->idx,
al->addr);
} else {
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
}

out:
return err;
}


static int process_sample_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
Expand All @@ -243,6 +143,10 @@ static int process_sample_event(struct perf_tool *tool,
{
struct report *rep = container_of(tool, struct report, tool);
struct addr_location al;
struct hist_entry_iter iter = {
.hide_unresolved = rep->hide_unresolved,
.add_entry_cb = hist_iter__report_callback,
};
int ret;

if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
Expand All @@ -257,22 +161,23 @@ static int process_sample_event(struct perf_tool *tool,
if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
return 0;

if (sort__mode == SORT_MODE__BRANCH) {
ret = report__add_branch_hist_entry(rep, &al, sample, evsel);
if (ret < 0)
pr_debug("problem adding lbr entry, skipping event\n");
} else if (rep->mem_mode == 1) {
ret = report__add_mem_hist_entry(rep, &al, sample, evsel);
if (ret < 0)
pr_debug("problem adding mem entry, skipping event\n");
} else {
if (al.map != NULL)
al.map->dso->hit = 1;
if (sort__mode == SORT_MODE__BRANCH)
iter.ops = &hist_iter_branch;
else if (rep->mem_mode)
iter.ops = &hist_iter_mem;
else if (symbol_conf.cumulate_callchain)
iter.ops = &hist_iter_cumulative;
else
iter.ops = &hist_iter_normal;

if (al.map != NULL)
al.map->dso->hit = 1;

ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack,
rep);
if (ret < 0)
pr_debug("problem adding hist entry, skipping event\n");

ret = report__add_hist_entry(rep, evsel, &al, sample);
if (ret < 0)
pr_debug("problem incrementing symbol period, skipping event\n");
}
return ret;
}

Expand Down Expand Up @@ -329,6 +234,14 @@ static int report__setup_sample_type(struct report *rep)
}
}

if (symbol_conf.cumulate_callchain) {
/* Silently ignore if callchain is missing */
if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
symbol_conf.cumulate_callchain = false;
perf_hpp__cancel_cumulate();
}
}

if (sort__mode == SORT_MODE__BRANCH) {
if (!is_pipe &&
!(sample_type & PERF_SAMPLE_BRANCH_STACK)) {
Expand Down Expand Up @@ -712,6 +625,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
"Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
"Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt),
OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
"Accumulate callchains of children and show total overhead as well"),
OPT_INTEGER(0, "max-stack", &report.max_stack,
"Set the maximum stack depth when parsing the callchain, "
"anything beyond the specified depth will be ignored. "
Expand Down Expand Up @@ -804,15 +719,18 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);

if (branch_mode == -1 && has_br_stack)
if (branch_mode == -1 && has_br_stack) {
sort__mode = SORT_MODE__BRANCH;
symbol_conf.cumulate_callchain = false;
}

if (report.mem_mode) {
if (sort__mode == SORT_MODE__BRANCH) {
pr_err("branch and mem mode incompatible\n");
goto error;
}
sort__mode = SORT_MODE__MEMORY;
symbol_conf.cumulate_callchain = false;
}

if (setup_sorting() < 0) {
Expand Down
2 changes: 1 addition & 1 deletion tools/perf/builtin-sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -1428,7 +1428,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_
int err = 0;

evsel->hists.stats.total_period += sample->period;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
hists__inc_nr_samples(&evsel->hists, true);

if (evsel->handler != NULL) {
tracepoint_handler f = evsel->handler;
Expand Down
Loading

0 comments on commit 9b26136

Please sign in to comment.