Skip to content

Commit

Permalink
Merge tag 'perf-core-for-mingo-2' of git://git.kernel.org/pub/scm/lin…
Browse files Browse the repository at this point in the history
…ux/kernel/git/acme/linux into perf/core

Pull perf/core improvements from Arnaldo Carvalho de Melo:

User visible changes:

  - Support handling complete branch stacks as histograms (Andi Kleen)

Infrastructure changes:

  - Prep work for supporting per-pkg and snapshot counters in 'perf stat' (Jiri Olsa)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Ingo Molnar committed Dec 8, 2014
2 parents e460bfd + 09a6a1b commit cfa0bd5
Show file tree
Hide file tree
Showing 9 changed files with 257 additions and 89 deletions.
12 changes: 11 additions & 1 deletion tools/perf/Documentation/perf-report.txt
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ OPTIONS
--dump-raw-trace::
Dump raw trace in ASCII.

-g [type,min[,limit],order[,key]]::
-g [type,min[,limit],order[,key][,branch]]::
--call-graph::
Display call chains using type, min percent threshold, optional print
limit and order.
Expand All @@ -177,6 +177,11 @@ OPTIONS
- function: compare on functions
- address: compare on individual code addresses

branch can be:
- branch: include last branch information in callgraph
when available. Usually more convenient to use --branch-history
for this.

Default: fractal,0.5,callee,function.

--children::
Expand Down Expand Up @@ -266,6 +271,11 @@ OPTIONS
branch stacks and it will automatically switch to the branch view mode,
unless --no-branch-stack is used.

--branch-history::
Add the addresses of sampled taken branches to the callstack.
This allows examining the path the program took to reach each sample.
The data collection must have used -b (or -j) and -g.

--objdump=<path>::
Path to objdump binary.

Expand Down
31 changes: 25 additions & 6 deletions tools/perf/builtin-report.c
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,9 @@ static int report__setup_sample_type(struct report *rep)
return -EINVAL;
}
if (symbol_conf.use_callchain) {
ui__error("Selected -g but no callchain data. Did "
"you call 'perf record' without -g?\n");
ui__error("Selected -g or --branch-history but no "
"callchain data. Did\n"
"you call 'perf record' without -g?\n");
return -1;
}
} else if (!rep->dont_use_callchains &&
Expand Down Expand Up @@ -575,6 +576,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
struct stat st;
bool has_br_stack = false;
int branch_mode = -1;
bool branch_call_mode = false;
char callchain_default_opt[] = "fractal,0.5,callee";
const char * const report_usage[] = {
"perf report [<options>]",
Expand Down Expand Up @@ -637,8 +639,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
"regex filter to identify parent, see: '--sort parent'"),
OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
"Only display entries with parent-match"),
OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
"Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order[,branch]",
"Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), add branches. "
"Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt),
OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
"Accumulate callchains of children and show total overhead as well"),
Expand Down Expand Up @@ -684,7 +686,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
"Show event group information together"),
OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "",
"use branch records for histogram filling", parse_branch_mode),
"use branch records for per branch histogram filling",
parse_branch_mode),
OPT_BOOLEAN(0, "branch-history", &branch_call_mode,
"add last branch records to call history"),
OPT_STRING(0, "objdump", &objdump_path, "path",
"objdump binary to use for disassembly and annotations"),
OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
Expand Down Expand Up @@ -745,10 +750,24 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);

if ((branch_mode == -1 && has_br_stack) || branch_mode == 1) {
/*
 * Branch mode is a tristate:
 * -1 means default, so decide based on the file having branch data.
 * 0/1 means the user chose a mode.
 *
 * branch_call_mode is a plain bool (set by --branch-history), so it can
 * never compare equal to -1; test it for falsehood instead, otherwise
 * the per-branch sort mode below could never be selected.
 */
if (((branch_mode == -1 && has_br_stack) || branch_mode == 1) &&
    !branch_call_mode) {
	sort__mode = SORT_MODE__BRANCH;
	symbol_conf.cumulate_callchain = false;
}
if (branch_call_mode) {
callchain_param.key = CCKEY_ADDRESS;
callchain_param.branch_callstack = 1;
symbol_conf.use_callchain = true;
callchain_register_param(&callchain_param);
if (sort_order == NULL)
sort_order = "srcline,symbol,dso";
}

if (report.mem_mode) {
if (sort__mode == SORT_MODE__BRANCH) {
Expand Down
105 changes: 96 additions & 9 deletions tools/perf/builtin-stat.c
Original file line number Diff line number Diff line change
Expand Up @@ -388,20 +388,102 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
update_stats(&runtime_itlb_cache_stats[0], count[0]);
}

/*
 * Reset the per-package mask attached to @counter.
 *
 * Does nothing when no mask has been allocated yet; otherwise the first
 * MAX_NR_CPUS bytes of the mask are cleared.
 */
static void zero_per_pkg(struct perf_evsel *counter)
{
	unsigned long *pkg_mask = counter->per_pkg_mask;

	if (!pkg_mask)
		return;

	memset(pkg_mask, 0, MAX_NR_CPUS);
}

/*
 * Work out whether the value read for @counter on @cpu should be skipped
 * because the socket that @cpu maps to has already been seen.
 *
 * @skip is always written: false by default, true only when the socket bit
 * was already set in the counter's per-package mask.
 *
 * Returns 0 on success, -ENOMEM if the mask cannot be allocated, and -1
 * if the socket lookup for @cpu fails.
 */
static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
{
	unsigned long *pkg_mask = counter->per_pkg_mask;
	struct cpu_map *cpus = perf_evsel__cpus(counter);
	int pkg;

	*skip = false;

	/* Only per-pkg counters with a non-empty cpu map are filtered. */
	if (!counter->per_pkg || cpu_map__empty(cpus))
		return 0;

	/* Allocate the mask on first use and remember it in the evsel. */
	if (!pkg_mask) {
		pkg_mask = zalloc(MAX_NR_CPUS);
		if (!pkg_mask)
			return -ENOMEM;

		counter->per_pkg_mask = pkg_mask;
	}

	pkg = cpu_map__get_socket(cpus, cpu);
	if (pkg < 0)
		return -1;

	/* The bit is set either way; a previously set bit means "skip". */
	if (test_and_set_bit(pkg, pkg_mask) == 1)
		*skip = true;

	return 0;
}

/*
 * Per-(cpu, thread) callback that accounts one freshly read value.
 *
 * @evsel:  counter the value belongs to
 * @cpu:    index of the cpu the value was read on
 * @thread: unused
 * @count:  value that was read; replaced by an all-zero value when
 *          check_per_pkg() reports that this cpu must be skipped
 *
 * For AGGR_CORE/AGGR_SOCKET/AGGR_NONE the value is (delta-adjusted unless
 * the counter is a snapshot, then) scaled and stored in the per-cpu slot;
 * for AGGR_GLOBAL it is accumulated into the aggregate.
 *
 * Returns 0 on success, -1 if the per-pkg check fails.
 */
static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused,
		   struct perf_counts_values *count)
{
	struct perf_counts_values *aggr = &evsel->counts->aggr;
	/*
	 * NOTE(review): 'zero' is static and is handed to the helpers below
	 * through a non-const pointer - confirm none of them can leave a
	 * non-zero value behind in it.
	 */
	static struct perf_counts_values zero;
	bool skip = false;

	if (check_per_pkg(evsel, cpu, &skip)) {
		pr_err("failed to read per-pkg counter\n");
		return -1;
	}

	if (skip)
		count = &zero;

	switch (aggr_mode) {
	case AGGR_CORE:
	case AGGR_SOCKET:
	case AGGR_NONE:
		if (!evsel->snapshot)
			perf_evsel__compute_deltas(evsel, cpu, count);
		perf_counts_values__scale(count, scale, NULL);
		evsel->counts->cpu[cpu] = *count;
		update_shadow_stats(evsel, count->values);
		break;
	case AGGR_GLOBAL:
		aggr->val += count->val;
		if (scale) {
			aggr->ena += count->ena;
			aggr->run += count->run;
		}
		/* Explicit break: do not rely on falling into 'default'. */
		break;
	default:
		break;
	}

	return 0;
}

static int read_counter(struct perf_evsel *counter);

/*
* Read out the results of a single counter:
* aggregate counts across CPUs in system-wide mode
*/
static int read_counter_aggr(struct perf_evsel *counter)
{
struct perf_counts_values *aggr = &counter->counts->aggr;
struct perf_stat *ps = counter->priv;
u64 *count = counter->counts->aggr.values;
int i;

if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
thread_map__nr(evsel_list->threads), scale) < 0)
aggr->val = aggr->ena = aggr->run = 0;

if (read_counter(counter))
return -1;

if (!counter->snapshot)
perf_evsel__compute_deltas(counter, -1, aggr);
perf_counts_values__scale(aggr, scale, &counter->counts->scaled);

for (i = 0; i < 3; i++)
update_stats(&ps->res_stats[i], count[i]);

Expand All @@ -424,16 +506,21 @@ static int read_counter_aggr(struct perf_evsel *counter)
*/
static int read_counter(struct perf_evsel *counter)
{
u64 *count;
int cpu;
int nthreads = thread_map__nr(evsel_list->threads);
int ncpus = perf_evsel__nr_cpus(counter);
int cpu, thread;

for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
return -1;
if (counter->system_wide)
nthreads = 1;

count = counter->counts->cpu[cpu].values;
if (counter->per_pkg)
zero_per_pkg(counter);

update_shadow_stats(counter, count);
for (thread = 0; thread < nthreads; thread++) {
for (cpu = 0; cpu < ncpus; cpu++) {
if (perf_evsel__read_cb(counter, cpu, thread, read_cb))
return -1;
}
}

return 0;
Expand Down
4 changes: 4 additions & 0 deletions tools/perf/util/callchain.c
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,10 @@ static int parse_callchain_sort_key(const char *value)
callchain_param.key = CCKEY_ADDRESS;
return 0;
}
if (!strncmp(value, "branch", strlen(value))) {
callchain_param.branch_callstack = 1;
return 0;
}
return -1;
}

Expand Down
1 change: 1 addition & 0 deletions tools/perf/util/callchain.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ struct callchain_param {
sort_chain_func_t sort;
enum chain_order order;
enum chain_key key;
bool branch_callstack;
};

extern struct callchain_param callchain_param;
Expand Down
34 changes: 0 additions & 34 deletions tools/perf/util/evsel.c
Original file line number Diff line number Diff line change
Expand Up @@ -954,40 +954,6 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
return 0;
}

int __perf_evsel__read(struct perf_evsel *evsel,
int ncpus, int nthreads, bool scale)
{
size_t nv = scale ? 3 : 1;
int cpu, thread;
struct perf_counts_values *aggr = &evsel->counts->aggr, count;

if (evsel->system_wide)
nthreads = 1;

aggr->val = aggr->ena = aggr->run = 0;

for (cpu = 0; cpu < ncpus; cpu++) {
for (thread = 0; thread < nthreads; thread++) {
if (FD(evsel, cpu, thread) < 0)
continue;

if (readn(FD(evsel, cpu, thread),
&count, nv * sizeof(u64)) < 0)
return -errno;

aggr->val += count.val;
if (scale) {
aggr->ena += count.ena;
aggr->run += count.run;
}
}
}

perf_evsel__compute_deltas(evsel, -1, aggr);
perf_counts_values__scale(aggr, scale, &evsel->counts->scaled);
return 0;
}

static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
{
struct perf_evsel *leader = evsel->leader;
Expand Down
30 changes: 1 addition & 29 deletions tools/perf/util/evsel.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ struct perf_evsel {
bool system_wide;
bool tracking;
bool per_pkg;
unsigned long *per_pkg_mask;
/* parse modifier helper */
int exclude_GH;
int nr_members;
Expand Down Expand Up @@ -271,35 +272,6 @@ static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,
return __perf_evsel__read_on_cpu(evsel, cpu, thread, true);
}

int __perf_evsel__read(struct perf_evsel *evsel, int ncpus, int nthreads,
bool scale);

/**
 * perf_evsel__read - Read the aggregate results on all CPUs, unscaled
 *
 * @evsel - event selector to read value
 * @ncpus - Number of cpus affected, from zero
 * @nthreads - Number of threads affected, from zero
 *
 * Forwards to __perf_evsel__read() with scaling disabled and returns its
 * result.
 */
static inline int perf_evsel__read(struct perf_evsel *evsel,
				   int ncpus, int nthreads)
{
	const bool scale = false;

	return __perf_evsel__read(evsel, ncpus, nthreads, scale);
}

/**
 * perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled
 *
 * @evsel - event selector to read value
 * @ncpus - Number of cpus affected, from zero
 * @nthreads - Number of threads affected, from zero
 *
 * Forwards to __perf_evsel__read() with scaling enabled and returns its
 * result.
 */
static inline int perf_evsel__read_scaled(struct perf_evsel *evsel,
					  int ncpus, int nthreads)
{
	const bool scale = true;

	return __perf_evsel__read(evsel, ncpus, nthreads, scale);
}

int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
struct perf_sample *sample);

Expand Down
Loading

0 comments on commit cfa0bd5

Please sign in to comment.