Skip to content

Commit

Permalink
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux…
Browse files Browse the repository at this point in the history
…/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

. Check for flex and bison before continuing building, from Borislav Petkov.

. Make event_copy local to mmaps, fixing buffer wrap around problems, from
  David Ahern.

. Add option for runtime switching perf data file in perf report, just press
  's' and a menu with the valid files found in the current directory will be
  presented, from Feng Tang.

. Add support to display whole group data for raw columns, from Jiri Olsa.

. Fix SIGALRM and pipe read race for the rwtop perl script, from Jiri Olsa.

. Fix perf_evsel::exclude_GH handling and add a test to catch regressions, from
  Jiri Olsa.

. Error checking fixes, from Namhyung Kim.

. Fix calloc argument ordering, from Paul Gortmaker.

. Fix set event list leader, from Stephane Eranian.

. Add per processor socket count aggregation in perf stat, from Stephane Eranian.

. Fix perf python binding breakage.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Ingo Molnar committed Feb 6, 2013
2 parents 0fbdad0 + 88fd2b6 commit 661e591
Show file tree
Hide file tree
Showing 27 changed files with 611 additions and 128 deletions.
9 changes: 8 additions & 1 deletion tools/perf/Documentation/perf-stat.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,16 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m

-I msecs::
--interval-print msecs::
print count deltas every N milliseconds (minimum: 100ms)
Print count deltas every N milliseconds (minimum: 100ms)
example: perf stat -I 1000 -e cycles -a sleep 5

--aggr-socket::
Aggregate counts per processor socket for system-wide mode measurements. This
is a useful mode to detect imbalance between sockets. To enable this mode,
use --aggr-socket in addition to -a (system-wide). The output includes the
socket number and the number of online processors on that socket. This is
useful to gauge the amount of aggregation.

EXAMPLES
--------

Expand Down
13 changes: 10 additions & 3 deletions tools/perf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ RM = rm -f
MKDIR = mkdir
FIND = find
INSTALL = install
FLEX = flex
BISON= bison

# sparse is architecture-neutral, which means that we need to tell it
# explicitly what architecture to check for. Fix this up for yours..
Expand All @@ -158,6 +160,14 @@ ifneq ($(MAKECMDGOALS),clean)
ifneq ($(MAKECMDGOALS),tags)
-include config/feature-tests.mak

ifeq ($(call get-executable,$(FLEX)),)
dummy := $(error Error: $(FLEX) is missing on this system, please install it)
endif

ifeq ($(call get-executable,$(BISON)),)
dummy := $(error Error: $(BISON) is missing on this system, please install it)
endif

ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -fstack-protector-all,-fstack-protector-all),y)
CFLAGS := $(CFLAGS) -fstack-protector-all
endif
Expand Down Expand Up @@ -282,9 +292,6 @@ endif

export PERL_PATH

FLEX = flex
BISON= bison

$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
$(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c

Expand Down
3 changes: 2 additions & 1 deletion tools/perf/builtin-annotate.c
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
if (symbol__init() < 0)
return -1;

setup_sorting(annotate_usage, options);
if (setup_sorting() < 0)
usage_with_options(annotate_usage, options);

if (argc) {
/*
Expand Down
4 changes: 3 additions & 1 deletion tools/perf/builtin-diff.c
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,9 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)

ui_init();

setup_sorting(diff_usage, options);
if (setup_sorting() < 0)
usage_with_options(diff_usage, options);

setup_pager();

sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", NULL);
Expand Down
4 changes: 2 additions & 2 deletions tools/perf/builtin-evlist.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('F', "freq", &details.freq, "Show the sample frequency"),
OPT_BOOLEAN('v', "verbose", &details.verbose,
"Show all event attr details"),
OPT_BOOLEAN('g', "group", &symbol_conf.event_group,
OPT_BOOLEAN('g', "group", &details.event_group,
"Show event group information"),
OPT_END()
};
Expand All @@ -52,7 +52,7 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused)
if (argc)
usage_with_options(evlist_usage, options);

if (symbol_conf.event_group && (details.verbose || details.freq)) {
if (details.event_group && (details.verbose || details.freq)) {
pr_err("--group option is not compatible with other options\n");
usage_with_options(evlist_usage, options);
}
Expand Down
41 changes: 29 additions & 12 deletions tools/perf/builtin-report.c
Original file line number Diff line number Diff line change
Expand Up @@ -468,9 +468,17 @@ static int __cmd_report(struct perf_report *rep)

if (use_browser > 0) {
if (use_browser == 1) {
perf_evlist__tui_browse_hists(session->evlist, help,
NULL,
&session->header.env);
ret = perf_evlist__tui_browse_hists(session->evlist,
help,
NULL,
&session->header.env);
/*
* Usually "ret" is the last pressed key, and we only
* care if the key notifies us to switch data file.
*/
if (ret != K_SWITCH_INPUT_DATA)
ret = 0;

} else if (use_browser == 2) {
perf_evlist__gtk_browse_hists(session->evlist, help,
NULL);
Expand Down Expand Up @@ -708,6 +716,16 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
else
input_name = "perf.data";
}

if (strcmp(input_name, "-") != 0)
setup_browser(true);
else {
use_browser = 0;
perf_hpp__column_enable(PERF_HPP__OVERHEAD);
perf_hpp__init();
}

repeat:
session = perf_session__new(input_name, O_RDONLY,
report.force, false, &report.tool);
if (session == NULL)
Expand All @@ -733,15 +751,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)

}

if (strcmp(input_name, "-") != 0)
setup_browser(true);
else {
use_browser = 0;
perf_hpp__column_enable(PERF_HPP__OVERHEAD);
perf_hpp__init();
}

setup_sorting(report_usage, options);
if (setup_sorting() < 0)
usage_with_options(report_usage, options);

/*
* Only in the newt browser we are doing integrated annotation,
Expand Down Expand Up @@ -809,6 +820,12 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
}

ret = __cmd_report(&report);
if (ret == K_SWITCH_INPUT_DATA) {
perf_session__delete(session);
goto repeat;
} else
ret = 0;

error:
perf_session__delete(session);
return ret;
Expand Down
126 changes: 115 additions & 11 deletions tools/perf/builtin-stat.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
static void print_stat(int argc, const char **argv);
static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
static void print_counter(struct perf_evsel *counter, char *prefix);
static void print_aggr_socket(char *prefix);

static struct perf_evlist *evsel_list;

Expand All @@ -79,6 +80,7 @@ static int run_count = 1;
static bool no_inherit = false;
static bool scale = true;
static bool no_aggr = false;
static bool aggr_socket = false;
static pid_t child_pid = -1;
static bool null_run = false;
static int detailed_run = 0;
Expand All @@ -93,6 +95,7 @@ static const char *post_cmd = NULL;
static bool sync_run = false;
static unsigned int interval = 0;
static struct timespec ref_time;
static struct cpu_map *sock_map;

static volatile int done = 0;

Expand Down Expand Up @@ -312,7 +315,9 @@ static void print_interval(void)
sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);

if (num_print_interval == 0 && !csv_output) {
if (no_aggr)
if (aggr_socket)
fprintf(output, "# time socket cpus counts events\n");
else if (no_aggr)
fprintf(output, "# time CPU counts events\n");
else
fprintf(output, "# time counts events\n");
Expand All @@ -321,7 +326,9 @@ static void print_interval(void)
if (++num_print_interval == 25)
num_print_interval = 0;

if (no_aggr) {
if (aggr_socket)
print_aggr_socket(prefix);
else if (no_aggr) {
list_for_each_entry(counter, &evsel_list->entries, node)
print_counter(counter, prefix);
} else {
Expand Down Expand Up @@ -349,6 +356,12 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
ts.tv_nsec = 0;
}

if (aggr_socket
&& cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) {
perror("cannot build socket map");
return -1;
}

if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
perror("failed to create pipes");
return -1;
Expand Down Expand Up @@ -529,13 +542,21 @@ static void print_noise(struct perf_evsel *evsel, double avg)
print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
}

static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{
double msecs = avg / 1e6;
char cpustr[16] = { '\0', };
const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";

if (no_aggr)
if (aggr_socket)
sprintf(cpustr, "S%*d%s%*d%s",
csv_output ? 0 : -5,
cpu,
csv_sep,
csv_output ? 0 : 4,
nr,
csv_sep);
else if (no_aggr)
sprintf(cpustr, "CPU%*d%s",
csv_output ? 0 : -4,
perf_evsel__cpus(evsel)->map[cpu], csv_sep);
Expand Down Expand Up @@ -734,7 +755,7 @@ static void print_ll_cache_misses(int cpu,
fprintf(output, " of all LL-cache hits ");
}

static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{
double total, ratio = 0.0;
char cpustr[16] = { '\0', };
Expand All @@ -747,7 +768,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
else
fmt = "%s%18.0f%s%-25s";

if (no_aggr)
if (aggr_socket)
sprintf(cpustr, "S%*d%s%*d%s",
csv_output ? 0 : -5,
cpu,
csv_sep,
csv_output ? 0 : 4,
nr,
csv_sep);
else if (no_aggr)
sprintf(cpustr, "CPU%*d%s",
csv_output ? 0 : -4,
perf_evsel__cpus(evsel)->map[cpu], csv_sep);
Expand Down Expand Up @@ -853,6 +882,70 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
}
}

/*
 * Print the counters aggregated per processor socket.
 *
 * For every entry of sock_map and every counter on evsel_list, the per-CPU
 * val/ena/run values of the CPUs whose socket matches are summed and one
 * output line is emitted.
 *
 * @prefix: optional string written at the start of each line (the interval
 *          timestamp when called from print_interval()); NULL for no prefix.
 *
 * Returns silently when sock_map has not been built.
 */
static void print_aggr_socket(char *prefix)
{
	struct perf_evsel *counter;
	u64 ena, run, val;
	int cpu, s, s2, sock, nr;

	if (!sock_map)
		return;

	for (s = 0; s < sock_map->nr; s++) {
		sock = cpu_map__socket(sock_map, s);
		list_for_each_entry(counter, &evsel_list->entries, node) {
			val = ena = run = 0;
			/* nr counts how many CPUs contributed to this socket's sum */
			nr = 0;
			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
				s2 = cpu_map__get_socket(evsel_list->cpus, cpu);
				if (s2 != sock)
					continue;
				val += counter->counts->cpu[cpu].val;
				ena += counter->counts->cpu[cpu].ena;
				run += counter->counts->cpu[cpu].run;
				nr++;
			}
			if (prefix)
				fprintf(output, "%s", prefix);

			if (run == 0 || ena == 0) {
				/*
				 * Label the row with the socket id (sock), the
				 * same value handed to nsec_printout() and
				 * abs_printout() below, and not with the loop
				 * index into sock_map, so that counted and
				 * not-counted rows of one socket carry the
				 * same label.
				 */
				fprintf(output, "S%*d%s%*d%s%*s%s%*s",
					csv_output ? 0 : -5,
					sock,
					csv_sep,
					csv_output ? 0 : 4,
					nr,
					csv_sep,
					csv_output ? 0 : 18,
					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
					csv_sep,
					csv_output ? 0 : -24,
					perf_evsel__name(counter));
				if (counter->cgrp)
					fprintf(output, "%s%s",
						csv_sep, counter->cgrp->name);

				fputc('\n', output);
				continue;
			}

			if (nsec_counter(counter))
				nsec_printout(sock, nr, counter, val);
			else
				abs_printout(sock, nr, counter, val);

			if (!csv_output) {
				print_noise(counter, 1.0);

				/* show the share of enabled time the counter actually ran */
				if (run != ena)
					fprintf(output, "  (%.2f%%)",
						100.0 * run / ena);
			}
			fputc('\n', output);
		}
	}
}

/*
* Print out the results of a single counter:
* aggregated counts in system-wide mode
Expand Down Expand Up @@ -882,9 +975,9 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
}

if (nsec_counter(counter))
nsec_printout(-1, counter, avg);
nsec_printout(-1, 0, counter, avg);
else
abs_printout(-1, counter, avg);
abs_printout(-1, 0, counter, avg);

print_noise(counter, avg);

Expand Down Expand Up @@ -940,9 +1033,9 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
}

if (nsec_counter(counter))
nsec_printout(cpu, counter, val);
nsec_printout(cpu, 0, counter, val);
else
abs_printout(cpu, counter, val);
abs_printout(cpu, 0, counter, val);

if (!csv_output) {
print_noise(counter, 1.0);
Expand Down Expand Up @@ -980,7 +1073,9 @@ static void print_stat(int argc, const char **argv)
fprintf(output, ":\n\n");
}

if (no_aggr) {
if (aggr_socket)
print_aggr_socket(NULL);
else if (no_aggr) {
list_for_each_entry(counter, &evsel_list->entries, node)
print_counter(counter, NULL);
} else {
Expand Down Expand Up @@ -1228,6 +1323,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
"command to run after to the measured command"),
OPT_UINTEGER('I', "interval-print", &interval,
"print counts at regular interval in ms (>= 100)"),
OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"),
OPT_END()
};
const char * const stat_usage[] = {
Expand Down Expand Up @@ -1314,6 +1410,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
usage_with_options(stat_usage, options);
}

if (aggr_socket) {
if (!perf_target__has_cpu(&target)) {
fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n");
usage_with_options(stat_usage, options);
}
no_aggr = true;
}

if (add_default_attributes())
goto out;

Expand Down
Loading

0 comments on commit 661e591

Please sign in to comment.