Skip to content

Commit

Permalink
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux…
Browse files Browse the repository at this point in the history
…/kernel/git/acme/linux into perf/core

perf/core improvements and fixes:

 * Improve 'perf bench' docs, by Namhyung Kim

 * Fix build when O= is not used, from David Ahern

 * Fix cross compilation build, from Namhyung Kim

 * Fix pipe mode when callchains are used, from David Ahern

 * Follow .gnu_debuglink section to find separate symbols, from Pierre-Loup A. Griffais

 * Fix 'perf test' raw events entries, from Jiri Olsa

 * Use the event descriptions in the perf.data file, not the sysfs ones.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Ingo Molnar committed Jun 29, 2012
2 parents 357398e + d9873ab commit add7946
Show file tree
Hide file tree
Showing 25 changed files with 471 additions and 201 deletions.
78 changes: 75 additions & 3 deletions tools/perf/Documentation/perf-bench.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ SYNOPSIS

DESCRIPTION
-----------
This 'perf bench' command is general framework for benchmark suites.
This 'perf bench' command is a general framework for benchmark suites.

COMMON OPTIONS
--------------
Expand Down Expand Up @@ -45,14 +45,20 @@ SUBSYSTEM
'sched'::
Scheduler and IPC mechanisms.

'mem'::
Memory access performance.

'all'::
All benchmark subsystems.

SUITES FOR 'sched'
~~~~~~~~~~~~~~~~~~
*messaging*::
Suite for evaluating performance of scheduler and IPC mechanisms.
Based on hackbench by Rusty Russell.

Options of *pipe*
^^^^^^^^^^^^^^^^^
Options of *messaging*
^^^^^^^^^^^^^^^^^^^^^^
-p::
--pipe::
Use pipe() instead of socketpair()
Expand Down Expand Up @@ -115,6 +121,72 @@ Example of *pipe*
59004 ops/sec
---------------------

SUITES FOR 'mem'
~~~~~~~~~~~~~~~~
*memcpy*::
Suite for evaluating performance of simple memory copy in various ways.

Options of *memcpy*
^^^^^^^^^^^^^^^^^^^
-l::
--length::
Specify length of memory to copy (default: 1MB).
Available units are B, KB, MB, GB and TB (case insensitive).

-r::
--routine::
Specify routine to copy (default: default).
Available routines depend on the architecture.
On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported.

-i::
--iterations::
Repeat memcpy invocation this number of times.

-c::
--clock::
Use perf's cpu-cycles event instead of gettimeofday syscall.

-o::
--only-prefault::
Show only the result with page faults before memcpy.

-n::
--no-prefault::
Show only the result without page faults before memcpy.

*memset*::
Suite for evaluating performance of simple memory set in various ways.

Options of *memset*
^^^^^^^^^^^^^^^^^^^
-l::
--length::
Specify length of memory to set (default: 1MB).
Available units are B, KB, MB, GB and TB (case insensitive).

-r::
--routine::
Specify routine to set (default: default).
Available routines depend on the architecture.
On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported.

-i::
--iterations::
Repeat memset invocation this number of times.

-c::
--clock::
Use perf's cpu-cycles event instead of gettimeofday syscall.

-o::
--only-prefault::
Show only the result with page faults before memset.

-n::
--no-prefault::
Show only the result without page faults before memset.

SEE ALSO
--------
linkperf:perf[1]
2 changes: 1 addition & 1 deletion tools/perf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ endif

### --- END CONFIGURATION SECTION ---

BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -I$(OUTPUT)/util -I$(TRACE_EVENT_DIR) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -I$(OUTPUT)util -I$(TRACE_EVENT_DIR) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
BASIC_LDFLAGS =

# Guard against environment variables
Expand Down
4 changes: 2 additions & 2 deletions tools/perf/bench/mem-memcpy.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ static bool no_prefault;
static const struct option options[] = {
OPT_STRING('l', "length", &length_str, "1MB",
"Specify length of memory to copy. "
"available unit: B, MB, GB (upper and lower)"),
"Available units: B, KB, MB, GB and TB (upper and lower)"),
OPT_STRING('r', "routine", &routine, "default",
"Specify routine to copy"),
OPT_INTEGER('i', "iterations", &iterations,
"repeat memcpy() invocation this number of times"),
OPT_BOOLEAN('c', "clock", &use_clock,
"Use CPU clock for measuring"),
"Use cycles event instead of gettimeofday() for measuring"),
OPT_BOOLEAN('o', "only-prefault", &only_prefault,
"Show only the result with page faults before memcpy()"),
OPT_BOOLEAN('n', "no-prefault", &no_prefault,
Expand Down
8 changes: 4 additions & 4 deletions tools/perf/bench/mem-memset.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ static bool no_prefault;

static const struct option options[] = {
OPT_STRING('l', "length", &length_str, "1MB",
"Specify length of memory to copy. "
"available unit: B, MB, GB (upper and lower)"),
"Specify length of memory to set. "
"Available units: B, KB, MB, GB and TB (upper and lower)"),
OPT_STRING('r', "routine", &routine, "default",
"Specify routine to copy"),
"Specify routine to set"),
OPT_INTEGER('i', "iterations", &iterations,
"repeat memset() invocation this number of times"),
OPT_BOOLEAN('c', "clock", &use_clock,
"Use CPU clock for measuring"),
"Use cycles event instead of gettimeofday() for measuring"),
OPT_BOOLEAN('o', "only-prefault", &only_prefault,
"Show only the result with page faults before memset()"),
OPT_BOOLEAN('n', "no-prefault", &no_prefault,
Expand Down
4 changes: 2 additions & 2 deletions tools/perf/builtin-bench.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ struct bench_suite {
};
\
/* sentinel: easy for help */
#define suite_all { "all", "test all suite (pseudo suite)", NULL }
#define suite_all { "all", "Test all benchmark suites", NULL }

static struct bench_suite sched_suites[] = {
{ "messaging",
Expand Down Expand Up @@ -75,7 +75,7 @@ static struct bench_subsys subsystems[] = {
"memory access performance",
mem_suites },
{ "all", /* sentinel: easy for help */
"test all subsystem (pseudo subsystem)",
"all benchmark subsystem",
NULL },
{ NULL,
NULL,
Expand Down
37 changes: 25 additions & 12 deletions tools/perf/builtin-kmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ static unsigned long nr_allocs, nr_cross_allocs;

#define PATH_SYS_NODE "/sys/devices/system/node"

/*
 * State for 'perf kmem': pairs the perf_tool callbacks with the session
 * being processed. Handlers receive only the tool pointer and use
 * container_of() to get back to this struct and reach session->pevent.
 */
struct perf_kmem {
struct perf_tool tool;
struct perf_session *session; /* assigned in __cmd_kmem() after perf_session__new() */
};

static void init_cpunode_map(void)
{
FILE *fp;
Expand Down Expand Up @@ -278,14 +283,16 @@ static void process_free_event(void *data,
s_alloc->alloc_cpu = -1;
}

static void process_raw_event(union perf_event *raw_event __used, void *data,
static void process_raw_event(struct perf_tool *tool,
union perf_event *raw_event __used, void *data,
int cpu, u64 timestamp, struct thread *thread)
{
struct perf_kmem *kmem = container_of(tool, struct perf_kmem, tool);
struct event_format *event;
int type;

type = trace_parse_common_type(data);
event = trace_find_event(type);
type = trace_parse_common_type(kmem->session->pevent, data);
event = pevent_find_event(kmem->session->pevent, type);

if (!strcmp(event->name, "kmalloc") ||
!strcmp(event->name, "kmem_cache_alloc")) {
Expand All @@ -306,7 +313,7 @@ static void process_raw_event(union perf_event *raw_event __used, void *data,
}
}

static int process_sample_event(struct perf_tool *tool __used,
static int process_sample_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel __used,
Expand All @@ -322,16 +329,18 @@ static int process_sample_event(struct perf_tool *tool __used,

dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);

process_raw_event(event, sample->raw_data, sample->cpu,
process_raw_event(tool, event, sample->raw_data, sample->cpu,
sample->time, thread);

return 0;
}

static struct perf_tool perf_kmem = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
.ordered_samples = true,
static struct perf_kmem perf_kmem = {
.tool = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
.ordered_samples = true,
},
};

static double fragmentation(unsigned long n_req, unsigned long n_alloc)
Expand Down Expand Up @@ -486,19 +495,23 @@ static void sort_result(void)
static int __cmd_kmem(void)
{
int err = -EINVAL;
struct perf_session *session = perf_session__new(input_name, O_RDONLY,
0, false, &perf_kmem);
struct perf_session *session;

session = perf_session__new(input_name, O_RDONLY, 0, false,
&perf_kmem.tool);
if (session == NULL)
return -ENOMEM;

perf_kmem.session = session;

if (perf_session__create_kernel_maps(session) < 0)
goto out_delete;

if (!perf_session__has_traces(session, "kmem record"))
goto out_delete;

setup_pager();
err = perf_session__process_events(session, &perf_kmem);
err = perf_session__process_events(session, &perf_kmem.tool);
if (err != 0)
goto out_delete;
sort_result();
Expand Down
4 changes: 2 additions & 2 deletions tools/perf/builtin-lock.c
Original file line number Diff line number Diff line change
Expand Up @@ -724,8 +724,8 @@ process_raw_event(void *data, int cpu, u64 timestamp, struct thread *thread)
struct event_format *event;
int type;

type = trace_parse_common_type(data);
event = trace_find_event(type);
type = trace_parse_common_type(session->pevent, data);
event = pevent_find_event(session->pevent, type);

if (!strcmp(event->name, "lock_acquire"))
process_lock_acquire_event(data, event, cpu, timestamp, thread);
Expand Down
6 changes: 4 additions & 2 deletions tools/perf/builtin-report.c
Original file line number Diff line number Diff line change
Expand Up @@ -245,11 +245,12 @@ static int process_read_event(struct perf_tool *tool,
return 0;
}

/* For pipe mode, sample_type is not currently set */
static int perf_report__setup_sample_type(struct perf_report *rep)
{
struct perf_session *self = rep->session;

if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
if (!self->fd_pipe && !(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
if (sort__has_parent) {
ui__error("Selected --sort parent, but no "
"callchain data. Did you call "
Expand All @@ -272,7 +273,8 @@ static int perf_report__setup_sample_type(struct perf_report *rep)
}

if (sort__branch_mode == 1) {
if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) {
if (!self->fd_pipe &&
!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) {
ui__error("Selected -b but no branch data. "
"Did you call perf record without -b?\n");
return -1;
Expand Down
36 changes: 25 additions & 11 deletions tools/perf/builtin-sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ static u64 sleep_measurement_overhead;

static unsigned long nr_tasks;

/*
 * State for 'perf sched': pairs the perf_tool callbacks with the session
 * being processed. The tracepoint sample handler uses container_of() on
 * the tool pointer to get back to this struct and reach session->pevent.
 */
struct perf_sched {
struct perf_tool tool;
struct perf_session *session; /* assigned in read_events() after perf_session__new() */
};

struct sched_atom;

struct task_desc {
Expand Down Expand Up @@ -1597,6 +1602,8 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool,
struct perf_evsel *evsel,
struct machine *machine)
{
struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
struct pevent *pevent = sched->session->pevent;
struct thread *thread = machine__findnew_thread(machine, sample->pid);

if (thread == NULL) {
Expand All @@ -1612,20 +1619,23 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool,
tracepoint_handler f = evsel->handler.func;

if (evsel->handler.data == NULL)
evsel->handler.data = trace_find_event(evsel->attr.config);
evsel->handler.data = pevent_find_event(pevent,
evsel->attr.config);

f(tool, evsel->handler.data, sample, machine, thread);
}

return 0;
}

static struct perf_tool perf_sched = {
.sample = perf_sched__process_tracepoint_sample,
.comm = perf_event__process_comm,
.lost = perf_event__process_lost,
.fork = perf_event__process_task,
.ordered_samples = true,
static struct perf_sched perf_sched = {
.tool = {
.sample = perf_sched__process_tracepoint_sample,
.comm = perf_event__process_comm,
.lost = perf_event__process_lost,
.fork = perf_event__process_task,
.ordered_samples = true,
},
};

static void read_events(bool destroy, struct perf_session **psession)
Expand All @@ -1640,16 +1650,20 @@ static void read_events(bool destroy, struct perf_session **psession)
{ "sched:sched_process_exit", process_sched_exit_event, },
{ "sched:sched_migrate_task", process_sched_migrate_task_event, },
};
struct perf_session *session = perf_session__new(input_name, O_RDONLY,
0, false, &perf_sched);
struct perf_session *session;

session = perf_session__new(input_name, O_RDONLY, 0, false,
&perf_sched.tool);
if (session == NULL)
die("No Memory");

err = perf_evlist__set_tracepoints_handlers_array(session->evlist, handlers);
perf_sched.session = session;

err = perf_session__set_tracepoints_handlers(session, handlers);
assert(err == 0);

if (perf_session__has_traces(session, "record -R")) {
err = perf_session__process_events(session, &perf_sched);
err = perf_session__process_events(session, &perf_sched.tool);
if (err)
die("Failed to process events, error %d", err);

Expand Down
Loading

0 comments on commit add7946

Please sign in to comment.