Skip to content

Commit

Permalink
perf session: Keep file mmapped instead of malloc/memcpy
Browse files Browse the repository at this point in the history
Profiling perf with perf revealed that a large part of the processing time is
spent in malloc/memcpy/free in the sample ordering code. That code copies the
data from the mmap into malloc'ed memory. That's silly. We can keep the mmap
and just store the pointer in the queuing data structure. For 64-bit this is
not a problem as we map the whole file anyway. On 32-bit we keep 8 maps around
and unmap the oldest before mmapping the next chunk of the file.

Performance gain: 2.95s -> 1.23s (factor 2.4)

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <20101130163820.278787719@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
  • Loading branch information
Thomas Gleixner authored and Arnaldo Carvalho de Melo committed Nov 30, 2010
1 parent 55b4462 commit fe17420
Showing 1 changed file with 11 additions and 16 deletions.
27 changes: 11 additions & 16 deletions tools/perf/util/session.c
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,6 @@ static void flush_sample_queue(struct perf_session *s,

os->last_flush = iter->timestamp;
list_del(&iter->list);
free(iter->event);
free(iter);
}

Expand Down Expand Up @@ -531,7 +530,6 @@ static int queue_sample_event(event_t *event, struct sample_data *data,
u64 timestamp = data->time;
struct sample_queue *new;


if (timestamp < s->ordered_samples.last_flush) {
printf("Warning: Timestamp below last timeslice flush\n");
return -EINVAL;
Expand All @@ -542,14 +540,7 @@ static int queue_sample_event(event_t *event, struct sample_data *data,
return -ENOMEM;

new->timestamp = timestamp;

new->event = malloc(event->header.size);
if (!new->event) {
free(new);
return -ENOMEM;
}

memcpy(new->event, event, event->header.size);
new->event = event;

__queue_sample_event(new, s);

Expand Down Expand Up @@ -747,12 +738,12 @@ int __perf_session__process_events(struct perf_session *session,
u64 file_size, struct perf_event_ops *ops)
{
u64 head, page_offset, file_offset, file_pos, progress_next;
int err, mmap_prot, mmap_flags;
int err, mmap_prot, mmap_flags, map_idx = 0;
struct ui_progress *progress;
size_t page_size, mmap_size;
char *buf, *mmaps[8];
event_t *event;
uint32_t size;
char *buf;

perf_event_ops__fill_defaults(ops);

Expand All @@ -774,6 +765,8 @@ int __perf_session__process_events(struct perf_session *session,
if (mmap_size > file_size)
mmap_size = file_size;

memset(mmaps, 0, sizeof(mmaps));

mmap_prot = PROT_READ;
mmap_flags = MAP_SHARED;

Expand All @@ -789,6 +782,8 @@ int __perf_session__process_events(struct perf_session *session,
err = -errno;
goto out_err;
}
mmaps[map_idx] = buf;
map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
file_pos = file_offset + head;

more:
Expand All @@ -801,10 +796,10 @@ int __perf_session__process_events(struct perf_session *session,
size = 8;

if (head + event->header.size >= mmap_size) {
int munmap_ret;

munmap_ret = munmap(buf, mmap_size);
assert(munmap_ret == 0);
if (mmaps[map_idx]) {
munmap(mmaps[map_idx], mmap_size);
mmaps[map_idx] = NULL;
}

page_offset = page_size * (head / page_size);
file_offset += page_offset;
Expand Down

0 comments on commit fe17420

Please sign in to comment.