Skip to content

Commit

Permalink
perf sample: Make user_regs and intr_regs optional
Browse files Browse the repository at this point in the history
The struct regs_dump contains 512 bytes of cache_regs, meaning the two
regs_dump values in perf_sample contribute 1088 bytes of its total
1384-byte size. Initializing this much memory has a cost, reported by
Tavian Barnes <tavianator@tavianator.com> as about 2.5% when running
`perf script --itrace=i0`:
https://lore.kernel.org/lkml/d841b97b3ad2ca8bcab07e4293375fb7c32dfce7.1736618095.git.tavianator@tavianator.com/

Adrian Hunter <adrian.hunter@intel.com> replied that the zero
initialization was necessary and couldn't simply be removed.

This patch aims to strike a middle ground of still zeroing the
perf_sample, but removing 79% of its size by making user_regs and
intr_regs optional pointers to zalloc-ed memory. To support the
allocation, accessors are created for user_regs and intr_regs. To
support correct cleanup, perf_sample__init and perf_sample__exit
functions are created and added throughout the code base.

Signed-off-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20250113194345.1537821-1-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
  • Loading branch information
Ian Rogers authored and Namhyung Kim committed Feb 13, 2025
1 parent 08d9e88 commit dc6d2bc
Show file tree
Hide file tree
Showing 34 changed files with 450 additions and 193 deletions.
2 changes: 1 addition & 1 deletion tools/perf/arch/x86/tests/dwarf-unwind.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ static int sample_ustack(struct perf_sample *sample,
int test__arch_unwind_sample(struct perf_sample *sample,
struct thread *thread)
{
struct regs_dump *regs = &sample->user_regs;
struct regs_dump *regs = perf_sample__user_regs(sample);
u64 *buf;

buf = malloc(sizeof(u64) * PERF_REGS_MAX);
Expand Down
2 changes: 1 addition & 1 deletion tools/perf/arch/x86/util/unwind-libdw.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
struct unwind_info *ui = arg;
struct regs_dump *user_regs = &ui->sample->user_regs;
struct regs_dump *user_regs = perf_sample__user_regs(ui->sample);
Dwarf_Word dwarf_regs[17];
unsigned nregs;

Expand Down
4 changes: 3 additions & 1 deletion tools/perf/builtin-record.c
Original file line number Diff line number Diff line change
Expand Up @@ -1917,9 +1917,10 @@ static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
u16 misc_flag)
{
struct perf_sample_id *sid;
struct perf_sample sample = {};
struct perf_sample sample;
int id_hdr_size;

perf_sample__init(&sample, /*all=*/true);
lost->lost = lost_count;
if (evsel->core.ids) {
sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
Expand All @@ -1931,6 +1932,7 @@ static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
lost->header.size = sizeof(*lost) + id_hdr_size;
lost->header.misc = misc_flag;
record__write(rec, NULL, lost, lost->header.size);
perf_sample__exit(&sample);
}

static void record__read_lost_samples(struct record *rec)
Expand Down
10 changes: 8 additions & 2 deletions tools/perf/builtin-script.c
Original file line number Diff line number Diff line change
Expand Up @@ -783,14 +783,20 @@ tod_scnprintf(struct perf_script *script, char *buf, int buflen,
static int perf_sample__fprintf_iregs(struct perf_sample *sample,
struct perf_event_attr *attr, const char *arch, FILE *fp)
{
return perf_sample__fprintf_regs(&sample->intr_regs,
if (!sample->intr_regs)
return 0;

return perf_sample__fprintf_regs(perf_sample__intr_regs(sample),
attr->sample_regs_intr, arch, fp);
}

static int perf_sample__fprintf_uregs(struct perf_sample *sample,
struct perf_event_attr *attr, const char *arch, FILE *fp)
{
return perf_sample__fprintf_regs(&sample->user_regs,
if (!sample->user_regs)
return 0;

return perf_sample__fprintf_regs(perf_sample__user_regs(sample),
attr->sample_regs_user, arch, fp);
}

Expand Down
8 changes: 6 additions & 2 deletions tools/perf/builtin-top.c
Original file line number Diff line number Diff line change
Expand Up @@ -1157,6 +1157,7 @@ static int deliver_event(struct ordered_events *qe,
return 0;
}

perf_sample__init(&sample, /*all=*/false);
ret = evlist__parse_sample(evlist, event, &sample);
if (ret) {
pr_err("Can't parse sample, err = %d\n", ret);
Expand All @@ -1167,8 +1168,10 @@ static int deliver_event(struct ordered_events *qe,
assert(evsel != NULL);

if (event->header.type == PERF_RECORD_SAMPLE) {
if (evswitch__discard(&top->evswitch, evsel))
return 0;
if (evswitch__discard(&top->evswitch, evsel)) {
ret = 0;
goto next_event;
}
++top->samples;
}

Expand Down Expand Up @@ -1219,6 +1222,7 @@ static int deliver_event(struct ordered_events *qe,

ret = 0;
next_event:
perf_sample__exit(&sample);
return ret;
}

Expand Down
5 changes: 4 additions & 1 deletion tools/perf/builtin-trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -4066,13 +4066,16 @@ static int __trace__deliver_event(struct trace *trace, union perf_event *event)
{
struct evlist *evlist = trace->evlist;
struct perf_sample sample;
int err = evlist__parse_sample(evlist, event, &sample);
int err;

perf_sample__init(&sample, /*all=*/false);
err = evlist__parse_sample(evlist, event, &sample);
if (err)
fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
else
trace__handle_event(trace, event, &sample);

perf_sample__exit(&sample);
return 0;
}

Expand Down
12 changes: 9 additions & 3 deletions tools/perf/tests/code-reading.c
Original file line number Diff line number Diff line change
Expand Up @@ -479,19 +479,25 @@ static int process_sample_event(struct machine *machine,
struct thread *thread;
int ret;

if (evlist__parse_sample(evlist, event, &sample)) {
perf_sample__init(&sample, /*all=*/false);
ret = evlist__parse_sample(evlist, event, &sample);
if (ret) {
pr_debug("evlist__parse_sample failed\n");
return -1;
ret = -1;
goto out;
}

thread = machine__findnew_thread(machine, sample.pid, sample.tid);
if (!thread) {
pr_debug("machine__findnew_thread failed\n");
return -1;
ret = -1;
goto out;
}

ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread, state);
thread__put(thread);
out:
perf_sample__exit(&sample);
return ret;
}

Expand Down
6 changes: 3 additions & 3 deletions tools/perf/tests/dwarf-unwind.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,7 @@ NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__thread(struct thread *thr
unsigned long cnt = 0;
int err = -1;

memset(&sample, 0, sizeof(sample));

perf_sample__init(&sample, /*all=*/true);
if (test__arch_unwind_sample(&sample, thread)) {
pr_debug("failed to get unwind sample\n");
goto out;
Expand All @@ -134,7 +133,8 @@ NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__thread(struct thread *thr

out:
zfree(&sample.user_stack.data);
zfree(&sample.user_regs.regs);
zfree(&sample.user_regs->regs);
perf_sample__exit(&sample);
return err;
}

Expand Down
3 changes: 3 additions & 0 deletions tools/perf/tests/mmap-basic.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,14 +130,17 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest
goto out_delete_evlist;
}

perf_sample__init(&sample, /*all=*/false);
err = evlist__parse_sample(evlist, event, &sample);
if (err) {
pr_err("Can't parse sample, err = %d\n", err);
perf_sample__exit(&sample);
goto out_delete_evlist;
}

err = -1;
evsel = evlist__id2evsel(evlist, sample.id);
perf_sample__exit(&sample);
if (evsel == NULL) {
pr_debug("event with id %" PRIu64
" doesn't map to an evsel\n", sample.id);
Expand Down
4 changes: 3 additions & 1 deletion tools/perf/tests/openat-syscall-tp-fields.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,14 +111,16 @@ static int test__syscall_openat_tp_fields(struct test_suite *test __maybe_unused
continue;
}

perf_sample__init(&sample, /*all=*/false);
err = evsel__parse_sample(evsel, event, &sample);
if (err) {
pr_debug("Can't parse sample, err = %d\n", err);
perf_sample__exit(&sample);
goto out_delete_evlist;
}

tp_flags = evsel__intval(evsel, &sample, "flags");

perf_sample__exit(&sample);
if (flags != tp_flags) {
pr_debug("%s: Expected flags=%#x, got %#x\n",
__func__, flags, tp_flags);
Expand Down
6 changes: 5 additions & 1 deletion tools/perf/tests/parse-no-sample-id-all.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
static int process_event(struct evlist **pevlist, union perf_event *event)
{
struct perf_sample sample;
int ret;

if (event->header.type == PERF_RECORD_HEADER_ATTR) {
if (perf_event__process_attr(NULL, event, pevlist)) {
Expand All @@ -28,7 +29,10 @@ static int process_event(struct evlist **pevlist, union perf_event *event)
if (!*pevlist)
return -1;

if (evlist__parse_sample(*pevlist, event, &sample)) {
perf_sample__init(&sample, /*all=*/false);
ret = evlist__parse_sample(*pevlist, event, &sample);
perf_sample__exit(&sample);
if (ret) {
pr_debug("evlist__parse_sample failed\n");
return -1;
}
Expand Down
2 changes: 2 additions & 0 deletions tools/perf/tests/perf-record.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, };
char sbuf[STRERR_BUFSIZE];

perf_sample__init(&sample, /*all=*/false);
if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
evlist = evlist__new_default();

Expand Down Expand Up @@ -330,6 +331,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
out_delete_evlist:
evlist__delete(evlist);
out:
perf_sample__exit(&sample);
if (err == -EACCES)
return TEST_SKIP;
if (err < 0 || errs != 0)
Expand Down
2 changes: 2 additions & 0 deletions tools/perf/tests/perf-time-to-tsc.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
while ((event = perf_mmap__read_event(&md->core)) != NULL) {
struct perf_sample sample;

perf_sample__init(&sample, /*all=*/false);
if (event->header.type != PERF_RECORD_COMM ||
(pid_t)event->comm.pid != getpid() ||
(pid_t)event->comm.tid != getpid())
Expand All @@ -170,6 +171,7 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
}
next_event:
perf_mmap__consume(&md->core);
perf_sample__exit(&sample);
}
perf_mmap__read_done(&md->core);
}
Expand Down
62 changes: 36 additions & 26 deletions tools/perf/tests/sample-parsing.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@
#define BS_EXPECTED_LE 0x1aa00000000
#define FLAG(s) s->branch_stack->entries[i].flags

static bool samples_same(const struct perf_sample *s1,
const struct perf_sample *s2,
static bool samples_same(struct perf_sample *s1,
struct perf_sample *s2,
u64 type, u64 read_format, bool needs_swap)
{
size_t i;
Expand Down Expand Up @@ -126,13 +126,15 @@ static bool samples_same(const struct perf_sample *s1,
}

if (type & PERF_SAMPLE_REGS_USER) {
size_t sz = hweight_long(s1->user_regs.mask) * sizeof(u64);

COMP(user_regs.mask);
COMP(user_regs.abi);
if (s1->user_regs.abi &&
(!s1->user_regs.regs || !s2->user_regs.regs ||
memcmp(s1->user_regs.regs, s2->user_regs.regs, sz))) {
struct regs_dump *s1_regs = perf_sample__user_regs(s1);
struct regs_dump *s2_regs = perf_sample__user_regs(s2);
size_t sz = hweight_long(s1_regs->mask) * sizeof(u64);

COMP(user_regs->mask);
COMP(user_regs->abi);
if (s1_regs->abi &&
(!s1_regs->regs || !s2_regs->regs ||
memcmp(s1_regs->regs, s2_regs->regs, sz))) {
pr_debug("Samples differ at 'user_regs'\n");
return false;
}
Expand All @@ -157,13 +159,15 @@ static bool samples_same(const struct perf_sample *s1,
COMP(transaction);

if (type & PERF_SAMPLE_REGS_INTR) {
size_t sz = hweight_long(s1->intr_regs.mask) * sizeof(u64);

COMP(intr_regs.mask);
COMP(intr_regs.abi);
if (s1->intr_regs.abi &&
(!s1->intr_regs.regs || !s2->intr_regs.regs ||
memcmp(s1->intr_regs.regs, s2->intr_regs.regs, sz))) {
struct regs_dump *s1_regs = perf_sample__intr_regs(s1);
struct regs_dump *s2_regs = perf_sample__intr_regs(s2);
size_t sz = hweight_long(s1_regs->mask) * sizeof(u64);

COMP(intr_regs->mask);
COMP(intr_regs->abi);
if (s1_regs->abi &&
(!s1_regs->regs || !s2_regs->regs ||
memcmp(s1_regs->regs, s2_regs->regs, sz))) {
pr_debug("Samples differ at 'intr_regs'\n");
return false;
}
Expand Down Expand Up @@ -223,6 +227,16 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
const u32 raw_data[] = {0x12345678, 0x0a0b0c0d, 0x11020304, 0x05060708, 0 };
const u64 data[] = {0x2211443366558877ULL, 0, 0xaabbccddeeff4321ULL};
const u64 aux_data[] = {0xa55a, 0, 0xeeddee, 0x0282028202820282};
struct regs_dump user_regs = {
.abi = PERF_SAMPLE_REGS_ABI_64,
.mask = sample_regs,
.regs = regs,
};
struct regs_dump intr_regs = {
.abi = PERF_SAMPLE_REGS_ABI_64,
.mask = sample_regs,
.regs = regs,
};
struct perf_sample sample = {
.ip = 101,
.pid = 102,
Expand All @@ -241,11 +255,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
.callchain = &callchain.callchain,
.no_hw_idx = false,
.branch_stack = &branch_stack.branch_stack,
.user_regs = {
.abi = PERF_SAMPLE_REGS_ABI_64,
.mask = sample_regs,
.regs = regs,
},
.user_regs = &user_regs,
.user_stack = {
.size = sizeof(data),
.data = (void *)data,
Expand All @@ -254,11 +264,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
.time_enabled = 0x030a59d664fca7deULL,
.time_running = 0x011b6ae553eb98edULL,
},
.intr_regs = {
.abi = PERF_SAMPLE_REGS_ABI_64,
.mask = sample_regs,
.regs = regs,
},
.intr_regs = &intr_regs,
.phys_addr = 113,
.cgroup = 114,
.data_page_size = 115,
Expand All @@ -273,6 +279,8 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
size_t i, sz, bufsz;
int err, ret = -1;

perf_sample__init(&sample_out, /*all=*/false);
perf_sample__init(&sample_out_endian, /*all=*/false);
if (sample_type & PERF_SAMPLE_REGS_USER)
evsel.core.attr.sample_regs_user = sample_regs;

Expand Down Expand Up @@ -361,6 +369,8 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
ret = 0;
out_free:
free(event);
perf_sample__exit(&sample_out_endian);
perf_sample__exit(&sample_out);
if (ret && read_format)
pr_debug("read_format %#"PRIx64"\n", read_format);
return ret;
Expand Down
3 changes: 3 additions & 0 deletions tools/perf/tests/sw-clock.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,19 +104,22 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
while ((event = perf_mmap__read_event(&md->core)) != NULL) {
struct perf_sample sample;

perf_sample__init(&sample, /*all=*/false);
if (event->header.type != PERF_RECORD_SAMPLE)
goto next_event;

err = evlist__parse_sample(evlist, event, &sample);
if (err < 0) {
pr_debug("Error during parse sample\n");
perf_sample__exit(&sample);
goto out_delete_evlist;
}

total_periods += sample.period;
nr_samples++;
next_event:
perf_mmap__consume(&md->core);
perf_sample__exit(&sample);
}
perf_mmap__read_done(&md->core);

Expand Down
Loading

0 comments on commit dc6d2bc

Please sign in to comment.