Skip to content

Commit

Permalink
Merge tag 'perf-core-for-mingo-20160530' of git://git.kernel.org/pub/…
Browse files Browse the repository at this point in the history
…scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible/kernel ABI changes:

- Per event callchain limit: Recently we introduced a sysctl to tune the
  max-stack for all events for which callchains were requested:

  $ sysctl kernel.perf_event_max_stack
  kernel.perf_event_max_stack = 127

  Now this patch introduces a way to configure this per event, i.e. this
  becomes possible:

  $ perf record -e sched:*/max-stack=2/ -e block:*/max-stack=10/ -a

  allowing finer tuning of how much buffer space callchains use.

  This uses a u16 from the reserved space at the end, leaving another
  u16 for future use.

  There has been interest in even finer tuning, namely to control the
  max stack for kernel and userspace callchains separately. Further
  discussion is needed, we may for instance use the remaining u16 for
  that and when it is present, assume that the sample_max_stack introduced
  in this patch applies for the kernel, and the u16 left is used for
  limiting the userspace callchain. (Arnaldo Carvalho de Melo)

Infrastructure changes:

- Adopt get_main_thread from db-export.c (Andi Kleen)

- More prep work for backward ring buffer support (Wang Nan)

- Prep work for supporting SDT (Statically Defined Tracing)
  tracepoints (Masami Hiramatsu)

- Add arch/*/include/generated/ to .gitignore (Taeung Song)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Ingo Molnar committed May 31, 2016
2 parents 7114605 + 0141226 commit 42c4fb7
Show file tree
Hide file tree
Showing 27 changed files with 252 additions and 100 deletions.
2 changes: 1 addition & 1 deletion include/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -1076,7 +1076,7 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct
extern struct perf_callchain_entry *
get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
u32 max_stack, bool crosstask, bool add_mark);
extern int get_callchain_buffers(void);
extern int get_callchain_buffers(int max_stack);
extern void put_callchain_buffers(void);

extern int sysctl_perf_event_max_stack;
Expand Down
6 changes: 5 additions & 1 deletion include/uapi/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,9 @@ enum perf_event_read_format {

/*
* Hardware event_id to monitor via a performance monitoring event:
*
* @sample_max_stack: Max number of frame pointers in a callchain,
* should be <= /proc/sys/kernel/perf_event_max_stack
*/
struct perf_event_attr {

Expand Down Expand Up @@ -385,7 +388,8 @@ struct perf_event_attr {
* Wakeup watermark for AUX area
*/
__u32 aux_watermark;
__u32 __reserved_2; /* align to __u64 */
__u16 sample_max_stack;
__u16 __reserved_2; /* align to __u64 */
};

#define perf_flags(attr) (*(&(attr)->read_format + 1))
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/stackmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
if (err)
goto free_smap;

err = get_callchain_buffers();
err = get_callchain_buffers(sysctl_perf_event_max_stack);
if (err)
goto free_smap;

Expand Down
14 changes: 12 additions & 2 deletions kernel/events/callchain.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ static int alloc_callchain_buffers(void)
return -ENOMEM;
}

int get_callchain_buffers(void)
int get_callchain_buffers(int event_max_stack)
{
int err = 0;
int count;
Expand All @@ -121,6 +121,15 @@ int get_callchain_buffers(void)
/* If the allocation failed, give up */
if (!callchain_cpus_entries)
err = -ENOMEM;
/*
* If requesting per event more than the global cap,
* return a different error to help userspace figure
* this out.
*
* And also do it here so that we have &callchain_mutex held.
*/
if (event_max_stack > sysctl_perf_event_max_stack)
err = -EOVERFLOW;
goto exit;
}

Expand Down Expand Up @@ -174,11 +183,12 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
bool user = !event->attr.exclude_callchain_user;
/* Disallow cross-task user callchains. */
bool crosstask = event->ctx->task && event->ctx->task != current;
const u32 max_stack = event->attr.sample_max_stack;

if (!kernel && !user)
return NULL;

return get_perf_callchain(regs, 0, kernel, user, sysctl_perf_event_max_stack, crosstask, true);
return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true);
}

struct perf_callchain_entry *
Expand Down
5 changes: 4 additions & 1 deletion kernel/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -8843,7 +8843,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,

if (!event->parent) {
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
err = get_callchain_buffers();
err = get_callchain_buffers(attr->sample_max_stack);
if (err)
goto err_addr_filters;
}
Expand Down Expand Up @@ -9165,6 +9165,9 @@ SYSCALL_DEFINE5(perf_event_open,
return -EINVAL;
}

if (!attr.sample_max_stack)
attr.sample_max_stack = sysctl_perf_event_max_stack;

/*
* In cgroup mode, the pid argument is used to pass the fd
* opened to the cgroup directory in cgroupfs. The cpu argument
Expand Down
5 changes: 3 additions & 2 deletions tools/lib/api/fd/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ int fdarray__add(struct fdarray *fda, int fd, short revents)
}

int fdarray__filter(struct fdarray *fda, short revents,
void (*entry_destructor)(struct fdarray *fda, int fd))
void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
void *arg)
{
int fd, nr = 0;

Expand All @@ -95,7 +96,7 @@ int fdarray__filter(struct fdarray *fda, short revents,
for (fd = 0; fd < fda->nr; ++fd) {
if (fda->entries[fd].revents & revents) {
if (entry_destructor)
entry_destructor(fda, fd);
entry_destructor(fda, fd, arg);

continue;
}
Expand Down
3 changes: 2 additions & 1 deletion tools/lib/api/fd/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ void fdarray__delete(struct fdarray *fda);
int fdarray__add(struct fdarray *fda, int fd, short revents);
int fdarray__poll(struct fdarray *fda, int timeout);
int fdarray__filter(struct fdarray *fda, short revents,
void (*entry_destructor)(struct fdarray *fda, int fd));
void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
void *arg);
int fdarray__grow(struct fdarray *fda, int extra);
int fdarray__fprintf(struct fdarray *fda, FILE *fp);

Expand Down
1 change: 1 addition & 0 deletions tools/perf/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ config.mak.autogen
*.pyo
.config-detected
util/intel-pt-decoder/inat-tables.c
arch/*/include/generated/
2 changes: 2 additions & 0 deletions tools/perf/arch/x86/util/tsc.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
struct perf_tsc_conversion tc;
int err;

if (!pc)
return 0;
err = perf_read_tsc_conversion(pc, &tc);
if (err == -EOPNOTSUPP)
return 0;
Expand Down
9 changes: 8 additions & 1 deletion tools/perf/builtin-record.c
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,13 @@ perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused
return 0;
}

/*
 * Select the perf_event_mmap_page that record__synthesize() hands to
 * perf_event__synth_time_conv(): the base of the evlist's first mmap.
 *
 * Returns NULL when rec has no evlist, the evlist has no mmap array, or
 * the first mmap has no base.
 */
static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	/* Nothing to pick without an evlist and its mmap array. */
	if (!rec->evlist || !rec->evlist->mmap)
		return NULL;

	/* A NULL base is passed through unchanged, i.e. the caller gets NULL. */
	return rec->evlist->mmap[0].base;
}

static int record__synthesize(struct record *rec)
{
struct perf_session *session = rec->session;
Expand Down Expand Up @@ -692,7 +699,7 @@ static int record__synthesize(struct record *rec)
}
}

err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool,
err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
process_synthesized_event, machine);
if (err)
goto out;
Expand Down
8 changes: 4 additions & 4 deletions tools/perf/tests/fdarray.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,15 @@ int test__fdarray__filter(int subtest __maybe_unused)
}

fdarray__init_revents(fda, POLLIN);
nr_fds = fdarray__filter(fda, POLLHUP, NULL);
nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
if (nr_fds != fda->nr_alloc) {
pr_debug("\nfdarray__filter()=%d != %d shouldn't have filtered anything",
nr_fds, fda->nr_alloc);
goto out_delete;
}

fdarray__init_revents(fda, POLLHUP);
nr_fds = fdarray__filter(fda, POLLHUP, NULL);
nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
if (nr_fds != 0) {
pr_debug("\nfdarray__filter()=%d != %d, should have filtered all fds",
nr_fds, fda->nr_alloc);
Expand All @@ -57,7 +57,7 @@ int test__fdarray__filter(int subtest __maybe_unused)

pr_debug("\nfiltering all but fda->entries[2]:");
fdarray__fprintf_prefix(fda, "before", stderr);
nr_fds = fdarray__filter(fda, POLLHUP, NULL);
nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
fdarray__fprintf_prefix(fda, " after", stderr);
if (nr_fds != 1) {
pr_debug("\nfdarray__filter()=%d != 1, should have left just one event", nr_fds);
Expand All @@ -78,7 +78,7 @@ int test__fdarray__filter(int subtest __maybe_unused)

pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):");
fdarray__fprintf_prefix(fda, "before", stderr);
nr_fds = fdarray__filter(fda, POLLHUP, NULL);
nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
fdarray__fprintf_prefix(fda, " after", stderr);
if (nr_fds != 2) {
pr_debug("\nfdarray__filter()=%d != 2, should have left just two events",
Expand Down
Loading

0 comments on commit 42c4fb7

Please sign in to comment.