From 99459a84d5870a88274b4f10bc85c3e39e1d642c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 19 Nov 2019 12:26:19 -0300 Subject: [PATCH 01/26] perf map: Move maj/min/ino/ino_generation to separate struct And this patch highlights where these fields are being used: in the sort order where it uses it to compare maps and classify samples taking into account not just the DSO, but those DSO id fields. I think these should be used to differentiate DSOs with the same name but different 'struct dso_id' fields, i.e. these fields should move to 'struct dso' and then be used as part of the key when doing lookups for DSOs, in addition to the DSO name. Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-8v5isitqy0dup47nnwkpc80f@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 +- tools/perf/util/map.c | 8 ++++---- tools/perf/util/map.h | 14 +++++++++++--- tools/perf/util/sort.c | 24 ++++++++++++------------ 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 585805f51f155..04c197d3beead 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -771,7 +771,7 @@ static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) map->prot & PROT_EXEC ? 'x' : '-', map->flags & MAP_SHARED ? 
's' : 'p', map->pgoff, - map->ino, map->dso->name); + map->dso_id.ino, map->dso->name); } return printed; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 67e0f81416cba..4f50b1b2961fb 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -162,10 +162,10 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, vdso = is_vdso_map(filename); no_dso = is_no_dso_memory(filename); - map->maj = d_maj; - map->min = d_min; - map->ino = ino; - map->ino_generation = ino_gen; + map->dso_id.maj = d_maj; + map->dso_id.min = d_min; + map->dso_id.ino = ino; + map->dso_id.ino_generation = ino_gen; map->prot = prot; map->flags = flags; nsi = nsinfo__get(thread->nsinfo); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 0a6c45f85cd93..70d87dcbe35dc 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -18,6 +18,16 @@ struct map_groups; struct machine; struct evsel; +/* + * Data about backing storage DSO, comes from PERF_RECORD_MMAP2 meta events + */ +struct dso_id { + u32 maj; + u32 min; + u64 ino; + u64 ino_generation; +}; + struct map { union { struct rb_node rb_node; @@ -30,9 +40,6 @@ struct map { u32 prot; u64 pgoff; u64 reloc; - u32 maj, min; /* only valid for MMAP2 record */ - u64 ino; /* only valid for MMAP2 record */ - u64 ino_generation;/* only valid for MMAP2 record */ /* ip -> dso rip */ u64 (*map_ip)(struct map *, u64); @@ -40,6 +47,7 @@ struct map { u64 (*unmap_ip)(struct map *, u64); struct dso *dso; + struct dso_id dso_id; refcount_t refcnt; u32 flags; }; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 6b626e6b111ed..bc589438cd12a 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1212,17 +1212,17 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) if (!l_map) return -1; if (!r_map) return 1; - if (l_map->maj > r_map->maj) return -1; - if (l_map->maj < r_map->maj) return 1; + if (l_map->dso_id.maj > r_map->dso_id.maj) return -1; + if 
(l_map->dso_id.maj < r_map->dso_id.maj) return 1; - if (l_map->min > r_map->min) return -1; - if (l_map->min < r_map->min) return 1; + if (l_map->dso_id.min > r_map->dso_id.min) return -1; + if (l_map->dso_id.min < r_map->dso_id.min) return 1; - if (l_map->ino > r_map->ino) return -1; - if (l_map->ino < r_map->ino) return 1; + if (l_map->dso_id.ino > r_map->dso_id.ino) return -1; + if (l_map->dso_id.ino < r_map->dso_id.ino) return 1; - if (l_map->ino_generation > r_map->ino_generation) return -1; - if (l_map->ino_generation < r_map->ino_generation) return 1; + if (l_map->dso_id.ino_generation > r_map->dso_id.ino_generation) return -1; + if (l_map->dso_id.ino_generation < r_map->dso_id.ino_generation) return 1; /* * Addresses with no major/minor numbers are assumed to be @@ -1234,8 +1234,8 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) if ((left->cpumode != PERF_RECORD_MISC_KERNEL) && (!(l_map->flags & MAP_SHARED)) && - !l_map->maj && !l_map->min && !l_map->ino && - !l_map->ino_generation) { + !l_map->dso_id.maj && !l_map->dso_id.min && + !l_map->dso_id.ino && !l_map->dso_id.ino_generation) { /* userspace anonymous */ if (left->thread->pid_ > right->thread->pid_) return -1; @@ -1271,8 +1271,8 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf, if ((he->cpumode != PERF_RECORD_MISC_KERNEL) && map && !(map->prot & PROT_EXEC) && (map->flags & MAP_SHARED) && - (map->maj || map->min || map->ino || - map->ino_generation)) + (map->dso_id.maj || map->dso_id.min || + map->dso_id.ino || map->dso_id.ino_generation)) level = 's'; else if (!map) level = 'X'; From 4a7380a52ec90fbb1565dd638ee7f5b6e709f7fb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 19 Nov 2019 12:40:29 -0300 Subject: [PATCH 02/26] perf map: Pass a dso_id to map__new() Instead of the 4 fields, a step in the direction of moving this to struct dso. 
Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-gp5s1xgxacurmih5d1l94ymy@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 15 ++++++++------- tools/perf/util/map.c | 13 +++++++------ tools/perf/util/map.h | 3 +-- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 71ee078d30f4b..41b4263c073d2 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1651,6 +1651,12 @@ int machine__process_mmap2_event(struct machine *machine, { struct thread *thread; struct map *map; + struct dso_id dso_id = { + .maj = event->mmap2.maj, + .min = event->mmap2.min, + .ino = event->mmap2.ino, + .ino_generation = event->mmap2.ino_generation, + }; int ret = 0; if (dump_trace) @@ -1671,10 +1677,7 @@ int machine__process_mmap2_event(struct machine *machine, map = map__new(machine, event->mmap2.start, event->mmap2.len, event->mmap2.pgoff, - event->mmap2.maj, - event->mmap2.min, event->mmap2.ino, - event->mmap2.ino_generation, - event->mmap2.prot, + &dso_id, event->mmap2.prot, event->mmap2.flags, event->mmap2.filename, thread); @@ -1727,9 +1730,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event map = map__new(machine, event->mmap.start, event->mmap.len, event->mmap.pgoff, - 0, 0, 0, 0, prot, 0, - event->mmap.filename, - thread); + NULL, prot, 0, event->mmap.filename, thread); if (map == NULL) goto out_problem_map; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 4f50b1b2961fb..812d663ebb578 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -144,8 +144,8 @@ void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso) } struct map *map__new(struct machine *machine, u64 start, u64 len, - u64 pgoff, u32 d_maj, u32 d_min, u64 ino, - u64 ino_gen, u32 prot, u32 flags, char *filename, + u64 pgoff, struct dso_id *id, + u32 prot, u32 flags, 
char *filename, struct thread *thread) { struct map *map = malloc(sizeof(*map)); @@ -162,10 +162,11 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, vdso = is_vdso_map(filename); no_dso = is_no_dso_memory(filename); - map->dso_id.maj = d_maj; - map->dso_id.min = d_min; - map->dso_id.ino = ino; - map->dso_id.ino_generation = ino_gen; + if (id) + map->dso_id = *id; + else + map->dso_id.min = map->dso_id.ino = map->dso_id.ino_generation = 0; + map->prot = prot; map->flags = flags; nsi = nsinfo__get(thread->nsinfo); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 70d87dcbe35dc..f962eb9035c7f 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -117,8 +117,7 @@ struct thread; void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso); struct map *map__new(struct machine *machine, u64 start, u64 len, - u64 pgoff, u32 d_maj, u32 d_min, u64 ino, - u64 ino_gen, u32 prot, u32 flags, + u64 pgoff, struct dso_id *id, u32 prot, u32 flags, char *filename, struct thread *thread); struct map *map__new2(u64 start, struct dso *dso); void map__delete(struct map *map); From 7b59a82493b49b715224bfe3b35fae52e48e5fa1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 19 Nov 2019 16:30:56 -0300 Subject: [PATCH 03/26] perf map: Move comparison of map's dso_id to a separate function We'll use it when doing DSO lookups using dso_ids. 
Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-u2nr1oq03o0i29w2ay9jx03s@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dsos.c | 25 +++++++++++++++++++++++++ tools/perf/util/map.h | 2 ++ tools/perf/util/sort.c | 16 ++++------------ 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 3ea80d203587a..ecf8d73466854 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -2,6 +2,7 @@ #include "debug.h" #include "dsos.h" #include "dso.h" +#include "map.h" #include "vdso.h" #include "namespaces.h" #include @@ -9,6 +10,30 @@ #include #include // filename__read_build_id +int dso_id__cmp(struct dso_id *a, struct dso_id *b) +{ + /* + * The second is always dso->id, so zeroes if not set, assume passing + * NULL for a means a zeroed id + */ + if (a == NULL) + return 0; + + if (a->maj > b->maj) return -1; + if (a->maj < b->maj) return 1; + + if (a->min > b->min) return -1; + if (a->min < b->min) return 1; + + if (a->ino > b->ino) return -1; + if (a->ino < b->ino) return 1; + + if (a->ino_generation > b->ino_generation) return -1; + if (a->ino_generation < b->ino_generation) return 1; + + return 0; +} + bool __dsos__read_build_ids(struct list_head *head, bool with_hits) { bool have_build_id = false; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index f962eb9035c7f..e1e573a28a554 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -28,6 +28,8 @@ struct dso_id { u64 ino_generation; }; +int dso_id__cmp(struct dso_id *a, struct dso_id *b); + struct map { union { struct rb_node rb_node; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index bc589438cd12a..f1481002fafb0 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1194,6 +1194,7 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) { u64 l, r; struct map *l_map, *r_map; + int rc; if 
(!left->mem_info) return -1; if (!right->mem_info) return 1; @@ -1212,18 +1213,9 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) if (!l_map) return -1; if (!r_map) return 1; - if (l_map->dso_id.maj > r_map->dso_id.maj) return -1; - if (l_map->dso_id.maj < r_map->dso_id.maj) return 1; - - if (l_map->dso_id.min > r_map->dso_id.min) return -1; - if (l_map->dso_id.min < r_map->dso_id.min) return 1; - - if (l_map->dso_id.ino > r_map->dso_id.ino) return -1; - if (l_map->dso_id.ino < r_map->dso_id.ino) return 1; - - if (l_map->dso_id.ino_generation > r_map->dso_id.ino_generation) return -1; - if (l_map->dso_id.ino_generation < r_map->dso_id.ino_generation) return 1; - + rc = dso_id__cmp(&l_map->dso_id, &r_map->dso_id); + if (rc) + return rc; /* * Addresses with no major/minor numbers are assumed to be * anonymous in userspace. Sort those on pid then address. From 1f74b100c9d9406fa12b22675c6b2111e5f60e9c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 19 Nov 2019 17:51:34 -0300 Subject: [PATCH 04/26] perf dsos: Remove unused dsos__find() method Not used anywhere, nuke it. 
Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-teqz0eqcw43mnt7i3me44esw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dsos.c | 9 --------- tools/perf/util/dsos.h | 1 - 2 files changed, 10 deletions(-) diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index ecf8d73466854..1d38d6ac6e5a7 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -159,15 +159,6 @@ struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) return __dsos__findnew_by_longname(&dsos->root, name); } -struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short) -{ - struct dso *dso; - down_read(&dsos->lock); - dso = __dsos__find(dsos, name, cmp_short); - up_read(&dsos->lock); - return dso; -} - static void dso__set_basename(struct dso *dso) { char *base, *lname; diff --git a/tools/perf/util/dsos.h b/tools/perf/util/dsos.h index 32f1fbee0feb2..fd7ba51fc965b 100644 --- a/tools/perf/util/dsos.h +++ b/tools/perf/util/dsos.h @@ -24,7 +24,6 @@ void __dsos__add(struct dsos *dsos, struct dso *dso); void dsos__add(struct dsos *dsos, struct dso *dso); struct dso *__dsos__addnew(struct dsos *dsos, const char *name); struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short); -struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short); struct dso *__dsos__findnew(struct dsos *dsos, const char *name); struct dso *dsos__findnew(struct dsos *dsos, const char *name); From 0e3149f86b99ddabde8c5029eea0a9267e34f1a0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 19 Nov 2019 18:44:22 -0300 Subject: [PATCH 05/26] perf dso: Move dso_id from 'struct map' to 'struct dso' And take it into account when looking up DSOs when we have the dso_id fields obtained from somewhere, like from PERF_RECORD_MMAP2 records. 
Instances of struct map pointing to the same DSO pathname but with anything in dso_id different are in fact different DSOs, so better have different 'struct dso' instances to reflect that. At some point we may want to get copies of the contents of the different objects if we want to do correct annotation or other analysis. With this we get 'struct map' 24 bytes leaner: $ pahole -C map ~/bin/perf struct map { union { struct rb_node rb_node __attribute__((__aligned__(8))); /* 0 24 */ struct list_head node; /* 0 16 */ } __attribute__((__aligned__(8))); /* 0 24 */ u64 start; /* 24 8 */ u64 end; /* 32 8 */ _Bool erange_warned:1; /* 40: 0 1 */ _Bool priv:1; /* 40: 1 1 */ /* XXX 6 bits hole, try to pack */ /* XXX 3 bytes hole, try to pack */ u32 prot; /* 44 4 */ u64 pgoff; /* 48 8 */ u64 reloc; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ u64 (*map_ip)(struct map *, u64); /* 64 8 */ u64 (*unmap_ip)(struct map *, u64); /* 72 8 */ struct dso * dso; /* 80 8 */ refcount_t refcnt; /* 88 4 */ u32 flags; /* 92 4 */ /* size: 96, cachelines: 2, members: 13 */ /* sum members: 92, holes: 1, sum holes: 3 */ /* sum bitfield members: 2 bits, bit holes: 1, sum bit holes: 6 bits */ /* forced alignments: 1 */ /* last cacheline: 32 bytes */ } __attribute__((__aligned__(8))); $ Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-g4hxxmraplo7wfjmk384mfsb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 +- tools/perf/util/dso.c | 24 +++++++--- tools/perf/util/dso.h | 13 ++++++ tools/perf/util/dsos.c | 87 +++++++++++++++++++++++++++---------- tools/perf/util/dsos.h | 13 +++--- tools/perf/util/machine.c | 7 ++- tools/perf/util/machine.h | 2 + tools/perf/util/map.c | 8 +--- tools/perf/util/map.h | 16 ++----- tools/perf/util/sort.c | 10 ++--- 10 files changed, 118 insertions(+), 64 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 
04c197d3beead..0b6157c02c888 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -771,7 +771,7 @@ static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) map->prot & PROT_EXEC ? 'x' : '-', map->flags & MAP_SHARED ? 's' : 'p', map->pgoff, - map->dso_id.ino, map->dso->name); + map->dso->id.ino, map->dso->name); } return printed; diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 0f1b77275a86d..91f21239608bd 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1149,7 +1149,7 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name, return dso; } -void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) +static void dso__set_long_name_id(struct dso *dso, const char *name, struct dso_id *id, bool name_allocated) { struct rb_root *root = dso->root; @@ -1162,8 +1162,8 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) if (root) { rb_erase(&dso->rb_node, root); /* - * __dsos__findnew_link_by_longname() isn't guaranteed to add it - * back, so a clean removal is required here. + * __dsos__findnew_link_by_longname_id() isn't guaranteed to + * add it back, so a clean removal is required here. 
*/ RB_CLEAR_NODE(&dso->rb_node); dso->root = NULL; @@ -1174,7 +1174,12 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) dso->long_name_allocated = name_allocated; if (root) - __dsos__findnew_link_by_longname(root, dso, NULL); + __dsos__findnew_link_by_longname_id(root, dso, NULL, id); +} + +void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) +{ + dso__set_long_name_id(dso, name, NULL, name_allocated); } void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) @@ -1215,13 +1220,15 @@ void dso__set_sorted_by_name(struct dso *dso) dso->sorted_by_name = true; } -struct dso *dso__new(const char *name) +struct dso *dso__new_id(const char *name, struct dso_id *id) { struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1); if (dso != NULL) { strcpy(dso->name, name); - dso__set_long_name(dso, dso->name, false); + if (id) + dso->id = *id; + dso__set_long_name_id(dso, dso->name, id, false); dso__set_short_name(dso, dso->name, false); dso->symbols = dso->symbol_names = RB_ROOT_CACHED; dso->data.cache = RB_ROOT; @@ -1252,6 +1259,11 @@ struct dso *dso__new(const char *name) return dso; } +struct dso *dso__new(const char *name) +{ + return dso__new_id(name, NULL); +} + void dso__delete(struct dso *dso) { if (!RB_EMPTY_NODE(&dso->rb_node)) diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 2f1fcbc6fead1..2db64b79617ae 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -122,6 +122,16 @@ enum dso_load_errno { #define DSO__DATA_CACHE_SIZE 4096 #define DSO__DATA_CACHE_MASK ~(DSO__DATA_CACHE_SIZE - 1) +/* + * Data about backing storage DSO, comes from PERF_RECORD_MMAP2 meta events + */ +struct dso_id { + u32 maj; + u32 min; + u64 ino; + u64 ino_generation; +}; + struct dso_cache { struct rb_node rb_node; u64 offset; @@ -196,6 +206,7 @@ struct dso { u64 db_id; }; struct nsinfo *nsinfo; + struct dso_id id; refcount_t refcnt; char name[0]; }; @@ -214,9 +225,11 @@ static 
inline void dso__set_loaded(struct dso *dso) dso->loaded = true; } +struct dso *dso__new_id(const char *name, struct dso_id *id); struct dso *dso__new(const char *name); void dso__delete(struct dso *dso); +int dso__cmp_id(struct dso *a, struct dso *b); void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated); void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated); diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 1d38d6ac6e5a7..591707c69c39a 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -2,7 +2,6 @@ #include "debug.h" #include "dsos.h" #include "dso.h" -#include "map.h" #include "vdso.h" #include "namespaces.h" #include @@ -10,15 +9,8 @@ #include #include // filename__read_build_id -int dso_id__cmp(struct dso_id *a, struct dso_id *b) +static int __dso_id__cmp(struct dso_id *a, struct dso_id *b) { - /* - * The second is always dso->id, so zeroes if not set, assume passing - * NULL for a means a zeroed id - */ - if (a == NULL) - return 0; - if (a->maj > b->maj) return -1; if (a->maj < b->maj) return 1; @@ -34,6 +26,23 @@ int dso_id__cmp(struct dso_id *a, struct dso_id *b) return 0; } +static int dso_id__cmp(struct dso_id *a, struct dso_id *b) +{ + /* + * The second is always dso->id, so zeroes if not set, assume passing + * NULL for a means a zeroed id + */ + if (a == NULL) + return 0; + + return __dso_id__cmp(a, b); +} + +int dso__cmp_id(struct dso *a, struct dso *b) +{ + return __dso_id__cmp(&a->id, &b->id); +} + bool __dsos__read_build_ids(struct list_head *head, bool with_hits) { bool have_build_id = false; @@ -59,12 +68,30 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) return have_build_id; } +static int __dso__cmp_long_name(const char *long_name, struct dso_id *id, struct dso *b) +{ + int rc = strcmp(long_name, b->long_name); + return rc ?: dso_id__cmp(id, &b->id); +} + +static int __dso__cmp_short_name(const char *short_name, struct dso_id *id, 
struct dso *b) +{ + int rc = strcmp(short_name, b->short_name); + return rc ?: dso_id__cmp(id, &b->id); +} + +static int dso__cmp_short_name(struct dso *a, struct dso *b) +{ + return __dso__cmp_short_name(a->short_name, &a->id, b); +} + /* * Find a matching entry and/or link current entry to RB tree. * Either one of the dso or name parameter must be non-NULL or the * function will not work. */ -struct dso *__dsos__findnew_link_by_longname(struct rb_root *root, struct dso *dso, const char *name) +struct dso *__dsos__findnew_link_by_longname_id(struct rb_root *root, struct dso *dso, + const char *name, struct dso_id *id) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -76,7 +103,7 @@ struct dso *__dsos__findnew_link_by_longname(struct rb_root *root, struct dso *d */ while (*p) { struct dso *this = rb_entry(*p, struct dso, rb_node); - int rc = strcmp(name, this->long_name); + int rc = __dso__cmp_long_name(name, id, this); parent = *p; if (rc == 0) { @@ -92,7 +119,7 @@ struct dso *__dsos__findnew_link_by_longname(struct rb_root *root, struct dso *d * In this case, the short name should be different. * Comparing the short names to differentiate the DSOs. 
*/ - rc = strcmp(dso->short_name, this->short_name); + rc = dso__cmp_short_name(dso, this); if (rc == 0) { pr_err("Duplicated dso name: %s\n", name); return NULL; @@ -115,7 +142,7 @@ struct dso *__dsos__findnew_link_by_longname(struct rb_root *root, struct dso *d void __dsos__add(struct dsos *dsos, struct dso *dso) { list_add_tail(&dso->node, &dsos->head); - __dsos__findnew_link_by_longname(&dsos->root, dso, NULL); + __dsos__findnew_link_by_longname_id(&dsos->root, dso, NULL, &dso->id); /* * It is now in the linked list, grab a reference, then garbage collect * this when needing memory, by looking at LRU dso instances in the @@ -146,17 +173,27 @@ void dsos__add(struct dsos *dsos, struct dso *dso) up_write(&dsos->lock); } -struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) +static struct dso *__dsos__findnew_by_longname_id(struct rb_root *root, const char *name, struct dso_id *id) +{ + return __dsos__findnew_link_by_longname_id(root, NULL, name, id); +} + +static struct dso *__dsos__find_id(struct dsos *dsos, const char *name, struct dso_id *id, bool cmp_short) { struct dso *pos; if (cmp_short) { list_for_each_entry(pos, &dsos->head, node) - if (strcmp(pos->short_name, name) == 0) + if (__dso__cmp_short_name(name, id, pos) == 0) return pos; return NULL; } - return __dsos__findnew_by_longname(&dsos->root, name); + return __dsos__findnew_by_longname_id(&dsos->root, name, id); +} + +struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) +{ + return __dsos__find_id(dsos, name, NULL, cmp_short); } static void dso__set_basename(struct dso *dso) @@ -191,9 +228,9 @@ static void dso__set_basename(struct dso *dso) dso__set_short_name(dso, base, true); } -struct dso *__dsos__addnew(struct dsos *dsos, const char *name) +static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, struct dso_id *id) { - struct dso *dso = dso__new(name); + struct dso *dso = dso__new_id(name, id); if (dso != NULL) { __dsos__add(dsos, 
dso); @@ -204,18 +241,22 @@ struct dso *__dsos__addnew(struct dsos *dsos, const char *name) return dso; } -struct dso *__dsos__findnew(struct dsos *dsos, const char *name) +struct dso *__dsos__addnew(struct dsos *dsos, const char *name) { - struct dso *dso = __dsos__find(dsos, name, false); + return __dsos__addnew_id(dsos, name, NULL); +} - return dso ? dso : __dsos__addnew(dsos, name); +static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id) +{ + struct dso *dso = __dsos__find_id(dsos, name, id, false); + return dso ? dso : __dsos__addnew_id(dsos, name, id); } -struct dso *dsos__findnew(struct dsos *dsos, const char *name) +struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id) { struct dso *dso; down_write(&dsos->lock); - dso = dso__get(__dsos__findnew(dsos, name)); + dso = dso__get(__dsos__findnew_id(dsos, name, id)); up_write(&dsos->lock); return dso; } diff --git a/tools/perf/util/dsos.h b/tools/perf/util/dsos.h index fd7ba51fc965b..5dbec2bc6966d 100644 --- a/tools/perf/util/dsos.h +++ b/tools/perf/util/dsos.h @@ -9,6 +9,7 @@ #include "rwsem.h" struct dso; +struct dso_id; /* * DSOs are put into both a list for fast iteration and rbtree for fast @@ -24,15 +25,11 @@ void __dsos__add(struct dsos *dsos, struct dso *dso); void dsos__add(struct dsos *dsos, struct dso *dso); struct dso *__dsos__addnew(struct dsos *dsos, const char *name); struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short); -struct dso *__dsos__findnew(struct dsos *dsos, const char *name); -struct dso *dsos__findnew(struct dsos *dsos, const char *name); -struct dso *__dsos__findnew_link_by_longname(struct rb_root *root, struct dso *dso, const char *name); - -static inline struct dso *__dsos__findnew_by_longname(struct rb_root *root, const char *name) -{ - return __dsos__findnew_link_by_longname(root, NULL, name); -} +struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id); + 
+struct dso *__dsos__findnew_link_by_longname_id(struct rb_root *root, struct dso *dso, + const char *name, struct dso_id *id); bool __dsos__read_build_ids(struct list_head *head, bool with_hits); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 41b4263c073d2..e2a312c649f07 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2711,9 +2711,14 @@ u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr) return addr_cpumode; } +struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id) +{ + return dsos__findnew_id(&machine->dsos, filename, id); +} + struct dso *machine__findnew_dso(struct machine *machine, const char *filename) { - return dsos__findnew(&machine->dsos, filename); + return machine__findnew_dso_id(machine, filename, NULL); } char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp) diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 1016978f575a9..499be204830d9 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -11,6 +11,7 @@ struct addr_location; struct branch_stack; struct dso; +struct dso_id; struct evsel; struct perf_sample; struct symbol; @@ -202,6 +203,7 @@ int machine__nr_cpus_avail(struct machine *machine); struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); +struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id); struct dso *machine__findnew_dso(struct machine *machine, const char *filename); size_t machine__fprintf(struct machine *machine, FILE *fp); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 812d663ebb578..744bfbaf35cfc 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -161,12 +161,6 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, anon = 
is_anon_memory(filename, flags); vdso = is_vdso_map(filename); no_dso = is_no_dso_memory(filename); - - if (id) - map->dso_id = *id; - else - map->dso_id.min = map->dso_id.ino = map->dso_id.ino_generation = 0; - map->prot = prot; map->flags = flags; nsi = nsinfo__get(thread->nsinfo); @@ -196,7 +190,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, pgoff = 0; dso = machine__findnew_vdso(machine, thread); } else - dso = machine__findnew_dso(machine, filename); + dso = machine__findnew_dso_id(machine, filename, id); if (dso == NULL) goto out_delete; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index e1e573a28a554..5e8899883231c 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -18,18 +18,6 @@ struct map_groups; struct machine; struct evsel; -/* - * Data about backing storage DSO, comes from PERF_RECORD_MMAP2 meta events - */ -struct dso_id { - u32 maj; - u32 min; - u64 ino; - u64 ino_generation; -}; - -int dso_id__cmp(struct dso_id *a, struct dso_id *b); - struct map { union { struct rb_node rb_node; @@ -49,7 +37,6 @@ struct map { u64 (*unmap_ip)(struct map *, u64); struct dso *dso; - struct dso_id dso_id; refcount_t refcnt; u32 flags; }; @@ -118,6 +105,9 @@ struct thread; void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso); + +struct dso_id; + struct map *map__new(struct machine *machine, u64 start, u64 len, u64 pgoff, struct dso_id *id, u32 prot, u32 flags, char *filename, struct thread *thread); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index f1481002fafb0..345b5ccc90f68 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1213,7 +1213,7 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) if (!l_map) return -1; if (!r_map) return 1; - rc = dso_id__cmp(&l_map->dso_id, &r_map->dso_id); + rc = dso__cmp_id(l_map->dso, r_map->dso); if (rc) return rc; /* @@ -1226,8 +1226,8 @@ sort__dcacheline_cmp(struct hist_entry *left, struct 
hist_entry *right) if ((left->cpumode != PERF_RECORD_MISC_KERNEL) && (!(l_map->flags & MAP_SHARED)) && - !l_map->dso_id.maj && !l_map->dso_id.min && - !l_map->dso_id.ino && !l_map->dso_id.ino_generation) { + !l_map->dso->id.maj && !l_map->dso->id.min && + !l_map->dso->id.ino && !l_map->dso->id.ino_generation) { /* userspace anonymous */ if (left->thread->pid_ > right->thread->pid_) return -1; @@ -1263,8 +1263,8 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf, if ((he->cpumode != PERF_RECORD_MISC_KERNEL) && map && !(map->prot & PROT_EXEC) && (map->flags & MAP_SHARED) && - (map->dso_id.maj || map->dso_id.min || - map->dso_id.ino || map->dso_id.ino_generation)) + (map->dso->id.maj || map->dso->id.min || + map->dso->id.ino || map->dso->id.ino_generation)) level = 's'; else if (!map) level = 'X'; From bb1835a3b86c73aa534ef6430ad40223728dfbc0 Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Mon, 18 Nov 2019 17:21:03 +0300 Subject: [PATCH 06/26] perf session: Fix decompression of PERF_RECORD_COMPRESSED records Avoid termination of trace loading in case the last record in the decompressed buffer partly resides in the following mmaped PERF_RECORD_COMPRESSED record. In this case NULL value returned by fetch_mmaped_event() means to proceed to the next mmaped record then decompress it and load compressed events. The issue can be reproduced like this: $ perf record -z -- some_long_running_workload $ perf report --stdio -vv decomp (B): 44519 to 163000 decomp (B): 48119 to 174800 decomp (B): 65527 to 131072 fetch_mmaped_event: head=0x1ffe0 event->header_size=0x28, mmap_size=0x20000: fuzzed perf.data? Error: failed to process sample ... 
Testing: 71: Zstd perf.data compression/decompression : Ok $ tools/perf/perf report -vv --stdio decomp (B): 59593 to 262160 decomp (B): 4438 to 16512 decomp (B): 285 to 880 Looking at the vmlinux_path (8 entries long) Using vmlinux for symbols decomp (B): 57474 to 261248 prefetch_event: head=0x3fc78 event->header_size=0x28, mmap_size=0x3fc80: fuzzed or compressed perf.data? decomp (B): 25 to 32 decomp (B): 52 to 120 ... Fixes: 57fc032ad643 ("perf session: Avoid infinite loop when seeing invalid header.size") Link: https://marc.info/?l=linux-kernel&m=156580812427554&w=2 Co-developed-by: Jiri Olsa Acked-by: Jiri Olsa Signed-off-by: Alexey Budankov Cc: Alexander Shishkin Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/cf782c34-f3f8-2f9f-d6ab-145cee0d5322@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 44 ++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f07b8ecb91bcd..8454a650146bb 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1958,8 +1958,8 @@ static int __perf_session__process_pipe_events(struct perf_session *session) } static union perf_event * -fetch_mmaped_event(struct perf_session *session, - u64 head, size_t mmap_size, char *buf) +prefetch_event(char *buf, u64 head, size_t mmap_size, + bool needs_swap, union perf_event *error) { union perf_event *event; @@ -1971,20 +1971,32 @@ fetch_mmaped_event(struct perf_session *session, return NULL; event = (union perf_event *)(buf + head); + if (needs_swap) + perf_event_header__bswap(&event->header); - if (session->header.needs_swap) + if (head + event->header.size <= mmap_size) + return event; + + /* We're not fetching the event so swap back again */ + if (needs_swap) perf_event_header__bswap(&event->header); - if (head + event->header.size > mmap_size) { - /* We're not fetching the event so swap back 
again */ - if (session->header.needs_swap) - perf_event_header__bswap(&event->header); - pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx: fuzzed perf.data?\n", - __func__, head, event->header.size, mmap_size); - return ERR_PTR(-EINVAL); - } + pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx:" + " fuzzed or compressed perf.data?\n",__func__, head, event->header.size, mmap_size); - return event; + return error; +} + +static union perf_event * +fetch_mmaped_event(u64 head, size_t mmap_size, char *buf, bool needs_swap) +{ + return prefetch_event(buf, head, mmap_size, needs_swap, ERR_PTR(-EINVAL)); +} + +static union perf_event * +fetch_decomp_event(u64 head, size_t mmap_size, char *buf, bool needs_swap) +{ + return prefetch_event(buf, head, mmap_size, needs_swap, NULL); } static int __perf_session__process_decomp_events(struct perf_session *session) @@ -1997,10 +2009,8 @@ static int __perf_session__process_decomp_events(struct perf_session *session) return 0; while (decomp->head < decomp->size && !session_done()) { - union perf_event *event = fetch_mmaped_event(session, decomp->head, decomp->size, decomp->data); - - if (IS_ERR(event)) - return PTR_ERR(event); + union perf_event *event = fetch_decomp_event(decomp->head, decomp->size, decomp->data, + session->header.needs_swap); if (!event) break; @@ -2100,7 +2110,7 @@ reader__process_events(struct reader *rd, struct perf_session *session, } more: - event = fetch_mmaped_event(session, head, mmap_size, buf); + event = fetch_mmaped_event(head, mmap_size, buf, session->header.needs_swap); if (IS_ERR(event)) return PTR_ERR(event); From 5cb456af99f58378fe90649d6faaab25e379be06 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 18 Nov 2019 22:08:48 +0800 Subject: [PATCH 07/26] perf util: Move block TUI function to ui browsers It would be nice if we could jump to the assembler/source view (like the normal perf report) from total cycles view. 
This patch moves the block_hists_tui_browse from block-info.c to ui/browsers/hists.c in order to reuse some browser codes (i.e do_annotate) for implementing new annotation view. v2: --- Fix the 'make NO_SLANG=1' error. (Change 'int block_hists_tui_browse()' to 'static inline int block_hists_tui_browse()') Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191118140849.20714-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 55 ++++++++++++++++++++++++++++ tools/perf/util/block-info.c | 65 +--------------------------------- tools/perf/util/hist.h | 12 +++++++ 3 files changed, 68 insertions(+), 64 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 4d2d0acfd41a2..87405dc4750c0 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3444,3 +3444,58 @@ int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help, warn_lost_event, annotation_opts); } + +static int block_hists_browser__title(struct hist_browser *browser, char *bf, + size_t size) +{ + struct hists *hists = evsel__hists(browser->block_evsel); + const char *evname = perf_evsel__name(browser->block_evsel); + unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; + int ret; + + ret = scnprintf(bf, size, "# Samples: %lu", nr_samples); + if (evname) + scnprintf(bf + ret, size - ret, " of event '%s'", evname); + + return 0; +} + +int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, + float min_percent) +{ + struct hists *hists = &bh->block_hists; + struct hist_browser *browser; + int key = -1; + static const char help[] = + " q Quit \n"; + + browser = hist_browser__new(hists); + if (!browser) + return -1; + + browser->block_evsel = evsel; + browser->title = block_hists_browser__title; + browser->min_pcnt = min_percent; + + /* reset 
abort key so that it can get Ctrl-C as a key */ + SLang_reset_tty(); + SLang_init_tty(0, 0, 0); + + while (1) { + key = hist_browser__run(browser, "? - help", true); + + switch (key) { + case 'q': + goto out; + case '?': + ui_browser__help_window(&browser->b, help); + break; + default: + break; + } + } + +out: + hist_browser__delete(browser); + return 0; +} diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 9abc201ebe639..5887f8f9149fd 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -10,6 +10,7 @@ #include "map.h" #include "srcline.h" #include "evlist.h" +#include "hist.h" #include "ui/browsers/hists.h" static struct block_header_column { @@ -439,70 +440,6 @@ struct block_report *block_info__create_report(struct evlist *evlist, return block_reports; } -#ifdef HAVE_SLANG_SUPPORT -static int block_hists_browser__title(struct hist_browser *browser, char *bf, - size_t size) -{ - struct hists *hists = evsel__hists(browser->block_evsel); - const char *evname = perf_evsel__name(browser->block_evsel); - unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; - int ret; - - ret = scnprintf(bf, size, "# Samples: %lu", nr_samples); - if (evname) - scnprintf(bf + ret, size - ret, " of event '%s'", evname); - - return 0; -} - -static int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, - float min_percent) -{ - struct hists *hists = &bh->block_hists; - struct hist_browser *browser; - int key = -1; - static const char help[] = - " q Quit \n"; - - browser = hist_browser__new(hists); - if (!browser) - return -1; - - browser->block_evsel = evsel; - browser->title = block_hists_browser__title; - browser->min_pcnt = min_percent; - - /* reset abort key so that it can get Ctrl-C as a key */ - SLang_reset_tty(); - SLang_init_tty(0, 0, 0); - - while (1) { - key = hist_browser__run(browser, "? 
- help", true); - - switch (key) { - case 'q': - goto out; - case '?': - ui_browser__help_window(&browser->b, help); - break; - default: - break; - } - } - -out: - hist_browser__delete(browser); - return 0; -} -#else -static int block_hists_tui_browse(struct block_hist *bh __maybe_unused, - struct evsel *evsel __maybe_unused, - float min_percent __maybe_unused) -{ - return 0; -} -#endif - int report__browse_block_hists(struct block_hist *bh, float min_percent, struct evsel *evsel) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 4d87c7b4c1b26..2aca8ce16b2cd 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -449,6 +449,8 @@ enum rstype { A_SOURCE }; +struct block_hist; + #ifdef HAVE_SLANG_SUPPORT #include "../ui/keysyms.h" void attr_to_script(char *buf, struct perf_event_attr *attr); @@ -474,6 +476,9 @@ void run_script(char *cmd); int res_sample_browse(struct res_sample *res_samples, int num_res, struct evsel *evsel, enum rstype rstype); void res_sample_init(void); + +int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, + float min_percent); #else static inline int perf_evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, @@ -518,6 +523,13 @@ static inline int res_sample_browse(struct res_sample *res_samples __maybe_unuse static inline void res_sample_init(void) {} +static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused, + struct evsel *evsel __maybe_unused, + float min_percent __maybe_unused) +{ + return 0; +} + #define K_LEFT -1000 #define K_RIGHT -2000 #define K_SWITCH_INPUT_DATA -3000 From 848a5e507e26176902e328bd8ae4a5e9c7d2bafe Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 18 Nov 2019 22:08:49 +0800 Subject: [PATCH 08/26] perf report: Jump to symbol source view from total cycles view This patch supports jumping from tui total cycles view to symbol source view. 
For example, perf record -b ./div perf report --total-cycles In total cycles view, we can select one entry and press 'a' or press ENTER key to jump to symbol source view. This patch also sets sort_order to NULL in cmd_report() which will use the default branch sort order. The percent value in new annotate view will be consistent with the percent in annotate view switched from perf report (we observed the original percent gap with previous patches). v2: --- Fix the 'make NO_SLANG=1' error. (set __maybe_unused to annotation_opts in block_hists_tui_browse()). Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191118140849.20714-2-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 9 ++++++--- tools/perf/ui/browsers/hists.c | 25 +++++++++++++++++++++++-- tools/perf/util/block-info.c | 6 ++++-- tools/perf/util/block-info.h | 3 ++- tools/perf/util/hist.h | 7 +++++-- 5 files changed, 40 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0b6157c02c888..ab0f6e516b03c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -493,7 +493,9 @@ static int perf_evlist__tui_block_hists_browse(struct evlist *evlist, evlist__for_each_entry(evlist, pos) { ret = report__browse_block_hists(&rep->block_reports[i++].hist, - rep->min_percent, pos); + rep->min_percent, pos, + &rep->session->header.env, + &rep->annotation_opts); if (ret != 0) return ret; } @@ -525,7 +527,8 @@ static int perf_evlist__tty_browse_hists(struct evlist *evlist, if (rep->total_cycles_mode) { report__browse_block_hists(&rep->block_reports[i++].hist, - rep->min_percent, pos); + rep->min_percent, pos, + NULL, NULL); continue; } @@ -1418,7 +1421,7 @@ int cmd_report(int argc, const char **argv) if (sort__mode != SORT_MODE__BRANCH) 
report.total_cycles_mode = false; else - sort_order = "sym"; + sort_order = NULL; } if (strcmp(input_name, "-") != 0) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 87405dc4750c0..d4d3558fdef42 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2385,7 +2385,11 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) if (!notes->src) return 0; - evsel = hists_to_evsel(browser->hists); + if (browser->block_evsel) + evsel = browser->block_evsel; + else + evsel = hists_to_evsel(browser->hists); + err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt, browser->annotation_opts); he = hist_browser__selected_entry(browser); @@ -3461,11 +3465,13 @@ static int block_hists_browser__title(struct hist_browser *browser, char *bf, } int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, - float min_percent) + float min_percent, struct perf_env *env, + struct annotation_options *annotation_opts) { struct hists *hists = &bh->block_hists; struct hist_browser *browser; int key = -1; + struct popup_action action; static const char help[] = " q Quit \n"; @@ -3476,11 +3482,15 @@ int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, browser->block_evsel = evsel; browser->title = block_hists_browser__title; browser->min_pcnt = min_percent; + browser->env = env; + browser->annotation_opts = annotation_opts; /* reset abort key so that it can get Ctrl-C as a key */ SLang_reset_tty(); SLang_init_tty(0, 0, 0); + memset(&action, 0, sizeof(action)); + while (1) { key = hist_browser__run(browser, "? 
- help", true); @@ -3490,6 +3500,17 @@ int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, case '?': ui_browser__help_window(&browser->b, help); break; + case 'a': + case K_ENTER: + if (!browser->selection || + !browser->selection->sym) { + continue; + } + + action.ms.map = browser->selection->map; + action.ms.sym = browser->selection->sym; + do_annotate(browser, &action); + continue; default: break; } diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 5887f8f9149fd..c4b030bf6ec2d 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -441,7 +441,8 @@ struct block_report *block_info__create_report(struct evlist *evlist, } int report__browse_block_hists(struct block_hist *bh, float min_percent, - struct evsel *evsel) + struct evsel *evsel, struct perf_env *env, + struct annotation_options *annotation_opts) { int ret; @@ -454,7 +455,8 @@ int report__browse_block_hists(struct block_hist *bh, float min_percent, return 0; case 1: symbol_conf.report_individual_block = true; - ret = block_hists_tui_browse(bh, evsel, min_percent); + ret = block_hists_tui_browse(bh, evsel, min_percent, + env, annotation_opts); hists__delete_entries(&bh->block_hists); return ret; default: diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h index e4d20bccd9b6e..bef0d75e98195 100644 --- a/tools/perf/util/block-info.h +++ b/tools/perf/util/block-info.h @@ -71,7 +71,8 @@ struct block_report *block_info__create_report(struct evlist *evlist, u64 total_cycles); int report__browse_block_hists(struct block_hist *bh, float min_percent, - struct evsel *evsel); + struct evsel *evsel, struct perf_env *env, + struct annotation_options *annotation_opts); float block_info__total_cycles_percent(struct hist_entry *he); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 2aca8ce16b2cd..45286900aacbf 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -478,7 +478,8 @@ int 
res_sample_browse(struct res_sample *res_samples, int num_res, void res_sample_init(void); int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, - float min_percent); + float min_percent, struct perf_env *env, + struct annotation_options *annotation_opts); #else static inline int perf_evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, @@ -525,7 +526,9 @@ static inline void res_sample_init(void) {} static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused, struct evsel *evsel __maybe_unused, - float min_percent __maybe_unused) + float min_percent __maybe_unused, + struct perf_env *env __maybe_unused, + struct annotation_options *annotation_opts __maybe_unused) { return 0; } From 98dcf14d7f9c9482144b9015d481bf6b63bce965 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Nov 2019 14:42:11 +0200 Subject: [PATCH 09/26] perf tools: Add kernel AUX area sampling definitions Add kernel AUX area sampling definitions, which brings perf_event.h into line with the kernel version. New sample type PERF_SAMPLE_AUX requests a sample of the AUX area buffer. New perf_event_attr member 'aux_sample_size' specifies the desired size of the sample. Also add support for parsing samples containing AUX area data i.e. PERF_SAMPLE_AUX. Committer notes: I squashed the first two patches in this series to avoid breaking automatic bisection, i.e. after applying only the original first patch in this series we would have: # perf test -v parsing 26: Sample parsing : --- start --- test child forked, pid 17018 sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating test child finished with -1 ---- end ---- Sample parsing: FAILED! 
# With the two patches combined: # perf test parsing 26: Sample parsing : Ok # Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191115124225.5247-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 10 ++++++++-- tools/perf/tests/attr/base-record | 2 +- tools/perf/tests/attr/base-stat | 2 +- tools/perf/tests/sample-parsing.c | 16 +++++++++++++++- tools/perf/util/event.h | 6 ++++++ tools/perf/util/evsel.c | 13 +++++++++++++ tools/perf/util/perf_event_attr_fprintf.c | 3 ++- tools/perf/util/session.c | 1 + tools/perf/util/synthetic-events.c | 12 ++++++++++++ 9 files changed, 59 insertions(+), 6 deletions(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index bb7b271397a66..377d794d3105c 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -141,8 +141,9 @@ enum perf_event_sample_format { PERF_SAMPLE_TRANSACTION = 1U << 17, PERF_SAMPLE_REGS_INTR = 1U << 18, PERF_SAMPLE_PHYS_ADDR = 1U << 19, + PERF_SAMPLE_AUX = 1U << 20, - PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; @@ -300,6 +301,7 @@ enum perf_event_read_format { /* add: sample_stack_user */ #define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ #define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ +#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -424,7 +426,9 @@ struct perf_event_attr { */ __u32 aux_watermark; __u16 sample_max_stack; - __u16 __reserved_2; /* align to __u64 */ + __u16 __reserved_2; + __u32 aux_sample_size; + __u32 __reserved_3; }; /* @@ -864,6 +868,8 @@ enum perf_event_type { * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR * { u64 phys_addr;}
&& PERF_SAMPLE_PHYS_ADDR + * { u64 size; + * char data[size]; } && PERF_SAMPLE_AUX * }; */ PERF_RECORD_SAMPLE = 9, diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index efd0157b9d223..645009c08b3cb 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0|1 -size=112 +size=120 config=0 sample_period=* sample_type=263 diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat index 4d0c2e42b64e8..b0f42c34882e8 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/attr/base-stat @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0 -size=112 +size=120 config=0 sample_period=0 sample_type=65536 diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 3a02426db9a63..2762e11552380 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -150,6 +150,15 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_PHYS_ADDR) COMP(phys_addr); + if (type & PERF_SAMPLE_AUX) { + COMP(aux_sample.size); + if (memcmp(s1->aux_sample.data, s2->aux_sample.data, + s1->aux_sample.size)) { + pr_debug("Samples differ at 'aux_sample'\n"); + return false; + } + } + return true; } @@ -182,6 +191,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) u64 regs[64]; const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL}; const u64 data[] = {0x2211443366558877ULL, 0, 0xaabbccddeeff4321ULL}; + const u64 aux_data[] = {0xa55a, 0, 0xeeddee, 0x0282028202820282}; struct perf_sample sample = { .ip = 101, .pid = 102, @@ -218,6 +228,10 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .regs = regs, }, .phys_addr = 113, + .aux_sample = { + .size = sizeof(aux_data), + .data = (void *)aux_data, + }, }; struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},}; struct perf_sample sample_out; @@ -317,7 +331,7 @@ 
int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u * were added. Please actually update the test rather than just change * the condition below. */ - if (PERF_SAMPLE_MAX > PERF_SAMPLE_PHYS_ADDR << 1) { + if (PERF_SAMPLE_MAX > PERF_SAMPLE_AUX << 1) { pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n"); return -1; } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index a0a0c91cde4a6..85223159737c1 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -114,6 +114,11 @@ enum { #define MAX_INSN 16 +struct aux_sample { + u64 size; + void *data; +}; + struct perf_sample { u64 ip; u32 pid, tid; @@ -142,6 +147,7 @@ struct perf_sample { struct regs_dump intr_regs; struct stack_dump user_stack; struct sample_read read; + struct aux_sample aux_sample; }; #define PERF_MEM_DATA_SRC_NONE \ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1bf60f3256088..772f4879c4923 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2209,6 +2209,19 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, array++; } + if (type & PERF_SAMPLE_AUX) { + OVERFLOW_CHECK_u64(array); + sz = *array++; + + OVERFLOW_CHECK(array, sz, max_size); + /* Undo swap of data */ + if (swapped) + mem_bswap_64((char *)array, sz); + data->aux_sample.size = sz; + data->aux_sample.data = (char *)array; + array = (void *)array + sz; + } + return 0; } diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index d4ad3f04923ac..651203126c71e 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -34,7 +34,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), - 
bit_name(WEIGHT), bit_name(PHYS_ADDR), + bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX), { .name = NULL, } }; #undef bit_name @@ -143,6 +143,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(sample_regs_intr, p_hex); PRINT_ATTRf(aux_watermark, p_unsigned); PRINT_ATTRf(sample_max_stack, p_unsigned); + PRINT_ATTRf(aux_sample_size, p_unsigned); return ret; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 8454a650146bb..dbdb47624dec1 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -752,6 +752,7 @@ do { \ bswap_field_32(sample_stack_user); bswap_field_32(aux_watermark); bswap_field_16(sample_max_stack); + bswap_field_32(aux_sample_size); /* * After read_format are bitfields. Check read_format because diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index cfa3c9f671414..48c3f8b9c8528 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1228,6 +1228,11 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_PHYS_ADDR) result += sizeof(u64); + if (type & PERF_SAMPLE_AUX) { + result += sizeof(u64); + result += sample->aux_sample.size; + } + return result; } @@ -1396,6 +1401,13 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo array++; } + if (type & PERF_SAMPLE_AUX) { + sz = sample->aux_sample.size; + *array++ = sz; + memcpy(array, sample->aux_sample.data, sz); + array = (void *)array + sz; + } + return 0; } From 9bca1a4ef5034f0a82861ac0375eb0272c5ce04e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Nov 2019 14:42:13 +0200 Subject: [PATCH 10/26] perf record: Add a function to test for kernel support for AUX area sampling Architectures are expected to know if AUX area sampling is supported by the hardware. Add a function perf_can_aux_sample() which will determine whether the kernel supports it. 
Committer notes: I reported that this message was taking place on a kernel without the required bits: # perf record --aux-sample -e '{intel_pt//u,branch-misses:u}' Error: The sys_perf_event_open() syscall returned with 7 (Argument list too long) for event (branch-misses:u). /bin/dmesg | grep -i perf may provide additional information. Adrian sent a patch addressing it, with this explanation: ---- perf_can_aux_sample_size() always returned true because it did not pass the attribute size to sys_perf_event_open, nor correctly check the return value and errno. ---- After applying it I get, later in the series, when --aux-sample is added: # perf record --aux-sample -e '{intel_pt//u,branch-misses:u}' AUX area sampling is not supported by kernel Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191115124225.5247-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.h | 1 + tools/perf/util/record.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 13051409fd225..3655b9ebb1473 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -176,6 +176,7 @@ void perf_evlist__set_id_pos(struct evlist *evlist); bool perf_can_sample_identifier(void); bool perf_can_record_switch_events(void); bool perf_can_record_cpu_wide(void); +bool perf_can_aux_sample(void); void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain); int record_opts__config(struct record_opts *opts); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 8579505c29a4d..7def661685032 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -136,6 +136,37 @@ bool perf_can_record_cpu_wide(void) return true; } +/* + * Architectures are expected to know if AUX area sampling is supported by the + * hardware. Here we check for kernel support. 
+ */ +bool perf_can_aux_sample(void) +{ + struct perf_event_attr attr = { + .size = sizeof(struct perf_event_attr), + .exclude_kernel = 1, + /* + * Non-zero value causes the kernel to calculate the effective + * attribute size up to that byte. + */ + .aux_sample_size = 1, + }; + int fd; + + fd = sys_perf_event_open(&attr, -1, 0, -1, 0); + /* + * If the kernel attribute is big enough to contain aux_sample_size + * then we assume that it is supported. We are relying on the kernel to + * validate the attribute size before anything else that could be wrong. + */ + if (fd < 0 && errno == E2BIG) + return false; + if (fd >= 0) + close(fd); + + return true; +} + void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain) { From f306de275b7c18da9ab060acb3dfa91c09e9ae89 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Nov 2019 14:42:14 +0200 Subject: [PATCH 11/26] perf auxtrace: Move perf_evsel__find_pmu() Move perf_evsel__find_pmu() so it can be used without forward declaration. 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191115124225.5247-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index c555c3ccd79d6..263d1d9d89875 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -57,6 +57,18 @@ #include "symbol/kallsyms.h" #include +static struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel) +{ + struct perf_pmu *pmu = NULL; + + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + if (pmu->type == evsel->core.attr.type) + break; + } + + return pmu; +} + static bool auxtrace__dont_decode(struct perf_session *session) { return !session->itrace_synth_opts || @@ -2180,18 +2192,6 @@ static int parse_addr_filter(struct evsel *evsel, const char *filter, return err; } -static struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel) -{ - struct perf_pmu *pmu = NULL; - - while ((pmu = perf_pmu__scan(pmu)) != NULL) { - if (pmu->type == evsel->core.attr.type) - break; - } - - return pmu; -} - static int perf_evsel__nr_addr_filter(struct evsel *evsel) { struct perf_pmu *pmu = perf_evsel__find_pmu(evsel); From f0bb7ee8530a07d3c23bd2e06984796e66cfbcf1 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Nov 2019 14:42:15 +0200 Subject: [PATCH 12/26] perf auxtrace: Add support for AUX area sample recording Add support for parsing and validating AUX area sample options. At present, the only option is the sample size, but it is also necessary to ensure that events are in a group with an AUX area event as the leader. Committer note: Add missing 'static inline' in front of auxtrace_parse_sample_options() for when we don't HAVE_AUXTRACE_SUPPORT. 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191115124225.5247-6-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.c | 107 +++++++++++++++++++++++++++++++++++++ tools/perf/util/auxtrace.h | 17 ++++++ tools/perf/util/pmu.h | 1 + tools/perf/util/record.h | 2 + 4 files changed, 127 insertions(+) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 263d1d9d89875..51fbe01f8a119 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -69,6 +69,13 @@ static struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel) return pmu; } +static bool perf_evsel__is_aux_event(struct evsel *evsel) +{ + struct perf_pmu *pmu = perf_evsel__find_pmu(evsel); + + return pmu && pmu->auxtrace; +} + static bool auxtrace__dont_decode(struct perf_session *session) { return !session->itrace_synth_opts || @@ -609,6 +616,106 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, return -EINVAL; } +/* + * Event record size is 16-bit which results in a maximum size of about 64KiB. + * Allow about 4KiB for the rest of the sample record, to give a maximum + * AUX area sample size of 60KiB. + */ +#define MAX_AUX_SAMPLE_SIZE (60 * 1024) + +/* Arbitrary default size if no other default provided */ +#define DEFAULT_AUX_SAMPLE_SIZE (4 * 1024) + +static int auxtrace_validate_aux_sample_size(struct evlist *evlist, + struct record_opts *opts) +{ + struct evsel *evsel; + bool has_aux_leader = false; + u32 sz; + + evlist__for_each_entry(evlist, evsel) { + sz = evsel->core.attr.aux_sample_size; + if (perf_evsel__is_group_leader(evsel)) { + has_aux_leader = perf_evsel__is_aux_event(evsel); + if (sz) { + if (has_aux_leader) + pr_err("Cannot add AUX area sampling to an AUX area event\n"); + else + pr_err("Cannot add AUX area sampling to a group leader\n"); + return -EINVAL; + } + } + if (sz > MAX_AUX_SAMPLE_SIZE) { + pr_err("AUX area sample size %u too big, max. 
%d\n", + sz, MAX_AUX_SAMPLE_SIZE); + return -EINVAL; + } + if (sz) { + if (!has_aux_leader) { + pr_err("Cannot add AUX area sampling because group leader is not an AUX area event\n"); + return -EINVAL; + } + perf_evsel__set_sample_bit(evsel, AUX); + opts->auxtrace_sample_mode = true; + } else { + perf_evsel__reset_sample_bit(evsel, AUX); + } + } + + if (!opts->auxtrace_sample_mode) { + pr_err("AUX area sampling requires an AUX area event group leader plus other events to which to add samples\n"); + return -EINVAL; + } + + if (!perf_can_aux_sample()) { + pr_err("AUX area sampling is not supported by kernel\n"); + return -EINVAL; + } + + return 0; +} + +int auxtrace_parse_sample_options(struct auxtrace_record *itr, + struct evlist *evlist, + struct record_opts *opts, const char *str) +{ + bool has_aux_leader = false; + struct evsel *evsel; + char *endptr; + unsigned long sz; + + if (!str) + return 0; + + if (!itr) { + pr_err("No AUX area event to sample\n"); + return -EINVAL; + } + + sz = strtoul(str, &endptr, 0); + if (*endptr || sz > UINT_MAX) { + pr_err("Bad AUX area sampling option: '%s'\n", str); + return -EINVAL; + } + + if (!sz) + sz = itr->default_aux_sample_size; + + if (!sz) + sz = DEFAULT_AUX_SAMPLE_SIZE; + + /* Set aux_sample_size based on --aux-sample option */ + evlist__for_each_entry(evlist, evsel) { + if (perf_evsel__is_group_leader(evsel)) { + has_aux_leader = perf_evsel__is_aux_event(evsel); + } else if (has_aux_leader) { + evsel->core.attr.aux_sample_size = sz; + } + } + + return auxtrace_validate_aux_sample_size(evlist, opts); +} + struct auxtrace_record *__weak auxtrace_record__init(struct evlist *evlist __maybe_unused, int *err) { diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 3f4aa5427d76b..ab48de13c3535 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -313,6 +313,7 @@ struct auxtrace_mmap_params { * @reference: provide a 64-bit reference number for auxtrace_event * @read_finish: called 
after reading from an auxtrace mmap * @alignment: alignment (if any) for AUX area data + * @default_aux_sample_size: default sample size for --aux sample option */ struct auxtrace_record { int (*recording_options)(struct auxtrace_record *itr, @@ -336,6 +337,7 @@ struct auxtrace_record { u64 (*reference)(struct auxtrace_record *itr); int (*read_finish)(struct auxtrace_record *itr, int idx); unsigned int alignment; + unsigned int default_aux_sample_size; }; /** @@ -498,6 +500,9 @@ struct auxtrace_record *auxtrace_record__init(struct evlist *evlist, int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, struct record_opts *opts, const char *str); +int auxtrace_parse_sample_options(struct auxtrace_record *itr, + struct evlist *evlist, + struct record_opts *opts, const char *str); int auxtrace_record__options(struct auxtrace_record *itr, struct evlist *evlist, struct record_opts *opts); @@ -648,6 +653,18 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused, return -EINVAL; } +static inline +int auxtrace_parse_sample_options(struct auxtrace_record *itr __maybe_unused, + struct evlist *evlist __maybe_unused, + struct record_opts *opts __maybe_unused, + const char *str) +{ + if (!str) + return 0; + pr_err("AUX area tracing not supported\n"); + return -EINVAL; +} + static inline int auxtrace__process_event(struct perf_session *session __maybe_unused, union perf_event *event __maybe_unused, diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 3e8cd31a89ccb..2eb7a70013077 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -26,6 +26,7 @@ struct perf_pmu { __u32 type; bool selectable; bool is_uncore; + bool auxtrace; int max_precise; struct perf_event_attr *default_config; struct perf_cpu_map *cpus; diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 948bbcf9aef3f..5421fd2ad3831 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -32,6 +32,7 @@ struct record_opts { bool 
full_auxtrace; bool auxtrace_snapshot_mode; bool auxtrace_snapshot_on_exit; + bool auxtrace_sample_mode; bool record_namespaces; bool record_switch_events; bool all_kernel; @@ -56,6 +57,7 @@ struct record_opts { u64 user_interval; size_t auxtrace_snapshot_size; const char *auxtrace_snapshot_opts; + const char *auxtrace_sample_opts; bool sample_transaction; unsigned initial_delay; bool use_clockid; From c0a6de06c446f8d173ef53fba361acedd5880b20 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Nov 2019 14:42:16 +0200 Subject: [PATCH 13/26] perf record: Add support for AUX area sampling Add a 'perf record' option '--aux-sample' to request AUX area sampling. AUX area sampling uses an overwriting buffer much like snapshot mode, so adjust the AUX buffer mmapping accordingly. To make it easy to queue samples for decoding, synthesize an ID index. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191115124225.5247-7-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 6 ++++++ tools/perf/builtin-record.c | 21 ++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index ebcba1f95513d..e216d7b529c92 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -433,6 +433,12 @@ can be specified in a string that follows this option: In Snapshot Mode trace data is captured only when signal SIGUSR2 is received and on exit if the above 'e' option is given. +--aux-sample[=OPTIONS]:: +Select AUX area sampling. At least one of the events selected by the -e option +must be an AUX area event. Samples on other events will be created containing +data from the AUX area. Optionally sample size may be specified, otherwise it +defaults to 4KiB. 
+ --proc-map-timeout:: When processing pre-existing threads /proc/XXX/mmap, it may take a long time, because the file may be huge. A time out is needed in such cases. diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 7ab3110b40351..b5063d3b6fd07 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -680,6 +680,11 @@ static int record__auxtrace_init(struct record *rec) if (err) return err; + err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts, + rec->opts.auxtrace_sample_opts); + if (err) + return err; + return auxtrace_parse_filters(rec->evlist); } @@ -752,6 +757,8 @@ static int record__mmap_evlist(struct record *rec, struct evlist *evlist) { struct record_opts *opts = &rec->opts; + bool auxtrace_overwrite = opts->auxtrace_snapshot_mode || + opts->auxtrace_sample_mode; char msg[512]; if (opts->affinity != PERF_AFFINITY_SYS) @@ -759,7 +766,7 @@ static int record__mmap_evlist(struct record *rec, if (evlist__mmap_ex(evlist, opts->mmap_pages, opts->auxtrace_mmap_pages, - opts->auxtrace_snapshot_mode, + auxtrace_overwrite, opts->nr_cblocks, opts->affinity, opts->mmap_flush, opts->comp_level) < 0) { if (errno == EPERM) { @@ -1046,6 +1053,7 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, } if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && + !rec->opts.auxtrace_sample_mode && record__auxtrace_mmap_read(rec, map) != 0) { rc = -1; goto out; @@ -1321,6 +1329,15 @@ static int record__synthesize(struct record *rec, bool tail) if (err) goto out; + /* Synthesize id_index before auxtrace_info */ + if (rec->opts.auxtrace_sample_mode) { + err = perf_event__synthesize_id_index(tool, + process_synthesized_event, + session->evlist, machine); + if (err) + goto out; + } + if (rec->opts.full_auxtrace) { err = perf_event__synthesize_auxtrace_info(rec->itr, tool, session, process_synthesized_event); @@ -2329,6 +2346,8 @@ static struct option __record_options[] = { 
parse_clockid), OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, "opts", "AUX area tracing Snapshot Mode", ""), + OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, + "opts", "sample AUX area", ""), OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, "per thread proc mmap processing timeout in ms"), OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, From eb7a52d46c6ac95df563f867d526b3d46616b10b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Nov 2019 14:42:17 +0200 Subject: [PATCH 14/26] perf record: Add aux-sample-size config term To allow individual events to be selected for AUX area sampling, add aux-sample-size config term. attr.aux_sample_size is updated by auxtrace_parse_sample_options() so that the existing validation will see the value. Any event that has a non-zero aux_sample_size will cause AUX area sampling to be configured, irrespective of the --aux-sample option. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191115124225.5247-8-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 3 + tools/perf/util/auxtrace.c | 76 +++++++++++++++++++++++- tools/perf/util/evsel.c | 16 +++++ tools/perf/util/evsel_config.h | 11 ++++ tools/perf/util/parse-events.c | 14 +++++ tools/perf/util/parse-events.h | 1 + tools/perf/util/parse-events.l | 1 + 7 files changed, 121 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index e216d7b529c92..b23a4012a6064 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -62,6 +62,9 @@ OPTIONS like this: name=\'CPU_CLK_UNHALTED.THREAD:cmask=0x1\'. - 'aux-output': Generate AUX records instead of events. This requires that an AUX area event is also provided. + - 'aux-sample-size': Set sample size for AUX area sampling. 
If the + '--aux-sample' option has been used, set aux-sample-size=0 to disable + AUX area sampling for the event. See the linkperf:perf-list[1] man page for more parameters. diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 51fbe01f8a119..026585b67a3c5 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -31,6 +31,7 @@ #include "map.h" #include "pmu.h" #include "evsel.h" +#include "evsel_config.h" #include "symbol.h" #include "util/synthetic-events.h" #include "thread_map.h" @@ -76,6 +77,53 @@ static bool perf_evsel__is_aux_event(struct evsel *evsel) return pmu && pmu->auxtrace; } +/* + * Make a group from 'leader' to 'last', requiring that the events were not + * already grouped to a different leader. + */ +static int perf_evlist__regroup(struct evlist *evlist, + struct evsel *leader, + struct evsel *last) +{ + struct evsel *evsel; + bool grp; + + if (!perf_evsel__is_group_leader(leader)) + return -EINVAL; + + grp = false; + evlist__for_each_entry(evlist, evsel) { + if (grp) { + if (!(evsel->leader == leader || + (evsel->leader == evsel && + evsel->core.nr_members <= 1))) + return -EINVAL; + } else if (evsel == leader) { + grp = true; + } + if (evsel == last) + break; + } + + grp = false; + evlist__for_each_entry(evlist, evsel) { + if (grp) { + if (evsel->leader != leader) { + evsel->leader = leader; + if (leader->core.nr_members < 1) + leader->core.nr_members = 1; + leader->core.nr_members += 1; + } + } else if (evsel == leader) { + grp = true; + } + if (evsel == last) + break; + } + + return 0; +} + static bool auxtrace__dont_decode(struct perf_session *session) { return !session->itrace_synth_opts || @@ -679,13 +727,16 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr, struct evlist *evlist, struct record_opts *opts, const char *str) { + struct perf_evsel_config_term *term; + struct evsel *aux_evsel; + bool has_aux_sample_size = false; bool has_aux_leader = false; struct evsel *evsel; char *endptr; 
unsigned long sz; if (!str) - return 0; + goto no_opt; if (!itr) { pr_err("No AUX area event to sample\n"); @@ -712,6 +763,29 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr, evsel->core.attr.aux_sample_size = sz; } } +no_opt: + aux_evsel = NULL; + /* Override with aux_sample_size from config term */ + evlist__for_each_entry(evlist, evsel) { + if (perf_evsel__is_aux_event(evsel)) + aux_evsel = evsel; + term = perf_evsel__get_config_term(evsel, AUX_SAMPLE_SIZE); + if (term) { + has_aux_sample_size = true; + evsel->core.attr.aux_sample_size = term->val.aux_sample_size; + /* If possible, group with the AUX event */ + if (aux_evsel && evsel->core.attr.aux_sample_size) + perf_evlist__regroup(evlist, aux_evsel, evsel); + } + } + + if (!str && !has_aux_sample_size) + return 0; + + if (!itr) { + pr_err("No AUX area event to sample\n"); + return -EINVAL; + } return auxtrace_validate_aux_sample_size(evlist, opts); } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 772f4879c4923..ad7665a546cf1 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -846,6 +846,9 @@ static void apply_config_terms(struct evsel *evsel, case PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT: attr->aux_output = term->val.aux_output ? 
1 : 0; break; + case PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE: + /* Already applied by auxtrace */ + break; default: break; } @@ -905,6 +908,19 @@ static bool is_dummy_event(struct evsel *evsel) (evsel->core.attr.config == PERF_COUNT_SW_DUMMY); } +struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel, + enum evsel_term_type type) +{ + struct perf_evsel_config_term *term, *found_term = NULL; + + list_for_each_entry(term, &evsel->config_terms, list) { + if (term->type == type) + found_term = term; + } + + return found_term; +} + /* * The enable_on_exec/disabled value strategy: * diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h index 8a7648037c18e..6e654ede8fbe2 100644 --- a/tools/perf/util/evsel_config.h +++ b/tools/perf/util/evsel_config.h @@ -25,6 +25,7 @@ enum evsel_term_type { PERF_EVSEL__CONFIG_TERM_BRANCH, PERF_EVSEL__CONFIG_TERM_PERCORE, PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT, + PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE, }; struct perf_evsel_config_term { @@ -44,7 +45,17 @@ struct perf_evsel_config_term { unsigned long max_events; bool percore; bool aux_output; + u32 aux_sample_size; } val; bool weak; }; + +struct evsel; + +struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel, + enum evsel_term_type type); + +#define perf_evsel__get_config_term(evsel, type) \ + __perf_evsel__get_config_term(evsel, PERF_EVSEL__CONFIG_TERM_ ## type) + #endif // __PERF_EVSEL_CONFIG_H diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 6bae9d6edc121..fc5e27bc8315b 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -996,6 +996,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config", [PARSE_EVENTS__TERM_TYPE_PERCORE] = "percore", [PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT] = "aux-output", + [PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE] = "aux-sample-size", }; static bool 
config_term_shrinked; @@ -1126,6 +1127,15 @@ do { \ case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: CHECK_TYPE_VAL(NUM); break; + case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: + CHECK_TYPE_VAL(NUM); + if (term->val.num > UINT_MAX) { + parse_events__handle_error(err, term->err_val, + strdup("too big"), + NULL); + return -EINVAL; + } + break; default: parse_events__handle_error(err, term->err_term, strdup("unknown term"), @@ -1177,6 +1187,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_OVERWRITE: case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: + case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: return config_term_common(attr, term, err); default: if (err) { @@ -1272,6 +1283,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: ADD_CONFIG_TERM(AUX_OUTPUT, aux_output, term->val.num ? 1 : 0); break; + case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: + ADD_CONFIG_TERM(AUX_SAMPLE_SIZE, aux_sample_size, term->val.num); + break; default: break; } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index ff367f248fe82..27596cbd0ba06 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -77,6 +77,7 @@ enum { PARSE_EVENTS__TERM_TYPE_DRV_CFG, PARSE_EVENTS__TERM_TYPE_PERCORE, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT, + PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE, __PARSE_EVENTS__TERM_TYPE_NR, }; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 7469497cd28e4..7b1c8ee537cf6 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -285,6 +285,7 @@ overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); } no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); } percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); } aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } +aux-sample-size { return term(yyscanner, 
PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } From ba2675bf15fc3ec1d54b9bf938cf5b28392f79fb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Nov 2019 14:42:18 +0200 Subject: [PATCH 15/26] perf inject: Cut AUX area samples After decoding AUX area samples, the AUX area data is no longer needed (having been replaced by synthesized events) so cut it out. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191115124225.5247-9-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 1e5d28311e143..9664a72a089da 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -45,6 +45,7 @@ struct perf_inject { u64 aux_id; struct list_head samples; struct itrace_synth_opts itrace_synth_opts; + char event_copy[PERF_SAMPLE_MAX_SIZE]; }; struct event_entry { @@ -214,6 +215,28 @@ static int perf_event__drop_aux(struct perf_tool *tool, return 0; } +static union perf_event * +perf_inject__cut_auxtrace_sample(struct perf_inject *inject, + union perf_event *event, + struct perf_sample *sample) +{ + size_t sz1 = sample->aux_sample.data - (void *)event; + size_t sz2 = event->header.size - sample->aux_sample.size - sz1; + union perf_event *ev = (union perf_event *)inject->event_copy; + + if (sz1 > event->header.size || sz2 > event->header.size || + sz1 + sz2 > event->header.size || + sz1 < sizeof(struct perf_event_header) + sizeof(u64)) + return event; + + memcpy(ev, event, sz1); + memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2); + ev->header.size = sz1 + sz2; + ((u64 *)((void *)ev + sz1))[-1] = 0; + + return ev; +} + typedef int (*inject_handler)(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ 
-226,6 +249,9 @@ static int perf_event__repipe_sample(struct perf_tool *tool, struct evsel *evsel, struct machine *machine) { + struct perf_inject *inject = container_of(tool, struct perf_inject, + tool); + if (evsel && evsel->handler) { inject_handler f = evsel->handler; return f(tool, event, sample, evsel, machine); @@ -233,6 +259,9 @@ static int perf_event__repipe_sample(struct perf_tool *tool, build_id__mark_dso_hit(tool, event, sample, evsel, machine); + if (inject->itrace_synth_opts.set && sample->aux_sample.size) + event = perf_inject__cut_auxtrace_sample(inject, event, sample); + return perf_event__repipe_synth(tool, event); } From b04b8dd1e4265525dbd74647f747e63e85540189 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Nov 2019 14:42:19 +0200 Subject: [PATCH 16/26] perf auxtrace: Add support for dumping AUX area samples Add support for dumping AUX area samples i.e. via the perf script/report -D (--dump-raw-trace) option. Committer notes: Add __maybe_unused to the two args for auxtrace__dump_auxtrace_sample() for when we don't HAVE_AUXTRACE_SUPPORT. 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191115124225.5247-10-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.c | 10 ++++++++++ tools/perf/util/auxtrace.h | 11 +++++++++++ tools/perf/util/session.c | 9 +++++++-- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 026585b67a3c5..4f5c5fe3516b6 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -2417,6 +2417,16 @@ int auxtrace__process_event(struct perf_session *session, union perf_event *even return session->auxtrace->process_event(session, event, sample, tool); } +void auxtrace__dump_auxtrace_sample(struct perf_session *session, + struct perf_sample *sample) +{ + if (!session->auxtrace || !session->auxtrace->dump_auxtrace_sample || + auxtrace__dont_decode(session)) + return; + + session->auxtrace->dump_auxtrace_sample(session, sample); +} + int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool) { if (!session->auxtrace) diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index ab48de13c3535..4a8ac7de6e22d 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -141,6 +141,7 @@ struct auxtrace_index { * struct auxtrace - session callbacks to allow AUX area data decoding. 
* @process_event: lets the decoder see all session events * @process_auxtrace_event: process a PERF_RECORD_AUXTRACE event + * @dump_auxtrace_sample: dump AUX area sample data * @flush_events: process any remaining data * @free_events: free resources associated with event processing * @free: free resources associated with the session @@ -153,6 +154,8 @@ struct auxtrace { int (*process_auxtrace_event)(struct perf_session *session, union perf_event *event, struct perf_tool *tool); + void (*dump_auxtrace_sample)(struct perf_session *session, + struct perf_sample *sample); int (*flush_events)(struct perf_session *session, struct perf_tool *tool); void (*free_events)(struct perf_session *session); @@ -555,6 +558,8 @@ int auxtrace_parse_filters(struct evlist *evlist); int auxtrace__process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, struct perf_tool *tool); +void auxtrace__dump_auxtrace_sample(struct perf_session *session, + struct perf_sample *sample); int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool); void auxtrace__free_events(struct perf_session *session); void auxtrace__free(struct perf_session *session); @@ -674,6 +679,12 @@ int auxtrace__process_event(struct perf_session *session __maybe_unused, return 0; } +static inline +void auxtrace__dump_auxtrace_sample(struct perf_session *session __maybe_unused, + struct perf_sample *sample __maybe_unused) +{ +} + static inline int auxtrace__flush_events(struct perf_session *session __maybe_unused, struct perf_tool *tool __maybe_unused) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index dbdb47624dec1..ab4dae1efea3e 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1496,8 +1496,13 @@ static int perf_session__deliver_event(struct perf_session *session, if (ret > 0) return 0; - return machines__deliver_event(&session->machines, session->evlist, - event, &sample, tool, file_offset); + ret = 
machines__deliver_event(&session->machines, session->evlist, + event, &sample, tool, file_offset); + + if (dump_trace && sample.aux_sample.size) + auxtrace__dump_auxtrace_sample(session, &sample); + + return ret; } static s64 perf_session__process_user_event(struct perf_session *session, From 103ed40e4bfa6986d80983b3e67be9d2f61fc9ee Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Nov 2019 14:42:20 +0200 Subject: [PATCH 17/26] perf session: Add facility to peek at all events AUX area samples are not limited in how far back in time the sample could start. Consequently samples must be queued in advance to allow for time-ordered processing. To achieve that, add perf_session__peek_events() that walks and peeks at all the events. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191115124225.5247-11-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 28 ++++++++++++++++++++++++++++ tools/perf/util/session.h | 5 +++++ 2 files changed, 33 insertions(+) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index ab4dae1efea3e..d0d7d25b23e3b 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1659,6 +1659,34 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, return 0; } +int perf_session__peek_events(struct perf_session *session, u64 offset, + u64 size, peek_events_cb_t cb, void *data) +{ + u64 max_offset = offset + size; + char buf[PERF_SAMPLE_MAX_SIZE]; + union perf_event *event; + int err; + + do { + err = perf_session__peek_event(session, offset, buf, + PERF_SAMPLE_MAX_SIZE, &event, + NULL); + if (err) + return err; + + err = cb(session, event, offset, data); + if (err) + return err; + + offset += event->header.size; + if (event->header.type == PERF_RECORD_AUXTRACE) + offset += event->auxtrace.size; + + } while (offset < max_offset); + + return err; +} + static s64 perf_session__process_event(struct perf_session *session, 
union perf_event *event, u64 file_offset) { diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 8456e1d868fda..f76480166d38e 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -64,6 +64,11 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, void *buf, size_t buf_sz, union perf_event **event_ptr, struct perf_sample *sample); +typedef int (*peek_events_cb_t)(struct perf_session *session, + union perf_event *event, u64 offset, + void *data); +int perf_session__peek_events(struct perf_session *session, u64 offset, + u64 size, peek_events_cb_t cb, void *data); int perf_session__process_events(struct perf_session *session); From ac2f445fc8989e152dc35eb7af368fd34b92e48a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Nov 2019 14:42:21 +0200 Subject: [PATCH 18/26] perf auxtrace: Add support for queuing AUX area samples Add functions to queue AUX area samples in advance (auxtrace_queue_data()) or individually (auxtrace_queues__add_sample()) or find out what queue a sample belongs on (auxtrace_queues__sample_queue()). auxtrace_queue_data() can also queue snapshot data which keeps snapshots and samples ordered with respect to each other in case support for that is desired. 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191115124225.5247-12-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.c | 107 +++++++++++++++++++++++++++++++++++++ tools/perf/util/auxtrace.h | 15 ++++++ 2 files changed, 122 insertions(+) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 4f5c5fe3516b6..eb087e7df6f4b 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1004,6 +1004,113 @@ struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue, } } +struct auxtrace_queue *auxtrace_queues__sample_queue(struct auxtrace_queues *queues, + struct perf_sample *sample, + struct perf_session *session) +{ + struct perf_sample_id *sid; + unsigned int idx; + u64 id; + + id = sample->id; + if (!id) + return NULL; + + sid = perf_evlist__id2sid(session->evlist, id); + if (!sid) + return NULL; + + idx = sid->idx; + + if (idx >= queues->nr_queues) + return NULL; + + return &queues->queue_array[idx]; +} + +int auxtrace_queues__add_sample(struct auxtrace_queues *queues, + struct perf_session *session, + struct perf_sample *sample, u64 data_offset, + u64 reference) +{ + struct auxtrace_buffer buffer = { + .pid = -1, + .data_offset = data_offset, + .reference = reference, + .size = sample->aux_sample.size, + }; + struct perf_sample_id *sid; + u64 id = sample->id; + unsigned int idx; + + if (!id) + return -EINVAL; + + sid = perf_evlist__id2sid(session->evlist, id); + if (!sid) + return -ENOENT; + + idx = sid->idx; + buffer.tid = sid->tid; + buffer.cpu = sid->cpu; + + return auxtrace_queues__add_buffer(queues, session, idx, &buffer, NULL); +} + +struct queue_data { + bool samples; + bool events; +}; + +static int auxtrace_queue_data_cb(struct perf_session *session, + union perf_event *event, u64 offset, + void *data) +{ + struct queue_data *qd = data; + struct perf_sample sample; + int err; + + if (qd->events && event->header.type == 
PERF_RECORD_AUXTRACE) { + if (event->header.size < sizeof(struct perf_record_auxtrace)) + return -EINVAL; + offset += event->header.size; + return session->auxtrace->queue_data(session, NULL, event, + offset); + } + + if (!qd->samples || event->header.type != PERF_RECORD_SAMPLE) + return 0; + + err = perf_evlist__parse_sample(session->evlist, event, &sample); + if (err) + return err; + + if (!sample.aux_sample.size) + return 0; + + offset += sample.aux_sample.data - (void *)event; + + return session->auxtrace->queue_data(session, &sample, NULL, offset); +} + +int auxtrace_queue_data(struct perf_session *session, bool samples, bool events) +{ + struct queue_data qd = { + .samples = samples, + .events = events, + }; + + if (auxtrace__dont_decode(session)) + return 0; + + if (!session->auxtrace || !session->auxtrace->queue_data) + return -EINVAL; + + return perf_session__peek_events(session, session->header.data_offset, + session->header.data_size, + auxtrace_queue_data_cb, &qd); +} + void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd) { size_t adj = buffer->data_offset & (page_size - 1); diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 4a8ac7de6e22d..749d72cd9c7b0 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -141,6 +141,8 @@ struct auxtrace_index { * struct auxtrace - session callbacks to allow AUX area data decoding. 
* @process_event: lets the decoder see all session events * @process_auxtrace_event: process a PERF_RECORD_AUXTRACE event + * @queue_data: queue an AUX sample or PERF_RECORD_AUXTRACE event for later + * processing * @dump_auxtrace_sample: dump AUX area sample data * @flush_events: process any remaining data * @free_events: free resources associated with event processing @@ -154,6 +156,9 @@ struct auxtrace { int (*process_auxtrace_event)(struct perf_session *session, union perf_event *event, struct perf_tool *tool); + int (*queue_data)(struct perf_session *session, + struct perf_sample *sample, union perf_event *event, + u64 data_offset); void (*dump_auxtrace_sample)(struct perf_session *session, struct perf_sample *sample); int (*flush_events)(struct perf_session *session, @@ -467,9 +472,19 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues, struct perf_session *session, union perf_event *event, off_t data_offset, struct auxtrace_buffer **buffer_ptr); +struct auxtrace_queue * +auxtrace_queues__sample_queue(struct auxtrace_queues *queues, + struct perf_sample *sample, + struct perf_session *session); +int auxtrace_queues__add_sample(struct auxtrace_queues *queues, + struct perf_session *session, + struct perf_sample *sample, u64 data_offset, + u64 reference); void auxtrace_queues__free(struct auxtrace_queues *queues); int auxtrace_queues__process_index(struct auxtrace_queues *queues, struct perf_session *session); +int auxtrace_queue_data(struct perf_session *session, bool samples, + bool events); struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue, struct auxtrace_buffer *buffer); void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd); From a1ac7de6902c1ea6def7a743f1d2e6ba429684b3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Nov 2019 14:42:22 +0200 Subject: [PATCH 19/26] perf pmu: When using default config, record which bits of config were changed by the user Default config for a PMU is defined 
before selected events are parsed. That allows the user-entered config to override the default config. However that does not allow for changing the default config based on other options. For example, if the user chooses AUX area sampling mode, in the case of Intel PT, the psb_period needs to be small for sampling, so there is a need to set the default psb_period to 0 (2 KiB) in that case. However that should not override a value set by the user. To allow for that, when using default config, record which bits of config were changed by the user. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191115124225.5247-13-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 ++ tools/perf/util/evsel_config.h | 2 ++ tools/perf/util/parse-events.c | 42 +++++++++++++++++++++++++++++++++- tools/perf/util/pmu.c | 10 ++++++++ tools/perf/util/pmu.h | 1 + 5 files changed, 56 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ad7665a546cf1..f4dea055b0808 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -849,6 +849,8 @@ static void apply_config_terms(struct evsel *evsel, case PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE: /* Already applied by auxtrace */ break; + case PERF_EVSEL__CONFIG_TERM_CFG_CHG: + break; default: break; } diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h index 6e654ede8fbe2..1f8d2fe0b66ed 100644 --- a/tools/perf/util/evsel_config.h +++ b/tools/perf/util/evsel_config.h @@ -26,6 +26,7 @@ enum evsel_term_type { PERF_EVSEL__CONFIG_TERM_PERCORE, PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT, PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE, + PERF_EVSEL__CONFIG_TERM_CFG_CHG, }; struct perf_evsel_config_term { @@ -46,6 +47,7 @@ struct perf_evsel_config_term { bool percore; bool aux_output; u32 aux_sample_size; + u64 cfg_chg; } val; bool weak; }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 
fc5e27bc8315b..6c313c4087edc 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1290,7 +1290,40 @@ do { \ break; } } -#undef ADD_EVSEL_CONFIG + return 0; +} + +/* + * Add PERF_EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for + * each bit of attr->config that the user has changed. + */ +static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config, + struct list_head *head_terms) +{ + struct parse_events_term *term; + u64 bits = 0; + int type; + + list_for_each_entry(term, head_config, list) { + switch (term->type_term) { + case PARSE_EVENTS__TERM_TYPE_USER: + type = perf_pmu__format_type(&pmu->format, term->config); + if (type != PERF_PMU_FORMAT_VALUE_CONFIG) + continue; + bits |= perf_pmu__format_bits(&pmu->format, term->config); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG: + bits = ~(u64)0; + break; + default: + break; + } + } + + if (bits) + ADD_CONFIG_TERM(CFG_CHG, cfg_chg, bits); + +#undef ADD_CONFIG_TERM return 0; } @@ -1419,6 +1452,13 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (get_config_terms(head_config, &config_terms)) return -ENOMEM; + /* + * When using default config, record which bits of attr->config were + * changed by the user. 
+ */ + if (pmu->default_config && get_config_chgs(pmu, head_config, &config_terms)) + return -ENOMEM; + if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { struct perf_evsel_config_term *pos, *tmp; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index db1e57113f4ba..e8d3489880260 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -931,6 +931,16 @@ __u64 perf_pmu__format_bits(struct list_head *formats, const char *name) return bits; } +int perf_pmu__format_type(struct list_head *formats, const char *name) +{ + struct perf_pmu_format *format = pmu_find_format(formats, name); + + if (!format) + return -1; + + return format->value; +} + /* * Sets value based on the format definition (format parameter) * and unformated value (value parameter). diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 2eb7a70013077..6737e3d5d568c 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -72,6 +72,7 @@ int perf_pmu__config_terms(struct list_head *formats, struct list_head *head_terms, bool zero, struct parse_events_error *error); __u64 perf_pmu__format_bits(struct list_head *formats, const char *name); +int perf_pmu__format_type(struct list_head *formats, const char *name); int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, struct perf_pmu_info *info); struct list_head *perf_pmu__alias(struct perf_pmu *pmu, From c4ab2f0f763da64d88cec6f20fd664f2347eca60 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Nov 2019 14:42:23 +0200 Subject: [PATCH 20/26] perf intel-pt: Add support for recording AUX area samples Set up the default number of mmap pages, default sample size and default psb_period for AUX area sampling. Add documentation also. 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191115124225.5247-14-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-pt.txt | 59 ++++++++++++++++++- tools/perf/arch/x86/util/auxtrace.c | 2 + tools/perf/arch/x86/util/intel-pt.c | 81 ++++++++++++++++++++++++++- 3 files changed, 139 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index e0d9e7dd4f176..2cf2d9e9d0da1 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -434,6 +434,56 @@ pwr_evt Enable power events. The power events provide information about "0" otherwise. +AUX area sampling option +------------------------ + +To select Intel PT "sampling" the AUX area sampling option can be used: + + --aux-sample + +Optionally it can be followed by the sample size in bytes e.g. + + --aux-sample=8192 + +In addition, the Intel PT event to sample must be defined e.g. + + -e intel_pt//u + +Samples on other events will be created containing Intel PT data e.g. the +following will create Intel PT samples on the branch-misses event, note the +events must be grouped using {}: + + perf record --aux-sample -e '{intel_pt//u,branch-misses:u}' + +An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to +events. In this case, the grouping is implied e.g. + + perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u + +is the same as: + + perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}' + +but allows for also using an address filter e.g.: + + perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls + +It is important to select a sample size that is big enough to contain at least +one PSB packet. 
If not, a warning will be displayed: + + Intel PT sample size (%zu) may be too small for PSB period (%zu) + +The calculation used for that is: if sample_size <= psb_period + 256 display the +warning. When sampling is used, psb_period defaults to 0 (2KiB). + +The default sample size is 4KiB. + +The sample size is passed in aux_sample_size in struct perf_event_attr. The +sample size is limited by the maximum event size which is 64KiB. It is +difficult to know how big the event might be without the trace sample attached, +but the tool validates that the sample size is not greater than 60KiB. + + new snapshot option ------------------- @@ -487,8 +537,8 @@ their mlock limit (which defaults to 64KiB but is not multiplied by the number of cpus). In full-trace mode, powers of two are allowed for buffer size, with a minimum -size of 2 pages. In snapshot mode, it is the same but the minimum size is -1 page. +size of 2 pages. In snapshot mode or sampling mode, it is the same but the +minimum size is 1 page. The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g. @@ -501,12 +551,17 @@ Intel PT modes of operation Intel PT can be used in 2 modes: full-trace mode + sample mode snapshot mode Full-trace mode traces continuously e.g. perf record -e intel_pt//u uname +Sample mode attaches an Intel PT sample to other events e.g. + + perf record --aux-sample -e intel_pt//u -e branch-misses:u + Snapshot mode captures the available data when a signal is sent e.g.
perf record -v -e intel_pt//u -S ./loopy 1000000000 & diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c index 96f4a2c118937..092543cad3245 100644 --- a/tools/perf/arch/x86/util/auxtrace.c +++ b/tools/perf/arch/x86/util/auxtrace.c @@ -26,6 +26,8 @@ struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist, bool found_bts = false; intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); + if (intel_pt_pmu) + intel_pt_pmu->auxtrace = true; intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME); evlist__for_each_entry(evlist, evsel) { diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index d6d26256915f8..20df442fdf36d 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -17,6 +17,7 @@ #include "../../util/event.h" #include "../../util/evlist.h" #include "../../util/evsel.h" +#include "../../util/evsel_config.h" #include "../../util/cpumap.h" #include "../../util/mmap.h" #include @@ -551,6 +552,43 @@ static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, evsel->core.attr.config); } +static void intel_pt_config_sample_mode(struct perf_pmu *intel_pt_pmu, + struct evsel *evsel) +{ + struct perf_evsel_config_term *term; + u64 user_bits = 0, bits; + + term = perf_evsel__get_config_term(evsel, CFG_CHG); + if (term) + user_bits = term->val.cfg_chg; + + bits = perf_pmu__format_bits(&intel_pt_pmu->format, "psb_period"); + + /* Did user change psb_period */ + if (bits & user_bits) + return; + + /* Set psb_period to 0 */ + evsel->core.attr.config &= ~bits; +} + +static void intel_pt_min_max_sample_sz(struct evlist *evlist, + size_t *min_sz, size_t *max_sz) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + size_t sz = evsel->core.attr.aux_sample_size; + + if (!sz) + continue; + if (min_sz && (sz < *min_sz || !*min_sz)) + *min_sz = sz; + if (max_sz && sz > *max_sz) + *max_sz = sz; + } +} + /* * Currently, there is not enough 
information to disambiguate different PEBS * events, so only allow one. @@ -606,6 +644,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, return -EINVAL; } + if (opts->auxtrace_snapshot_mode && opts->auxtrace_sample_mode) { + pr_err("Snapshot mode (" INTEL_PT_PMU_NAME " PMU) and sample trace cannot be used together\n"); + return -EINVAL; + } + if (opts->use_clockid) { pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n"); return -EINVAL; @@ -617,6 +660,9 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, if (!opts->full_auxtrace) return 0; + if (opts->auxtrace_sample_mode) + intel_pt_config_sample_mode(intel_pt_pmu, intel_pt_evsel); + err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel); if (err) return err; @@ -666,6 +712,34 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, opts->auxtrace_snapshot_size, psb_period); } + /* Set default sizes for sample mode */ + if (opts->auxtrace_sample_mode) { + size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist); + size_t min_sz = 0, max_sz = 0; + + intel_pt_min_max_sample_sz(evlist, &min_sz, &max_sz); + if (!opts->auxtrace_mmap_pages && !privileged && + opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + if (!opts->auxtrace_mmap_pages) { + size_t sz = round_up(max_sz, page_size) / page_size; + + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); + } + if (max_sz > opts->auxtrace_mmap_pages * (size_t)page_size) { + pr_err("Sample size %zu must not be greater than AUX area tracing mmap size %zu\n", + max_sz, + opts->auxtrace_mmap_pages * (size_t)page_size); + return -EINVAL; + } + pr_debug2("Intel PT min. sample size: %zu max. 
sample size: %zu\n", + min_sz, max_sz); + if (psb_period && + min_sz <= psb_period + INTEL_PT_PSB_PERIOD_NEAR) + ui__warning("Intel PT sample size (%zu) may be too small for PSB period (%zu)\n", + min_sz, psb_period); + } + /* Set default sizes for full trace mode */ if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { if (privileged) { @@ -682,7 +756,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; size_t min_sz; - if (opts->auxtrace_snapshot_mode) + if (opts->auxtrace_snapshot_mode || opts->auxtrace_sample_mode) min_sz = KiB(4); else min_sz = KiB(8); @@ -1136,5 +1210,10 @@ struct auxtrace_record *intel_pt_recording_init(int *err) ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options; ptr->itr.reference = intel_pt_reference; ptr->itr.read_finish = intel_pt_read_finish; + /* + * Decoding starts at a PSB packet. Minimum PSB period is 2K so 4K + * should give at least 1 PSB per sample. + */ + ptr->itr.default_aux_sample_size = 4096; return &ptr->itr; } From dbd134322e74f19dbabf174b2cbf7fca9bbc34d3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Nov 2019 14:42:24 +0200 Subject: [PATCH 21/26] perf intel-pt: Add support for decoding AUX area samples Add support for dumping, queuing and decoding AUX area samples. Decoding samples is the same as regular decoding, except in the case where there are no timestamps, in which case buffers are decoded immediately before the sample event. 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191115124225.5247-15-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 109 ++++++++++++++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index a1c9eb6d4f40d..409afc611be98 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -233,6 +233,16 @@ static void intel_pt_log_event(union perf_event *event) perf_event__fprintf(event, f); } +static void intel_pt_dump_sample(struct perf_session *session, + struct perf_sample *sample) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + + printf("\n"); + intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size); +} + static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, struct auxtrace_buffer *b) { @@ -836,6 +846,18 @@ static bool intel_pt_have_tsc(struct intel_pt *pt) return have_tsc; } +static bool intel_pt_sampling_mode(struct intel_pt *pt) +{ + struct evsel *evsel; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if ((evsel->core.attr.sample_type & PERF_SAMPLE_AUX) && + evsel->core.attr.aux_sample_size) + return true; + } + return false; +} + static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) { u64 quot, rem; @@ -2320,6 +2342,56 @@ static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid, return 0; } +static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue *ptq, + struct auxtrace_queue *queue, + struct perf_sample *sample) +{ + struct machine *m = ptq->pt->machine; + + ptq->pid = sample->pid; + ptq->tid = sample->tid; + ptq->cpu = queue->cpu; + + intel_pt_log("queue %u cpu %d pid %d tid %d\n", + ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); + + thread__zput(ptq->thread); + + if (ptq->tid == -1) + return; + + if (ptq->pid == -1) { + ptq->thread = 
machine__find_thread(m, -1, ptq->tid); + if (ptq->thread) + ptq->pid = ptq->thread->pid_; + return; + } + + ptq->thread = machine__findnew_thread(m, ptq->pid, ptq->tid); +} + +static int intel_pt_process_timeless_sample(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct auxtrace_queue *queue; + struct intel_pt_queue *ptq; + u64 ts = 0; + + queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session); + if (!queue) + return -EINVAL; + + ptq = queue->priv; + if (!ptq) + return 0; + + ptq->stop = false; + ptq->time = sample->time; + intel_pt_sample_set_pid_tid_cpu(ptq, queue, sample); + intel_pt_run_decoder(ptq, &ts); + return 0; +} + static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) { return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, @@ -2550,7 +2622,11 @@ static int intel_pt_process_event(struct perf_session *session, } if (pt->timeless_decoding) { - if (event->header.type == PERF_RECORD_EXIT) { + if (pt->sampling_mode) { + if (sample->aux_sample.size) + err = intel_pt_process_timeless_sample(pt, + sample); + } else if (event->header.type == PERF_RECORD_EXIT) { err = intel_pt_process_timeless_queues(pt, event->fork.tid, sample->time); @@ -2676,6 +2752,28 @@ static int intel_pt_process_auxtrace_event(struct perf_session *session, return 0; } +static int intel_pt_queue_data(struct perf_session *session, + struct perf_sample *sample, + union perf_event *event, u64 data_offset) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + u64 timestamp; + + if (event) { + return auxtrace_queues__add_event(&pt->queues, session, event, + data_offset, NULL); + } + + if (sample->time && sample->time != (u64)-1) + timestamp = perf_time_to_tsc(sample->time, &pt->tc); + else + timestamp = 0; + + return auxtrace_queues__add_sample(&pt->queues, session, sample, + data_offset, timestamp); +} + struct intel_pt_synth { struct perf_tool dummy_tool; struct perf_session *session; @@ -3178,7 
+3276,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (pt->timeless_decoding && !pt->tc.time_mult) pt->tc.time_mult = 1; pt->have_tsc = intel_pt_have_tsc(pt); - pt->sampling_mode = false; + pt->sampling_mode = intel_pt_sampling_mode(pt); pt->est_tsc = !pt->timeless_decoding; pt->unknown_thread = thread__new(999999999, 999999999); @@ -3205,6 +3303,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->auxtrace.process_event = intel_pt_process_event; pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event; + pt->auxtrace.queue_data = intel_pt_queue_data; + pt->auxtrace.dump_auxtrace_sample = intel_pt_dump_sample; pt->auxtrace.flush_events = intel_pt_flush; pt->auxtrace.free_events = intel_pt_free_events; pt->auxtrace.free = intel_pt_free; @@ -3282,7 +3382,10 @@ int intel_pt_process_auxtrace_info(union perf_event *event, intel_pt_setup_pebs_events(pt); - err = auxtrace_queues__process_index(&pt->queues, session); + if (pt->sampling_mode || list_empty(&session->auxtrace_index)) + err = auxtrace_queue_data(session, true, true); + else + err = auxtrace_queues__process_index(&pt->queues, session); if (err) goto err_delete_thread; From 32a1ece4bdbde24734ab16484bad7316f03fc42d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Nov 2019 14:42:25 +0200 Subject: [PATCH 22/26] perf intel-bts: Does not support AUX area sampling Add an error message because Intel BTS does not support AUX area sampling. 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191115124225.5247-16-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/auxtrace.c | 2 ++ tools/perf/arch/x86/util/intel-bts.c | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c index 092543cad3245..7abc9fd4cbec4 100644 --- a/tools/perf/arch/x86/util/auxtrace.c +++ b/tools/perf/arch/x86/util/auxtrace.c @@ -29,6 +29,8 @@ struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist, if (intel_pt_pmu) intel_pt_pmu->auxtrace = true; intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME); + if (intel_bts_pmu) + intel_bts_pmu->auxtrace = true; evlist__for_each_entry(evlist, evsel) { if (intel_pt_pmu && evsel->core.attr.type == intel_pt_pmu->type) diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index f7f68a50a5cd5..27d9e214d0680 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -113,6 +113,11 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, const struct perf_cpu_map *cpus = evlist->core.cpus; bool privileged = perf_event_paranoid_check(-1); + if (opts->auxtrace_sample_mode) { + pr_err("Intel BTS does not support AUX area sampling\n"); + return -EINVAL; + } + btsr->evlist = evlist; btsr->snapshot_mode = opts->auxtrace_snapshot_mode; From 68401a1799fa14cb72c2a129bbefdacd44279772 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 14 Nov 2019 13:37:19 +0000 Subject: [PATCH 23/26] libtraceevent: Fix header installation When we passed some location in DESTDIR, install_headers called do_install with DESTDIR as part of the second argument. But do_install is again using '$(DESTDIR_SQ)$2', so as a result the headers were installed in a location $DESTDIR/$DESTDIR. 
In my testing I passed DESTDIR=/home/sudip/test and the headers were installed in: /home/sudip/test/home/sudip/test/usr/include/traceevent. Let's remove DESTDIR from the second argument of do_install so that the headers are installed in the correct location. Signed-off-by: Sudipm Mukherjee Reviewed-by: Steven Rostedt (VMware) Cc: Sudipm Mukherjee Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/lkml/20191114133719.309-1-sudipm.mukherjee@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 5315f3787f8d6..cbb429f550625 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -232,10 +232,10 @@ install_pkgconfig: install_headers: $(call QUIET_INSTALL, headers) \ - $(call do_install,event-parse.h,$(DESTDIR)$(includedir_SQ),644); \ - $(call do_install,event-utils.h,$(DESTDIR)$(includedir_SQ),644); \ - $(call do_install,trace-seq.h,$(DESTDIR)$(includedir_SQ),644); \ - $(call do_install,kbuffer.h,$(DESTDIR)$(includedir_SQ),644) + $(call do_install,event-parse.h,$(includedir_SQ),644); \ + $(call do_install,event-utils.h,$(includedir_SQ),644); \ + $(call do_install,trace-seq.h,$(includedir_SQ),644); \ + $(call do_install,kbuffer.h,$(includedir_SQ),644) install: install_lib From 10992af6bf46a2048ad964985a5b77464e5563b1 Mon Sep 17 00:00:00 2001 From: Hewenliang Date: Mon, 18 Nov 2019 20:44:15 -0500 Subject: [PATCH 24/26] libtraceevent: Fix memory leakage in copy_filter_type It is necessary to free the memory that we have allocated when an error occurs.
Fixes: ef3072cd1d5c ("tools lib traceevent: Get rid of die in add_filter_type()") Signed-off-by: Hewenliang Reviewed-by: Steven Rostedt (VMware) Cc: Tzvetomir Stoyanov Link: http://lore.kernel.org/lkml/20191119014415.57210-1-hewenliang4@huawei.com Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/parse-filter.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 552592d153fb8..f3cbf86e51acf 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1473,8 +1473,10 @@ static int copy_filter_type(struct tep_event_filter *filter, if (strcmp(str, "TRUE") == 0 || strcmp(str, "FALSE") == 0) { /* Add trivial event */ arg = allocate_arg(); - if (arg == NULL) + if (arg == NULL) { + free(str); return -1; + } arg->type = TEP_FILTER_ARG_BOOLEAN; if (strcmp(str, "TRUE") == 0) @@ -1483,8 +1485,11 @@ static int copy_filter_type(struct tep_event_filter *filter, arg->boolean.value = 0; filter_type = add_filter_type(filter, event->id); - if (filter_type == NULL) + if (filter_type == NULL) { + free(str); + free_arg(arg); return -1; + } filter_type->filter = arg; From 358f98ee8a3578bbf464ac767b726c5de1ce0647 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 21 Nov 2019 09:26:23 +0000 Subject: [PATCH 25/26] perf probe: Fix spelling mistake "addrees" -> "address" There is a spelling mistake in a pr_warning message. Fix it. 
Signed-off-by: Colin King Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kernel-janitors@vger.kernel.org Link: http://lore.kernel.org/lkml/20191121092623.374896-1-colin.king@canonical.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-finder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 38d6cd22779f0..c470c49a804fd 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -812,7 +812,7 @@ static int verify_representive_line(struct probe_finder *pf, const char *fname, if (strcmp(fname, __fname) || lineno == __lineno) return 0; - pr_warning("This line is sharing the addrees with other lines.\n"); + pr_warning("This line is sharing the address with other lines.\n"); if (pf->pev->point.function) { /* Find best match function name and lines */ From 4584f084aa9d8033d5911935837dbee7b082d0e9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 20 Nov 2019 10:09:25 -0800 Subject: [PATCH 26/26] perf parse: Fix potential memory leak when handling tracepoint errors An error may be in place when tracepoint_error is called, use parse_events__handle_error to avoid a memory leak and to capture the first and last error. Error detected by LLVM's libFuzzer using the following event: $ perf stat -e 'msr/event/,f:e' event syntax error: 'msr/event/,f:e' \___ can't access trace events Error: No permissions to read /sys/kernel/debug/tracing/events/f/e Hint: Try 'sudo mount -o remount,mode=755 /sys/kernel/debug/tracing/' Initial error: event syntax error: 'msr/event/,f:e' \___ no value assigned for term Run 'perf list' for a list of valid events Usage: perf stat [] [] -e, --event event selector. 
use 'perf list' to list available events Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: clang-built-linux@googlegroups.com Link: http://lore.kernel.org/lkml/20191120180925.21787-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 6c313c4087edc..ed7c008b9c8bd 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -511,6 +511,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, static void tracepoint_error(struct parse_events_error *e, int err, const char *sys, const char *name) { + const char *str; char help[BUFSIZ]; if (!e) @@ -524,18 +525,18 @@ static void tracepoint_error(struct parse_events_error *e, int err, switch (err) { case EACCES: - e->str = strdup("can't access trace events"); + str = "can't access trace events"; break; case ENOENT: - e->str = strdup("unknown tracepoint"); + str = "unknown tracepoint"; break; default: - e->str = strdup("failed to add tracepoint"); + str = "failed to add tracepoint"; break; } tracing_path__strerror_open_tp(err, help, sizeof(help), sys, name); - e->help = strdup(help); + parse_events__handle_error(e, 0, strdup(str), strdup(help)); } static int add_tracepoint(struct list_head *list, int *idx,