From e2f56da1d6670070f6f55d43007cb7b03ee04c2f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 4 Jun 2015 15:50:55 +0200 Subject: [PATCH 01/15] perf stat: Add id into perf_stat struct We need fast way to identify evsel as transaction event for shadow counters computation. Currently we are using position (in evlist) based way. Adding 'id' into 'struct perf_stat' so it can carry transaction event ID and we can use it for shadow counters computations. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20150604135055.GB23625@krava.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 6 ++---- tools/perf/util/stat.c | 31 ++++++++++++++++++++++++++++++- tools/perf/util/stat.h | 20 ++++++++++++++++++++ 3 files changed, 52 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index fd577f725d231..a6ae1007f1f9f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -147,10 +147,6 @@ static int (*aggr_get_id)(struct cpu_map *m, int cpu); static volatile int done = 0; -struct perf_stat { - struct stats res_stats[3]; -}; - static inline void diff_timespec(struct timespec *r, struct timespec *a, struct timespec *b) { @@ -180,6 +176,8 @@ static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) for (i = 0; i < 3; i++) init_stats(&ps->res_stats[i]); + + perf_stat_evsel_id_init(evsel); } static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 6506b3dfb6059..8e9f6bb7581bc 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -1,6 +1,6 @@ #include - #include "stat.h" +#include "evsel.h" void update_stats(struct stats *stats, u64 val) { @@ -61,3 +61,32 @@ double rel_stddev_stats(double stddev, double avg) return pct; } + +bool __perf_evsel_stat__is(struct perf_evsel 
*evsel, + enum perf_stat_evsel_id id) +{ + struct perf_stat *ps = evsel->priv; + + return ps->id == id; +} + +#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name +static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { + ID(NONE, x), +}; +#undef ID + +void perf_stat_evsel_id_init(struct perf_evsel *evsel) +{ + struct perf_stat *ps = evsel->priv; + int i; + + /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */ + + for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) { + if (!strcmp(perf_evsel__name(evsel), id_str[i])) { + ps->id = i; + break; + } + } +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 5667fc3e39cf4..f4136cfd3cc9a 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -9,6 +9,16 @@ struct stats u64 max, min; }; +enum perf_stat_evsel_id { + PERF_STAT_EVSEL_ID__NONE = 0, + PERF_STAT_EVSEL_ID__MAX, +}; + +struct perf_stat { + struct stats res_stats[3]; + enum perf_stat_evsel_id id; +}; + void update_stats(struct stats *stats, u64 val); double avg_stats(struct stats *stats); double stddev_stats(struct stats *stats); @@ -22,4 +32,14 @@ static inline void init_stats(struct stats *stats) stats->min = (u64) -1; stats->max = 0; } + +struct perf_evsel; +bool __perf_evsel_stat__is(struct perf_evsel *evsel, + enum perf_stat_evsel_id id); + +#define perf_stat_evsel__is(evsel, id) \ + __perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id) + +void perf_stat_evsel_id_init(struct perf_evsel *evsel); + #endif From 4c358d5cf36192f22b8d331779cb92e3ede9cddf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 3 Jun 2015 16:25:52 +0200 Subject: [PATCH 02/15] perf stat: Replace transaction event position check with id check Using perf_stat::id to check for transaction events, instead of current position based way. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1433341559-31848-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 55 +++++---------------------------------- tools/perf/util/stat.c | 6 ++++- tools/perf/util/stat.h | 4 +++ 3 files changed, 16 insertions(+), 49 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a6ae1007f1f9f..514493d703da7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -96,17 +96,6 @@ static const char * const transaction_limited_attrs[] = { "}" }; -/* must match transaction_attrs and the beginning limited_attrs */ -enum { - T_TASK_CLOCK, - T_INSTRUCTIONS, - T_CYCLES, - T_CYCLES_IN_TX, - T_TRANSACTION_START, - T_ELISION_START, - T_CYCLES_IN_TX_CP, -}; - static struct perf_evlist *evsel_list; static struct target target = { @@ -352,29 +341,6 @@ static inline int nsec_counter(struct perf_evsel *evsel) return 0; } -static struct perf_evsel *nth_evsel(int n) -{ - static struct perf_evsel **array; - static int array_len; - struct perf_evsel *ev; - int j; - - /* Assumes this only called when evsel_list does not change anymore. 
*/ - if (!array) { - evlist__for_each(evsel_list, ev) - array_len++; - array = malloc(array_len * sizeof(void *)); - if (!array) - exit(ENOMEM); - j = 0; - evlist__for_each(evsel_list, ev) - array[j++] = ev; - } - if (n < array_len) - return array[n]; - return NULL; -} - /* * Update various tracking values we maintain to print * more semantic information such as miss/hit ratios, @@ -389,14 +355,11 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count, update_stats(&runtime_nsecs_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); - else if (transaction_run && - perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX))) + else if (transaction_run && perf_stat_evsel__is(counter, CYCLES_IN_TX)) update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); - else if (transaction_run && - perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START))) + else if (transaction_run && perf_stat_evsel__is(counter, TRANSACTION_START)) update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); - else if (transaction_run && - perf_evsel__cmp(counter, nth_evsel(T_ELISION_START))) + else if (transaction_run && perf_stat_evsel__is(counter, ELISION_START)) update_stats(&runtime_elision_stats[ctx][cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); @@ -1207,15 +1170,13 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) } else { fprintf(output, " "); } - } else if (transaction_run && - perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) { + } else if (transaction_run && perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); if (total) fprintf(output, " # %5.2f%% transactional cycles ", 100.0 * (avg / total)); - } else if (transaction_run && - perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) { + } else 
if (transaction_run && perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); if (total2 < avg) @@ -1224,8 +1185,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, " # %5.2f%% aborted cycles ", 100.0 * ((total2-avg) / total)); - } else if (transaction_run && - perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) && + } else if (transaction_run && perf_stat_evsel__is(evsel, TRANSACTION_START) && avg > 0 && runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); @@ -1234,8 +1194,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) ratio = total / avg; fprintf(output, " # %8.0f cycles / transaction ", ratio); - } else if (transaction_run && - perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) && + } else if (transaction_run && perf_stat_evsel__is(evsel, ELISION_START) && avg > 0 && runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 8e9f6bb7581bc..60b92822f6558 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -72,7 +72,11 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel, #define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { - ID(NONE, x), + ID(NONE, x), + ID(CYCLES_IN_TX, cpu/cycles-t/), + ID(TRANSACTION_START, cpu/tx-start/), + ID(ELISION_START, cpu/el-start/), + ID(CYCLES_IN_TX_CP, cpu/cycles-ct/), }; #undef ID diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index f4136cfd3cc9a..3df529bd07743 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -11,6 +11,10 @@ struct stats enum perf_stat_evsel_id { PERF_STAT_EVSEL_ID__NONE = 0, + PERF_STAT_EVSEL_ID__CYCLES_IN_TX, + PERF_STAT_EVSEL_ID__TRANSACTION_START, + 
PERF_STAT_EVSEL_ID__ELISION_START, + PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP, PERF_STAT_EVSEL_ID__MAX, }; From a454742c1252d6242e00b5f4f6f9e5fbce3859d7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 3 Jun 2015 16:25:53 +0200 Subject: [PATCH 03/15] perf stat: Remove setup_events function We can use already existing parse_events interface. Both transaction_attrs and transaction_limited_attrs are changed to be single strings. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1433341559-31848-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 514493d703da7..0c0071cf4fbad 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -73,8 +73,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix); static void print_aggr(char *prefix); /* Default events used for perf stat -T */ -static const char * const transaction_attrs[] = { - "task-clock", +static const char *transaction_attrs = { + "task-clock," "{" "instructions," "cycles," @@ -86,8 +86,8 @@ static const char * const transaction_attrs[] = { }; /* More limited version when the CPU does not have all events. 
*/ -static const char * const transaction_limited_attrs[] = { - "task-clock", +static const char * transaction_limited_attrs = { + "task-clock," "{" "instructions," "cycles," @@ -1533,17 +1533,6 @@ static int perf_stat_init_aggr_mode(void) return 0; } -static int setup_events(const char * const *attrs, unsigned len) -{ - unsigned i; - - for (i = 0; i < len; i++) { - if (parse_events(evsel_list, attrs[i], NULL)) - return -1; - } - return 0; -} - /* * Add default attributes, if there were no attributes specified or * if -d/--detailed, -d -d or -d -d -d is used: @@ -1665,12 +1654,10 @@ static int add_default_attributes(void) int err; if (pmu_have_event("cpu", "cycles-ct") && pmu_have_event("cpu", "el-start")) - err = setup_events(transaction_attrs, - ARRAY_SIZE(transaction_attrs)); + err = parse_events(evsel_list, transaction_attrs, NULL); else - err = setup_events(transaction_limited_attrs, - ARRAY_SIZE(transaction_limited_attrs)); - if (err < 0) { + err = parse_events(evsel_list, transaction_limited_attrs, NULL); + if (err) { fprintf(stderr, "Cannot set up transaction events\n"); return -1; } From 3e99e2f5e78e601591dbcf777c67a84aa9ea2ae5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 3 Jun 2015 16:25:54 +0200 Subject: [PATCH 04/15] perf stat: Remove transaction_run from shadow update/print code It's no longer needed, because we use nameid to recognize transaction events. Keeping it only in stat code to initialize transaction events. I.e. struct perf_stat::id, accessible via evsel->priv, will be only set for transaction related events. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1433341559-31848-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 0c0071cf4fbad..b3e08ce2c5644 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -355,11 +355,11 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count, update_stats(&runtime_nsecs_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); - else if (transaction_run && perf_stat_evsel__is(counter, CYCLES_IN_TX)) + else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); - else if (transaction_run && perf_stat_evsel__is(counter, TRANSACTION_START)) + else if (perf_stat_evsel__is(counter, TRANSACTION_START)) update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); - else if (transaction_run && perf_stat_evsel__is(counter, ELISION_START)) + else if (perf_stat_evsel__is(counter, ELISION_START)) update_stats(&runtime_elision_stats[ctx][cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); @@ -1170,13 +1170,13 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) } else { fprintf(output, " "); } - } else if (transaction_run && perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { + } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); if (total) fprintf(output, " # %5.2f%% transactional cycles ", 100.0 * (avg / total)); - } else if (transaction_run && perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { + } else if 
(perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); if (total2 < avg) @@ -1185,7 +1185,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, " # %5.2f%% aborted cycles ", 100.0 * ((total2-avg) / total)); - } else if (transaction_run && perf_stat_evsel__is(evsel, TRANSACTION_START) && + } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && avg > 0 && runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); @@ -1194,7 +1194,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) ratio = total / avg; fprintf(output, " # %8.0f cycles / transaction ", ratio); - } else if (transaction_run && perf_stat_evsel__is(evsel, ELISION_START) && + } else if (perf_stat_evsel__is(evsel, ELISION_START) && avg > 0 && runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); From 1eda3b2144391e1ec9e1870bb32d5216ac7b384c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 3 Jun 2015 16:25:55 +0200 Subject: [PATCH 05/15] perf stat: Introduce reset_shadow_stats function Move shadow counters reset code into separate function as preparation for moving it into its own object. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1433341559-31848-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b3e08ce2c5644..fc85e6b9bd13d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -279,15 +279,8 @@ static int evsel_context(struct perf_evsel *evsel) return ctx; } -static void perf_stat__reset_stats(struct perf_evlist *evlist) +static void reset_shadow_stats(void) { - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - perf_evsel__reset_stat_priv(evsel); - perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel)); - } - memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); @@ -307,6 +300,18 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist) memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); } +static void perf_stat__reset_stats(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + perf_evsel__reset_stat_priv(evsel); + perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel)); + } + + reset_shadow_stats(); +} + static int create_perf_stat_counter(struct perf_evsel *evsel) { struct perf_event_attr *attr = &evsel->attr; From 556b1fb7f9c1a9fd43ea4dacd5d14ec39ac6296a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 3 Jun 2015 16:25:56 +0200 Subject: [PATCH 06/15] perf stat: Introduce print_shadow_stats function Move shadow counters display code into separate function as preparation for moving it into its own object. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1433341559-31848-7-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 72 +++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index fc85e6b9bd13d..2ff2e22aa1405 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1059,43 +1059,11 @@ static void print_ll_cache_misses(int cpu, fprintf(output, " of all LL-cache hits "); } -static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) +static void print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu) { double total, ratio = 0.0, total2; - double sc = evsel->scale; - const char *fmt; - int cpu = cpu_map__id_to_cpu(id); int ctx = evsel_context(evsel); - if (csv_output) { - fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s"; - } else { - if (big_num) - fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s"; - else - fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s"; - } - - aggr_printout(evsel, id, nr); - - if (aggr_mode == AGGR_GLOBAL) - cpu = 0; - - fprintf(output, fmt, avg, csv_sep); - - if (evsel->unit) - fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - evsel->unit, csv_sep); - - fprintf(output, "%-*s", csv_output ? 
0 : 25, perf_evsel__name(evsel)); - - if (evsel->cgrp) - fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); - - if (csv_output || interval) - return; - if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); if (total) { @@ -1226,6 +1194,44 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) } } +static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) +{ + double sc = evsel->scale; + const char *fmt; + int cpu = cpu_map__id_to_cpu(id); + + if (csv_output) { + fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s"; + } else { + if (big_num) + fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s"; + else + fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s"; + } + + aggr_printout(evsel, id, nr); + + if (aggr_mode == AGGR_GLOBAL) + cpu = 0; + + fprintf(output, fmt, avg, csv_sep); + + if (evsel->unit) + fprintf(output, "%-*s%s", + csv_output ? 0 : unit_width, + evsel->unit, csv_sep); + + fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); + + if (evsel->cgrp) + fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); + + if (csv_output || interval) + return; + + print_shadow_stats(evsel, avg, cpu); +} + static void print_aggr(char *prefix) { struct perf_evsel *counter; From 4d982740cd598bdd876d9a396cc919724af32bc9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 3 Jun 2015 16:25:57 +0200 Subject: [PATCH 07/15] perf stat: Add output file argument to print_shadow_stats function As preparation for moving shadow counters code into its own object. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1433341559-31848-8-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 112 +++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2ff2e22aa1405..14a75ddb60c7b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -899,7 +899,7 @@ static const char *get_ratio_color(enum grc_type type, double ratio) return color; } -static void print_stalled_cycles_frontend(int cpu, +static void print_stalled_cycles_frontend(FILE *out, int cpu, struct perf_evsel *evsel __maybe_unused, double avg) { @@ -914,12 +914,12 @@ static void print_stalled_cycles_frontend(int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); - fprintf(output, " # "); - color_fprintf(output, color, "%6.2f%%", ratio); - fprintf(output, " frontend cycles idle "); + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " frontend cycles idle "); } -static void print_stalled_cycles_backend(int cpu, +static void print_stalled_cycles_backend(FILE *out, int cpu, struct perf_evsel *evsel __maybe_unused, double avg) { @@ -934,12 +934,12 @@ static void print_stalled_cycles_backend(int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); - fprintf(output, " # "); - color_fprintf(output, color, "%6.2f%%", ratio); - fprintf(output, " backend cycles idle "); + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " backend cycles idle "); } -static void print_branch_misses(int cpu, +static void print_branch_misses(FILE *out, int cpu, struct perf_evsel *evsel __maybe_unused, double avg) { @@ -954,12 +954,12 @@ static void print_branch_misses(int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(output, " # "); - 
color_fprintf(output, color, "%6.2f%%", ratio); - fprintf(output, " of all branches "); + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all branches "); } -static void print_l1_dcache_misses(int cpu, +static void print_l1_dcache_misses(FILE *out, int cpu, struct perf_evsel *evsel __maybe_unused, double avg) { @@ -974,12 +974,12 @@ static void print_l1_dcache_misses(int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(output, " # "); - color_fprintf(output, color, "%6.2f%%", ratio); - fprintf(output, " of all L1-dcache hits "); + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all L1-dcache hits "); } -static void print_l1_icache_misses(int cpu, +static void print_l1_icache_misses(FILE *out, int cpu, struct perf_evsel *evsel __maybe_unused, double avg) { @@ -994,12 +994,12 @@ static void print_l1_icache_misses(int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(output, " # "); - color_fprintf(output, color, "%6.2f%%", ratio); - fprintf(output, " of all L1-icache hits "); + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all L1-icache hits "); } -static void print_dtlb_cache_misses(int cpu, +static void print_dtlb_cache_misses(FILE *out, int cpu, struct perf_evsel *evsel __maybe_unused, double avg) { @@ -1014,12 +1014,12 @@ static void print_dtlb_cache_misses(int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(output, " # "); - color_fprintf(output, color, "%6.2f%%", ratio); - fprintf(output, " of all dTLB cache hits "); + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all dTLB cache hits "); } -static void print_itlb_cache_misses(int cpu, +static void print_itlb_cache_misses(FILE *out, int cpu, struct perf_evsel *evsel __maybe_unused, double avg) { @@ -1034,12 +1034,12 @@ static void print_itlb_cache_misses(int cpu, color = get_ratio_color(GRC_CACHE_MISSES, 
ratio); - fprintf(output, " # "); - color_fprintf(output, color, "%6.2f%%", ratio); - fprintf(output, " of all iTLB cache hits "); + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all iTLB cache hits "); } -static void print_ll_cache_misses(int cpu, +static void print_ll_cache_misses(FILE *out, int cpu, struct perf_evsel *evsel __maybe_unused, double avg) { @@ -1054,12 +1054,12 @@ static void print_ll_cache_misses(int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(output, " # "); - color_fprintf(output, color, "%6.2f%%", ratio); - fprintf(output, " of all LL-cache hits "); + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all LL-cache hits "); } -static void print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu) +static void print_shadow_stats(FILE *out, struct perf_evsel *evsel, double avg, int cpu) { double total, ratio = 0.0, total2; int ctx = evsel_context(evsel); @@ -1068,59 +1068,59 @@ static void print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu) total = avg_stats(&runtime_cycles_stats[ctx][cpu]); if (total) { ratio = avg / total; - fprintf(output, " # %5.2f insns per cycle ", ratio); + fprintf(out, " # %5.2f insns per cycle ", ratio); } else { - fprintf(output, " "); + fprintf(out, " "); } total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); if (total && avg) { ratio = total / avg; - fprintf(output, "\n"); + fprintf(out, "\n"); if (aggr_mode == AGGR_NONE) - fprintf(output, " "); - fprintf(output, " # %5.2f stalled cycles per insn", ratio); + fprintf(out, " "); + fprintf(out, " # %5.2f stalled cycles per insn", ratio); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && runtime_branches_stats[ctx][cpu].n != 0) { - print_branch_misses(cpu, evsel, avg); + print_branch_misses(out, cpu, evsel, avg); } else if ( evsel->attr.type == 
PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && runtime_l1_dcache_stats[ctx][cpu].n != 0) { - print_l1_dcache_misses(cpu, evsel, avg); + print_l1_dcache_misses(out, cpu, evsel, avg); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && runtime_l1_icache_stats[ctx][cpu].n != 0) { - print_l1_icache_misses(cpu, evsel, avg); + print_l1_icache_misses(out, cpu, evsel, avg); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && runtime_dtlb_cache_stats[ctx][cpu].n != 0) { - print_dtlb_cache_misses(cpu, evsel, avg); + print_dtlb_cache_misses(out, cpu, evsel, avg); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && runtime_itlb_cache_stats[ctx][cpu].n != 0) { - print_itlb_cache_misses(cpu, evsel, avg); + print_itlb_cache_misses(out, cpu, evsel, avg); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && runtime_ll_cache_stats[ctx][cpu].n != 0) { - print_ll_cache_misses(cpu, evsel, avg); + print_ll_cache_misses(out, cpu, evsel, avg); } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && runtime_cacherefs_stats[ctx][cpu].n != 0) { total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); @@ -1128,25 +1128,25 @@ static void print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu) if (total) ratio = avg * 100 / total; - fprintf(output, " # %8.3f %% of all cache refs ", ratio); + fprintf(out, " # %8.3f %% of all cache 
refs ", ratio); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(cpu, evsel, avg); + print_stalled_cycles_frontend(out, cpu, evsel, avg); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(cpu, evsel, avg); + print_stalled_cycles_backend(out, cpu, evsel, avg); } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { total = avg_stats(&runtime_nsecs_stats[cpu]); if (total) { ratio = avg / total; - fprintf(output, " # %8.3f GHz ", ratio); + fprintf(out, " # %8.3f GHz ", ratio); } else { - fprintf(output, " "); + fprintf(out, " "); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); if (total) - fprintf(output, + fprintf(out, " # %5.2f%% transactional cycles ", 100.0 * (avg / total)); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { @@ -1155,7 +1155,7 @@ static void print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu) if (total2 < avg) total2 = avg; if (total) - fprintf(output, + fprintf(out, " # %5.2f%% aborted cycles ", 100.0 * ((total2-avg) / total)); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && @@ -1166,7 +1166,7 @@ static void print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu) if (total) ratio = total / avg; - fprintf(output, " # %8.0f cycles / transaction ", ratio); + fprintf(out, " # %8.0f cycles / transaction ", ratio); } else if (perf_stat_evsel__is(evsel, ELISION_START) && avg > 0 && runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { @@ -1175,7 +1175,7 @@ static void print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu) if (total) ratio = total / avg; - fprintf(output, " # %8.0f cycles / elision ", ratio); + fprintf(out, " # %8.0f cycles / elision ", ratio); } else if (runtime_nsecs_stats[cpu].n != 0) { char unit = 'M'; @@ -1188,9 +1188,9 @@ static void print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu) unit 
= 'K'; } - fprintf(output, " # %8.3f %c/sec ", ratio, unit); + fprintf(out, " # %8.3f %c/sec ", ratio, unit); } else { - fprintf(output, " "); + fprintf(out, " "); } } @@ -1229,7 +1229,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) if (csv_output || interval) return; - print_shadow_stats(evsel, avg, cpu); + print_shadow_stats(output, evsel, avg, cpu); } static void print_aggr(char *prefix) From 7a23f57c89cec0e6d3189d420d992902d4465ff4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 3 Jun 2015 16:25:58 +0200 Subject: [PATCH 08/15] perf stat: Add aggr_mode argument to print_shadow_stats function As preparation for moving shadow counters code into its own object. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1433341559-31848-9-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 14a75ddb60c7b..50918dc9fb319 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1059,7 +1059,8 @@ static void print_ll_cache_misses(FILE *out, int cpu, fprintf(out, " of all LL-cache hits "); } -static void print_shadow_stats(FILE *out, struct perf_evsel *evsel, double avg, int cpu) +static void print_shadow_stats(FILE *out, struct perf_evsel *evsel, + double avg, int cpu, enum aggr_mode aggr) { double total, ratio = 0.0, total2; int ctx = evsel_context(evsel); @@ -1078,7 +1079,7 @@ static void print_shadow_stats(FILE *out, struct perf_evsel *evsel, double avg, if (total && avg) { ratio = total / avg; fprintf(out, "\n"); - if (aggr_mode == AGGR_NONE) + if (aggr == AGGR_NONE) fprintf(out, " "); fprintf(out, " # %5.2f stalled cycles per insn", ratio); } @@ -1229,7 +1230,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) if 
(csv_output || interval) return; - print_shadow_stats(output, evsel, avg, cpu); + print_shadow_stats(output, evsel, avg, cpu, aggr_mode); } static void print_aggr(char *prefix) From f87027b9689d591ec22720944563a2d43ec835c4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 3 Jun 2015 16:25:59 +0200 Subject: [PATCH 09/15] perf stat: Move shadow stat counters into separate object Separating shadow counters code into separate object as a cleanup, but mainly for upcoming changes, so we could use it from script command context. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1433341559-31848-10-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 444 +--------------------------------- tools/perf/util/Build | 1 + tools/perf/util/stat-shadow.c | 434 +++++++++++++++++++++++++++++++++ tools/perf/util/stat.h | 16 ++ 4 files changed, 455 insertions(+), 440 deletions(-) create mode 100644 tools/perf/util/stat-shadow.c diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 50918dc9fb319..ff3d25803400a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -102,13 +102,6 @@ static struct target target = { .uid = UINT_MAX, }; -enum aggr_mode { - AGGR_NONE, - AGGR_GLOBAL, - AGGR_SOCKET, - AGGR_CORE, -}; - static int run_count = 1; static bool no_inherit = false; static bool scale = true; @@ -234,72 +227,6 @@ static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw) return -1; } -enum { - CTX_BIT_USER = 1 << 0, - CTX_BIT_KERNEL = 1 << 1, - CTX_BIT_HV = 1 << 2, - CTX_BIT_HOST = 1 << 3, - CTX_BIT_IDLE = 1 << 4, - CTX_BIT_MAX = 1 << 5, -}; - -#define NUM_CTX CTX_BIT_MAX - -static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; -static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; -static struct
stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats walltime_nsecs_stats; -static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; - -static int evsel_context(struct perf_evsel *evsel) -{ - int ctx = 0; - - if (evsel->attr.exclude_kernel) - ctx |= CTX_BIT_KERNEL; - if (evsel->attr.exclude_user) - ctx |= CTX_BIT_USER; - if (evsel->attr.exclude_hv) - ctx |= CTX_BIT_HV; - if (evsel->attr.exclude_host) - ctx |= CTX_BIT_HOST; - if (evsel->attr.exclude_idle) - ctx |= CTX_BIT_IDLE; - - return ctx; -} - -static void reset_shadow_stats(void) -{ - memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); - memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); - memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); - memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); - memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); - memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); - memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); - memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); - memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); - memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); - memset(runtime_dtlb_cache_stats, 0, 
sizeof(runtime_dtlb_cache_stats)); - memset(runtime_cycles_in_tx_stats, 0, - sizeof(runtime_cycles_in_tx_stats)); - memset(runtime_transaction_stats, 0, - sizeof(runtime_transaction_stats)); - memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); - memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); -} - static void perf_stat__reset_stats(struct perf_evlist *evlist) { struct perf_evsel *evsel; @@ -309,7 +236,7 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist) perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel)); } - reset_shadow_stats(); + perf_stat__reset_shadow_stats(); } static int create_perf_stat_counter(struct perf_evsel *evsel) @@ -346,46 +273,6 @@ static inline int nsec_counter(struct perf_evsel *evsel) return 0; } -/* - * Update various tracking values we maintain to print - * more semantic information such as miss/hit ratios, - * instruction rates, etc: - */ -static void update_shadow_stats(struct perf_evsel *counter, u64 *count, - int cpu) -{ - int ctx = evsel_context(counter); - - if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) - update_stats(&runtime_nsecs_stats[cpu], count[0]); - else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); - else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); - else if (perf_stat_evsel__is(counter, TRANSACTION_START)) - update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); - else if (perf_stat_evsel__is(counter, ELISION_START)) - update_stats(&runtime_elision_stats[ctx][cpu], count[0]); - else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) - update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); - else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) - update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]); - else if (perf_evsel__match(counter, HARDWARE, 
HW_BRANCH_INSTRUCTIONS)) - update_stats(&runtime_branches_stats[ctx][cpu], count[0]); - else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); -} - static void zero_per_pkg(struct perf_evsel *counter) { if (counter->per_pkg_mask) @@ -446,7 +333,7 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, perf_counts_values__scale(count, scale, NULL); evsel->counts->cpu[cpu] = *count; if (aggr_mode == AGGR_NONE) - update_shadow_stats(evsel, count->values, cpu); + perf_stat__update_shadow_stats(evsel, count->values, cpu); break; case AGGR_GLOBAL: aggr->val += count->val; @@ -494,7 +381,7 @@ static int read_counter_aggr(struct perf_evsel *counter) /* * Save the full runtime - to allow normalization during printout: */ - update_shadow_stats(counter, count, 0); + perf_stat__update_shadow_stats(counter, count, 0); return 0; } @@ -872,329 +759,6 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, " "); } -/* used for get_ratio_color() */ -enum grc_type { - GRC_STALLED_CYCLES_FE, - GRC_STALLED_CYCLES_BE, - GRC_CACHE_MISSES, - GRC_MAX_NR -}; - -static const char *get_ratio_color(enum grc_type type, double ratio) -{ - static const double grc_table[GRC_MAX_NR][3] = { - [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, - 
[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, - [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, - }; - const char *color = PERF_COLOR_NORMAL; - - if (ratio > grc_table[type][0]) - color = PERF_COLOR_RED; - else if (ratio > grc_table[type][1]) - color = PERF_COLOR_MAGENTA; - else if (ratio > grc_table[type][2]) - color = PERF_COLOR_YELLOW; - - return color; -} - -static void print_stalled_cycles_frontend(FILE *out, int cpu, - struct perf_evsel *evsel - __maybe_unused, double avg) -{ - double total, ratio = 0.0; - const char *color; - int ctx = evsel_context(evsel); - - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " frontend cycles idle "); -} - -static void print_stalled_cycles_backend(FILE *out, int cpu, - struct perf_evsel *evsel - __maybe_unused, double avg) -{ - double total, ratio = 0.0; - const char *color; - int ctx = evsel_context(evsel); - - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " backend cycles idle "); -} - -static void print_branch_misses(FILE *out, int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) -{ - double total, ratio = 0.0; - const char *color; - int ctx = evsel_context(evsel); - - total = avg_stats(&runtime_branches_stats[ctx][cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all branches "); -} - -static void print_l1_dcache_misses(FILE *out, int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) -{ - double total, ratio = 0.0; - const char *color; - int ctx = 
evsel_context(evsel); - - total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all L1-dcache hits "); -} - -static void print_l1_icache_misses(FILE *out, int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) -{ - double total, ratio = 0.0; - const char *color; - int ctx = evsel_context(evsel); - - total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all L1-icache hits "); -} - -static void print_dtlb_cache_misses(FILE *out, int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) -{ - double total, ratio = 0.0; - const char *color; - int ctx = evsel_context(evsel); - - total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all dTLB cache hits "); -} - -static void print_itlb_cache_misses(FILE *out, int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) -{ - double total, ratio = 0.0; - const char *color; - int ctx = evsel_context(evsel); - - total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all iTLB cache hits "); -} - -static void print_ll_cache_misses(FILE *out, int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) -{ - double total, ratio = 0.0; - const char *color; - int ctx = evsel_context(evsel); - - total = 
avg_stats(&runtime_ll_cache_stats[ctx][cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all LL-cache hits "); -} - -static void print_shadow_stats(FILE *out, struct perf_evsel *evsel, - double avg, int cpu, enum aggr_mode aggr) -{ - double total, ratio = 0.0, total2; - int ctx = evsel_context(evsel); - - if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); - if (total) { - ratio = avg / total; - fprintf(out, " # %5.2f insns per cycle ", ratio); - } else { - fprintf(out, " "); - } - total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); - total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); - - if (total && avg) { - ratio = total / avg; - fprintf(out, "\n"); - if (aggr == AGGR_NONE) - fprintf(out, " "); - fprintf(out, " # %5.2f stalled cycles per insn", ratio); - } - - } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && - runtime_branches_stats[ctx][cpu].n != 0) { - print_branch_misses(out, cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_l1_dcache_stats[ctx][cpu].n != 0) { - print_l1_dcache_misses(out, cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_l1_icache_stats[ctx][cpu].n != 0) { - print_l1_icache_misses(out, cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - 
runtime_dtlb_cache_stats[ctx][cpu].n != 0) { - print_dtlb_cache_misses(out, cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_itlb_cache_stats[ctx][cpu].n != 0) { - print_itlb_cache_misses(out, cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_ll_cache_stats[ctx][cpu].n != 0) { - print_ll_cache_misses(out, cpu, evsel, avg); - } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && - runtime_cacherefs_stats[ctx][cpu].n != 0) { - total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); - - if (total) - ratio = avg * 100 / total; - - fprintf(out, " # %8.3f %% of all cache refs ", ratio); - - } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(out, cpu, evsel, avg); - } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(out, cpu, evsel, avg); - } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { - total = avg_stats(&runtime_nsecs_stats[cpu]); - - if (total) { - ratio = avg / total; - fprintf(out, " # %8.3f GHz ", ratio); - } else { - fprintf(out, " "); - } - } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); - if (total) - fprintf(out, - " # %5.2f%% transactional cycles ", - 100.0 * (avg / total)); - } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); - total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); - if (total2 < avg) - total2 = avg; - if (total) - fprintf(out, - " # %5.2f%% aborted cycles ", - 100.0 * ((total2-avg) / total)); - } else if 
(perf_stat_evsel__is(evsel, TRANSACTION_START) && - avg > 0 && - runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { - total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); - - if (total) - ratio = total / avg; - - fprintf(out, " # %8.0f cycles / transaction ", ratio); - } else if (perf_stat_evsel__is(evsel, ELISION_START) && - avg > 0 && - runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { - total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); - - if (total) - ratio = total / avg; - - fprintf(out, " # %8.0f cycles / elision ", ratio); - } else if (runtime_nsecs_stats[cpu].n != 0) { - char unit = 'M'; - - total = avg_stats(&runtime_nsecs_stats[cpu]); - - if (total) - ratio = 1000.0 * avg / total; - if (ratio < 0.001) { - ratio *= 1000; - unit = 'K'; - } - - fprintf(out, " # %8.3f %c/sec ", ratio, unit); - } else { - fprintf(out, " "); - } -} - static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) { double sc = evsel->scale; @@ -1230,7 +794,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) if (csv_output || interval) return; - print_shadow_stats(output, evsel, avg, cpu, aggr_mode); + perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode); } static void print_aggr(char *prefix) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index e4b676de2f643..586a59d46022a 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -68,6 +68,7 @@ libperf-y += rblist.o libperf-y += intlist.o libperf-y += vdso.o libperf-y += stat.o +libperf-y += stat-shadow.o libperf-y += record.o libperf-y += srcline.o libperf-y += data.o diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c new file mode 100644 index 0000000000000..53e8bb7bc8521 --- /dev/null +++ b/tools/perf/util/stat-shadow.c @@ -0,0 +1,434 @@ +#include +#include "evsel.h" +#include "stat.h" +#include "color.h" + +enum { + CTX_BIT_USER = 1 << 0, + CTX_BIT_KERNEL = 1 << 1, + CTX_BIT_HV = 1 << 2, + CTX_BIT_HOST = 1 << 3, + 
CTX_BIT_IDLE = 1 << 4, + CTX_BIT_MAX = 1 << 5, +}; + +#define NUM_CTX CTX_BIT_MAX + +static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; +static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; + +struct stats walltime_nsecs_stats; + +static int evsel_context(struct perf_evsel *evsel) +{ + int ctx = 0; + + if (evsel->attr.exclude_kernel) + ctx |= CTX_BIT_KERNEL; + if (evsel->attr.exclude_user) + ctx |= CTX_BIT_USER; + if (evsel->attr.exclude_hv) + ctx |= CTX_BIT_HV; + if (evsel->attr.exclude_host) + ctx |= CTX_BIT_HOST; + if (evsel->attr.exclude_idle) + ctx |= CTX_BIT_IDLE; + + return ctx; +} + +void perf_stat__reset_shadow_stats(void) +{ + memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); + memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); + memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); + memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); + memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); + memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); + memset(runtime_l1_dcache_stats, 0, 
sizeof(runtime_l1_dcache_stats)); + memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); + memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); + memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); + memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); + memset(runtime_cycles_in_tx_stats, 0, + sizeof(runtime_cycles_in_tx_stats)); + memset(runtime_transaction_stats, 0, + sizeof(runtime_transaction_stats)); + memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); + memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); +} + +/* + * Update various tracking values we maintain to print + * more semantic information such as miss/hit ratios, + * instruction rates, etc: + */ +void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, + int cpu) +{ + int ctx = evsel_context(counter); + + if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) + update_stats(&runtime_nsecs_stats[cpu], count[0]); + else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) + update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) + update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, TRANSACTION_START)) + update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, ELISION_START)) + update_stats(&runtime_elision_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) + update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) + update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) + update_stats(&runtime_branches_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) + 
update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) + update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) + update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) + update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) + update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) + update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); +} + +/* used for get_ratio_color() */ +enum grc_type { + GRC_STALLED_CYCLES_FE, + GRC_STALLED_CYCLES_BE, + GRC_CACHE_MISSES, + GRC_MAX_NR +}; + +static const char *get_ratio_color(enum grc_type type, double ratio) +{ + static const double grc_table[GRC_MAX_NR][3] = { + [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, + [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, + [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, + }; + const char *color = PERF_COLOR_NORMAL; + + if (ratio > grc_table[type][0]) + color = PERF_COLOR_RED; + else if (ratio > grc_table[type][1]) + color = PERF_COLOR_MAGENTA; + else if (ratio > grc_table[type][2]) + color = PERF_COLOR_YELLOW; + + return color; +} + +static void print_stalled_cycles_frontend(FILE *out, int cpu, + struct perf_evsel *evsel + __maybe_unused, double avg) +{ + double total, ratio = 0.0; + const char *color; + int ctx = evsel_context(evsel); + + total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); + + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " frontend cycles idle "); +} + +static void print_stalled_cycles_backend(FILE *out, int cpu, + struct perf_evsel *evsel + __maybe_unused, double avg) +{ + double total, ratio = 
0.0; + const char *color; + int ctx = evsel_context(evsel); + + total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); + + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " backend cycles idle "); +} + +static void print_branch_misses(FILE *out, int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) +{ + double total, ratio = 0.0; + const char *color; + int ctx = evsel_context(evsel); + + total = avg_stats(&runtime_branches_stats[ctx][cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = get_ratio_color(GRC_CACHE_MISSES, ratio); + + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all branches "); +} + +static void print_l1_dcache_misses(FILE *out, int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) +{ + double total, ratio = 0.0; + const char *color; + int ctx = evsel_context(evsel); + + total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = get_ratio_color(GRC_CACHE_MISSES, ratio); + + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all L1-dcache hits "); +} + +static void print_l1_icache_misses(FILE *out, int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) +{ + double total, ratio = 0.0; + const char *color; + int ctx = evsel_context(evsel); + + total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = get_ratio_color(GRC_CACHE_MISSES, ratio); + + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all L1-icache hits "); +} + +static void print_dtlb_cache_misses(FILE *out, int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) +{ + double total, ratio = 0.0; + const char *color; + int ctx = evsel_context(evsel); + + total = 
avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = get_ratio_color(GRC_CACHE_MISSES, ratio); + + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all dTLB cache hits "); +} + +static void print_itlb_cache_misses(FILE *out, int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) +{ + double total, ratio = 0.0; + const char *color; + int ctx = evsel_context(evsel); + + total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = get_ratio_color(GRC_CACHE_MISSES, ratio); + + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all iTLB cache hits "); +} + +static void print_ll_cache_misses(FILE *out, int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) +{ + double total, ratio = 0.0; + const char *color; + int ctx = evsel_context(evsel); + + total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = get_ratio_color(GRC_CACHE_MISSES, ratio); + + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all LL-cache hits "); +} + +void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, + double avg, int cpu, enum aggr_mode aggr) +{ + double total, ratio = 0.0, total2; + int ctx = evsel_context(evsel); + + if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { + total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + if (total) { + ratio = avg / total; + fprintf(out, " # %5.2f insns per cycle ", ratio); + } else { + fprintf(out, " "); + } + total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); + total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); + + if (total && avg) { + ratio = total / avg; + fprintf(out, "\n"); + if (aggr == AGGR_NONE) + fprintf(out, " "); + fprintf(out, " # %5.2f stalled cycles per insn", ratio); + } + + } else if 
(perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && + runtime_branches_stats[ctx][cpu].n != 0) { + print_branch_misses(out, cpu, evsel, avg); + } else if ( + evsel->attr.type == PERF_TYPE_HW_CACHE && + evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && + runtime_l1_dcache_stats[ctx][cpu].n != 0) { + print_l1_dcache_misses(out, cpu, evsel, avg); + } else if ( + evsel->attr.type == PERF_TYPE_HW_CACHE && + evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && + runtime_l1_icache_stats[ctx][cpu].n != 0) { + print_l1_icache_misses(out, cpu, evsel, avg); + } else if ( + evsel->attr.type == PERF_TYPE_HW_CACHE && + evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && + runtime_dtlb_cache_stats[ctx][cpu].n != 0) { + print_dtlb_cache_misses(out, cpu, evsel, avg); + } else if ( + evsel->attr.type == PERF_TYPE_HW_CACHE && + evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && + runtime_itlb_cache_stats[ctx][cpu].n != 0) { + print_itlb_cache_misses(out, cpu, evsel, avg); + } else if ( + evsel->attr.type == PERF_TYPE_HW_CACHE && + evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && + runtime_ll_cache_stats[ctx][cpu].n != 0) { + print_ll_cache_misses(out, cpu, evsel, avg); + } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && + runtime_cacherefs_stats[ctx][cpu].n != 0) { + total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); + + if (total) + ratio = avg * 100 / total; + + fprintf(out, " # %8.3f %% of all cache refs ", ratio); + + } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { + 
print_stalled_cycles_frontend(out, cpu, evsel, avg); + } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { + print_stalled_cycles_backend(out, cpu, evsel, avg); + } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { + total = avg_stats(&runtime_nsecs_stats[cpu]); + + if (total) { + ratio = avg / total; + fprintf(out, " # %8.3f GHz ", ratio); + } else { + fprintf(out, " "); + } + } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { + total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + if (total) + fprintf(out, + " # %5.2f%% transactional cycles ", + 100.0 * (avg / total)); + } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { + total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + if (total2 < avg) + total2 = avg; + if (total) + fprintf(out, + " # %5.2f%% aborted cycles ", + 100.0 * ((total2-avg) / total)); + } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && + avg > 0 && + runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { + total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + + if (total) + ratio = total / avg; + + fprintf(out, " # %8.0f cycles / transaction ", ratio); + } else if (perf_stat_evsel__is(evsel, ELISION_START) && + avg > 0 && + runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { + total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + + if (total) + ratio = total / avg; + + fprintf(out, " # %8.0f cycles / elision ", ratio); + } else if (runtime_nsecs_stats[cpu].n != 0) { + char unit = 'M'; + + total = avg_stats(&runtime_nsecs_stats[cpu]); + + if (total) + ratio = 1000.0 * avg / total; + if (ratio < 0.001) { + ratio *= 1000; + unit = 'K'; + } + + fprintf(out, " # %8.3f %c/sec ", ratio, unit); + } else { + fprintf(out, " "); + } +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 3df529bd07743..615c779eb42ac 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -2,6 +2,7 @@ #define __PERF_STATS_H #include 
+#include struct stats { @@ -23,6 +24,13 @@ struct perf_stat { enum perf_stat_evsel_id id; }; +enum aggr_mode { + AGGR_NONE, + AGGR_GLOBAL, + AGGR_SOCKET, + AGGR_CORE, +}; + void update_stats(struct stats *stats, u64 val); double avg_stats(struct stats *stats); double stddev_stats(struct stats *stats); @@ -46,4 +54,12 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel, void perf_stat_evsel_id_init(struct perf_evsel *evsel); +extern struct stats walltime_nsecs_stats; + +void perf_stat__reset_shadow_stats(void); +void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, + int cpu); +void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, + double avg, int cpu, enum aggr_mode aggr); + #endif From e3d09ec8126fe2c9a3ade661e2126e215ca27a80 Mon Sep 17 00:00:00 2001 From: He Kuang Date: Thu, 28 May 2015 13:28:54 +0000 Subject: [PATCH 10/15] tools lib traceevent: Export dynamic symbols used by traceevent plugins Traceevent plugins need dynamic symbols exported from libtraceevent.a, otherwise a dlopen error will occur during plugins loading. This patch uses dynamic-list-file to export dynamic symbols which will be used in plugins to perf executable. The problem is covered up if feature-libpython is enabled, because PYTHON_EMBED_LDOPTS contains '-Xlinker --export-dynamic' which adds all symbols to the dynamic symbol table. So we should reproduce the problem by setting NO_LIBPYTHON=1. Before this patch: (Prepare plugins) $ ls /root/.traceevent/plugins/ plugin_sched_switch.so plugin_function.so ... $ perf record -e 'ftrace:function' ls $ perf script Warning: could not load plugin '/mnt/data/root/.traceevent/plugins/plugin_sched_switch.so' /root/.traceevent/plugins/plugin_sched_switch.so: undefined symbol: pevent_unregister_event_handler Warning: could not load plugin '/root/.traceevent/plugins/plugin_function.so' /root/.traceevent/plugins/plugin_function.so: undefined symbol: warning ... 
:1049 1049 [000] 9666.754487: ftrace:function: ffffffff8118bc50 <-- ffffffff8118c5b3 :1049 1049 [000] 9666.754487: ftrace:function: ffffffff818e2440 <-- ffffffff8118bc75 :1049 1049 [000] 9666.754487: ftrace:function: ffffffff8106eee0 <-- ffffffff811212e2 After this patch: $ perf record -e 'ftrace:function' ls $ perf script :1049 1049 [000] 9666.754487: ftrace:function: __set_task_comm :1049 1049 [000] 9666.754487: ftrace:function: _raw_spin_lock :1049 1049 [000] 9666.754487: ftrace:function: task_tgid_nr_ns ... Signed-off-by: He Kuang Acked-by: Jiri Olsa Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1432819735-35040-1-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/Makefile | 14 +++++++++++++- tools/perf/Makefile.perf | 14 ++++++++++++-- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 84640394ebf98..6daaff652affd 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -23,6 +23,7 @@ endef # Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. 
$(call allow-override,CC,$(CROSS_COMPILE)gcc) $(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,NM,$(CROSS_COMPILE)nm) EXT = -std=gnu99 INSTALL = install @@ -157,8 +158,9 @@ PLUGINS_IN := $(PLUGINS:.so=-in.o) TE_IN := $(OUTPUT)libtraceevent-in.o LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) +DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list -CMD_TARGETS = $(LIB_FILE) $(PLUGINS) +CMD_TARGETS = $(LIB_FILE) $(PLUGINS) $(DYNAMIC_LIST_FILE) TARGETS = $(CMD_TARGETS) @@ -175,6 +177,9 @@ $(OUTPUT)libtraceevent.so: $(TE_IN) $(OUTPUT)libtraceevent.a: $(TE_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ +$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS) + $(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@) + plugins: $(PLUGINS) __plugin_obj = $(notdir $@) @@ -244,6 +249,13 @@ define do_install_plugins done endef +define do_generate_dynamic_list_file + (echo '{'; \ + $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u; \ + echo '};'; \ + ) > $2 +endef + install_lib: all_cmd install_plugins $(call QUIET_INSTALL, $(LIB_FILE)) \ $(call do_install,$(LIB_FILE),$(libdir_SQ)) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5816a3bb7e9fb..b1dfcd8e93e3d 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -173,6 +173,9 @@ endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a export LIBTRACEEVENT +LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list +LDFLAGS += -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST) + LIBAPI = $(LIB_PATH)libapi.a export LIBAPI @@ -278,7 +281,7 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. 
obj $(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE $(Q)$(MAKE) $(build)=perf -$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) +$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(PERF_IN) $(LIBS) -o $@ $(GTK_IN): FORCE @@ -373,7 +376,13 @@ $(LIB_FILE): $(LIBPERF_IN) LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) $(LIBTRACEEVENT): FORCE - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a plugins + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a + +libtraceevent_plugins: FORCE + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins + +$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list $(LIBTRACEEVENT)-clean: $(call QUIET_CLEAN, libtraceevent) @@ -555,4 +564,5 @@ FORCE: .PHONY: all install clean config-clean strip install-gtk .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell .PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep +.PHONY: libtraceevent_plugins From 38e096249b4fca1a26ca8908ea2018a5faf366e2 Mon Sep 17 00:00:00 2001 From: He Kuang Date: Thu, 28 May 2015 13:28:55 +0000 Subject: [PATCH 11/15] tools lib traceevent: Ignore libtrace-dynamic-list file The libtrace-dynamic-list file is used to export symbols used by traceevent plugins. 
Signed-off-by: He Kuang Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1432819735-35040-2-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore index 35f56be5a4cdb..3c60335fe7be7 100644 --- a/tools/lib/traceevent/.gitignore +++ b/tools/lib/traceevent/.gitignore @@ -1 +1,2 @@ TRACEEVENT-CFLAGS +libtraceevent-dynamic-list From 457ae94ae047330e75c13f28ead6de31eab245ed Mon Sep 17 00:00:00 2001 From: He Kuang Date: Thu, 28 May 2015 13:17:30 +0000 Subject: [PATCH 12/15] perf record: Fix perf.data size in no-buildid mode The size of perf.data is not updated in no-buildid mode, which results in a wrong size being reported in the output. Before this patch: $ perf.perf record -B -e syscalls:sys_enter_open uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.000 MB perf.data ] After this patch: $ perf.perf record -B -e syscalls:sys_enter_open uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data ] Signed-off-by: He Kuang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1432819050-30511-1-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 91aa2a3dcf19e..d3731cce7c1cc 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -345,12 +345,9 @@ static int process_buildids(struct record *rec) struct perf_data_file *file = &rec->file; struct perf_session *session = rec->session; - u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); - if (size == 0) + if (file->size == 0) return 0; - file->size = size; - /* * During this process, it'll load kernel map and replace 
the * dso->long_name to a real pathname it found. In this case @@ -719,6 +716,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (!err && !file->is_pipe) { rec->session->header.data_size += rec->bytes_written; + file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); if (!rec->no_buildid) { process_buildids(rec); From 9f2de31542f1ac38a15117f90ee6b8449951d86e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 1 Jun 2015 12:01:02 -0300 Subject: [PATCH 13/15] perf machine: Fix up some more method names Calling the function 'machine__new_module' implies a new 'module' will be allocated, when in fact what is returned is a 'struct map' instance, which will not necessarily be newly instantiated: if one already exists with the given module name, it will be returned instead. So be consistent with other "find and if not there, create" like functions, like machine__findnew_thread, machine__findnew_dso, etc, and rename it to machine__findnew_module_map(), which in turn will call machine__findnew_module_dso(). 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/n/tip-acv830vd3hwww2ih5vjtbmu3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 18 +++++++++--------- tools/perf/util/machine.h | 4 ++-- tools/perf/util/probe-event.c | 2 +- tools/perf/util/symbol.c | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index f15ed24a22ac8..dfd419797e6ea 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -490,9 +490,9 @@ int machine__process_lost_samples_event(struct machine *machine __maybe_unused, return 0; } -static struct dso* -machine__module_dso(struct machine *machine, struct kmod_path *m, - const char *filename) +static struct dso *machine__findnew_module_dso(struct machine *machine, + struct kmod_path *m, + const char *filename) { struct dso *dso; @@ -534,8 +534,8 @@ int machine__process_itrace_start_event(struct machine *machine __maybe_unused, return 0; } -struct map *machine__new_module(struct machine *machine, u64 start, - const char *filename) +struct map *machine__findnew_module_map(struct machine *machine, u64 start, + const char *filename) { struct map *map = NULL; struct dso *dso; @@ -549,7 +549,7 @@ struct map *machine__new_module(struct machine *machine, u64 start, if (map) goto out; - dso = machine__module_dso(machine, &m, filename); + dso = machine__findnew_module_dso(machine, &m, filename); if (dso == NULL) goto out; @@ -1017,7 +1017,7 @@ static int machine__create_module(void *arg, const char *name, u64 start) struct machine *machine = arg; struct map *map; - map = machine__new_module(machine, start, name); + map = machine__findnew_module_map(machine, start, name); if (map == NULL) return -1; @@ -1140,8 +1140,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine, strlen(kmmap_prefix) - 1) == 0; if (event->mmap.filename[0] == '/' || (!is_kernel_mmap && 
event->mmap.filename[0] == '[')) { - map = machine__new_module(machine, event->mmap.start, - event->mmap.filename); + map = machine__findnew_module_map(machine, event->mmap.start, + event->mmap.filename); if (map == NULL) goto out_problem; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 8e1f796fd1377..ca267c41f28d3 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -189,8 +189,8 @@ struct symbol *machine__find_kernel_function_by_name(struct machine *machine, filter); } -struct map *machine__new_module(struct machine *machine, u64 start, - const char *filename); +struct map *machine__findnew_module_map(struct machine *machine, u64 start, + const char *filename); int machine__load_kallsyms(struct machine *machine, const char *filename, enum map_type type, symbol_filter_t filter); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index e6f215b7a0523..d4cf50b918393 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -168,7 +168,7 @@ static struct map *kernel_get_module_map(const char *module) /* A file path -- this is an offline module */ if (module && strchr(module, '/')) - return machine__new_module(host_machine, 0, module); + return machine__findnew_module_map(host_machine, 0, module); if (!module) module = "kernel"; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a3e80d6ad70ae..eaee5d32d39dc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1364,7 +1364,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: /* * kernel modules know their symtab type - it's set when - * creating a module dso in machine__new_module(). + * creating a module dso in machine__findnew_module_map(). 
*/ return kmod && dso->symtab_type == type; From e88078442232f3bbcb4ff1d24b3f9ab3dca472b9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 1 Jun 2015 15:40:01 -0300 Subject: [PATCH 14/15] perf tools: Protect accesses the dso rbtrees/lists with a rw lock To allow concurrent access, next step: refcount struct dso instances, so that we can ditch unused ones when the last map pointing to them goes away. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/n/tip-yk1k08etpd2aoe3tnrf0oizn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 52 +++++++++++++++++++++++++++----------- tools/perf/util/dso.h | 10 +++++--- tools/perf/util/machine.c | 27 +++++++++++++++----- tools/perf/util/vdso.c | 53 +++++++++++++++++++++------------------ 4 files changed, 95 insertions(+), 47 deletions(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 5ec9e892c89b8..ff0204ac4321e 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -889,8 +889,8 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name, * Either one of the dso or name parameter must be non-NULL or the * function will not work. 
*/ -static struct dso *dso__findlink_by_longname(struct rb_root *root, - struct dso *dso, const char *name) +static struct dso *__dso__findlink_by_longname(struct rb_root *root, + struct dso *dso, const char *name) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -937,10 +937,10 @@ static struct dso *dso__findlink_by_longname(struct rb_root *root, return NULL; } -static inline struct dso * -dso__find_by_longname(const struct rb_root *root, const char *name) +static inline struct dso *__dso__find_by_longname(struct rb_root *root, + const char *name) { - return dso__findlink_by_longname((struct rb_root *)root, NULL, name); + return __dso__findlink_by_longname(root, NULL, name); } void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) @@ -1149,14 +1149,20 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) return have_build_id; } -void dsos__add(struct dsos *dsos, struct dso *dso) +void __dsos__add(struct dsos *dsos, struct dso *dso) { list_add_tail(&dso->node, &dsos->head); - dso__findlink_by_longname(&dsos->root, dso, NULL); + __dso__findlink_by_longname(&dsos->root, dso, NULL); +} + +void dsos__add(struct dsos *dsos, struct dso *dso) +{ + pthread_rwlock_wrlock(&dsos->lock); + __dsos__add(dsos, dso); + pthread_rwlock_unlock(&dsos->lock); } -struct dso *dsos__find(const struct dsos *dsos, const char *name, - bool cmp_short) +struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) { struct dso *pos; @@ -1166,15 +1172,24 @@ struct dso *dsos__find(const struct dsos *dsos, const char *name, return pos; return NULL; } - return dso__find_by_longname(&dsos->root, name); + return __dso__find_by_longname(&dsos->root, name); } -struct dso *dsos__addnew(struct dsos *dsos, const char *name) +struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short) +{ + struct dso *dso; + pthread_rwlock_rdlock(&dsos->lock); + dso = __dsos__find(dsos, name, cmp_short); + 
pthread_rwlock_unlock(&dsos->lock); + return dso; +} + +struct dso *__dsos__addnew(struct dsos *dsos, const char *name) { struct dso *dso = dso__new(name); if (dso != NULL) { - dsos__add(dsos, dso); + __dsos__add(dsos, dso); dso__set_basename(dso); } return dso; @@ -1182,9 +1197,18 @@ struct dso *dsos__addnew(struct dsos *dsos, const char *name) struct dso *__dsos__findnew(struct dsos *dsos, const char *name) { - struct dso *dso = dsos__find(dsos, name, false); + struct dso *dso = __dsos__find(dsos, name, false); + + return dso ? dso : __dsos__addnew(dsos, name); +} - return dso ? dso : dsos__addnew(dsos, name); +struct dso *dsos__findnew(struct dsos *dsos, const char *name) +{ + struct dso *dso; + pthread_rwlock_wrlock(&dsos->lock); + dso = __dsos__findnew(dsos, name); + pthread_rwlock_unlock(&dsos->lock); + return dso; } size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index ba2d90ed881fa..c16ab5d849c39 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include "map.h" @@ -124,6 +125,7 @@ struct dso_cache { struct dsos { struct list_head head; struct rb_root root; /* rbtree root sorted by long name */ + pthread_rwlock_t lock; }; struct auxtrace_cache; @@ -297,11 +299,13 @@ struct map *dso__new_map(const char *name); struct dso *machine__findnew_kernel(struct machine *machine, const char *name, const char *short_name, int dso_type); +void __dsos__add(struct dsos *dsos, struct dso *dso); void dsos__add(struct dsos *dsos, struct dso *dso); -struct dso *dsos__addnew(struct dsos *dsos, const char *name); -struct dso *dsos__find(const struct dsos *dsos, const char *name, - bool cmp_short); +struct dso *__dsos__addnew(struct dsos *dsos, const char *name); +struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short); +struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short); 
struct dso *__dsos__findnew(struct dsos *dsos, const char *name); +struct dso *dsos__findnew(struct dsos *dsos, const char *name); bool __dsos__read_build_ids(struct list_head *head, bool with_hits); size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index dfd419797e6ea..0cf56d6f073ad 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -20,6 +20,7 @@ static void dsos__init(struct dsos *dsos) { INIT_LIST_HEAD(&dsos->head); dsos->root = RB_ROOT; + pthread_rwlock_init(&dsos->lock, NULL); } int machine__init(struct machine *machine, const char *root_dir, pid_t pid) @@ -81,15 +82,21 @@ struct machine *machine__new_host(void) return NULL; } -static void dsos__delete(struct dsos *dsos) +static void dsos__exit(struct dsos *dsos) { struct dso *pos, *n; + pthread_rwlock_wrlock(&dsos->lock); + list_for_each_entry_safe(pos, n, &dsos->head, node) { RB_CLEAR_NODE(&pos->rb_node); list_del(&pos->node); dso__delete(pos); } + + pthread_rwlock_unlock(&dsos->lock); + + pthread_rwlock_destroy(&dsos->lock); } void machine__delete_threads(struct machine *machine) @@ -110,7 +117,7 @@ void machine__delete_threads(struct machine *machine) void machine__exit(struct machine *machine) { map_groups__exit(&machine->kmaps); - dsos__delete(&machine->dsos); + dsos__exit(&machine->dsos); machine__exit_vdso(machine); zfree(&machine->root_dir); zfree(&machine->current_tid); @@ -496,11 +503,13 @@ static struct dso *machine__findnew_module_dso(struct machine *machine, { struct dso *dso; - dso = dsos__find(&machine->dsos, m->name, true); + pthread_rwlock_wrlock(&machine->dsos.lock); + + dso = __dsos__find(&machine->dsos, m->name, true); if (!dso) { - dso = dsos__addnew(&machine->dsos, m->name); + dso = __dsos__addnew(&machine->dsos, m->name); if (dso == NULL) - return NULL; + goto out_unlock; if (machine__is_host(machine)) dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; @@ -515,6 +524,8 @@ 
static struct dso *machine__findnew_module_dso(struct machine *machine, dso__set_long_name(dso, strdup(filename), true); } +out_unlock: + pthread_rwlock_unlock(&machine->dsos.lock); return dso; } @@ -1156,6 +1167,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine, struct dso *kernel = NULL; struct dso *dso; + pthread_rwlock_rdlock(&machine->dsos.lock); + list_for_each_entry(dso, &machine->dsos.head, node) { /* @@ -1184,6 +1197,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine, break; } + pthread_rwlock_unlock(&machine->dsos.lock); + if (kernel == NULL) kernel = machine__findnew_dso(machine, kmmap_prefix); if (kernel == NULL) @@ -1948,5 +1963,5 @@ int machine__get_kernel_start(struct machine *machine) struct dso *machine__findnew_dso(struct machine *machine, const char *filename) { - return __dsos__findnew(&machine->dsos, filename); + return dsos__findnew(&machine->dsos, filename); } diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 2e8f6886ca72d..c646c74c34f84 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -120,14 +120,14 @@ void machine__exit_vdso(struct machine *machine) zfree(&machine->vdso_info); } -static struct dso *machine__addnew_vdso(struct machine *machine, const char *short_name, - const char *long_name) +static struct dso *__machine__addnew_vdso(struct machine *machine, const char *short_name, + const char *long_name) { struct dso *dso; dso = dso__new(short_name); if (dso != NULL) { - dsos__add(&machine->dsos, dso); + __dsos__add(&machine->dsos, dso); dso__set_long_name(dso, long_name, false); } @@ -230,27 +230,31 @@ static const char *vdso__get_compat_file(struct vdso_file *vdso_file) return vdso_file->temp_file_name; } -static struct dso *vdso__findnew_compat(struct machine *machine, - struct vdso_file *vdso_file) +static struct dso *__machine__findnew_compat(struct machine *machine, + struct vdso_file *vdso_file) { const char *file_name; struct dso *dso; - dso = 
dsos__find(&machine->dsos, vdso_file->dso_name, true); + pthread_rwlock_wrlock(&machine->dsos.lock); + dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true); if (dso) - return dso; + goto out_unlock; file_name = vdso__get_compat_file(vdso_file); if (!file_name) - return NULL; + goto out_unlock; - return machine__addnew_vdso(machine, vdso_file->dso_name, file_name); + dso = __machine__addnew_vdso(machine, vdso_file->dso_name, file_name); +out_unlock: + pthread_rwlock_unlock(&machine->dsos.lock); + return dso; } -static int machine__findnew_vdso_compat(struct machine *machine, - struct thread *thread, - struct vdso_info *vdso_info, - struct dso **dso) +static int __machine__findnew_vdso_compat(struct machine *machine, + struct thread *thread, + struct vdso_info *vdso_info, + struct dso **dso) { enum dso_type dso_type; @@ -267,10 +271,10 @@ static int machine__findnew_vdso_compat(struct machine *machine, switch (dso_type) { case DSO__TYPE_32BIT: - *dso = vdso__findnew_compat(machine, &vdso_info->vdso32); + *dso = __machine__findnew_compat(machine, &vdso_info->vdso32); return 1; case DSO__TYPE_X32BIT: - *dso = vdso__findnew_compat(machine, &vdso_info->vdsox32); + *dso = __machine__findnew_compat(machine, &vdso_info->vdsox32); return 1; case DSO__TYPE_UNKNOWN: case DSO__TYPE_64BIT: @@ -285,31 +289,32 @@ struct dso *machine__findnew_vdso(struct machine *machine, struct thread *thread __maybe_unused) { struct vdso_info *vdso_info; - struct dso *dso; + struct dso *dso = NULL; + pthread_rwlock_wrlock(&machine->dsos.lock); if (!machine->vdso_info) machine->vdso_info = vdso_info__new(); vdso_info = machine->vdso_info; if (!vdso_info) - return NULL; + goto out_unlock; #if BITS_PER_LONG == 64 - if (machine__findnew_vdso_compat(machine, thread, vdso_info, &dso)) - return dso; + if (__machine__findnew_vdso_compat(machine, thread, vdso_info, &dso)) + goto out_unlock; #endif - dso = dsos__find(&machine->dsos, DSO__NAME_VDSO, true); + dso = __dsos__find(&machine->dsos, 
DSO__NAME_VDSO, true); if (!dso) { char *file; file = get_file(&vdso_info->vdso); - if (!file) - return NULL; - - dso = machine__addnew_vdso(machine, DSO__NAME_VDSO, file); + if (file) + dso = __machine__addnew_vdso(machine, DSO__NAME_VDSO, file); } +out_unlock: + pthread_rwlock_unlock(&machine->dsos.lock); return dso; } From d3a7c489c7fd2463e3b2c3a2179c7be879dd9cb4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Jun 2015 11:53:26 -0300 Subject: [PATCH 15/15] perf tools: Reference count struct dso This has a different model than the 'thread' and 'map' struct lifetimes: there is not a definitive "don't use this DSO anymore" event, i.e. we may get many 'struct map' holding references to the '/usr/lib64/libc-2.20.so' DSO but then at some point some DSO may have no references but we still don't want to straight away release its resources, because "soon" we may get a new 'struct map' that needs it and we want to reuse its symtab or other resources. So we need some way to garbage collect it when crossing some memory usage threshold, which is left for another patch. For now it is sufficient to release it when calling dsos__exit(), i.e. when deleting the whole list as part of deleting the 'struct machine' containing it, which will leave only referenced objects being used. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/n/tip-majzgz07cm90t2tejrjy4clf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/dso-data.c | 4 ++-- tools/perf/tests/hists_common.c | 6 +++++- tools/perf/util/dso.c | 37 ++++++++++++++++++++++++++++++++- tools/perf/util/dso.h | 14 ++++++++++++- tools/perf/util/header.c | 1 + tools/perf/util/machine.c | 15 +++++++++---- tools/perf/util/map.c | 11 ++++++++-- tools/perf/util/probe-finder.c | 2 +- tools/perf/util/symbol-elf.c | 2 +- tools/perf/util/symbol.c | 2 +- tools/perf/util/vdso.c | 1 + 11 files changed, 81 insertions(+), 14 deletions(-) diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index 3e41c61bd8610..a218aeaf56a00 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -166,7 +166,7 @@ int test__dso_data(void) free(buf); } - dso__delete(dso); + dso__put(dso); unlink(file); return 0; } @@ -226,7 +226,7 @@ static void dsos__delete(int cnt) struct dso *dso = dsos[i]; unlink(dso->name); - dso__delete(dso); + dso__put(dso); } free(dsos); diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index 915f60af6a0eb..ce80b274b0973 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -134,11 +134,15 @@ struct machine *setup_fake_machine(struct machines *machines) sym = symbol__new(fsym->start, fsym->length, STB_GLOBAL, fsym->name); - if (sym == NULL) + if (sym == NULL) { + dso__put(dso); goto out; + } symbols__insert(&dso->symbols[MAP__FUNCTION], sym); } + + dso__put(dso); } return machine; diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index ff0204ac4321e..7c0c08386a1d9 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1049,6 +1049,7 @@ struct dso *dso__new(const char *name) INIT_LIST_HEAD(&dso->node); INIT_LIST_HEAD(&dso->data.open_entry); pthread_mutex_init(&dso->lock, NULL); + atomic_set(&dso->refcnt, 1); } return 
dso; @@ -1083,6 +1084,19 @@ void dso__delete(struct dso *dso) free(dso); } +struct dso *dso__get(struct dso *dso) +{ + if (dso) + atomic_inc(&dso->refcnt); + return dso; +} + +void dso__put(struct dso *dso) +{ + if (dso && atomic_dec_and_test(&dso->refcnt)) + dso__delete(dso); +} + void dso__set_build_id(struct dso *dso, void *build_id) { memcpy(dso->build_id, build_id, sizeof(dso->build_id)); @@ -1153,6 +1167,27 @@ void __dsos__add(struct dsos *dsos, struct dso *dso) { list_add_tail(&dso->node, &dsos->head); __dso__findlink_by_longname(&dsos->root, dso, NULL); + /* + * It is now in the linked list, grab a reference, then garbage collect + * this when needing memory, by looking at LRU dso instances in the + * list with atomic_read(&dso->refcnt) == 1, i.e. no references + * anywhere besides the one for the list, do, under a lock for the + * list: remove it from the list, then a dso__put(), that probably will + * be the last and will then call dso__delete(), end of life. + * + * That, or at the end of the 'struct machine' lifetime, when all + * 'struct dso' instances will be removed from the list, in + * dsos__exit(), if they have no other reference from some other data + * structure. + * + * E.g.: after processing a 'perf.data' file and storing references + * to objects instantiated while processing events, we will have + * references to the 'thread', 'map', 'dso' structs all from 'struct + * hist_entry' instances, but we may not need anything not referenced, + * so we might as well call machines__exit()/machines__delete() and + * garbage collect it. 
+ */ + dso__get(dso); } void dsos__add(struct dsos *dsos, struct dso *dso) @@ -1206,7 +1241,7 @@ struct dso *dsos__findnew(struct dsos *dsos, const char *name) { struct dso *dso; pthread_rwlock_wrlock(&dsos->lock); - dso = __dsos__findnew(dsos, name); + dso = dso__get(__dsos__findnew(dsos, name)); pthread_rwlock_unlock(&dsos->lock); return dso; } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index c16ab5d849c39..2fe98bb0e95b0 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -1,6 +1,7 @@ #ifndef __PERF_DSO #define __PERF_DSO +#include #include #include #include @@ -179,7 +180,7 @@ struct dso { void *priv; u64 db_id; }; - + atomic_t refcnt; char name[0]; }; @@ -206,6 +207,17 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated); int dso__name_len(const struct dso *dso); +struct dso *dso__get(struct dso *dso); +void dso__put(struct dso *dso); + +static inline void __dso__zput(struct dso **dso) +{ + dso__put(*dso); + *dso = NULL; +} + +#define dso__zput(dso) __dso__zput(&dso) + bool dso__loaded(const struct dso *dso, enum map_type type); bool dso__sorted_by_name(const struct dso *dso, enum map_type type); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index ac5aaaeed7ffd..21a77e7a171e8 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1277,6 +1277,7 @@ static int __event_process_build_id(struct build_id_event *bev, sbuild_id); pr_debug("build id event received for %s: %s\n", dso->long_name, sbuild_id); + dso__put(dso); } err = 0; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 0cf56d6f073ad..132e357651019 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -82,7 +82,7 @@ struct machine *machine__new_host(void) return NULL; } -static void dsos__exit(struct dsos *dsos) +static void dsos__purge(struct dsos *dsos) { struct dso *pos, *n; @@ -90,12 +90,16 @@ static void dsos__exit(struct dsos *dsos) list_for_each_entry_safe(pos, 
n, &dsos->head, node) { RB_CLEAR_NODE(&pos->rb_node); - list_del(&pos->node); - dso__delete(pos); + list_del_init(&pos->node); + dso__put(pos); } pthread_rwlock_unlock(&dsos->lock); +} +static void dsos__exit(struct dsos *dsos) +{ + dsos__purge(dsos); pthread_rwlock_destroy(&dsos->lock); } @@ -524,6 +528,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine, dso__set_long_name(dso, strdup(filename), true); } + dso__get(dso); out_unlock: pthread_rwlock_unlock(&machine->dsos.lock); return dso; @@ -1205,8 +1210,10 @@ static int machine__process_kernel_mmap_event(struct machine *machine, goto out_problem; kernel->kernel = kernel_type; - if (__machine__create_kernel_maps(machine, kernel) < 0) + if (__machine__create_kernel_maps(machine, kernel) < 0) { + dso__put(kernel); goto out_problem; + } if (strstr(kernel->long_name, "vmlinux")) dso__set_short_name(kernel, "[kernel.vmlinux]", false); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 365011c233a68..1241ab989cf5d 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -132,7 +132,7 @@ void map__init(struct map *map, enum map_type type, map->end = end; map->pgoff = pgoff; map->reloc = 0; - map->dso = dso; + map->dso = dso__get(dso); map->map_ip = map__map_ip; map->unmap_ip = map__unmap_ip; RB_CLEAR_NODE(&map->rb_node); @@ -198,6 +198,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, if (type != MAP__FUNCTION) dso__set_loaded(dso, map->type); } + dso__put(dso); } return map; out_delete: @@ -224,9 +225,15 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type) return map; } -void map__delete(struct map *map) +static void map__exit(struct map *map) { BUG_ON(!RB_EMPTY_NODE(&map->rb_node)); + dso__zput(map->dso); +} + +void map__delete(struct map *map) +{ + map__exit(map); free(map); } diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index c50da392e2562..2da65a7108932 100644 --- a/tools/perf/util/probe-finder.c 
+++ b/tools/perf/util/probe-finder.c @@ -130,7 +130,7 @@ struct debuginfo *debuginfo__new(const char *path) continue; dinfo = __debuginfo__new(buf); } - dso__delete(dso); + dso__put(dso); out: /* if failed to open all distro debuginfo, open given binary */ diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a93ba85509b28..65f7e389ae099 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1016,7 +1016,7 @@ int dso__load_sym(struct dso *dso, struct map *map, curr_map = map__new2(start, curr_dso, map->type); if (curr_map == NULL) { - dso__delete(curr_dso); + dso__put(curr_dso); goto out_elf_end; } if (adjust_kernel_syms) { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index eaee5d32d39dc..504f2d73b7eef 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -786,7 +786,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta, curr_map = map__new2(pos->start, ndso, map->type); if (curr_map == NULL) { - dso__delete(ndso); + dso__put(ndso); return -1; } diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index c646c74c34f84..4b89118f158db 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -314,6 +314,7 @@ struct dso *machine__findnew_vdso(struct machine *machine, } out_unlock: + dso__get(dso); pthread_rwlock_unlock(&machine->dsos.lock); return dso; }