Skip to content

Commit

Permalink
perf diff: Report noisy for cycles diff
Browse files Browse the repository at this point in the history
This patch prints the stddev and hist for the cycles diff of program
block. It can help us to understand if the cycles is noisy or not.

This patch is inspired by Andi Kleen's patch:

  https://lwn.net/Articles/600471/

We create new option '--cycles-hist'.

Example:

  perf record -b ./div
  perf record -b ./div
  perf diff -c cycles

  # Baseline                                [Program Block Range] Cycles Diff  Shared Object      Symbol
  # ........  .......................................................... ....  .................  ............................
  #
      46.72%                                      [div.c:40 -> div.c:40]    0  div                [.] main
      46.72%                                      [div.c:42 -> div.c:44]    0  div                [.] main
      46.72%                                      [div.c:42 -> div.c:39]    0  div                [.] main
      20.54%                          [random_r.c:357 -> random_r.c:394]    1  libc-2.27.so       [.] __random_r
      20.54%                          [random_r.c:357 -> random_r.c:380]    0  libc-2.27.so       [.] __random_r
      20.54%                          [random_r.c:388 -> random_r.c:388]    0  libc-2.27.so       [.] __random_r
      20.54%                          [random_r.c:388 -> random_r.c:391]    0  libc-2.27.so       [.] __random_r
      17.04%                              [random.c:288 -> random.c:291]    0  libc-2.27.so       [.] __random
      17.04%                              [random.c:291 -> random.c:291]    0  libc-2.27.so       [.] __random
      17.04%                              [random.c:293 -> random.c:293]    0  libc-2.27.so       [.] __random
      17.04%                              [random.c:295 -> random.c:295]    0  libc-2.27.so       [.] __random
      17.04%                              [random.c:295 -> random.c:295]    0  libc-2.27.so       [.] __random
      17.04%                              [random.c:298 -> random.c:298]    0  libc-2.27.so       [.] __random
       8.40%                                      [div.c:22 -> div.c:25]    0  div                [.] compute_flag
       8.40%                                      [div.c:27 -> div.c:28]    0  div                [.] compute_flag
       5.14%                                    [rand.c:26 -> rand.c:27]    0  libc-2.27.so       [.] rand
       5.14%                                    [rand.c:28 -> rand.c:28]    0  libc-2.27.so       [.] rand
       2.15%                                  [rand@plt+0 -> rand@plt+0]    0  div                [.] rand@plt
       0.00%                                                                   [kernel.kallsyms]  [k] __x86_indirect_thunk_rax
       0.00%                                [do_mmap+714 -> do_mmap+732]  -10  [kernel.kallsyms]  [k] do_mmap
       0.00%                                [do_mmap+737 -> do_mmap+765]    1  [kernel.kallsyms]  [k] do_mmap
       0.00%                                [do_mmap+262 -> do_mmap+299]    0  [kernel.kallsyms]  [k] do_mmap
       0.00%  [__x86_indirect_thunk_r15+0 -> __x86_indirect_thunk_r15+0]    7  [kernel.kallsyms]  [k] __x86_indirect_thunk_r15
       0.00%            [native_sched_clock+0 -> native_sched_clock+119]   -1  [kernel.kallsyms]  [k] native_sched_clock
       0.00%                 [native_write_msr+0 -> native_write_msr+16]  -13  [kernel.kallsyms]  [k] native_write_msr

When we enable the option '--cycles-hist', the output is

  perf diff -c cycles --cycles-hist

  # Baseline                                [Program Block Range] Cycles Diff        stddev/Hist  Shared Object      Symbol
  # ........  .......................................................... ....  .................  .................  ............................
  #
      46.72%                                      [div.c:40 -> div.c:40]    0  ± 37.8% ▁█▁▁██▁█   div                [.] main
      46.72%                                      [div.c:42 -> div.c:44]    0  ± 49.4% ▁▁▂█▂▂▂▂   div                [.] main
      46.72%                                      [div.c:42 -> div.c:39]    0  ± 24.1% ▃█▂▄▁▃▂▁   div                [.] main
      20.54%                          [random_r.c:357 -> random_r.c:394]    1  ± 33.5% ▅▂▁█▃▁▂▁   libc-2.27.so       [.] __random_r
      20.54%                          [random_r.c:357 -> random_r.c:380]    0  ± 39.4% ▁▁█▁██▅▁   libc-2.27.so       [.] __random_r
      20.54%                          [random_r.c:388 -> random_r.c:388]    0                     libc-2.27.so       [.] __random_r
      20.54%                          [random_r.c:388 -> random_r.c:391]    0  ± 41.2% ▁▃▁▂█▄▃▁   libc-2.27.so       [.] __random_r
      17.04%                              [random.c:288 -> random.c:291]    0  ± 48.8% ▁▁▁▁███▁   libc-2.27.so       [.] __random
      17.04%                              [random.c:291 -> random.c:291]    0  ±100.0% ▁█▁▁▁▁▁▁   libc-2.27.so       [.] __random
      17.04%                              [random.c:293 -> random.c:293]    0  ±100.0% ▁█▁▁▁▁▁▁   libc-2.27.so       [.] __random
      17.04%                              [random.c:295 -> random.c:295]    0  ±100.0% ▁█▁▁▁▁▁▁   libc-2.27.so       [.] __random
      17.04%                              [random.c:295 -> random.c:295]    0                     libc-2.27.so       [.] __random
      17.04%                              [random.c:298 -> random.c:298]    0  ± 75.6% ▃█▁▁▁▁▁▁   libc-2.27.so       [.] __random
       8.40%                                      [div.c:22 -> div.c:25]    0  ± 42.1% ▁▃▁▁███▁   div                [.] compute_flag
       8.40%                                      [div.c:27 -> div.c:28]    0  ± 41.8% ██▁▁▄▁▁▄   div                [.] compute_flag
       5.14%                                    [rand.c:26 -> rand.c:27]    0  ± 37.8% ▁▁▁████▁   libc-2.27.so       [.] rand
       5.14%                                    [rand.c:28 -> rand.c:28]    0                     libc-2.27.so       [.] rand
       2.15%                                  [rand@plt+0 -> rand@plt+0]    0                     div                [.] rand@plt
       0.00%                                                                                      [kernel.kallsyms]  [k] __x86_indirect_thunk_rax
       0.00%                                [do_mmap+714 -> do_mmap+732]  -10                     [kernel.kallsyms]  [k] do_mmap
       0.00%                                [do_mmap+737 -> do_mmap+765]    1                     [kernel.kallsyms]  [k] do_mmap
       0.00%                                [do_mmap+262 -> do_mmap+299]    0                     [kernel.kallsyms]  [k] do_mmap
       0.00%  [__x86_indirect_thunk_r15+0 -> __x86_indirect_thunk_r15+0]    7                     [kernel.kallsyms]  [k] __x86_indirect_thunk_r15
       0.00%            [native_sched_clock+0 -> native_sched_clock+119]   -1  ± 38.5% ▄█▁        [kernel.kallsyms]  [k] native_sched_clock
       0.00%                 [native_write_msr+0 -> native_write_msr+16]  -13  ± 47.1% ▁█▇▃▁▁     [kernel.kallsyms]  [k] native_write_msr

 v8:
 ---
 Rebase to perf/core branch

 v7:
 ---
 1. v6 got Jiri's ACK.
 2. Rebase to latest perf/core branch.

 v6:
 ---
 1. Jiri provides better code for using data__hpp_register() in ui_init().
    Use this code in v6.

 v5:
 ---
 1. Refine the use of data__hpp_register() in ui_init() according to
    Jiri's suggestion.

 v4:
 ---
 1. Rename the new option from '--noisy' to '--cycles-hist'
 2. Remove the option '-n'.
 3. Only update the spark value and stats when '--cycles-hist' is enabled.
 4. Remove the code of printing '..'.

 v3:
 ---
 1. Move the histogram to a separate column
 2. Move the svals[] out of struct stats

 v2:
 ---
 Jiri got a compile error,

  CC       builtin-diff.o
  builtin-diff.c: In function ‘compute_cycles_diff’:
  builtin-diff.c:712:10: error: taking the absolute value of unsigned type ‘u64’ {aka ‘long unsigned int’} has no effect [-Werror=absolute-value]
  712 |          labs(pair->block_info->cycles_spark[i] -
      |          ^~~~

 Because the result of u64 - u64 is still u64. Now we change the type of
 cycles_spark[] to s64.

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20190925011446.30678-1-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
  • Loading branch information
Jin Yao authored and Arnaldo Carvalho de Melo committed Oct 11, 2019
1 parent 5554211 commit cebf7d5
Show file tree
Hide file tree
Showing 9 changed files with 203 additions and 0 deletions.
5 changes: 5 additions & 0 deletions tools/perf/Documentation/perf-diff.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ OPTIONS
diff.compute config option. See COMPARISON METHODS section for
more info.

--cycles-hist::
Report a histogram and the standard deviation for cycles data.
It can help us to judge if the reported cycles data is noisy or
not. This option should be used with '-c cycles'.

-p::
--period::
Show period values for both compared hist entries.
Expand Down
143 changes: 143 additions & 0 deletions tools/perf/builtin-diff.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "util/time-utils.h"
#include "util/annotate.h"
#include "util/map.h"
#include "util/spark.h"
#include <linux/err.h>
#include <linux/zalloc.h>
#include <subcmd/pager.h>
Expand Down Expand Up @@ -53,6 +54,7 @@ enum {
PERF_HPP_DIFF__FORMULA,
PERF_HPP_DIFF__DELTA_ABS,
PERF_HPP_DIFF__CYCLES,
PERF_HPP_DIFF__CYCLES_HIST,

PERF_HPP_DIFF__MAX_INDEX
};
Expand Down Expand Up @@ -87,6 +89,7 @@ static bool force;
static bool show_period;
static bool show_formula;
static bool show_baseline_only;
static bool cycles_hist;
static unsigned int sort_compute = 1;

static s64 compute_wdiff_w1;
Expand Down Expand Up @@ -164,6 +167,10 @@ static struct header_column {
[PERF_HPP_DIFF__CYCLES] = {
.name = "[Program Block Range] Cycles Diff",
.width = 70,
},
[PERF_HPP_DIFF__CYCLES_HIST] = {
.name = "stddev/Hist",
.width = NUM_SPARKS + 9,
}
};

Expand Down Expand Up @@ -610,6 +617,9 @@ static void init_block_info(struct block_info *bi, struct symbol *sym,
bi->cycles_aggr = ch->cycles_aggr;
bi->num = ch->num;
bi->num_aggr = ch->num_aggr;

memcpy(bi->cycles_spark, ch->cycles_spark,
NUM_SPARKS * sizeof(u64));
}

static int process_block_per_sym(struct hist_entry *he)
Expand Down Expand Up @@ -689,6 +699,21 @@ static struct hist_entry *get_block_pair(struct hist_entry *he,
return NULL;
}

static void init_spark_values(unsigned long *svals, int num)
{
for (int i = 0; i < num; i++)
svals[i] = 0;
}

static void update_spark_value(unsigned long *svals, int num,
struct stats *stats, u64 val)
{
int n = stats->n;

if (n < num)
svals[n] = val;
}

static void compute_cycles_diff(struct hist_entry *he,
struct hist_entry *pair)
{
Expand All @@ -697,6 +722,26 @@ static void compute_cycles_diff(struct hist_entry *he,
pair->diff.cycles =
pair->block_info->cycles_aggr / pair->block_info->num_aggr -
he->block_info->cycles_aggr / he->block_info->num_aggr;

if (!cycles_hist)
return;

init_stats(&pair->diff.stats);
init_spark_values(pair->diff.svals, NUM_SPARKS);

for (int i = 0; i < pair->block_info->num; i++) {
u64 val;

if (i >= he->block_info->num || i >= NUM_SPARKS)
break;

val = labs(pair->block_info->cycles_spark[i] -
he->block_info->cycles_spark[i]);

update_spark_value(pair->diff.svals, NUM_SPARKS,
&pair->diff.stats, val);
update_stats(&pair->diff.stats, val);
}
}
}

Expand Down Expand Up @@ -1255,6 +1300,9 @@ static const struct option options[] = {
"Show period values."),
OPT_BOOLEAN('F', "formula", &show_formula,
"Show formula."),
OPT_BOOLEAN(0, "cycles-hist", &cycles_hist,
"Show cycles histogram and standard deviation "
"- WARNING: use only with -c cycles."),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
Expand Down Expand Up @@ -1462,6 +1510,90 @@ static int hpp__color_cycles(struct perf_hpp_fmt *fmt,
return __hpp__color_compare(fmt, hpp, he, COMPUTE_CYCLES);
}

static int all_zero(unsigned long *vals, int len)
{
int i;

for (i = 0; i < len; i++)
if (vals[i] != 0)
return 0;
return 1;
}

static int print_cycles_spark(char *bf, int size, unsigned long *svals, u64 n)
{
int printed;

if (n <= 1)
return 0;

if (n > NUM_SPARKS)
n = NUM_SPARKS;
if (all_zero(svals, n))
return 0;

printed = print_spark(bf, size, svals, n);
printed += scnprintf(bf + printed, size - printed, " ");
return printed;
}

static int hpp__color_cycles_hist(struct perf_hpp_fmt *fmt,
struct perf_hpp *hpp, struct hist_entry *he)
{
struct diff_hpp_fmt *dfmt =
container_of(fmt, struct diff_hpp_fmt, fmt);
struct hist_entry *pair = get_pair_fmt(he, dfmt);
struct block_hist *bh = container_of(he, struct block_hist, he);
struct block_hist *bh_pair;
struct hist_entry *block_he;
char spark[32], buf[128];
double r;
int ret, pad;

if (!pair) {
if (bh->block_idx)
hpp->skip = true;

goto no_print;
}

bh_pair = container_of(pair, struct block_hist, he);

block_he = hists__get_entry(&bh_pair->block_hists, bh->block_idx);
if (!block_he) {
hpp->skip = true;
goto no_print;
}

ret = print_cycles_spark(spark, sizeof(spark), block_he->diff.svals,
block_he->diff.stats.n);

r = rel_stddev_stats(stddev_stats(&block_he->diff.stats),
avg_stats(&block_he->diff.stats));

if (ret) {
/*
* Padding spaces if number of sparks less than NUM_SPARKS
* otherwise the output is not aligned.
*/
pad = NUM_SPARKS - ((ret - 1) / 3);
scnprintf(buf, sizeof(buf), "%s%5.1f%% %s", "\u00B1", r, spark);
ret = scnprintf(hpp->buf, hpp->size, "%*s",
dfmt->header_width, buf);

if (pad) {
ret += scnprintf(hpp->buf + ret, hpp->size - ret,
"%-*s", pad, " ");
}

return ret;
}

no_print:
return scnprintf(hpp->buf, hpp->size, "%*s",
dfmt->header_width, " ");
}

static void
hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size)
{
Expand Down Expand Up @@ -1667,6 +1799,10 @@ static void data__hpp_register(struct data__file *d, int idx)
fmt->color = hpp__color_cycles;
fmt->sort = hist_entry__cmp_nop;
break;
case PERF_HPP_DIFF__CYCLES_HIST:
fmt->color = hpp__color_cycles_hist;
fmt->sort = hist_entry__cmp_nop;
break;
default:
fmt->sort = hist_entry__cmp_nop;
break;
Expand All @@ -1692,10 +1828,14 @@ static int ui_init(void)
* PERF_HPP_DIFF__DELTA
* PERF_HPP_DIFF__RATIO
* PERF_HPP_DIFF__WEIGHTED_DIFF
* PERF_HPP_DIFF__CYCLES
*/
data__hpp_register(d, i ? compute_2_hpp[compute] :
PERF_HPP_DIFF__BASELINE);

if (cycles_hist && i)
data__hpp_register(d, PERF_HPP_DIFF__CYCLES_HIST);

/*
* And the rest:
*
Expand Down Expand Up @@ -1850,6 +1990,9 @@ int cmd_diff(int argc, const char **argv)
if (quiet)
perf_quiet_option();

if (cycles_hist && (compute != COMPUTE_CYCLES))
usage_with_options(diff_usage, options);

symbol__annotation_init();

if (symbol__init(NULL) < 0)
Expand Down
1 change: 1 addition & 0 deletions tools/perf/util/Build
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ perf-y += cloexec.o
perf-y += call-path.o
perf-y += rwsem.o
perf-y += thread-stack.o
perf-y += spark.o
perf-$(CONFIG_AUXTRACE) += auxtrace.o
perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
perf-$(CONFIG_AUXTRACE) += intel-pt.o
Expand Down
4 changes: 4 additions & 0 deletions tools/perf/util/annotate.c
Original file line number Diff line number Diff line change
Expand Up @@ -853,6 +853,10 @@ static int __symbol__account_cycles(struct cyc_hist *ch,
ch[offset].start < start)
return 0;
}

if (ch[offset].num < NUM_SPARKS)
ch[offset].cycles_spark[ch[offset].num] = cycles;

ch[offset].have_start = have_start;
ch[offset].start = start;
ch[offset].cycles += cycles;
Expand Down
2 changes: 2 additions & 0 deletions tools/perf/util/annotate.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <pthread.h>
#include <asm/bug.h>
#include "symbol_conf.h"
#include "spark.h"

struct hist_browser_timer;
struct hist_entry;
Expand Down Expand Up @@ -235,6 +236,7 @@ struct cyc_hist {
u64 cycles_aggr;
u64 cycles_max;
u64 cycles_min;
s64 cycles_spark[NUM_SPARKS];
u32 num;
u32 num_aggr;
u8 have_start;
Expand Down
4 changes: 4 additions & 0 deletions tools/perf/util/sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include "callchain.h"
#include "values.h"
#include "hist.h"
#include "stat.h"
#include "spark.h"

struct option;
struct thread;
Expand Down Expand Up @@ -71,6 +73,8 @@ struct hist_entry_diff {
/* PERF_HPP_DIFF__CYCLES */
s64 cycles;
};
struct stats stats;
unsigned long svals[NUM_SPARKS];
};

struct hist_entry_ops {
Expand Down
34 changes: 34 additions & 0 deletions tools/perf/util/spark.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#include <stdio.h>
#include <limits.h>
#include <string.h>
#include <stdlib.h>
#include "spark.h"
#include "stat.h"

#define SPARK_SHIFT 8

/* Print spark lines on outf for numval values in val. */
int print_spark(char *bf, int size, unsigned long *val, int numval)
{
static const char *ticks[NUM_SPARKS] = {
"▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"
};
int i, printed = 0;
unsigned long min = ULONG_MAX, max = 0, f;

for (i = 0; i < numval; i++) {
if (val[i] < min)
min = val[i];
if (val[i] > max)
max = val[i];
}
f = ((max - min) << SPARK_SHIFT) / (NUM_SPARKS - 1);
if (f < 1)
f = 1;
for (i = 0; i < numval; i++) {
printed += scnprintf(bf + printed, size - printed, "%s",
ticks[((val[i] - min) << SPARK_SHIFT) / f]);
}

return printed;
}
8 changes: 8 additions & 0 deletions tools/perf/util/spark.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#ifndef SPARK_H
#define SPARK_H 1

#define NUM_SPARKS 8

int print_spark(char *bf, int size, unsigned long *val, int numval);

#endif
2 changes: 2 additions & 0 deletions tools/perf/util/symbol.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <stdio.h>
#include "path.h"
#include "symbol_conf.h"
#include "spark.h"

#ifdef HAVE_LIBELF_SUPPORT
#include <libelf.h>
Expand Down Expand Up @@ -111,6 +112,7 @@ struct block_info {
u64 end;
u64 cycles;
u64 cycles_aggr;
s64 cycles_spark[NUM_SPARKS];
int num;
int num_aggr;
refcount_t refcnt;
Expand Down

0 comments on commit cebf7d5

Please sign in to comment.