diff --git a/[refs] b/[refs] index bf43b958fe89..41e04218010b 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: c3305257cd4df63e03e21e331a0140ae9c0faccc +refs/heads/master: d6971822c288ce5547190c6f812cc978d6520777 diff --git a/trunk/scripts/Makefile.build b/trunk/scripts/Makefile.build index fdca952f6a40..6165622c3e29 100644 --- a/trunk/scripts/Makefile.build +++ b/trunk/scripts/Makefile.build @@ -255,6 +255,8 @@ sub_cmd_record_mcount = \ if [ $(@) != "scripts/mod/empty.o" ]; then \ $(objtree)/scripts/recordmcount $(RECORDMCOUNT_FLAGS) "$(@)"; \ fi; +recordmcount_source := $(srctree)/scripts/recordmcount.c \ + $(srctree)/scripts/recordmcount.h else sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \ @@ -262,6 +264,7 @@ sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \ "$(LD)" "$(NM)" "$(RM)" "$(MV)" \ "$(if $(part-of-module),1,0)" "$(@)"; +recordmcount_source := $(srctree)/scripts/recordmcount.pl endif cmd_record_mcount = \ if [ "$(findstring -pg,$(_c_flags))" = "-pg" ]; then \ @@ -282,13 +285,13 @@ define rule_cc_o_c endef # Built-in and composite module parts -$(obj)/%.o: $(src)/%.c FORCE +$(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE $(call cmd,force_checksrc) $(call if_changed_rule,cc_o_c) # Single-part modules are special since we need to mark them in $(MODVERDIR) -$(single-used-m): $(obj)/%.o: $(src)/%.c FORCE +$(single-used-m): $(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE $(call cmd,force_checksrc) $(call if_changed_rule,cc_o_c) @{ echo $(@:.o=.ko); echo $@; } > $(MODVERDIR)/$(@F:.o=.mod) diff --git a/trunk/tools/perf/builtin-stat.c b/trunk/tools/perf/builtin-stat.c index a9f06715e44d..602c3c96fa1e 100644 --- a/trunk/tools/perf/builtin-stat.c +++ b/trunk/tools/perf/builtin-stat.c @@ -6,28 +6,24 @@ * * Sample output: - $ perf stat ./hackbench 10 + $ perf stat ~/hackbench 10 + Time: 0.104 - Time: 0.118 + Performance counter stats for '/home/mingo/hackbench': - Performance counter stats for './hackbench 10': + 1255.538611 task clock ticks # 10.143 CPU utilization factor + 54011 context switches # 0.043 M/sec + 385 CPU migrations # 0.000 M/sec + 17755 pagefaults # 0.014 M/sec + 3808323185 CPU cycles # 3033.219 M/sec + 1575111190 instructions # 1254.530 M/sec + 17367895 cache references # 13.833 M/sec + 7674421 cache misses # 6.112 M/sec - 1708.761321 task-clock # 11.037 CPUs utilized - 41,190 context-switches # 0.024 M/sec - 6,735 CPU-migrations # 0.004 M/sec - 17,318 page-faults # 0.010 M/sec - 5,205,202,243 cycles # 3.046 GHz - 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle - 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle - 2,603,501,247 instructions # 0.50 insns per cycle - # 1.48 stalled cycles per insn - 484,357,498 branches # 283.455 M/sec - 6,388,934 branch-misses # 1.32% of all branches - - 0.154822978 seconds time elapsed + Wall-clock time elapsed: 123.786620 msecs * - * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar + * Copyright (C) 2008, Red Hat Inc, Ingo Molnar * * Improvements and fixes by: * @@ -79,10 +75,22 @@ static struct perf_event_attr default_attrs[] = { }; /* - * Detailed stats (-d), covering the L1 and last level data caches: + * Detailed stats: */ static struct perf_event_attr detailed_attrs[] = { + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, + + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, + { .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_L1D << 0 | @@ -108,69 +116,6 @@ static struct perf_event_attr detailed_attrs[] = { (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, }; -/* - * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: - */ -static struct perf_event_attr very_detailed_attrs[] = { - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1I << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1I << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_DTLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_DTLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_ITLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_ITLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - -}; - -/* - * Very, very detailed stats (-d -d -d), adding prefetch events: - */ -static struct perf_event_attr very_very_detailed_attrs[] = { - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1D << 0 | - (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1D << 0 | - (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, -}; - - - struct perf_evlist *evsel_list; static bool system_wide = false; @@ -184,7 +129,7 @@ static pid_t target_pid = -1; static pid_t target_tid = -1; static pid_t child_pid = -1; static bool null_run = false; -static int detailed_run = 0; +static bool detailed_run = false; static bool sync_run = false; static bool big_num = true; static int big_num_opt = -1; @@ -261,10 +206,6 @@ struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS]; struct stats runtime_branches_stats[MAX_NR_CPUS]; struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; -struct stats runtime_l1_icache_stats[MAX_NR_CPUS]; -struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; -struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; -struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; struct stats walltime_nsecs_stats; static int create_perf_stat_counter(struct perf_evsel *evsel) @@ -321,14 +262,6 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count) update_stats(&runtime_cacherefs_stats[0], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) update_stats(&runtime_l1_dcache_stats[0], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_stats(&runtime_l1_icache_stats[0], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_stats(&runtime_ll_cache_stats[0], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_stats(&runtime_dtlb_cache_stats[0], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_stats(&runtime_itlb_cache_stats[0], count[0]); } /* @@ -531,7 +464,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) { double msecs = avg / 1e6; char cpustr[16] = { '\0', }; - const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s"; + const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s"; if (no_aggr) sprintf(cpustr, "CPU%*d%s", @@ -642,98 +575,6 @@ static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, dou fprintf(stderr, " of all L1-dcache hits "); } -static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg) -{ - double total, ratio = 0.0; - const char *color; - - total = avg_stats(&runtime_l1_icache_stats[cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = PERF_COLOR_NORMAL; - if (ratio > 20.0) - color = PERF_COLOR_RED; - else if (ratio > 10.0) - color = PERF_COLOR_MAGENTA; - else if (ratio > 5.0) - color = PERF_COLOR_YELLOW; - - fprintf(stderr, " # "); - color_fprintf(stderr, color, "%6.2f%%", ratio); - fprintf(stderr, " of all L1-icache hits "); -} - -static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) -{ - double total, ratio = 0.0; - const char *color; - - total = avg_stats(&runtime_dtlb_cache_stats[cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = PERF_COLOR_NORMAL; - if (ratio > 20.0) - color = PERF_COLOR_RED; - else if (ratio > 10.0) - color = PERF_COLOR_MAGENTA; - else if (ratio > 5.0) - color = PERF_COLOR_YELLOW; - - fprintf(stderr, " # "); - color_fprintf(stderr, color, "%6.2f%%", ratio); - fprintf(stderr, " of all dTLB cache hits "); -} - -static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) -{ - double total, ratio = 0.0; - const char *color; - - total = avg_stats(&runtime_itlb_cache_stats[cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = PERF_COLOR_NORMAL; - if (ratio > 20.0) - color = PERF_COLOR_RED; - else if (ratio > 10.0) - color = PERF_COLOR_MAGENTA; - else if (ratio > 5.0) - color = PERF_COLOR_YELLOW; - - fprintf(stderr, " # "); - color_fprintf(stderr, color, "%6.2f%%", ratio); - fprintf(stderr, " of all iTLB cache hits "); -} - -static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) -{ - double total, ratio = 0.0; - const char *color; - - total = avg_stats(&runtime_ll_cache_stats[cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = PERF_COLOR_NORMAL; - if (ratio > 20.0) - color = PERF_COLOR_RED; - else if (ratio > 10.0) - color = PERF_COLOR_MAGENTA; - else if (ratio > 5.0) - color = PERF_COLOR_YELLOW; - - fprintf(stderr, " # "); - color_fprintf(stderr, color, "%6.2f%%", ratio); - fprintf(stderr, " of all LL-cache hits "); -} - static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) { double total, ratio = 0.0; @@ -743,9 +584,9 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) if (csv_output) fmt = "%s%.0f%s%s"; else if (big_num) - fmt = "%s%'18.0f%s%-25s"; + fmt = "%s%'18.0f%s%-24s"; else - fmt = "%s%18.0f%s%-25s"; + fmt = "%s%18.0f%s%-24s"; if (no_aggr) sprintf(cpustr, "CPU%*d%s", @@ -775,7 +616,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) if (total && avg) { ratio = total / avg; - fprintf(stderr, "\n # %5.2f stalled cycles per insn", ratio); + fprintf(stderr, "\n # %5.2f stalled cycles per insn", ratio); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && @@ -788,34 +629,6 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && runtime_l1_dcache_stats[cpu].n != 0) { print_l1_dcache_misses(cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_l1_icache_stats[cpu].n != 0) { - print_l1_icache_misses(cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_dtlb_cache_stats[cpu].n != 0) { - print_dtlb_cache_misses(cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_itlb_cache_stats[cpu].n != 0) { - print_itlb_cache_misses(cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_ll_cache_stats[cpu].n != 0) { - print_ll_cache_misses(cpu, evsel, avg); } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && runtime_cacherefs_stats[cpu].n != 0) { total = avg_stats(&runtime_cacherefs_stats[cpu]); @@ -891,7 +704,7 @@ static void print_counter_aggr(struct perf_evsel *counter) avg_enabled = avg_stats(&ps->res_stats[1]); avg_running = avg_stats(&ps->res_stats[2]); - fprintf(stderr, " [%5.2f%%]", 100 * avg_running / avg_enabled); + fprintf(stderr, " (%.2f%%)", 100 * avg_running / avg_enabled); } fprintf(stderr, "\n"); } @@ -974,12 +787,10 @@ static void print_stat(int argc, const char **argv) } if (!csv_output) { - if (!null_run) - fprintf(stderr, "\n"); - fprintf(stderr, " %17.9f seconds time elapsed", + fprintf(stderr, "\n"); + fprintf(stderr, " %18.9f seconds time elapsed", avg_stats(&walltime_nsecs_stats)/1e9); if (run_count > 1) { - fprintf(stderr, " "); print_noise_pct(stddev_stats(&walltime_nsecs_stats), avg_stats(&walltime_nsecs_stats)); } @@ -1043,7 +854,7 @@ static const struct option options[] = { "repeat command and print average + stddev (max: 100)"), OPT_BOOLEAN('n', "null", &null_run, "null run - dont start any counters"), - OPT_INCR('d', "detailed", &detailed_run, + OPT_BOOLEAN('d', "detailed", &detailed_run, "detailed run - start a lot of events"), OPT_BOOLEAN('S', "sync", &sync_run, "call sync() before starting a run"), @@ -1062,70 +873,6 @@ static const struct option options[] = { OPT_END() }; -/* - * Add default attributes, if there were no attributes specified or - * if -d/--detailed, -d -d or -d -d -d is used: - */ -static int add_default_attributes(void) -{ - struct perf_evsel *pos; - size_t attr_nr = 0; - size_t c; - - /* Set attrs if no event is selected and !null_run: */ - if (null_run) - return 0; - - if (!evsel_list->nr_entries) { - for (c = 0; c < ARRAY_SIZE(default_attrs); c++) { - pos = perf_evsel__new(default_attrs + c, c + attr_nr); - if (pos == NULL) - return -1; - perf_evlist__add(evsel_list, pos); - } - attr_nr += c; - } - - /* Detailed events get appended to the event list: */ - - if (detailed_run < 1) - return 0; - - /* Append detailed run extra attributes: */ - for (c = 0; c < ARRAY_SIZE(detailed_attrs); c++) { - pos = perf_evsel__new(detailed_attrs + c, c + attr_nr); - if (pos == NULL) - return -1; - perf_evlist__add(evsel_list, pos); - } - attr_nr += c; - - if (detailed_run < 2) - return 0; - - /* Append very detailed run extra attributes: */ - for (c = 0; c < ARRAY_SIZE(very_detailed_attrs); c++) { - pos = perf_evsel__new(very_detailed_attrs + c, c + attr_nr); - if (pos == NULL) - return -1; - perf_evlist__add(evsel_list, pos); - } - - if (detailed_run < 3) - return 0; - - /* Append very, very detailed run extra attributes: */ - for (c = 0; c < ARRAY_SIZE(very_very_detailed_attrs); c++) { - pos = perf_evsel__new(very_very_detailed_attrs + c, c + attr_nr); - if (pos == NULL) - return -1; - perf_evlist__add(evsel_list, pos); - } - - - return 0; -} - int cmd_stat(int argc, const char **argv, const char *prefix __used) { struct perf_evsel *pos; @@ -1171,8 +918,28 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) usage_with_options(stat_usage, options); } - if (add_default_attributes()) - goto out; + /* Set attrs and nr_counters if no event is selected and !null_run */ + if (detailed_run) { + size_t c; + + for (c = 0; c < ARRAY_SIZE(detailed_attrs); ++c) { + pos = perf_evsel__new(&detailed_attrs[c], c); + if (pos == NULL) + goto out; + perf_evlist__add(evsel_list, pos); + } + } + /* Set attrs and nr_counters if no event is selected and !null_run */ + if (!detailed_run && !null_run && !evsel_list->nr_entries) { + size_t c; + + for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) { + pos = perf_evsel__new(&default_attrs[c], c); + if (pos == NULL) + goto out; + perf_evlist__add(evsel_list, pos); + } + } if (target_pid != -1) target_tid = target_pid;