Skip to content

Commit

Permalink
perf tools: Make perf.data more self-descriptive (v8)
Browse files Browse the repository at this point in the history
The goal of this patch is to include more information about the host
environment into the perf.data so it is more self-descriptive. Over time,
profiles are captured on various machines and it becomes hard to track
what was recorded, on what machine and when.

This patch provides a way to solve this by extending the perf.data file
with basic information about the host machine. To add those extensions,
we leverage the feature bits capabilities of the perf.data format.  The
change is backward compatible with existing perf.data files.

We define the following useful new extensions:
 - HEADER_HOSTNAME: the hostname
 - HEADER_OSRELEASE: the kernel release number
 - HEADER_ARCH: the hw architecture
 - HEADER_CPUDESC: generic CPU description
 - HEADER_NRCPUS: number of online/avail cpus
 - HEADER_CMDLINE: perf command line
 - HEADER_VERSION: perf version
 - HEADER_CPU_TOPOLOGY: cpu topology
 - HEADER_NUMA_TOPOLOGY: NUMA topology
 - HEADER_TOTAL_MEM: total memory
 - HEADER_EVENT_DESC: full event description (attrs)
 - HEADER_CPUID: easy-to-parse low level CPU identification

The small granularity for the entries is to make it easier to extend
without breaking backward compatibility. Many entries are provided as
ASCII strings.

Perf report/script have been modified to print the basic information as
easy-to-parse ASCII strings. Extended information about CPU and NUMA
topology may be requested with the -I option.

Thanks to David Ahern for reviewing and testing the many versions of
this patch.

 $ perf report --stdio
 # ========
 # captured on : Mon Sep 26 15:22:14 2011
 # hostname : quad
 # os release : 3.1.0-rc4-tip
 # perf version : 3.1.0-rc4
 # arch : x86_64
 # nrcpus online : 4
 # nrcpus avail : 4
 # cpudesc : Intel(R) Core(TM)2 Quad CPU Q6600 @ 2.40GHz
 # cpuid : GenuineIntel,6,15,11
 # total memory : 8105360 kB
 # cmdline : /home/eranian/perfmon/official/tip/build/tools/perf/perf record date
 # event : name = cycles, type = 0, config = 0x0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, id = { 29, 30, 31,
 # HEADER_CPU_TOPOLOGY info available, use -I to display
 # HEADER_NUMA_TOPOLOGY info available, use -I to display
 # ========
 #
 ...

 $ perf report --stdio -I
 # ========
 # captured on : Mon Sep 26 15:22:14 2011
 # hostname : quad
 # os release : 3.1.0-rc4-tip
 # perf version : 3.1.0-rc4
 # arch : x86_64
 # nrcpus online : 4
 # nrcpus avail : 4
 # cpudesc : Intel(R) Core(TM)2 Quad CPU Q6600 @ 2.40GHz
 # cpuid : GenuineIntel,6,15,11
 # total memory : 8105360 kB
 # cmdline : /home/eranian/perfmon/official/tip/build/tools/perf/perf record date
 # event : name = cycles, type = 0, config = 0x0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, id = { 29, 30, 31,
 # sibling cores   : 0-3
 # sibling threads : 0
 # sibling threads : 1
 # sibling threads : 2
 # sibling threads : 3
 # node0 meminfo  : total = 8320608 kB, free = 7571024 kB
 # node0 cpu list : 0-3
 # ========
 #
 ...

Reviewed-by: David Ahern <dsahern@gmail.com>
Tested-by: David Ahern <dsahern@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/20110930134040.GA5575@quad
Signed-off-by: Stephane Eranian <eranian@google.com>
[ committer notes: Use --show-info in the tools as was in the docs, rename
  perf_header_fprintf_info to perf_file_section__fprintf_info, fixup
  conflict with f69b64f "perf: Support setting the disassembler style" ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
  • Loading branch information
Stephane Eranian authored and Arnaldo Carvalho de Melo committed Oct 7, 2011
1 parent be83f5e commit fbe96f2
Show file tree
Hide file tree
Showing 15 changed files with 1,308 additions and 35 deletions.
6 changes: 6 additions & 0 deletions tools/perf/Documentation/perf-report.txt
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,12 @@ OPTIONS

--show-total-period:: Show a column with the sum of periods.

-I::
--show-info::
Display extended information about the perf.data file. This adds
information which may be very large and thus may clutter the display.
It currently includes: cpu and numa topology of the host system.

SEE ALSO
--------
linkperf:perf-stat[1]
7 changes: 7 additions & 0 deletions tools/perf/Documentation/perf-script.txt
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,13 @@ OPTIONS
CPUs are specified with -: 0-2. Default is to report samples on all
CPUs.

-I::
--show-info::
Display extended information about the perf.data file. This adds
information which may be very large and thus may clutter the display.
It currently includes: cpu and numa topology of the host system.
It can only be used with the perf script report mode.

SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
Expand Down
1 change: 1 addition & 0 deletions tools/perf/arch/powerpc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
endif
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
36 changes: 36 additions & 0 deletions tools/perf/arch/powerpc/util/header.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../../util/header.h"

/*
 * Two-level stringification: going through __stringify_1() lets a macro
 * argument (e.g. SPRN_PVR) be expanded to its value before '#' turns it
 * into a string literal.
 */
#define __stringify_1(x) #x
#define __stringify(x) __stringify_1(x)

/*
 * Read special-purpose register number rn with the mfspr instruction.
 * The register number is pasted into the instruction text as a string;
 * the macro evaluates to the value read, as an unsigned long.
 */
#define mfspr(rn) ({unsigned long rval; \
asm volatile("mfspr %0," __stringify(rn) \
: "=r" (rval)); rval; })

#define SPRN_PVR 0x11F /* Processor Version Register */
#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */
#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revision field */

/*
 * get_cpuid - format the processor version as "version,revision".
 *
 * Reads the Processor Version Register and writes its version and revision
 * fields, as unsigned decimals separated by a comma, into buffer.
 *
 * @buffer: destination for the NUL-terminated string
 * @sz:     size of buffer in bytes
 *
 * Returns 0 when the whole string fit, -1 when buffer is NULL, sz is 0,
 * formatting failed or the output would have been truncated.
 */
int
get_cpuid(char *buffer, size_t sz)
{
	unsigned long pvr;
	int nb;

	/* snprintf() stores nothing when sz is 0; never inspect such a buffer */
	if (buffer == NULL || sz == 0)
		return -1;

	pvr = mfspr(SPRN_PVR);

	nb = snprintf(buffer, sz, "%lu,%lu", PVR_VER(pvr), PVR_REV(pvr));

	/*
	 * snprintf() returns the length the full string needs; anything
	 * >= sz means the data did not entirely fit.
	 */
	if (nb < 0 || (size_t)nb >= sz)
		return -1;

	return 0;
}
1 change: 1 addition & 0 deletions tools/perf/arch/x86/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
endif
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
59 changes: 59 additions & 0 deletions tools/perf/arch/x86/util/header.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../../util/header.h"

/*
 * Execute the cpuid instruction with 'op' loaded into the a register and
 * store the resulting a, b, c and d register values through the pointers
 * a, b, c and d.
 *
 * The b register is never named as an asm output: its value is copied to
 * esi inside the asm and handed back through the "S" constraint. The raw
 * .byte 0x53 / .byte 0x5b bytes bracket the sequence.
 * NOTE(review): these look like the one-byte push/pop encodings of the b
 * register, used to save and restore it around cpuid - confirm.
 */
static inline void
cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c,
unsigned int *d)
{
__asm__ __volatile__ (".byte 0x53\n\tcpuid\n\t"
"movl %%ebx, %%esi\n\t.byte 0x5b"
: "=a" (*a),
"=S" (*b),
"=c" (*c),
"=d" (*d)
: "a" (op));
}

/*
 * get_cpuid - format the CPU identification as "vendor,family,model,step".
 *
 * The vendor string is assembled from the b, d and c values returned by
 * cpuid leaf 0. Family, model and stepping are decoded from the a value of
 * leaf 1; when leaf 1 is not supported they are reported as -1.
 *
 * @buffer: destination for the NUL-terminated string
 * @sz:     size of buffer in bytes
 *
 * Returns 0 when the whole string fit, -1 when buffer is NULL, sz is 0,
 * formatting failed or the output would have been truncated.
 */
int
get_cpuid(char *buffer, size_t sz)
{
	unsigned int a, b, c, d, lvl;
	int family = -1, model = -1, step = -1;
	int nb;
	char vendor[16];

	/* snprintf() stores nothing when sz is 0; never inspect such a buffer */
	if (buffer == NULL || sz == 0)
		return -1;

	cpuid(0, &lvl, &b, &c, &d);

	/* raw register bytes, not C strings: copy them with memcpy() */
	memcpy(&vendor[0], &b, 4);
	memcpy(&vendor[4], &d, 4);
	memcpy(&vendor[8], &c, 4);
	vendor[12] = '\0';

	if (lvl >= 1) {
		cpuid(1, &a, &b, &c, &d);

		family = (a >> 8) & 0xf;  /* bits 11 - 8 */
		model  = (a >> 4) & 0xf;  /* bits 7 - 4 */
		step   = a & 0xf;         /* bits 3 - 0 */

		/* extended family */
		if (family == 0xf)
			family += (a >> 20) & 0xff;

		/* extended model */
		if (family >= 0x6)
			model += ((a >> 16) & 0xf) << 4;
	}

	/* %d matches the int fields and keeps the -1 "unknown" marker readable */
	nb = snprintf(buffer, sz, "%s,%d,%d,%d", vendor, family, model, step);

	/*
	 * snprintf() returns the length the full string needs; anything
	 * >= sz means the data did not entirely fit.
	 */
	if (nb < 0 || (size_t)nb >= sz)
		return -1;

	return 0;
}
15 changes: 15 additions & 0 deletions tools/perf/builtin-record.c
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,19 @@ static int __cmd_record(int argc, const char **argv)
if (have_tracepoints(&evsel_list->entries))
perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

perf_header__set_feat(&session->header, HEADER_HOSTNAME);
perf_header__set_feat(&session->header, HEADER_OSRELEASE);
perf_header__set_feat(&session->header, HEADER_ARCH);
perf_header__set_feat(&session->header, HEADER_CPUDESC);
perf_header__set_feat(&session->header, HEADER_NRCPUS);
perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
perf_header__set_feat(&session->header, HEADER_CMDLINE);
perf_header__set_feat(&session->header, HEADER_VERSION);
perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
perf_header__set_feat(&session->header, HEADER_CPUID);

/* 512 kiB: default amount of unprivileged mlocked memory */
if (mmap_pages == UINT_MAX)
mmap_pages = (512 * 1024) / page_size;
Expand Down Expand Up @@ -800,6 +813,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
int err = -ENOMEM;
struct perf_evsel *pos;

perf_header__set_cmdline(argc, argv);

evsel_list = perf_evlist__new(NULL, NULL);
if (evsel_list == NULL)
return -ENOMEM;
Expand Down
6 changes: 6 additions & 0 deletions tools/perf/builtin-report.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ static char const *input_name = "perf.data";
static bool force, use_tui, use_stdio;
static bool hide_unresolved;
static bool dont_use_callchains;
static bool show_full_info;

static bool show_threads;
static struct perf_read_values show_threads_values;
Expand Down Expand Up @@ -273,6 +274,9 @@ static int __cmd_report(void)
goto out_delete;
}

if (use_browser <= 0)
perf_session__fprintf_info(session, stdout, show_full_info);

if (show_threads)
perf_read_values_init(&show_threads_values);

Expand Down Expand Up @@ -485,6 +489,8 @@ static const struct option options[] = {
OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
"Look for files with symbols relative to this directory"),
OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
OPT_BOOLEAN('I', "show-info", &show_full_info,
"Display extended information about perf.data file"),
OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
"Specify disassembler style (e.g. -M intel for intel syntax)"),
OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
Expand Down
6 changes: 5 additions & 1 deletion tools/perf/builtin-script.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ static u64 last_timestamp;
static u64 nr_unordered;
extern const struct option record_options[];
static bool no_callchain;
static bool show_full_info;
static const char *cpu_list;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);

Expand Down Expand Up @@ -1083,7 +1084,8 @@ static const struct option options[] = {
"comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr",
parse_output_fields),
OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),

OPT_BOOLEAN('I', "show-info", &show_full_info,
"display extended information from perf.data file"),
OPT_END()
};

Expand Down Expand Up @@ -1268,6 +1270,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
return -1;
}

perf_session__fprintf_info(session, stdout, show_full_info);

if (!no_callchain)
symbol_conf.use_callchain = true;
else
Expand Down
1 change: 0 additions & 1 deletion tools/perf/builtin.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#include "util/util.h"
#include "util/strbuf.h"

extern const char perf_version_string[];
extern const char perf_usage_string[];
extern const char perf_more_info_string[];

Expand Down
11 changes: 11 additions & 0 deletions tools/perf/perf.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,21 @@ void get_term_dimensions(struct winsize *ws);
#include "../../arch/x86/include/asm/unistd.h"
#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
#define CPUINFO_PROC "model name"
#endif

#if defined(__x86_64__)
#include "../../arch/x86/include/asm/unistd.h"
#define rmb() asm volatile("lfence" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
#define CPUINFO_PROC "model name"
#endif

#ifdef __powerpc__
#include "../../arch/powerpc/include/asm/unistd.h"
#define rmb() asm volatile ("sync" ::: "memory")
#define cpu_relax() asm volatile ("" ::: "memory");
#define CPUINFO_PROC "cpu"
#endif

#ifdef __s390__
Expand All @@ -37,30 +40,35 @@ void get_term_dimensions(struct winsize *ws);
# define rmb() asm volatile("" ::: "memory")
#endif
#define cpu_relax() asm volatile("" ::: "memory")
#define CPUINFO_PROC "cpu type"
#endif

#ifdef __hppa__
#include "../../arch/parisc/include/asm/unistd.h"
#define rmb() asm volatile("" ::: "memory")
#define cpu_relax() asm volatile("" ::: "memory");
#define CPUINFO_PROC "cpu"
#endif

#ifdef __sparc__
#include "../../arch/sparc/include/asm/unistd.h"
#define rmb() asm volatile("":::"memory")
#define cpu_relax() asm volatile("":::"memory")
#define CPUINFO_PROC "cpu"
#endif

#ifdef __alpha__
#include "../../arch/alpha/include/asm/unistd.h"
#define rmb() asm volatile("mb" ::: "memory")
#define cpu_relax() asm volatile("" ::: "memory")
#define CPUINFO_PROC "cpu model"
#endif

#ifdef __ia64__
#include "../../arch/ia64/include/asm/unistd.h"
#define rmb() asm volatile ("mf" ::: "memory")
#define cpu_relax() asm volatile ("hint @pause" ::: "memory")
#define CPUINFO_PROC "model name"
#endif

#ifdef __arm__
Expand All @@ -71,6 +79,7 @@ void get_term_dimensions(struct winsize *ws);
*/
#define rmb() ((void(*)(void))0xffff0fa0)()
#define cpu_relax() asm volatile("":::"memory")
#define CPUINFO_PROC "Processor"
#endif

#ifdef __mips__
Expand All @@ -83,6 +92,7 @@ void get_term_dimensions(struct winsize *ws);
: /* no input */ \
: "memory")
#define cpu_relax() asm volatile("" ::: "memory")
#define CPUINFO_PROC "cpu model"
#endif

#include <time.h>
Expand Down Expand Up @@ -171,5 +181,6 @@ struct ip_callchain {
};

extern bool perf_host, perf_guest;
extern const char perf_version_string[];

#endif
Loading

0 comments on commit fbe96f2

Please sign in to comment.