Skip to content

Commit

Permalink
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/li…
Browse files Browse the repository at this point in the history
…nux/kernel/git/tip/tip

Pull perf events update from Ingo Molnar:
 "On the kernel side there's few changes, the one that stands out is
  PEBS machine state sampling support on x86, by Stephane Eranian.

  On the tooling side:

  User visible tooling changes:

   - Don't open the DWARF info multiple times, keeping instead a dwfl
     handle in struct dso, greatly speeding up 'perf report' on powerpc.
     (Sukadev Bhattiprolu)

   - Introduce PARSE_OPT_DISABLED option flag and use it to avoid
     showing undersired options in tools that provides frontends to
     'perf record', like sched, kvm, etc (Namhyung Kim)

   - Fallback to kallsyms when using the minimal 'ELF' loader (Arnaldo
     Carvalho de Melo)

   - Fix annotation with kcore (Adrian Hunter)

   - Support source line numbers in annotate using a hotkey (Andi Kleen)

   - Callchain improvements including:
     * Enable printing the srcline in the history
     * Make get_srcline fall back to sym+offset (Andi Kleen)

   - TUI hist_entry browser fixes, including showing missing overhead
     value for first level callchain.  Detected comparing the output of
     --stdio/--gui (that matched) with --tui, that had this problem.
     (Namhyung Kim)

   - Support handling complete branch stacks as histograms (Andi Kleen)

  Tooling infrastructure changes:

   - Prep work for supporting per-pkg and snapshot counters in 'perf
     stat' (Jiri Olsa)

   - 'perf stat' refactorings, moving stuff from it to evsel.c to use in
     per-pkg/snapshot format changes (Jiri Olsa)

   - Add per-pkg format file parsing (Matt Fleming)

   - Clean up libelf feature support code (Namhyung Kim)

   - Add gzip decompression support for kernel modules (Namhyung Kim)

   - More prep patches for Intel PT, including a a thread stack and more
     stuff made available via the database export mechanism (Adrian
     Hunter)

   - More Intel PT work, including a facility to export sample data
     (comms, threads, symbol names, etc) in a database friendly way,
     with an script to use this to create a postgresql database.
     (Adrian Hunter)

   - Make sure that thread->mg->machine points to the machine where the
     thread exists (it was being set only for the kmaps kernel modules
     case, do it as well for the mmaps) and use it to shorten function
     signatures (Arnaldo Carvalho de Melo)

  ... and lots of other fixes and smaller improvements"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (91 commits)
  perf report: In branch stack mode use address history sorting
  perf report: Add --branch-history option
  perf callchain: Support handling complete branch stacks as histograms
  perf stat: Add support for snapshot counters
  perf stat: Add support for per-pkg counters
  perf tools: Remove perf_evsel__read interface
  perf stat: Use read_counter in read_counter_aggr
  perf stat: Make read_counter work over the thread dimension
  perf stat: Use perf_evsel__read_cb in read_counter
  perf tools: Add snapshot format file parsing
  perf tools: Add per-pkg format file parsing
  perf evsel: Introduce perf_evsel__read_cb function
  perf evsel: Introduce perf_counts_values__scale function
  perf evsel: Introduce perf_evsel__compute_deltas function
  perf tools: Allow to force redirect pr_debug to stderr.
  perf tools: Fix segfault due to invalid kernel dso access
  perf callchain: Make get_srcline fall back to sym+offset
  perf symbols: Move bfd_demangle stubbing to its only user
  perf callchain: Enable printing the srcline in the history
  perf tools: Collapse first level callchain entry if it has sibling
  ...
  • Loading branch information
Linus Torvalds committed Dec 10, 2014
2 parents c301106 + cfa0bd5 commit 5706ffd
Show file tree
Hide file tree
Showing 99 changed files with 4,723 additions and 1,002 deletions.
3 changes: 3 additions & 0 deletions arch/x86/include/asm/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,9 @@ struct x86_pmu_capability {
#define IBS_CAPS_BRNTRGT (1U<<5)
#define IBS_CAPS_OPCNTEXT (1U<<6)
#define IBS_CAPS_RIPINVALIDCHK (1U<<7)
#define IBS_CAPS_OPBRNFUSE (1U<<8)
#define IBS_CAPS_FETCHCTLEXTD (1U<<9)
#define IBS_CAPS_OPDATA4 (1U<<10)

#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
| IBS_CAPS_FETCHSAM \
Expand Down
1 change: 1 addition & 0 deletions arch/x86/include/uapi/asm/msr-index.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@
#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
#define MSR_AMD64_IBSCTL 0xc001103a
#define MSR_AMD64_IBSBRTARGET 0xc001103b
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */

/* Fam 16h MSRs */
Expand Down
4 changes: 4 additions & 0 deletions arch/x86/kernel/cpu/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,10 @@ struct cpu_hw_events {
#define INTEL_UEVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)

/* Like UEVENT_CONSTRAINT, but match flags too */
#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)

#define INTEL_PLD_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
Expand Down
15 changes: 15 additions & 0 deletions arch/x86/kernel/cpu/perf_event_amd_ibs.c
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,21 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
perf_ibs->offset_max,
offset + 1);
} while (offset < offset_max);
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
/*
* Read IbsBrTarget and IbsOpData4 separately
* depending on their availability.
* Can't add to offset_max as they are staggered
*/
if (ibs_caps & IBS_CAPS_BRNTRGT) {
rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
size++;
}
if (ibs_caps & IBS_CAPS_OPDATA4) {
rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
size++;
}
}
ibs_data.size = sizeof(u64) * size;

regs = *iregs;
Expand Down
81 changes: 52 additions & 29 deletions arch/x86/kernel/cpu/perf_event_intel_ds.c
Original file line number Diff line number Diff line change
Expand Up @@ -552,18 +552,18 @@ int intel_pmu_drain_bts_buffer(void)
* PEBS
*/
struct event_constraint intel_core2_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
INTEL_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
EVENT_CONSTRAINT_END
};

struct event_constraint intel_atom_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
EVENT_CONSTRAINT_END
};

Expand All @@ -577,36 +577,36 @@ struct event_constraint intel_slm_pebs_event_constraints[] = {

struct event_constraint intel_nehalem_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
EVENT_CONSTRAINT_END
};

struct event_constraint intel_westmere_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
EVENT_CONSTRAINT_END
};

struct event_constraint intel_snb_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
Expand All @@ -617,7 +617,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
};

struct event_constraint intel_ivb_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
Expand All @@ -628,7 +628,7 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
};

struct event_constraint intel_hsw_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
Expand Down Expand Up @@ -886,6 +886,29 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
regs.bp = pebs->bp;
regs.sp = pebs->sp;

if (sample_type & PERF_SAMPLE_REGS_INTR) {
regs.ax = pebs->ax;
regs.bx = pebs->bx;
regs.cx = pebs->cx;
regs.dx = pebs->dx;
regs.si = pebs->si;
regs.di = pebs->di;
regs.bp = pebs->bp;
regs.sp = pebs->sp;

regs.flags = pebs->flags;
#ifndef CONFIG_X86_32
regs.r8 = pebs->r8;
regs.r9 = pebs->r9;
regs.r10 = pebs->r10;
regs.r11 = pebs->r11;
regs.r12 = pebs->r12;
regs.r13 = pebs->r13;
regs.r14 = pebs->r14;
regs.r15 = pebs->r15;
#endif
}

if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
regs.ip = pebs->real_ip;
regs.flags |= PERF_EFLAGS_EXACT;
Expand Down
8 changes: 8 additions & 0 deletions arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,11 @@ static struct attribute *snbep_uncore_qpi_formats_attr[] = {
static struct uncore_event_desc snbep_uncore_imc_events[] = {
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"),
INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
{ /* end: all zeroes */ },
};

Expand Down Expand Up @@ -2036,7 +2040,11 @@ static struct intel_uncore_type hswep_uncore_ha = {
static struct uncore_event_desc hswep_uncore_imc_events[] = {
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x00,umask=0x00"),
INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"),
INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
{ /* end: all zeroes */ },
};

Expand Down
37 changes: 20 additions & 17 deletions include/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ struct perf_branch_stack {
struct perf_branch_entry entries[0];
};

struct perf_regs_user {
struct perf_regs {
__u64 abi;
struct pt_regs *regs;
};
Expand Down Expand Up @@ -580,34 +580,40 @@ extern u64 perf_event_read_value(struct perf_event *event,


struct perf_sample_data {
u64 type;
/*
* Fields set by perf_sample_data_init(), group so as to
* minimize the cachelines touched.
*/
u64 addr;
struct perf_raw_record *raw;
struct perf_branch_stack *br_stack;
u64 period;
u64 weight;
u64 txn;
union perf_mem_data_src data_src;

/*
* The other fields, optionally {set,used} by
* perf_{prepare,output}_sample().
*/
u64 type;
u64 ip;
struct {
u32 pid;
u32 tid;
} tid_entry;
u64 time;
u64 addr;
u64 id;
u64 stream_id;
struct {
u32 cpu;
u32 reserved;
} cpu_entry;
u64 period;
union perf_mem_data_src data_src;
struct perf_callchain_entry *callchain;
struct perf_raw_record *raw;
struct perf_branch_stack *br_stack;
struct perf_regs_user regs_user;
struct perf_regs regs_user;
struct perf_regs regs_intr;
u64 stack_user_size;
u64 weight;
/*
* Transaction flags for abort events:
*/
u64 txn;
};
} ____cacheline_aligned;

/* default value for data source */
#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\
Expand All @@ -624,9 +630,6 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
data->raw = NULL;
data->br_stack = NULL;
data->period = period;
data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
data->regs_user.regs = NULL;
data->stack_user_size = 0;
data->weight = 0;
data->data_src.val = PERF_MEM_NA;
data->txn = 0;
Expand Down
15 changes: 14 additions & 1 deletion include/uapi/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_DATA_SRC = 1U << 15,
PERF_SAMPLE_IDENTIFIER = 1U << 16,
PERF_SAMPLE_TRANSACTION = 1U << 17,
PERF_SAMPLE_REGS_INTR = 1U << 18,

PERF_SAMPLE_MAX = 1U << 18, /* non-ABI */
PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */
};

/*
Expand Down Expand Up @@ -238,6 +239,7 @@ enum perf_event_read_format {
#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */
/* add: sample_stack_user */
#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */

/*
* Hardware event_id to monitor via a performance monitoring event:
Expand Down Expand Up @@ -334,6 +336,15 @@ struct perf_event_attr {

/* Align to u64. */
__u32 __reserved_2;
/*
* Defines set of regs to dump for each sample
* state captured on:
* - precise = 0: PMU interrupt
* - precise > 0: sampled instruction
*
* See asm/perf_regs.h for details.
*/
__u64 sample_regs_intr;
};

#define perf_flags(attr) (*(&(attr)->read_format + 1))
Expand Down Expand Up @@ -686,6 +697,8 @@ enum perf_event_type {
* { u64 weight; } && PERF_SAMPLE_WEIGHT
* { u64 data_src; } && PERF_SAMPLE_DATA_SRC
* { u64 transaction; } && PERF_SAMPLE_TRANSACTION
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
* };
*/
PERF_RECORD_SAMPLE = 9,
Expand Down
Loading

0 comments on commit 5706ffd

Please sign in to comment.