Skip to content

Commit

Permalink
perf/x86/intel: Add mem-loads/stores support for Haswell
Browse files Browse the repository at this point in the history
mem-loads is basically the same as Sandy Bridge,
but we use a separate string for changes later.

Haswell doesn't support the full precise store mode,
so we emulate it using the "DataLA" facility.
This allows to do everything, but for data sources we
can only detect L1 hit or not.

There is no explicit enable bit anymore, so we have
to tie it to a perf internal only flag.

The address is supported for all memory related PEBS
events with DataLA. Instead of only logging for the
load and store events we allow logging it for all
(it will be simply 0 if the current event does not
support it)

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Andi Kleen <ak@linux.jf.intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: http://lkml.kernel.org/r/1371515812-9646-7-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Andi Kleen authored and Ingo Molnar committed Jun 19, 2013
1 parent 135c561 commit f9134f3
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 7 deletions.
6 changes: 6 additions & 0 deletions arch/x86/kernel/cpu/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ struct event_constraint {
*/
#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */
#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */
#define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style st data sampling */

struct amd_nb {
int nb_id; /* NorthBridge id */
Expand Down Expand Up @@ -250,6 +251,11 @@ struct cpu_hw_events {
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)

/* DataLA version of store sampling without extra enable bit. */
#define INTEL_PST_HSW_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)

#define EVENT_CONSTRAINT_END \
EVENT_CONSTRAINT(0, 0, 0)

Expand Down
10 changes: 10 additions & 0 deletions arch/x86/kernel/cpu/perf_event_intel.c
Original file line number Diff line number Diff line change
Expand Up @@ -2036,6 +2036,15 @@ static __init void intel_nehalem_quirk(void)
}
}

EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82")

static struct attribute *hsw_events_attrs[] = {
EVENT_PTR(mem_ld_hsw),
EVENT_PTR(mem_st_hsw),
NULL
};

__init int intel_pmu_init(void)
{
union cpuid10_edx edx;
Expand Down Expand Up @@ -2279,6 +2288,7 @@ __init int intel_pmu_init(void)

x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
x86_pmu.cpu_events = hsw_events_attrs;
pr_cont("Haswell events, ");
break;

Expand Down
32 changes: 25 additions & 7 deletions arch/x86/kernel/cpu/perf_event_intel_ds.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,19 @@ static u64 precise_store_data(u64 status)
return val;
}

static u64 precise_store_data_hsw(u64 status)
{
union perf_mem_data_src dse;

dse.val = 0;
dse.mem_op = PERF_MEM_OP_STORE;
dse.mem_lvl = PERF_MEM_LVL_NA;
if (status & 1)
dse.mem_lvl = PERF_MEM_LVL_L1;
/* Nothing else supported. Sorry. */
return dse.val;
}

static u64 load_latency_data(u64 status)
{
union intel_x86_pebs_dse dse;
Expand Down Expand Up @@ -566,13 +579,13 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {

struct event_constraint intel_hsw_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */
/* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
/* MEM_UOPS_RETIRED.STLB_MISS_STORES */
Expand All @@ -582,7 +595,7 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
/* MEM_UOPS_RETIRED.SPLIT_STORES */
INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
Expand Down Expand Up @@ -759,7 +772,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
return;

fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST |
PERF_X86_EVENT_PEBS_ST_HSW);

perf_sample_data_init(&data, 0, event->hw.last_period);

Expand All @@ -770,9 +784,6 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* if PEBS-LL or PreciseStore
*/
if (fll || fst) {
if (sample_type & PERF_SAMPLE_ADDR)
data.addr = pebs->dla;

/*
* Use latency for weight (only avail with PEBS-LL)
*/
Expand All @@ -785,6 +796,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
if (sample_type & PERF_SAMPLE_DATA_SRC) {
if (fll)
data.data_src.val = load_latency_data(pebs->dse);
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
data.data_src.val =
precise_store_data_hsw(pebs->dse);
else
data.data_src.val = precise_store_data(pebs->dse);
}
Expand Down Expand Up @@ -814,6 +828,10 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
else
regs.flags &= ~PERF_EFLAGS_EXACT;

if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
x86_pmu.intel_cap.pebs_format >= 1)
data.addr = pebs->dla;

if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;

Expand Down

0 comments on commit f9134f3

Please sign in to comment.