Skip to content

Commit

Permalink
perf/x86/intel: Add perf core PMU support for Sapphire Rapids
Browse files Browse the repository at this point in the history
Add perf core PMU support for the Intel Sapphire Rapids server, which is
the successor of the Intel Ice Lake server. The enabling code is based
on Ice Lake, but there are several new features introduced.

The event encoding is changed and simplified, e.g., the event codes
which are below 0x90 are restricted to counters 0-3. The event codes
which above 0x90 are likely to have no restrictions. The event
constraints, extra_regs(), and hardware cache events table are changed
accordingly.

A new Precise Distribution (PDist) facility is introduced, which
further minimizes the skid when a precise event is programmed on the GP
counter 0. Enable the Precise Distribution (PDist) facility with :ppp
event. For this facility to work, the period must be initialized with a
value larger than 127. Add spr_limit_period() to apply the limit for
:ppp event.

Two new data source fields, data block & address block, are added in the
PEBS Memory Info Record for the load latency event. To enable the
feature,
- An auxiliary event has to be enabled together with the load latency
  event on Sapphire Rapids. A new flag PMU_FL_MEM_LOADS_AUX is
  introduced to indicate the case. A new event, mem-loads-aux, is
  exposed to sysfs for the user tool.
  Add a check in hw_config(). If the auxiliary event is not detected,
  return an unique error -ENODATA.
- The union perf_mem_data_src is extended to support the new fields.
- Ice Lake and earlier models do not support block information, but the
  fields may be set by HW on some machines. Add pebs_no_block to
  explicitly indicate the previous platforms which don't support the new
  block fields. Accessing the new block fields are ignored on those
  platforms.

A new store Latency facility is introduced, which leverages the PEBS
facility where it can provide additional information about sampled
stores. The additional information includes the data address, memory
auxiliary info (e.g. Data Source, STLB miss) and the latency of the
store access. To enable the facility, the new event (0x02cd) has to be
programed on the GP counter 0. A new flag PERF_X86_EVENT_PEBS_STLAT is
introduced to indicate the event. The store_latency_data() is introduced
to parse the memory auxiliary info.

The layout of access latency field of PEBS Memory Info Record has been
changed. Two latency, instruction latency (bit 15:0) and cache access
latency (bit 47:32) are recorded.
- The cache access latency is similar to previous memory access latency.
  For loads, the latency starts by the actual cache access until the
  data is returned by the memory subsystem.
  For stores, the latency starts when the demand write accesses the L1
  data cache and lasts until the cacheline write is completed in the
  memory subsystem.
  The cache access latency is stored in low 32bits of the sample type
  PERF_SAMPLE_WEIGHT_STRUCT.
- The instruction latency starts by the dispatch of the load operation
  for execution and lasts until completion of the instruction it belongs
  to.
  Add a new flag PMU_FL_INSTR_LATENCY to indicate the instruction
  latency support. The instruction latency is stored in the bit 47:32
  of the sample type PERF_SAMPLE_WEIGHT_STRUCT.

Extends the PERF_METRICS MSR to feature TMA method level 2 metrics. The
lower half of the register is the TMA level 1 metrics (legacy). The
upper half is also divided into four 8-bit fields for the new level 2
metrics. Expose all eight Topdown metrics events to user space.

The full description for the SPR features can be found at Intel
Architecture Instruction Set Extensions and Future Features
Programming Reference, 319433-041.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/1611873611-156687-5-git-send-email-kan.liang@linux.intel.com
  • Loading branch information
Kan Liang authored and Peter Zijlstra committed Feb 1, 2021
1 parent 1ab5f23 commit 61b985e
Show file tree
Hide file tree
Showing 5 changed files with 443 additions and 14 deletions.
307 changes: 302 additions & 5 deletions arch/x86/events/intel/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,55 @@ static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};

static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
EVENT_EXTRA_END
};

static struct event_constraint intel_spr_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7),

INTEL_EVENT_CONSTRAINT(0x2e, 0xff),
INTEL_EVENT_CONSTRAINT(0x3c, 0xff),
/*
* Generally event codes < 0x90 are restricted to counters 0-3.
* The 0x2E and 0x3C are exception, which has no restriction.
*/
INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf),

INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf),
INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf),
INTEL_UEVENT_CONSTRAINT(0x08a3, 0xf),
INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1),
INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1),
INTEL_UEVENT_CONSTRAINT(0x02cd, 0x1),
INTEL_EVENT_CONSTRAINT(0xce, 0x1),
INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf),
/*
* Generally event codes >= 0x90 are likely to have no restrictions.
* The exception are defined as above.
*/
INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0xff),

EVENT_CONSTRAINT_END
};


EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
Expand Down Expand Up @@ -314,11 +363,15 @@ EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
"4", "2");

EVENT_ATTR_STR(slots, slots, "event=0x00,umask=0x4");
EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80");
EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec, "event=0x00,umask=0x81");
EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound, "event=0x00,umask=0x82");
EVENT_ATTR_STR(topdown-be-bound, td_be_bound, "event=0x00,umask=0x83");
EVENT_ATTR_STR(slots, slots, "event=0x00,umask=0x4");
EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80");
EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec, "event=0x00,umask=0x81");
EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound, "event=0x00,umask=0x82");
EVENT_ATTR_STR(topdown-be-bound, td_be_bound, "event=0x00,umask=0x83");
EVENT_ATTR_STR(topdown-heavy-ops, td_heavy_ops, "event=0x00,umask=0x84");
EVENT_ATTR_STR(topdown-br-mispredict, td_br_mispredict, "event=0x00,umask=0x85");
EVENT_ATTR_STR(topdown-fetch-lat, td_fetch_lat, "event=0x00,umask=0x86");
EVENT_ATTR_STR(topdown-mem-bound, td_mem_bound, "event=0x00,umask=0x87");

static struct attribute *snb_events_attrs[] = {
EVENT_PTR(td_slots_issued),
Expand Down Expand Up @@ -384,6 +437,108 @@ static u64 intel_pmu_event_map(int hw_event)
return intel_perfmon_event_map[hw_event];
}

static __initconst const u64 spr_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(L1D ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0,
[ C(RESULT_MISS) ] = 0xe124,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0,
},
},
[ C(L1I ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_MISS) ] = 0xe424,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x12a,
[ C(RESULT_MISS) ] = 0x12a,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x12a,
[ C(RESULT_MISS) ] = 0x12a,
},
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0,
[ C(RESULT_MISS) ] = 0xe12,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0,
[ C(RESULT_MISS) ] = 0xe13,
},
},
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = 0xe11,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(BPU ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x4c4,
[ C(RESULT_MISS) ] = 0x4c5,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x12a,
[ C(RESULT_MISS) ] = 0x12a,
},
},
};

static __initconst const u64 spr_hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x10001,
[ C(RESULT_MISS) ] = 0x3fbfc00001,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x3f3ffc0002,
[ C(RESULT_MISS) ] = 0x3f3fc00002,
},
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x10c000001,
[ C(RESULT_MISS) ] = 0x3fb3000001,
},
},
};

/*
* Notes on the events:
* - data reads do not include code reads (comparable to earlier tables)
Expand Down Expand Up @@ -3478,6 +3633,17 @@ static bool is_available_metric_event(struct perf_event *event)
event->attr.config <= INTEL_TD_METRIC_AVAILABLE_MAX;
}

static inline bool is_mem_loads_event(struct perf_event *event)
{
return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0xcd, .umask=0x01);
}

static inline bool is_mem_loads_aux_event(struct perf_event *event)
{
return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0x03, .umask=0x82);
}


static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
Expand Down Expand Up @@ -3580,6 +3746,33 @@ static int intel_pmu_hw_config(struct perf_event *event)
}
}

/*
* The load latency event X86_CONFIG(.event=0xcd, .umask=0x01) on SPR
* doesn't function quite right. As a work-around it needs to always be
* co-scheduled with a auxiliary event X86_CONFIG(.event=0x03, .umask=0x82).
* The actual count of this second event is irrelevant it just needs
* to be active to make the first event function correctly.
*
* In a group, the auxiliary event must be in front of the load latency
* event. The rule is to simplify the implementation of the check.
* That's because perf cannot have a complete group at the moment.
*/
if (x86_pmu.flags & PMU_FL_MEM_LOADS_AUX &&
(event->attr.sample_type & PERF_SAMPLE_DATA_SRC) &&
is_mem_loads_event(event)) {
struct perf_event *leader = event->group_leader;
struct perf_event *sibling = NULL;

if (!is_mem_loads_aux_event(leader)) {
for_each_sibling_event(sibling, leader) {
if (is_mem_loads_aux_event(sibling))
break;
}
if (list_entry_is_head(sibling, &leader->sibling_list, sibling_list))
return -ENODATA;
}
}

if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
return 0;

Expand Down Expand Up @@ -3759,6 +3952,29 @@ icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
return hsw_get_event_constraints(cpuc, idx, event);
}

static struct event_constraint *
spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
struct event_constraint *c;

c = icl_get_event_constraints(cpuc, idx, event);

/*
* The :ppp indicates the Precise Distribution (PDist) facility, which
* is only supported on the GP counter 0. If a :ppp event which is not
* available on the GP counter 0, error out.
*/
if (event->attr.precise_ip == 3) {
if (c->idxmsk64 & BIT_ULL(0))
return &counter0_constraint;

return &emptyconstraint;
}

return c;
}

static struct event_constraint *
glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
Expand Down Expand Up @@ -3848,6 +4064,14 @@ static u64 nhm_limit_period(struct perf_event *event, u64 left)
return max(left, 32ULL);
}

static u64 spr_limit_period(struct perf_event *event, u64 left)
{
if (event->attr.precise_ip == 3)
return max(left, 128ULL);

return left;
}

PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
Expand Down Expand Up @@ -4559,6 +4783,42 @@ static struct attribute *icl_tsx_events_attrs[] = {
NULL,
};


EVENT_ATTR_STR(mem-stores, mem_st_spr, "event=0xcd,umask=0x2");
EVENT_ATTR_STR(mem-loads-aux, mem_ld_aux, "event=0x03,umask=0x82");

static struct attribute *spr_events_attrs[] = {
EVENT_PTR(mem_ld_hsw),
EVENT_PTR(mem_st_spr),
EVENT_PTR(mem_ld_aux),
NULL,
};

static struct attribute *spr_td_events_attrs[] = {
EVENT_PTR(slots),
EVENT_PTR(td_retiring),
EVENT_PTR(td_bad_spec),
EVENT_PTR(td_fe_bound),
EVENT_PTR(td_be_bound),
EVENT_PTR(td_heavy_ops),
EVENT_PTR(td_br_mispredict),
EVENT_PTR(td_fetch_lat),
EVENT_PTR(td_mem_bound),
NULL,
};

static struct attribute *spr_tsx_events_attrs[] = {
EVENT_PTR(tx_start),
EVENT_PTR(tx_abort),
EVENT_PTR(tx_commit),
EVENT_PTR(tx_capacity_read),
EVENT_PTR(tx_capacity_write),
EVENT_PTR(tx_conflict),
EVENT_PTR(cycles_t),
EVENT_PTR(cycles_ct),
NULL,
};

static ssize_t freeze_on_smi_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
Expand Down Expand Up @@ -5341,6 +5601,43 @@ __init int intel_pmu_init(void)
name = "icelake";
break;

case INTEL_FAM6_SAPPHIRERAPIDS_X:
pmem = true;
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));

x86_pmu.event_constraints = intel_spr_event_constraints;
x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints;
x86_pmu.extra_regs = intel_spr_extra_regs;
x86_pmu.limit_period = spr_limit_period;
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.pebs_block = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.flags |= PMU_FL_PEBS_ALL;
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;

x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = spr_get_event_constraints;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
extra_skl_attr = skl_format_attr;
mem_attr = spr_events_attrs;
td_attr = spr_td_events_attrs;
tsx_attr = spr_tsx_events_attrs;
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
x86_pmu.lbr_pt_coexist = true;
intel_pmu_pebs_data_source_skl(pmem);
x86_pmu.num_topdown_events = 8;
x86_pmu.update_topdown_event = icl_update_topdown_event;
x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
pr_cont("Sapphire Rapids events, ");
name = "sapphire_rapids";
break;

default:
switch (x86_pmu.version) {
case 1:
Expand Down
Loading

0 comments on commit 61b985e

Please sign in to comment.