-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
perf/x86: Add support for Intel Xeon-Phi Knights Corner PMU
The following patch adds perf_event support for the Xeon-Phi PMU, as documented in the "Intel Xeon Phi Coprocessor (codename: Knights Corner) Performance Monitoring Units" manual. Even though it is a co-processor, a Phi runs a full Linux environment and can support performance counters. This is just barebones support; it does not add support for interesting new features such as the SPFLT instruction that allows starting/stopping events without entering the kernel. The PMU internally is just like that of an original Pentium, but a "P6-like" MSR interface is provided. The interface is different enough from a real P6 that it's not easy (or practical) to re-use the code in perf_event_p6.c. Acked-by: Lawrence F Meadows <lawrence.f.meadows@intel.com> Acked-by: Cyrill Gorcunov <gorcunov@openvz.org> Signed-off-by: Vince Weaver <vincent.weaver@maine.edu> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> Cc: eranian@gmail.com Cc: Lawrence F <lawrence.f.meadows@intel.com> Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1209261405320.8398@vincent-weaver-1.um.maine.edu Signed-off-by: Ingo Molnar <mingo@kernel.org>
- Loading branch information
Vince Weaver
authored and
Ingo Molnar
committed
Oct 4, 2012
1 parent
b3eda8d
commit e717bf4
Showing
6 changed files
with
262 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,248 @@ | ||
/* Driver for Intel Xeon Phi "Knights Corner" PMU */ | ||
|
||
#include <linux/perf_event.h> | ||
#include <linux/types.h> | ||
|
||
#include "perf_event.h" | ||
|
||
static const u64 knc_perfmon_event_map[] = | ||
{ | ||
[PERF_COUNT_HW_CPU_CYCLES] = 0x002a, | ||
[PERF_COUNT_HW_INSTRUCTIONS] = 0x0016, | ||
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028, | ||
[PERF_COUNT_HW_CACHE_MISSES] = 0x0029, | ||
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012, | ||
[PERF_COUNT_HW_BRANCH_MISSES] = 0x002b, | ||
}; | ||
|
||
static __initconst u64 knc_hw_cache_event_ids | ||
[PERF_COUNT_HW_CACHE_MAX] | ||
[PERF_COUNT_HW_CACHE_OP_MAX] | ||
[PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
{ | ||
[ C(L1D) ] = { | ||
[ C(OP_READ) ] = { | ||
/* On Xeon Phi event "0" is a valid DATA_READ */ | ||
/* (L1 Data Cache Reads) Instruction. */ | ||
/* We code this as ARCH_PERFMON_EVENTSEL_INT as this */ | ||
/* bit will always be set in x86_pmu_hw_config(). */ | ||
[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, | ||
/* DATA_READ */ | ||
[ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */ | ||
}, | ||
[ C(OP_WRITE) ] = { | ||
[ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ | ||
[ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */ | ||
}, | ||
[ C(OP_PREFETCH) ] = { | ||
[ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */ | ||
[ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */ | ||
}, | ||
}, | ||
[ C(L1I ) ] = { | ||
[ C(OP_READ) ] = { | ||
[ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ | ||
[ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */ | ||
}, | ||
[ C(OP_WRITE) ] = { | ||
[ C(RESULT_ACCESS) ] = -1, | ||
[ C(RESULT_MISS) ] = -1, | ||
}, | ||
[ C(OP_PREFETCH) ] = { | ||
[ C(RESULT_ACCESS) ] = 0x0, | ||
[ C(RESULT_MISS) ] = 0x0, | ||
}, | ||
}, | ||
[ C(LL ) ] = { | ||
[ C(OP_READ) ] = { | ||
[ C(RESULT_ACCESS) ] = 0, | ||
[ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */ | ||
}, | ||
[ C(OP_WRITE) ] = { | ||
[ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */ | ||
[ C(RESULT_MISS) ] = 0, | ||
}, | ||
[ C(OP_PREFETCH) ] = { | ||
[ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */ | ||
[ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */ | ||
}, | ||
}, | ||
[ C(DTLB) ] = { | ||
[ C(OP_READ) ] = { | ||
[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, | ||
/* DATA_READ */ | ||
/* see note on L1 OP_READ */ | ||
[ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ | ||
}, | ||
[ C(OP_WRITE) ] = { | ||
[ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ | ||
[ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ | ||
}, | ||
[ C(OP_PREFETCH) ] = { | ||
[ C(RESULT_ACCESS) ] = 0x0, | ||
[ C(RESULT_MISS) ] = 0x0, | ||
}, | ||
}, | ||
[ C(ITLB) ] = { | ||
[ C(OP_READ) ] = { | ||
[ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ | ||
[ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */ | ||
}, | ||
[ C(OP_WRITE) ] = { | ||
[ C(RESULT_ACCESS) ] = -1, | ||
[ C(RESULT_MISS) ] = -1, | ||
}, | ||
[ C(OP_PREFETCH) ] = { | ||
[ C(RESULT_ACCESS) ] = -1, | ||
[ C(RESULT_MISS) ] = -1, | ||
}, | ||
}, | ||
[ C(BPU ) ] = { | ||
[ C(OP_READ) ] = { | ||
[ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */ | ||
[ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */ | ||
}, | ||
[ C(OP_WRITE) ] = { | ||
[ C(RESULT_ACCESS) ] = -1, | ||
[ C(RESULT_MISS) ] = -1, | ||
}, | ||
[ C(OP_PREFETCH) ] = { | ||
[ C(RESULT_ACCESS) ] = -1, | ||
[ C(RESULT_MISS) ] = -1, | ||
}, | ||
}, | ||
}; | ||
|
||
|
||
static u64 knc_pmu_event_map(int hw_event) | ||
{ | ||
return knc_perfmon_event_map[hw_event]; | ||
} | ||
|
||
static struct event_constraint knc_event_constraints[] = | ||
{ | ||
INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */ | ||
INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */ | ||
INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */ | ||
INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */ | ||
INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */ | ||
INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */ | ||
INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */ | ||
INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */ | ||
INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */ | ||
INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */ | ||
INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */ | ||
INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */ | ||
INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */ | ||
INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */ | ||
INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */ | ||
INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */ | ||
INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */ | ||
INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */ | ||
INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */ | ||
INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */ | ||
INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */ | ||
EVENT_CONSTRAINT_END | ||
}; | ||
|
||
/* KNC global performance-monitoring MSR addresses. */
#define MSR_KNC_IA32_PERF_GLOBAL_STATUS		0x2d
#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL	0x2e
#define MSR_KNC_IA32_PERF_GLOBAL_CTRL		0x2f

/*
 * Bits of MSR_KNC_IA32_PERF_GLOBAL_CTRL that knc_pmu_enable_all() sets
 * and knc_pmu_disable_all() clears, one per counter.
 */
#define KNC_ENABLE_COUNTER0			0x1
#define KNC_ENABLE_COUNTER1			0x2
|
||
static void knc_pmu_disable_all(void) | ||
{ | ||
u64 val; | ||
|
||
rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | ||
val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); | ||
wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | ||
} | ||
|
||
static void knc_pmu_enable_all(int added) | ||
{ | ||
u64 val; | ||
|
||
rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | ||
val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); | ||
wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); | ||
} | ||
|
||
static inline void | ||
knc_pmu_disable_event(struct perf_event *event) | ||
{ | ||
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
struct hw_perf_event *hwc = &event->hw; | ||
u64 val; | ||
|
||
val = hwc->config; | ||
if (cpuc->enabled) | ||
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; | ||
|
||
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val); | ||
} | ||
|
||
static void knc_pmu_enable_event(struct perf_event *event) | ||
{ | ||
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
struct hw_perf_event *hwc = &event->hw; | ||
u64 val; | ||
|
||
val = hwc->config; | ||
if (cpuc->enabled) | ||
val |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
|
||
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val); | ||
} | ||
|
||
PMU_FORMAT_ATTR(event, "config:0-7" ); | ||
PMU_FORMAT_ATTR(umask, "config:8-15" ); | ||
PMU_FORMAT_ATTR(edge, "config:18" ); | ||
PMU_FORMAT_ATTR(inv, "config:23" ); | ||
PMU_FORMAT_ATTR(cmask, "config:24-31" ); | ||
|
||
static struct attribute *intel_knc_formats_attr[] = { | ||
&format_attr_event.attr, | ||
&format_attr_umask.attr, | ||
&format_attr_edge.attr, | ||
&format_attr_inv.attr, | ||
&format_attr_cmask.attr, | ||
NULL, | ||
}; | ||
|
||
static __initconst struct x86_pmu knc_pmu = { | ||
.name = "knc", | ||
.handle_irq = x86_pmu_handle_irq, | ||
.disable_all = knc_pmu_disable_all, | ||
.enable_all = knc_pmu_enable_all, | ||
.enable = knc_pmu_enable_event, | ||
.disable = knc_pmu_disable_event, | ||
.hw_config = x86_pmu_hw_config, | ||
.schedule_events = x86_schedule_events, | ||
.eventsel = MSR_KNC_EVNTSEL0, | ||
.perfctr = MSR_KNC_PERFCTR0, | ||
.event_map = knc_pmu_event_map, | ||
.max_events = ARRAY_SIZE(knc_perfmon_event_map), | ||
.apic = 1, | ||
.max_period = (1ULL << 31) - 1, | ||
.version = 0, | ||
.num_counters = 2, | ||
/* in theory 40 bits, early silicon is buggy though */ | ||
.cntval_bits = 32, | ||
.cntval_mask = (1ULL << 32) - 1, | ||
.get_event_constraints = x86_get_event_constraints, | ||
.event_constraints = knc_event_constraints, | ||
.format_attrs = intel_knc_formats_attr, | ||
}; | ||
|
||
__init int knc_pmu_init(void) | ||
{ | ||
x86_pmu = knc_pmu; | ||
|
||
memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, | ||
sizeof(hw_cache_event_ids)); | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters