-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
perf vendor events: Add metrics for Tigerlake
Add JSON metrics for Tigerlake to perf. Based on TMA metrics 4.21 at 01.org. https://download.01.org/perfmon/ Reviewed-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Jin Yao <yao.jin@linux.intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20210719070058.4159-2-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
- Loading branch information
Jin Yao
authored and
Arnaldo Carvalho de Melo
committed
Aug 2, 2021
1 parent
4babba5
commit b9efd75
Showing
1 changed file
with
231 additions
and
0 deletions.
There are no files selected for viewing
231 changes: 231 additions & 0 deletions
231
tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,231 @@ | ||
[ | ||
{ | ||
"BriefDescription": "Instructions Per Cycle (per Logical Processor)", | ||
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", | ||
"MetricGroup": "Summary", | ||
"MetricName": "IPC" | ||
}, | ||
{ | ||
"BriefDescription": "Instructions per taken branch", | ||
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", | ||
"MetricGroup": "Branches;FetchBW;PGO", | ||
"MetricName": "IpTB" | ||
}, | ||
{ | ||
"BriefDescription": "Cycles Per Instruction (per Logical Processor)", | ||
"MetricExpr": "1 / IPC", | ||
"MetricGroup": "Pipeline", | ||
"MetricName": "CPI" | ||
}, | ||
{ | ||
"BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.", | ||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD", | ||
"MetricGroup": "Pipeline", | ||
"MetricName": "CLKS" | ||
}, | ||
{ | ||
"BriefDescription": "Instructions Per Cycle (per physical core)", | ||
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.DISTRIBUTED", | ||
"MetricGroup": "SMT;TmaL1", | ||
"MetricName": "CoreIPC" | ||
}, | ||
{ | ||
"BriefDescription": "Floating Point Operations Per Cycle", | ||
"MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / CPU_CLK_UNHALTED.DISTRIBUTED", | ||
"MetricGroup": "Flops", | ||
"MetricName": "FLOPc" | ||
}, | ||
{ | ||
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", | ||
"MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 )", | ||
"MetricGroup": "Pipeline;PortsUtil", | ||
"MetricName": "ILP" | ||
}, | ||
{ | ||
"BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", | ||
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", | ||
"MetricGroup": "BrMispredicts", | ||
"MetricName": "IpMispredict" | ||
}, | ||
{ | ||
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core", | ||
"MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED", | ||
"MetricGroup": "SMT", | ||
"MetricName": "CORE_CLKS" | ||
}, | ||
{ | ||
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)", | ||
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", | ||
"MetricGroup": "InsType", | ||
"MetricName": "IpLoad" | ||
}, | ||
{ | ||
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)", | ||
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", | ||
"MetricGroup": "InsType", | ||
"MetricName": "IpStore" | ||
}, | ||
{ | ||
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", | ||
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", | ||
"MetricGroup": "Branches;InsType", | ||
"MetricName": "IpBranch" | ||
}, | ||
{ | ||
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)", | ||
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", | ||
"MetricGroup": "Branches", | ||
"MetricName": "IpCall" | ||
}, | ||
{ | ||
"BriefDescription": "Branch instructions per taken branch", | ||
"MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", | ||
"MetricGroup": "Branches;PGO", | ||
"MetricName": "BpTkBranch" | ||
}, | ||
{ | ||
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)", | ||
"MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )", | ||
"MetricGroup": "Flops;FpArith;InsType", | ||
"MetricName": "IpFLOP" | ||
}, | ||
{ | ||
"BriefDescription": "Total number of retired Instructions, Sample with: INST_RETIRED.PREC_DIST", | ||
"MetricExpr": "INST_RETIRED.ANY", | ||
"MetricGroup": "Summary;TmaL1", | ||
"MetricName": "Instructions" | ||
}, | ||
{ | ||
"BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)", | ||
"MetricExpr": "LSD.UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", | ||
"MetricGroup": "LSD", | ||
"MetricName": "LSD_Coverage" | ||
}, | ||
{ | ||
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", | ||
"MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", | ||
"MetricGroup": "DSB;FetchBW", | ||
"MetricName": "DSB_Coverage" | ||
}, | ||
{ | ||
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", | ||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", | ||
"MetricGroup": "MemoryBound;MemoryLat", | ||
"MetricName": "Load_Miss_Real_Latency" | ||
}, | ||
{ | ||
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand loads when there is at least one such miss; per Logical Processor)", | ||
"MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", | ||
"MetricGroup": "MemoryBound;MemoryBW", | ||
"MetricName": "MLP" | ||
}, | ||
{ | ||
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", | ||
"MetricConstraint": "NO_NMI_WATCHDOG", | ||
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING ) / ( 2 * CORE_CLKS )", | ||
"MetricGroup": "MemoryTLB", | ||
"MetricName": "Page_Walks_Utilization" | ||
}, | ||
{ | ||
"BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", | ||
"MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", | ||
"MetricGroup": "MemoryBW", | ||
"MetricName": "L1D_Cache_Fill_BW" | ||
}, | ||
{ | ||
"BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", | ||
"MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", | ||
"MetricGroup": "MemoryBW", | ||
"MetricName": "L2_Cache_Fill_BW" | ||
}, | ||
{ | ||
"BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]", | ||
"MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time", | ||
"MetricGroup": "MemoryBW;Offcore", | ||
"MetricName": "L3_Cache_Access_BW" | ||
}, | ||
{ | ||
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", | ||
"MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY", | ||
"MetricGroup": "CacheMisses", | ||
"MetricName": "L1MPKI" | ||
}, | ||
{ | ||
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", | ||
"MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY", | ||
"MetricGroup": "CacheMisses", | ||
"MetricName": "L2MPKI" | ||
}, | ||
{ | ||
"BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", | ||
"MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY", | ||
"MetricGroup": "CacheMisses", | ||
"MetricName": "L3MPKI" | ||
}, | ||
{ | ||
"BriefDescription": "Average CPU Utilization", | ||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", | ||
"MetricGroup": "HPC;Summary", | ||
"MetricName": "CPU_Utilization" | ||
}, | ||
{ | ||
"BriefDescription": "Measured Average Frequency for unhalted processors [GHz]", | ||
"MetricExpr": "(CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time", | ||
"MetricGroup": "Summary;Power", | ||
"MetricName": "Average_Frequency" | ||
}, | ||
{ | ||
"BriefDescription": "Giga Floating Point Operations Per Second", | ||
"MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time", | ||
"MetricGroup": "Flops;HPC", | ||
"MetricName": "GFLOPs" | ||
}, | ||
{ | ||
"BriefDescription": "Average Frequency Utilization relative to nominal frequency", | ||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", | ||
"MetricGroup": "Power", | ||
"MetricName": "Turbo_Utilization" | ||
}, | ||
{ | ||
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active", | ||
"MetricExpr": "1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_DISTRIBUTED", | ||
"MetricGroup": "SMT", | ||
"MetricName": "SMT_2T_Utilization" | ||
}, | ||
{ | ||
"BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode", | ||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD", | ||
"MetricGroup": "OS", | ||
"MetricName": "Kernel_Utilization" | ||
}, | ||
{ | ||
"BriefDescription": "Instructions per Far Branch (Far Branches apply upon transition from application to operating system, or when handling interrupts and exceptions) [lower number means higher occurrence rate]", | ||
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u", | ||
"MetricGroup": "Branches;OS", | ||
"MetricName": "IpFarBranch" | ||
}, | ||
{ | ||
"BriefDescription": "C6 residency percent per core", | ||
"MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", | ||
"MetricGroup": "Power", | ||
"MetricName": "C6_Core_Residency" | ||
}, | ||
{ | ||
"BriefDescription": "C7 residency percent per core", | ||
"MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", | ||
"MetricGroup": "Power", | ||
"MetricName": "C7_Core_Residency" | ||
}, | ||
{ | ||
"BriefDescription": "C6 residency percent per package", | ||
"MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", | ||
"MetricGroup": "Power", | ||
"MetricName": "C6_Pkg_Residency" | ||
}, | ||
{ | ||
"BriefDescription": "C7 residency percent per package", | ||
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", | ||
"MetricGroup": "Power", | ||
"MetricName": "C7_Pkg_Residency" | ||
} | ||
] |