perf vendor events: Update Intel ivybridge
Update to v22, the metrics are based on TMA 4.4 full.

Use script at:
https://github.com/intel/event-converter-for-linux-perf/blob/master/download_and_gen.py
to download and generate the latest events and metrics. Manually copy the ivybridge files into perf and update mapfile.csv.

Tested on a non-ivybridge with 'perf test':
 10: PMU events :
 10.1: PMU event table sanity : Ok
 10.2: PMU event map aliases : Ok
 10.3: Parsing of PMU event table metrics : Ok
 10.4: Parsing of PMU event table metrics with fake PMUs : Ok

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kshipra Bopardikar <kshipra.bopardikar@intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: http://lore.kernel.org/lkml/20220727220832.2865794-15-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
d214d0c261
commit
80c14459f6
@ -130,17 +130,11 @@
|
|||||||
"MetricName": "FLOPc_SMT"
|
"MetricName": "FLOPc_SMT"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
|
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
|
||||||
"MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
|
"MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
|
||||||
"MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
|
"MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
|
||||||
"MetricName": "ILP"
|
"MetricName": "ILP"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
|
|
||||||
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
|
|
||||||
"MetricGroup": "Bad;BadSpec;BrMispredicts",
|
|
||||||
"MetricName": "IpMispredict"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
|
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
|
||||||
"MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
|
"MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
|
||||||
@ -196,6 +190,18 @@
|
|||||||
"MetricGroup": "Summary;TmaL1",
|
"MetricGroup": "Summary;TmaL1",
|
||||||
"MetricName": "Instructions"
|
"MetricName": "Instructions"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
|
||||||
|
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
|
||||||
|
"MetricGroup": "Pipeline;Ret",
|
||||||
|
"MetricName": "Retire"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"BriefDescription": "",
|
||||||
|
"MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
|
||||||
|
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
|
||||||
|
"MetricName": "Execute"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
|
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
|
||||||
"MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )",
|
"MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )",
|
||||||
@ -203,11 +209,16 @@
|
|||||||
"MetricName": "DSB_Coverage"
|
"MetricName": "DSB_Coverage"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles)",
|
"BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
|
||||||
|
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
|
||||||
|
"MetricGroup": "Bad;BadSpec;BrMispredicts",
|
||||||
|
"MetricName": "IpMispredict"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
|
||||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
|
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
|
||||||
"MetricGroup": "Mem;MemoryBound;MemoryLat",
|
"MetricGroup": "Mem;MemoryBound;MemoryLat",
|
||||||
"MetricName": "Load_Miss_Real_Latency",
|
"MetricName": "Load_Miss_Real_Latency"
|
||||||
"PublicDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles). Latency may be overestimated for multi-load instructions - e.g. repeat strings."
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
|
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
|
||||||
@ -215,24 +226,6 @@
|
|||||||
"MetricGroup": "Mem;MemoryBound;MemoryBW",
|
"MetricGroup": "Mem;MemoryBound;MemoryBW",
|
||||||
"MetricName": "MLP"
|
"MetricName": "MLP"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
|
|
||||||
"MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
|
|
||||||
"MetricGroup": "Mem;MemoryBW",
|
|
||||||
"MetricName": "L1D_Cache_Fill_BW"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
|
|
||||||
"MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
|
|
||||||
"MetricGroup": "Mem;MemoryBW",
|
|
||||||
"MetricName": "L2_Cache_Fill_BW"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
|
|
||||||
"MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
|
|
||||||
"MetricGroup": "Mem;MemoryBW",
|
|
||||||
"MetricName": "L3_Cache_Fill_BW"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
|
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
|
||||||
"MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
|
"MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
|
||||||
@ -264,6 +257,48 @@
|
|||||||
"MetricGroup": "Mem;MemoryTLB_SMT",
|
"MetricGroup": "Mem;MemoryTLB_SMT",
|
||||||
"MetricName": "Page_Walks_Utilization_SMT"
|
"MetricName": "Page_Walks_Utilization_SMT"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
|
||||||
|
"MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
|
||||||
|
"MetricGroup": "Mem;MemoryBW",
|
||||||
|
"MetricName": "L1D_Cache_Fill_BW"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
|
||||||
|
"MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
|
||||||
|
"MetricGroup": "Mem;MemoryBW",
|
||||||
|
"MetricName": "L2_Cache_Fill_BW"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
|
||||||
|
"MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
|
||||||
|
"MetricGroup": "Mem;MemoryBW",
|
||||||
|
"MetricName": "L3_Cache_Fill_BW"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
|
||||||
|
"MetricExpr": "(64 * L1D.REPLACEMENT / 1000000000 / duration_time)",
|
||||||
|
"MetricGroup": "Mem;MemoryBW",
|
||||||
|
"MetricName": "L1D_Cache_Fill_BW_1T"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
|
||||||
|
"MetricExpr": "(64 * L2_LINES_IN.ALL / 1000000000 / duration_time)",
|
||||||
|
"MetricGroup": "Mem;MemoryBW",
|
||||||
|
"MetricName": "L2_Cache_Fill_BW_1T"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
|
||||||
|
"MetricExpr": "(64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time)",
|
||||||
|
"MetricGroup": "Mem;MemoryBW",
|
||||||
|
"MetricName": "L3_Cache_Fill_BW_1T"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
|
||||||
|
"MetricExpr": "0",
|
||||||
|
"MetricGroup": "Mem;MemoryBW;Offcore",
|
||||||
|
"MetricName": "L3_Cache_Access_BW_1T"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"BriefDescription": "Average CPU Utilization",
|
"BriefDescription": "Average CPU Utilization",
|
||||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
|
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
|
||||||
@ -280,7 +315,8 @@
|
|||||||
"BriefDescription": "Giga Floating Point Operations Per Second",
|
"BriefDescription": "Giga Floating Point Operations Per Second",
|
||||||
"MetricExpr": "( ( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE ) / 1000000000 ) / duration_time",
|
"MetricExpr": "( ( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE ) / 1000000000 ) / duration_time",
|
||||||
"MetricGroup": "Cor;Flops;HPC",
|
"MetricGroup": "Cor;Flops;HPC",
|
||||||
"MetricName": "GFLOPs"
|
"MetricName": "GFLOPs",
|
||||||
|
"PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
|
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
|
||||||
|
@ -676,7 +676,7 @@
|
|||||||
"UMask": "0x3"
|
"UMask": "0x3"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"BriefDescription": "Number of occurences waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc.)",
|
"BriefDescription": "Number of occurrences waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc.)",
|
||||||
"Counter": "0,1,2,3",
|
"Counter": "0,1,2,3",
|
||||||
"CounterHTOff": "0,1,2,3,4,5,6,7",
|
"CounterHTOff": "0,1,2,3,4,5,6,7",
|
||||||
"CounterMask": "1",
|
"CounterMask": "1",
|
||||||
|
@ -82,10 +82,10 @@
|
|||||||
{
|
{
|
||||||
"BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.",
|
"BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.",
|
||||||
"Counter": "Fixed",
|
"Counter": "Fixed",
|
||||||
|
"EventCode": "0xff",
|
||||||
"EventName": "UNC_CLOCK.SOCKET",
|
"EventName": "UNC_CLOCK.SOCKET",
|
||||||
"PerPkg": "1",
|
"PerPkg": "1",
|
||||||
"PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.",
|
"PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.",
|
||||||
"UMask": "0x01",
|
|
||||||
"Unit": "ARB"
|
"Unit": "ARB"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -12,7 +12,7 @@ GenuineIntel-6-(3C|45|46),v31,haswell,core
|
|||||||
GenuineIntel-6-3F,v25,haswellx,core
|
GenuineIntel-6-3F,v25,haswellx,core
|
||||||
GenuineIntel-6-(7D|7E|A7),v1.14,icelake,core
|
GenuineIntel-6-(7D|7E|A7),v1.14,icelake,core
|
||||||
GenuineIntel-6-6[AC],v1.15,icelakex,core
|
GenuineIntel-6-6[AC],v1.15,icelakex,core
|
||||||
GenuineIntel-6-3A,v18,ivybridge,core
|
GenuineIntel-6-3A,v22,ivybridge,core
|
||||||
GenuineIntel-6-3E,v19,ivytown,core
|
GenuineIntel-6-3E,v19,ivytown,core
|
||||||
GenuineIntel-6-2D,v20,jaketown,core
|
GenuineIntel-6-2D,v20,jaketown,core
|
||||||
GenuineIntel-6-57,v9,knightslanding,core
|
GenuineIntel-6-57,v9,knightslanding,core
|
||||||
|
|
Loading…
Reference in New Issue
Block a user