The performance event updates for v5.12 are:
- Add CPU-PMU support for Intel Sapphire Rapids CPUs - Extend the perf ABI with PERF_SAMPLE_WEIGHT_STRUCT, to offer two-parameter sampling event feedback. Not used yet, but is intended for Golden Cove CPU-PMU, which can provide both the instruction latency and the cache latency information for memory profiling events. - Remove experimental, default-disabled perfmon-v4 counter_freezing support that could only be enabled via a boot option. The hardware is hopelessly broken, we'd like to make sure nobody starts relying on this, as it would only end in tears. - Fix energy/power events on Intel SPR platforms - Simplify the uprobes resume_execution() logic - Misc smaller fixes. Signed-off-by: Ingo Molnar <mingo@kernel.org> -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmAtf7kRHG1pbmdvQGtl cm5lbC5vcmcACgkQEnMQ0APhK1iJ2xAAvygKF8hm/UAGyT2R3iEruO49wRrmUfgt 13iBBA1DotKw2b8F5UN5MqjfwS8UgGKuAd8agvQ6XXANpnJ5mpy0nrzgjEXUx4j+ sQUqL7vxSdZ5J3kKblSZ4QoMzLVYSUkEDmw818vsa4eFWN8z58FJsv+ySegIFbXx +I3hF1O9a8MERZBUz4T5xHlgcbSDGEX6EvYRcO+zZ0rXfARfo9StfHYv1V53j6iY EOotFEKEn/5naczAd/sQo1SE1IgHtX2cbjOaKF7LulgEwZQWHpdKq0gww6nFK5yz XMSE9oXAFXRkRCJbrSqC0Dvrrf8hdlxWbKYbj9L7XILoxw199AdOBDbliJm6P/UH 6+JSEu/N4R0TFYc7TX6yef7ncw12e+64USjKOlWWwww97rVWWH1/tFTdlXhS6s+d jVI3yEECKyZlddrDdsetRdUj+QKyZQfDqbMXPXiDTv9P6AFqBvNLZYT0UPU3akk5 jXueHJQYSSgqnN+eRaIwvm4ZYWa031YHJXxiq2E89RnzL4JJArBYaddpukgxTYka c6Tn8L7f4zP5Bghu7hHv5Vy69i1N/3YvzUoYc6ljjmapgAJzxzq/yoEKrBlKnjtA MrstHhnwnPJl+PKjlbLpjl74rtcCiKJxjVhm+a5UbEcYoVuzJ86lmQK2WrLaoCTU B/zFplUF8C4= =BCcg -----END PGP SIGNATURE----- Merge tag 'perf-core-2021-02-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull performance event updates from Ingo Molnar: - Add CPU-PMU support for Intel Sapphire Rapids CPUs - Extend the perf ABI with PERF_SAMPLE_WEIGHT_STRUCT, to offer two-parameter sampling event feedback. 
Not used yet, but is intended for Golden Cove CPU-PMU, which can provide both the instruction latency and the cache latency information for memory profiling events. - Remove experimental, default-disabled perfmon-v4 counter_freezing support that could only be enabled via a boot option. The hardware is hopelessly broken, we'd like to make sure nobody starts relying on this, as it would only end in tears. - Fix energy/power events on Intel SPR platforms - Simplify the uprobes resume_execution() logic - Misc smaller fixes. * tag 'perf-core-2021-02-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: perf/x86/rapl: Fix psys-energy event on Intel SPR platform perf/x86/rapl: Only check lower 32bits for RAPL energy counters perf/x86/rapl: Add msr mask support perf/x86/kvm: Add Cascade Lake Xeon steppings to isolation_ucodes[] perf/x86/intel: Support CPUID 10.ECX to disable fixed counters perf/x86/intel: Add perf core PMU support for Sapphire Rapids perf/x86/intel: Filter unsupported Topdown metrics event perf/x86/intel: Factor out intel_update_topdown_event() perf/core: Add PERF_SAMPLE_WEIGHT_STRUCT perf/intel: Remove Perfmon-v4 counter_freezing support x86/perf: Use static_call for x86_pmu.guest_get_msrs perf/x86/intel/uncore: With > 8 nodes, get pci bus die id from NUMA info perf/x86/intel/uncore: Store the logical die id instead of the physical die id. x86/kprobes: Do not decode opcode in resume_execution()
This commit is contained in:
commit
d310ec03a3
@ -945,12 +945,6 @@
|
||||
causing system reset or hang due to sending
|
||||
INIT from AP to BSP.
|
||||
|
||||
perf_v4_pmi= [X86,INTEL]
|
||||
Format: <bool>
|
||||
Disable Intel PMU counter freezing feature.
|
||||
The feature only exists starting from
|
||||
Arch Perfmon v4 (Skylake and newer).
|
||||
|
||||
disable_ddw [PPC/PSERIES]
|
||||
Disable Dynamic DMA Window support. Use this
|
||||
to workaround buggy firmware.
|
||||
|
@ -2195,7 +2195,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
|
||||
|
||||
if (event->attr.sample_type & PERF_SAMPLE_WEIGHT &&
|
||||
ppmu->get_mem_weight)
|
||||
ppmu->get_mem_weight(&data.weight);
|
||||
ppmu->get_mem_weight(&data.weight.full);
|
||||
|
||||
if (perf_event_overflow(event, &data, regs))
|
||||
power_pmu_stop(event, 0);
|
||||
|
@ -81,6 +81,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
|
||||
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs);
|
||||
|
||||
u64 __read_mostly hw_cache_event_ids
|
||||
[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
@ -253,6 +255,8 @@ static bool check_hw_exists(void)
|
||||
if (ret)
|
||||
goto msr_fail;
|
||||
for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
|
||||
if (fixed_counter_disabled(i))
|
||||
continue;
|
||||
if (val & (0x03 << i*4)) {
|
||||
bios_fail = 1;
|
||||
val_fail = val;
|
||||
@ -665,6 +669,12 @@ void x86_pmu_disable_all(void)
|
||||
}
|
||||
}
|
||||
|
||||
struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
|
||||
{
|
||||
return static_call(x86_pmu_guest_get_msrs)(nr);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
|
||||
|
||||
/*
|
||||
* There may be PMI landing after enabled=0. The PMI hitting could be before or
|
||||
* after disable_all.
|
||||
@ -1523,6 +1533,8 @@ void perf_event_print_debug(void)
|
||||
cpu, idx, prev_left);
|
||||
}
|
||||
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
|
||||
if (fixed_counter_disabled(idx))
|
||||
continue;
|
||||
rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
|
||||
|
||||
pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
|
||||
@ -1923,6 +1935,8 @@ static void x86_pmu_static_call_update(void)
|
||||
|
||||
static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs);
|
||||
static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);
|
||||
|
||||
static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs);
|
||||
}
|
||||
|
||||
static void _x86_pmu_read(struct perf_event *event)
|
||||
@ -1930,6 +1944,13 @@ static void _x86_pmu_read(struct perf_event *event)
|
||||
x86_perf_event_update(event);
|
||||
}
|
||||
|
||||
static inline struct perf_guest_switch_msr *
|
||||
perf_guest_get_msrs_nop(int *nr)
|
||||
{
|
||||
*nr = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int __init init_hw_perf_events(void)
|
||||
{
|
||||
struct x86_pmu_quirk *quirk;
|
||||
@ -1995,12 +2016,17 @@ static int __init init_hw_perf_events(void)
|
||||
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
|
||||
pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
|
||||
pr_info("... max period: %016Lx\n", x86_pmu.max_period);
|
||||
pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
|
||||
pr_info("... fixed-purpose events: %lu\n",
|
||||
hweight64((((1ULL << x86_pmu.num_counters_fixed) - 1)
|
||||
<< INTEL_PMC_IDX_FIXED) & x86_pmu.intel_ctrl));
|
||||
pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
|
||||
|
||||
if (!x86_pmu.read)
|
||||
x86_pmu.read = _x86_pmu_read;
|
||||
|
||||
if (!x86_pmu.guest_get_msrs)
|
||||
x86_pmu.guest_get_msrs = perf_guest_get_msrs_nop;
|
||||
|
||||
x86_pmu_static_call_update();
|
||||
|
||||
/*
|
||||
|
@ -275,6 +275,55 @@ static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
|
||||
EVENT_EXTRA_END
|
||||
};
|
||||
|
||||
static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
|
||||
INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
|
||||
INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
|
||||
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
|
||||
INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
|
||||
EVENT_EXTRA_END
|
||||
};
|
||||
|
||||
static struct event_constraint intel_spr_event_constraints[] = {
|
||||
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
|
||||
FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */
|
||||
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
|
||||
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
|
||||
FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
|
||||
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
|
||||
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),
|
||||
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2),
|
||||
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3),
|
||||
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4),
|
||||
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5),
|
||||
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6),
|
||||
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7),
|
||||
|
||||
INTEL_EVENT_CONSTRAINT(0x2e, 0xff),
|
||||
INTEL_EVENT_CONSTRAINT(0x3c, 0xff),
|
||||
/*
|
||||
* Generally event codes < 0x90 are restricted to counters 0-3.
|
||||
* The 0x2E and 0x3C are exceptions, which have no restriction.
|
||||
*/
|
||||
INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf),
|
||||
|
||||
INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf),
|
||||
INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf),
|
||||
INTEL_UEVENT_CONSTRAINT(0x08a3, 0xf),
|
||||
INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1),
|
||||
INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1),
|
||||
INTEL_UEVENT_CONSTRAINT(0x02cd, 0x1),
|
||||
INTEL_EVENT_CONSTRAINT(0xce, 0x1),
|
||||
INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf),
|
||||
/*
|
||||
* Generally event codes >= 0x90 are likely to have no restrictions.
|
||||
* The exceptions are defined as above.
|
||||
*/
|
||||
INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0xff),
|
||||
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
|
||||
EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
|
||||
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
|
||||
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
|
||||
@ -314,11 +363,15 @@ EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
|
||||
EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
|
||||
"4", "2");
|
||||
|
||||
EVENT_ATTR_STR(slots, slots, "event=0x00,umask=0x4");
|
||||
EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80");
|
||||
EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec, "event=0x00,umask=0x81");
|
||||
EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound, "event=0x00,umask=0x82");
|
||||
EVENT_ATTR_STR(topdown-be-bound, td_be_bound, "event=0x00,umask=0x83");
|
||||
EVENT_ATTR_STR(slots, slots, "event=0x00,umask=0x4");
|
||||
EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80");
|
||||
EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec, "event=0x00,umask=0x81");
|
||||
EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound, "event=0x00,umask=0x82");
|
||||
EVENT_ATTR_STR(topdown-be-bound, td_be_bound, "event=0x00,umask=0x83");
|
||||
EVENT_ATTR_STR(topdown-heavy-ops, td_heavy_ops, "event=0x00,umask=0x84");
|
||||
EVENT_ATTR_STR(topdown-br-mispredict, td_br_mispredict, "event=0x00,umask=0x85");
|
||||
EVENT_ATTR_STR(topdown-fetch-lat, td_fetch_lat, "event=0x00,umask=0x86");
|
||||
EVENT_ATTR_STR(topdown-mem-bound, td_mem_bound, "event=0x00,umask=0x87");
|
||||
|
||||
static struct attribute *snb_events_attrs[] = {
|
||||
EVENT_PTR(td_slots_issued),
|
||||
@ -384,6 +437,108 @@ static u64 intel_pmu_event_map(int hw_event)
|
||||
return intel_perfmon_event_map[hw_event];
|
||||
}
|
||||
|
||||
static __initconst const u64 spr_hw_cache_event_ids
|
||||
[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
|
||||
{
|
||||
[ C(L1D ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x81d0,
|
||||
[ C(RESULT_MISS) ] = 0xe124,
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x82d0,
|
||||
},
|
||||
},
|
||||
[ C(L1I ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_MISS) ] = 0xe424,
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
},
|
||||
[ C(LL ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x12a,
|
||||
[ C(RESULT_MISS) ] = 0x12a,
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x12a,
|
||||
[ C(RESULT_MISS) ] = 0x12a,
|
||||
},
|
||||
},
|
||||
[ C(DTLB) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x81d0,
|
||||
[ C(RESULT_MISS) ] = 0xe12,
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x82d0,
|
||||
[ C(RESULT_MISS) ] = 0xe13,
|
||||
},
|
||||
},
|
||||
[ C(ITLB) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = 0xe11,
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
},
|
||||
[ C(BPU ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x4c4,
|
||||
[ C(RESULT_MISS) ] = 0x4c5,
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
},
|
||||
[ C(NODE) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x12a,
|
||||
[ C(RESULT_MISS) ] = 0x12a,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
static __initconst const u64 spr_hw_cache_extra_regs
|
||||
[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
|
||||
{
|
||||
[ C(LL ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x10001,
|
||||
[ C(RESULT_MISS) ] = 0x3fbfc00001,
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x3f3ffc0002,
|
||||
[ C(RESULT_MISS) ] = 0x3f3fc00002,
|
||||
},
|
||||
},
|
||||
[ C(NODE) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x10c000001,
|
||||
[ C(RESULT_MISS) ] = 0x3fb3000001,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/*
|
||||
* Notes on the events:
|
||||
* - data reads do not include code reads (comparable to earlier tables)
|
||||
@ -2134,18 +2289,6 @@ static void intel_tfa_pmu_enable_all(int added)
|
||||
intel_pmu_enable_all(added);
|
||||
}
|
||||
|
||||
static void enable_counter_freeze(void)
|
||||
{
|
||||
update_debugctlmsr(get_debugctlmsr() |
|
||||
DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
|
||||
}
|
||||
|
||||
static void disable_counter_freeze(void)
|
||||
{
|
||||
update_debugctlmsr(get_debugctlmsr() &
|
||||
~DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
|
||||
}
|
||||
|
||||
static inline u64 intel_pmu_get_status(void)
|
||||
{
|
||||
u64 status;
|
||||
@ -2337,8 +2480,8 @@ static void __icl_update_topdown_event(struct perf_event *event,
|
||||
}
|
||||
}
|
||||
|
||||
static void update_saved_topdown_regs(struct perf_event *event,
|
||||
u64 slots, u64 metrics)
|
||||
static void update_saved_topdown_regs(struct perf_event *event, u64 slots,
|
||||
u64 metrics, int metric_end)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct perf_event *other;
|
||||
@ -2347,7 +2490,7 @@ static void update_saved_topdown_regs(struct perf_event *event,
|
||||
event->hw.saved_slots = slots;
|
||||
event->hw.saved_metric = metrics;
|
||||
|
||||
for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
|
||||
for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
|
||||
if (!is_topdown_idx(idx))
|
||||
continue;
|
||||
other = cpuc->events[idx];
|
||||
@ -2362,7 +2505,8 @@ static void update_saved_topdown_regs(struct perf_event *event,
|
||||
* The PERF_METRICS and Fixed counter 3 are read separately. The values may be
|
||||
* modified by an NMI. PMU has to be disabled before calling this function.
|
||||
*/
|
||||
static u64 icl_update_topdown_event(struct perf_event *event)
|
||||
|
||||
static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct perf_event *other;
|
||||
@ -2378,7 +2522,7 @@ static u64 icl_update_topdown_event(struct perf_event *event)
|
||||
/* read PERF_METRICS */
|
||||
rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
|
||||
|
||||
for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
|
||||
for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
|
||||
if (!is_topdown_idx(idx))
|
||||
continue;
|
||||
other = cpuc->events[idx];
|
||||
@ -2404,7 +2548,7 @@ static u64 icl_update_topdown_event(struct perf_event *event)
|
||||
* Don't need to reset the PERF_METRICS and Fixed counter 3.
|
||||
* Because the values will be restored in next schedule in.
|
||||
*/
|
||||
update_saved_topdown_regs(event, slots, metrics);
|
||||
update_saved_topdown_regs(event, slots, metrics, metric_end);
|
||||
reset = false;
|
||||
}
|
||||
|
||||
@ -2413,12 +2557,18 @@ static u64 icl_update_topdown_event(struct perf_event *event)
|
||||
wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
|
||||
wrmsrl(MSR_PERF_METRICS, 0);
|
||||
if (event)
|
||||
update_saved_topdown_regs(event, 0, 0);
|
||||
update_saved_topdown_regs(event, 0, 0, metric_end);
|
||||
}
|
||||
|
||||
return slots;
|
||||
}
|
||||
|
||||
static u64 icl_update_topdown_event(struct perf_event *event)
|
||||
{
|
||||
return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE +
|
||||
x86_pmu.num_topdown_events - 1);
|
||||
}
|
||||
|
||||
static void intel_pmu_read_topdown_event(struct perf_event *event)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
@ -2573,8 +2723,11 @@ static void intel_pmu_reset(void)
|
||||
wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
|
||||
wrmsrl_safe(x86_pmu_event_addr(idx), 0ull);
|
||||
}
|
||||
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
|
||||
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
|
||||
if (fixed_counter_disabled(idx))
|
||||
continue;
|
||||
wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
|
||||
}
|
||||
|
||||
if (ds)
|
||||
ds->bts_index = ds->bts_buffer_base;
|
||||
@ -2709,95 +2862,6 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
|
||||
return handled;
|
||||
}
|
||||
|
||||
static bool disable_counter_freezing = true;
|
||||
static int __init intel_perf_counter_freezing_setup(char *s)
|
||||
{
|
||||
bool res;
|
||||
|
||||
if (kstrtobool(s, &res))
|
||||
return -EINVAL;
|
||||
|
||||
disable_counter_freezing = !res;
|
||||
return 1;
|
||||
}
|
||||
__setup("perf_v4_pmi=", intel_perf_counter_freezing_setup);
|
||||
|
||||
/*
|
||||
* Simplified handler for Arch Perfmon v4:
|
||||
* - We rely on counter freezing/unfreezing to enable/disable the PMU.
|
||||
* This is done automatically on PMU ack.
|
||||
* - Ack the PMU only after the APIC.
|
||||
*/
|
||||
|
||||
static int intel_pmu_handle_irq_v4(struct pt_regs *regs)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
int handled = 0;
|
||||
bool bts = false;
|
||||
u64 status;
|
||||
int pmu_enabled = cpuc->enabled;
|
||||
int loops = 0;
|
||||
|
||||
/* PMU has been disabled because of counter freezing */
|
||||
cpuc->enabled = 0;
|
||||
if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
|
||||
bts = true;
|
||||
intel_bts_disable_local();
|
||||
handled = intel_pmu_drain_bts_buffer();
|
||||
handled += intel_bts_interrupt();
|
||||
}
|
||||
status = intel_pmu_get_status();
|
||||
if (!status)
|
||||
goto done;
|
||||
again:
|
||||
intel_pmu_lbr_read();
|
||||
if (++loops > 100) {
|
||||
static bool warned;
|
||||
|
||||
if (!warned) {
|
||||
WARN(1, "perfevents: irq loop stuck!\n");
|
||||
perf_event_print_debug();
|
||||
warned = true;
|
||||
}
|
||||
intel_pmu_reset();
|
||||
goto done;
|
||||
}
|
||||
|
||||
|
||||
handled += handle_pmi_common(regs, status);
|
||||
done:
|
||||
/* Ack the PMI in the APIC */
|
||||
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
||||
|
||||
/*
|
||||
* The counters start counting immediately while ack the status.
|
||||
* Make it as close as possible to IRET. This avoids bogus
|
||||
* freezing on Skylake CPUs.
|
||||
*/
|
||||
if (status) {
|
||||
intel_pmu_ack_status(status);
|
||||
} else {
|
||||
/*
|
||||
* CPU may issue two PMIs very close to each other.
|
||||
* When the PMI handler services the first one, the
|
||||
* GLOBAL_STATUS is already updated to reflect both.
|
||||
* When it IRETs, the second PMI is immediately
|
||||
* handled and it sees clear status. In the meantime,
|
||||
* there may be a third PMI, because the freezing bit
|
||||
* isn't set since the ack in first PMI handlers.
|
||||
* Double check if there is more work to be done.
|
||||
*/
|
||||
status = intel_pmu_get_status();
|
||||
if (status)
|
||||
goto again;
|
||||
}
|
||||
|
||||
if (bts)
|
||||
intel_bts_enable_local();
|
||||
cpuc->enabled = pmu_enabled;
|
||||
return handled;
|
||||
}
|
||||
|
||||
/*
|
||||
* This handler is triggered by the local APIC, so the APIC IRQ handling
|
||||
* rules apply:
|
||||
@ -3563,6 +3627,26 @@ static int core_pmu_hw_config(struct perf_event *event)
|
||||
return intel_pmu_bts_config(event);
|
||||
}
|
||||
|
||||
#define INTEL_TD_METRIC_AVAILABLE_MAX (INTEL_TD_METRIC_RETIRING + \
|
||||
((x86_pmu.num_topdown_events - 1) << 8))
|
||||
|
||||
static bool is_available_metric_event(struct perf_event *event)
|
||||
{
|
||||
return is_metric_event(event) &&
|
||||
event->attr.config <= INTEL_TD_METRIC_AVAILABLE_MAX;
|
||||
}
|
||||
|
||||
static inline bool is_mem_loads_event(struct perf_event *event)
|
||||
{
|
||||
return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0xcd, .umask=0x01);
|
||||
}
|
||||
|
||||
static inline bool is_mem_loads_aux_event(struct perf_event *event)
|
||||
{
|
||||
return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0x03, .umask=0x82);
|
||||
}
|
||||
|
||||
|
||||
static int intel_pmu_hw_config(struct perf_event *event)
|
||||
{
|
||||
int ret = x86_pmu_hw_config(event);
|
||||
@ -3636,7 +3720,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
|
||||
if (event->attr.config & X86_ALL_EVENT_FLAGS)
|
||||
return -EINVAL;
|
||||
|
||||
if (is_metric_event(event)) {
|
||||
if (is_available_metric_event(event)) {
|
||||
struct perf_event *leader = event->group_leader;
|
||||
|
||||
/* The metric events don't support sampling. */
|
||||
@ -3665,6 +3749,33 @@ static int intel_pmu_hw_config(struct perf_event *event)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The load latency event X86_CONFIG(.event=0xcd, .umask=0x01) on SPR
|
||||
* doesn't function quite right. As a work-around it needs to always be
|
||||
* co-scheduled with an auxiliary event X86_CONFIG(.event=0x03, .umask=0x82).
|
||||
* The actual count of this second event is irrelevant; it just needs
|
||||
* to be active to make the first event function correctly.
|
||||
*
|
||||
* In a group, the auxiliary event must be in front of the load latency
|
||||
* event. The rule is to simplify the implementation of the check.
|
||||
* That's because perf cannot have a complete group at the moment.
|
||||
*/
|
||||
if (x86_pmu.flags & PMU_FL_MEM_LOADS_AUX &&
|
||||
(event->attr.sample_type & PERF_SAMPLE_DATA_SRC) &&
|
||||
is_mem_loads_event(event)) {
|
||||
struct perf_event *leader = event->group_leader;
|
||||
struct perf_event *sibling = NULL;
|
||||
|
||||
if (!is_mem_loads_aux_event(leader)) {
|
||||
for_each_sibling_event(sibling, leader) {
|
||||
if (is_mem_loads_aux_event(sibling))
|
||||
break;
|
||||
}
|
||||
if (list_entry_is_head(sibling, &leader->sibling_list, sibling_list))
|
||||
return -ENODATA;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
|
||||
return 0;
|
||||
|
||||
@ -3680,26 +3791,6 @@ static int intel_pmu_hw_config(struct perf_event *event)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RETPOLINE
|
||||
static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr);
|
||||
static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr);
|
||||
#endif
|
||||
|
||||
struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
|
||||
{
|
||||
#ifdef CONFIG_RETPOLINE
|
||||
if (x86_pmu.guest_get_msrs == intel_guest_get_msrs)
|
||||
return intel_guest_get_msrs(nr);
|
||||
else if (x86_pmu.guest_get_msrs == core_guest_get_msrs)
|
||||
return core_guest_get_msrs(nr);
|
||||
#endif
|
||||
if (x86_pmu.guest_get_msrs)
|
||||
return x86_pmu.guest_get_msrs(nr);
|
||||
*nr = 0;
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
|
||||
|
||||
static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
@ -3864,6 +3955,29 @@ icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
|
||||
return hsw_get_event_constraints(cpuc, idx, event);
|
||||
}
|
||||
|
||||
static struct event_constraint *
|
||||
spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
|
||||
struct perf_event *event)
|
||||
{
|
||||
struct event_constraint *c;
|
||||
|
||||
c = icl_get_event_constraints(cpuc, idx, event);
|
||||
|
||||
/*
|
||||
* The :ppp indicates the Precise Distribution (PDist) facility, which
|
||||
* is only supported on the GP counter 0. If a :ppp event is not
|
||||
* available on the GP counter 0, error out.
|
||||
*/
|
||||
if (event->attr.precise_ip == 3) {
|
||||
if (c->idxmsk64 & BIT_ULL(0))
|
||||
return &counter0_constraint;
|
||||
|
||||
return &emptyconstraint;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
static struct event_constraint *
|
||||
glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
|
||||
struct perf_event *event)
|
||||
@ -3953,6 +4067,14 @@ static u64 nhm_limit_period(struct perf_event *event, u64 left)
|
||||
return max(left, 32ULL);
|
||||
}
|
||||
|
||||
static u64 spr_limit_period(struct perf_event *event, u64 left)
|
||||
{
|
||||
if (event->attr.precise_ip == 3)
|
||||
return max(left, 128ULL);
|
||||
|
||||
return left;
|
||||
}
|
||||
|
||||
PMU_FORMAT_ATTR(event, "config:0-7" );
|
||||
PMU_FORMAT_ATTR(umask, "config:8-15" );
|
||||
PMU_FORMAT_ATTR(edge, "config:18" );
|
||||
@ -4094,9 +4216,6 @@ static void intel_pmu_cpu_starting(int cpu)
|
||||
if (x86_pmu.version > 1)
|
||||
flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
|
||||
|
||||
if (x86_pmu.counter_freezing)
|
||||
enable_counter_freeze();
|
||||
|
||||
/* Disable perf metrics if any added CPU doesn't support it. */
|
||||
if (x86_pmu.intel_cap.perf_metrics) {
|
||||
union perf_capabilities perf_cap;
|
||||
@ -4167,9 +4286,6 @@ static void free_excl_cntrs(struct cpu_hw_events *cpuc)
|
||||
static void intel_pmu_cpu_dying(int cpu)
|
||||
{
|
||||
fini_debug_store_on_cpu(cpu);
|
||||
|
||||
if (x86_pmu.counter_freezing)
|
||||
disable_counter_freeze();
|
||||
}
|
||||
|
||||
void intel_cpuc_finish(struct cpu_hw_events *cpuc)
|
||||
@ -4397,6 +4513,9 @@ static const struct x86_cpu_desc isolation_ucodes[] = {
|
||||
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 2, 0x0b000014),
|
||||
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021),
|
||||
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000),
|
||||
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000),
|
||||
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000),
|
||||
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000),
|
||||
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c),
|
||||
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c),
|
||||
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e),
|
||||
@ -4561,39 +4680,6 @@ static __init void intel_nehalem_quirk(void)
|
||||
}
|
||||
}
|
||||
|
||||
static const struct x86_cpu_desc counter_freezing_ucodes[] = {
|
||||
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 2, 0x0000000e),
|
||||
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 9, 0x0000002e),
|
||||
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 10, 0x00000008),
|
||||
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_D, 1, 0x00000028),
|
||||
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 1, 0x00000028),
|
||||
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 8, 0x00000006),
|
||||
{}
|
||||
};
|
||||
|
||||
static bool intel_counter_freezing_broken(void)
|
||||
{
|
||||
return !x86_cpu_has_min_microcode_rev(counter_freezing_ucodes);
|
||||
}
|
||||
|
||||
static __init void intel_counter_freezing_quirk(void)
|
||||
{
|
||||
/* Check if it's already disabled */
|
||||
if (disable_counter_freezing)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If the system starts with the wrong ucode, leave the
|
||||
* counter-freezing feature permanently disabled.
|
||||
*/
|
||||
if (intel_counter_freezing_broken()) {
|
||||
pr_info("PMU counter freezing disabled due to CPU errata,"
|
||||
"please upgrade microcode\n");
|
||||
x86_pmu.counter_freezing = false;
|
||||
x86_pmu.handle_irq = intel_pmu_handle_irq;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* enable software workaround for errata:
|
||||
* SNB: BJ122
|
||||
@ -4703,6 +4789,42 @@ static struct attribute *icl_tsx_events_attrs[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
||||
EVENT_ATTR_STR(mem-stores, mem_st_spr, "event=0xcd,umask=0x2");
|
||||
EVENT_ATTR_STR(mem-loads-aux, mem_ld_aux, "event=0x03,umask=0x82");
|
||||
|
||||
static struct attribute *spr_events_attrs[] = {
|
||||
EVENT_PTR(mem_ld_hsw),
|
||||
EVENT_PTR(mem_st_spr),
|
||||
EVENT_PTR(mem_ld_aux),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *spr_td_events_attrs[] = {
|
||||
EVENT_PTR(slots),
|
||||
EVENT_PTR(td_retiring),
|
||||
EVENT_PTR(td_bad_spec),
|
||||
EVENT_PTR(td_fe_bound),
|
||||
EVENT_PTR(td_be_bound),
|
||||
EVENT_PTR(td_heavy_ops),
|
||||
EVENT_PTR(td_br_mispredict),
|
||||
EVENT_PTR(td_fetch_lat),
|
||||
EVENT_PTR(td_mem_bound),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *spr_tsx_events_attrs[] = {
|
||||
EVENT_PTR(tx_start),
|
||||
EVENT_PTR(tx_abort),
|
||||
EVENT_PTR(tx_commit),
|
||||
EVENT_PTR(tx_capacity_read),
|
||||
EVENT_PTR(tx_capacity_write),
|
||||
EVENT_PTR(tx_conflict),
|
||||
EVENT_PTR(cycles_t),
|
||||
EVENT_PTR(cycles_ct),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static ssize_t freeze_on_smi_show(struct device *cdev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
@ -4926,7 +5048,7 @@ __init int intel_pmu_init(void)
|
||||
union cpuid10_eax eax;
|
||||
union cpuid10_ebx ebx;
|
||||
struct event_constraint *c;
|
||||
unsigned int unused;
|
||||
unsigned int fixed_mask;
|
||||
struct extra_reg *er;
|
||||
bool pmem = false;
|
||||
int version, i;
|
||||
@ -4948,7 +5070,7 @@ __init int intel_pmu_init(void)
|
||||
* Check whether the Architectural PerfMon supports
|
||||
* Branch Misses Retired hw_event or not.
|
||||
*/
|
||||
cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
|
||||
cpuid(10, &eax.full, &ebx.full, &fixed_mask, &edx.full);
|
||||
if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
|
||||
return -ENODEV;
|
||||
|
||||
@ -4972,15 +5094,15 @@ __init int intel_pmu_init(void)
|
||||
* Quirk: v2 perfmon does not report fixed-purpose events, so
|
||||
* assume at least 3 events, when not running in a hypervisor:
|
||||
*/
|
||||
if (version > 1) {
|
||||
if (version > 1 && version < 5) {
|
||||
int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);
|
||||
|
||||
x86_pmu.num_counters_fixed =
|
||||
max((int)edx.split.num_counters_fixed, assume);
|
||||
}
|
||||
|
||||
if (version >= 4)
|
||||
x86_pmu.counter_freezing = !disable_counter_freezing;
|
||||
fixed_mask = (1L << x86_pmu.num_counters_fixed) - 1;
|
||||
} else if (version >= 5)
|
||||
x86_pmu.num_counters_fixed = fls(fixed_mask);
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_PDCM)) {
|
||||
u64 capabilities;
|
||||
@ -5109,7 +5231,6 @@ __init int intel_pmu_init(void)
|
||||
|
||||
case INTEL_FAM6_ATOM_GOLDMONT:
|
||||
case INTEL_FAM6_ATOM_GOLDMONT_D:
|
||||
x86_add_quirk(intel_counter_freezing_quirk);
|
||||
memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
|
||||
sizeof(hw_cache_event_ids));
|
||||
memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
|
||||
@ -5136,7 +5257,6 @@ __init int intel_pmu_init(void)
|
||||
break;
|
||||
|
||||
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
|
||||
x86_add_quirk(intel_counter_freezing_quirk);
|
||||
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
|
||||
sizeof(hw_cache_event_ids));
|
||||
memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
|
||||
@ -5483,12 +5603,50 @@ __init int intel_pmu_init(void)
|
||||
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
|
||||
x86_pmu.lbr_pt_coexist = true;
|
||||
intel_pmu_pebs_data_source_skl(pmem);
|
||||
x86_pmu.num_topdown_events = 4;
|
||||
x86_pmu.update_topdown_event = icl_update_topdown_event;
|
||||
x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
|
||||
pr_cont("Icelake events, ");
|
||||
name = "icelake";
|
||||
break;
|
||||
|
||||
case INTEL_FAM6_SAPPHIRERAPIDS_X:
|
||||
pmem = true;
|
||||
x86_pmu.late_ack = true;
|
||||
memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids));
|
||||
memcpy(hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
|
||||
|
||||
x86_pmu.event_constraints = intel_spr_event_constraints;
|
||||
x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints;
|
||||
x86_pmu.extra_regs = intel_spr_extra_regs;
|
||||
x86_pmu.limit_period = spr_limit_period;
|
||||
x86_pmu.pebs_aliases = NULL;
|
||||
x86_pmu.pebs_prec_dist = true;
|
||||
x86_pmu.pebs_block = true;
|
||||
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
|
||||
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
|
||||
x86_pmu.flags |= PMU_FL_PEBS_ALL;
|
||||
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
|
||||
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
|
||||
|
||||
x86_pmu.hw_config = hsw_hw_config;
|
||||
x86_pmu.get_event_constraints = spr_get_event_constraints;
|
||||
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
|
||||
hsw_format_attr : nhm_format_attr;
|
||||
extra_skl_attr = skl_format_attr;
|
||||
mem_attr = spr_events_attrs;
|
||||
td_attr = spr_td_events_attrs;
|
||||
tsx_attr = spr_tsx_events_attrs;
|
||||
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
|
||||
x86_pmu.lbr_pt_coexist = true;
|
||||
intel_pmu_pebs_data_source_skl(pmem);
|
||||
x86_pmu.num_topdown_events = 8;
|
||||
x86_pmu.update_topdown_event = icl_update_topdown_event;
|
||||
x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
|
||||
pr_cont("Sapphire Rapids events, ");
|
||||
name = "sapphire_rapids";
|
||||
break;
|
||||
|
||||
default:
|
||||
switch (x86_pmu.version) {
|
||||
case 1:
|
||||
@ -5531,8 +5689,7 @@ __init int intel_pmu_init(void)
|
||||
x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
|
||||
}
|
||||
|
||||
x86_pmu.intel_ctrl |=
|
||||
((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
|
||||
x86_pmu.intel_ctrl |= (u64)fixed_mask << INTEL_PMC_IDX_FIXED;
|
||||
|
||||
/* AnyThread may be deprecated on arch perfmon v5 or later */
|
||||
if (x86_pmu.intel_cap.anythread_deprecated)
|
||||
@ -5549,13 +5706,22 @@ __init int intel_pmu_init(void)
|
||||
* events to the generic counters.
|
||||
*/
|
||||
if (c->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) {
|
||||
/*
|
||||
* Disable topdown slots and metrics events,
|
||||
* if slots event is not in CPUID.
|
||||
*/
|
||||
if (!(INTEL_PMC_MSK_FIXED_SLOTS & x86_pmu.intel_ctrl))
|
||||
c->idxmsk64 = 0;
|
||||
c->weight = hweight64(c->idxmsk64);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c->cmask == FIXED_EVENT_FLAGS
|
||||
&& c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
|
||||
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
|
||||
if (c->cmask == FIXED_EVENT_FLAGS) {
|
||||
/* Disabled fixed counters which are not in CPUID */
|
||||
c->idxmsk64 &= x86_pmu.intel_ctrl;
|
||||
|
||||
if (c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES)
|
||||
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
|
||||
}
|
||||
c->idxmsk64 &=
|
||||
~(~0ULL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
|
||||
@ -5601,13 +5767,6 @@ __init int intel_pmu_init(void)
|
||||
pr_cont("full-width counters, ");
|
||||
}
|
||||
|
||||
/*
|
||||
* For arch perfmon 4 use counter freezing to avoid
|
||||
* several MSR accesses in the PMI.
|
||||
*/
|
||||
if (x86_pmu.counter_freezing)
|
||||
x86_pmu.handle_irq = intel_pmu_handle_irq_v4;
|
||||
|
||||
if (x86_pmu.intel_cap.perf_metrics)
|
||||
x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
|
||||
|
||||
|
@ -36,7 +36,9 @@ union intel_x86_pebs_dse {
|
||||
unsigned int ld_dse:4;
|
||||
unsigned int ld_stlb_miss:1;
|
||||
unsigned int ld_locked:1;
|
||||
unsigned int ld_reserved:26;
|
||||
unsigned int ld_data_blk:1;
|
||||
unsigned int ld_addr_blk:1;
|
||||
unsigned int ld_reserved:24;
|
||||
};
|
||||
struct {
|
||||
unsigned int st_l1d_hit:1;
|
||||
@ -45,6 +47,12 @@ union intel_x86_pebs_dse {
|
||||
unsigned int st_locked:1;
|
||||
unsigned int st_reserved2:26;
|
||||
};
|
||||
struct {
|
||||
unsigned int st_lat_dse:4;
|
||||
unsigned int st_lat_stlb_miss:1;
|
||||
unsigned int st_lat_locked:1;
|
||||
unsigned int ld_reserved3:26;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@ -198,6 +206,63 @@ static u64 load_latency_data(u64 status)
|
||||
if (dse.ld_locked)
|
||||
val |= P(LOCK, LOCKED);
|
||||
|
||||
/*
|
||||
* Ice Lake and earlier models do not support block infos.
|
||||
*/
|
||||
if (!x86_pmu.pebs_block) {
|
||||
val |= P(BLK, NA);
|
||||
return val;
|
||||
}
|
||||
/*
|
||||
* bit 6: load was blocked since its data could not be forwarded
|
||||
* from a preceding store
|
||||
*/
|
||||
if (dse.ld_data_blk)
|
||||
val |= P(BLK, DATA);
|
||||
|
||||
/*
|
||||
* bit 7: load was blocked due to potential address conflict with
|
||||
* a preceding store
|
||||
*/
|
||||
if (dse.ld_addr_blk)
|
||||
val |= P(BLK, ADDR);
|
||||
|
||||
if (!dse.ld_data_blk && !dse.ld_addr_blk)
|
||||
val |= P(BLK, NA);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static u64 store_latency_data(u64 status)
|
||||
{
|
||||
union intel_x86_pebs_dse dse;
|
||||
u64 val;
|
||||
|
||||
dse.val = status;
|
||||
|
||||
/*
|
||||
* use the mapping table for bit 0-3
|
||||
*/
|
||||
val = pebs_data_source[dse.st_lat_dse];
|
||||
|
||||
/*
|
||||
* bit 4: TLB access
|
||||
* 0 = did not miss 2nd level TLB
|
||||
* 1 = missed 2nd level TLB
|
||||
*/
|
||||
if (dse.st_lat_stlb_miss)
|
||||
val |= P(TLB, MISS) | P(TLB, L2);
|
||||
else
|
||||
val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
|
||||
|
||||
/*
|
||||
* bit 5: locked prefix
|
||||
*/
|
||||
if (dse.st_lat_locked)
|
||||
val |= P(LOCK, LOCKED);
|
||||
|
||||
val |= P(BLK, NA);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
@ -870,6 +935,28 @@ struct event_constraint intel_icl_pebs_event_constraints[] = {
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
struct event_constraint intel_spr_pebs_event_constraints[] = {
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
|
||||
|
||||
INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
|
||||
INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
|
||||
INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),
|
||||
|
||||
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
|
||||
|
||||
INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
|
||||
|
||||
/*
|
||||
* Everything else is handled by PMU_FL_PEBS_ALL, because we
|
||||
* need the full constraints from the main table.
|
||||
*/
|
||||
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
|
||||
{
|
||||
struct event_constraint *c;
|
||||
@ -960,7 +1047,8 @@ static void adaptive_pebs_record_size_update(void)
|
||||
}
|
||||
|
||||
#define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \
|
||||
PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT | \
|
||||
PERF_SAMPLE_PHYS_ADDR | \
|
||||
PERF_SAMPLE_WEIGHT_TYPE | \
|
||||
PERF_SAMPLE_TRANSACTION | \
|
||||
PERF_SAMPLE_DATA_PAGE_SIZE)
|
||||
|
||||
@ -987,7 +1075,7 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event)
|
||||
gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
|
||||
(attr->sample_regs_intr & PEBS_GP_REGS);
|
||||
|
||||
tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) &&
|
||||
tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
|
||||
((attr->config & INTEL_ARCH_EVENT_MASK) ==
|
||||
x86_pmu.rtm_abort_event);
|
||||
|
||||
@ -1331,6 +1419,8 @@ static u64 get_data_src(struct perf_event *event, u64 aux)
|
||||
|
||||
if (fl & PERF_X86_EVENT_PEBS_LDLAT)
|
||||
val = load_latency_data(aux);
|
||||
else if (fl & PERF_X86_EVENT_PEBS_STLAT)
|
||||
val = store_latency_data(aux);
|
||||
else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
|
||||
val = precise_datala_hsw(event, aux);
|
||||
else if (fst)
|
||||
@ -1369,8 +1459,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
|
||||
/*
|
||||
* Use latency for weight (only avail with PEBS-LL)
|
||||
*/
|
||||
if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
|
||||
data->weight = pebs->lat;
|
||||
if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
|
||||
data->weight.full = pebs->lat;
|
||||
|
||||
/*
|
||||
* data.data_src encodes the data source
|
||||
@ -1462,8 +1552,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
|
||||
|
||||
if (x86_pmu.intel_cap.pebs_format >= 2) {
|
||||
/* Only set the TSX weight when no memory weight. */
|
||||
if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
|
||||
data->weight = intel_get_tsx_weight(pebs->tsx_tuning);
|
||||
if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
|
||||
data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_TRANSACTION)
|
||||
data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
|
||||
@ -1507,6 +1597,9 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs,
|
||||
#endif
|
||||
}
|
||||
|
||||
#define PEBS_LATENCY_MASK 0xffff
|
||||
#define PEBS_CACHE_LATENCY_OFFSET 32
|
||||
|
||||
/*
|
||||
* With adaptive PEBS the layout depends on what fields are configured.
|
||||
*/
|
||||
@ -1577,9 +1670,27 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
}
|
||||
|
||||
if (format_size & PEBS_DATACFG_MEMINFO) {
|
||||
if (sample_type & PERF_SAMPLE_WEIGHT)
|
||||
data->weight = meminfo->latency ?:
|
||||
intel_get_tsx_weight(meminfo->tsx_tuning);
|
||||
if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
|
||||
u64 weight = meminfo->latency;
|
||||
|
||||
if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) {
|
||||
data->weight.var2_w = weight & PEBS_LATENCY_MASK;
|
||||
weight >>= PEBS_CACHE_LATENCY_OFFSET;
|
||||
}
|
||||
|
||||
/*
|
||||
* Although meminfo::latency is defined as a u64,
|
||||
* only the lower 32 bits include the valid data
|
||||
* in practice on Ice Lake and earlier platforms.
|
||||
*/
|
||||
if (sample_type & PERF_SAMPLE_WEIGHT) {
|
||||
data->weight.full = weight ?:
|
||||
intel_get_tsx_weight(meminfo->tsx_tuning);
|
||||
} else {
|
||||
data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
|
||||
intel_get_tsx_weight(meminfo->tsx_tuning);
|
||||
}
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_DATA_SRC)
|
||||
data->data_src.val = get_data_src(event, meminfo->aux);
|
||||
|
@ -31,21 +31,21 @@ struct event_constraint uncore_constraint_empty =
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
|
||||
int uncore_pcibus_to_physid(struct pci_bus *bus)
|
||||
int uncore_pcibus_to_dieid(struct pci_bus *bus)
|
||||
{
|
||||
struct pci2phy_map *map;
|
||||
int phys_id = -1;
|
||||
int die_id = -1;
|
||||
|
||||
raw_spin_lock(&pci2phy_map_lock);
|
||||
list_for_each_entry(map, &pci2phy_map_head, list) {
|
||||
if (map->segment == pci_domain_nr(bus)) {
|
||||
phys_id = map->pbus_to_physid[bus->number];
|
||||
die_id = map->pbus_to_dieid[bus->number];
|
||||
break;
|
||||
}
|
||||
}
|
||||
raw_spin_unlock(&pci2phy_map_lock);
|
||||
|
||||
return phys_id;
|
||||
return die_id;
|
||||
}
|
||||
|
||||
static void uncore_free_pcibus_map(void)
|
||||
@ -86,7 +86,7 @@ lookup:
|
||||
alloc = NULL;
|
||||
map->segment = segment;
|
||||
for (i = 0; i < 256; i++)
|
||||
map->pbus_to_physid[i] = -1;
|
||||
map->pbus_to_dieid[i] = -1;
|
||||
list_add_tail(&map->list, &pci2phy_map_head);
|
||||
|
||||
end:
|
||||
@ -332,7 +332,6 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
|
||||
|
||||
uncore_pmu_init_hrtimer(box);
|
||||
box->cpu = -1;
|
||||
box->pci_phys_id = -1;
|
||||
box->dieid = -1;
|
||||
|
||||
/* set default hrtimer timeout */
|
||||
@ -993,18 +992,11 @@ uncore_types_init(struct intel_uncore_type **types, bool setid)
|
||||
/*
|
||||
* Get the die information of a PCI device.
|
||||
* @pdev: The PCI device.
|
||||
* @phys_id: The physical socket id which the device maps to.
|
||||
* @die: The die id which the device maps to.
|
||||
*/
|
||||
static int uncore_pci_get_dev_die_info(struct pci_dev *pdev,
|
||||
int *phys_id, int *die)
|
||||
static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die)
|
||||
{
|
||||
*phys_id = uncore_pcibus_to_physid(pdev->bus);
|
||||
if (*phys_id < 0)
|
||||
return -ENODEV;
|
||||
|
||||
*die = (topology_max_die_per_package() > 1) ? *phys_id :
|
||||
topology_phys_to_logical_pkg(*phys_id);
|
||||
*die = uncore_pcibus_to_dieid(pdev->bus);
|
||||
if (*die < 0)
|
||||
return -EINVAL;
|
||||
|
||||
@ -1046,13 +1038,12 @@ uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
|
||||
* @pdev: The PCI device.
|
||||
* @type: The corresponding PMU type of the device.
|
||||
* @pmu: The corresponding PMU of the device.
|
||||
* @phys_id: The physical socket id which the device maps to.
|
||||
* @die: The die id which the device maps to.
|
||||
*/
|
||||
static int uncore_pci_pmu_register(struct pci_dev *pdev,
|
||||
struct intel_uncore_type *type,
|
||||
struct intel_uncore_pmu *pmu,
|
||||
int phys_id, int die)
|
||||
int die)
|
||||
{
|
||||
struct intel_uncore_box *box;
|
||||
int ret;
|
||||
@ -1070,7 +1061,6 @@ static int uncore_pci_pmu_register(struct pci_dev *pdev,
|
||||
WARN_ON_ONCE(pmu->func_id != pdev->devfn);
|
||||
|
||||
atomic_inc(&box->refcnt);
|
||||
box->pci_phys_id = phys_id;
|
||||
box->dieid = die;
|
||||
box->pci_dev = pdev;
|
||||
box->pmu = pmu;
|
||||
@ -1097,9 +1087,9 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
|
||||
{
|
||||
struct intel_uncore_type *type;
|
||||
struct intel_uncore_pmu *pmu = NULL;
|
||||
int phys_id, die, ret;
|
||||
int die, ret;
|
||||
|
||||
ret = uncore_pci_get_dev_die_info(pdev, &phys_id, &die);
|
||||
ret = uncore_pci_get_dev_die_info(pdev, &die);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -1132,7 +1122,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
|
||||
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
|
||||
}
|
||||
|
||||
ret = uncore_pci_pmu_register(pdev, type, pmu, phys_id, die);
|
||||
ret = uncore_pci_pmu_register(pdev, type, pmu, die);
|
||||
|
||||
pci_set_drvdata(pdev, pmu->boxes[die]);
|
||||
|
||||
@ -1142,17 +1132,12 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
|
||||
/*
|
||||
* Unregister the PMU of a PCI device
|
||||
* @pmu: The corresponding PMU is unregistered.
|
||||
* @phys_id: The physical socket id which the device maps to.
|
||||
* @die: The die id which the device maps to.
|
||||
*/
|
||||
static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu,
|
||||
int phys_id, int die)
|
||||
static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die)
|
||||
{
|
||||
struct intel_uncore_box *box = pmu->boxes[die];
|
||||
|
||||
if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
|
||||
return;
|
||||
|
||||
pmu->boxes[die] = NULL;
|
||||
if (atomic_dec_return(&pmu->activeboxes) == 0)
|
||||
uncore_pmu_unregister(pmu);
|
||||
@ -1164,9 +1149,9 @@ static void uncore_pci_remove(struct pci_dev *pdev)
|
||||
{
|
||||
struct intel_uncore_box *box;
|
||||
struct intel_uncore_pmu *pmu;
|
||||
int i, phys_id, die;
|
||||
int i, die;
|
||||
|
||||
if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die))
|
||||
if (uncore_pci_get_dev_die_info(pdev, &die))
|
||||
return;
|
||||
|
||||
box = pci_get_drvdata(pdev);
|
||||
@ -1185,7 +1170,7 @@ static void uncore_pci_remove(struct pci_dev *pdev)
|
||||
|
||||
pci_set_drvdata(pdev, NULL);
|
||||
|
||||
uncore_pci_pmu_unregister(pmu, phys_id, die);
|
||||
uncore_pci_pmu_unregister(pmu, die);
|
||||
}
|
||||
|
||||
static int uncore_bus_notify(struct notifier_block *nb,
|
||||
@ -1194,7 +1179,7 @@ static int uncore_bus_notify(struct notifier_block *nb,
|
||||
struct device *dev = data;
|
||||
struct pci_dev *pdev = to_pci_dev(dev);
|
||||
struct intel_uncore_pmu *pmu;
|
||||
int phys_id, die;
|
||||
int die;
|
||||
|
||||
/* Unregister the PMU when the device is going to be deleted. */
|
||||
if (action != BUS_NOTIFY_DEL_DEVICE)
|
||||
@ -1204,10 +1189,10 @@ static int uncore_bus_notify(struct notifier_block *nb,
|
||||
if (!pmu)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die))
|
||||
if (uncore_pci_get_dev_die_info(pdev, &die))
|
||||
return NOTIFY_DONE;
|
||||
|
||||
uncore_pci_pmu_unregister(pmu, phys_id, die);
|
||||
uncore_pci_pmu_unregister(pmu, die);
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
@ -1224,7 +1209,7 @@ static void uncore_pci_sub_driver_init(void)
|
||||
struct pci_dev *pci_sub_dev;
|
||||
bool notify = false;
|
||||
unsigned int devfn;
|
||||
int phys_id, die;
|
||||
int die;
|
||||
|
||||
while (ids && ids->vendor) {
|
||||
pci_sub_dev = NULL;
|
||||
@ -1244,12 +1229,11 @@ static void uncore_pci_sub_driver_init(void)
|
||||
if (!pmu)
|
||||
continue;
|
||||
|
||||
if (uncore_pci_get_dev_die_info(pci_sub_dev,
|
||||
&phys_id, &die))
|
||||
if (uncore_pci_get_dev_die_info(pci_sub_dev, &die))
|
||||
continue;
|
||||
|
||||
if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
|
||||
phys_id, die))
|
||||
die))
|
||||
notify = true;
|
||||
}
|
||||
ids++;
|
||||
|
@ -124,7 +124,6 @@ struct intel_uncore_extra_reg {
|
||||
};
|
||||
|
||||
struct intel_uncore_box {
|
||||
int pci_phys_id;
|
||||
int dieid; /* Logical die ID */
|
||||
int n_active; /* number of active events */
|
||||
int n_events;
|
||||
@ -173,11 +172,11 @@ struct freerunning_counters {
|
||||
struct pci2phy_map {
|
||||
struct list_head list;
|
||||
int segment;
|
||||
int pbus_to_physid[256];
|
||||
int pbus_to_dieid[256];
|
||||
};
|
||||
|
||||
struct pci2phy_map *__find_pci2phy_map(int segment);
|
||||
int uncore_pcibus_to_physid(struct pci_bus *bus);
|
||||
int uncore_pcibus_to_dieid(struct pci_bus *bus);
|
||||
|
||||
ssize_t uncore_event_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf);
|
||||
|
@ -657,7 +657,7 @@ int snb_pci2phy_map_init(int devid)
|
||||
pci_dev_put(dev);
|
||||
return -ENOMEM;
|
||||
}
|
||||
map->pbus_to_physid[bus] = 0;
|
||||
map->pbus_to_dieid[bus] = 0;
|
||||
raw_spin_unlock(&pci2phy_map_lock);
|
||||
|
||||
pci_dev_put(dev);
|
||||
|
@ -1359,7 +1359,7 @@ static struct pci_driver snbep_uncore_pci_driver = {
|
||||
static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool reverse)
|
||||
{
|
||||
struct pci_dev *ubox_dev = NULL;
|
||||
int i, bus, nodeid, segment;
|
||||
int i, bus, nodeid, segment, die_id;
|
||||
struct pci2phy_map *map;
|
||||
int err = 0;
|
||||
u32 config = 0;
|
||||
@ -1370,36 +1370,77 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
|
||||
if (!ubox_dev)
|
||||
break;
|
||||
bus = ubox_dev->bus->number;
|
||||
/* get the Node ID of the local register */
|
||||
err = pci_read_config_dword(ubox_dev, nodeid_loc, &config);
|
||||
if (err)
|
||||
break;
|
||||
nodeid = config & NODE_ID_MASK;
|
||||
/* get the Node ID mapping */
|
||||
err = pci_read_config_dword(ubox_dev, idmap_loc, &config);
|
||||
if (err)
|
||||
break;
|
||||
|
||||
segment = pci_domain_nr(ubox_dev->bus);
|
||||
raw_spin_lock(&pci2phy_map_lock);
|
||||
map = __find_pci2phy_map(segment);
|
||||
if (!map) {
|
||||
raw_spin_unlock(&pci2phy_map_lock);
|
||||
err = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* every three bits in the Node ID mapping register maps
|
||||
* to a particular node.
|
||||
* The nodeid and idmap registers only contain enough
|
||||
* information to handle 8 nodes. On systems with more
|
||||
* than 8 nodes, we need to rely on NUMA information,
|
||||
* filled in from BIOS supplied information, to determine
|
||||
* the topology.
|
||||
*/
|
||||
for (i = 0; i < 8; i++) {
|
||||
if (nodeid == ((config >> (3 * i)) & 0x7)) {
|
||||
map->pbus_to_physid[bus] = i;
|
||||
if (nr_node_ids <= 8) {
|
||||
/* get the Node ID of the local register */
|
||||
err = pci_read_config_dword(ubox_dev, nodeid_loc, &config);
|
||||
if (err)
|
||||
break;
|
||||
nodeid = config & NODE_ID_MASK;
|
||||
/* get the Node ID mapping */
|
||||
err = pci_read_config_dword(ubox_dev, idmap_loc, &config);
|
||||
if (err)
|
||||
break;
|
||||
|
||||
segment = pci_domain_nr(ubox_dev->bus);
|
||||
raw_spin_lock(&pci2phy_map_lock);
|
||||
map = __find_pci2phy_map(segment);
|
||||
if (!map) {
|
||||
raw_spin_unlock(&pci2phy_map_lock);
|
||||
err = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* every three bits in the Node ID mapping register maps
|
||||
* to a particular node.
|
||||
*/
|
||||
for (i = 0; i < 8; i++) {
|
||||
if (nodeid == ((config >> (3 * i)) & 0x7)) {
|
||||
if (topology_max_die_per_package() > 1)
|
||||
die_id = i;
|
||||
else
|
||||
die_id = topology_phys_to_logical_pkg(i);
|
||||
map->pbus_to_dieid[bus] = die_id;
|
||||
break;
|
||||
}
|
||||
}
|
||||
raw_spin_unlock(&pci2phy_map_lock);
|
||||
} else {
|
||||
int node = pcibus_to_node(ubox_dev->bus);
|
||||
int cpu;
|
||||
|
||||
segment = pci_domain_nr(ubox_dev->bus);
|
||||
raw_spin_lock(&pci2phy_map_lock);
|
||||
map = __find_pci2phy_map(segment);
|
||||
if (!map) {
|
||||
raw_spin_unlock(&pci2phy_map_lock);
|
||||
err = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
|
||||
die_id = -1;
|
||||
for_each_cpu(cpu, cpumask_of_pcibus(ubox_dev->bus)) {
|
||||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||
|
||||
if (c->initialized && cpu_to_node(cpu) == node) {
|
||||
map->pbus_to_dieid[bus] = die_id = c->logical_die_id;
|
||||
break;
|
||||
}
|
||||
}
|
||||
raw_spin_unlock(&pci2phy_map_lock);
|
||||
|
||||
if (WARN_ON_ONCE(die_id == -1)) {
|
||||
err = -EINVAL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
raw_spin_unlock(&pci2phy_map_lock);
|
||||
}
|
||||
|
||||
if (!err) {
|
||||
@ -1412,17 +1453,17 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
|
||||
i = -1;
|
||||
if (reverse) {
|
||||
for (bus = 255; bus >= 0; bus--) {
|
||||
if (map->pbus_to_physid[bus] >= 0)
|
||||
i = map->pbus_to_physid[bus];
|
||||
if (map->pbus_to_dieid[bus] >= 0)
|
||||
i = map->pbus_to_dieid[bus];
|
||||
else
|
||||
map->pbus_to_physid[bus] = i;
|
||||
map->pbus_to_dieid[bus] = i;
|
||||
}
|
||||
} else {
|
||||
for (bus = 0; bus <= 255; bus++) {
|
||||
if (map->pbus_to_physid[bus] >= 0)
|
||||
i = map->pbus_to_physid[bus];
|
||||
if (map->pbus_to_dieid[bus] >= 0)
|
||||
i = map->pbus_to_dieid[bus];
|
||||
else
|
||||
map->pbus_to_physid[bus] = i;
|
||||
map->pbus_to_dieid[bus] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -4646,19 +4687,14 @@ int snr_uncore_pci_init(void)
|
||||
static struct pci_dev *snr_uncore_get_mc_dev(int id)
|
||||
{
|
||||
struct pci_dev *mc_dev = NULL;
|
||||
int phys_id, pkg;
|
||||
int pkg;
|
||||
|
||||
while (1) {
|
||||
mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3451, mc_dev);
|
||||
if (!mc_dev)
|
||||
break;
|
||||
phys_id = uncore_pcibus_to_physid(mc_dev->bus);
|
||||
if (phys_id < 0)
|
||||
continue;
|
||||
pkg = topology_phys_to_logical_pkg(phys_id);
|
||||
if (pkg < 0)
|
||||
continue;
|
||||
else if (pkg == id)
|
||||
pkg = uncore_pcibus_to_dieid(mc_dev->bus);
|
||||
if (pkg == id)
|
||||
break;
|
||||
}
|
||||
return mc_dev;
|
||||
|
@ -80,6 +80,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
|
||||
#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */
|
||||
#define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */
|
||||
#define PERF_X86_EVENT_TOPDOWN 0x4000 /* Count Topdown slots/metrics events */
|
||||
#define PERF_X86_EVENT_PEBS_STLAT 0x8000 /* st+stlat data address sampling */
|
||||
|
||||
static inline bool is_topdown_count(struct perf_event *event)
|
||||
{
|
||||
@ -443,6 +444,10 @@ struct cpu_hw_events {
|
||||
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
|
||||
|
||||
#define INTEL_PSD_CONSTRAINT(c, n) \
|
||||
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_STLAT)
|
||||
|
||||
#define INTEL_PST_CONSTRAINT(c, n) \
|
||||
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
|
||||
@ -682,8 +687,7 @@ struct x86_pmu {
|
||||
|
||||
/* PMI handler bits */
|
||||
unsigned int late_ack :1,
|
||||
enabled_ack :1,
|
||||
counter_freezing :1;
|
||||
enabled_ack :1;
|
||||
/*
|
||||
* sysfs attrs
|
||||
*/
|
||||
@ -724,7 +728,8 @@ struct x86_pmu {
|
||||
pebs_broken :1,
|
||||
pebs_prec_dist :1,
|
||||
pebs_no_tlb :1,
|
||||
pebs_no_isolation :1;
|
||||
pebs_no_isolation :1,
|
||||
pebs_block :1;
|
||||
int pebs_record_size;
|
||||
int pebs_buffer_size;
|
||||
int max_pebs_events;
|
||||
@ -776,6 +781,7 @@ struct x86_pmu {
|
||||
/*
|
||||
* Intel perf metrics
|
||||
*/
|
||||
int num_topdown_events;
|
||||
u64 (*update_topdown_event)(struct perf_event *event);
|
||||
int (*set_topdown_event_period)(struct perf_event *event);
|
||||
|
||||
@ -871,6 +877,8 @@ do { \
|
||||
#define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */
|
||||
#define PMU_FL_TFA 0x20 /* deal with TSX force abort */
|
||||
#define PMU_FL_PAIR 0x40 /* merge counters for large incr. events */
|
||||
#define PMU_FL_INSTR_LATENCY 0x80 /* Support Instruction Latency in PEBS Memory Info Record */
|
||||
#define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */
|
||||
|
||||
#define EVENT_VAR(_id) event_attr_##_id
|
||||
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
|
||||
@ -1060,6 +1068,11 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
|
||||
ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
|
||||
char *page);
|
||||
|
||||
static inline bool fixed_counter_disabled(int i)
|
||||
{
|
||||
return !(x86_pmu.intel_ctrl >> (i + INTEL_PMC_IDX_FIXED));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CPU_SUP_AMD
|
||||
|
||||
int amd_pmu_init(void);
|
||||
@ -1157,6 +1170,8 @@ extern struct event_constraint intel_skl_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_icl_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_spr_pebs_event_constraints[];
|
||||
|
||||
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
|
||||
|
||||
void intel_pmu_pebs_add(struct perf_event *event);
|
||||
|
@ -28,6 +28,7 @@ perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data)
|
||||
for (bit = 0; bit < cnt; bit++) {
|
||||
if (!msr[bit].no_check) {
|
||||
struct attribute_group *grp = msr[bit].grp;
|
||||
u64 mask;
|
||||
|
||||
/* skip entry with no group */
|
||||
if (!grp)
|
||||
@ -44,8 +45,12 @@ perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data)
|
||||
/* Virt sucks; you cannot tell if a R/O MSR is present :/ */
|
||||
if (rdmsrl_safe(msr[bit].msr, &val))
|
||||
continue;
|
||||
|
||||
mask = msr[bit].mask;
|
||||
if (!mask)
|
||||
mask = ~0ULL;
|
||||
/* Disable zero counters if requested. */
|
||||
if (!zero && !val)
|
||||
if (!zero && !(val & mask))
|
||||
continue;
|
||||
|
||||
grp->is_visible = NULL;
|
||||
|
@ -4,10 +4,11 @@
|
||||
#include <linux/sysfs.h>
|
||||
|
||||
struct perf_msr {
|
||||
u64 msr;
|
||||
struct attribute_group *grp;
|
||||
u64 msr;
|
||||
struct attribute_group *grp;
|
||||
bool (*test)(int idx, void *data);
|
||||
bool no_check;
|
||||
bool no_check;
|
||||
u64 mask;
|
||||
};
|
||||
|
||||
unsigned long
|
||||
|
@ -454,16 +454,9 @@ static struct attribute *rapl_events_cores[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static umode_t
|
||||
rapl_not_visible(struct kobject *kobj, struct attribute *attr, int i)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct attribute_group rapl_events_cores_group = {
|
||||
.name = "events",
|
||||
.attrs = rapl_events_cores,
|
||||
.is_visible = rapl_not_visible,
|
||||
};
|
||||
|
||||
static struct attribute *rapl_events_pkg[] = {
|
||||
@ -476,7 +469,6 @@ static struct attribute *rapl_events_pkg[] = {
|
||||
static struct attribute_group rapl_events_pkg_group = {
|
||||
.name = "events",
|
||||
.attrs = rapl_events_pkg,
|
||||
.is_visible = rapl_not_visible,
|
||||
};
|
||||
|
||||
static struct attribute *rapl_events_ram[] = {
|
||||
@ -489,7 +481,6 @@ static struct attribute *rapl_events_ram[] = {
|
||||
static struct attribute_group rapl_events_ram_group = {
|
||||
.name = "events",
|
||||
.attrs = rapl_events_ram,
|
||||
.is_visible = rapl_not_visible,
|
||||
};
|
||||
|
||||
static struct attribute *rapl_events_gpu[] = {
|
||||
@ -502,7 +493,6 @@ static struct attribute *rapl_events_gpu[] = {
|
||||
static struct attribute_group rapl_events_gpu_group = {
|
||||
.name = "events",
|
||||
.attrs = rapl_events_gpu,
|
||||
.is_visible = rapl_not_visible,
|
||||
};
|
||||
|
||||
static struct attribute *rapl_events_psys[] = {
|
||||
@ -515,7 +505,6 @@ static struct attribute *rapl_events_psys[] = {
|
||||
static struct attribute_group rapl_events_psys_group = {
|
||||
.name = "events",
|
||||
.attrs = rapl_events_psys,
|
||||
.is_visible = rapl_not_visible,
|
||||
};
|
||||
|
||||
static bool test_msr(int idx, void *data)
|
||||
@ -523,12 +512,23 @@ static bool test_msr(int idx, void *data)
|
||||
return test_bit(idx, (unsigned long *) data);
|
||||
}
|
||||
|
||||
/* Only lower 32bits of the MSR represents the energy counter */
|
||||
#define RAPL_MSR_MASK 0xFFFFFFFF
|
||||
|
||||
static struct perf_msr intel_rapl_msrs[] = {
|
||||
[PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr },
|
||||
[PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr },
|
||||
[PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr },
|
||||
[PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr },
|
||||
[PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr },
|
||||
[PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr, false, RAPL_MSR_MASK },
|
||||
[PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK },
|
||||
[PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr, false, RAPL_MSR_MASK },
|
||||
[PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr, false, RAPL_MSR_MASK },
|
||||
[PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr, false, RAPL_MSR_MASK },
|
||||
};
|
||||
|
||||
static struct perf_msr intel_rapl_spr_msrs[] = {
|
||||
[PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr, false, RAPL_MSR_MASK },
|
||||
[PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK },
|
||||
[PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr, false, RAPL_MSR_MASK },
|
||||
[PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr, false, RAPL_MSR_MASK },
|
||||
[PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr, true, RAPL_MSR_MASK },
|
||||
};
|
||||
|
||||
/*
|
||||
@ -761,7 +761,7 @@ static struct rapl_model model_spr = {
|
||||
BIT(PERF_RAPL_PSYS),
|
||||
.unit_quirk = RAPL_UNIT_QUIRK_INTEL_SPR,
|
||||
.msr_power_unit = MSR_RAPL_POWER_UNIT,
|
||||
.rapl_msrs = intel_rapl_msrs,
|
||||
.rapl_msrs = intel_rapl_spr_msrs,
|
||||
};
|
||||
|
||||
static struct rapl_model model_amd_fam17h = {
|
||||
|
@ -58,14 +58,17 @@ struct arch_specific_insn {
|
||||
/* copy of the original instruction */
|
||||
kprobe_opcode_t *insn;
|
||||
/*
|
||||
* boostable = false: This instruction type is not boostable.
|
||||
* boostable = true: This instruction has been boosted: we have
|
||||
* boostable = 0: This instruction type is not boostable.
|
||||
* boostable = 1: This instruction has been boosted: we have
|
||||
* added a relative jump after the instruction copy in insn,
|
||||
* so no single-step and fixup are needed (unless there's
|
||||
* a post_handler).
|
||||
*/
|
||||
bool boostable;
|
||||
bool if_modifier;
|
||||
unsigned boostable:1;
|
||||
unsigned if_modifier:1;
|
||||
unsigned is_call:1;
|
||||
unsigned is_pushf:1;
|
||||
unsigned is_abs_ip:1;
|
||||
/* Number of bytes of text poked */
|
||||
int tp_len;
|
||||
};
|
||||
|
@ -261,8 +261,12 @@ struct x86_pmu_capability {
|
||||
#define INTEL_PMC_IDX_TD_BAD_SPEC (INTEL_PMC_IDX_METRIC_BASE + 1)
|
||||
#define INTEL_PMC_IDX_TD_FE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 2)
|
||||
#define INTEL_PMC_IDX_TD_BE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 3)
|
||||
#define INTEL_PMC_IDX_METRIC_END INTEL_PMC_IDX_TD_BE_BOUND
|
||||
#define INTEL_PMC_MSK_TOPDOWN ((0xfull << INTEL_PMC_IDX_METRIC_BASE) | \
|
||||
#define INTEL_PMC_IDX_TD_HEAVY_OPS (INTEL_PMC_IDX_METRIC_BASE + 4)
|
||||
#define INTEL_PMC_IDX_TD_BR_MISPREDICT (INTEL_PMC_IDX_METRIC_BASE + 5)
|
||||
#define INTEL_PMC_IDX_TD_FETCH_LAT (INTEL_PMC_IDX_METRIC_BASE + 6)
|
||||
#define INTEL_PMC_IDX_TD_MEM_BOUND (INTEL_PMC_IDX_METRIC_BASE + 7)
|
||||
#define INTEL_PMC_IDX_METRIC_END INTEL_PMC_IDX_TD_MEM_BOUND
|
||||
#define INTEL_PMC_MSK_TOPDOWN ((0xffull << INTEL_PMC_IDX_METRIC_BASE) | \
|
||||
INTEL_PMC_MSK_FIXED_SLOTS)
|
||||
|
||||
/*
|
||||
@ -280,8 +284,14 @@ struct x86_pmu_capability {
|
||||
#define INTEL_TD_METRIC_BAD_SPEC 0x8100 /* Bad speculation metric */
|
||||
#define INTEL_TD_METRIC_FE_BOUND 0x8200 /* FE bound metric */
|
||||
#define INTEL_TD_METRIC_BE_BOUND 0x8300 /* BE bound metric */
|
||||
#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_BE_BOUND
|
||||
#define INTEL_TD_METRIC_NUM 4
|
||||
/* Level 2 metrics */
|
||||
#define INTEL_TD_METRIC_HEAVY_OPS 0x8400 /* Heavy Operations metric */
|
||||
#define INTEL_TD_METRIC_BR_MISPREDICT 0x8500 /* Branch Mispredict metric */
|
||||
#define INTEL_TD_METRIC_FETCH_LAT 0x8600 /* Fetch Latency metric */
|
||||
#define INTEL_TD_METRIC_MEM_BOUND 0x8700 /* Memory bound metric */
|
||||
|
||||
#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_MEM_BOUND
|
||||
#define INTEL_TD_METRIC_NUM 8
|
||||
|
||||
static inline bool is_metric_idx(int idx)
|
||||
{
|
||||
@ -483,11 +493,7 @@ static inline void perf_check_microcode(void) { }
|
||||
extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
|
||||
extern int x86_perf_get_lbr(struct x86_pmu_lbr *lbr);
|
||||
#else
|
||||
/*
 * Stub variant: reports zero guest-switch MSR entries through *nr and
 * returns NULL instead of a table.
 */
static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
|
||||
{
|
||||
*nr = 0;
|
||||
return NULL;
|
||||
}
|
||||
struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
|
||||
static inline int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
|
||||
{
|
||||
return -1;
|
||||
|
@ -132,26 +132,6 @@ void synthesize_relcall(void *dest, void *from, void *to)
|
||||
}
|
||||
NOKPROBE_SYMBOL(synthesize_relcall);
|
||||
|
||||
/*
|
||||
* Skip the prefixes of the instruction.
|
||||
*/
|
||||
static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn)
|
||||
{
|
||||
insn_attr_t attr;
|
||||
|
||||
attr = inat_get_opcode_attribute((insn_byte_t)*insn);
|
||||
while (inat_is_legacy_prefix(attr)) {
|
||||
insn++;
|
||||
attr = inat_get_opcode_attribute((insn_byte_t)*insn);
|
||||
}
|
||||
#ifdef CONFIG_X86_64
|
||||
if (inat_is_rex_prefix(attr))
|
||||
insn++;
|
||||
#endif
|
||||
return insn;
|
||||
}
|
||||
NOKPROBE_SYMBOL(skip_prefixes);
|
||||
|
||||
/*
|
||||
* Returns non-zero if INSN is boostable.
|
||||
* RIP relative instructions are adjusted at copying time in 64 bits mode
|
||||
@ -311,25 +291,6 @@ static int can_probe(unsigned long paddr)
|
||||
return (addr == paddr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns non-zero if opcode modifies the interrupt flag.
|
||||
*/
|
||||
static int is_IF_modifier(kprobe_opcode_t *insn)
|
||||
{
|
||||
/* Skip prefixes */
|
||||
insn = skip_prefixes(insn);
|
||||
|
||||
switch (*insn) {
|
||||
case 0xfa: /* cli */
|
||||
case 0xfb: /* sti */
|
||||
case 0xcf: /* iret/iretd */
|
||||
case 0x9d: /* popf/popfd */
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy an instruction with recovering modified instruction by kprobes
|
||||
* and adjust the displacement if the instruction uses the %rip-relative
|
||||
@ -411,9 +372,9 @@ static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
|
||||
synthesize_reljump(buf + len, p->ainsn.insn + len,
|
||||
p->addr + insn->length);
|
||||
len += JMP32_INSN_SIZE;
|
||||
p->ainsn.boostable = true;
|
||||
p->ainsn.boostable = 1;
|
||||
} else {
|
||||
p->ainsn.boostable = false;
|
||||
p->ainsn.boostable = 0;
|
||||
}
|
||||
|
||||
return len;
|
||||
@ -450,6 +411,67 @@ void free_insn_page(void *page)
|
||||
module_memfree(page);
|
||||
}
|
||||
|
||||
static void set_resume_flags(struct kprobe *p, struct insn *insn)
|
||||
{
|
||||
insn_byte_t opcode = insn->opcode.bytes[0];
|
||||
|
||||
switch (opcode) {
|
||||
case 0xfa: /* cli */
|
||||
case 0xfb: /* sti */
|
||||
case 0x9d: /* popf/popfd */
|
||||
/* Check whether the instruction modifies Interrupt Flag or not */
|
||||
p->ainsn.if_modifier = 1;
|
||||
break;
|
||||
case 0x9c: /* pushfl */
|
||||
p->ainsn.is_pushf = 1;
|
||||
break;
|
||||
case 0xcf: /* iret */
|
||||
p->ainsn.if_modifier = 1;
|
||||
fallthrough;
|
||||
case 0xc2: /* ret/lret */
|
||||
case 0xc3:
|
||||
case 0xca:
|
||||
case 0xcb:
|
||||
case 0xea: /* jmp absolute -- ip is correct */
|
||||
/* ip is already adjusted, no more changes required */
|
||||
p->ainsn.is_abs_ip = 1;
|
||||
/* Without resume jump, this is boostable */
|
||||
p->ainsn.boostable = 1;
|
||||
break;
|
||||
case 0xe8: /* call relative - Fix return addr */
|
||||
p->ainsn.is_call = 1;
|
||||
break;
|
||||
#ifdef CONFIG_X86_32
|
||||
case 0x9a: /* call absolute -- same as call absolute, indirect */
|
||||
p->ainsn.is_call = 1;
|
||||
p->ainsn.is_abs_ip = 1;
|
||||
break;
|
||||
#endif
|
||||
case 0xff:
|
||||
opcode = insn->opcode.bytes[1];
|
||||
if ((opcode & 0x30) == 0x10) {
|
||||
/*
|
||||
* call absolute, indirect
|
||||
* Fix return addr; ip is correct.
|
||||
* But this is not boostable
|
||||
*/
|
||||
p->ainsn.is_call = 1;
|
||||
p->ainsn.is_abs_ip = 1;
|
||||
break;
|
||||
} else if (((opcode & 0x31) == 0x20) ||
|
||||
((opcode & 0x31) == 0x21)) {
|
||||
/*
|
||||
* jmp near and far, absolute indirect
|
||||
* ip is correct.
|
||||
*/
|
||||
p->ainsn.is_abs_ip = 1;
|
||||
/* Without resume jump, this is boostable */
|
||||
p->ainsn.boostable = 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int arch_copy_kprobe(struct kprobe *p)
|
||||
{
|
||||
struct insn insn;
|
||||
@ -467,8 +489,8 @@ static int arch_copy_kprobe(struct kprobe *p)
|
||||
*/
|
||||
len = prepare_boost(buf, p, &insn);
|
||||
|
||||
/* Check whether the instruction modifies Interrupt Flag or not */
|
||||
p->ainsn.if_modifier = is_IF_modifier(buf);
|
||||
/* Analyze the opcode and set resume flags */
|
||||
set_resume_flags(p, &insn);
|
||||
|
||||
/* Also, displacement change doesn't affect the first byte */
|
||||
p->opcode = buf[0];
|
||||
@ -491,6 +513,9 @@ int arch_prepare_kprobe(struct kprobe *p)
|
||||
|
||||
if (!can_probe((unsigned long)p->addr))
|
||||
return -EILSEQ;
|
||||
|
||||
memset(&p->ainsn, 0, sizeof(p->ainsn));
|
||||
|
||||
/* insn: must be on special executable page on x86. */
|
||||
p->ainsn.insn = get_insn_slot();
|
||||
if (!p->ainsn.insn)
|
||||
@ -806,11 +831,6 @@ NOKPROBE_SYMBOL(trampoline_handler);
|
||||
* 2) If the single-stepped instruction was a call, the return address
|
||||
* that is atop the stack is the address following the copied instruction.
|
||||
* We need to make it the address following the original instruction.
|
||||
*
|
||||
* If this is the first time we've single-stepped the instruction at
|
||||
* this probepoint, and the instruction is boostable, boost it: add a
|
||||
* jump instruction after the copied instruction, that jumps to the next
|
||||
* instruction after the probepoint.
|
||||
*/
|
||||
static void resume_execution(struct kprobe *p, struct pt_regs *regs,
|
||||
struct kprobe_ctlblk *kcb)
|
||||
@ -818,60 +838,20 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs,
|
||||
unsigned long *tos = stack_addr(regs);
|
||||
unsigned long copy_ip = (unsigned long)p->ainsn.insn;
|
||||
unsigned long orig_ip = (unsigned long)p->addr;
|
||||
kprobe_opcode_t *insn = p->ainsn.insn;
|
||||
|
||||
/* Skip prefixes */
|
||||
insn = skip_prefixes(insn);
|
||||
|
||||
regs->flags &= ~X86_EFLAGS_TF;
|
||||
switch (*insn) {
|
||||
case 0x9c: /* pushfl */
|
||||
|
||||
/* Fixup the contents of top of stack */
|
||||
if (p->ainsn.is_pushf) {
|
||||
*tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF);
|
||||
*tos |= kcb->kprobe_old_flags;
|
||||
break;
|
||||
case 0xc2: /* iret/ret/lret */
|
||||
case 0xc3:
|
||||
case 0xca:
|
||||
case 0xcb:
|
||||
case 0xcf:
|
||||
case 0xea: /* jmp absolute -- ip is correct */
|
||||
/* ip is already adjusted, no more changes required */
|
||||
p->ainsn.boostable = true;
|
||||
goto no_change;
|
||||
case 0xe8: /* call relative - Fix return addr */
|
||||
} else if (p->ainsn.is_call) {
|
||||
*tos = orig_ip + (*tos - copy_ip);
|
||||
break;
|
||||
#ifdef CONFIG_X86_32
|
||||
case 0x9a: /* call absolute -- same as call absolute, indirect */
|
||||
*tos = orig_ip + (*tos - copy_ip);
|
||||
goto no_change;
|
||||
#endif
|
||||
case 0xff:
|
||||
if ((insn[1] & 0x30) == 0x10) {
|
||||
/*
|
||||
* call absolute, indirect
|
||||
* Fix return addr; ip is correct.
|
||||
* But this is not boostable
|
||||
*/
|
||||
*tos = orig_ip + (*tos - copy_ip);
|
||||
goto no_change;
|
||||
} else if (((insn[1] & 0x31) == 0x20) ||
|
||||
((insn[1] & 0x31) == 0x21)) {
|
||||
/*
|
||||
* jmp near and far, absolute indirect
|
||||
* ip is correct. And this is boostable
|
||||
*/
|
||||
p->ainsn.boostable = true;
|
||||
goto no_change;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
regs->ip += orig_ip - copy_ip;
|
||||
if (!p->ainsn.is_abs_ip)
|
||||
regs->ip += orig_ip - copy_ip;
|
||||
|
||||
no_change:
|
||||
restore_btf();
|
||||
}
|
||||
NOKPROBE_SYMBOL(resume_execution);
|
||||
|
@ -998,7 +998,7 @@ struct perf_sample_data {
|
||||
struct perf_raw_record *raw;
|
||||
struct perf_branch_stack *br_stack;
|
||||
u64 period;
|
||||
u64 weight;
|
||||
union perf_sample_weight weight;
|
||||
u64 txn;
|
||||
union perf_mem_data_src data_src;
|
||||
|
||||
@ -1047,7 +1047,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
|
||||
data->raw = NULL;
|
||||
data->br_stack = NULL;
|
||||
data->period = period;
|
||||
data->weight = 0;
|
||||
data->weight.full = 0;
|
||||
data->data_src.val = PERF_MEM_NA;
|
||||
data->txn = 0;
|
||||
}
|
||||
|
@ -145,12 +145,14 @@ enum perf_event_sample_format {
|
||||
PERF_SAMPLE_CGROUP = 1U << 21,
|
||||
PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22,
|
||||
PERF_SAMPLE_CODE_PAGE_SIZE = 1U << 23,
|
||||
PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24,
|
||||
|
||||
PERF_SAMPLE_MAX = 1U << 24, /* non-ABI */
|
||||
PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */
|
||||
|
||||
__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
|
||||
};
|
||||
|
||||
#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT)
|
||||
/*
|
||||
* values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
|
||||
*
|
||||
@ -912,7 +914,24 @@ enum perf_event_type {
|
||||
* char data[size];
|
||||
* u64 dyn_size; } && PERF_SAMPLE_STACK_USER
|
||||
*
|
||||
* { u64 weight; } && PERF_SAMPLE_WEIGHT
|
||||
* { union perf_sample_weight
|
||||
* {
|
||||
* u64 full; && PERF_SAMPLE_WEIGHT
|
||||
* #if defined(__LITTLE_ENDIAN_BITFIELD)
|
||||
* struct {
|
||||
* u32 var1_dw;
|
||||
* u16 var2_w;
|
||||
* u16 var3_w;
|
||||
* } && PERF_SAMPLE_WEIGHT_STRUCT
|
||||
* #elif defined(__BIG_ENDIAN_BITFIELD)
|
||||
* struct {
|
||||
* u16 var3_w;
|
||||
* u16 var2_w;
|
||||
* u32 var1_dw;
|
||||
* } && PERF_SAMPLE_WEIGHT_STRUCT
|
||||
* #endif
|
||||
* }
|
||||
* }
|
||||
* { u64 data_src; } && PERF_SAMPLE_DATA_SRC
|
||||
* { u64 transaction; } && PERF_SAMPLE_TRANSACTION
|
||||
* { u64 abi; # enum perf_sample_regs_abi
|
||||
@ -1159,14 +1178,16 @@ union perf_mem_data_src {
|
||||
mem_lvl_num:4, /* memory hierarchy level number */
|
||||
mem_remote:1, /* remote */
|
||||
mem_snoopx:2, /* snoop mode, ext */
|
||||
mem_rsvd:24;
|
||||
mem_blk:3, /* access blocked */
|
||||
mem_rsvd:21;
|
||||
};
|
||||
};
|
||||
#elif defined(__BIG_ENDIAN_BITFIELD)
|
||||
union perf_mem_data_src {
|
||||
__u64 val;
|
||||
struct {
|
||||
__u64 mem_rsvd:24,
|
||||
__u64 mem_rsvd:21,
|
||||
mem_blk:3, /* access blocked */
|
||||
mem_snoopx:2, /* snoop mode, ext */
|
||||
mem_remote:1, /* remote */
|
||||
mem_lvl_num:4, /* memory hierarchy level number */
|
||||
@ -1249,6 +1270,12 @@ union perf_mem_data_src {
|
||||
#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
|
||||
#define PERF_MEM_TLB_SHIFT 26
|
||||
|
||||
/* Access blocked */
|
||||
#define PERF_MEM_BLK_NA 0x01 /* not available */
|
||||
#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */
|
||||
#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */
|
||||
#define PERF_MEM_BLK_SHIFT 40
|
||||
|
||||
#define PERF_MEM_S(a, s) \
|
||||
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
|
||||
|
||||
@ -1280,4 +1307,23 @@ struct perf_branch_entry {
|
||||
reserved:40;
|
||||
};
|
||||
|
||||
/*
 * Sample weight payload: either a single 64-bit value (full) or three
 * sub-fields sharing the same 64 bits of storage.  The member order of the
 * anonymous struct is reversed between the two bitfield-endianness
 * variants; a build that defines neither macro fails with #error.
 */
union perf_sample_weight {
|
||||
__u64 full; /* the whole weight as one 64-bit value */
|
||||
#if defined(__LITTLE_ENDIAN_BITFIELD)
|
||||
struct {
|
||||
__u32 var1_dw; /* 32-bit sub-field, first in this layout */
|
||||
__u16 var2_w; /* 16-bit sub-field */
|
||||
__u16 var3_w; /* 16-bit sub-field */
|
||||
};
|
||||
#elif defined(__BIG_ENDIAN_BITFIELD)
|
||||
struct {
|
||||
__u16 var3_w; /* same fields, declared in reverse order */
|
||||
__u16 var2_w;
|
||||
__u32 var1_dw;
|
||||
};
|
||||
#else
|
||||
#error "Unknown endianness"
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif /* _UAPI_LINUX_PERF_EVENT_H */
|
||||
|
@ -1866,8 +1866,8 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
|
||||
if (sample_type & PERF_SAMPLE_PERIOD)
|
||||
size += sizeof(data->period);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_WEIGHT)
|
||||
size += sizeof(data->weight);
|
||||
if (sample_type & PERF_SAMPLE_WEIGHT_TYPE)
|
||||
size += sizeof(data->weight.full);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_READ)
|
||||
size += event->read_size;
|
||||
@ -6896,8 +6896,8 @@ void perf_output_sample(struct perf_output_handle *handle,
|
||||
data->regs_user.regs);
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_WEIGHT)
|
||||
perf_output_put(handle, data->weight);
|
||||
if (sample_type & PERF_SAMPLE_WEIGHT_TYPE)
|
||||
perf_output_put(handle, data->weight.full);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_DATA_SRC)
|
||||
perf_output_put(handle, data->data_src.val);
|
||||
@ -11573,6 +11573,9 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
|
||||
if (attr->sample_type & PERF_SAMPLE_CGROUP)
|
||||
return -EINVAL;
|
||||
#endif
|
||||
if ((attr->sample_type & PERF_SAMPLE_WEIGHT) &&
|
||||
(attr->sample_type & PERF_SAMPLE_WEIGHT_STRUCT))
|
||||
return -EINVAL;
|
||||
|
||||
out:
|
||||
return ret;
|
||||
|
Loading…
Reference in New Issue
Block a user