Performance events changes for v6.11:

 - Intel PT support enhancements & fixes
 - Fix leaked SIGTRAP events
 - Improve and fix the Intel uncore driver
 - Add support for Intel HBM and CXL uncore counters
 - Add Intel Lunar Lake and Arrow Lake support
 - AMD uncore driver fixes
 - Make SIGTRAP and __perf_pending_irq() work on RT
 - Micro-optimizations
 - Misc cleanups and fixes

Signed-off-by: Ingo Molnar <mingo@kernel.org>

-----BEGIN PGP SIGNATURE-----

iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmaWjncRHG1pbmdvQGtl
cm5lbC5vcmcACgkQEnMQ0APhK1iZyg//TSafjCK4N9fyXrPdPqf8L7ntX5uYf0rd
uVZpEo/+VGvuFhznHnZIV2DLetvuwYZcUWszCqQMYfokGGi6WI1/k4MeZkSpN5QE
p5mFk6gW3cmpHT9bECg7mKQH+w7Qna/b6mnA0HYTFxPGmQKdQDl1/S+ZsgWedxpC
4V3re7/FzenFVS45DwSMPi9s7uZzZhVhTSgb4XLy+0Da4S0iRULItBa8HT8HmqE5
v5aQlw3mmwKPUWvyPMi3Sw6RRWK3C+n5ZxWswSYoLSM3dsp1ZD+YYqtOv2GqAx8v
JoL0SOnGnNCfxGHh0kz5D2hztDvq61Enotih2gz7HxvdWh2DasNp4yS1USGQhu5h
VJnKNA0TfOUaYqWFVj0EgRVhDX79lMwSHTkR1DZd4vM2GDigHeRPh0zGSn2w/koV
oCRxFfBoktHBnX0Te1NE2BhojbuKp25vTGK6GriVcHt/RNpuz6hTxsjdJzHCAlVX
M349l0EpUJafvfaIN9zF22uw22J8P9y9JYqI6ebkUIKiuoT9LuafVYhQupSE9H4u
IqlozPCTNw6eAQcUo03gkl3n+SY/DZH6eU2ycKgEp3r7TDGYbJPwxY1BgOHbwi4U
lySM07leso2accSVAz7GDMI3ejj6Sx64asWS1FSwbajDflouaIK2jtey+1IOdXfv
hHY65tomV8U=
=gguT
-----END PGP SIGNATURE-----

Merge tag 'perf-core-2024-07-16' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull performance events updates from Ingo Molnar:

 - Intel PT support enhancements & fixes
 - Fix leaked SIGTRAP events
 - Improve and fix the Intel uncore driver
 - Add support for Intel HBM and CXL uncore counters
 - Add Intel Lunar Lake and Arrow Lake support
 - AMD uncore driver fixes
 - Make SIGTRAP and __perf_pending_irq() work on RT
 - Micro-optimizations
 - Misc cleanups and fixes

* tag 'perf-core-2024-07-16' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
  perf/x86/intel: Add a distinct name for Granite Rapids
  perf/x86/intel/ds: Fix non 0 retire latency on Raptorlake
  perf/x86/intel: Hide Topdown metrics events if the feature is not enumerated
  perf/x86/intel/uncore: Fix the bits of the CHA extended umask for SPR
  perf: Split __perf_pending_irq() out of perf_pending_irq()
  perf: Don't disable preemption in perf_pending_task().
  perf: Move swevent_htable::recursion into task_struct.
  perf: Shrink the size of the recursion counter.
  perf: Enqueue SIGTRAP always via task_work.
  task_work: Add TWA_NMI_CURRENT as an additional notify mode.
  perf: Move irq_work_queue() where the event is prepared.
  perf: Fix event leak upon exec and file release
  perf: Fix event leak upon exit
  task_work: Introduce task_work_cancel() again
  task_work: s/task_work_cancel()/task_work_cancel_func()/
  perf/x86/amd/uncore: Fix DF and UMC domain identification
  perf/x86/amd/uncore: Avoid PMU registration if counters are unavailable
  perf/x86/intel: Support Perfmon MSRs aliasing
  perf/x86/intel: Support PERFEVTSEL extension
  perf/x86: Add config_mask to represent EVENTSEL bitmask
  ...
commit 576a997c63
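The bulk of the x86 hunks below replace the old num_counters / num_counters_fixed integers with cntr_mask64 / fixed_cntr_mask64 bitmasks, so counter iteration becomes a walk over set bits instead of an index loop. The following standalone sketch is not kernel code: GENMASK_ULL and the bit walk are re-implemented in plain C purely to illustrate why a mask generalizes a count (it can describe non-contiguous counter layouts, e.g. on hybrid CPUs).

/*
 * Minimal sketch of the counter-mask idea used throughout this series.
 * A plain count can only describe counters 0..n-1; a bitmask can also
 * describe sparse layouts.
 */
#include <stdint.h>
#include <stdio.h>

/* Local stand-in for the kernel's GENMASK_ULL(h, l). */
#define GENMASK_ULL(h, l) \
    (((~0ULL) >> (63 - (h))) & ~((1ULL << (l)) - 1ULL))

int main(void)
{
    /* Old style: a count implies counters 0..num_counters-1. */
    int num_counters = 6;
    for (int idx = 0; idx < num_counters; idx++)
        printf("old: counter %d\n", idx);

    /* New style: a mask can have holes (here: 0-5 plus 8). */
    uint64_t cntr_mask = GENMASK_ULL(5, 0) | (1ULL << 8);
    for (int idx = 0; idx < 64; idx++) {
        if (cntr_mask & (1ULL << idx))
            printf("new: counter %d\n", idx);
    }
    return 0;
}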
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -432,8 +432,10 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
     * be removed on one CPU at a time AND PMU is disabled
     * when we come here
     */
-    for (i = 0; i < x86_pmu.num_counters; i++) {
-        if (cmpxchg(nb->owners + i, event, NULL) == event)
+    for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
+        struct perf_event *tmp = event;
+
+        if (try_cmpxchg(nb->owners + i, &tmp, NULL))
             break;
     }
 }
@@ -499,7 +501,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
     * because of successive calls to x86_schedule_events() from
     * hw_perf_group_sched_in() without hw_perf_enable()
     */
-    for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
+    for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) {
         if (new == -1 || hwc->idx == idx)
             /* assign free slot, prefer hwc->idx */
             old = cmpxchg(nb->owners + idx, NULL, event);
@@ -542,7 +544,7 @@ static struct amd_nb *amd_alloc_nb(int cpu)
     /*
      * initialize all possible NB constraints
      */
-    for (i = 0; i < x86_pmu.num_counters; i++) {
+    for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
         __set_bit(i, nb->event_constraints[i].idxmsk);
         nb->event_constraints[i].weight = 1;
     }
@@ -735,7 +737,7 @@ static void amd_pmu_check_overflow(void)
     * counters are always enabled when this function is called and
     * ARCH_PERFMON_EVENTSEL_INT is always set.
     */
-    for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+    for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
         if (!test_bit(idx, cpuc->active_mask))
             continue;
 
@@ -755,7 +757,7 @@ static void amd_pmu_enable_all(int added)
 
     amd_brs_enable_all();
 
-    for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+    for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
         /* only activate events which are marked as active */
         if (!test_bit(idx, cpuc->active_mask))
             continue;
@@ -978,7 +980,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
     /* Clear any reserved bits set by buggy microcode */
     status &= amd_pmu_global_cntr_mask;
 
-    for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+    for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
         if (!test_bit(idx, cpuc->active_mask))
             continue;
 
@@ -1313,7 +1315,7 @@ static __initconst const struct x86_pmu amd_pmu = {
     .addr_offset = amd_pmu_addr_offset,
     .event_map = amd_pmu_event_map,
     .max_events = ARRAY_SIZE(amd_perfmon_event_map),
-    .num_counters = AMD64_NUM_COUNTERS,
+    .cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0),
     .add = amd_pmu_add_event,
     .del = amd_pmu_del_event,
     .cntval_bits = 48,
@@ -1412,7 +1414,7 @@ static int __init amd_core_pmu_init(void)
     */
    x86_pmu.eventsel = MSR_F15H_PERF_CTL;
    x86_pmu.perfctr = MSR_F15H_PERF_CTR;
-   x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
+   x86_pmu.cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0);
 
    /* Check for Performance Monitoring v2 support */
    if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
@@ -1422,9 +1424,9 @@ static int __init amd_core_pmu_init(void)
        x86_pmu.version = 2;
 
        /* Find the number of available Core PMCs */
-       x86_pmu.num_counters = ebx.split.num_core_pmc;
+       x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0);
 
-       amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
+       amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64;
 
        /* Update PMC handling functions */
        x86_pmu.enable_all = amd_pmu_v2_enable_all;
@@ -1452,12 +1454,12 @@ static int __init amd_core_pmu_init(void)
     * even numbered counter that has a consecutive adjacent odd
     * numbered counter following it.
     */
-   for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
+   for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2)
        even_ctr_mask |= BIT_ULL(i);
 
    pair_constraint = (struct event_constraint)
        __EVENT_CONSTRAINT(0, even_ctr_mask, 0,
-                  x86_pmu.num_counters / 2, 0,
+                  x86_pmu_max_num_counters(NULL) / 2, 0,
                   PERF_X86_EVENT_PAIR);
 
    x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -162,7 +162,9 @@ static int amd_uncore_add(struct perf_event *event, int flags)
    /* if not, take the first available counter */
    hwc->idx = -1;
    for (i = 0; i < pmu->num_counters; i++) {
-       if (cmpxchg(&ctx->events[i], NULL, event) == NULL) {
+       struct perf_event *tmp = NULL;
+
+       if (try_cmpxchg(&ctx->events[i], &tmp, event)) {
            hwc->idx = i;
            break;
        }
@@ -196,7 +198,9 @@ static void amd_uncore_del(struct perf_event *event, int flags)
    event->pmu->stop(event, PERF_EF_UPDATE);
 
    for (i = 0; i < pmu->num_counters; i++) {
-       if (cmpxchg(&ctx->events[i], event, NULL) == event)
+       struct perf_event *tmp = event;
+
+       if (try_cmpxchg(&ctx->events[i], &tmp, NULL))
            break;
    }
 
@@ -639,7 +643,7 @@ void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
    info.split.aux_data = 0;
    info.split.num_pmcs = NUM_COUNTERS_NB;
    info.split.gid = 0;
-   info.split.cid = topology_die_id(cpu);
+   info.split.cid = topology_logical_package_id(cpu);
 
    if (pmu_version >= 2) {
        ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
@@ -654,17 +658,20 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 {
    struct attribute **df_attr = amd_uncore_df_format_attr;
    struct amd_uncore_pmu *pmu;
+   int num_counters;
 
    /* Run just once */
    if (uncore->init_done)
        return amd_uncore_ctx_init(uncore, cpu);
 
+   num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+   if (!num_counters)
+       goto done;
+
    /* No grouping, single instance for a system */
    uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
-   if (!uncore->pmus) {
-       uncore->num_pmus = 0;
+   if (!uncore->pmus)
        goto done;
-   }
 
    /*
     * For Family 17h and above, the Northbridge counters are repurposed
@@ -674,7 +681,7 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
    pmu = &uncore->pmus[0];
    strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb",
        sizeof(pmu->name));
-   pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+   pmu->num_counters = num_counters;
    pmu->msr_base = MSR_F15H_NB_PERF_CTL;
    pmu->rdpmc_base = RDPMC_BASE_NB;
    pmu->group = amd_uncore_ctx_gid(uncore, cpu);
@@ -785,17 +792,20 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 {
    struct attribute **l3_attr = amd_uncore_l3_format_attr;
    struct amd_uncore_pmu *pmu;
+   int num_counters;
 
    /* Run just once */
    if (uncore->init_done)
        return amd_uncore_ctx_init(uncore, cpu);
 
+   num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+   if (!num_counters)
+       goto done;
+
    /* No grouping, single instance for a system */
    uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
-   if (!uncore->pmus) {
-       uncore->num_pmus = 0;
+   if (!uncore->pmus)
        goto done;
-   }
 
    /*
    * For Family 17h and above, L3 cache counters are available instead
@@ -805,7 +815,7 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
    pmu = &uncore->pmus[0];
    strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2",
        sizeof(pmu->name));
-   pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+   pmu->num_counters = num_counters;
    pmu->msr_base = MSR_F16H_L2I_PERF_CTL;
    pmu->rdpmc_base = RDPMC_BASE_LLC;
    pmu->group = amd_uncore_ctx_gid(uncore, cpu);
@@ -893,8 +903,8 @@ void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
    cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
    info.split.aux_data = ecx; /* stash active mask */
    info.split.num_pmcs = ebx.split.num_umc_pmc;
-   info.split.gid = topology_die_id(cpu);
-   info.split.cid = topology_die_id(cpu);
+   info.split.gid = topology_logical_package_id(cpu);
+   info.split.cid = topology_logical_package_id(cpu);
    *per_cpu_ptr(uncore->info, cpu) = info;
 }
 
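Several hunks above (the nb->owners loops in amd/core.c and the ctx->events loops in amd/uncore.c) convert a cmpxchg()-and-compare into try_cmpxchg(), which takes a pointer to the expected value and returns a boolean. A rough standalone sketch of that claim/release pattern using C11 atomics rather than the kernel primitives; the four-slot array and helper names are made up for illustration.

/* Sketch of claiming/releasing ownership slots with compare-exchange. */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic(void *) owners[4];

/* Claim the first free slot, like the add paths above. */
static int claim_slot(void *event)
{
    for (int i = 0; i < 4; i++) {
        void *expected = NULL;
        if (atomic_compare_exchange_strong(&owners[i], &expected, event))
            return i;
    }
    return -1;
}

/* Release the slot owned by @event, like the del/put paths above. */
static void release_slot(void *event)
{
    for (int i = 0; i < 4; i++) {
        void *expected = event;
        if (atomic_compare_exchange_strong(&owners[i], &expected, NULL))
            break;
    }
}

int main(void)
{
    int ev;

    printf("claimed slot %d\n", claim_slot(&ev));
    release_slot(&ev);
    return 0;
}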
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -189,29 +189,31 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
-static inline int get_possible_num_counters(void)
+static inline u64 get_possible_counter_mask(void)
 {
-   int i, num_counters = x86_pmu.num_counters;
+   u64 cntr_mask = x86_pmu.cntr_mask64;
+   int i;
 
    if (!is_hybrid())
-       return num_counters;
+       return cntr_mask;
 
    for (i = 0; i < x86_pmu.num_hybrid_pmus; i++)
-       num_counters = max_t(int, num_counters, x86_pmu.hybrid_pmu[i].num_counters);
+       cntr_mask |= x86_pmu.hybrid_pmu[i].cntr_mask64;
 
-   return num_counters;
+   return cntr_mask;
 }
 
 static bool reserve_pmc_hardware(void)
 {
-   int i, num_counters = get_possible_num_counters();
+   u64 cntr_mask = get_possible_counter_mask();
+   int i, end;
 
-   for (i = 0; i < num_counters; i++) {
+   for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
        if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
            goto perfctr_fail;
    }
 
-   for (i = 0; i < num_counters; i++) {
+   for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
        if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
            goto eventsel_fail;
    }
@@ -219,13 +221,14 @@ static bool reserve_pmc_hardware(void)
    return true;
 
 eventsel_fail:
-   for (i--; i >= 0; i--)
+   end = i;
+   for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
        release_evntsel_nmi(x86_pmu_config_addr(i));
 
-   i = num_counters;
+   i = X86_PMC_IDX_MAX;
 
 perfctr_fail:
-   for (i--; i >= 0; i--)
+   end = i;
+   for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
        release_perfctr_nmi(x86_pmu_event_addr(i));
 
    return false;
@@ -233,9 +236,10 @@ perfctr_fail:
 
 static void release_pmc_hardware(void)
 {
-   int i, num_counters = get_possible_num_counters();
+   u64 cntr_mask = get_possible_counter_mask();
+   int i;
 
-   for (i = 0; i < num_counters; i++) {
+   for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
        release_perfctr_nmi(x86_pmu_event_addr(i));
        release_evntsel_nmi(x86_pmu_config_addr(i));
    }
@@ -248,7 +252,8 @@ static void release_pmc_hardware(void) {}
 
 #endif
 
-bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
+bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
+            unsigned long *fixed_cntr_mask)
 {
    u64 val, val_fail = -1, val_new= ~0;
    int i, reg, reg_fail = -1, ret = 0;
@@ -259,7 +264,7 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
     * Check to see if the BIOS enabled any of the counters, if so
     * complain and bail.
     */
-   for (i = 0; i < num_counters; i++) {
+   for_each_set_bit(i, cntr_mask, X86_PMC_IDX_MAX) {
        reg = x86_pmu_config_addr(i);
        ret = rdmsrl_safe(reg, &val);
        if (ret)
@@ -273,12 +278,12 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
        }
    }
 
-   if (num_counters_fixed) {
+   if (*(u64 *)fixed_cntr_mask) {
        reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
        ret = rdmsrl_safe(reg, &val);
        if (ret)
            goto msr_fail;
-       for (i = 0; i < num_counters_fixed; i++) {
+       for_each_set_bit(i, fixed_cntr_mask, X86_PMC_IDX_MAX) {
            if (fixed_counter_disabled(i, pmu))
                continue;
            if (val & (0x03ULL << i*4)) {
@@ -619,7 +624,7 @@ int x86_pmu_hw_config(struct perf_event *event)
        event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
 
    if (event->attr.type == event->pmu->type)
-       event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
+       event->hw.config |= x86_pmu_get_event_config(event);
 
    if (event->attr.sample_period && x86_pmu.limit_period) {
        s64 left = event->attr.sample_period;
@@ -679,7 +684,7 @@ void x86_pmu_disable_all(void)
    struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    int idx;
 
-   for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+   for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
        struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
        u64 val;
 
@@ -736,7 +741,7 @@ void x86_pmu_enable_all(int added)
    struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    int idx;
 
-   for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+   for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
        struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
 
        if (!test_bit(idx, cpuc->active_mask))
@@ -975,7 +980,6 @@ EXPORT_SYMBOL_GPL(perf_assign_events);
 
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
-   int num_counters = hybrid(cpuc->pmu, num_counters);
    struct event_constraint *c;
    struct perf_event *e;
    int n0, i, wmin, wmax, unsched = 0;
@@ -1051,7 +1055,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 
    /* slow path */
    if (i != n) {
-       int gpmax = num_counters;
+       int gpmax = x86_pmu_max_num_counters(cpuc->pmu);
 
        /*
         * Do not allow scheduling of more than half the available
@@ -1072,7 +1076,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
         * the extra Merge events needed by large increment events.
         */
        if (x86_pmu.flags & PMU_FL_PAIR) {
-           gpmax = num_counters - cpuc->n_pair;
+           gpmax -= cpuc->n_pair;
            WARN_ON(gpmax <= 0);
        }
 
@@ -1157,12 +1161,10 @@ static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
 */
 static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
 {
-   int num_counters = hybrid(cpuc->pmu, num_counters);
-   int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
    struct perf_event *event;
    int n, max_count;
 
-   max_count = num_counters + num_counters_fixed;
+   max_count = x86_pmu_num_counters(cpuc->pmu) + x86_pmu_num_counters_fixed(cpuc->pmu);
 
    /* current number of events already accepted */
    n = cpuc->n_events;
@@ -1234,8 +1236,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
        fallthrough;
    case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
        hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-       hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
-               (idx - INTEL_PMC_IDX_FIXED);
+       hwc->event_base = x86_pmu_fixed_ctr_addr(idx - INTEL_PMC_IDX_FIXED);
        hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) |
                    INTEL_PMC_FIXED_RDPMC_BASE;
        break;
@@ -1522,13 +1523,13 @@ void perf_event_print_debug(void)
    u64 pebs, debugctl;
    int cpu = smp_processor_id();
    struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-   int num_counters = hybrid(cpuc->pmu, num_counters);
-   int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
+   unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask);
+   unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask);
    struct event_constraint *pebs_constraints = hybrid(cpuc->pmu, pebs_constraints);
    unsigned long flags;
    int idx;
 
-   if (!num_counters)
+   if (!*(u64 *)cntr_mask)
        return;
 
    local_irq_save(flags);
@@ -1555,7 +1556,7 @@ void perf_event_print_debug(void)
    }
    pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 
-   for (idx = 0; idx < num_counters; idx++) {
+   for_each_set_bit(idx, cntr_mask, X86_PMC_IDX_MAX) {
        rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
        rdmsrl(x86_pmu_event_addr(idx), pmc_count);
 
@@ -1568,10 +1569,10 @@ void perf_event_print_debug(void)
        pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
            cpu, idx, prev_left);
    }
-   for (idx = 0; idx < num_counters_fixed; idx++) {
+   for_each_set_bit(idx, fixed_cntr_mask, X86_PMC_IDX_MAX) {
        if (fixed_counter_disabled(idx, cpuc->pmu))
            continue;
-       rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
+       rdmsrl(x86_pmu_fixed_ctr_addr(idx), pmc_count);
 
        pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
            cpu, idx, pmc_count);
@@ -1682,7 +1683,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
    */
    apic_write(APIC_LVTPC, APIC_DM_NMI);
 
-   for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+   for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
        if (!test_bit(idx, cpuc->active_mask))
            continue;
 
@@ -2038,18 +2039,15 @@ static void _x86_pmu_read(struct perf_event *event)
    static_call(x86_pmu_update)(event);
 }
 
-void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
-             u64 intel_ctrl)
+void x86_pmu_show_pmu_cap(struct pmu *pmu)
 {
    pr_info("... version: %d\n", x86_pmu.version);
    pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
-   pr_info("... generic registers: %d\n", num_counters);
+   pr_info("... generic registers: %d\n", x86_pmu_num_counters(pmu));
    pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
    pr_info("... max period: %016Lx\n", x86_pmu.max_period);
-   pr_info("... fixed-purpose events: %lu\n",
-           hweight64((((1ULL << num_counters_fixed) - 1)
-                   << INTEL_PMC_IDX_FIXED) & intel_ctrl));
-   pr_info("... event mask: %016Lx\n", intel_ctrl);
+   pr_info("... fixed-purpose events: %d\n", x86_pmu_num_counters_fixed(pmu));
+   pr_info("... event mask: %016Lx\n", hybrid(pmu, intel_ctrl));
 }
 
 static int __init init_hw_perf_events(void)
@@ -2086,7 +2084,7 @@ static int __init init_hw_perf_events(void)
    pmu_check_apic();
 
    /* sanity check that the hardware exists or is emulated */
-   if (!check_hw_exists(&pmu, x86_pmu.num_counters, x86_pmu.num_counters_fixed))
+   if (!check_hw_exists(&pmu, x86_pmu.cntr_mask, x86_pmu.fixed_cntr_mask))
        goto out_bad_pmu;
 
    pr_cont("%s PMU driver.\n", x86_pmu.name);
@@ -2097,14 +2095,17 @@ static int __init init_hw_perf_events(void)
        quirk->func();
 
    if (!x86_pmu.intel_ctrl)
-       x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+       x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;
+
+   if (!x86_pmu.config_mask)
+       x86_pmu.config_mask = X86_RAW_EVENT_MASK;
 
    perf_events_lapic_init();
    register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
 
    unconstrained = (struct event_constraint)
-       __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
-                  0, x86_pmu.num_counters, 0, 0);
+       __EVENT_CONSTRAINT(0, x86_pmu.cntr_mask64,
+                  0, x86_pmu_num_counters(NULL), 0, 0);
 
    x86_pmu_format_group.attrs = x86_pmu.format_attrs;
 
@@ -2113,11 +2114,8 @@ static int __init init_hw_perf_events(void)
 
    pmu.attr_update = x86_pmu.attr_update;
 
-   if (!is_hybrid()) {
-       x86_pmu_show_pmu_cap(x86_pmu.num_counters,
-                    x86_pmu.num_counters_fixed,
-                    x86_pmu.intel_ctrl);
-   }
+   if (!is_hybrid())
+       x86_pmu_show_pmu_cap(NULL);
 
    if (!x86_pmu.read)
        x86_pmu.read = _x86_pmu_read;
@@ -2481,10 +2479,10 @@ void perf_clear_dirty_counters(void)
    for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
        if (i >= INTEL_PMC_IDX_FIXED) {
            /* Metrics and fake events don't have corresponding HW counters. */
-           if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed))
+           if (!test_bit(i - INTEL_PMC_IDX_FIXED, hybrid(cpuc->pmu, fixed_cntr_mask)))
                continue;
 
-           wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
+           wrmsrl(x86_pmu_fixed_ctr_addr(i - INTEL_PMC_IDX_FIXED), 0);
        } else {
            wrmsrl(x86_pmu_event_addr(i), 0);
        }
@@ -2986,8 +2984,8 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
     * base PMU holds the correct number of counters for P-cores.
     */
    cap->version = x86_pmu.version;
-   cap->num_counters_gp = x86_pmu.num_counters;
-   cap->num_counters_fixed = x86_pmu.num_counters_fixed;
+   cap->num_counters_gp = x86_pmu_num_counters(NULL);
+   cap->num_counters_fixed = x86_pmu_num_counters_fixed(NULL);
    cap->bit_width_gp = x86_pmu.cntval_bits;
    cap->bit_width_fixed = x86_pmu.cntval_bits;
    cap->events_mask = (unsigned int)x86_pmu.events_maskl;
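The get_possible_counter_mask() change above replaces "take the largest num_counters across hybrid PMUs" with "OR the per-PMU counter masks together", so the reserve/release paths cover every counter any PMU might use. A minimal standalone sketch of that union; the fake_pmu struct is a made-up stand-in for struct x86_hybrid_pmu.

#include <stdint.h>
#include <stdio.h>

struct fake_pmu {
    uint64_t cntr_mask64;
};

/* Union of all per-PMU counter masks, as get_possible_counter_mask() does. */
static uint64_t possible_counter_mask(const struct fake_pmu *pmus, int n)
{
    uint64_t mask = 0;

    for (int i = 0; i < n; i++)
        mask |= pmus[i].cntr_mask64;
    return mask;
}

int main(void)
{
    struct fake_pmu pmus[] = {
        { .cntr_mask64 = 0x3f },   /* e.g. 6 P-core counters */
        { .cntr_mask64 = 0xff },   /* e.g. 8 E-core counters */
    };

    printf("union mask: %#llx\n",
           (unsigned long long)possible_counter_mask(pmus, 2));
    return 0;
}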
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -220,6 +220,17 @@ static struct event_constraint intel_grt_event_constraints[] __read_mostly = {
    EVENT_CONSTRAINT_END
 };
 
+static struct event_constraint intel_skt_event_constraints[] __read_mostly = {
+   FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+   FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+   FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
+   FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */
+   FIXED_EVENT_CONSTRAINT(0x0073, 4), /* TOPDOWN_BAD_SPECULATION.ALL */
+   FIXED_EVENT_CONSTRAINT(0x019c, 5), /* TOPDOWN_FE_BOUND.ALL */
+   FIXED_EVENT_CONSTRAINT(0x02c2, 6), /* TOPDOWN_RETIRING.ALL */
+   EVENT_CONSTRAINT_END
+};
+
 static struct event_constraint intel_skl_event_constraints[] = {
    FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
    FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -370,6 +381,55 @@ static struct extra_reg intel_rwc_extra_regs[] __read_mostly = {
    EVENT_EXTRA_END
 };
 
+static struct event_constraint intel_lnc_event_constraints[] = {
+   FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+   FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */
+   FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+   FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+   FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */
+   FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
+   METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
+   METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),
+   METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2),
+   METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3),
+   METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4),
+   METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5),
+   METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6),
+   METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7),
+
+   INTEL_UEVENT_CONSTRAINT(0x0148, 0x4),
+   INTEL_UEVENT_CONSTRAINT(0x0175, 0x4),
+
+   INTEL_EVENT_CONSTRAINT(0x2e, 0x3ff),
+   INTEL_EVENT_CONSTRAINT(0x3c, 0x3ff),
+   /*
+    * Generally event codes < 0x90 are restricted to counters 0-3.
+    * The 0x2E and 0x3C are exception, which has no restriction.
+    */
+   INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf),
+
+   INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf),
+   INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf),
+   INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
+   INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
+   INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1),
+   INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1),
+   INTEL_UEVENT_CONSTRAINT(0x10a4, 0x1),
+   INTEL_UEVENT_CONSTRAINT(0x01b1, 0x8),
+   INTEL_UEVENT_CONSTRAINT(0x02cd, 0x3),
+   INTEL_EVENT_CONSTRAINT(0xce, 0x1),
+
+   INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf),
+   /*
+    * Generally event codes >= 0x90 are likely to have no restrictions.
+    * The exception are defined as above.
+    */
+   INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0x3ff),
+
+   EVENT_CONSTRAINT_END
+};
+
+
 EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
 EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
 EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
@@ -2874,26 +2934,26 @@ static void intel_pmu_reset(void)
 {
    struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
    struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-   int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
-   int num_counters = hybrid(cpuc->pmu, num_counters);
+   unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask);
+   unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask);
    unsigned long flags;
    int idx;
 
-   if (!num_counters)
+   if (!*(u64 *)cntr_mask)
        return;
 
    local_irq_save(flags);
 
    pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
 
-   for (idx = 0; idx < num_counters; idx++) {
+   for_each_set_bit(idx, cntr_mask, INTEL_PMC_MAX_GENERIC) {
        wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
        wrmsrl_safe(x86_pmu_event_addr(idx), 0ull);
    }
-   for (idx = 0; idx < num_counters_fixed; idx++) {
+   for_each_set_bit(idx, fixed_cntr_mask, INTEL_PMC_MAX_FIXED) {
        if (fixed_counter_disabled(idx, cpuc->pmu))
            continue;
-       wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+       wrmsrl_safe(x86_pmu_fixed_ctr_addr(idx), 0ull);
    }
 
    if (ds)
@@ -2940,8 +3000,7 @@ static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
        !guest_pebs_idxs)
        return;
 
-   for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs,
-            INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed) {
+   for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs, X86_PMC_IDX_MAX) {
        event = cpuc->events[bit];
        if (!event->attr.precise_ip)
            continue;
@@ -4199,7 +4258,7 @@ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
    struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
    int idx;
 
-   for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+   for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
        struct perf_event *event = cpuc->events[idx];
 
        arr[idx].msr = x86_pmu_config_addr(idx);
@@ -4217,7 +4276,7 @@ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
        arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
    }
 
-   *nr = x86_pmu.num_counters;
+   *nr = x86_pmu_max_num_counters(cpuc->pmu);
    return arr;
 }
 
@@ -4232,7 +4291,7 @@ static void core_pmu_enable_all(int added)
    struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    int idx;
 
-   for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+   for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
        struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
 
        if (!test_bit(idx, cpuc->active_mask) ||
@@ -4573,8 +4632,55 @@ PMU_FORMAT_ATTR(pc, "config:19" );
 PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
 PMU_FORMAT_ATTR(inv, "config:23" );
 PMU_FORMAT_ATTR(cmask, "config:24-31" );
-PMU_FORMAT_ATTR(in_tx, "config:32");
-PMU_FORMAT_ATTR(in_tx_cp, "config:33");
+PMU_FORMAT_ATTR(in_tx, "config:32" );
+PMU_FORMAT_ATTR(in_tx_cp, "config:33" );
+PMU_FORMAT_ATTR(eq, "config:36" ); /* v6 + */
+
+static ssize_t umask2_show(struct device *dev,
+              struct device_attribute *attr,
+              char *page)
+{
+   u64 mask = hybrid(dev_get_drvdata(dev), config_mask) & ARCH_PERFMON_EVENTSEL_UMASK2;
+
+   if (mask == ARCH_PERFMON_EVENTSEL_UMASK2)
+       return sprintf(page, "config:8-15,40-47\n");
+
+   /* Roll back to the old format if umask2 is not supported. */
+   return sprintf(page, "config:8-15\n");
+}
+
+static struct device_attribute format_attr_umask2 =
+       __ATTR(umask, 0444, umask2_show, NULL);
+
+static struct attribute *format_evtsel_ext_attrs[] = {
+   &format_attr_umask2.attr,
+   &format_attr_eq.attr,
+   NULL
+};
+
+static umode_t
+evtsel_ext_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+   struct device *dev = kobj_to_dev(kobj);
+   u64 mask;
+
+   /*
+    * The umask and umask2 have different formats but share the
+    * same attr name. In update mode, the previous value of the
+    * umask is unconditionally removed before is_visible. If
+    * umask2 format is not enumerated, it's impossible to roll
+    * back to the old format.
+    * Does the check in umask2_show rather than is_visible.
+    */
+   if (i == 0)
+       return attr->mode;
+
+   mask = hybrid(dev_get_drvdata(dev), config_mask);
+   if (i == 1)
+       return (mask & ARCH_PERFMON_EVENTSEL_EQ) ? attr->mode : 0;
+
+   return 0;
+}
+
 static struct attribute *intel_arch_formats_attr[] = {
    &format_attr_event.attr,
@@ -4684,13 +4790,33 @@ static void flip_smm_bit(void *data)
    }
 }
 
-static void intel_pmu_check_num_counters(int *num_counters,
-                    int *num_counters_fixed,
-                    u64 *intel_ctrl, u64 fixed_mask);
+static void intel_pmu_check_counters_mask(u64 *cntr_mask,
+                     u64 *fixed_cntr_mask,
+                     u64 *intel_ctrl)
+{
+   unsigned int bit;
+
+   bit = fls64(*cntr_mask);
+   if (bit > INTEL_PMC_MAX_GENERIC) {
+       WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
+            bit, INTEL_PMC_MAX_GENERIC);
+       *cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
+   }
+   *intel_ctrl = *cntr_mask;
+
+   bit = fls64(*fixed_cntr_mask);
+   if (bit > INTEL_PMC_MAX_FIXED) {
+       WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
+            bit, INTEL_PMC_MAX_FIXED);
+       *fixed_cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0);
+   }
+
+   *intel_ctrl |= *fixed_cntr_mask << INTEL_PMC_IDX_FIXED;
+}
 
 static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
-                         int num_counters,
-                         int num_counters_fixed,
+                         u64 cntr_mask,
+                         u64 fixed_cntr_mask,
                          u64 intel_ctrl);
 
 static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs);
@@ -4707,17 +4833,22 @@ static inline bool intel_pmu_broken_perf_cap(void)
 
 static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
 {
-   unsigned int sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF);
-   unsigned int eax, ebx, ecx, edx;
+   unsigned int sub_bitmaps, eax, ebx, ecx, edx;
+
+   cpuid(ARCH_PERFMON_EXT_LEAF, &sub_bitmaps, &ebx, &ecx, &edx);
+
+   if (ebx & ARCH_PERFMON_EXT_UMASK2)
+       pmu->config_mask |= ARCH_PERFMON_EVENTSEL_UMASK2;
+   if (ebx & ARCH_PERFMON_EXT_EQ)
+       pmu->config_mask |= ARCH_PERFMON_EVENTSEL_EQ;
 
    if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) {
        cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
                &eax, &ebx, &ecx, &edx);
-       pmu->num_counters = fls(eax);
-       pmu->num_counters_fixed = fls(ebx);
+       pmu->cntr_mask64 = eax;
+       pmu->fixed_cntr_mask64 = ebx;
    }
 
-
    if (!intel_pmu_broken_perf_cap()) {
        /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */
        rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities);
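intel_pmu_check_counters_mask() above clips an over-wide mask with fls64() and GENMASK_ULL(). A standalone sketch of the same arithmetic; fls64 and GENMASK_ULL are re-implemented locally, and the limit of 8 is a hypothetical stand-in for INTEL_PMC_MAX_GENERIC.

#include <stdint.h>
#include <stdio.h>

#define GENMASK_ULL(h, l) \
    (((~0ULL) >> (63 - (h))) & ~((1ULL << (l)) - 1ULL))

/* Highest set bit position + 1, like the kernel's fls64(). */
static int fls64_sketch(uint64_t x)
{
    int bit = 0;

    while (x) {
        bit++;
        x >>= 1;
    }
    return bit;
}

int main(void)
{
    uint64_t cntr_mask = GENMASK_ULL(11, 0); /* claims 12 counters */
    const int max_generic = 8;               /* hypothetical limit */

    /* Clip the mask to the supported range, as the kernel helper does. */
    if (fls64_sketch(cntr_mask) > max_generic)
        cntr_mask &= GENMASK_ULL(max_generic - 1, 0);

    printf("clipped mask: %#llx\n", (unsigned long long)cntr_mask);
    return 0;
}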
@@ -4726,12 +4857,12 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
 
 static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
 {
-   intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed,
-                    &pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1);
-   pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+   intel_pmu_check_counters_mask(&pmu->cntr_mask64, &pmu->fixed_cntr_mask64,
+                     &pmu->intel_ctrl);
+   pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
    pmu->unconstrained = (struct event_constraint)
-               __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
-                          0, pmu->num_counters, 0, 0);
+               __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
+                          0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
 
    if (pmu->intel_cap.perf_metrics)
        pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
@@ -4744,8 +4875,8 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
        pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT;
 
    intel_pmu_check_event_constraints(pmu->event_constraints,
-                     pmu->num_counters,
-                     pmu->num_counters_fixed,
+                     pmu->cntr_mask64,
+                     pmu->fixed_cntr_mask64,
                      pmu->intel_ctrl);
 
    intel_pmu_check_extra_regs(pmu->extra_regs);
@@ -4806,7 +4937,7 @@ static bool init_hybrid_pmu(int cpu)
 
    intel_pmu_check_hybrid_pmus(pmu);
 
-   if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed))
+   if (!check_hw_exists(&pmu->pmu, pmu->cntr_mask, pmu->fixed_cntr_mask))
        return false;
 
    pr_info("%s PMU driver: ", pmu->name);
@@ -4816,8 +4947,7 @@ static bool init_hybrid_pmu(int cpu)
 
    pr_cont("\n");
 
-   x86_pmu_show_pmu_cap(pmu->num_counters, pmu->num_counters_fixed,
-                pmu->intel_ctrl);
+   x86_pmu_show_pmu_cap(&pmu->pmu);
 
 end:
    cpumask_set_cpu(cpu, &pmu->supported_cpus);
@@ -5058,6 +5188,7 @@ static __initconst const struct x86_pmu core_pmu = {
    .schedule_events = x86_schedule_events,
    .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
    .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+   .fixedctr = MSR_ARCH_PERFMON_FIXED_CTR0,
    .event_map = intel_pmu_event_map,
    .max_events = ARRAY_SIZE(intel_perfmon_event_map),
    .apic = 1,
@@ -5111,6 +5242,7 @@ static __initconst const struct x86_pmu intel_pmu = {
    .schedule_events = x86_schedule_events,
    .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
    .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+   .fixedctr = MSR_ARCH_PERFMON_FIXED_CTR0,
    .event_map = intel_pmu_event_map,
    .max_events = ARRAY_SIZE(intel_perfmon_event_map),
    .apic = 1,
@@ -5698,8 +5830,22 @@ exra_is_visible(struct kobject *kobj, struct attribute *attr, int i)
    return x86_pmu.version >= 2 ? attr->mode : 0;
 }
 
+static umode_t
+td_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+   /*
+    * Hide the perf metrics topdown events
+    * if the feature is not enumerated.
+    */
+   if (x86_pmu.num_topdown_events)
+       return x86_pmu.intel_cap.perf_metrics ? attr->mode : 0;
+
+   return attr->mode;
+}
+
 static struct attribute_group group_events_td = {
    .name = "events",
+   .is_visible = td_is_visible,
 };
 
 static struct attribute_group group_events_mem = {
@@ -5733,6 +5879,12 @@ static struct attribute_group group_format_extra_skl = {
    .is_visible = exra_is_visible,
 };
 
+static struct attribute_group group_format_evtsel_ext = {
+   .name = "format",
+   .attrs = format_evtsel_ext_attrs,
+   .is_visible = evtsel_ext_is_visible,
+};
+
 static struct attribute_group group_default = {
    .attrs = intel_pmu_attrs,
    .is_visible = default_is_visible,
@@ -5746,6 +5898,7 @@ static const struct attribute_group *attr_update[] = {
    &group_caps_lbr,
    &group_format_extra,
    &group_format_extra_skl,
+   &group_format_evtsel_ext,
    &group_default,
    NULL,
 };
@@ -5773,6 +5926,23 @@ static struct attribute *adl_hybrid_events_attrs[] = {
    NULL,
 };
 
+EVENT_ATTR_STR_HYBRID(topdown-retiring, td_retiring_lnl, "event=0xc2,umask=0x02;event=0x00,umask=0x80", hybrid_big_small);
+EVENT_ATTR_STR_HYBRID(topdown-fe-bound, td_fe_bound_lnl, "event=0x9c,umask=0x01;event=0x00,umask=0x82", hybrid_big_small);
+EVENT_ATTR_STR_HYBRID(topdown-be-bound, td_be_bound_lnl, "event=0xa4,umask=0x02;event=0x00,umask=0x83", hybrid_big_small);
+
+static struct attribute *lnl_hybrid_events_attrs[] = {
+   EVENT_PTR(slots_adl),
+   EVENT_PTR(td_retiring_lnl),
+   EVENT_PTR(td_bad_spec_adl),
+   EVENT_PTR(td_fe_bound_lnl),
+   EVENT_PTR(td_be_bound_lnl),
+   EVENT_PTR(td_heavy_ops_adl),
+   EVENT_PTR(td_br_mis_adl),
+   EVENT_PTR(td_fetch_lat_adl),
+   EVENT_PTR(td_mem_bound_adl),
+   NULL
+};
+
 /* Must be in IDX order */
 EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small);
 EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, "event=0xd0,umask=0x6;event=0xcd,umask=0x2", hybrid_big_small);
@@ -5901,9 +6071,27 @@ static umode_t hybrid_format_is_visible(struct kobject *kobj,
    return (cpu >= 0) && (pmu->pmu_type & pmu_attr->pmu_type) ? attr->mode : 0;
 }
 
+static umode_t hybrid_td_is_visible(struct kobject *kobj,
+                   struct attribute *attr, int i)
+{
+   struct device *dev = kobj_to_dev(kobj);
+   struct x86_hybrid_pmu *pmu =
+       container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu);
+
+   if (!is_attr_for_this_pmu(kobj, attr))
+       return 0;
+
+
+   /* Only the big core supports perf metrics */
+   if (pmu->pmu_type == hybrid_big)
+       return pmu->intel_cap.perf_metrics ? attr->mode : 0;
+
+   return attr->mode;
+}
+
 static struct attribute_group hybrid_group_events_td = {
    .name = "events",
-   .is_visible = hybrid_events_is_visible,
+   .is_visible = hybrid_td_is_visible,
 };
 
 static struct attribute_group hybrid_group_events_mem = {
@@ -5948,6 +6136,7 @@ static const struct attribute_group *hybrid_attr_update[] = {
    &group_caps_gen,
    &group_caps_lbr,
    &hybrid_group_format_extra,
+   &group_format_evtsel_ext,
    &group_default,
    &hybrid_group_cpus,
    NULL,
@@ -5955,29 +6144,9 @@ static const struct attribute_group *hybrid_attr_update[] = {
 
 static struct attribute *empty_attrs;
 
-static void intel_pmu_check_num_counters(int *num_counters,
-                    int *num_counters_fixed,
-                    u64 *intel_ctrl, u64 fixed_mask)
-{
-   if (*num_counters > INTEL_PMC_MAX_GENERIC) {
-       WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
-            *num_counters, INTEL_PMC_MAX_GENERIC);
-       *num_counters = INTEL_PMC_MAX_GENERIC;
-   }
-   *intel_ctrl = (1ULL << *num_counters) - 1;
-
-   if (*num_counters_fixed > INTEL_PMC_MAX_FIXED) {
-       WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
-            *num_counters_fixed, INTEL_PMC_MAX_FIXED);
-       *num_counters_fixed = INTEL_PMC_MAX_FIXED;
-   }
-
-   *intel_ctrl |= fixed_mask << INTEL_PMC_IDX_FIXED;
-}
-
 static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
-                         int num_counters,
-                         int num_counters_fixed,
+                         u64 cntr_mask,
+                         u64 fixed_cntr_mask,
                          u64 intel_ctrl)
 {
    struct event_constraint *c;
@@ -6014,10 +6183,9 @@ static void intel_pmu_check_event_constraints(struct event_con
             * generic counters
             */
            if (!use_fixed_pseudo_encoding(c->code))
-               c->idxmsk64 |= (1ULL << num_counters) - 1;
+               c->idxmsk64 |= cntr_mask;
        }
-       c->idxmsk64 &=
-           ~(~0ULL << (INTEL_PMC_IDX_FIXED + num_counters_fixed));
+       c->idxmsk64 &= cntr_mask | (fixed_cntr_mask << INTEL_PMC_IDX_FIXED);
        c->weight = hweight64(c->idxmsk64);
    }
 }
@@ -6042,6 +6210,11 @@ static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs)
    }
 }
 
+static inline int intel_pmu_v6_addr_offset(int index, bool eventsel)
+{
+   return MSR_IA32_PMC_V6_STEP * index;
+}
+
 static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = {
    { hybrid_small, "cpu_atom" },
    { hybrid_big, "cpu_core" },
@@ -6068,12 +6241,13 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
        pmu->pmu_type = intel_hybrid_pmu_type_map[bit].id;
        pmu->name = intel_hybrid_pmu_type_map[bit].name;
 
-       pmu->num_counters = x86_pmu.num_counters;
-       pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
-       pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+       pmu->cntr_mask64 = x86_pmu.cntr_mask64;
+       pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
+       pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
+       pmu->config_mask = X86_RAW_EVENT_MASK;
        pmu->unconstrained = (struct event_constraint)
-                   __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
-                              0, pmu->num_counters, 0, 0);
+                   __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
+                              0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
 
        pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
        if (pmu->pmu_type & hybrid_small) {
@@ -6143,6 +6317,21 @@ static __always_inline void intel_pmu_init_grt(struct pmu *pmu)
    intel_pmu_ref_cycles_ext();
 }
 
+static __always_inline void intel_pmu_init_lnc(struct pmu *pmu)
+{
+   intel_pmu_init_glc(pmu);
+   hybrid(pmu, event_constraints) = intel_lnc_event_constraints;
+   hybrid(pmu, pebs_constraints) = intel_lnc_pebs_event_constraints;
+   hybrid(pmu, extra_regs) = intel_rwc_extra_regs;
+}
+
+static __always_inline void intel_pmu_init_skt(struct pmu *pmu)
+{
+   intel_pmu_init_grt(pmu);
+   hybrid(pmu, event_constraints) = intel_skt_event_constraints;
+   hybrid(pmu, extra_regs) = intel_cmt_extra_regs;
+}
+
 __init int intel_pmu_init(void)
 {
    struct attribute **extra_skl_attr = &empty_attrs;
@@ -6186,14 +6375,14 @@ __init int intel_pmu_init(void)
    x86_pmu = intel_pmu;
 
    x86_pmu.version = version;
-   x86_pmu.num_counters = eax.split.num_counters;
+   x86_pmu.cntr_mask64 = GENMASK_ULL(eax.split.num_counters - 1, 0);
    x86_pmu.cntval_bits = eax.split.bit_width;
    x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
 
    x86_pmu.events_maskl = ebx.full;
    x86_pmu.events_mask_len = eax.split.mask_length;
 
-   x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+   x86_pmu.pebs_events_mask = intel_pmu_pebs_mask(x86_pmu.cntr_mask64);
    x86_pmu.pebs_capable = PEBS_COUNTER_MASK;
 
    /*
@@ -6203,12 +6392,10 @@ __init int intel_pmu_init(void)
    if (version > 1 && version < 5) {
        int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);
 
-       x86_pmu.num_counters_fixed =
-           max((int)edx.split.num_counters_fixed, assume);
-
-       fixed_mask = (1L << x86_pmu.num_counters_fixed) - 1;
+       x86_pmu.fixed_cntr_mask64 =
+           GENMASK_ULL(max((int)edx.split.num_counters_fixed, assume) - 1, 0);
    } else if (version >= 5)
-       x86_pmu.num_counters_fixed = fls(fixed_mask);
+       x86_pmu.fixed_cntr_mask64 = fixed_mask;
 
    if (boot_cpu_has(X86_FEATURE_PDCM)) {
        u64 capabilities;
@@ -6423,7 +6610,7 @@ __init int intel_pmu_init(void)
    case INTEL_ATOM_GRACEMONT:
        intel_pmu_init_grt(NULL);
        intel_pmu_pebs_data_source_grt();
-       x86_pmu.pebs_latency_data = adl_latency_data_small;
+       x86_pmu.pebs_latency_data = grt_latency_data;
        x86_pmu.get_event_constraints = tnt_get_event_constraints;
        td_attr = tnt_events_attrs;
        mem_attr = grt_mem_attrs;
@@ -6437,7 +6624,7 @@ __init int intel_pmu_init(void)
        intel_pmu_init_grt(NULL);
        x86_pmu.extra_regs = intel_cmt_extra_regs;
        intel_pmu_pebs_data_source_cmt();
-       x86_pmu.pebs_latency_data = mtl_latency_data_small;
+       x86_pmu.pebs_latency_data = cmt_latency_data;
        x86_pmu.get_event_constraints = cmt_get_event_constraints;
        td_attr = cmt_events_attrs;
        mem_attr = grt_mem_attrs;
@@ -6756,12 +6943,18 @@ __init int intel_pmu_init(void)
    case INTEL_EMERALDRAPIDS_X:
        x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
        x86_pmu.extra_regs = intel_glc_extra_regs;
-       fallthrough;
+       pr_cont("Sapphire Rapids events, ");
+       name = "sapphire_rapids";
+       goto glc_common;
+
    case INTEL_GRANITERAPIDS_X:
    case INTEL_GRANITERAPIDS_D:
+       x86_pmu.extra_regs = intel_rwc_extra_regs;
+       pr_cont("Granite Rapids events, ");
+       name = "granite_rapids";
+
+   glc_common:
        intel_pmu_init_glc(NULL);
-       if (!x86_pmu.extra_regs)
-           x86_pmu.extra_regs = intel_rwc_extra_regs;
        x86_pmu.pebs_ept = 1;
        x86_pmu.hw_config = hsw_hw_config;
        x86_pmu.get_event_constraints = glc_get_event_constraints;
@@ -6772,8 +6965,6 @@ __init int intel_pmu_init(void)
        td_attr = glc_td_events_attrs;
        tsx_attr = glc_tsx_events_attrs;
        intel_pmu_pebs_data_source_skl(true);
-       pr_cont("Sapphire Rapids events, ");
-       name = "sapphire_rapids";
        break;
 
    case INTEL_ALDERLAKE:
@@ -6788,7 +6979,7 @@ __init int intel_pmu_init(void)
        */
        intel_pmu_init_hybrid(hybrid_big_small);
 
-       x86_pmu.pebs_latency_data = adl_latency_data_small;
+       x86_pmu.pebs_latency_data = grt_latency_data;
        x86_pmu.get_event_constraints = adl_get_event_constraints;
        x86_pmu.hw_config = adl_hw_config;
        x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type;
@@ -6803,11 +6994,13 @@ __init int intel_pmu_init(void)
        pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
        intel_pmu_init_glc(&pmu->pmu);
        if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
-           pmu->num_counters = x86_pmu.num_counters + 2;
-           pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
+           pmu->cntr_mask64 <<= 2;
+           pmu->cntr_mask64 |= 0x3;
+           pmu->fixed_cntr_mask64 <<= 1;
+           pmu->fixed_cntr_mask64 |= 0x1;
        } else {
-           pmu->num_counters = x86_pmu.num_counters;
-           pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+           pmu->cntr_mask64 = x86_pmu.cntr_mask64;
+           pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
        }
 
        /*
@@ -6817,15 +7010,16 @@ __init int intel_pmu_init(void)
        * mistakenly add extra counters for P-cores. Correct the number of
        * counters here.
        */
-       if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) {
-           pmu->num_counters = x86_pmu.num_counters;
-           pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+       if ((x86_pmu_num_counters(&pmu->pmu) > 8) || (x86_pmu_num_counters_fixed(&pmu->pmu) > 4)) {
+           pmu->cntr_mask64 = x86_pmu.cntr_mask64;
+           pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
        }
 
-       pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+       pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
        pmu->unconstrained = (struct event_constraint)
-                   __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
-                              0, pmu->num_counters, 0, 0);
+                   __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
+                              0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
 
        pmu->extra_regs = intel_glc_extra_regs;
 
        /* Initialize Atom core specific PerfMon capabilities.*/
@@ -6842,7 +7036,7 @@ __init int intel_pmu_init(void)
    case INTEL_METEORLAKE_L:
        intel_pmu_init_hybrid(hybrid_big_small);
 
-       x86_pmu.pebs_latency_data = mtl_latency_data_small;
+       x86_pmu.pebs_latency_data = cmt_latency_data;
        x86_pmu.get_event_constraints = mtl_get_event_constraints;
        x86_pmu.hw_config = adl_hw_config;
 
@@ -6867,6 +7061,33 @@ __init int intel_pmu_init(void)
        name = "meteorlake_hybrid";
        break;
 
+   case INTEL_LUNARLAKE_M:
+   case INTEL_ARROWLAKE:
+       intel_pmu_init_hybrid(hybrid_big_small);
+
+       x86_pmu.pebs_latency_data = lnl_latency_data;
+       x86_pmu.get_event_constraints = mtl_get_event_constraints;
+       x86_pmu.hw_config = adl_hw_config;
+
+       td_attr = lnl_hybrid_events_attrs;
+       mem_attr = mtl_hybrid_mem_attrs;
+       tsx_attr = adl_hybrid_tsx_attrs;
+       extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+           mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
+
+       /* Initialize big core specific PerfMon capabilities.*/
+       pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
+       intel_pmu_init_lnc(&pmu->pmu);
+
+       /* Initialize Atom core specific PerfMon capabilities.*/
+       pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
+       intel_pmu_init_skt(&pmu->pmu);
+
+       intel_pmu_pebs_data_source_lnl();
+       pr_cont("Lunarlake Hybrid events, ");
+       name = "lunarlake_hybrid";
+       break;
+
    default:
        switch (x86_pmu.version) {
        case 1:
@@ -6892,9 +7113,9 @@ __init int intel_pmu_init(void)
        * The constraints may be cut according to the CPUID enumeration
        * by inserting the EVENT_CONSTRAINT_END.
        */
-       if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED)
-           x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
-       intel_v5_gen_event_constraints[x86_pmu.num_counters_fixed].weight = -1;
+       if (fls64(x86_pmu.fixed_cntr_mask64) > INTEL_PMC_MAX_FIXED)
+           x86_pmu.fixed_cntr_mask64 &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0);
+       intel_v5_gen_event_constraints[fls64(x86_pmu.fixed_cntr_mask64)].weight = -1;
        x86_pmu.event_constraints = intel_v5_gen_event_constraints;
        pr_cont("generic architected perfmon, ");
        name = "generic_arch_v5+";
@@ -6921,18 +7142,17 @@ __init int intel_pmu_init(void)
        x86_pmu.attr_update = hybrid_attr_update;
    }
 
-   intel_pmu_check_num_counters(&x86_pmu.num_counters,
-                    &x86_pmu.num_counters_fixed,
-                    &x86_pmu.intel_ctrl,
-                    (u64)fixed_mask);
+   intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64,
+                     &x86_pmu.fixed_cntr_mask64,
+                     &x86_pmu.intel_ctrl);
 
    /* AnyThread may be deprecated on arch perfmon v5 or later */
    if (x86_pmu.intel_cap.anythread_deprecated)
        x86_pmu.format_attrs = intel_arch_formats_attr;
 
    intel_pmu_check_event_constraints(x86_pmu.event_constraints,
-                     x86_pmu.num_counters,
-                     x86_pmu.num_counters_fixed,
+                     x86_pmu.cntr_mask64,
+                     x86_pmu.fixed_cntr_mask64,
                      x86_pmu.intel_ctrl);
    /*
    * Access LBR MSR may cause #GP under certain circumstances.
@@ -6973,6 +7193,14 @@ __init int intel_pmu_init(void)
        pr_cont("full-width counters, ");
    }
 
+   /* Support V6+ MSR Aliasing */
+   if (x86_pmu.version >= 6) {
+       x86_pmu.perfctr = MSR_IA32_PMC_V6_GP0_CTR;
+       x86_pmu.eventsel = MSR_IA32_PMC_V6_GP0_CFG_A;
+       x86_pmu.fixedctr = MSR_IA32_PMC_V6_FX0_CTR;
+       x86_pmu.addr_offset = intel_pmu_v6_addr_offset;
+   }
+
    if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics)
        x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
 
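The "Support V6+ MSR Aliasing" hunk above only stores new base MSRs and an addr_offset() callback; each counter's register sits at a fixed stride from its base, exactly as intel_pmu_v6_addr_offset() computes it. A standalone sketch with made-up constants (the kernel's real values are MSR_IA32_PMC_V6_GP0_CTR, MSR_IA32_PMC_V6_GP0_CFG_A and MSR_IA32_PMC_V6_STEP):

#include <stdio.h>

#define FAKE_V6_GP0_CTR 0x1900u /* hypothetical base for GP counter 0 */
#define FAKE_V6_STEP    4u      /* hypothetical per-counter stride */

/* base + step * index, mirroring base MSR + intel_pmu_v6_addr_offset(). */
static unsigned int v6_addr(unsigned int base, int index)
{
    return base + FAKE_V6_STEP * index;
}

int main(void)
{
    for (int idx = 0; idx < 4; idx++)
        printf("GP counter %d -> MSR %#x\n", idx, v6_addr(FAKE_V6_GP0_CTR, idx));
    return 0;
}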
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -41,7 +41,7 @@
 * MSR_CORE_C1_RES: CORE C1 Residency Counter
 * perf code: 0x00
 * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL,
-* MTL,SRF,GRR
+* MTL,SRF,GRR,ARL,LNL
 * Scope: Core (each processor core has a MSR)
 * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
 * perf code: 0x01
@@ -53,50 +53,50 @@
 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
 * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
 * TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF,
-* GRR
+* GRR,ARL,LNL
 * Scope: Core
 * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
 * perf code: 0x03
 * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
-* ICL,TGL,RKL,ADL,RPL,MTL
+* ICL,TGL,RKL,ADL,RPL,MTL,ARL,LNL
 * Scope: Core
 * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
 * perf code: 0x00
 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
 * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL,
-* RPL,SPR,MTL
+* RPL,SPR,MTL,ARL,LNL
 * Scope: Package (physical package)
 * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
 * perf code: 0x01
 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
 * GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL,
-* ADL,RPL,MTL
+* ADL,RPL,MTL,ARL,LNL
 * Scope: Package (physical package)
 * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
 * perf code: 0x02
 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
 * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
-* TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF
+* TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF,
+* ARL,LNL
 * Scope: Package (physical package)
 * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
 * perf code: 0x03
 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL,
-* KBL,CML,ICL,TGL,RKL,ADL,RPL,MTL
+* KBL,CML,ICL,TGL,RKL
 * Scope: Package (physical package)
 * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
 * perf code: 0x04
 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
-* ADL,RPL,MTL
+* ADL,RPL,MTL,ARL
 * Scope: Package (physical package)
 * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
 * perf code: 0x05
-* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
-* ADL,RPL,MTL
+* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL
 * Scope: Package (physical package)
 * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
 * perf code: 0x06
 * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
-* TNT,RKL,ADL,RPL,MTL
+* TNT,RKL,ADL,RPL,MTL,ARL,LNL
 * Scope: Package (physical package)
 * MSR_MODULE_C6_RES_MS: Module C6 Residency Counter.
 * perf code: 0x00
@@ -637,9 +637,18 @@ static const struct cstate_model adl_cstates __initconst = {
    .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
              BIT(PERF_CSTATE_PKG_C3_RES) |
              BIT(PERF_CSTATE_PKG_C6_RES) |
-             BIT(PERF_CSTATE_PKG_C7_RES) |
              BIT(PERF_CSTATE_PKG_C8_RES) |
-             BIT(PERF_CSTATE_PKG_C9_RES) |
              BIT(PERF_CSTATE_PKG_C10_RES),
 };
 
+static const struct cstate_model lnl_cstates __initconst = {
+   .core_events = BIT(PERF_CSTATE_CORE_C1_RES) |
+              BIT(PERF_CSTATE_CORE_C6_RES) |
+              BIT(PERF_CSTATE_CORE_C7_RES),
+
+   .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
+             BIT(PERF_CSTATE_PKG_C3_RES) |
+             BIT(PERF_CSTATE_PKG_C6_RES) |
+             BIT(PERF_CSTATE_PKG_C10_RES),
+};
+
@@ -763,6 +772,10 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
    X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &adl_cstates),
    X86_MATCH_VFM(INTEL_METEORLAKE, &adl_cstates),
    X86_MATCH_VFM(INTEL_METEORLAKE_L, &adl_cstates),
+   X86_MATCH_VFM(INTEL_ARROWLAKE, &adl_cstates),
+   X86_MATCH_VFM(INTEL_ARROWLAKE_H, &adl_cstates),
+   X86_MATCH_VFM(INTEL_ARROWLAKE_U, &adl_cstates),
+   X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_cstates),
    { },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
@@ -63,6 +63,15 @@ union intel_x86_pebs_dse {
unsigned int mtl_fwd_blk:1;
unsigned int ld_reserved4:24;
};
struct {
unsigned int lnc_dse:8;
unsigned int ld_reserved5:2;
unsigned int lnc_stlb_miss:1;
unsigned int lnc_locked:1;
unsigned int lnc_data_blk:1;
unsigned int lnc_addr_blk:1;
unsigned int ld_reserved6:18;
};
};

@@ -77,7 +86,7 @@ union intel_x86_pebs_dse {
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))

/* Version for Sandy Bridge and later */
static u64 pebs_data_source[] = {
static u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 local */
OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
@@ -173,6 +182,40 @@ void __init intel_pmu_pebs_data_source_cmt(void)
__intel_pmu_pebs_data_source_cmt(pebs_data_source);
}

/* Version for Lion Cove and later */
static u64 lnc_pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* 0x00: ukn L3 */
OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 hit */
OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x02: L1 hit */
OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x03: LFB/L1 Miss Handling Buffer hit */
0, /* 0x04: Reserved */
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x05: L2 Hit */
OP_LH | LEVEL(L2_MHB) | P(SNOOP, NONE), /* 0x06: L2 Miss Handling Buffer Hit */
0, /* 0x07: Reserved */
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* 0x08: L3 Hit */
0, /* 0x09: Reserved */
0, /* 0x0a: Reserved */
0, /* 0x0b: Reserved */
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* 0x0c: L3 Hit Snoop Fwd */
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x0d: L3 Hit Snoop HitM */
0, /* 0x0e: Reserved */
P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x0f: L3 Miss Snoop HitM */
OP_LH | LEVEL(MSC) | P(SNOOP, NONE), /* 0x10: Memory-side Cache Hit */
OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* 0x11: Local Memory Hit */
};

void __init intel_pmu_pebs_data_source_lnl(void)
{
u64 *data_source;

data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
memcpy(data_source, lnc_pebs_data_source, sizeof(lnc_pebs_data_source));

data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
__intel_pmu_pebs_data_source_cmt(data_source);
}

static u64 precise_store_data(u64 status)
{
union intel_x86_pebs_dse dse;
@@ -257,14 +300,14 @@ static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
}

/* Retrieve the latency data for e-core of ADL */
static u64 __adl_latency_data_small(struct perf_event *event, u64 status,
u8 dse, bool tlb, bool lock, bool blk)
static u64 __grt_latency_data(struct perf_event *event, u64 status,
u8 dse, bool tlb, bool lock, bool blk)
{
u64 val;

WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big);

dse &= PERF_PEBS_DATA_SOURCE_MASK;
dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK;
val = hybrid_var(event->pmu, pebs_data_source)[dse];

pebs_set_tlb_lock(&val, tlb, lock);
@@ -277,27 +320,72 @@ static u64 __adl_latency_data_small(struct perf_event *event, u64 status,
return val;
}

u64 adl_latency_data_small(struct perf_event *event, u64 status)
u64 grt_latency_data(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;

dse.val = status;

return __adl_latency_data_small(event, status, dse.ld_dse,
dse.ld_locked, dse.ld_stlb_miss,
dse.ld_data_blk);
return __grt_latency_data(event, status, dse.ld_dse,
dse.ld_locked, dse.ld_stlb_miss,
dse.ld_data_blk);
}

/* Retrieve the latency data for e-core of MTL */
u64 mtl_latency_data_small(struct perf_event *event, u64 status)
u64 cmt_latency_data(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;

dse.val = status;

return __adl_latency_data_small(event, status, dse.mtl_dse,
dse.mtl_stlb_miss, dse.mtl_locked,
dse.mtl_fwd_blk);
return __grt_latency_data(event, status, dse.mtl_dse,
dse.mtl_stlb_miss, dse.mtl_locked,
dse.mtl_fwd_blk);
}

static u64 lnc_latency_data(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;
union perf_mem_data_src src;
u64 val;

dse.val = status;

/* LNC core latency data */
val = hybrid_var(event->pmu, pebs_data_source)[status & PERF_PEBS_DATA_SOURCE_MASK];
if (!val)
val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);

if (dse.lnc_stlb_miss)
val |= P(TLB, MISS) | P(TLB, L2);
else
val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

if (dse.lnc_locked)
val |= P(LOCK, LOCKED);

if (dse.lnc_data_blk)
val |= P(BLK, DATA);
if (dse.lnc_addr_blk)
val |= P(BLK, ADDR);
if (!dse.lnc_data_blk && !dse.lnc_addr_blk)
val |= P(BLK, NA);

src.val = val;
if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
src.mem_op = P(OP, STORE);

return src.val;
}

u64 lnl_latency_data(struct perf_event *event, u64 status)
{
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);

if (pmu->pmu_type == hybrid_small)
return cmt_latency_data(event, status);

return lnc_latency_data(event, status);
}

static u64 load_latency_data(struct perf_event *event, u64 status)
@@ -1086,6 +1174,32 @@ struct event_constraint intel_glc_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};

struct event_constraint intel_lnc_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),

INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3ff),
INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3),
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */

INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),

INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),

/*
* Everything else is handled by PMU_FL_PEBS_ALL, because we
* need the full constraints from the main table.
*/

EVENT_CONSTRAINT_END
};

struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints);
@@ -1137,8 +1251,7 @@ void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sche
static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
{
struct debug_store *ds = cpuc->ds;
int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
int max_pebs_events = intel_pmu_max_num_pebs(cpuc->pmu);
u64 threshold;
int reserved;

@@ -1146,7 +1259,7 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
return;

if (x86_pmu.flags & PMU_FL_PEBS_ALL)
reserved = max_pebs_events + num_counters_fixed;
reserved = max_pebs_events + x86_pmu_max_num_counters_fixed(cpuc->pmu);
else
reserved = max_pebs_events;

@@ -1831,8 +1944,12 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
set_linear_ip(regs, basic->ip);
regs->flags = PERF_EFLAGS_EXACT;

if ((sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && (x86_pmu.flags & PMU_FL_RETIRE_LATENCY))
data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK;
if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)
data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK;
else
data->weight.var3_w = 0;
}

/*
* The record for MEMINFO is in front of GP
@@ -2157,6 +2274,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
void *base, *at, *top;
short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
int max_pebs_events = intel_pmu_max_num_pebs(NULL);
int bit, i, size;
u64 mask;

@@ -2168,11 +2286,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d

ds->pebs_index = ds->pebs_buffer_base;

mask = (1ULL << x86_pmu.max_pebs_events) - 1;
size = x86_pmu.max_pebs_events;
mask = x86_pmu.pebs_events_mask;
size = max_pebs_events;
if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
mask |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED;
size = INTEL_PMC_IDX_FIXED + x86_pmu_max_num_counters_fixed(NULL);
}

if (unlikely(base >= top)) {
@@ -2208,8 +2326,9 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
pebs_status = p->status = cpuc->pebs_enabled;

bit = find_first_bit((unsigned long *)&pebs_status,
x86_pmu.max_pebs_events);
if (bit >= x86_pmu.max_pebs_events)
max_pebs_events);

if (!(x86_pmu.pebs_events_mask & (1 << bit)))
continue;

/*
@@ -2267,12 +2386,10 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
{
short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
struct debug_store *ds = cpuc->ds;
struct perf_event *event;
void *base, *at, *top;
int bit, size;
int bit;
u64 mask;

if (!x86_pmu.pebs_active)
@@ -2283,12 +2400,11 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d

ds->pebs_index = ds->pebs_buffer_base;

mask = ((1ULL << max_pebs_events) - 1) |
(((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
size = INTEL_PMC_IDX_FIXED + num_counters_fixed;
mask = hybrid(cpuc->pmu, pebs_events_mask) |
(hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED);

if (unlikely(base >= top)) {
intel_pmu_pebs_event_update_no_drain(cpuc, size);
intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
return;
}

@@ -2298,11 +2414,11 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
pebs_status &= mask;

for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX)
counts[bit]++;
}

for_each_set_bit(bit, (unsigned long *)&mask, size) {
for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) {
if (counts[bit] == 0)
continue;

@@ -303,7 +303,7 @@ static const struct x86_pmu knc_pmu __initconst = {
.apic = 1,
.max_period = (1ULL << 39) - 1,
.version = 0,
.num_counters = 2,
.cntr_mask64 = 0x3,
.cntval_bits = 40,
.cntval_mask = (1ULL << 40) - 1,
.get_event_constraints = x86_get_event_constraints,

@@ -919,7 +919,7 @@ static void p4_pmu_disable_all(void)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;

for (idx = 0; idx < x86_pmu.num_counters; idx++) {
for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[idx];
if (!test_bit(idx, cpuc->active_mask))
continue;
@@ -998,7 +998,7 @@ static void p4_pmu_enable_all(int added)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;

for (idx = 0; idx < x86_pmu.num_counters; idx++) {
for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[idx];
if (!test_bit(idx, cpuc->active_mask))
continue;
@@ -1040,7 +1040,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)

cpuc = this_cpu_ptr(&cpu_hw_events);

for (idx = 0; idx < x86_pmu.num_counters; idx++) {
for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
int overflow;

if (!test_bit(idx, cpuc->active_mask)) {
@@ -1353,7 +1353,7 @@ static __initconst const struct x86_pmu p4_pmu = {
* though leave it restricted at moment assuming
* HT is on
*/
.num_counters = ARCH_P4_MAX_CCCR,
.cntr_mask64 = GENMASK_ULL(ARCH_P4_MAX_CCCR - 1, 0),
.apic = 1,
.cntval_bits = ARCH_P4_CNTRVAL_BITS,
.cntval_mask = ARCH_P4_CNTRVAL_MASK,
@@ -1395,7 +1395,7 @@ __init int p4_pmu_init(void)
*
* Solve this by zero'ing out the registers to mimic a reset.
*/
for (i = 0; i < x86_pmu.num_counters; i++) {
for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
reg = x86_pmu_config_addr(i);
wrmsrl_safe(reg, 0ULL);
}

@@ -214,7 +214,7 @@ static __initconst const struct x86_pmu p6_pmu = {
.apic = 1,
.max_period = (1ULL << 31) - 1,
.version = 0,
.num_counters = 2,
.cntr_mask64 = 0x3,
/*
* Events have 40 bits implemented. However they are designed such
* that bits [32-39] are sign extensions of bit 31. As such the

@@ -878,7 +878,7 @@ static void pt_update_head(struct pt *pt)
*/
static void *pt_buffer_region(struct pt_buffer *buf)
{
return phys_to_virt(TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT);
return phys_to_virt((phys_addr_t)TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT);
}

/**
@@ -990,7 +990,7 @@ pt_topa_entry_for_page(struct pt_buffer *buf, unsigned int pg)
* order allocations, there shouldn't be many of these.
*/
list_for_each_entry(topa, &buf->tables, list) {
if (topa->offset + topa->size > pg << PAGE_SHIFT)
if (topa->offset + topa->size > (unsigned long)pg << PAGE_SHIFT)
goto found;
}

@@ -33,8 +33,8 @@ struct topa_entry {
u64 rsvd2 : 1;
u64 size : 4;
u64 rsvd3 : 2;
u64 base : 36;
u64 rsvd4 : 16;
u64 base : 40;
u64 rsvd4 : 12;
};

/* TSC to Core Crystal Clock Ratio */

@@ -264,6 +264,9 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box,
return;
}

if (intel_generic_uncore_assign_hw_event(event, box))
return;

hwc->config_base = uncore_event_ctl(box, hwc->idx);
hwc->event_base = uncore_perf_ctr(box, hwc->idx);
}
@@ -844,7 +847,9 @@ static void uncore_pmu_disable(struct pmu *pmu)
static ssize_t uncore_get_attr_cpumask(struct device *dev,
struct device_attribute *attr, char *buf)
{
return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
struct intel_uncore_pmu *pmu = container_of(dev_get_drvdata(dev), struct intel_uncore_pmu, pmu);

return cpumap_print_to_pagebuf(true, buf, &pmu->cpu_mask);
}

static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
@@ -861,7 +866,10 @@ static const struct attribute_group uncore_pmu_attr_group = {
static inline int uncore_get_box_id(struct intel_uncore_type *type,
struct intel_uncore_pmu *pmu)
{
return type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx;
if (type->boxes)
return intel_uncore_find_discovery_unit_id(type->boxes, -1, pmu->pmu_idx);

return pmu->pmu_idx;
}

void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
@@ -962,6 +970,9 @@ static void uncore_type_exit(struct intel_uncore_type *type)
if (type->cleanup_mapping)
type->cleanup_mapping(type);

if (type->cleanup_extra_boxes)
type->cleanup_extra_boxes(type);

if (pmu) {
for (i = 0; i < type->num_boxes; i++, pmu++) {
uncore_pmu_unregister(pmu);
@@ -970,10 +981,7 @@ static void uncore_type_exit(struct intel_uncore_type *type)
kfree(type->pmus);
type->pmus = NULL;
}
if (type->box_ids) {
kfree(type->box_ids);
type->box_ids = NULL;
}

kfree(type->events_group);
type->events_group = NULL;
}
@@ -1077,22 +1085,19 @@ static struct intel_uncore_pmu *
uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev)
{
struct intel_uncore_type **types = uncore_pci_uncores;
struct intel_uncore_discovery_unit *unit;
struct intel_uncore_type *type;
u64 box_ctl;
int i, die;
struct rb_node *node;

for (; *types; types++) {
type = *types;
for (die = 0; die < __uncore_max_dies; die++) {
for (i = 0; i < type->num_boxes; i++) {
if (!type->box_ctls[die])
continue;
box_ctl = type->box_ctls[die] + type->pci_offsets[i];
if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) &&
pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) &&
pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl))
return &type->pmus[i];
}

for (node = rb_first(type->boxes); node; node = rb_next(node)) {
unit = rb_entry(node, struct intel_uncore_discovery_unit, node);
if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(unit->addr) &&
pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(unit->addr) &&
pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr))
return &type->pmus[unit->pmu_idx];
}
}

@@ -1368,28 +1373,25 @@ static struct notifier_block uncore_pci_notifier = {
static void uncore_pci_pmus_register(void)
{
struct intel_uncore_type **types = uncore_pci_uncores;
struct intel_uncore_discovery_unit *unit;
struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu;
struct rb_node *node;
struct pci_dev *pdev;
u64 box_ctl;
int i, die;

for (; *types; types++) {
type = *types;
for (die = 0; die < __uncore_max_dies; die++) {
for (i = 0; i < type->num_boxes; i++) {
if (!type->box_ctls[die])
continue;
box_ctl = type->box_ctls[die] + type->pci_offsets[i];
pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl),
UNCORE_DISCOVERY_PCI_BUS(box_ctl),
UNCORE_DISCOVERY_PCI_DEVFN(box_ctl));
if (!pdev)
continue;
pmu = &type->pmus[i];

uncore_pci_pmu_register(pdev, type, pmu, die);
}
for (node = rb_first(type->boxes); node; node = rb_next(node)) {
unit = rb_entry(node, struct intel_uncore_discovery_unit, node);
pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr),
UNCORE_DISCOVERY_PCI_BUS(unit->addr),
UNCORE_DISCOVERY_PCI_DEVFN(unit->addr));

if (!pdev)
continue;
pmu = &type->pmus[unit->pmu_idx];
uncore_pci_pmu_register(pdev, type, pmu, unit->die);
}
}

@@ -1454,6 +1456,18 @@ static void uncore_pci_exit(void)
}
}

static bool uncore_die_has_box(struct intel_uncore_type *type,
int die, unsigned int pmu_idx)
{
if (!type->boxes)
return true;

if (intel_uncore_find_discovery_unit_id(type->boxes, die, pmu_idx) < 0)
return false;

return true;
}

static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
int new_cpu)
{
@@ -1469,18 +1483,25 @@ static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,

if (old_cpu < 0) {
WARN_ON_ONCE(box->cpu != -1);
box->cpu = new_cpu;
if (uncore_die_has_box(type, die, pmu->pmu_idx)) {
box->cpu = new_cpu;
cpumask_set_cpu(new_cpu, &pmu->cpu_mask);
}
continue;
}

WARN_ON_ONCE(box->cpu != old_cpu);
WARN_ON_ONCE(box->cpu != -1 && box->cpu != old_cpu);
box->cpu = -1;
cpumask_clear_cpu(old_cpu, &pmu->cpu_mask);
if (new_cpu < 0)
continue;

if (!uncore_die_has_box(type, die, pmu->pmu_idx))
continue;
uncore_pmu_cancel_hrtimer(box);
perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
box->cpu = new_cpu;
cpumask_set_cpu(new_cpu, &pmu->cpu_mask);
}
}

@@ -1503,7 +1524,7 @@ static void uncore_box_unref(struct intel_uncore_type **types, int id)
pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) {
box = pmu->boxes[id];
if (box && atomic_dec_return(&box->refcnt) == 0)
if (box && box->cpu >= 0 && atomic_dec_return(&box->refcnt) == 0)
uncore_box_exit(box);
}
}
@@ -1593,7 +1614,7 @@ static int uncore_box_ref(struct intel_uncore_type **types,
pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) {
box = pmu->boxes[id];
if (box && atomic_inc_return(&box->refcnt) == 1)
if (box && box->cpu >= 0 && atomic_inc_return(&box->refcnt) == 1)
uncore_box_init(box);
}
}

@@ -62,7 +62,6 @@ struct intel_uncore_type {
unsigned fixed_ctr;
unsigned fixed_ctl;
unsigned box_ctl;
u64 *box_ctls; /* Unit ctrl addr of the first box of each die */
union {
unsigned msr_offset;
unsigned mmio_offset;
@@ -76,7 +75,6 @@ struct intel_uncore_type {
u64 *pci_offsets;
u64 *mmio_offsets;
};
unsigned *box_ids;
struct event_constraint unconstrainted;
struct event_constraint *constraints;
struct intel_uncore_pmu *pmus;
@@ -86,6 +84,7 @@ struct intel_uncore_type {
const struct attribute_group *attr_groups[4];
const struct attribute_group **attr_update;
struct pmu *pmu; /* for custom pmu ops */
struct rb_root *boxes;
/*
* Uncore PMU would store relevant platform topology configuration here
* to identify which platform component each PMON block of that type is
@@ -98,6 +97,10 @@ struct intel_uncore_type {
int (*get_topology)(struct intel_uncore_type *type);
void (*set_mapping)(struct intel_uncore_type *type);
void (*cleanup_mapping)(struct intel_uncore_type *type);
/*
* Optional callbacks for extra uncore units cleanup
*/
void (*cleanup_extra_boxes)(struct intel_uncore_type *type);
};

#define pmu_group attr_groups[0]
@@ -125,6 +128,7 @@ struct intel_uncore_pmu {
int func_id;
bool registered;
atomic_t activeboxes;
cpumask_t cpu_mask;
struct intel_uncore_type *type;
struct intel_uncore_box **boxes;
};

@@ -89,9 +89,7 @@ add_uncore_discovery_type(struct uncore_unit_discovery *unit)
if (!type)
return NULL;

type->box_ctrl_die = kcalloc(__uncore_max_dies, sizeof(u64), GFP_KERNEL);
if (!type->box_ctrl_die)
goto free_type;
type->units = RB_ROOT;

type->access_type = unit->access_type;
num_discovered_types[type->access_type]++;
@@ -100,12 +98,6 @@ add_uncore_discovery_type(struct uncore_unit_discovery *unit)
rb_add(&type->node, &discovery_tables, __type_less);

return type;

free_type:
kfree(type);

return NULL;

}

static struct intel_uncore_discovery_type *
@@ -120,14 +112,118 @@ get_uncore_discovery_type(struct uncore_unit_discovery *unit)
return add_uncore_discovery_type(unit);
}

static inline int pmu_idx_cmp(const void *key, const struct rb_node *b)
{
struct intel_uncore_discovery_unit *unit;
const unsigned int *id = key;

unit = rb_entry(b, struct intel_uncore_discovery_unit, node);

if (unit->pmu_idx > *id)
return -1;
else if (unit->pmu_idx < *id)
return 1;

return 0;
}

static struct intel_uncore_discovery_unit *
intel_uncore_find_discovery_unit(struct rb_root *units, int die,
unsigned int pmu_idx)
{
struct intel_uncore_discovery_unit *unit;
struct rb_node *pos;

if (!units)
return NULL;

pos = rb_find_first(&pmu_idx, units, pmu_idx_cmp);
if (!pos)
return NULL;
unit = rb_entry(pos, struct intel_uncore_discovery_unit, node);

if (die < 0)
return unit;

for (; pos; pos = rb_next(pos)) {
unit = rb_entry(pos, struct intel_uncore_discovery_unit, node);

if (unit->pmu_idx != pmu_idx)
break;

if (unit->die == die)
return unit;
}

return NULL;
}

int intel_uncore_find_discovery_unit_id(struct rb_root *units, int die,
unsigned int pmu_idx)
{
struct intel_uncore_discovery_unit *unit;

unit = intel_uncore_find_discovery_unit(units, die, pmu_idx);
if (unit)
return unit->id;

return -1;
}

static inline bool unit_less(struct rb_node *a, const struct rb_node *b)
{
struct intel_uncore_discovery_unit *a_node, *b_node;

a_node = rb_entry(a, struct intel_uncore_discovery_unit, node);
b_node = rb_entry(b, struct intel_uncore_discovery_unit, node);

if (a_node->pmu_idx < b_node->pmu_idx)
return true;
if (a_node->pmu_idx > b_node->pmu_idx)
return false;

if (a_node->die < b_node->die)
return true;
if (a_node->die > b_node->die)
return false;

return 0;
}

static inline struct intel_uncore_discovery_unit *
uncore_find_unit(struct rb_root *root, unsigned int id)
{
struct intel_uncore_discovery_unit *unit;
struct rb_node *node;

for (node = rb_first(root); node; node = rb_next(node)) {
unit = rb_entry(node, struct intel_uncore_discovery_unit, node);
if (unit->id == id)
return unit;
}

return NULL;
}

void uncore_find_add_unit(struct intel_uncore_discovery_unit *node,
struct rb_root *root, u16 *num_units)
{
struct intel_uncore_discovery_unit *unit = uncore_find_unit(root, node->id);

if (unit)
node->pmu_idx = unit->pmu_idx;
else if (num_units)
node->pmu_idx = (*num_units)++;

rb_add(&node->node, root, unit_less);
}

static void
uncore_insert_box_info(struct uncore_unit_discovery *unit,
int die, bool parsed)
int die)
{
struct intel_uncore_discovery_unit *node;
struct intel_uncore_discovery_type *type;
unsigned int *ids;
u64 *box_offset;
int i;

if (!unit->ctl || !unit->ctl_offset || !unit->ctr_offset) {
pr_info("Invalid address is detected for uncore type %d box %d, "
@@ -136,71 +232,29 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,
return;
}

if (parsed) {
type = search_uncore_discovery_type(unit->box_type);
if (!type) {
pr_info("A spurious uncore type %d is detected, "
"Disable the uncore type.\n",
unit->box_type);
return;
}
/* Store the first box of each die */
if (!type->box_ctrl_die[die])
type->box_ctrl_die[die] = unit->ctl;
node = kzalloc(sizeof(*node), GFP_KERNEL);
if (!node)
return;

node->die = die;
node->id = unit->box_id;
node->addr = unit->ctl;

type = get_uncore_discovery_type(unit);
if (!type) {
kfree(node);
return;
}

type = get_uncore_discovery_type(unit);
if (!type)
return;

box_offset = kcalloc(type->num_boxes + 1, sizeof(u64), GFP_KERNEL);
if (!box_offset)
return;

ids = kcalloc(type->num_boxes + 1, sizeof(unsigned int), GFP_KERNEL);
if (!ids)
goto free_box_offset;
uncore_find_add_unit(node, &type->units, &type->num_units);

/* Store generic information for the first box */
if (!type->num_boxes) {
type->box_ctrl = unit->ctl;
type->box_ctrl_die[die] = unit->ctl;
if (type->num_units == 1) {
type->num_counters = unit->num_regs;
type->counter_width = unit->bit_width;
type->ctl_offset = unit->ctl_offset;
type->ctr_offset = unit->ctr_offset;
*ids = unit->box_id;
goto end;
}

for (i = 0; i < type->num_boxes; i++) {
ids[i] = type->ids[i];
box_offset[i] = type->box_offset[i];

if (unit->box_id == ids[i]) {
pr_info("Duplicate uncore type %d box ID %d is detected, "
"Drop the duplicate uncore unit.\n",
unit->box_type, unit->box_id);
goto free_ids;
}
}
ids[i] = unit->box_id;
box_offset[i] = unit->ctl - type->box_ctrl;
kfree(type->ids);
kfree(type->box_offset);
end:
type->ids = ids;
type->box_offset = box_offset;
type->num_boxes++;
return;

free_ids:
kfree(ids);

free_box_offset:
kfree(box_offset);

}

static bool
@@ -279,7 +333,7 @@ static int parse_discovery_table(struct pci_dev *dev, int die,
if (uncore_ignore_unit(&unit, ignore))
continue;

uncore_insert_box_info(&unit, die, *parsed);
uncore_insert_box_info(&unit, die);
}

*parsed = true;
@@ -339,9 +393,16 @@ err:
void intel_uncore_clear_discovery_tables(void)
{
struct intel_uncore_discovery_type *type, *next;
struct intel_uncore_discovery_unit *pos;
struct rb_node *node;

rbtree_postorder_for_each_entry_safe(type, next, &discovery_tables, node) {
kfree(type->box_ctrl_die);
while (!RB_EMPTY_ROOT(&type->units)) {
node = rb_first(&type->units);
pos = rb_entry(node, struct intel_uncore_discovery_unit, node);
rb_erase(node, &type->units);
kfree(pos);
}
kfree(type);
}
}
@@ -366,19 +427,31 @@ static const struct attribute_group generic_uncore_format_group = {
.attrs = generic_uncore_formats_attr,
};

static u64 intel_generic_uncore_box_ctl(struct intel_uncore_box *box)
{
struct intel_uncore_discovery_unit *unit;

unit = intel_uncore_find_discovery_unit(box->pmu->type->boxes,
-1, box->pmu->pmu_idx);
if (WARN_ON_ONCE(!unit))
return 0;

return unit->addr;
}

void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box)
{
wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_INT);
wrmsrl(intel_generic_uncore_box_ctl(box), GENERIC_PMON_BOX_CTL_INT);
}

void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box)
{
wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ);
wrmsrl(intel_generic_uncore_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ);
}

void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box)
{
wrmsrl(uncore_msr_box_ctl(box), 0);
wrmsrl(intel_generic_uncore_box_ctl(box), 0);
}

static void intel_generic_uncore_msr_enable_event(struct intel_uncore_box *box,
@@ -406,10 +479,47 @@ static struct intel_uncore_ops generic_uncore_msr_ops = {
.read_counter = uncore_msr_read_counter,
};

bool intel_generic_uncore_assign_hw_event(struct perf_event *event,
struct intel_uncore_box *box)
{
struct hw_perf_event *hwc = &event->hw;
u64 box_ctl;

if (!box->pmu->type->boxes)
return false;

if (box->io_addr) {
hwc->config_base = uncore_pci_event_ctl(box, hwc->idx);
hwc->event_base = uncore_pci_perf_ctr(box, hwc->idx);
return true;
}

box_ctl = intel_generic_uncore_box_ctl(box);
if (!box_ctl)
return false;

if (box->pci_dev) {
box_ctl = UNCORE_DISCOVERY_PCI_BOX_CTRL(box_ctl);
hwc->config_base = box_ctl + uncore_pci_event_ctl(box, hwc->idx);
hwc->event_base = box_ctl + uncore_pci_perf_ctr(box, hwc->idx);
return true;
}

hwc->config_base = box_ctl + box->pmu->type->event_ctl + hwc->idx;
hwc->event_base = box_ctl + box->pmu->type->perf_ctr + hwc->idx;

return true;
}

static inline int intel_pci_uncore_box_ctl(struct intel_uncore_box *box)
{
return UNCORE_DISCOVERY_PCI_BOX_CTRL(intel_generic_uncore_box_ctl(box));
}

void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box);
int box_ctl = intel_pci_uncore_box_ctl(box);

__set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags);
pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_INT);
@@ -418,7 +528,7 @@ void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box)
void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box);
int box_ctl = intel_pci_uncore_box_ctl(box);

pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_FRZ);
}
@@ -426,7 +536,7 @@ void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box)
void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box);
int box_ctl = intel_pci_uncore_box_ctl(box);

pci_write_config_dword(pdev, box_ctl, 0);
}
@@ -473,34 +583,30 @@ static struct intel_uncore_ops generic_uncore_pci_ops = {

#define UNCORE_GENERIC_MMIO_SIZE 0x4000

static u64 generic_uncore_mmio_box_ctl(struct intel_uncore_box *box)
{
struct intel_uncore_type *type = box->pmu->type;

if (!type->box_ctls || !type->box_ctls[box->dieid] || !type->mmio_offsets)
return 0;

return type->box_ctls[box->dieid] + type->mmio_offsets[box->pmu->pmu_idx];
}

void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box)
{
u64 box_ctl = generic_uncore_mmio_box_ctl(box);
static struct intel_uncore_discovery_unit *unit;
struct intel_uncore_type *type = box->pmu->type;
resource_size_t addr;

if (!box_ctl) {
pr_warn("Uncore type %d box %d: Invalid box control address.\n",
type->type_id, type->box_ids[box->pmu->pmu_idx]);
unit = intel_uncore_find_discovery_unit(type->boxes, box->dieid, box->pmu->pmu_idx);
if (!unit) {
pr_warn("Uncore type %d id %d: Cannot find box control address.\n",
type->type_id, box->pmu->pmu_idx);
return;
}

addr = box_ctl;
if (!unit->addr) {
pr_warn("Uncore type %d box %d: Invalid box control address.\n",
type->type_id, unit->id);
return;
}

addr = unit->addr;
box->io_addr = ioremap(addr, UNCORE_GENERIC_MMIO_SIZE);
if (!box->io_addr) {
pr_warn("Uncore type %d box %d: ioremap error for 0x%llx.\n",
type->type_id, type->box_ids[box->pmu->pmu_idx],
(unsigned long long)addr);
type->type_id, unit->id, (unsigned long long)addr);
return;
}

@@ -560,34 +666,22 @@ static bool uncore_update_uncore_type(enum uncore_access_type type_id,
struct intel_uncore_discovery_type *type)
{
uncore->type_id = type->type;
uncore->num_boxes = type->num_boxes;
uncore->num_counters = type->num_counters;
uncore->perf_ctr_bits = type->counter_width;
uncore->box_ids = type->ids;
uncore->perf_ctr = (unsigned int)type->ctr_offset;
uncore->event_ctl = (unsigned int)type->ctl_offset;
uncore->boxes = &type->units;
uncore->num_boxes = type->num_units;

switch (type_id) {
case UNCORE_ACCESS_MSR:
uncore->ops = &generic_uncore_msr_ops;
uncore->perf_ctr = (unsigned int)type->box_ctrl + type->ctr_offset;
uncore->event_ctl = (unsigned int)type->box_ctrl + type->ctl_offset;
uncore->box_ctl = (unsigned int)type->box_ctrl;
uncore->msr_offsets = type->box_offset;
break;
case UNCORE_ACCESS_PCI:
uncore->ops = &generic_uncore_pci_ops;
uncore->perf_ctr = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl) + type->ctr_offset;
uncore->event_ctl = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl) + type->ctl_offset;
uncore->box_ctl = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl);
uncore->box_ctls = type->box_ctrl_die;
uncore->pci_offsets = type->box_offset;
break;
case UNCORE_ACCESS_MMIO:
uncore->ops = &generic_uncore_mmio_ops;
uncore->perf_ctr = (unsigned int)type->ctr_offset;
uncore->event_ctl = (unsigned int)type->ctl_offset;
uncore->box_ctl = (unsigned int)type->box_ctrl;
uncore->box_ctls = type->box_ctrl_die;
uncore->mmio_offsets = type->box_offset;
uncore->mmio_map_size = UNCORE_GENERIC_MMIO_SIZE;
break;
default:

@@ -113,19 +113,24 @@ struct uncore_unit_discovery {
};
};

struct intel_uncore_discovery_unit {
struct rb_node node;
unsigned int pmu_idx; /* The idx of the corresponding PMU */
unsigned int id; /* Unit ID */
unsigned int die; /* Die ID */
u64 addr; /* Unit Control Address */
};

struct intel_uncore_discovery_type {
struct rb_node node;
enum uncore_access_type access_type;
u64 box_ctrl; /* Unit ctrl addr of the first box */
u64 *box_ctrl_die; /* Unit ctrl addr of the first box of each die */
struct rb_root units; /* Unit ctrl addr for all units */
u16 type; /* Type ID of the uncore block */
u8 num_counters;
u8 counter_width;
u8 ctl_offset; /* Counter Control 0 offset */
u8 ctr_offset; /* Counter 0 offset */
u16 num_boxes; /* number of boxes for the uncore block */
unsigned int *ids; /* Box IDs */
u64 *box_offset; /* Box offset */
u16 num_units; /* number of units */
};

bool intel_uncore_has_discovery_tables(int *ignore);
@@ -156,3 +161,10 @@ u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box,

struct intel_uncore_type **
intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra);

int intel_uncore_find_discovery_unit_id(struct rb_root *units, int die,
unsigned int pmu_idx);
bool intel_generic_uncore_assign_hw_event(struct perf_event *event,
struct intel_uncore_box *box);
void uncore_find_add_unit(struct intel_uncore_discovery_unit *node,
struct rb_root *root, u16 *num_units);

@@ -462,6 +462,7 @@
#define SPR_UBOX_DID 0x3250

/* SPR CHA */
#define SPR_CHA_EVENT_MASK_EXT 0xffffffff
#define SPR_CHA_PMON_CTL_TID_EN (1 << 16)
#define SPR_CHA_PMON_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
SPR_CHA_PMON_CTL_TID_EN)
@@ -478,6 +479,7 @@ DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-43,45-55");
DEFINE_UNCORE_FORMAT_ATTR(umask_ext2, umask, "config:8-15,32-57");
DEFINE_UNCORE_FORMAT_ATTR(umask_ext3, umask, "config:8-15,32-39");
DEFINE_UNCORE_FORMAT_ATTR(umask_ext4, umask, "config:8-15,32-55");
DEFINE_UNCORE_FORMAT_ATTR(umask_ext5, umask, "config:8-15,32-63");
DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
@@ -5933,10 +5935,11 @@ static int spr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *ev
struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
bool tie_en = !!(event->hw.config & SPR_CHA_PMON_CTL_TID_EN);
struct intel_uncore_type *type = box->pmu->type;
int id = intel_uncore_find_discovery_unit_id(type->boxes, -1, box->pmu->pmu_idx);

if (tie_en) {
reg1->reg = SPR_C0_MSR_PMON_BOX_FILTER0 +
HSWEP_CBO_MSR_OFFSET * type->box_ids[box->pmu->pmu_idx];
HSWEP_CBO_MSR_OFFSET * id;
reg1->config = event->attr.config1 & SPR_CHA_PMON_BOX_FILTER_TID;
reg1->idx = 0;
}
@@ -5958,7 +5961,7 @@ static struct intel_uncore_ops spr_uncore_chabox_ops = {

static struct attribute *spr_uncore_cha_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask_ext4.attr,
&format_attr_umask_ext5.attr,
&format_attr_tid_en2.attr,
&format_attr_edge.attr,
&format_attr_inv.attr,
@@ -5994,7 +5997,7 @@ ATTRIBUTE_GROUPS(uncore_alias);
static struct intel_uncore_type spr_uncore_chabox = {
.name = "cha",
.event_mask = SPR_CHA_PMON_EVENT_MASK,
.event_mask_ext = SPR_RAW_EVENT_MASK_EXT,
.event_mask_ext = SPR_CHA_EVENT_MASK_EXT,
.num_shared_regs = 1,
.constraints = skx_uncore_chabox_constraints,
.ops = &spr_uncore_chabox_ops,
@@ -6162,7 +6165,55 @@ static struct intel_uncore_type spr_uncore_mdf = {
.name = "mdf",
};

#define UNCORE_SPR_NUM_UNCORE_TYPES 12
static void spr_uncore_mmio_offs8_init_box(struct intel_uncore_box *box)
{
__set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags);
intel_generic_uncore_mmio_init_box(box);
}

static struct intel_uncore_ops spr_uncore_mmio_offs8_ops = {
.init_box = spr_uncore_mmio_offs8_init_box,
.exit_box = uncore_mmio_exit_box,
.disable_box = intel_generic_uncore_mmio_disable_box,
.enable_box = intel_generic_uncore_mmio_enable_box,
.disable_event = intel_generic_uncore_mmio_disable_event,
.enable_event = spr_uncore_mmio_enable_event,
.read_counter = uncore_mmio_read_counter,
};

#define SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT() \
SPR_UNCORE_COMMON_FORMAT(), \
.ops = &spr_uncore_mmio_offs8_ops

static struct event_constraint spr_uncore_cxlcm_constraints[] = {
UNCORE_EVENT_CONSTRAINT(0x02, 0x0f),
UNCORE_EVENT_CONSTRAINT(0x05, 0x0f),
UNCORE_EVENT_CONSTRAINT(0x40, 0xf0),
UNCORE_EVENT_CONSTRAINT(0x41, 0xf0),
UNCORE_EVENT_CONSTRAINT(0x42, 0xf0),
UNCORE_EVENT_CONSTRAINT(0x43, 0xf0),
UNCORE_EVENT_CONSTRAINT(0x4b, 0xf0),
UNCORE_EVENT_CONSTRAINT(0x52, 0xf0),
EVENT_CONSTRAINT_END
};

static struct intel_uncore_type spr_uncore_cxlcm = {
SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT(),
.name = "cxlcm",
.constraints = spr_uncore_cxlcm_constraints,
};

static struct intel_uncore_type spr_uncore_cxldp = {
SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT(),
.name = "cxldp",
};

static struct intel_uncore_type spr_uncore_hbm = {
SPR_UNCORE_COMMON_FORMAT(),
.name = "hbm",
};

#define UNCORE_SPR_NUM_UNCORE_TYPES 15
#define UNCORE_SPR_CHA 0
#define UNCORE_SPR_IIO 1
#define UNCORE_SPR_IMC 6
@@ -6186,6 +6237,9 @@ static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = {
NULL,
NULL,
&spr_uncore_mdf,
&spr_uncore_cxlcm,
&spr_uncore_cxldp,
&spr_uncore_hbm,
};

/*
@@ -6198,6 +6252,24 @@ static u64 spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = {
0, 0x8000, 0x10000, 0x18000
};

static void spr_extra_boxes_cleanup(struct intel_uncore_type *type)
{
struct intel_uncore_discovery_unit *pos;
struct rb_node *node;

if (!type->boxes)
return;

while (!RB_EMPTY_ROOT(type->boxes)) {
node = rb_first(type->boxes);
pos = rb_entry(node, struct intel_uncore_discovery_unit, node);
rb_erase(node, type->boxes);
kfree(pos);
}
kfree(type->boxes);
type->boxes = NULL;
}

static struct intel_uncore_type spr_uncore_upi = {
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
.event_mask_ext = SPR_RAW_EVENT_MASK_EXT,
@@ -6212,10 +6284,11 @@ static struct intel_uncore_type spr_uncore_upi = {
.num_counters = 4,
.num_boxes = SPR_UNCORE_UPI_NUM_BOXES,
.perf_ctr_bits = 48,
.perf_ctr = ICX_UPI_PCI_PMON_CTR0,
.event_ctl = ICX_UPI_PCI_PMON_CTL0,
.perf_ctr = ICX_UPI_PCI_PMON_CTR0 - ICX_UPI_PCI_PMON_BOX_CTL,
.event_ctl = ICX_UPI_PCI_PMON_CTL0 - ICX_UPI_PCI_PMON_BOX_CTL,
.box_ctl = ICX_UPI_PCI_PMON_BOX_CTL,
.pci_offsets = spr_upi_pci_offsets,
.cleanup_extra_boxes = spr_extra_boxes_cleanup,
};

static struct intel_uncore_type spr_uncore_m3upi = {
@@ -6225,11 +6298,12 @@ static struct intel_uncore_type spr_uncore_m3upi = {
.num_counters = 4,
.num_boxes = SPR_UNCORE_UPI_NUM_BOXES,
.perf_ctr_bits = 48,
.perf_ctr = ICX_M3UPI_PCI_PMON_CTR0,
.event_ctl = ICX_M3UPI_PCI_PMON_CTL0,
.perf_ctr = ICX_M3UPI_PCI_PMON_CTR0 - ICX_M3UPI_PCI_PMON_BOX_CTL,
.event_ctl = ICX_M3UPI_PCI_PMON_CTL0 - ICX_M3UPI_PCI_PMON_BOX_CTL,
.box_ctl = ICX_M3UPI_PCI_PMON_BOX_CTL,
.pci_offsets = spr_upi_pci_offsets,
.constraints = icx_uncore_m3upi_constraints,
.cleanup_extra_boxes = spr_extra_boxes_cleanup,
};

enum perf_uncore_spr_iio_freerunning_type_id {
@@ -6460,18 +6534,21 @@ uncore_find_type_by_id(struct intel_uncore_type **types, int type_id)
static int uncore_type_max_boxes(struct intel_uncore_type **types,
int type_id)
{
struct intel_uncore_discovery_unit *unit;
struct intel_uncore_type *type;
int i, max = 0;
struct rb_node *node;
int max = 0;

type = uncore_find_type_by_id(types, type_id);
if (!type)
return 0;

for (i = 0; i < type->num_boxes; i++) {
if (type->box_ids[i] > max)
max = type->box_ids[i];
}
for (node = rb_first(type->boxes); node; node = rb_next(node)) {
unit = rb_entry(node, struct intel_uncore_discovery_unit, node);

if (unit->id > max)
max = unit->id;
}
return max + 1;
}

@@ -6513,10 +6590,11 @@ void spr_uncore_cpu_init(void)

static void spr_update_device_location(int type_id)
{
struct intel_uncore_discovery_unit *unit;
struct intel_uncore_type *type;
struct pci_dev *dev = NULL;
struct rb_root *root;
u32 device, devfn;
u64 *ctls;
int die;

if (type_id == UNCORE_SPR_UPI) {
@@ -6530,27 +6608,35 @@ static void spr_update_device_location(int type_id)
} else
return;

ctls = kcalloc(__uncore_max_dies, sizeof(u64), GFP_KERNEL);
if (!ctls) {
root = kzalloc(sizeof(struct rb_root), GFP_KERNEL);
if (!root) {
type->num_boxes = 0;
return;
}
*root = RB_ROOT;

while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, dev)) != NULL) {
if (devfn != dev->devfn)
continue;

die = uncore_device_to_die(dev);
if (die < 0)
continue;

ctls[die] = pci_domain_nr(dev->bus) << UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET |
dev->bus->number << UNCORE_DISCOVERY_PCI_BUS_OFFSET |
devfn << UNCORE_DISCOVERY_PCI_DEVFN_OFFSET |
type->box_ctl;
unit = kzalloc(sizeof(*unit), GFP_KERNEL);
if (!unit)
continue;
unit->die = die;
unit->id = PCI_SLOT(dev->devfn) - PCI_SLOT(devfn);
unit->addr = pci_domain_nr(dev->bus) << UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET |
dev->bus->number << UNCORE_DISCOVERY_PCI_BUS_OFFSET |
devfn << UNCORE_DISCOVERY_PCI_DEVFN_OFFSET |
type->box_ctl;

unit->pmu_idx = unit->id;

uncore_find_add_unit(unit, root, NULL);
}

type->box_ctls = ctls;
type->boxes = root;
}

int spr_uncore_pci_init(void)
@@ -6623,7 +6709,7 @@ static struct intel_uncore_type gnr_uncore_b2cmi = {
};

static struct intel_uncore_type gnr_uncore_b2cxl = {
SPR_UNCORE_MMIO_COMMON_FORMAT(),
SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT(),
.name = "b2cxl",
};

@@ -476,6 +476,14 @@ struct cpu_hw_events {
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID)

#define INTEL_HYBRID_LDLAT_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_LD_HSW)

#define INTEL_HYBRID_STLAT_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_ST_HSW)

/* Event constraint, but match on all event flags too. */
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)
@@ -655,8 +663,10 @@ enum {
x86_lbr_exclusive_max,
};

#define PERF_PEBS_DATA_SOURCE_MAX 0x10
#define PERF_PEBS_DATA_SOURCE_MAX 0x100
#define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1)
#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10
#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)

enum hybrid_cpu_type {
HYBRID_INTEL_NONE,
@@ -684,9 +694,16 @@ struct x86_hybrid_pmu {
cpumask_t supported_cpus;
union perf_capabilities intel_cap;
u64 intel_ctrl;
int max_pebs_events;
int num_counters;
int num_counters_fixed;
u64 pebs_events_mask;
u64 config_mask;
union {
u64 cntr_mask64;
unsigned long cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
union {
u64 fixed_cntr_mask64;
unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
struct event_constraint unconstrained;

u64 hw_cache_event_ids
@@ -770,12 +787,20 @@ struct x86_pmu {
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
unsigned eventsel;
unsigned perfctr;
unsigned fixedctr;
int (*addr_offset)(int index, bool eventsel);
int (*rdpmc_index)(int index);
u64 (*event_map)(int);
int max_events;
int num_counters;
int num_counters_fixed;
u64 config_mask;
union {
u64 cntr_mask64;
unsigned long cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
union {
u64 fixed_cntr_mask64;
unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
int cntval_bits;
u64 cntval_mask;
union {
@@ -852,7 +877,7 @@ struct x86_pmu {
pebs_ept :1;
int pebs_record_size;
int pebs_buffer_size;
int max_pebs_events;
u64 pebs_events_mask;
void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);
struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event);
@@ -1120,13 +1145,19 @@ static inline unsigned int x86_pmu_event_addr(int index)
x86_pmu.addr_offset(index, false) : index);
}

static inline unsigned int x86_pmu_fixed_ctr_addr(int index)
{
return x86_pmu.fixedctr + (x86_pmu.addr_offset ?
x86_pmu.addr_offset(index, false) : index);
}

static inline int x86_pmu_rdpmc_index(int index)
{
return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
}

bool check_hw_exists(struct pmu *pmu, int num_counters,
int num_counters_fixed);
bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
unsigned long *fixed_cntr_mask);

int x86_add_exclusive(unsigned int what);

@ -1197,8 +1228,32 @@ void x86_pmu_enable_event(struct perf_event *event);
|
||||
|
||||
int x86_pmu_handle_irq(struct pt_regs *regs);
|
||||
|
||||
void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
|
||||
u64 intel_ctrl);
|
||||
void x86_pmu_show_pmu_cap(struct pmu *pmu);
|
||||
|
||||
static inline int x86_pmu_num_counters(struct pmu *pmu)
|
||||
{
|
||||
return hweight64(hybrid(pmu, cntr_mask64));
|
||||
}
|
||||
|
||||
static inline int x86_pmu_max_num_counters(struct pmu *pmu)
|
||||
{
|
||||
return fls64(hybrid(pmu, cntr_mask64));
|
||||
}
|
||||
|
||||
static inline int x86_pmu_num_counters_fixed(struct pmu *pmu)
|
||||
{
|
||||
return hweight64(hybrid(pmu, fixed_cntr_mask64));
|
||||
}
|
||||
|
||||
static inline int x86_pmu_max_num_counters_fixed(struct pmu *pmu)
|
||||
{
|
||||
return fls64(hybrid(pmu, fixed_cntr_mask64));
|
||||
}
|
||||
|
||||
static inline u64 x86_pmu_get_event_config(struct perf_event *event)
|
||||
{
|
||||
return event->attr.config & hybrid(event->pmu, config_mask);
|
||||
}
|
||||
|
||||
extern struct event_constraint emptyconstraint;
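
Illustrative aside (not part of the diff): the counter-mask conversion above, and in the zhaoxin_pmu_init() hunks further down, replaces stored num_counters/num_counters_fixed integers with bitmasks. hweight64() recovers the number of counters, fls64() the highest counter index plus one, and, unlike a plain count, a mask can also describe sparse counter layouts. A small user-space sketch of that relationship follows; the GENMASK_ULL()/hweight64()/fls64() stand-ins below are simplified approximations, not the kernel implementations.

#include <stdio.h>
#include <stdint.h>

/* User-space stand-ins (behavioural approximations of the kernel helpers). */
#define GENMASK_ULL(h, l)  (((~0ULL) >> (63 - (h))) & ((~0ULL) << (l)))
static int hweight64(uint64_t w) { return __builtin_popcountll(w); }
static int fls64(uint64_t w)     { return w ? 64 - __builtin_clzll(w) : 0; }

int main(void)
{
        int num_counters = 8;   /* e.g. eight general-purpose counters */
        uint64_t cntr_mask64 = GENMASK_ULL(num_counters - 1, 0);

        /* The mask carries the same information as the old integer count. */
        printf("mask=%#llx count=%d max=%d\n",
               (unsigned long long)cntr_mask64,
               hweight64(cntr_mask64), fls64(cntr_mask64));    /* 0xff, 8, 8 */

        /* It can also describe a sparse layout, which a count cannot,
         * e.g. counters 0-3 and 6-7 present: */
        uint64_t sparse = GENMASK_ULL(3, 0) | GENMASK_ULL(7, 6);
        printf("sparse=%#llx count=%d max=%d\n",
               (unsigned long long)sparse,
               hweight64(sparse), fls64(sparse));              /* 0xcf, 6, 8 */
        return 0;
}
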

@ -1517,9 +1572,11 @@ void intel_pmu_disable_bts(void);

int intel_pmu_drain_bts_buffer(void);

u64 adl_latency_data_small(struct perf_event *event, u64 status);
u64 grt_latency_data(struct perf_event *event, u64 status);

u64 mtl_latency_data_small(struct perf_event *event, u64 status);
u64 cmt_latency_data(struct perf_event *event, u64 status);

u64 lnl_latency_data(struct perf_event *event, u64 status);

extern struct event_constraint intel_core2_pebs_event_constraints[];

@ -1551,6 +1608,8 @@ extern struct event_constraint intel_icl_pebs_event_constraints[];

extern struct event_constraint intel_glc_pebs_event_constraints[];

extern struct event_constraint intel_lnc_pebs_event_constraints[];

struct event_constraint *intel_pebs_constraints(struct perf_event *event);

void intel_pmu_pebs_add(struct perf_event *event);
@ -1640,6 +1699,8 @@ void intel_pmu_pebs_data_source_mtl(void);

void intel_pmu_pebs_data_source_cmt(void);

void intel_pmu_pebs_data_source_lnl(void);

int intel_pmu_setup_lbr_filter(struct perf_event *event);

void intel_pt_interrupt(void);
@ -1661,6 +1722,17 @@ static inline int is_ht_workaround_enabled(void)
return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
}

static inline u64 intel_pmu_pebs_mask(u64 cntr_mask)
{
return MAX_PEBS_EVENTS_MASK & cntr_mask;
}

static inline int intel_pmu_max_num_pebs(struct pmu *pmu)
{
static_assert(MAX_PEBS_EVENTS == 32);
return fls((u32)hybrid(pmu, pebs_events_mask));
}

#else /* CONFIG_CPU_SUP_INTEL */

static inline void reserve_ds_buffers(void)

@ -530,13 +530,13 @@ __init int zhaoxin_pmu_init(void)
pr_info("Version check pass!\n");

x86_pmu.version = version;
x86_pmu.num_counters = eax.split.num_counters;
x86_pmu.cntr_mask64 = GENMASK_ULL(eax.split.num_counters - 1, 0);
x86_pmu.cntval_bits = eax.split.bit_width;
x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
x86_pmu.events_maskl = ebx.full;
x86_pmu.events_mask_len = eax.split.mask_length;

x86_pmu.num_counters_fixed = edx.split.num_counters_fixed;
x86_pmu.fixed_cntr_mask64 = GENMASK_ULL(edx.split.num_counters_fixed - 1, 0);
x86_add_quirk(zhaoxin_arch_events_quirk);

switch (boot_cpu_data.x86) {
@ -604,13 +604,13 @@ __init int zhaoxin_pmu_init(void)
return -ENODEV;
}

x86_pmu.intel_ctrl = (1 << (x86_pmu.num_counters)) - 1;
x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;
x86_pmu.intel_ctrl |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED;

if (x86_pmu.event_constraints) {
for_each_event_constraint(c, x86_pmu.event_constraints) {
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
c->weight += x86_pmu.num_counters;
c->idxmsk64 |= x86_pmu.cntr_mask64;
c->weight += x86_pmu_num_counters(NULL);
}
}

@ -9,6 +9,7 @@
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS_FMT4 8
#define MAX_PEBS_EVENTS 32
#define MAX_PEBS_EVENTS_MASK GENMASK_ULL(MAX_PEBS_EVENTS - 1, 0)
#define MAX_FIXED_PEBS_EVENTS 16

/*

@ -566,6 +566,12 @@
#define MSR_RELOAD_PMC0 0x000014c1
#define MSR_RELOAD_FIXED_CTR0 0x00001309

/* V6 PMON MSR range */
#define MSR_IA32_PMC_V6_GP0_CTR 0x1900
#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901
#define MSR_IA32_PMC_V6_FX0_CTR 0x1980
#define MSR_IA32_PMC_V6_STEP 4

/* KeyID partitioning between MKTME and TDX */
#define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087

@ -32,6 +32,8 @@
#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
#define ARCH_PERFMON_EVENTSEL_BR_CNTR (1ULL << 35)
#define ARCH_PERFMON_EVENTSEL_EQ (1ULL << 36)
#define ARCH_PERFMON_EVENTSEL_UMASK2 (0xFFULL << 40)

#define INTEL_FIXED_BITS_MASK 0xFULL
#define INTEL_FIXED_BITS_STRIDE 4
@ -185,6 +187,8 @@ union cpuid10_edx {
* detection/enumeration details:
*/
#define ARCH_PERFMON_EXT_LEAF 0x00000023
#define ARCH_PERFMON_EXT_UMASK2 0x1
#define ARCH_PERFMON_EXT_EQ 0x2
#define ARCH_PERFMON_NUM_COUNTER_LEAF_BIT 0x1
#define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1

@ -307,6 +311,10 @@ struct x86_pmu_capability {
#define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3)
#define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS)

/* TOPDOWN_BAD_SPECULATION.ALL: fixed counter 4 (Atom only) */
/* TOPDOWN_FE_BOUND.ALL: fixed counter 5 (Atom only) */
/* TOPDOWN_RETIRING.ALL: fixed counter 6 (Atom only) */

static inline bool use_fixed_pseudo_encoding(u64 code)
{
return !(code & 0xff);

@ -781,11 +781,12 @@ struct perf_event {
unsigned int pending_wakeup;
unsigned int pending_kill;
unsigned int pending_disable;
unsigned int pending_sigtrap;
unsigned long pending_addr; /* SIGTRAP */
struct irq_work pending_irq;
struct irq_work pending_disable_irq;
struct callback_head pending_task;
unsigned int pending_work;
struct rcuwait pending_work_wait;

atomic_t event_limit;

@ -962,7 +963,7 @@ struct perf_event_context {
struct rcu_head rcu_head;

/*
* Sum (event->pending_sigtrap + event->pending_work)
* Sum (event->pending_work + event->pending_work)
*
* The SIGTRAP is targeted at ctx->task, as such it won't do changing
* that until the signal is delivered.
@ -970,12 +971,6 @@ struct perf_event_context {
local_t nr_pending;
};

/*
* Number of contexts where an event can trigger:
* task, softirq, hardirq, nmi.
*/
#define PERF_NR_CONTEXTS 4

struct perf_cpu_pmu_context {
struct perf_event_pmu_context epc;
struct perf_event_pmu_context *task_epc;

@ -734,6 +734,12 @@ enum perf_event_task_context {
perf_nr_task_contexts,
};

/*
* Number of contexts where an event can trigger:
* task, softirq, hardirq, nmi.
*/
#define PERF_NR_CONTEXTS 4

struct wake_q_node {
struct wake_q_node *next;
};
@ -1256,6 +1262,7 @@ struct task_struct {
unsigned int futex_state;
#endif
#ifdef CONFIG_PERF_EVENTS
u8 perf_recursion[PERF_NR_CONTEXTS];
struct perf_event_context *perf_event_ctxp;
struct mutex perf_event_mutex;
struct list_head perf_event_list;

@ -18,6 +18,7 @@ enum task_work_notify_mode {
TWA_RESUME,
TWA_SIGNAL,
TWA_SIGNAL_NO_IPI,
TWA_NMI_CURRENT,
};

static inline bool task_work_pending(struct task_struct *task)
@ -30,7 +31,8 @@ int task_work_add(struct task_struct *task, struct callback_head *twork,

struct callback_head *task_work_cancel_match(struct task_struct *task,
bool (*match)(struct callback_head *, void *data), void *data);
struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
struct callback_head *task_work_cancel_func(struct task_struct *, task_work_func_t);
bool task_work_cancel(struct task_struct *task, struct callback_head *cb);
void task_work_run(void);

static inline void exit_task_work(struct task_struct *task)

@ -1349,12 +1349,14 @@ union perf_mem_data_src {
#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
/* 5-0x7 available */
#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */
#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */
/* 0x7 available */
#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */
#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */
#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */
#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */
#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */
#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */

@ -29,7 +29,7 @@ static inline size_t perf_callchain_entry__sizeof(void)
sysctl_perf_event_max_contexts_per_stack));
}

static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
static DEFINE_PER_CPU(u8, callchain_recursion[PERF_NR_CONTEXTS]);
static atomic_t nr_callchain_events;
static DEFINE_MUTEX(callchain_mutex);
static struct callchain_cpus_entries *callchain_cpus_entries;

@ -2283,21 +2283,6 @@ event_sched_out(struct perf_event *event, struct perf_event_context *ctx)
state = PERF_EVENT_STATE_OFF;
}

if (event->pending_sigtrap) {
bool dec = true;

event->pending_sigtrap = 0;
if (state != PERF_EVENT_STATE_OFF &&
!event->pending_work) {
event->pending_work = 1;
dec = false;
WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
task_work_add(current, &event->pending_task, TWA_RESUME);
}
if (dec)
local_dec(&event->ctx->nr_pending);
}

perf_event_set_state(event, state);

if (!is_software_event(event))
@ -2466,7 +2451,7 @@ static void __perf_event_disable(struct perf_event *event,
* hold the top-level event's child_mutex, so any descendant that
* goes to exit will block in perf_event_exit_event().
*
* When called from perf_pending_irq it's OK because event->ctx
* When called from perf_pending_disable it's OK because event->ctx
* is the current context on this CPU and preemption is disabled,
* hence we can't get into perf_event_task_sched_out for this context.
*/
@ -2506,7 +2491,7 @@ EXPORT_SYMBOL_GPL(perf_event_disable);
void perf_event_disable_inatomic(struct perf_event *event)
{
event->pending_disable = 1;
irq_work_queue(&event->pending_irq);
irq_work_queue(&event->pending_disable_irq);
}

#define MAX_INTERRUPTS (~0ULL)
@ -5206,9 +5191,35 @@ static bool exclusive_event_installable(struct perf_event *event,
static void perf_addr_filters_splice(struct perf_event *event,
struct list_head *head);

static void perf_pending_task_sync(struct perf_event *event)
{
struct callback_head *head = &event->pending_task;

if (!event->pending_work)
return;
/*
* If the task is queued to the current task's queue, we
* obviously can't wait for it to complete. Simply cancel it.
*/
if (task_work_cancel(current, head)) {
event->pending_work = 0;
local_dec(&event->ctx->nr_pending);
return;
}

/*
* All accesses related to the event are within the same RCU section in
* perf_pending_task(). The RCU grace period before the event is freed
* will make sure all those accesses are complete by then.
*/
rcuwait_wait_event(&event->pending_work_wait, !event->pending_work, TASK_UNINTERRUPTIBLE);
}

static void _free_event(struct perf_event *event)
{
irq_work_sync(&event->pending_irq);
irq_work_sync(&event->pending_disable_irq);
perf_pending_task_sync(event);

unaccount_event(event);

@ -6509,6 +6520,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
return -EINVAL;

nr_pages = vma_size / PAGE_SIZE;
if (nr_pages > INT_MAX)
return -ENOMEM;

mutex_lock(&event->mmap_mutex);
ret = -EINVAL;
@ -6750,7 +6763,7 @@ static void perf_sigtrap(struct perf_event *event)
/*
* Deliver the pending work in-event-context or follow the context.
*/
static void __perf_pending_irq(struct perf_event *event)
static void __perf_pending_disable(struct perf_event *event)
{
int cpu = READ_ONCE(event->oncpu);

@ -6765,11 +6778,6 @@ static void __perf_pending_irq(struct perf_event *event)
* Yay, we hit home and are in the context of the event.
*/
if (cpu == smp_processor_id()) {
if (event->pending_sigtrap) {
event->pending_sigtrap = 0;
perf_sigtrap(event);
local_dec(&event->ctx->nr_pending);
}
if (event->pending_disable) {
event->pending_disable = 0;
perf_event_disable_local(event);
@ -6793,11 +6801,26 @@ static void __perf_pending_irq(struct perf_event *event)
* irq_work_queue(); // FAILS
*
* irq_work_run()
* perf_pending_irq()
* perf_pending_disable()
*
* But the event runs on CPU-B and wants disabling there.
*/
irq_work_queue_on(&event->pending_irq, cpu);
irq_work_queue_on(&event->pending_disable_irq, cpu);
}

static void perf_pending_disable(struct irq_work *entry)
{
struct perf_event *event = container_of(entry, struct perf_event, pending_disable_irq);
int rctx;

/*
* If we 'fail' here, that's OK, it means recursion is already disabled
* and we won't recurse 'further'.
*/
rctx = perf_swevent_get_recursion_context();
__perf_pending_disable(event);
if (rctx >= 0)
perf_swevent_put_recursion_context(rctx);
}

static void perf_pending_irq(struct irq_work *entry)
@ -6820,8 +6843,6 @@ static void perf_pending_irq(struct irq_work *entry)
perf_event_wakeup(event);
}

__perf_pending_irq(event);

if (rctx >= 0)
perf_swevent_put_recursion_context(rctx);
}
@ -6831,24 +6852,28 @@ static void perf_pending_task(struct callback_head *head)
struct perf_event *event = container_of(head, struct perf_event, pending_task);
int rctx;

/*
* All accesses to the event must belong to the same implicit RCU read-side
* critical section as the ->pending_work reset. See comment in
* perf_pending_task_sync().
*/
rcu_read_lock();
/*
* If we 'fail' here, that's OK, it means recursion is already disabled
* and we won't recurse 'further'.
*/
preempt_disable_notrace();
rctx = perf_swevent_get_recursion_context();

if (event->pending_work) {
event->pending_work = 0;
perf_sigtrap(event);
local_dec(&event->ctx->nr_pending);
rcuwait_wake_up(&event->pending_work_wait);
}
rcu_read_unlock();

if (rctx >= 0)
perf_swevent_put_recursion_context(rctx);
preempt_enable_notrace();

put_event(event);
}

#ifdef CONFIG_GUEST_PERF_EVENTS
@ -9706,16 +9731,26 @@ static int __perf_event_overflow(struct perf_event *event,
*/
bool valid_sample = sample_is_allowed(event, regs);
unsigned int pending_id = 1;
enum task_work_notify_mode notify_mode;

if (regs)
pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1;
if (!event->pending_sigtrap) {
event->pending_sigtrap = pending_id;

notify_mode = in_nmi() ? TWA_NMI_CURRENT : TWA_RESUME;

if (!event->pending_work &&
!task_work_add(current, &event->pending_task, notify_mode)) {
event->pending_work = pending_id;
local_inc(&event->ctx->nr_pending);

event->pending_addr = 0;
if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR))
event->pending_addr = data->addr;

} else if (event->attr.exclude_kernel && valid_sample) {
/*
* Should not be able to return to user space without
* consuming pending_sigtrap; with exceptions:
* consuming pending_work; with exceptions:
*
* 1. Where !exclude_kernel, events can overflow again
* in the kernel without returning to user space.
@ -9725,13 +9760,8 @@ static int __perf_event_overflow(struct perf_event *event,
* To approximate progress (with false negatives),
* check 32-bit hash of the current IP.
*/
WARN_ON_ONCE(event->pending_sigtrap != pending_id);
WARN_ON_ONCE(event->pending_work != pending_id);
}

event->pending_addr = 0;
if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR))
event->pending_addr = data->addr;
irq_work_queue(&event->pending_irq);
}

READ_ONCE(event->overflow_handler)(event, data, regs);
@ -9759,11 +9789,7 @@ struct swevent_htable {
struct swevent_hlist *swevent_hlist;
struct mutex hlist_mutex;
int hlist_refcount;

/* Recursion avoidance in each contexts */
int recursion[PERF_NR_CONTEXTS];
};

static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);

/*
@ -9961,17 +9987,13 @@ DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]);

int perf_swevent_get_recursion_context(void)
{
struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);

return get_recursion_context(swhash->recursion);
return get_recursion_context(current->perf_recursion);
}
EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);

void perf_swevent_put_recursion_context(int rctx)
{
struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);

put_recursion_context(swhash->recursion, rctx);
put_recursion_context(current->perf_recursion, rctx);
}

void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
@ -11961,7 +11983,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,

init_waitqueue_head(&event->waitq);
init_irq_work(&event->pending_irq, perf_pending_irq);
event->pending_disable_irq = IRQ_WORK_INIT_HARD(perf_pending_disable);
init_task_work(&event->pending_task, perf_pending_task);
rcuwait_init(&event->pending_work_wait);

mutex_init(&event->mmap_mutex);
raw_spin_lock_init(&event->addr_filters.lock);
@ -13637,6 +13661,7 @@ int perf_event_init_task(struct task_struct *child, u64 clone_flags)
{
int ret;

memset(child->perf_recursion, 0, sizeof(child->perf_recursion));
child->perf_event_ctxp = NULL;
mutex_init(&child->perf_event_mutex);
INIT_LIST_HEAD(&child->perf_event_list);

@ -128,7 +128,7 @@ static inline unsigned long perf_data_size(struct perf_buffer *rb)

static inline unsigned long perf_aux_size(struct perf_buffer *rb)
{
return rb->aux_nr_pages << PAGE_SHIFT;
return (unsigned long)rb->aux_nr_pages << PAGE_SHIFT;
}

#define __DEFINE_OUTPUT_COPY_BODY(advance_buf, memcpy_func, ...) \
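
Illustrative aside (not part of the diff): the perf_aux_size() change just above is a plain integer-widening fix. The AUX page count is a 32-bit value, so the shift used to be evaluated in 32-bit arithmetic and could wrap for AUX buffers of 2 GiB and larger before being widened to unsigned long. A minimal user-space sketch of the effect, assuming 4 KiB pages (PAGE_SHIFT == 12) and an LP64 target:

#include <stdio.h>

#define PAGE_SHIFT 12   /* assumption: 4 KiB pages */

int main(void)
{
        int aux_nr_pages = 1 << 20;     /* 1M pages = 4 GiB of AUX space */

        /* Old expression: the shift happens in 32-bit arithmetic and wraps
         * before the result is widened (done on an unsigned copy here to
         * keep the demo free of signed-overflow undefined behaviour). */
        unsigned long wrapped = (unsigned int)aux_nr_pages << PAGE_SHIFT;

        /* Fixed expression: widen first, then shift in 64 bits. */
        unsigned long correct = (unsigned long)aux_nr_pages << PAGE_SHIFT;

        printf("wrapped: %lu\n", wrapped);      /* 0 */
        printf("correct: %lu\n", correct);      /* 4294967296 */
        return 0;
}
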
@ -208,7 +208,7 @@ arch_perf_out_copy_user(void *dst, const void *src, unsigned long n)

DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)

static inline int get_recursion_context(int *recursion)
static inline int get_recursion_context(u8 *recursion)
{
unsigned char rctx = interrupt_context_level();

@ -221,7 +221,7 @@ static inline int get_recursion_context(int *recursion)
return rctx;
}

static inline void put_recursion_context(int *recursion, int rctx)
static inline void put_recursion_context(u8 *recursion, unsigned char rctx)
{
barrier();
recursion[rctx]--;
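
Illustrative aside (not part of the diff): together with the task_struct, callchain and swevent hunks above, this narrows each recursion counter from an int to a u8 and moves the software-event counters into task_struct, so the four-entry array (task, softirq, hardirq, NMI) shrinks from 16 bytes to 4. A rough user-space model of the guard's behaviour follows; the kernel derives the context level from interrupt_context_level(), which is passed in explicitly here, and these functions are simplified stand-ins, not the kernel helpers.

#include <stdio.h>
#include <stdint.h>

#define PERF_NR_CONTEXTS 4      /* task, softirq, hardirq, nmi */

static uint8_t perf_recursion[PERF_NR_CONTEXTS];        /* 4 bytes instead of 16 */

/* Returns the claimed context level, or -1 if that level is already busy. */
static int get_recursion_context(uint8_t *recursion, int level)
{
        if (recursion[level])
                return -1;
        recursion[level]++;
        return level;
}

static void put_recursion_context(uint8_t *recursion, int rctx)
{
        recursion[rctx]--;
}

int main(void)
{
        int rctx = get_recursion_context(perf_recursion, 0);

        printf("first enter:  %d\n", rctx);     /* 0 */
        printf("nested enter: %d\n",
               get_recursion_context(perf_recursion, 0));       /* -1: blocked */
        put_recursion_context(perf_recursion, rctx);
        printf("after put:    %d\n",
               get_recursion_context(perf_recursion, 0));       /* 0 again */
        put_recursion_context(perf_recursion, 0);
        return 0;
}
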

@ -682,13 +682,18 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
if (!has_aux(event))
return -EOPNOTSUPP;

if (nr_pages <= 0)
return -EINVAL;

if (!overwrite) {
/*
* Watermark defaults to half the buffer, and so does the
* max_order, to aid PMU drivers in double buffering.
*/
if (!watermark)
watermark = nr_pages << (PAGE_SHIFT - 1);
watermark = min_t(unsigned long,
U32_MAX,
(unsigned long)nr_pages << (PAGE_SHIFT - 1));

/*
* Use aux_watermark as the basis for chunking to

@ -1337,7 +1337,7 @@ static int irq_thread(void *data)
* synchronize_hardirq(). So neither IRQTF_RUNTHREAD nor the
* oneshot mask bit can be set.
*/
task_work_cancel(current, irq_thread_dtor);
task_work_cancel_func(current, irq_thread_dtor);
return 0;
}

@ -1,10 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/irq_work.h>
#include <linux/spinlock.h>
#include <linux/task_work.h>
#include <linux/resume_user_mode.h>

static struct callback_head work_exited; /* all we need is ->next == NULL */

static void task_work_set_notify_irq(struct irq_work *entry)
{
test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
}
static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) =
IRQ_WORK_INIT_HARD(task_work_set_notify_irq);

/**
* task_work_add - ask the @task to execute @work->func()
* @task: the task which should run the callback
@ -12,7 +20,7 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
* @notify: how to notify the targeted task
*
* Queue @work for task_work_run() below and notify the @task if @notify
* is @TWA_RESUME, @TWA_SIGNAL, or @TWA_SIGNAL_NO_IPI.
* is @TWA_RESUME, @TWA_SIGNAL, @TWA_SIGNAL_NO_IPI or @TWA_NMI_CURRENT.
*
* @TWA_SIGNAL works like signals, in that the it will interrupt the targeted
* task and run the task_work, regardless of whether the task is currently
@ -24,6 +32,8 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
* kernel anyway.
* @TWA_RESUME work is run only when the task exits the kernel and returns to
* user mode, or before entering guest mode.
* @TWA_NMI_CURRENT works like @TWA_RESUME, except it can only be used for the
* current @task and if the current context is NMI.
*
* Fails if the @task is exiting/exited and thus it can't process this @work.
* Otherwise @work->func() will be called when the @task goes through one of
@ -44,8 +54,13 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
{
struct callback_head *head;

/* record the work call stack in order to print it in KASAN reports */
kasan_record_aux_stack(work);
if (notify == TWA_NMI_CURRENT) {
if (WARN_ON_ONCE(task != current))
return -EINVAL;
} else {
/* record the work call stack in order to print it in KASAN reports */
kasan_record_aux_stack(work);
}

head = READ_ONCE(task->task_works);
do {
@ -66,6 +81,9 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
case TWA_SIGNAL_NO_IPI:
__set_notify_signal(task);
break;
case TWA_NMI_CURRENT:
irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume));
break;
default:
WARN_ON_ONCE(1);
break;
@ -120,9 +138,9 @@ static bool task_work_func_match(struct callback_head *cb, void *data)
}

/**
* task_work_cancel - cancel a pending work added by task_work_add()
* @task: the task which should execute the work
* @func: identifies the work to remove
* task_work_cancel_func - cancel a pending work matching a function added by task_work_add()
* @task: the task which should execute the func's work
* @func: identifies the func to match with a work to remove
*
* Find the last queued pending work with ->func == @func and remove
* it from queue.
@ -131,11 +149,35 @@ static bool task_work_func_match(struct callback_head *cb, void *data)
* The found work or NULL if not found.
*/
struct callback_head *
task_work_cancel(struct task_struct *task, task_work_func_t func)
task_work_cancel_func(struct task_struct *task, task_work_func_t func)
{
return task_work_cancel_match(task, task_work_func_match, func);
}

static bool task_work_match(struct callback_head *cb, void *data)
{
return cb == data;
}

/**
* task_work_cancel - cancel a pending work added by task_work_add()
* @task: the task which should execute the work
* @cb: the callback to remove if queued
*
* Remove a callback from a task's queue if queued.
*
* RETURNS:
* True if the callback was queued and got cancelled, false otherwise.
*/
bool task_work_cancel(struct task_struct *task, struct callback_head *cb)
{
struct callback_head *ret;

ret = task_work_cancel_match(task, task_work_match, cb);

return ret == cb;
}

/**
* task_work_run - execute the works added by task_work_add()
*
@ -168,7 +210,7 @@ void task_work_run(void)
if (!work)
break;
/*
* Synchronize with task_work_cancel(). It can not remove
* Synchronize with task_work_cancel_match(). It can not remove
* the first entry == work, cmpxchg(task_works) must fail.
* But it can remove another entry from the ->next list.
*/

@ -1694,7 +1694,7 @@ long keyctl_session_to_parent(void)
goto unlock;

/* cancel an already pending keyring replacement */
oldwork = task_work_cancel(parent, key_change_session_keyring);
oldwork = task_work_cancel_func(parent, key_change_session_keyring);

/* the replacement session keyring is applied just prior to userspace
* restarting */