diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index fdd106267fd2..de1a924a4914 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -560,6 +560,21 @@ int x86_pmu_hw_config(struct perf_event *event) return -EINVAL; } + /* sample_regs_user never support XMM registers */ + if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS)) + return -EINVAL; + /* + * Besides the general purpose registers, XMM registers may + * be collected in PEBS on some platforms, e.g. Icelake + */ + if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) { + if (x86_pmu.pebs_no_xmm_regs) + return -EINVAL; + + if (!event->attr.precise_ip) + return -EINVAL; + } + return x86_setup_perfctr(event); } diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 10c99ce1fead..f57e6cb7fd99 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1628,8 +1628,10 @@ void __init intel_ds_init(void) x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; - if (x86_pmu.version <= 4) + if (x86_pmu.version <= 4) { x86_pmu.pebs_no_isolation = 1; + x86_pmu.pebs_no_xmm_regs = 1; + } if (x86_pmu.pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; int format = x86_pmu.intel_cap.pebs_format; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 9e474a5f3b86..7abfadb4f202 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -115,6 +115,24 @@ struct amd_nb { (1ULL << PERF_REG_X86_R14) | \ (1ULL << PERF_REG_X86_R15)) +#define PEBS_XMM_REGS \ + ((1ULL << PERF_REG_X86_XMM0) | \ + (1ULL << PERF_REG_X86_XMM1) | \ + (1ULL << PERF_REG_X86_XMM2) | \ + (1ULL << PERF_REG_X86_XMM3) | \ + (1ULL << PERF_REG_X86_XMM4) | \ + (1ULL << PERF_REG_X86_XMM5) | \ + (1ULL << PERF_REG_X86_XMM6) | \ + (1ULL << PERF_REG_X86_XMM7) | \ + (1ULL << PERF_REG_X86_XMM8) | \ + (1ULL << PERF_REG_X86_XMM9) | \ + (1ULL << PERF_REG_X86_XMM10) | \ + (1ULL << PERF_REG_X86_XMM11) | \ + (1ULL << PERF_REG_X86_XMM12) | \ + (1ULL << PERF_REG_X86_XMM13) | \ + (1ULL << PERF_REG_X86_XMM14) | \ + (1ULL << PERF_REG_X86_XMM15)) + /* * Per register state. */ @@ -612,7 +630,8 @@ struct x86_pmu { pebs_broken :1, pebs_prec_dist :1, pebs_no_tlb :1, - pebs_no_isolation :1; + pebs_no_isolation :1, + pebs_no_xmm_regs :1; int pebs_record_size; int pebs_buffer_size; void (*drain_pebs)(struct pt_regs *regs); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8bdf74902293..d9f5bbe44b3c 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -248,6 +248,11 @@ extern void perf_events_lapic_init(void); #define PERF_EFLAGS_VM (1UL << 5) struct pt_regs; +struct x86_perf_regs { + struct pt_regs regs; + u64 *xmm_regs; +}; + extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h index f3329cabce5c..ac67bbea10ca 100644 --- a/arch/x86/include/uapi/asm/perf_regs.h +++ b/arch/x86/include/uapi/asm/perf_regs.h @@ -27,8 +27,29 @@ enum perf_event_x86_regs { PERF_REG_X86_R13, PERF_REG_X86_R14, PERF_REG_X86_R15, - + /* These are the limits for the GPRs. */ PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1, PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1, + + /* These all need two bits set because they are 128bit */ + PERF_REG_X86_XMM0 = 32, + PERF_REG_X86_XMM1 = 34, + PERF_REG_X86_XMM2 = 36, + PERF_REG_X86_XMM3 = 38, + PERF_REG_X86_XMM4 = 40, + PERF_REG_X86_XMM5 = 42, + PERF_REG_X86_XMM6 = 44, + PERF_REG_X86_XMM7 = 46, + PERF_REG_X86_XMM8 = 48, + PERF_REG_X86_XMM9 = 50, + PERF_REG_X86_XMM10 = 52, + PERF_REG_X86_XMM11 = 54, + PERF_REG_X86_XMM12 = 56, + PERF_REG_X86_XMM13 = 58, + PERF_REG_X86_XMM14 = 60, + PERF_REG_X86_XMM15 = 62, + + /* These include both GPRs and XMMX registers */ + PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2, }; #endif /* _ASM_X86_PERF_REGS_H */ diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index c06c4c16c6b6..07c30ee17425 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -59,18 +59,34 @@ static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = { u64 perf_reg_value(struct pt_regs *regs, int idx) { + struct x86_perf_regs *perf_regs; + + if (idx >= PERF_REG_X86_XMM0 && idx < PERF_REG_X86_XMM_MAX) { + perf_regs = container_of(regs, struct x86_perf_regs, regs); + if (!perf_regs->xmm_regs) + return 0; + return perf_regs->xmm_regs[idx - PERF_REG_X86_XMM0]; + } + if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset))) return 0; return regs_get_register(regs, pt_regs_offset[idx]); } -#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL)) - #ifdef CONFIG_X86_32 +#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \ + (1ULL << PERF_REG_X86_R9) | \ + (1ULL << PERF_REG_X86_R10) | \ + (1ULL << PERF_REG_X86_R11) | \ + (1ULL << PERF_REG_X86_R12) | \ + (1ULL << PERF_REG_X86_R13) | \ + (1ULL << PERF_REG_X86_R14) | \ + (1ULL << PERF_REG_X86_R15)) + int perf_reg_validate(u64 mask) { - if (!mask || mask & REG_RESERVED) + if (!mask || (mask & REG_NOSUPPORT)) return -EINVAL; return 0; @@ -96,10 +112,7 @@ void perf_get_regs_user(struct perf_regs *regs_user, int perf_reg_validate(u64 mask) { - if (!mask || mask & REG_RESERVED) - return -EINVAL; - - if (mask & REG_NOSUPPORT) + if (!mask || (mask & REG_NOSUPPORT)) return -EINVAL; return 0;