b371b59431
Commit43b4578071
("perf/x86: Reduce stack usage of x86_schedule_events()") violated the rule that 'fake' scheduling; as used for event/group validation; should not change the event state. This went mostly un-noticed because repeated calls of x86_pmu::get_event_constraints() would give the same result. And x86_pmu::put_event_constraints() would mostly not do anything. Commite979121b1b
("perf/x86/intel: Implement cross-HT corruption bug workaround") made the situation much worse by actually setting the event->hw.constraint value to NULL, so when validation and actual scheduling interact we get NULL ptr derefs. Fix it by removing the constraint pointer from the event and move it back to an array, this time in cpuc instead of on the stack. validate_group() x86_schedule_events() event->hw.constraint = c; # store <context switch> perf_task_event_sched_in() ... x86_schedule_events(); event->hw.constraint = c2; # store ... put_event_constraints(event); # assume failure to schedule intel_put_event_constraints() event->hw.constraint = NULL; <context switch end> c = event->hw.constraint; # read -> NULL if (!test_bit(hwc->idx, c->idxmsk)) # <- *BOOM* NULL deref This in particular is possible when the event in question is a cpu-wide event and group-leader, where the validate_group() tries to add an event to the group. Reported-by: Vince Weaver <vincent.weaver@maine.edu> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Andrew Hunter <ahh@google.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Maria Dimakopoulou <maria.n.dimakopoulou@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Fixes:43b4578071
("perf/x86: Reduce stack usage of x86_schedule_events()") Fixes:e979121b1b
("perf/x86/intel: Implement cross-HT corruption bug workaround") Signed-off-by: Ingo Molnar <mingo@kernel.org>
1117 lines
28 KiB
C
1117 lines
28 KiB
C
#include <linux/bitops.h>
|
|
#include <linux/types.h>
|
|
#include <linux/slab.h>
|
|
|
|
#include <asm/perf_event.h>
|
|
#include <asm/insn.h>
|
|
|
|
#include "perf_event.h"
|
|
|
|
/* The size of a BTS record in bytes: */
|
|
#define BTS_RECORD_SIZE 24
|
|
|
|
#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
|
|
#define PEBS_BUFFER_SIZE PAGE_SIZE
|
|
#define PEBS_FIXUP_SIZE PAGE_SIZE
|
|
|
|
/*
|
|
* pebs_record_32 for p4 and core not supported
|
|
|
|
struct pebs_record_32 {
|
|
u32 flags, ip;
|
|
u32 ax, bc, cx, dx;
|
|
u32 si, di, bp, sp;
|
|
};
|
|
|
|
*/
|
|
|
|
union intel_x86_pebs_dse {
|
|
u64 val;
|
|
struct {
|
|
unsigned int ld_dse:4;
|
|
unsigned int ld_stlb_miss:1;
|
|
unsigned int ld_locked:1;
|
|
unsigned int ld_reserved:26;
|
|
};
|
|
struct {
|
|
unsigned int st_l1d_hit:1;
|
|
unsigned int st_reserved1:3;
|
|
unsigned int st_stlb_miss:1;
|
|
unsigned int st_locked:1;
|
|
unsigned int st_reserved2:26;
|
|
};
|
|
};
|
|
|
|
|
|
/*
|
|
* Map PEBS Load Latency Data Source encodings to generic
|
|
* memory data source information
|
|
*/
|
|
#define P(a, b) PERF_MEM_S(a, b)
|
|
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
|
|
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
|
|
|
|
static const u64 pebs_data_source[] = {
|
|
P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
|
|
OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */
|
|
OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
|
|
OP_LH | P(LVL, L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
|
|
OP_LH | P(LVL, L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
|
|
OP_LH | P(LVL, L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
|
|
OP_LH | P(LVL, L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
|
|
OP_LH | P(LVL, L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
|
|
OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
|
|
OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
|
|
OP_LH | P(LVL, LOC_RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
|
|
OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
|
|
OP_LH | P(LVL, LOC_RAM) | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
|
|
OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
|
|
OP_LH | P(LVL, IO) | P(SNOOP, NONE), /* 0x0e: I/O */
|
|
OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
|
|
};
|
|
|
|
static u64 precise_store_data(u64 status)
|
|
{
|
|
union intel_x86_pebs_dse dse;
|
|
u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
|
|
|
|
dse.val = status;
|
|
|
|
/*
|
|
* bit 4: TLB access
|
|
* 1 = stored missed 2nd level TLB
|
|
*
|
|
* so it either hit the walker or the OS
|
|
* otherwise hit 2nd level TLB
|
|
*/
|
|
if (dse.st_stlb_miss)
|
|
val |= P(TLB, MISS);
|
|
else
|
|
val |= P(TLB, HIT);
|
|
|
|
/*
|
|
* bit 0: hit L1 data cache
|
|
* if not set, then all we know is that
|
|
* it missed L1D
|
|
*/
|
|
if (dse.st_l1d_hit)
|
|
val |= P(LVL, HIT);
|
|
else
|
|
val |= P(LVL, MISS);
|
|
|
|
/*
|
|
* bit 5: Locked prefix
|
|
*/
|
|
if (dse.st_locked)
|
|
val |= P(LOCK, LOCKED);
|
|
|
|
return val;
|
|
}
|
|
|
|
static u64 precise_datala_hsw(struct perf_event *event, u64 status)
|
|
{
|
|
union perf_mem_data_src dse;
|
|
|
|
dse.val = PERF_MEM_NA;
|
|
|
|
if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
|
|
dse.mem_op = PERF_MEM_OP_STORE;
|
|
else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
|
|
dse.mem_op = PERF_MEM_OP_LOAD;
|
|
|
|
/*
|
|
* L1 info only valid for following events:
|
|
*
|
|
* MEM_UOPS_RETIRED.STLB_MISS_STORES
|
|
* MEM_UOPS_RETIRED.LOCK_STORES
|
|
* MEM_UOPS_RETIRED.SPLIT_STORES
|
|
* MEM_UOPS_RETIRED.ALL_STORES
|
|
*/
|
|
if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
|
|
if (status & 1)
|
|
dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
|
|
else
|
|
dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
|
|
}
|
|
return dse.val;
|
|
}
|
|
|
|
static u64 load_latency_data(u64 status)
|
|
{
|
|
union intel_x86_pebs_dse dse;
|
|
u64 val;
|
|
int model = boot_cpu_data.x86_model;
|
|
int fam = boot_cpu_data.x86;
|
|
|
|
dse.val = status;
|
|
|
|
/*
|
|
* use the mapping table for bit 0-3
|
|
*/
|
|
val = pebs_data_source[dse.ld_dse];
|
|
|
|
/*
|
|
* Nehalem models do not support TLB, Lock infos
|
|
*/
|
|
if (fam == 0x6 && (model == 26 || model == 30
|
|
|| model == 31 || model == 46)) {
|
|
val |= P(TLB, NA) | P(LOCK, NA);
|
|
return val;
|
|
}
|
|
/*
|
|
* bit 4: TLB access
|
|
* 0 = did not miss 2nd level TLB
|
|
* 1 = missed 2nd level TLB
|
|
*/
|
|
if (dse.ld_stlb_miss)
|
|
val |= P(TLB, MISS) | P(TLB, L2);
|
|
else
|
|
val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
|
|
|
|
/*
|
|
* bit 5: locked prefix
|
|
*/
|
|
if (dse.ld_locked)
|
|
val |= P(LOCK, LOCKED);
|
|
|
|
return val;
|
|
}
|
|
|
|
struct pebs_record_core {
|
|
u64 flags, ip;
|
|
u64 ax, bx, cx, dx;
|
|
u64 si, di, bp, sp;
|
|
u64 r8, r9, r10, r11;
|
|
u64 r12, r13, r14, r15;
|
|
};
|
|
|
|
struct pebs_record_nhm {
|
|
u64 flags, ip;
|
|
u64 ax, bx, cx, dx;
|
|
u64 si, di, bp, sp;
|
|
u64 r8, r9, r10, r11;
|
|
u64 r12, r13, r14, r15;
|
|
u64 status, dla, dse, lat;
|
|
};
|
|
|
|
/*
|
|
* Same as pebs_record_nhm, with two additional fields.
|
|
*/
|
|
struct pebs_record_hsw {
|
|
u64 flags, ip;
|
|
u64 ax, bx, cx, dx;
|
|
u64 si, di, bp, sp;
|
|
u64 r8, r9, r10, r11;
|
|
u64 r12, r13, r14, r15;
|
|
u64 status, dla, dse, lat;
|
|
u64 real_ip, tsx_tuning;
|
|
};
|
|
|
|
union hsw_tsx_tuning {
|
|
struct {
|
|
u32 cycles_last_block : 32,
|
|
hle_abort : 1,
|
|
rtm_abort : 1,
|
|
instruction_abort : 1,
|
|
non_instruction_abort : 1,
|
|
retry : 1,
|
|
data_conflict : 1,
|
|
capacity_writes : 1,
|
|
capacity_reads : 1;
|
|
};
|
|
u64 value;
|
|
};
|
|
|
|
#define PEBS_HSW_TSX_FLAGS 0xff00000000ULL
|
|
|
|
void init_debug_store_on_cpu(int cpu)
|
|
{
|
|
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
|
|
|
|
if (!ds)
|
|
return;
|
|
|
|
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
|
|
(u32)((u64)(unsigned long)ds),
|
|
(u32)((u64)(unsigned long)ds >> 32));
|
|
}
|
|
|
|
void fini_debug_store_on_cpu(int cpu)
|
|
{
|
|
if (!per_cpu(cpu_hw_events, cpu).ds)
|
|
return;
|
|
|
|
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
|
|
}
|
|
|
|
static DEFINE_PER_CPU(void *, insn_buffer);
|
|
|
|
static int alloc_pebs_buffer(int cpu)
|
|
{
|
|
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
|
|
int node = cpu_to_node(cpu);
|
|
int max, thresh = 1; /* always use a single PEBS record */
|
|
void *buffer, *ibuffer;
|
|
|
|
if (!x86_pmu.pebs)
|
|
return 0;
|
|
|
|
buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node);
|
|
if (unlikely(!buffer))
|
|
return -ENOMEM;
|
|
|
|
/*
|
|
* HSW+ already provides us the eventing ip; no need to allocate this
|
|
* buffer then.
|
|
*/
|
|
if (x86_pmu.intel_cap.pebs_format < 2) {
|
|
ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
|
|
if (!ibuffer) {
|
|
kfree(buffer);
|
|
return -ENOMEM;
|
|
}
|
|
per_cpu(insn_buffer, cpu) = ibuffer;
|
|
}
|
|
|
|
max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
|
|
|
|
ds->pebs_buffer_base = (u64)(unsigned long)buffer;
|
|
ds->pebs_index = ds->pebs_buffer_base;
|
|
ds->pebs_absolute_maximum = ds->pebs_buffer_base +
|
|
max * x86_pmu.pebs_record_size;
|
|
|
|
ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
|
|
thresh * x86_pmu.pebs_record_size;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void release_pebs_buffer(int cpu)
|
|
{
|
|
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
|
|
|
|
if (!ds || !x86_pmu.pebs)
|
|
return;
|
|
|
|
kfree(per_cpu(insn_buffer, cpu));
|
|
per_cpu(insn_buffer, cpu) = NULL;
|
|
|
|
kfree((void *)(unsigned long)ds->pebs_buffer_base);
|
|
ds->pebs_buffer_base = 0;
|
|
}
|
|
|
|
static int alloc_bts_buffer(int cpu)
|
|
{
|
|
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
|
|
int node = cpu_to_node(cpu);
|
|
int max, thresh;
|
|
void *buffer;
|
|
|
|
if (!x86_pmu.bts)
|
|
return 0;
|
|
|
|
buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
|
|
if (unlikely(!buffer)) {
|
|
WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
|
|
thresh = max / 16;
|
|
|
|
ds->bts_buffer_base = (u64)(unsigned long)buffer;
|
|
ds->bts_index = ds->bts_buffer_base;
|
|
ds->bts_absolute_maximum = ds->bts_buffer_base +
|
|
max * BTS_RECORD_SIZE;
|
|
ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
|
|
thresh * BTS_RECORD_SIZE;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void release_bts_buffer(int cpu)
|
|
{
|
|
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
|
|
|
|
if (!ds || !x86_pmu.bts)
|
|
return;
|
|
|
|
kfree((void *)(unsigned long)ds->bts_buffer_base);
|
|
ds->bts_buffer_base = 0;
|
|
}
|
|
|
|
static int alloc_ds_buffer(int cpu)
|
|
{
|
|
int node = cpu_to_node(cpu);
|
|
struct debug_store *ds;
|
|
|
|
ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
|
|
if (unlikely(!ds))
|
|
return -ENOMEM;
|
|
|
|
per_cpu(cpu_hw_events, cpu).ds = ds;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void release_ds_buffer(int cpu)
|
|
{
|
|
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
|
|
|
|
if (!ds)
|
|
return;
|
|
|
|
per_cpu(cpu_hw_events, cpu).ds = NULL;
|
|
kfree(ds);
|
|
}
|
|
|
|
void release_ds_buffers(void)
|
|
{
|
|
int cpu;
|
|
|
|
if (!x86_pmu.bts && !x86_pmu.pebs)
|
|
return;
|
|
|
|
get_online_cpus();
|
|
for_each_online_cpu(cpu)
|
|
fini_debug_store_on_cpu(cpu);
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
release_pebs_buffer(cpu);
|
|
release_bts_buffer(cpu);
|
|
release_ds_buffer(cpu);
|
|
}
|
|
put_online_cpus();
|
|
}
|
|
|
|
void reserve_ds_buffers(void)
|
|
{
|
|
int bts_err = 0, pebs_err = 0;
|
|
int cpu;
|
|
|
|
x86_pmu.bts_active = 0;
|
|
x86_pmu.pebs_active = 0;
|
|
|
|
if (!x86_pmu.bts && !x86_pmu.pebs)
|
|
return;
|
|
|
|
if (!x86_pmu.bts)
|
|
bts_err = 1;
|
|
|
|
if (!x86_pmu.pebs)
|
|
pebs_err = 1;
|
|
|
|
get_online_cpus();
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
if (alloc_ds_buffer(cpu)) {
|
|
bts_err = 1;
|
|
pebs_err = 1;
|
|
}
|
|
|
|
if (!bts_err && alloc_bts_buffer(cpu))
|
|
bts_err = 1;
|
|
|
|
if (!pebs_err && alloc_pebs_buffer(cpu))
|
|
pebs_err = 1;
|
|
|
|
if (bts_err && pebs_err)
|
|
break;
|
|
}
|
|
|
|
if (bts_err) {
|
|
for_each_possible_cpu(cpu)
|
|
release_bts_buffer(cpu);
|
|
}
|
|
|
|
if (pebs_err) {
|
|
for_each_possible_cpu(cpu)
|
|
release_pebs_buffer(cpu);
|
|
}
|
|
|
|
if (bts_err && pebs_err) {
|
|
for_each_possible_cpu(cpu)
|
|
release_ds_buffer(cpu);
|
|
} else {
|
|
if (x86_pmu.bts && !bts_err)
|
|
x86_pmu.bts_active = 1;
|
|
|
|
if (x86_pmu.pebs && !pebs_err)
|
|
x86_pmu.pebs_active = 1;
|
|
|
|
for_each_online_cpu(cpu)
|
|
init_debug_store_on_cpu(cpu);
|
|
}
|
|
|
|
put_online_cpus();
|
|
}
|
|
|
|
/*
|
|
* BTS
|
|
*/
|
|
|
|
struct event_constraint bts_constraint =
|
|
EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
|
|
|
|
void intel_pmu_enable_bts(u64 config)
|
|
{
|
|
unsigned long debugctlmsr;
|
|
|
|
debugctlmsr = get_debugctlmsr();
|
|
|
|
debugctlmsr |= DEBUGCTLMSR_TR;
|
|
debugctlmsr |= DEBUGCTLMSR_BTS;
|
|
if (config & ARCH_PERFMON_EVENTSEL_INT)
|
|
debugctlmsr |= DEBUGCTLMSR_BTINT;
|
|
|
|
if (!(config & ARCH_PERFMON_EVENTSEL_OS))
|
|
debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
|
|
|
|
if (!(config & ARCH_PERFMON_EVENTSEL_USR))
|
|
debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
|
|
|
|
update_debugctlmsr(debugctlmsr);
|
|
}
|
|
|
|
void intel_pmu_disable_bts(void)
|
|
{
|
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
|
unsigned long debugctlmsr;
|
|
|
|
if (!cpuc->ds)
|
|
return;
|
|
|
|
debugctlmsr = get_debugctlmsr();
|
|
|
|
debugctlmsr &=
|
|
~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
|
|
DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
|
|
|
|
update_debugctlmsr(debugctlmsr);
|
|
}
|
|
|
|
int intel_pmu_drain_bts_buffer(void)
|
|
{
|
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
|
struct debug_store *ds = cpuc->ds;
|
|
struct bts_record {
|
|
u64 from;
|
|
u64 to;
|
|
u64 flags;
|
|
};
|
|
struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
|
|
struct bts_record *at, *top;
|
|
struct perf_output_handle handle;
|
|
struct perf_event_header header;
|
|
struct perf_sample_data data;
|
|
struct pt_regs regs;
|
|
|
|
if (!event)
|
|
return 0;
|
|
|
|
if (!x86_pmu.bts_active)
|
|
return 0;
|
|
|
|
at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
|
|
top = (struct bts_record *)(unsigned long)ds->bts_index;
|
|
|
|
if (top <= at)
|
|
return 0;
|
|
|
|
memset(®s, 0, sizeof(regs));
|
|
|
|
ds->bts_index = ds->bts_buffer_base;
|
|
|
|
perf_sample_data_init(&data, 0, event->hw.last_period);
|
|
|
|
/*
|
|
* Prepare a generic sample, i.e. fill in the invariant fields.
|
|
* We will overwrite the from and to address before we output
|
|
* the sample.
|
|
*/
|
|
perf_prepare_sample(&header, &data, event, ®s);
|
|
|
|
if (perf_output_begin(&handle, event, header.size * (top - at)))
|
|
return 1;
|
|
|
|
for (; at < top; at++) {
|
|
data.ip = at->from;
|
|
data.addr = at->to;
|
|
|
|
perf_output_sample(&handle, &header, &data, event);
|
|
}
|
|
|
|
perf_output_end(&handle);
|
|
|
|
/* There's new data available. */
|
|
event->hw.interrupts++;
|
|
event->pending_kill = POLL_IN;
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* PEBS
|
|
*/
|
|
struct event_constraint intel_core2_pebs_event_constraints[] = {
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
|
|
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
|
|
EVENT_CONSTRAINT_END
|
|
};
|
|
|
|
struct event_constraint intel_atom_pebs_event_constraints[] = {
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
|
|
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
|
|
EVENT_CONSTRAINT_END
|
|
};
|
|
|
|
struct event_constraint intel_slm_pebs_event_constraints[] = {
|
|
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
|
|
/* Allow all events as PEBS with no flags */
|
|
INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
|
|
EVENT_CONSTRAINT_END
|
|
};
|
|
|
|
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
|
|
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
|
|
INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
|
|
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
|
|
EVENT_CONSTRAINT_END
|
|
};
|
|
|
|
struct event_constraint intel_westmere_pebs_event_constraints[] = {
|
|
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
|
|
INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
|
|
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
|
|
EVENT_CONSTRAINT_END
|
|
};
|
|
|
|
struct event_constraint intel_snb_pebs_event_constraints[] = {
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
|
|
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
|
|
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
|
|
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
|
|
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
|
|
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
|
|
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
|
|
INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
|
|
/* Allow all events as PEBS with no flags */
|
|
INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
|
|
EVENT_CONSTRAINT_END
|
|
};
|
|
|
|
struct event_constraint intel_ivb_pebs_event_constraints[] = {
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
|
|
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
|
|
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
|
|
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
|
|
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
|
|
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
|
|
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
|
|
INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
|
|
/* Allow all events as PEBS with no flags */
|
|
INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
|
|
EVENT_CONSTRAINT_END
|
|
};
|
|
|
|
struct event_constraint intel_hsw_pebs_event_constraints[] = {
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
|
|
INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
|
|
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
|
|
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
|
|
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
|
|
/* Allow all events as PEBS with no flags */
|
|
INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
|
|
EVENT_CONSTRAINT_END
|
|
};
|
|
|
|
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
|
|
{
|
|
struct event_constraint *c;
|
|
|
|
if (!event->attr.precise_ip)
|
|
return NULL;
|
|
|
|
if (x86_pmu.pebs_constraints) {
|
|
for_each_event_constraint(c, x86_pmu.pebs_constraints) {
|
|
if ((event->hw.config & c->cmask) == c->code) {
|
|
event->hw.flags |= c->flags;
|
|
return c;
|
|
}
|
|
}
|
|
}
|
|
|
|
return &emptyconstraint;
|
|
}
|
|
|
|
void intel_pmu_pebs_enable(struct perf_event *event)
|
|
{
|
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
|
struct hw_perf_event *hwc = &event->hw;
|
|
|
|
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
|
|
|
|
cpuc->pebs_enabled |= 1ULL << hwc->idx;
|
|
|
|
if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
|
|
cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
|
|
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
|
|
cpuc->pebs_enabled |= 1ULL << 63;
|
|
}
|
|
|
|
void intel_pmu_pebs_disable(struct perf_event *event)
|
|
{
|
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
|
struct hw_perf_event *hwc = &event->hw;
|
|
|
|
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
|
|
|
|
if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
|
|
cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
|
|
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
|
|
cpuc->pebs_enabled &= ~(1ULL << 63);
|
|
|
|
if (cpuc->enabled)
|
|
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
|
|
|
|
hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
|
|
}
|
|
|
|
void intel_pmu_pebs_enable_all(void)
|
|
{
|
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
|
|
|
if (cpuc->pebs_enabled)
|
|
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
|
|
}
|
|
|
|
void intel_pmu_pebs_disable_all(void)
|
|
{
|
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
|
|
|
if (cpuc->pebs_enabled)
|
|
wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
|
|
}
|
|
|
|
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
|
|
{
|
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
|
unsigned long from = cpuc->lbr_entries[0].from;
|
|
unsigned long old_to, to = cpuc->lbr_entries[0].to;
|
|
unsigned long ip = regs->ip;
|
|
int is_64bit = 0;
|
|
void *kaddr;
|
|
int size;
|
|
|
|
/*
|
|
* We don't need to fixup if the PEBS assist is fault like
|
|
*/
|
|
if (!x86_pmu.intel_cap.pebs_trap)
|
|
return 1;
|
|
|
|
/*
|
|
* No LBR entry, no basic block, no rewinding
|
|
*/
|
|
if (!cpuc->lbr_stack.nr || !from || !to)
|
|
return 0;
|
|
|
|
/*
|
|
* Basic blocks should never cross user/kernel boundaries
|
|
*/
|
|
if (kernel_ip(ip) != kernel_ip(to))
|
|
return 0;
|
|
|
|
/*
|
|
* unsigned math, either ip is before the start (impossible) or
|
|
* the basic block is larger than 1 page (sanity)
|
|
*/
|
|
if ((ip - to) > PEBS_FIXUP_SIZE)
|
|
return 0;
|
|
|
|
/*
|
|
* We sampled a branch insn, rewind using the LBR stack
|
|
*/
|
|
if (ip == to) {
|
|
set_linear_ip(regs, from);
|
|
return 1;
|
|
}
|
|
|
|
size = ip - to;
|
|
if (!kernel_ip(ip)) {
|
|
int bytes;
|
|
u8 *buf = this_cpu_read(insn_buffer);
|
|
|
|
/* 'size' must fit our buffer, see above */
|
|
bytes = copy_from_user_nmi(buf, (void __user *)to, size);
|
|
if (bytes != 0)
|
|
return 0;
|
|
|
|
kaddr = buf;
|
|
} else {
|
|
kaddr = (void *)to;
|
|
}
|
|
|
|
do {
|
|
struct insn insn;
|
|
|
|
old_to = to;
|
|
|
|
#ifdef CONFIG_X86_64
|
|
is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
|
|
#endif
|
|
insn_init(&insn, kaddr, size, is_64bit);
|
|
insn_get_length(&insn);
|
|
/*
|
|
* Make sure there was not a problem decoding the
|
|
* instruction and getting the length. This is
|
|
* doubly important because we have an infinite
|
|
* loop if insn.length=0.
|
|
*/
|
|
if (!insn.length)
|
|
break;
|
|
|
|
to += insn.length;
|
|
kaddr += insn.length;
|
|
size -= insn.length;
|
|
} while (to < ip);
|
|
|
|
if (to == ip) {
|
|
set_linear_ip(regs, old_to);
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Even though we decoded the basic block, the instruction stream
|
|
* never matched the given IP, either the TO or the IP got corrupted.
|
|
*/
|
|
return 0;
|
|
}
|
|
|
|
static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
|
|
{
|
|
if (pebs->tsx_tuning) {
|
|
union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
|
|
return tsx.cycles_last_block;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
|
|
{
|
|
u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
|
|
|
|
/* For RTM XABORTs also log the abort code from AX */
|
|
if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
|
|
txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
|
|
return txn;
|
|
}
|
|
|
|
static void __intel_pmu_pebs_event(struct perf_event *event,
|
|
struct pt_regs *iregs, void *__pebs)
|
|
{
|
|
#define PERF_X86_EVENT_PEBS_HSW_PREC \
|
|
(PERF_X86_EVENT_PEBS_ST_HSW | \
|
|
PERF_X86_EVENT_PEBS_LD_HSW | \
|
|
PERF_X86_EVENT_PEBS_NA_HSW)
|
|
/*
|
|
* We cast to the biggest pebs_record but are careful not to
|
|
* unconditionally access the 'extra' entries.
|
|
*/
|
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
|
struct pebs_record_hsw *pebs = __pebs;
|
|
struct perf_sample_data data;
|
|
struct pt_regs regs;
|
|
u64 sample_type;
|
|
int fll, fst, dsrc;
|
|
int fl = event->hw.flags;
|
|
|
|
if (!intel_pmu_save_and_restart(event))
|
|
return;
|
|
|
|
sample_type = event->attr.sample_type;
|
|
dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
|
|
|
|
fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
|
|
fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
|
|
|
|
perf_sample_data_init(&data, 0, event->hw.last_period);
|
|
|
|
data.period = event->hw.last_period;
|
|
|
|
/*
|
|
* Use latency for weight (only avail with PEBS-LL)
|
|
*/
|
|
if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
|
|
data.weight = pebs->lat;
|
|
|
|
/*
|
|
* data.data_src encodes the data source
|
|
*/
|
|
if (dsrc) {
|
|
u64 val = PERF_MEM_NA;
|
|
if (fll)
|
|
val = load_latency_data(pebs->dse);
|
|
else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
|
|
val = precise_datala_hsw(event, pebs->dse);
|
|
else if (fst)
|
|
val = precise_store_data(pebs->dse);
|
|
data.data_src.val = val;
|
|
}
|
|
|
|
/*
|
|
* We use the interrupt regs as a base because the PEBS record
|
|
* does not contain a full regs set, specifically it seems to
|
|
* lack segment descriptors, which get used by things like
|
|
* user_mode().
|
|
*
|
|
* In the simple case fix up only the IP and BP,SP regs, for
|
|
* PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
|
|
* A possible PERF_SAMPLE_REGS will have to transfer all regs.
|
|
*/
|
|
regs = *iregs;
|
|
regs.flags = pebs->flags;
|
|
set_linear_ip(®s, pebs->ip);
|
|
regs.bp = pebs->bp;
|
|
regs.sp = pebs->sp;
|
|
|
|
if (sample_type & PERF_SAMPLE_REGS_INTR) {
|
|
regs.ax = pebs->ax;
|
|
regs.bx = pebs->bx;
|
|
regs.cx = pebs->cx;
|
|
regs.dx = pebs->dx;
|
|
regs.si = pebs->si;
|
|
regs.di = pebs->di;
|
|
regs.bp = pebs->bp;
|
|
regs.sp = pebs->sp;
|
|
|
|
regs.flags = pebs->flags;
|
|
#ifndef CONFIG_X86_32
|
|
regs.r8 = pebs->r8;
|
|
regs.r9 = pebs->r9;
|
|
regs.r10 = pebs->r10;
|
|
regs.r11 = pebs->r11;
|
|
regs.r12 = pebs->r12;
|
|
regs.r13 = pebs->r13;
|
|
regs.r14 = pebs->r14;
|
|
regs.r15 = pebs->r15;
|
|
#endif
|
|
}
|
|
|
|
if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
|
|
regs.ip = pebs->real_ip;
|
|
regs.flags |= PERF_EFLAGS_EXACT;
|
|
} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s))
|
|
regs.flags |= PERF_EFLAGS_EXACT;
|
|
else
|
|
regs.flags &= ~PERF_EFLAGS_EXACT;
|
|
|
|
if ((sample_type & PERF_SAMPLE_ADDR) &&
|
|
x86_pmu.intel_cap.pebs_format >= 1)
|
|
data.addr = pebs->dla;
|
|
|
|
if (x86_pmu.intel_cap.pebs_format >= 2) {
|
|
/* Only set the TSX weight when no memory weight. */
|
|
if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
|
|
data.weight = intel_hsw_weight(pebs);
|
|
|
|
if (sample_type & PERF_SAMPLE_TRANSACTION)
|
|
data.txn = intel_hsw_transaction(pebs);
|
|
}
|
|
|
|
if (has_branch_stack(event))
|
|
data.br_stack = &cpuc->lbr_stack;
|
|
|
|
if (perf_event_overflow(event, &data, ®s))
|
|
x86_pmu_stop(event, 0);
|
|
}
|
|
|
|
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
|
|
{
|
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
|
struct debug_store *ds = cpuc->ds;
|
|
struct perf_event *event = cpuc->events[0]; /* PMC0 only */
|
|
struct pebs_record_core *at, *top;
|
|
int n;
|
|
|
|
if (!x86_pmu.pebs_active)
|
|
return;
|
|
|
|
at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
|
|
top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
|
|
|
|
/*
|
|
* Whatever else happens, drain the thing
|
|
*/
|
|
ds->pebs_index = ds->pebs_buffer_base;
|
|
|
|
if (!test_bit(0, cpuc->active_mask))
|
|
return;
|
|
|
|
WARN_ON_ONCE(!event);
|
|
|
|
if (!event->attr.precise_ip)
|
|
return;
|
|
|
|
n = top - at;
|
|
if (n <= 0)
|
|
return;
|
|
|
|
/*
|
|
* Should not happen, we program the threshold at 1 and do not
|
|
* set a reset value.
|
|
*/
|
|
WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);
|
|
at += n - 1;
|
|
|
|
__intel_pmu_pebs_event(event, iregs, at);
|
|
}
|
|
|
|
static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
|
|
{
|
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
|
struct debug_store *ds = cpuc->ds;
|
|
struct perf_event *event = NULL;
|
|
void *at, *top;
|
|
u64 status = 0;
|
|
int bit;
|
|
|
|
if (!x86_pmu.pebs_active)
|
|
return;
|
|
|
|
at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
|
|
top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
|
|
|
|
ds->pebs_index = ds->pebs_buffer_base;
|
|
|
|
if (unlikely(at > top))
|
|
return;
|
|
|
|
/*
|
|
* Should not happen, we program the threshold at 1 and do not
|
|
* set a reset value.
|
|
*/
|
|
WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
|
|
"Unexpected number of pebs records %ld\n",
|
|
(long)(top - at) / x86_pmu.pebs_record_size);
|
|
|
|
for (; at < top; at += x86_pmu.pebs_record_size) {
|
|
struct pebs_record_nhm *p = at;
|
|
|
|
for_each_set_bit(bit, (unsigned long *)&p->status,
|
|
x86_pmu.max_pebs_events) {
|
|
event = cpuc->events[bit];
|
|
if (!test_bit(bit, cpuc->active_mask))
|
|
continue;
|
|
|
|
WARN_ON_ONCE(!event);
|
|
|
|
if (!event->attr.precise_ip)
|
|
continue;
|
|
|
|
if (__test_and_set_bit(bit, (unsigned long *)&status))
|
|
continue;
|
|
|
|
break;
|
|
}
|
|
|
|
if (!event || bit >= x86_pmu.max_pebs_events)
|
|
continue;
|
|
|
|
__intel_pmu_pebs_event(event, iregs, at);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* BTS, PEBS probe and setup
|
|
*/
|
|
|
|
void __init intel_ds_init(void)
|
|
{
|
|
/*
|
|
* No support for 32bit formats
|
|
*/
|
|
if (!boot_cpu_has(X86_FEATURE_DTES64))
|
|
return;
|
|
|
|
x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
|
|
x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
|
|
if (x86_pmu.pebs) {
|
|
char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
|
|
int format = x86_pmu.intel_cap.pebs_format;
|
|
|
|
switch (format) {
|
|
case 0:
|
|
printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
|
|
x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
|
|
x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
|
|
break;
|
|
|
|
case 1:
|
|
printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
|
|
x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
|
|
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
|
|
break;
|
|
|
|
case 2:
|
|
pr_cont("PEBS fmt2%c, ", pebs_type);
|
|
x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
|
|
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
|
|
break;
|
|
|
|
default:
|
|
printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
|
|
x86_pmu.pebs = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
void perf_restore_debug_store(void)
|
|
{
|
|
struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
|
|
|
|
if (!x86_pmu.bts && !x86_pmu.pebs)
|
|
return;
|
|
|
|
wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
|
|
}
|