forked from Minki/linux
bce38cd53e
This patch adds the ability to sample taken branches to the perf_event interface. The ability to capture taken branches is very useful for all sorts of analysis, for instance basic block profiling, call counts, and statistical call graphs. This new capability requires hardware assist and as such may not be available on all HW platforms. On Intel x86 it is implemented on top of the Last Branch Record (LBR) facility. To enable taken branches sampling, the PERF_SAMPLE_BRANCH_STACK bit must be set in attr->sample_type. Sampled taken branches may be filtered by type and/or priv levels. The patch adds a new field, called branch_sample_type, to the perf_event_attr structure. It contains a bitmask of filters to apply to the sampled taken branches. Filters may be implemented in HW. If the HW filter does not exist or is not good enough, some arch may also implement a SW filter. The following generic filters are currently defined: - PERF_SAMPLE_USER: only branches whose targets are at the user level - PERF_SAMPLE_KERNEL: only branches whose targets are at the kernel level - PERF_SAMPLE_HV: only branches whose targets are at the hypervisor level - PERF_SAMPLE_ANY: any type of branches (subject to priv levels filters) - PERF_SAMPLE_ANY_CALL: any call branches (may incl. syscall on some arch) - PERF_SAMPLE_ANY_RET: any return branches (may incl. syscall returns on some arch) - PERF_SAMPLE_IND_CALL: indirect call branches. Obviously, filters may be combined. The priv level bits are optional. If not provided, the priv level of the associated event is used. It is possible to collect branches at a priv level different from the associated event. Use of kernel, hv priv levels is subject to permissions and availability (hv). The number of taken branch records present in each sample may vary based on HW, the type of sampled branches, and the executed code. Therefore each sample contains the number of taken branches it contains.
Signed-off-by: Stephane Eranian <eranian@google.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1328826068-11713-2-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
226 lines
4.6 KiB
C
226 lines
4.6 KiB
C
#include <linux/perf_event.h>
|
|
#include <linux/types.h>
|
|
|
|
#include <asm/perf_event.h>
|
|
#include <asm/msr.h>
|
|
|
|
#include "perf_event.h"
|
|
|
|
/*
 * LBR record layouts; compared against x86_pmu.intel_cap.lbr_format
 * to pick the matching reset/read routine.
 */
enum {
	LBR_FORMAT_32		= 0,	/* from/to packed as two 32-bit halves of one MSR */
	LBR_FORMAT_LIP		= 1,	/* 64-bit records, linear addresses */
	LBR_FORMAT_EIP		= 2,	/* 64-bit records, effective addresses */
	LBR_FORMAT_EIP_FLAGS	= 3,	/* as EIP, "from" also carries the mispredict flag */
};

/*
|
|
* We only support LBR implementations that have FREEZE_LBRS_ON_PMI
|
|
* otherwise it becomes near impossible to get a reliable stack.
|
|
*/
|
|
|
|
static void __intel_pmu_lbr_enable(void)
|
|
{
|
|
u64 debugctl;
|
|
|
|
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
|
|
debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
|
|
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
|
|
}
|
|
|
|
static void __intel_pmu_lbr_disable(void)
|
|
{
|
|
u64 debugctl;
|
|
|
|
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
|
|
debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
|
|
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
|
|
}
|
|
|
|
static void intel_pmu_lbr_reset_32(void)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < x86_pmu.lbr_nr; i++)
|
|
wrmsrl(x86_pmu.lbr_from + i, 0);
|
|
}
|
|
|
|
static void intel_pmu_lbr_reset_64(void)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < x86_pmu.lbr_nr; i++) {
|
|
wrmsrl(x86_pmu.lbr_from + i, 0);
|
|
wrmsrl(x86_pmu.lbr_to + i, 0);
|
|
}
|
|
}
|
|
|
|
void intel_pmu_lbr_reset(void)
|
|
{
|
|
if (!x86_pmu.lbr_nr)
|
|
return;
|
|
|
|
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
|
|
intel_pmu_lbr_reset_32();
|
|
else
|
|
intel_pmu_lbr_reset_64();
|
|
}
|
|
|
|
void intel_pmu_lbr_enable(struct perf_event *event)
|
|
{
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
|
|
if (!x86_pmu.lbr_nr)
|
|
return;
|
|
|
|
/*
|
|
* Reset the LBR stack if we changed task context to
|
|
* avoid data leaks.
|
|
*/
|
|
|
|
if (event->ctx->task && cpuc->lbr_context != event->ctx) {
|
|
intel_pmu_lbr_reset();
|
|
cpuc->lbr_context = event->ctx;
|
|
}
|
|
|
|
cpuc->lbr_users++;
|
|
}
|
|
|
|
void intel_pmu_lbr_disable(struct perf_event *event)
|
|
{
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
|
|
if (!x86_pmu.lbr_nr)
|
|
return;
|
|
|
|
cpuc->lbr_users--;
|
|
WARN_ON_ONCE(cpuc->lbr_users < 0);
|
|
|
|
if (cpuc->enabled && !cpuc->lbr_users)
|
|
__intel_pmu_lbr_disable();
|
|
}
|
|
|
|
void intel_pmu_lbr_enable_all(void)
|
|
{
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
|
|
if (cpuc->lbr_users)
|
|
__intel_pmu_lbr_enable();
|
|
}
|
|
|
|
void intel_pmu_lbr_disable_all(void)
|
|
{
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
|
|
if (cpuc->lbr_users)
|
|
__intel_pmu_lbr_disable();
|
|
}
|
|
|
|
static inline u64 intel_pmu_lbr_tos(void)
|
|
{
|
|
u64 tos;
|
|
|
|
rdmsrl(x86_pmu.lbr_tos, tos);
|
|
|
|
return tos;
|
|
}
|
|
|
|
static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
|
|
{
|
|
unsigned long mask = x86_pmu.lbr_nr - 1;
|
|
u64 tos = intel_pmu_lbr_tos();
|
|
int i;
|
|
|
|
for (i = 0; i < x86_pmu.lbr_nr; i++) {
|
|
unsigned long lbr_idx = (tos - i) & mask;
|
|
union {
|
|
struct {
|
|
u32 from;
|
|
u32 to;
|
|
};
|
|
u64 lbr;
|
|
} msr_lastbranch;
|
|
|
|
rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
|
|
|
|
cpuc->lbr_entries[i].from = msr_lastbranch.from;
|
|
cpuc->lbr_entries[i].to = msr_lastbranch.to;
|
|
cpuc->lbr_entries[i].mispred = 0;
|
|
cpuc->lbr_entries[i].predicted = 0;
|
|
cpuc->lbr_entries[i].reserved = 0;
|
|
}
|
|
cpuc->lbr_stack.nr = i;
|
|
}
|
|
|
|
#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
|
|
|
|
/*
|
|
* Due to lack of segmentation in Linux the effective address (offset)
|
|
* is the same as the linear address, allowing us to merge the LIP and EIP
|
|
* LBR formats.
|
|
*/
|
|
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
|
|
{
|
|
unsigned long mask = x86_pmu.lbr_nr - 1;
|
|
int lbr_format = x86_pmu.intel_cap.lbr_format;
|
|
u64 tos = intel_pmu_lbr_tos();
|
|
int i;
|
|
|
|
for (i = 0; i < x86_pmu.lbr_nr; i++) {
|
|
unsigned long lbr_idx = (tos - i) & mask;
|
|
u64 from, to, mis = 0, pred = 0;
|
|
|
|
rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
|
|
rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
|
|
|
|
if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
|
|
mis = !!(from & LBR_FROM_FLAG_MISPRED);
|
|
pred = !mis;
|
|
from = (u64)((((s64)from) << 1) >> 1);
|
|
}
|
|
|
|
cpuc->lbr_entries[i].from = from;
|
|
cpuc->lbr_entries[i].to = to;
|
|
cpuc->lbr_entries[i].mispred = mis;
|
|
cpuc->lbr_entries[i].predicted = pred;
|
|
cpuc->lbr_entries[i].reserved = 0;
|
|
}
|
|
cpuc->lbr_stack.nr = i;
|
|
}
|
|
|
|
void intel_pmu_lbr_read(void)
|
|
{
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
|
|
if (!cpuc->lbr_users)
|
|
return;
|
|
|
|
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
|
|
intel_pmu_lbr_read_32(cpuc);
|
|
else
|
|
intel_pmu_lbr_read_64(cpuc);
|
|
}
|
|
|
|
void intel_pmu_lbr_init_core(void)
|
|
{
|
|
x86_pmu.lbr_nr = 4;
|
|
x86_pmu.lbr_tos = 0x01c9;
|
|
x86_pmu.lbr_from = 0x40;
|
|
x86_pmu.lbr_to = 0x60;
|
|
}
|
|
|
|
void intel_pmu_lbr_init_nhm(void)
|
|
{
|
|
x86_pmu.lbr_nr = 16;
|
|
x86_pmu.lbr_tos = 0x01c9;
|
|
x86_pmu.lbr_from = 0x680;
|
|
x86_pmu.lbr_to = 0x6c0;
|
|
}
|
|
|
|
void intel_pmu_lbr_init_atom(void)
|
|
{
|
|
x86_pmu.lbr_nr = 8;
|
|
x86_pmu.lbr_tos = 0x01c9;
|
|
x86_pmu.lbr_from = 0x40;
|
|
x86_pmu.lbr_to = 0x60;
|
|
}
|