perf/x86/intel: Support Haswell/v4 LBR format
Haswell has two additional LBR FROM flags for TSX: in_tx and abort_tx,
implemented as a new "v4" version of the LBR format.

Handle these flags and adjust the sign-extension code so addresses are
still extended correctly. The flags are exported in the LBR record in
the same way as the existing misprediction flag.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Andi Kleen <ak@linux.jf.intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: http://lkml.kernel.org/r/1371515812-9646-6-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit 135c5612c4
parent 72db559646
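Note on the format: in the v4 ("EIP_FLAGS2") layout the LBR FROM MSR carries three flag bits above the branch source address: MISPRED in bit 63, IN_TX in bit 62 and ABORT in bit 61, so the address has to be re-sign-extended from bit 60 once the flags are stripped. A minimal user-space sketch of that decoding, separate from the kernel patch below (the helper name and the sample value are made up for illustration):

#include <stdint.h>
#include <stdio.h>

/* Flag bits in the LBR FROM MSR for the v4 (EIP_FLAGS2) format. */
#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
#define LBR_FROM_FLAG_IN_TX	(1ULL << 62)
#define LBR_FROM_FLAG_ABORT	(1ULL << 61)

struct lbr_from_decoded {
	uint64_t from;			/* sign-extended source address */
	int mispred, in_tx, abort;	/* flag bits */
};

/* Hypothetical helper: extract the flags, then shift them out and shift
 * back arithmetically so bit 60 (the sign bit of the address once the
 * three flags are removed) is propagated into bits 63:61. */
static struct lbr_from_decoded decode_lbr_from_v4(uint64_t raw)
{
	struct lbr_from_decoded d;

	d.mispred = !!(raw & LBR_FROM_FLAG_MISPRED);
	d.in_tx   = !!(raw & LBR_FROM_FLAG_IN_TX);
	d.abort   = !!(raw & LBR_FROM_FLAG_ABORT);
	d.from    = (uint64_t)(((int64_t)raw << 3) >> 3);
	return d;
}

int main(void)
{
	/* Made-up raw value: a kernel-space source address with IN_TX set. */
	uint64_t raw = LBR_FROM_FLAG_IN_TX | 0x1fffffff81000000ULL;
	struct lbr_from_decoded d = decode_lbr_from_v4(raw);

	printf("from=%#llx mispred=%d in_tx=%d abort=%d\n",
	       (unsigned long long)d.from, d.mispred, d.in_tx, d.abort);
	return 0;
}

The kernel patch below does the same thing with a variable shift (skip = 1 for the older flags format, skip = 3 for v4), so pre-Haswell formats keep their existing behaviour.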
@@ -12,6 +12,16 @@ enum {
 	LBR_FORMAT_LIP		= 0x01,
 	LBR_FORMAT_EIP		= 0x02,
 	LBR_FORMAT_EIP_FLAGS	= 0x03,
+	LBR_FORMAT_EIP_FLAGS2	= 0x04,
+	LBR_FORMAT_MAX_KNOWN	= LBR_FORMAT_EIP_FLAGS2,
+};
+
+static enum {
+	LBR_EIP_FLAGS		= 1,
+	LBR_TSX			= 2,
+} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
+	[LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
+	[LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
 };
 
 /*
@@ -56,6 +66,8 @@ enum {
 	 LBR_FAR)
 
 #define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
+#define LBR_FROM_FLAG_IN_TX	(1ULL << 62)
+#define LBR_FROM_FLAG_ABORT	(1ULL << 61)
 
 #define for_each_branch_sample_type(x) \
 	for ((x) = PERF_SAMPLE_BRANCH_USER; \
@@ -81,9 +93,13 @@ enum {
 	X86_BR_JMP      = 1 << 9, /* jump */
 	X86_BR_IRQ      = 1 << 10,/* hw interrupt or trap or fault */
 	X86_BR_IND_CALL = 1 << 11,/* indirect calls */
+	X86_BR_ABORT    = 1 << 12,/* transaction abort */
+	X86_BR_IN_TX    = 1 << 13,/* in transaction */
+	X86_BR_NO_TX    = 1 << 14,/* not in transaction */
 };
 
 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
 
 #define X86_BR_ANY       \
 	(X86_BR_CALL    |\
@@ -95,6 +111,7 @@ enum {
 	 X86_BR_JCC     |\
 	 X86_BR_JMP     |\
 	 X86_BR_IRQ     |\
+	 X86_BR_ABORT   |\
 	 X86_BR_IND_CALL)
 
 #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
@@ -270,21 +287,31 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 
 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 		unsigned long lbr_idx = (tos - i) & mask;
-		u64 from, to, mis = 0, pred = 0;
+		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
+		int skip = 0;
+		int lbr_flags = lbr_desc[lbr_format];
 
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
 		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
 
-		if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
+		if (lbr_flags & LBR_EIP_FLAGS) {
 			mis = !!(from & LBR_FROM_FLAG_MISPRED);
 			pred = !mis;
-			from = (u64)((((s64)from) << 1) >> 1);
+			skip = 1;
 		}
+		if (lbr_flags & LBR_TSX) {
+			in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+			abort = !!(from & LBR_FROM_FLAG_ABORT);
+			skip = 3;
+		}
+		from = (u64)((((s64)from) << skip) >> skip);
 
 		cpuc->lbr_entries[i].from	= from;
 		cpuc->lbr_entries[i].to		= to;
 		cpuc->lbr_entries[i].mispred	= mis;
 		cpuc->lbr_entries[i].predicted	= pred;
+		cpuc->lbr_entries[i].in_tx	= in_tx;
+		cpuc->lbr_entries[i].abort	= abort;
 		cpuc->lbr_entries[i].reserved	= 0;
 	}
 	cpuc->lbr_stack.nr = i;
@@ -334,6 +361,16 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 
 	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
 		mask |= X86_BR_IND_CALL;
+
+	if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
+		mask |= X86_BR_ABORT;
+
+	if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
+		mask |= X86_BR_IN_TX;
+
+	if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
+		mask |= X86_BR_NO_TX;
+
 	/*
 	 * stash actual user request into reg, it may
 	 * be used by fixup code for some CPU
@@ -408,7 +445,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
  * decoded (e.g., text page not present), then X86_BR_NONE is
  * returned.
  */
-static int branch_type(unsigned long from, unsigned long to)
+static int branch_type(unsigned long from, unsigned long to, int abort)
 {
 	struct insn insn;
 	void *addr;
@@ -428,6 +465,9 @@ static int branch_type(unsigned long from, unsigned long to)
 	if (from == 0 || to == 0)
 		return X86_BR_NONE;
 
+	if (abort)
+		return X86_BR_ABORT | to_plm;
+
 	if (from_plm == X86_BR_USER) {
 		/*
 		 * can happen if measuring at the user level only
@@ -574,7 +614,13 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 		from = cpuc->lbr_entries[i].from;
 		to = cpuc->lbr_entries[i].to;
 
-		type = branch_type(from, to);
+		type = branch_type(from, to, cpuc->lbr_entries[i].abort);
+		if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
+			if (cpuc->lbr_entries[i].in_tx)
+				type |= X86_BR_IN_TX;
+			else
+				type |= X86_BR_NO_TX;
+		}
 
 		/* if type does not correspond, then discard */
 		if (type == X86_BR_NONE || (br_sel & type) != type) {
@@ -73,13 +73,18 @@ struct perf_raw_record {
  *
  * support for mispred, predicted is optional. In case it
  * is not supported mispred = predicted = 0.
+ *
+ * in_tx: running in a hardware transaction
+ * abort: aborting a hardware transaction
  */
 struct perf_branch_entry {
 	__u64	from;
 	__u64	to;
 	__u64	mispred:1,  /* target mispredicted */
 		predicted:1,/* target predicted */
-		reserved:62;
+		in_tx:1,    /* in transaction */
+		abort:1,    /* transaction abort */
+		reserved:60;
 };
 
 /*
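The two new record bits extend the existing sample ABI, so a tool that already walks PERF_SAMPLE_BRANCH_STACK entries only needs to test the extra bitfields. A small, self-contained sketch of that consumer side (the struct mirrors the layout above; the summarizing function and the sample data are illustrative only):

#include <stdint.h>
#include <stdio.h>

/* Mirrors the perf_branch_entry layout after this change. */
struct branch_entry {
	uint64_t from;
	uint64_t to;
	uint64_t mispred:1,	/* target mispredicted */
		 predicted:1,	/* target predicted */
		 in_tx:1,	/* in transaction */
		 abort:1,	/* transaction abort */
		 reserved:60;
};

/* Illustrative helper: count how many branches ran inside a transaction
 * and how many were transaction aborts. */
static void summarize(const struct branch_entry *br, unsigned int nr)
{
	unsigned int i, in_tx = 0, aborts = 0;

	for (i = 0; i < nr; i++) {
		in_tx  += br[i].in_tx;
		aborts += br[i].abort;
	}
	printf("%u of %u branches in a transaction, %u aborts\n",
	       in_tx, nr, aborts);
}

int main(void)
{
	/* Made-up branch stack standing in for decoded sample data. */
	struct branch_entry stack[2] = {
		{ .from = 0x400100, .to = 0x400200, .predicted = 1, .in_tx = 1 },
		{ .from = 0x400300, .to = 0x400080, .mispred = 1, .abort = 1 },
	};

	summarize(stack, 2);
	return 0;
}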
@@ -157,8 +157,11 @@ enum perf_branch_sample_type {
 	PERF_SAMPLE_BRANCH_ANY_CALL	= 1U << 4, /* any call branch */
 	PERF_SAMPLE_BRANCH_ANY_RETURN	= 1U << 5, /* any return branch */
 	PERF_SAMPLE_BRANCH_IND_CALL	= 1U << 6, /* indirect calls */
+	PERF_SAMPLE_BRANCH_ABORT_TX	= 1U << 7, /* transaction aborts */
+	PERF_SAMPLE_BRANCH_IN_TX	= 1U << 8, /* in transaction */
+	PERF_SAMPLE_BRANCH_NO_TX	= 1U << 9, /* not in transaction */
 
-	PERF_SAMPLE_BRANCH_MAX		= 1U << 7, /* non-ABI */
+	PERF_SAMPLE_BRANCH_MAX		= 1U << 10, /* non-ABI */
 };
 
 #define PERF_SAMPLE_BRANCH_PLM_ALL \
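On the request side, the new selectors are ordinary bits in perf_event_attr.branch_sample_type. A hedged sketch of how a tool might ask for branches inside transactions plus aborts (the sampling event and period are arbitrary choices, and the #ifndef fallbacks are only for building against a pre-change uapi header):

#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* Fallbacks if the installed uapi header predates these selectors. */
#ifndef PERF_SAMPLE_BRANCH_ABORT_TX
#define PERF_SAMPLE_BRANCH_ABORT_TX	(1U << 7)
#define PERF_SAMPLE_BRANCH_IN_TX	(1U << 8)
#define PERF_SAMPLE_BRANCH_NO_TX	(1U << 9)
#endif

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;	/* arbitrary sampling event */
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	attr.exclude_kernel = 1;
	/* User-level branches, restricted to those inside transactions,
	 * plus transaction aborts. */
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER |
				  PERF_SAMPLE_BRANCH_ANY  |
				  PERF_SAMPLE_BRANCH_IN_TX |
				  PERF_SAMPLE_BRANCH_ABORT_TX;

	fd = syscall(__NR_perf_event_open, &attr, 0 /* self */, -1 /* any cpu */,
		     -1 /* no group */, 0 /* flags */);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	/* A real tool would now mmap the ring buffer and parse the
	 * PERF_SAMPLE_BRANCH_STACK records, checking in_tx/abort per entry. */
	close(fd);
	return 0;
}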