forked from Minki/linux
perf intel-pt: Support generating branch stack
Add support for generating branch stack context for PT samples. The decoder reports a configurable number of branches as branch context for each sample. Internally it keeps track of them by using a simple sliding window. We also flush the last branch buffer on each sample to avoid overlapping intervals. This is useful for: - Reporting accurate basic block edge frequencies through the perf report branch view - Using with --branch-history to get the wider context of samples - Other users of LBRs Also the Documentation is updated. Examples: Record with Intel PT: perf record -e intel_pt//u ls Branch stacks are used by default if synthesized so: perf report --itrace=ile is the same as: perf report --itrace=ile -b Branch history can be requested also: perf report --itrace=igle --branch-history Based-on-patch-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Link: http://lkml.kernel.org/r/1443186956-18718-15-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
385e33063f
commit
f14445ee72
@ -671,6 +671,7 @@ The letters are:
|
||||
e synthesize tracing error events
|
||||
d create a debug log
|
||||
g synthesize a call chain (use with i or x)
|
||||
l synthesize last branch entries (use with i or x)
|
||||
|
||||
"Instructions" events look like they were recorded by "perf record -e
|
||||
instructions".
|
||||
@ -718,6 +719,15 @@ transactions events can be specified. e.g.
|
||||
--itrace=ig32
|
||||
--itrace=xg32
|
||||
|
||||
Also the number of last branch entries (default 64, max. 1024) for instructions or
|
||||
transactions events can be specified. e.g.
|
||||
|
||||
--itrace=il10
|
||||
--itrace=xl10
|
||||
|
||||
Note that last branch entries are cleared for each sample, so there is no overlap
|
||||
from one sample to the next.
|
||||
|
||||
To disable trace decoding entirely, use the option --no-itrace.
|
||||
|
||||
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "../perf.h"
|
||||
#include "session.h"
|
||||
#include "machine.h"
|
||||
#include "sort.h"
|
||||
#include "tool.h"
|
||||
#include "event.h"
|
||||
#include "evlist.h"
|
||||
@ -115,6 +116,9 @@ struct intel_pt_queue {
|
||||
void *decoder;
|
||||
const struct intel_pt_state *state;
|
||||
struct ip_callchain *chain;
|
||||
struct branch_stack *last_branch;
|
||||
struct branch_stack *last_branch_rb;
|
||||
size_t last_branch_pos;
|
||||
union perf_event *event_buf;
|
||||
bool on_heap;
|
||||
bool stop;
|
||||
@ -675,6 +679,19 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
if (pt->synth_opts.last_branch) {
|
||||
size_t sz = sizeof(struct branch_stack);
|
||||
|
||||
sz += pt->synth_opts.last_branch_sz *
|
||||
sizeof(struct branch_entry);
|
||||
ptq->last_branch = zalloc(sz);
|
||||
if (!ptq->last_branch)
|
||||
goto out_free;
|
||||
ptq->last_branch_rb = zalloc(sz);
|
||||
if (!ptq->last_branch_rb)
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
|
||||
if (!ptq->event_buf)
|
||||
goto out_free;
|
||||
@ -732,6 +749,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
|
||||
|
||||
out_free:
|
||||
zfree(&ptq->event_buf);
|
||||
zfree(&ptq->last_branch);
|
||||
zfree(&ptq->last_branch_rb);
|
||||
zfree(&ptq->chain);
|
||||
free(ptq);
|
||||
return NULL;
|
||||
@ -746,6 +765,8 @@ static void intel_pt_free_queue(void *priv)
|
||||
thread__zput(ptq->thread);
|
||||
intel_pt_decoder_free(ptq->decoder);
|
||||
zfree(&ptq->event_buf);
|
||||
zfree(&ptq->last_branch);
|
||||
zfree(&ptq->last_branch_rb);
|
||||
zfree(&ptq->chain);
|
||||
free(ptq);
|
||||
}
|
||||
@ -876,6 +897,57 @@ static int intel_pt_setup_queues(struct intel_pt *pt)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
|
||||
{
|
||||
struct branch_stack *bs_src = ptq->last_branch_rb;
|
||||
struct branch_stack *bs_dst = ptq->last_branch;
|
||||
size_t nr = 0;
|
||||
|
||||
bs_dst->nr = bs_src->nr;
|
||||
|
||||
if (!bs_src->nr)
|
||||
return;
|
||||
|
||||
nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
|
||||
memcpy(&bs_dst->entries[0],
|
||||
&bs_src->entries[ptq->last_branch_pos],
|
||||
sizeof(struct branch_entry) * nr);
|
||||
|
||||
if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
|
||||
memcpy(&bs_dst->entries[nr],
|
||||
&bs_src->entries[0],
|
||||
sizeof(struct branch_entry) * ptq->last_branch_pos);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
|
||||
{
|
||||
ptq->last_branch_pos = 0;
|
||||
ptq->last_branch_rb->nr = 0;
|
||||
}
|
||||
|
||||
static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
|
||||
{
|
||||
const struct intel_pt_state *state = ptq->state;
|
||||
struct branch_stack *bs = ptq->last_branch_rb;
|
||||
struct branch_entry *be;
|
||||
|
||||
if (!ptq->last_branch_pos)
|
||||
ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
|
||||
|
||||
ptq->last_branch_pos -= 1;
|
||||
|
||||
be = &bs->entries[ptq->last_branch_pos];
|
||||
be->from = state->from_ip;
|
||||
be->to = state->to_ip;
|
||||
be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
|
||||
be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
|
||||
/* No support for mispredict */
|
||||
|
||||
if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
|
||||
bs->nr += 1;
|
||||
}
|
||||
|
||||
static int intel_pt_inject_event(union perf_event *event,
|
||||
struct perf_sample *sample, u64 type,
|
||||
bool swapped)
|
||||
@ -890,6 +962,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
|
||||
struct intel_pt *pt = ptq->pt;
|
||||
union perf_event *event = ptq->event_buf;
|
||||
struct perf_sample sample = { .ip = 0, };
|
||||
struct dummy_branch_stack {
|
||||
u64 nr;
|
||||
struct branch_entry entries;
|
||||
} dummy_bs;
|
||||
|
||||
if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
|
||||
return 0;
|
||||
@ -912,6 +988,21 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
|
||||
sample.flags = ptq->flags;
|
||||
sample.insn_len = ptq->insn_len;
|
||||
|
||||
/*
|
||||
* perf report cannot handle events without a branch stack when using
|
||||
* SORT_MODE__BRANCH so make a dummy one.
|
||||
*/
|
||||
if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
|
||||
dummy_bs = (struct dummy_branch_stack){
|
||||
.nr = 1,
|
||||
.entries = {
|
||||
.from = sample.ip,
|
||||
.to = sample.addr,
|
||||
},
|
||||
};
|
||||
sample.branch_stack = (struct branch_stack *)&dummy_bs;
|
||||
}
|
||||
|
||||
if (pt->synth_opts.inject) {
|
||||
ret = intel_pt_inject_event(event, &sample,
|
||||
pt->branches_sample_type,
|
||||
@ -961,6 +1052,11 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
|
||||
sample.callchain = ptq->chain;
|
||||
}
|
||||
|
||||
if (pt->synth_opts.last_branch) {
|
||||
intel_pt_copy_last_branch_rb(ptq);
|
||||
sample.branch_stack = ptq->last_branch;
|
||||
}
|
||||
|
||||
if (pt->synth_opts.inject) {
|
||||
ret = intel_pt_inject_event(event, &sample,
|
||||
pt->instructions_sample_type,
|
||||
@ -974,6 +1070,9 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
|
||||
pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
|
||||
ret);
|
||||
|
||||
if (pt->synth_opts.last_branch)
|
||||
intel_pt_reset_last_branch_rb(ptq);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1008,6 +1107,11 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
|
||||
sample.callchain = ptq->chain;
|
||||
}
|
||||
|
||||
if (pt->synth_opts.last_branch) {
|
||||
intel_pt_copy_last_branch_rb(ptq);
|
||||
sample.branch_stack = ptq->last_branch;
|
||||
}
|
||||
|
||||
if (pt->synth_opts.inject) {
|
||||
ret = intel_pt_inject_event(event, &sample,
|
||||
pt->transactions_sample_type,
|
||||
@ -1021,6 +1125,9 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
|
||||
pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
|
||||
ret);
|
||||
|
||||
/*
 * Clear the last-branch ring after every transaction sample so that
 * consecutive samples do not overlap.  This must be keyed on
 * synth_opts.last_branch (not callchain): the ring buffer is only
 * allocated when last_branch is set, and it must be reset whenever
 * branch stacks are being synthesized.
 */
if (pt->synth_opts.last_branch)
	intel_pt_reset_last_branch_rb(ptq);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1116,6 +1223,9 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
|
||||
return err;
|
||||
}
|
||||
|
||||
if (pt->synth_opts.last_branch)
|
||||
intel_pt_update_last_branch_rb(ptq);
|
||||
|
||||
if (!pt->sync_switch)
|
||||
return 0;
|
||||
|
||||
@ -1763,6 +1873,8 @@ static int intel_pt_synth_events(struct intel_pt *pt,
|
||||
pt->instructions_sample_period = attr.sample_period;
|
||||
if (pt->synth_opts.callchain)
|
||||
attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
|
||||
if (pt->synth_opts.last_branch)
|
||||
attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
|
||||
pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
|
||||
id, (u64)attr.sample_type);
|
||||
err = intel_pt_synth_event(session, &attr, id);
|
||||
@ -1782,6 +1894,8 @@ static int intel_pt_synth_events(struct intel_pt *pt,
|
||||
attr.sample_period = 1;
|
||||
if (pt->synth_opts.callchain)
|
||||
attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
|
||||
if (pt->synth_opts.last_branch)
|
||||
attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
|
||||
pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
|
||||
id, (u64)attr.sample_type);
|
||||
err = intel_pt_synth_event(session, &attr, id);
|
||||
@ -1808,6 +1922,7 @@ static int intel_pt_synth_events(struct intel_pt *pt,
|
||||
attr.sample_period = 1;
|
||||
attr.sample_type |= PERF_SAMPLE_ADDR;
|
||||
attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
|
||||
attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
|
||||
pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
|
||||
id, (u64)attr.sample_type);
|
||||
err = intel_pt_synth_event(session, &attr, id);
|
||||
|
Loading…
Reference in New Issue
Block a user