perf/core improvements and fixes:
New features: - Add 'brstackinsn' field in 'perf script' to reuse the x86 instruction decoder used in the Intel PT code to study hot paths to samples (Andi Kleen) Kernel: - Default UPROBES_EVENTS to Y (Alexei Starovoitov) - Fix check for kretprobe offset within function entry (Naveen N. Rao) Infrastructure: - Introduce util func is_sdt_event() (Ravi Bangoria) - Make perf_event__synthesize_mmap_events() scale on older kernels where reading /proc/pid/maps is way slower than reading /proc/pid/task/pid/maps (Stephane Eranian) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> -----BEGIN PGP SIGNATURE----- Version: GnuPG v2 iQIcBAABCAAGBQJYyrdSAAoJENZQFvNTUqpAe+4P/3c4ilBSOxLCCxGO7jDYo9oq /KqlvsCIg7+vo5eqrOUJAb4qXFnvpYxwjMMkL5rx7gdsBCRfRXIINGWUMrq5mNyk MgxuqYnp+yRuxLYml2wn+tdwLzcHWSN2EO9mqQ14N4I+HvgdLmVPQ44ACQXs6KfL dk/Ix8YtnFWl2sDZjvyr7ZBqwCPzzklZgHM6erxNUr/WJspzUiixAWqUmewodOUl P3PitlHXkITOK3AxSqOjJ4g1k933215nGih7hr0XdjEm4pIYaYksShQ6k9DASCrv dn2o1pF1LTu7KCtAo70aaSB7GXydwoA//o2gRbDkSwJJ25DIImZxJXQz9PAYDOo1 vXSIhmlQ72c4/Yv/XzVOrIoMMMpmWKS3lGZxMVGR/Ie9Gw4kbotkaoEqEpNQsaDZ iIaU5v/EcvvToT7T7VHrGg0+vmHgYxm5gSlyASi2IrO2/wJAs0v2pYfuL6gYhXGp mhv/pHUv4l9OW+Ubm+zJEEcg337c2RQU5wT/bk4PihxY6nQyEH2Pn5VzdNbZLuMR eWnqTH/md+8/bkhmuZJp71wm60oPHoPvbDjvtfVmXAa52AzO+NWSc9Veke3C/QRm XgNkrXlzeKopEso3j4gw2iAolqw9t8FHFLGgbTkS+6UCKjAM7vNLiIV02LQqhM50 qCnKEusMDCRgzeOXxYt+ =Bg5M -----END PGP SIGNATURE----- Merge tag 'perf-core-for-mingo-4.12-20170316' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: New features: - Add 'brstackinsn' field in 'perf script' to reuse the x86 instruction decoder used in the Intel PT code to study hot paths to samples (Andi Kleen) Kernel changes: - Default UPROBES_EVENTS to Y (Alexei Starovoitov) - Fix check for kretprobe offset within function entry (Naveen N. 
Rao) Infrastructure changes: - Introduce util func is_sdt_event() (Ravi Bangoria) - Make perf_event__synthesize_mmap_events() scale on older kernels where reading /proc/pid/maps is way slower than reading /proc/pid/task/pid/maps (Stephane Eranian) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
61f63e3837
@ -268,6 +268,7 @@ extern void show_registers(struct pt_regs *regs);
|
||||
extern void kprobes_inc_nmissed_count(struct kprobe *p);
|
||||
extern bool arch_within_kprobe_blacklist(unsigned long addr);
|
||||
extern bool arch_function_offset_within_entry(unsigned long offset);
|
||||
extern bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
|
||||
|
||||
extern bool within_kprobe_blacklist(unsigned long addr);
|
||||
|
||||
|
@ -1391,21 +1391,19 @@ bool within_kprobe_blacklist(unsigned long addr)
|
||||
* This returns encoded errors if it fails to look up symbol or invalid
|
||||
* combination of parameters.
|
||||
*/
|
||||
static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
|
||||
static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr,
|
||||
const char *symbol_name, unsigned int offset)
|
||||
{
|
||||
kprobe_opcode_t *addr = p->addr;
|
||||
|
||||
if ((p->symbol_name && p->addr) ||
|
||||
(!p->symbol_name && !p->addr))
|
||||
if ((symbol_name && addr) || (!symbol_name && !addr))
|
||||
goto invalid;
|
||||
|
||||
if (p->symbol_name) {
|
||||
kprobe_lookup_name(p->symbol_name, addr);
|
||||
if (symbol_name) {
|
||||
kprobe_lookup_name(symbol_name, addr);
|
||||
if (!addr)
|
||||
return ERR_PTR(-ENOENT);
|
||||
}
|
||||
|
||||
addr = (kprobe_opcode_t *)(((char *)addr) + p->offset);
|
||||
addr = (kprobe_opcode_t *)(((char *)addr) + offset);
|
||||
if (addr)
|
||||
return addr;
|
||||
|
||||
@ -1413,6 +1411,11 @@ invalid:
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
/*
 * Resolve the probe address for @p by handing its addr, symbol_name and
 * offset fields to _kprobe_addr(); that call's result is returned unchanged.
 */
static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
|
||||
{
|
||||
return _kprobe_addr(p->addr, p->symbol_name, p->offset);
|
||||
}
|
||||
|
||||
/* Check passed kprobe is valid and return kprobe in kprobe_table. */
|
||||
static struct kprobe *__get_valid_kprobe(struct kprobe *p)
|
||||
{
|
||||
@ -1881,19 +1884,28 @@ bool __weak arch_function_offset_within_entry(unsigned long offset)
|
||||
return !offset;
|
||||
}
|
||||
|
||||
/*
 * Check whether the location named by @addr / @sym / @offset is accepted as
 * a function entry.
 *
 * The triple is first resolved through _kprobe_addr(). The resolved address
 * is then handed to kallsyms_lookup_size_offset() to obtain its offset
 * within the containing symbol, and that offset is judged by
 * arch_function_offset_within_entry().
 *
 * Returns false if the address cannot be resolved or looked up; otherwise
 * returns the architecture hook's verdict.
 */
bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
{
	kprobe_opcode_t *resolved;
	unsigned long entry_off;

	resolved = _kprobe_addr(addr, sym, offset);
	if (IS_ERR(resolved))
		return false;

	if (!kallsyms_lookup_size_offset((unsigned long)resolved, NULL, &entry_off))
		return false;

	return arch_function_offset_within_entry(entry_off);
}
|
||||
|
||||
int register_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
int ret = 0;
|
||||
struct kretprobe_instance *inst;
|
||||
int i;
|
||||
void *addr;
|
||||
unsigned long offset;
|
||||
|
||||
addr = kprobe_addr(&rp->kp);
|
||||
if (!kallsyms_lookup_size_offset((unsigned long)addr, NULL, &offset))
|
||||
return -EINVAL;
|
||||
|
||||
if (!arch_function_offset_within_entry(offset))
|
||||
if (!function_offset_within_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset))
|
||||
return -EINVAL;
|
||||
|
||||
if (kretprobe_blacklist_size) {
|
||||
|
@ -455,7 +455,7 @@ config UPROBE_EVENTS
|
||||
select UPROBES
|
||||
select PROBE_EVENTS
|
||||
select TRACING
|
||||
default n
|
||||
default y
|
||||
help
|
||||
This allows the user to add tracing events on top of userspace
|
||||
dynamic events (similar to tracepoints) on the fly via the trace
|
||||
|
@ -697,7 +697,7 @@ static int create_trace_kprobe(int argc, char **argv)
|
||||
return ret;
|
||||
}
|
||||
if (offset && is_return &&
|
||||
!arch_function_offset_within_entry(offset)) {
|
||||
!function_offset_within_entry(NULL, symbol, offset)) {
|
||||
pr_info("Given offset is not valid for return probe.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -100,7 +100,7 @@
|
||||
#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
|
||||
#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
|
||||
#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
|
||||
/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
|
||||
#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
|
||||
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
|
||||
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
|
||||
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
|
||||
@ -186,7 +186,7 @@
|
||||
*
|
||||
* Reuse free bits when adding new feature flags!
|
||||
*/
|
||||
|
||||
#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
|
||||
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
|
||||
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
|
||||
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
|
||||
@ -321,5 +321,4 @@
|
||||
#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
|
||||
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
|
||||
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
|
||||
|
||||
#endif /* _ASM_X86_CPUFEATURES_H */
|
||||
|
@ -116,7 +116,7 @@ OPTIONS
|
||||
--fields::
|
||||
Comma separated list of fields to print. Options are:
|
||||
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
|
||||
srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
|
||||
srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
|
||||
callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw,
|
||||
to indicate to which event type the field list applies.
|
||||
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
|
||||
@ -189,15 +189,20 @@ OPTIONS
|
||||
i.e., -F "" is not allowed.
|
||||
|
||||
The brstack output includes branch related information with raw addresses using the
|
||||
/v/v/v/v/ syntax in the following order:
|
||||
/v/v/v/v/cycles syntax in the following order:
|
||||
FROM: branch source instruction
|
||||
TO : branch target instruction
|
||||
M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported
|
||||
X/- : X=branch inside a transactional region, -=not in transaction region or not supported
|
||||
A/- : A=TSX abort entry, -=not aborted region or not supported
|
||||
cycles
|
||||
|
||||
The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
|
||||
|
||||
When brstackinsn is specified, the full assembler sequence of the branches for each sample
|
||||
is printed. This is the full execution path leading to the sample. This is only supported when the
|
||||
sample was recorded with perf record -b or -j any.
|
||||
|
||||
-k::
|
||||
--vmlinux=<file>::
|
||||
vmlinux pathname
|
||||
@ -302,6 +307,10 @@ include::itrace.txt[]
|
||||
stop time is not given (i.e, time string is 'x.y,') then analysis goes
|
||||
to end of file.
|
||||
|
||||
--max-blocks::
|
||||
Set the maximum number of program blocks to print with brstackinsn for
|
||||
each sample.
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-record[1], linkperf:perf-script-perl[1],
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include <linux/time64.h>
|
||||
#include "asm/bug.h"
|
||||
#include "util/mem-events.h"
|
||||
#include "util/dump-insn.h"
|
||||
|
||||
static char const *script_name;
|
||||
static char const *generate_script_lang;
|
||||
@ -42,6 +43,7 @@ static bool nanosecs;
|
||||
static const char *cpu_list;
|
||||
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
|
||||
static struct perf_stat_config stat_config;
|
||||
static int max_blocks;
|
||||
|
||||
unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
|
||||
|
||||
@ -69,6 +71,7 @@ enum perf_output_field {
|
||||
PERF_OUTPUT_CALLINDENT = 1U << 20,
|
||||
PERF_OUTPUT_INSN = 1U << 21,
|
||||
PERF_OUTPUT_INSNLEN = 1U << 22,
|
||||
PERF_OUTPUT_BRSTACKINSN = 1U << 23,
|
||||
};
|
||||
|
||||
struct output_option {
|
||||
@ -98,6 +101,7 @@ struct output_option {
|
||||
{.str = "callindent", .field = PERF_OUTPUT_CALLINDENT},
|
||||
{.str = "insn", .field = PERF_OUTPUT_INSN},
|
||||
{.str = "insnlen", .field = PERF_OUTPUT_INSNLEN},
|
||||
{.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
|
||||
};
|
||||
|
||||
/* default set to maintain compatibility with current format */
|
||||
@ -292,7 +296,13 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
|
||||
"selected. Hence, no address to lookup the source line number.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (PRINT_FIELD(BRSTACKINSN) &&
|
||||
!(perf_evlist__combined_branch_type(session->evlist) &
|
||||
PERF_SAMPLE_BRANCH_ANY)) {
|
||||
pr_err("Display of branch stack assembler requested, but non all-branch filter set\n"
|
||||
"Hint: run 'perf record -b ...'\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
|
||||
perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",
|
||||
PERF_OUTPUT_TID|PERF_OUTPUT_PID))
|
||||
@ -546,6 +556,233 @@ static void print_sample_brstacksym(struct perf_sample *sample,
|
||||
}
|
||||
}
|
||||
|
||||
#define MAXBB 16384UL
|
||||
|
||||
/*
 * Fetch the code bytes of the block [@start, @end] (plus MAXINSN extra bytes
 * so the instruction at @end can be decoded) into @buffer.
 *
 * @cpumode is set from whether @start is a kernel address; @is64bit is set
 * from the DSO that maps @start. @last only selects which diagnostic is
 * printed for an oversized block.
 *
 * Returns:
 *   0        if @start or @end is zero, the block is too long for MAXBB,
 *            or the address cannot be resolved to a usable DSO;
 *   -ENXIO   if @start and @end are on different sides of the kernel/user
 *            boundary (the caller is expected to patch this up);
 *   otherwise the result of dso__data_read_offset().
 */
static int grab_bb(u8 *buffer, u64 start, u64 end,
		   struct machine *machine, struct thread *thread,
		   bool *is64bit, u8 *cpumode, bool last)
{
	struct addr_location loc;
	long file_off, nread;
	bool in_kernel;
	u64 span;

	if (!start || !end)
		return 0;

	in_kernel = machine__kernel_ip(machine, start);
	*cpumode = in_kernel ? PERF_RECORD_MISC_KERNEL : PERF_RECORD_MISC_USER;

	/*
	 * Block overlaps between kernel and user.
	 * This can happen due to ring filtering.
	 * On Intel CPUs the entry into the kernel is filtered,
	 * but the exit is not. Let the caller patch it up.
	 */
	if (machine__kernel_ip(machine, end) != in_kernel) {
		printf("\tblock %" PRIx64 "-%" PRIx64 " transfers between kernel and user\n",
		       start, end);
		return -ENXIO;
	}

	memset(&loc, 0, sizeof(loc));

	span = end - start;
	if (span > MAXBB - MAXINSN) {
		if (!last)
			printf("\tblock %" PRIx64 "-%" PRIx64 " (%" PRIu64 ") too long to dump\n", start, end, span);
		else
			printf("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n", start, end);
		return 0;
	}

	thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &loc);
	if (!loc.map || !loc.map->dso ||
	    loc.map->dso->data.status == DSO_DATA_STATUS_ERROR) {
		printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end);
		return 0;
	}

	/* Load maps to ensure dso->is_64_bit has been updated */
	map__load(loc.map);

	file_off = loc.map->map_ip(loc.map, start);
	nread = dso__data_read_offset(loc.map->dso, machine, file_off,
				      (u8 *)buffer, span + MAXINSN);

	*is64bit = loc.map->dso->is_64_bit;

	if (nread <= 0)
		printf("\tcannot fetch code for block at %" PRIx64 "-%" PRIx64 "\n",
		       start, end);

	return nread;
}
|
||||
|
||||
/*
 * Print one line for the branch instruction at @ip: the address, the text
 * produced by dump_insn() for the bytes in @inbuf, and the flags of branch
 * entry @en. When the entry carries a cycle count it is appended, followed
 * by an IPC figure computed from @insn when @insn is non-zero.
 */
static void print_jump(uint64_t ip, struct branch_entry *en,
		       struct perf_insn *x, u8 *inbuf, int len,
		       int insn)
{
	const char *text = dump_insn(x, ip, inbuf, len, NULL);
	const char *pred_tag = en->flags.predicted ? " PRED" : "";
	const char *mispred_tag = en->flags.mispred ? " MISPRED" : "";
	const char *intx_tag = en->flags.in_tx ? " INTX" : "";
	const char *abort_tag = en->flags.abort ? " ABORT" : "";

	printf("\t%016" PRIx64 "\t%-30s\t#%s%s%s%s",
	       ip, text, pred_tag, mispred_tag, intx_tag, abort_tag);

	if (en->flags.cycles)
		printf(" %d cycles", en->flags.cycles);
	if (en->flags.cycles && insn)
		printf(" %.2f IPC", (float)insn / en->flags.cycles);

	putchar('\n');
}
|
||||
|
||||
/*
 * Print a "symbol[+offset]:" heading line for @addr, but only when @addr
 * falls outside the symbol remembered in *@lastsym, so consecutive
 * instructions of one function share a single heading.
 *
 * The address is looked up as MAP__FUNCTION first and as MAP__VARIABLE if
 * that finds no map. Nothing is printed when no symbol is found. When the
 * SRCLINE field is selected for @attr the source line is appended. On
 * output *@lastsym is updated to the symbol just printed.
 */
static void print_ip_sym(struct thread *thread, u8 cpumode, int cpu,
			 uint64_t addr, struct symbol **lastsym,
			 struct perf_event_attr *attr)
{
	struct addr_location al;
	struct symbol *prev = *lastsym;
	int off;

	memset(&al, 0, sizeof(al));

	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
	if (!al.map)
		thread__find_addr_map(thread, cpumode, MAP__VARIABLE,
				      addr, &al);

	/* Still inside the symbol that was announced last time. */
	if (prev && al.addr >= prev->start && al.addr < prev->end)
		return;

	al.cpu = cpu;
	al.sym = al.map ? map__find_symbol(al.map, al.addr) : NULL;
	if (!al.sym)
		return;

	off = al.addr - al.sym->start;
	if (al.addr >= al.sym->end)
		off = al.addr - al.map->start - al.sym->start;

	printf("\t%s", al.sym->name);
	if (off)
		printf("%+d", off);
	putchar(':');
	if (PRINT_FIELD(SRCLINE))
		map__fprintf_srcline(al.map, al.addr, "\t", stdout);
	putchar('\n');

	*lastsym = al.sym;
}
|
||||
|
||||
static void print_sample_brstackinsn(struct perf_sample *sample,
|
||||
struct thread *thread,
|
||||
struct perf_event_attr *attr,
|
||||
struct machine *machine)
|
||||
{
|
||||
struct branch_stack *br = sample->branch_stack;
|
||||
u64 start, end;
|
||||
int i, insn, len, nr, ilen;
|
||||
struct perf_insn x;
|
||||
u8 buffer[MAXBB];
|
||||
unsigned off;
|
||||
struct symbol *lastsym = NULL;
|
||||
|
||||
if (!(br && br->nr))
|
||||
return;
|
||||
nr = br->nr;
|
||||
if (max_blocks && nr > max_blocks + 1)
|
||||
nr = max_blocks + 1;
|
||||
|
||||
x.thread = thread;
|
||||
x.cpu = sample->cpu;
|
||||
|
||||
putchar('\n');
|
||||
|
||||
/* Handle first from jump, of which we don't know the entry. */
|
||||
len = grab_bb(buffer, br->entries[nr-1].from,
|
||||
br->entries[nr-1].from,
|
||||
machine, thread, &x.is64bit, &x.cpumode, false);
|
||||
if (len > 0) {
|
||||
print_ip_sym(thread, x.cpumode, x.cpu,
|
||||
br->entries[nr - 1].from, &lastsym, attr);
|
||||
print_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
|
||||
&x, buffer, len, 0);
|
||||
}
|
||||
|
||||
/* Print all blocks */
|
||||
for (i = nr - 2; i >= 0; i--) {
|
||||
if (br->entries[i].from || br->entries[i].to)
|
||||
pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i,
|
||||
br->entries[i].from,
|
||||
br->entries[i].to);
|
||||
start = br->entries[i + 1].to;
|
||||
end = br->entries[i].from;
|
||||
|
||||
len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
|
||||
/* Patch up missing kernel transfers due to ring filters */
|
||||
if (len == -ENXIO && i > 0) {
|
||||
end = br->entries[--i].from;
|
||||
pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end);
|
||||
len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
|
||||
}
|
||||
if (len <= 0)
|
||||
continue;
|
||||
|
||||
insn = 0;
|
||||
for (off = 0;; off += ilen) {
|
||||
uint64_t ip = start + off;
|
||||
|
||||
print_ip_sym(thread, x.cpumode, x.cpu, ip, &lastsym, attr);
|
||||
if (ip == end) {
|
||||
print_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn);
|
||||
break;
|
||||
} else {
|
||||
printf("\t%016" PRIx64 "\t%s\n", ip,
|
||||
dump_insn(&x, ip, buffer + off, len - off, &ilen));
|
||||
if (ilen == 0)
|
||||
break;
|
||||
insn++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Hit the branch? In this case we are already done, and the target
|
||||
* has not been executed yet.
|
||||
*/
|
||||
if (br->entries[0].from == sample->ip)
|
||||
return;
|
||||
if (br->entries[0].flags.abort)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Print final block upto sample
|
||||
*/
|
||||
start = br->entries[0].to;
|
||||
end = sample->ip;
|
||||
len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true);
|
||||
print_ip_sym(thread, x.cpumode, x.cpu, start, &lastsym, attr);
|
||||
if (len <= 0) {
|
||||
/* Print at least last IP if basic block did not work */
|
||||
len = grab_bb(buffer, sample->ip, sample->ip,
|
||||
machine, thread, &x.is64bit, &x.cpumode, false);
|
||||
if (len <= 0)
|
||||
return;
|
||||
|
||||
printf("\t%016" PRIx64 "\t%s\n", sample->ip,
|
||||
dump_insn(&x, sample->ip, buffer, len, NULL));
|
||||
return;
|
||||
}
|
||||
for (off = 0; off <= end - start; off += ilen) {
|
||||
printf("\t%016" PRIx64 "\t%s\n", start + off,
|
||||
dump_insn(&x, start + off, buffer + off, len - off, &ilen));
|
||||
if (ilen == 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void print_sample_addr(struct perf_sample *sample,
|
||||
struct thread *thread,
|
||||
@ -632,7 +869,9 @@ static void print_sample_callindent(struct perf_sample *sample,
|
||||
}
|
||||
|
||||
static void print_insn(struct perf_sample *sample,
|
||||
struct perf_event_attr *attr)
|
||||
struct perf_event_attr *attr,
|
||||
struct thread *thread,
|
||||
struct machine *machine)
|
||||
{
|
||||
if (PRINT_FIELD(INSNLEN))
|
||||
printf(" ilen: %d", sample->insn_len);
|
||||
@ -643,12 +882,15 @@ static void print_insn(struct perf_sample *sample,
|
||||
for (i = 0; i < sample->insn_len; i++)
|
||||
printf(" %02x", (unsigned char)sample->insn[i]);
|
||||
}
|
||||
if (PRINT_FIELD(BRSTACKINSN))
|
||||
print_sample_brstackinsn(sample, thread, attr, machine);
|
||||
}
|
||||
|
||||
static void print_sample_bts(struct perf_sample *sample,
|
||||
struct perf_evsel *evsel,
|
||||
struct thread *thread,
|
||||
struct addr_location *al)
|
||||
struct addr_location *al,
|
||||
struct machine *machine)
|
||||
{
|
||||
struct perf_event_attr *attr = &evsel->attr;
|
||||
bool print_srcline_last = false;
|
||||
@ -689,7 +931,7 @@ static void print_sample_bts(struct perf_sample *sample,
|
||||
if (print_srcline_last)
|
||||
map__fprintf_srcline(al->map, al->addr, "\n ", stdout);
|
||||
|
||||
print_insn(sample, attr);
|
||||
print_insn(sample, attr, thread, machine);
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
@ -872,7 +1114,8 @@ static size_t data_src__printf(u64 data_src)
|
||||
|
||||
static void process_event(struct perf_script *script,
|
||||
struct perf_sample *sample, struct perf_evsel *evsel,
|
||||
struct addr_location *al)
|
||||
struct addr_location *al,
|
||||
struct machine *machine)
|
||||
{
|
||||
struct thread *thread = al->thread;
|
||||
struct perf_event_attr *attr = &evsel->attr;
|
||||
@ -899,7 +1142,7 @@ static void process_event(struct perf_script *script,
|
||||
print_sample_flags(sample->flags);
|
||||
|
||||
if (is_bts_event(attr)) {
|
||||
print_sample_bts(sample, evsel, thread, al);
|
||||
print_sample_bts(sample, evsel, thread, al, machine);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -937,7 +1180,7 @@ static void process_event(struct perf_script *script,
|
||||
|
||||
if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
|
||||
print_sample_bpf_output(sample);
|
||||
print_insn(sample, attr);
|
||||
print_insn(sample, attr, thread, machine);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
@ -1047,7 +1290,7 @@ static int process_sample_event(struct perf_tool *tool,
|
||||
if (scripting_ops)
|
||||
scripting_ops->process_event(event, sample, evsel, &al);
|
||||
else
|
||||
process_event(scr, sample, evsel, &al);
|
||||
process_event(scr, sample, evsel, &al, machine);
|
||||
|
||||
out_put:
|
||||
addr_location__put(&al);
|
||||
@ -2191,7 +2434,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
|
||||
"Valid types: hw,sw,trace,raw. "
|
||||
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
|
||||
"addr,symoff,period,iregs,brstack,brstacksym,flags,"
|
||||
"bpf-output,callindent,insn,insnlen", parse_output_fields),
|
||||
"bpf-output,callindent,insn,insnlen,brstackinsn",
|
||||
parse_output_fields),
|
||||
OPT_BOOLEAN('a', "all-cpus", &system_wide,
|
||||
"system-wide collection from all CPUs"),
|
||||
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
|
||||
@ -2222,6 +2466,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
|
||||
OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events,
|
||||
"Show namespace events (if recorded)"),
|
||||
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
|
||||
OPT_INTEGER(0, "max-blocks", &max_blocks,
|
||||
"Maximum number of code blocks to dump with brstackinsn"),
|
||||
OPT_BOOLEAN(0, "ns", &nanosecs,
|
||||
"Use 9 decimal places when displaying time"),
|
||||
OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
|
||||
|
@ -82,6 +82,7 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
|
||||
libperf-$(CONFIG_AUXTRACE) += intel-pt.o
|
||||
libperf-$(CONFIG_AUXTRACE) += intel-bts.o
|
||||
libperf-y += parse-branch-options.o
|
||||
libperf-y += dump-insn.o
|
||||
libperf-y += parse-regs-options.o
|
||||
libperf-y += term.o
|
||||
libperf-y += help-unknown-cmd.o
|
||||
|
14
tools/perf/util/dump-insn.c
Normal file
14
tools/perf/util/dump-insn.c
Normal file
@ -0,0 +1,14 @@
|
||||
#include <linux/compiler.h>
|
||||
#include "dump-insn.h"
|
||||
|
||||
/* Fallback code */
|
||||
|
||||
/*
 * Weak default for dump_insn(): all inputs are ignored, 0 is stored through
 * @lenp when it is non-NULL, and the placeholder string "?" is returned.
 */
__weak
|
||||
const char *dump_insn(struct perf_insn *x __maybe_unused,
|
||||
u64 ip __maybe_unused, u8 *inbuf __maybe_unused,
|
||||
int inlen __maybe_unused, int *lenp)
|
||||
{
|
||||
if (lenp)
|
||||
*lenp = 0;
|
||||
return "?";
|
||||
}
|
22
tools/perf/util/dump-insn.h
Normal file
22
tools/perf/util/dump-insn.h
Normal file
@ -0,0 +1,22 @@
|
||||
#ifndef __PERF_DUMP_INSN_H
|
||||
#define __PERF_DUMP_INSN_H 1
|
||||
|
||||
#define MAXINSN 15
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
struct thread;
|
||||
|
||||
/*
 * Context handed to dump_insn(). The caller fills in the first group of
 * fields; out[] is storage that a dump_insn() implementation may format its
 * text into and return a pointer to.
 */
struct perf_insn {
|
||||
/* Initialized by callers: */
|
||||
struct thread *thread;
|
||||
u8 cpumode;
|
||||
bool is64bit;
|
||||
int cpu;
|
||||
/* Temporary: scratch space for the formatted instruction text */
|
||||
char out[256];
|
||||
};
|
||||
|
||||
const char *dump_insn(struct perf_insn *x, u64 ip,
|
||||
u8 *inbuf, int inlen, int *lenp);
|
||||
#endif
|
@ -325,8 +325,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
|
||||
if (machine__is_default_guest(machine))
|
||||
return 0;
|
||||
|
||||
snprintf(filename, sizeof(filename), "%s/proc/%d/maps",
|
||||
machine->root_dir, pid);
|
||||
snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps",
|
||||
machine->root_dir, pid, pid);
|
||||
|
||||
fp = fopen(filename, "r");
|
||||
if (fp == NULL) {
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include "insn.c"
|
||||
|
||||
#include "intel-pt-insn-decoder.h"
|
||||
#include "dump-insn.h"
|
||||
|
||||
#if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN
|
||||
#error Instruction buffer size too small
|
||||
@ -179,6 +180,29 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Decode one instruction from @inbuf and render its raw bytes as hex text.
 *
 * @x:     context; x->is64bit is passed to insn_init() and x->out receives
 *         the formatted text
 * @ip:    address of the instruction (unused here)
 * @inbuf: bytes to decode
 * @inlen: number of valid bytes in @inbuf
 * @lenp:  if non-NULL, receives the decoded instruction length, or 0 when
 *         decoding fails (matching the weak fallback implementation)
 *
 * Returns x->out ("insn: " followed by the instruction bytes) on success,
 * or the literal "<bad>" when the instruction is incomplete or longer than
 * @inlen.
 */
const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
		      u8 *inbuf, int inlen, int *lenp)
{
	struct insn insn;
	int n, i;
	int left;

	/* Callers loop on *lenp; make sure a failed decode reports 0. */
	if (lenp)
		*lenp = 0;

	insn_init(&insn, inbuf, inlen, x->is64bit);
	insn_get_length(&insn);
	if (!insn_complete(&insn) || insn.length > inlen)
		return "<bad>";
	if (lenp)
		*lenp = insn.length;

	left = sizeof(x->out);
	n = snprintf(x->out, left, "insn: ");
	left -= n;
	for (i = 0; i < insn.length && left > 0; i++) {
		/* Account only for what this call wrote, not the running total. */
		int written = snprintf(x->out + n, left, "%02x ", inbuf[i]);

		if (written < 0)
			break;
		n += written;
		left -= written;
	}
	return x->out;
}
|
||||
|
||||
const char *branch_name[] = {
|
||||
[INTEL_PT_OP_OTHER] = "Other",
|
||||
[INTEL_PT_OP_CALL] = "Call",
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <stdbool.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <string.h>
|
||||
|
||||
struct list_head;
|
||||
struct perf_evsel;
|
||||
@ -196,4 +197,23 @@ int is_valid_tracepoint(const char *event_string);
|
||||
int valid_event_mount(const char *eventfs);
|
||||
char *parse_events_formats_error_string(char *additional_terms);
|
||||
|
||||
#ifdef HAVE_LIBELF_SUPPORT
|
||||
/*
|
||||
* If the probe point starts with '%',
|
||||
* or starts with "sdt_" and has a ':' but no '=',
|
||||
* then it should be a SDT/cached probe point.
|
||||
*/
|
||||
/*
 * is_sdt_event - test whether a probe definition names an SDT/cached probe.
 * @str: NUL-terminated probe definition; must not be NULL (it is
 *       dereferenced unconditionally and never modified).
 *
 * Returns true when @str starts with '%', or starts with "sdt_" and
 * contains a ':' but no '='; false otherwise.
 */
static inline bool is_sdt_event(const char *str)
{
	if (str[0] == '%')
		return true;

	return !strncmp(str, "sdt_", 4) &&
	       strchr(str, ':') && !strchr(str, '=');
}
|
||||
#else
|
||||
/*
 * Variant used when HAVE_LIBELF_SUPPORT is not defined: @str is ignored and
 * no probe definition is ever reported as an SDT event. The parameter is
 * const-qualified because the string is never written.
 */
static inline bool is_sdt_event(const char *str __maybe_unused)
{
	return false;
}
|
||||
#endif /* HAVE_LIBELF_SUPPORT */
|
||||
|
||||
#endif /* __PERF_PARSE_EVENTS_H */
|
||||
|
@ -1341,14 +1341,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
|
||||
if (!arg)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* If the probe point starts with '%',
|
||||
* or starts with "sdt_" and has a ':' but no '=',
|
||||
* then it should be a SDT/cached probe point.
|
||||
*/
|
||||
if (arg[0] == '%' ||
|
||||
(!strncmp(arg, "sdt_", 4) &&
|
||||
!!strchr(arg, ':') && !strchr(arg, '='))) {
|
||||
if (is_sdt_event(arg)) {
|
||||
pev->sdt = true;
|
||||
if (arg[0] == '%')
|
||||
arg++;
|
||||
|
Loading…
Reference in New Issue
Block a user