Merge tag 'perf-core-for-mingo-4.20-20181025' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- Introduce 'perf trace --max-events' for stopping 'perf trace' when
  that many syscalls (enter+exit), tracepoints or other events such as
  page faults take place.

  Support that as well on a per-event basis, e.g.:

   perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/

  This will stop when 2 context switches, 4 block plugs, 1 block unplug and
  3 net_dev_queue tracepoints take place. (Arnaldo Carvalho de Melo)

- Poll for monitored tasks being alive in 'perf stat -p/-t', exiting when
  those tasks all terminate (Jiri Olsa)

- Encode the -k clockid frequency into perf.data, enabling conversion of
  timestamp-derived metrics into wall clock time at the reporting stage. (Alexey Budankov)

- Improve the Intel PT call graph SQL database and GUI python scripts,
  adopting the Qt MDI interface to allow multiple subwindows for all the
  tables, which helps in visualizing the data in the SQL tables. When
  available, the Intel XED disassembler libraries are used to present the
  Intel PT data as x86 asm mnemonics. This last feature is not currently
  working in some cases; a fix is being discussed (Adrian Hunter)

- Implement an ftrace function_graph view in 'perf script' when processing
  hardware trace data such as Intel PT (Andi Kleen)

- Better integration with the Intel XED disassembler, when available, in
  'perf script' (Andi Kleen)

- Some 'perf trace' refcount drop fixes (Arnaldo Carvalho de Melo)

- Add Sparc support to 'perf annotate' and jitdump (David Miller)

- Fix PLT symbol entry/header sizes properly on Sparc (David Miller)

- Fix failure to generate the arm64 system call table when /tmp is mounted
  with 'noexec' (Hongxu Jia)

- Allow extended console debug output in 'perf script' (Milian Wolff)

- Flush output stream after events in 'perf script' verbose mode (Milian Wolff)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit efe8eaf7b5 (Ingo Molnar, 2018-10-26 09:22:45 +02:00)
38 changed files with 2778 additions and 396 deletions

@ -116,6 +116,7 @@ static int get_value(struct parse_opt_ctx_t *p,
case OPTION_INTEGER:
case OPTION_UINTEGER:
case OPTION_LONG:
case OPTION_ULONG:
case OPTION_U64:
default:
break;
@ -166,6 +167,7 @@ static int get_value(struct parse_opt_ctx_t *p,
case OPTION_INTEGER:
case OPTION_UINTEGER:
case OPTION_LONG:
case OPTION_ULONG:
case OPTION_U64:
default:
break;
@ -295,6 +297,22 @@ static int get_value(struct parse_opt_ctx_t *p,
return opterror(opt, "expects a numerical value", flags);
return 0;
case OPTION_ULONG:
if (unset) {
*(unsigned long *)opt->value = 0;
return 0;
}
if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
*(unsigned long *)opt->value = opt->defval;
return 0;
}
if (get_arg(p, opt, flags, &arg))
return -1;
*(unsigned long *)opt->value = strtoul(arg, (char **)&s, 10);
if (*s)
return opterror(opt, "expects a numerical value", flags);
return 0;
case OPTION_U64:
if (unset) {
*(u64 *)opt->value = 0;
@ -703,6 +721,7 @@ static void print_option_help(const struct option *opts, int full)
case OPTION_ARGUMENT:
break;
case OPTION_LONG:
case OPTION_ULONG:
case OPTION_U64:
case OPTION_INTEGER:
case OPTION_UINTEGER:
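
The new OPTION_ULONG case relies on the usual strtoul-plus-end-pointer check to
reject non-numeric arguments. A self-contained sketch of that validation pattern
(illustrative only, not perf's option framework):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Parse an unsigned long, rejecting the string if anything is left
 * over after the number, like the OPTION_ULONG case above. */
static int parse_ulong(const char *arg, unsigned long *val)
{
    char *end;

    errno = 0;
    *val = strtoul(arg, &end, 10);
    if (errno || end == arg || *end != '\0')
        return -1;    /* "expects a numerical value" */
    return 0;
}

int main(void)
{
    unsigned long v;

    if (!parse_ulong("128", &v))
        printf("parsed %lu\n", v);
    if (parse_ulong("12x", &v))
        printf("rejected non-numerical value\n");
    return 0;
}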


@ -25,6 +25,7 @@ enum parse_opt_type {
OPTION_STRING,
OPTION_INTEGER,
OPTION_LONG,
OPTION_ULONG,
OPTION_CALLBACK,
OPTION_U64,
OPTION_UINTEGER,
@ -133,6 +134,7 @@ struct option {
#define OPT_INTEGER(s, l, v, h) { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) }
#define OPT_UINTEGER(s, l, v, h) { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) }
#define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
#define OPT_ULONG(s, l, v, h) { .type = OPTION_ULONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned long *), .help = (h) }
#define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h) }
#define OPT_STRING_OPTARG(s, l, v, a, h, d) \


@ -0,0 +1,19 @@
For --xed the xed tool is needed. Here is how to install it:
$ git clone https://github.com/intelxed/mbuild.git mbuild
$ git clone https://github.com/intelxed/xed
$ cd xed
$ ./mfile.py --share
$ ./mfile.py examples
$ sudo ./mfile.py --prefix=/usr/local install
$ sudo ldconfig
$ sudo cp obj/examples/xed /usr/local/bin
Basic xed testing:
$ xed | head -3
ERROR: required argument(s) were missing
Copyright (C) 2017, Intel Corporation. All rights reserved.
XED version: [v10.0-328-g7d62c8c49b7b]
$


@ -106,7 +106,7 @@ in transaction, respectively.
While it is possible to create scripts to analyze the data, an alternative
approach is available to export the data to a sqlite or postgresql database.
Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
and to script call-graph-from-sql.py for an example of using the database.
and to script exported-sql-viewer.py for an example of using the database.
There is also script intel-pt-events.py which provides an example of how to
unpack the raw data for power events and PTWRITE.


@ -11,10 +11,11 @@
l synthesize last branch entries (use with i or x)
s skip initial number of events
The default is all events i.e. the same as --itrace=ibxwpe
The default is all events i.e. the same as --itrace=ibxwpe,
except for perf script where it is --itrace=ce
In addition, the period (default 100000) for instructions events
can be specified in units of:
In addition, the period (default 100000, except for perf script where it is 1)
for instructions events can be specified in units of:
i instructions
t ticks


@ -383,6 +383,24 @@ include::itrace.txt[]
will be printed. Each entry has function name and file/line. Enabled by
default, disable with --no-inline.
--insn-trace::
Show instruction stream for intel_pt traces. Combine with --xed to
show disassembly.
--xed::
Run xed disassembler on output. Requires installing the xed disassembler.
--call-trace::
Show call stream for intel_pt traces. The CPUs are interleaved, but
can be filtered with -C.
--call-ret-trace::
Show call and return stream for intel_pt traces.
--graph-function::
For itrace, only show the specified functions and their callees.
Multiple functions can be separated by comma.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],


@ -171,6 +171,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
--kernel-syscall-graph::
Show the kernel callchains on the syscall exit path.
--max-events=N::
Stop after processing N events. Note that strace-like events are considered
only at exit time or when a syscall is interrupted, i.e. in those cases this
option is equivalent to the number of lines printed.
--max-stack::
Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. Note that at this point
@ -238,6 +243,68 @@ Trace syscalls, major and minor pagefaults:
As you can see, there was major pagefault in python process, from
CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so.
Trace the first 4 open, openat or open_by_handle_at syscalls (in the future more syscalls may match here):
$ perf trace -e open* --max-events 4
[root@jouet perf]# trace -e open* --max-events 4
2272.992 ( 0.037 ms): gnome-shell/1370 openat(dfd: CWD, filename: /proc/self/stat) = 31
2277.481 ( 0.139 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
3026.398 ( 0.076 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
4294.665 ( 0.015 ms): sed/15879 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3
$
Trace the first minor page fault when running a workload:
# perf trace -F min --max-stack=7 --max-events 1 sleep 1
0.000 ( 0.000 ms): sleep/18006 minfault [__clear_user+0x1a] => 0x5626efa56080 (?k)
__clear_user ([kernel.kallsyms])
load_elf_binary ([kernel.kallsyms])
search_binary_handler ([kernel.kallsyms])
__do_execve_file.isra.33 ([kernel.kallsyms])
__x64_sys_execve ([kernel.kallsyms])
do_syscall_64 ([kernel.kallsyms])
entry_SYSCALL_64 ([kernel.kallsyms])
#
Trace the next min page fault to take place on the first CPU:
# perf trace -F min --call-graph=dwarf --max-events 1 --cpu 0
0.000 ( 0.000 ms): Web Content/17136 minfault [js::gc::Chunk::fetchNextDecommittedArena+0x4b] => 0x7fbe6181b000 (?.)
js::gc::FreeSpan::initAsEmpty (inlined)
js::gc::Arena::setAsNotAllocated (inlined)
js::gc::Chunk::fetchNextDecommittedArena (/usr/lib64/firefox/libxul.so)
js::gc::Chunk::allocateArena (/usr/lib64/firefox/libxul.so)
js::gc::GCRuntime::allocateArena (/usr/lib64/firefox/libxul.so)
js::gc::ArenaLists::allocateFromArena (/usr/lib64/firefox/libxul.so)
js::gc::GCRuntime::tryNewTenuredThing<JSString, (js::AllowGC)1> (inlined)
js::AllocateString<JSString, (js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
js::Allocate<JSThinInlineString, (js::AllowGC)1> (inlined)
JSThinInlineString::new_<(js::AllowGC)1> (inlined)
AllocateInlineString<(js::AllowGC)1, unsigned char> (inlined)
js::ConcatStrings<(js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
[0x18b26e6bc2bd] (/tmp/perf-17136.map)
#
Trace the next two sched:sched_switch events, four block:*_plug events, the
next block:*_unplug and the next three net:*dev_queue events, this last one
with a backtrace of at most 16 entries, system wide:
# perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/
0.000 :0/0 sched:sched_switch:swapper/2:0 [120] S ==> rcu_sched:10 [120]
0.015 rcu_sched/10 sched:sched_switch:rcu_sched:10 [120] R ==> swapper/2:0 [120]
254.198 irq/50-iwlwifi/680 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=66
__dev_queue_xmit ([kernel.kallsyms])
273.977 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=78
__dev_queue_xmit ([kernel.kallsyms])
274.007 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051ff00 len=78
__dev_queue_xmit ([kernel.kallsyms])
2930.140 kworker/u16:58/2722 block:block_plug:[kworker/u16:58]
2930.162 kworker/u16:58/2722 block:block_unplug:[kworker/u16:58] 1
4466.094 jbd2/dm-2-8/748 block:block_plug:[jbd2/dm-2-8]
8050.123 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
8050.271 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
#
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script[1]


@ -23,7 +23,7 @@ create_table_from_c()
{
local sc nr last_sc
create_table_exe=`mktemp /tmp/create-table-XXXXXX`
create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX`
{


@ -1,3 +1,5 @@
ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
endif
PERF_HAVE_JITDUMP := 1


@ -0,0 +1,169 @@
// SPDX-License-Identifier: GPL-2.0
/* Return 1 if 'cond' is a valid SPARC integer branch condition suffix (e.g. "e", "ne", "gt"). */
static int is_branch_cond(const char *cond)
{
if (cond[0] == '\0')
return 1;
if (cond[0] == 'a' && cond[1] == '\0')
return 1;
if (cond[0] == 'c' &&
(cond[1] == 'c' || cond[1] == 's') &&
cond[2] == '\0')
return 1;
if (cond[0] == 'e' &&
(cond[1] == '\0' ||
(cond[1] == 'q' && cond[2] == '\0')))
return 1;
if (cond[0] == 'g' &&
(cond[1] == '\0' ||
(cond[1] == 't' && cond[2] == '\0') ||
(cond[1] == 'e' && cond[2] == '\0') ||
(cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0')))
return 1;
if (cond[0] == 'l' &&
(cond[1] == '\0' ||
(cond[1] == 't' && cond[2] == '\0') ||
(cond[1] == 'u' && cond[2] == '\0') ||
(cond[1] == 'e' && cond[2] == '\0') ||
(cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0')))
return 1;
if (cond[0] == 'n' &&
(cond[1] == '\0' ||
(cond[1] == 'e' && cond[2] == '\0') ||
(cond[1] == 'z' && cond[2] == '\0') ||
(cond[1] == 'e' && cond[2] == 'g' && cond[3] == '\0')))
return 1;
if (cond[0] == 'b' &&
cond[1] == 'p' &&
cond[2] == 'o' &&
cond[3] == 's' &&
cond[4] == '\0')
return 1;
if (cond[0] == 'v' &&
(cond[1] == 'c' || cond[1] == 's') &&
cond[2] == '\0')
return 1;
if (cond[0] == 'b' &&
cond[1] == 'z' &&
cond[2] == '\0')
return 1;
return 0;
}
/* Return 1 if 'cond' is a valid register-contents branch suffix (as in brz, brnz, brgez, ...). */
static int is_branch_reg_cond(const char *cond)
{
if ((cond[0] == 'n' || cond[0] == 'l') &&
cond[1] == 'z' &&
cond[2] == '\0')
return 1;
if (cond[0] == 'z' &&
cond[1] == '\0')
return 1;
if ((cond[0] == 'g' || cond[0] == 'l') &&
cond[1] == 'e' &&
cond[2] == 'z' &&
cond[3] == '\0')
return 1;
if (cond[0] == 'g' &&
cond[1] == 'z' &&
cond[2] == '\0')
return 1;
return 0;
}
/* Return 1 if 'cond' is a valid floating-point branch condition suffix (as in fbe, fbne, fbug, ...). */
static int is_branch_float_cond(const char *cond)
{
if (cond[0] == '\0')
return 1;
if ((cond[0] == 'a' || cond[0] == 'e' ||
cond[0] == 'z' || cond[0] == 'g' ||
cond[0] == 'l' || cond[0] == 'n' ||
cond[0] == 'o' || cond[0] == 'u') &&
cond[1] == '\0')
return 1;
if (((cond[0] == 'g' && cond[1] == 'e') ||
(cond[0] == 'l' && (cond[1] == 'e' ||
cond[1] == 'g')) ||
(cond[0] == 'n' && (cond[1] == 'e' ||
cond[1] == 'z')) ||
(cond[0] == 'u' && (cond[1] == 'e' ||
cond[1] == 'g' ||
cond[1] == 'l'))) &&
cond[2] == '\0')
return 1;
if (cond[0] == 'u' &&
(cond[1] == 'g' || cond[1] == 'l') &&
cond[2] == 'e' &&
cond[3] == '\0')
return 1;
return 0;
}
static struct ins_ops *sparc__associate_instruction_ops(struct arch *arch, const char *name)
{
struct ins_ops *ops = NULL;
if (!strcmp(name, "call") ||
!strcmp(name, "jmp") ||
!strcmp(name, "jmpl")) {
ops = &call_ops;
} else if (!strcmp(name, "ret") ||
!strcmp(name, "retl") ||
!strcmp(name, "return")) {
ops = &ret_ops;
} else if (!strcmp(name, "mov")) {
ops = &mov_ops;
} else {
if (name[0] == 'c' &&
(name[1] == 'w' || name[1] == 'x'))
name += 2;
if (name[0] == 'b') {
const char *cond = name + 1;
if (cond[0] == 'r') {
if (is_branch_reg_cond(cond + 1))
ops = &jump_ops;
} else if (is_branch_cond(cond)) {
ops = &jump_ops;
}
} else if (name[0] == 'f' && name[1] == 'b') {
if (is_branch_float_cond(name + 2))
ops = &jump_ops;
}
}
if (ops)
arch__associate_ins_ops(arch, name, ops);
return ops;
}
static int sparc__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
{
if (!arch->initialized) {
arch->initialized = true;
arch->associate_instruction_ops = sparc__associate_instruction_ops;
arch->objdump.comment_char = '#';
}
return 0;
}


@ -592,6 +592,9 @@ static void record__init_features(struct record *rec)
if (!rec->opts.full_auxtrace)
perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
perf_header__clear_feat(&session->header, HEADER_CLOCKID);
perf_header__clear_feat(&session->header, HEADER_STAT);
}
@ -897,6 +900,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
record__init_features(rec);
if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
if (forks) {
err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
argv, data->is_pipe,
@ -1337,6 +1343,19 @@ static const struct clockid_map clockids[] = {
CLOCKID_END,
};
static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
{
struct timespec res;
*res_ns = 0;
if (!clock_getres(clk_id, &res))
*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
else
pr_warning("WARNING: Failed to determine specified clock resolution.\n");
return 0;
}
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
struct record_opts *opts = (struct record_opts *)opt->value;
@ -1360,7 +1379,7 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
/* if its a number, we're done */
if (sscanf(str, "%d", &opts->clockid) == 1)
return 0;
return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
/* allow a "CLOCK_" prefix to the name */
if (!strncasecmp(str, "CLOCK_", 6))
@ -1369,7 +1388,8 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
for (cm = clockids; cm->name; cm++) {
if (!strcasecmp(str, cm->name)) {
opts->clockid = cm->clockid;
return 0;
return get_clockid_res(opts->clockid,
&opts->clockid_res_ns);
}
}
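
get_clockid_res() above records the clock resolution in nanoseconds, warning and
leaving it at 0 if clock_getres() fails. A standalone sketch of the same query
(CLOCK_MONOTONIC is used here only as an example clock):

#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC 1000000000ULL

/* Resolution of clk_id in nanoseconds, or 0 if it cannot be determined. */
static unsigned long long clock_res_ns(clockid_t clk_id)
{
    struct timespec res;

    if (clock_getres(clk_id, &res))
        return 0;
    return (unsigned long long)res.tv_sec * NSEC_PER_SEC + res.tv_nsec;
}

int main(void)
{
    printf("CLOCK_MONOTONIC resolution: %llu ns\n",
           clock_res_ns(CLOCK_MONOTONIC));
    return 0;
}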


@ -44,6 +44,7 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <subcmd/pager.h>
#include "sane_ctype.h"
@ -912,7 +913,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
struct perf_insn *x, u8 *inbuf, int len,
int insn, FILE *fp)
int insn, FILE *fp, int *total_cycles)
{
int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", ip,
dump_insn(x, ip, inbuf, len, NULL),
@ -921,7 +922,8 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
en->flags.in_tx ? " INTX" : "",
en->flags.abort ? " ABORT" : "");
if (en->flags.cycles) {
printed += fprintf(fp, " %d cycles", en->flags.cycles);
*total_cycles += en->flags.cycles;
printed += fprintf(fp, " %d cycles [%d]", en->flags.cycles, *total_cycles);
if (insn)
printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles);
}
@ -978,6 +980,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
u8 buffer[MAXBB];
unsigned off;
struct symbol *lastsym = NULL;
int total_cycles = 0;
if (!(br && br->nr))
return 0;
@ -998,7 +1001,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += ip__fprintf_sym(br->entries[nr - 1].from, thread,
x.cpumode, x.cpu, &lastsym, attr, fp);
printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
&x, buffer, len, 0, fp);
&x, buffer, len, 0, fp, &total_cycles);
}
/* Print all blocks */
@ -1026,7 +1029,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
if (ip == end) {
printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp);
printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp,
&total_cycles);
break;
} else {
printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip,
@ -1104,6 +1108,35 @@ out:
return printed;
}
static const char *resolve_branch_sym(struct perf_sample *sample,
struct perf_evsel *evsel,
struct thread *thread,
struct addr_location *al,
u64 *ip)
{
struct addr_location addr_al;
struct perf_event_attr *attr = &evsel->attr;
const char *name = NULL;
if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
if (sample_addr_correlates_sym(attr)) {
thread__resolve(thread, &addr_al, sample);
if (addr_al.sym)
name = addr_al.sym->name;
else
*ip = sample->addr;
} else {
*ip = sample->addr;
}
} else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
if (al->sym)
name = al->sym->name;
else
*ip = sample->ip;
}
return name;
}
static int perf_sample__fprintf_callindent(struct perf_sample *sample,
struct perf_evsel *evsel,
struct thread *thread,
@ -1111,7 +1144,6 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
{
struct perf_event_attr *attr = &evsel->attr;
size_t depth = thread_stack__depth(thread);
struct addr_location addr_al;
const char *name = NULL;
static int spacing;
int len = 0;
@ -1125,22 +1157,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
depth += 1;
if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
if (sample_addr_correlates_sym(attr)) {
thread__resolve(thread, &addr_al, sample);
if (addr_al.sym)
name = addr_al.sym->name;
else
ip = sample->addr;
} else {
ip = sample->addr;
}
} else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
if (al->sym)
name = al->sym->name;
else
ip = sample->ip;
}
name = resolve_branch_sym(sample, evsel, thread, al, &ip);
if (PRINT_FIELD(DSO) && !(PRINT_FIELD(IP) || PRINT_FIELD(ADDR))) {
dlen += fprintf(fp, "(");
@ -1646,6 +1663,47 @@ static void perf_sample__fprint_metric(struct perf_script *script,
}
}
static bool show_event(struct perf_sample *sample,
struct perf_evsel *evsel,
struct thread *thread,
struct addr_location *al)
{
int depth = thread_stack__depth(thread);
if (!symbol_conf.graph_function)
return true;
if (thread->filter) {
if (depth <= thread->filter_entry_depth) {
thread->filter = false;
return false;
}
return true;
} else {
const char *s = symbol_conf.graph_function;
u64 ip;
const char *name = resolve_branch_sym(sample, evsel, thread, al,
&ip);
unsigned nlen;
if (!name)
return false;
nlen = strlen(name);
while (*s) {
unsigned len = strcspn(s, ",");
if (nlen == len && !strncmp(name, s, len)) {
thread->filter = true;
thread->filter_entry_depth = depth;
return true;
}
s += len;
if (*s == ',')
s++;
}
return false;
}
}
static void process_event(struct perf_script *script,
struct perf_sample *sample, struct perf_evsel *evsel,
struct addr_location *al,
@ -1660,6 +1718,9 @@ static void process_event(struct perf_script *script,
if (output[type].fields == 0)
return;
if (!show_event(sample, evsel, thread, al))
return;
++es->samples;
perf_sample__fprintf_start(sample, thread, evsel,
@ -1737,6 +1798,9 @@ static void process_event(struct perf_script *script,
if (PRINT_FIELD(METRIC))
perf_sample__fprint_metric(script, thread, evsel, sample, fp);
if (verbose)
fflush(fp);
}
static struct scripting_ops *scripting_ops;
@ -3100,6 +3164,44 @@ static int perf_script__process_auxtrace_info(struct perf_session *session,
#define perf_script__process_auxtrace_info 0
#endif
static int parse_insn_trace(const struct option *opt __maybe_unused,
const char *str __maybe_unused,
int unset __maybe_unused)
{
parse_output_fields(NULL, "+insn,-event,-period", 0);
itrace_parse_synth_opts(opt, "i0ns", 0);
nanosecs = true;
return 0;
}
static int parse_xed(const struct option *opt __maybe_unused,
const char *str __maybe_unused,
int unset __maybe_unused)
{
force_pager("xed -F insn: -A -64 | less");
return 0;
}
static int parse_call_trace(const struct option *opt __maybe_unused,
const char *str __maybe_unused,
int unset __maybe_unused)
{
parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0);
itrace_parse_synth_opts(opt, "cewp", 0);
nanosecs = true;
return 0;
}
static int parse_callret_trace(const struct option *opt __maybe_unused,
const char *str __maybe_unused,
int unset __maybe_unused)
{
parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent,+flags", 0);
itrace_parse_synth_opts(opt, "crewp", 0);
nanosecs = true;
return 0;
}
int cmd_script(int argc, const char **argv)
{
bool show_full_info = false;
@ -3109,7 +3211,10 @@ int cmd_script(int argc, const char **argv)
char *rec_script_path = NULL;
char *rep_script_path = NULL;
struct perf_session *session;
struct itrace_synth_opts itrace_synth_opts = { .set = false, };
struct itrace_synth_opts itrace_synth_opts = {
.set = false,
.default_no_sample = true,
};
char *script_path = NULL;
const char **__argv;
int i, j, err = 0;
@ -3184,6 +3289,16 @@ int cmd_script(int argc, const char **argv)
"system-wide collection from all CPUs"),
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
"only consider these symbols"),
OPT_CALLBACK_OPTARG(0, "insn-trace", &itrace_synth_opts, NULL, NULL,
"Decode instructions from itrace", parse_insn_trace),
OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL,
"Run xed disassembler on output", parse_xed),
OPT_CALLBACK_OPTARG(0, "call-trace", &itrace_synth_opts, NULL, NULL,
"Decode calls from from itrace", parse_call_trace),
OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL,
"Decode calls and returns from itrace", parse_callret_trace),
OPT_STRING(0, "graph-function", &symbol_conf.graph_function, "symbol[,symbol...]",
"Only print symbols and callees with --call-trace/--call-ret-trace"),
OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]",
"Stop display of callgraph at these symbols"),
OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
@ -3417,8 +3532,10 @@ int cmd_script(int argc, const char **argv)
exit(-1);
}
if (!script_name)
if (!script_name) {
setup_pager();
use_browser = 0;
}
session = perf_session__new(&data, false, &script.tool);
if (session == NULL)
@ -3439,7 +3556,8 @@ int cmd_script(int argc, const char **argv)
script.session = session;
script__setup_sample_type(&script);
if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT)
if ((output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) ||
symbol_conf.graph_function)
itrace_synth_opts.thread_stack = true;
session->itrace_synth_opts = &itrace_synth_opts;
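
show_event() above implements --graph-function by matching the resolved symbol
name against a comma-separated list with strcspn(). A minimal standalone sketch
of that matching loop (the function names here are made up):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Return true if 'name' exactly matches one entry of the
 * comma-separated list, e.g. "main,do_work". */
static bool name_in_list(const char *name, const char *list)
{
    size_t nlen = strlen(name);
    const char *s = list;

    while (*s) {
        size_t len = strcspn(s, ",");

        if (nlen == len && !strncmp(name, s, len))
            return true;
        s += len;
        if (*s == ',')
            s++;
    }
    return false;
}

int main(void)
{
    printf("%d\n", name_in_list("do_work", "main,do_work"));   /* 1 */
    printf("%d\n", name_in_list("do_work2", "main,do_work"));  /* 0 */
    return 0;
}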


@ -409,6 +409,28 @@ static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
return leader;
}
static bool is_target_alive(struct target *_target,
struct thread_map *threads)
{
struct stat st;
int i;
if (!target__has_task(_target))
return true;
for (i = 0; i < threads->nr; i++) {
char path[PATH_MAX];
scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(),
threads->map[i].pid);
if (!stat(path, &st))
return true;
}
return false;
}
static int __run_perf_stat(int argc, const char **argv, int run_idx)
{
int interval = stat_config.interval;
@ -579,6 +601,8 @@ try_again:
enable_counters();
while (!done) {
nanosleep(&ts, NULL);
if (!is_target_alive(&target, evsel_list->threads))
break;
if (timeout)
break;
if (interval) {
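
is_target_alive() above keeps 'perf stat -p/-t' running only while at least one
monitored task still has a /proc entry. A self-contained sketch of that check
(the procfs mount point is hard-coded to /proc here for simplicity):

#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>

/* True if at least one of the pids still exists in /proc,
 * i.e. at least one monitored task is still alive. */
static bool any_task_alive(const int *pids, int nr)
{
    struct stat st;
    char path[64];
    int i;

    for (i = 0; i < nr; i++) {
        snprintf(path, sizeof(path), "/proc/%d", pids[i]);
        if (!stat(path, &st))
            return true;
    }
    return false;
}

int main(void)
{
    int pids[] = { 1 };    /* PID 1 is normally alive */

    printf("alive: %d\n", any_task_alive(pids, 1));
    return 0;
}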


@ -89,6 +89,8 @@ struct trace {
u64 base_time;
FILE *output;
unsigned long nr_events;
unsigned long nr_events_printed;
unsigned long max_events;
struct strlist *ev_qualifier;
struct {
size_t nr;
@ -1664,6 +1666,8 @@ static int trace__printf_interrupted_entry(struct trace *trace)
printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
ttrace->entry_pending = false;
++trace->nr_events_printed;
return printed;
}
@ -1810,12 +1814,14 @@ static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evse
int max_stack = evsel->attr.sample_max_stack ?
evsel->attr.sample_max_stack :
trace->max_stack;
int err;
if (machine__resolve(trace->host, &al, sample) < 0 ||
thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
if (machine__resolve(trace->host, &al, sample) < 0)
return -1;
return 0;
err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
addr_location__put(&al);
return err;
}
static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
@ -1940,6 +1946,13 @@ errno_print: {
fputc('\n', trace->output);
/*
* We only consider an 'event' for the sake of --max-events a non-filtered
* sys_enter + sys_exit and other tracepoint events.
*/
if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX)
interrupted = true;
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
@ -2072,14 +2085,25 @@ static void bpf_output__fprintf(struct trace *trace,
{
binary__fprintf(sample->raw_data, sample->raw_size, 8,
bpf_output__printer, NULL, trace->output);
++trace->nr_events_printed;
}
static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
union perf_event *event __maybe_unused,
struct perf_sample *sample)
{
struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
struct thread *thread;
int callchain_ret = 0;
/*
* Check if we called perf_evsel__disable(evsel) due to, for instance,
* this event's max_events having been hit and this is an entry coming
* from the ring buffer that we should discard, since the max events
* have already been considered/printed.
*/
if (evsel->disabled)
return 0;
thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
if (sample->callchain) {
callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
@ -2127,6 +2151,12 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
event_format__fprintf(evsel->tp_format, sample->cpu,
sample->raw_data, sample->raw_size,
trace->output);
++trace->nr_events_printed;
if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
perf_evsel__disable(evsel);
perf_evsel__close(evsel);
}
}
}
@ -2137,8 +2167,8 @@ newline:
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
thread__put(thread);
out:
thread__put(thread);
return 0;
}
@ -2225,6 +2255,8 @@ static int trace__pgfault(struct trace *trace,
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
++trace->nr_events_printed;
out:
err = 0;
out_put:
@ -2402,6 +2434,9 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
tracepoint_handler handler = evsel->handler;
handler(trace, evsel, event, sample);
}
if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX)
interrupted = true;
}
static int trace__add_syscall_newtp(struct trace *trace)
@ -2706,7 +2741,7 @@ next_event:
int timeout = done ? 100 : -1;
if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
draining = true;
goto again;
@ -3249,6 +3284,7 @@ int cmd_trace(int argc, const char **argv)
.trace_syscalls = false,
.kernel_syscallchains = false,
.max_stack = UINT_MAX,
.max_events = ULONG_MAX,
};
const char *output_name = NULL;
const struct option trace_options[] = {
@ -3301,6 +3337,8 @@ int cmd_trace(int argc, const char **argv)
&record_parse_callchain_opt),
OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
"Show the kernel callchains on the syscall exit path"),
OPT_ULONG(0, "max-events", &trace.max_events,
"Set the maximum number of events to print, exit after that is reached. "),
OPT_UINTEGER(0, "min-stack", &trace.min_stack,
"Set the minimum stack depth when parsing the callchain, "
"anything below the specified depth will be ignored."),


@ -81,6 +81,7 @@ struct record_opts {
unsigned initial_delay;
bool use_clockid;
clockid_t clockid;
u64 clockid_res_ns;
unsigned int proc_map_timeout;
};


@ -1,339 +0,0 @@
#!/usr/bin/python2
# call-graph-from-sql.py: create call-graph from sql database
# Copyright (c) 2014-2017, Intel Corporation.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms and conditions of the GNU General Public License,
# version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
# To use this script you will need to have exported data using either the
# export-to-sqlite.py or the export-to-postgresql.py script. Refer to those
# scripts for details.
#
# Following on from the example in the export scripts, a
# call-graph can be displayed for the pt_example database like this:
#
# python tools/perf/scripts/python/call-graph-from-sql.py pt_example
#
# Note that for PostgreSQL, this script supports connecting to remote databases
# by setting hostname, port, username, password, and dbname e.g.
#
# python tools/perf/scripts/python/call-graph-from-sql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example"
#
# The result is a GUI window with a tree representing a context-sensitive
# call-graph. Expanding a couple of levels of the tree and adjusting column
# widths to suit will display something like:
#
# Call Graph: pt_example
# Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%)
# v- ls
# v- 2638:2638
# v- _start ld-2.19.so 1 10074071 100.0 211135 100.0
# |- unknown unknown 1 13198 0.1 1 0.0
# >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3
# >- _d_linit_internal ld-2.19.so 1 448152 4.4 11094 5.3
# v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4
# >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1
# >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0
# >- __libc_csu_init ls 1 10354 0.1 10 0.0
# |- _setjmp libc-2.19.so 1 0 0.0 4 0.0
# v- main ls 1 8182043 99.6 180254 99.9
#
# Points to note:
# The top level is a command name (comm)
# The next level is a thread (pid:tid)
# Subsequent levels are functions
# 'Count' is the number of calls
# 'Time' is the elapsed time until the function returns
# Percentages are relative to the level above
# 'Branch Count' is the total number of branches for that function and all
# functions that it calls
import sys
from PySide.QtCore import *
from PySide.QtGui import *
from PySide.QtSql import *
from decimal import *
class TreeItem():
def __init__(self, db, row, parent_item):
self.db = db
self.row = row
self.parent_item = parent_item
self.query_done = False;
self.child_count = 0
self.child_items = []
self.data = ["", "", "", "", "", "", ""]
self.comm_id = 0
self.thread_id = 0
self.call_path_id = 1
self.branch_count = 0
self.time = 0
if not parent_item:
self.setUpRoot()
def setUpRoot(self):
self.query_done = True
query = QSqlQuery(self.db)
ret = query.exec_('SELECT id, comm FROM comms')
if not ret:
raise Exception("Query failed: " + query.lastError().text())
while query.next():
if not query.value(0):
continue
child_item = TreeItem(self.db, self.child_count, self)
self.child_items.append(child_item)
self.child_count += 1
child_item.setUpLevel1(query.value(0), query.value(1))
def setUpLevel1(self, comm_id, comm):
self.query_done = True;
self.comm_id = comm_id
self.data[0] = comm
self.child_items = []
self.child_count = 0
query = QSqlQuery(self.db)
ret = query.exec_('SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id))
if not ret:
raise Exception("Query failed: " + query.lastError().text())
while query.next():
child_item = TreeItem(self.db, self.child_count, self)
self.child_items.append(child_item)
self.child_count += 1
child_item.setUpLevel2(comm_id, query.value(0), query.value(1), query.value(2))
def setUpLevel2(self, comm_id, thread_id, pid, tid):
self.comm_id = comm_id
self.thread_id = thread_id
self.data[0] = str(pid) + ":" + str(tid)
def getChildItem(self, row):
return self.child_items[row]
def getParentItem(self):
return self.parent_item
def getRow(self):
return self.row
def timePercent(self, b):
if not self.time:
return "0.0"
x = (b * Decimal(100)) / self.time
return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
def branchPercent(self, b):
if not self.branch_count:
return "0.0"
x = (b * Decimal(100)) / self.branch_count
return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
def addChild(self, call_path_id, name, dso, count, time, branch_count):
child_item = TreeItem(self.db, self.child_count, self)
child_item.comm_id = self.comm_id
child_item.thread_id = self.thread_id
child_item.call_path_id = call_path_id
child_item.branch_count = branch_count
child_item.time = time
child_item.data[0] = name
if dso == "[kernel.kallsyms]":
dso = "[kernel]"
child_item.data[1] = dso
child_item.data[2] = str(count)
child_item.data[3] = str(time)
child_item.data[4] = self.timePercent(time)
child_item.data[5] = str(branch_count)
child_item.data[6] = self.branchPercent(branch_count)
self.child_items.append(child_item)
self.child_count += 1
def selectCalls(self):
self.query_done = True;
query = QSqlQuery(self.db)
ret = query.exec_('SELECT id, call_path_id, branch_count, call_time, return_time, '
'( SELECT name FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ), '
'( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), '
'( SELECT ip FROM call_paths where id = call_path_id ) '
'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) +
' ORDER BY call_path_id')
if not ret:
raise Exception("Query failed: " + query.lastError().text())
last_call_path_id = 0
name = ""
dso = ""
count = 0
branch_count = 0
total_branch_count = 0
time = 0
total_time = 0
while query.next():
if query.value(1) == last_call_path_id:
count += 1
branch_count += query.value(2)
time += query.value(4) - query.value(3)
else:
if count:
self.addChild(last_call_path_id, name, dso, count, time, branch_count)
last_call_path_id = query.value(1)
name = query.value(5)
dso = query.value(6)
count = 1
total_branch_count += branch_count
total_time += time
branch_count = query.value(2)
time = query.value(4) - query.value(3)
if count:
self.addChild(last_call_path_id, name, dso, count, time, branch_count)
total_branch_count += branch_count
total_time += time
# Top level does not have time or branch count, so fix that here
if total_branch_count > self.branch_count:
self.branch_count = total_branch_count
if self.branch_count:
for child_item in self.child_items:
child_item.data[6] = self.branchPercent(child_item.branch_count)
if total_time > self.time:
self.time = total_time
if self.time:
for child_item in self.child_items:
child_item.data[4] = self.timePercent(child_item.time)
def childCount(self):
if not self.query_done:
self.selectCalls()
return self.child_count
def columnCount(self):
return 7
def columnHeader(self, column):
headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
return headers[column]
def getData(self, column):
return self.data[column]
class TreeModel(QAbstractItemModel):
def __init__(self, db, parent=None):
super(TreeModel, self).__init__(parent)
self.db = db
self.root = TreeItem(db, 0, None)
def columnCount(self, parent):
return self.root.columnCount()
def rowCount(self, parent):
if parent.isValid():
parent_item = parent.internalPointer()
else:
parent_item = self.root
return parent_item.childCount()
def headerData(self, section, orientation, role):
if role == Qt.TextAlignmentRole:
if section > 1:
return Qt.AlignRight
if role != Qt.DisplayRole:
return None
if orientation != Qt.Horizontal:
return None
return self.root.columnHeader(section)
def parent(self, child):
child_item = child.internalPointer()
if child_item is self.root:
return QModelIndex()
parent_item = child_item.getParentItem()
return self.createIndex(parent_item.getRow(), 0, parent_item)
def index(self, row, column, parent):
if parent.isValid():
parent_item = parent.internalPointer()
else:
parent_item = self.root
child_item = parent_item.getChildItem(row)
return self.createIndex(row, column, child_item)
def data(self, index, role):
if role == Qt.TextAlignmentRole:
if index.column() > 1:
return Qt.AlignRight
if role != Qt.DisplayRole:
return None
index_item = index.internalPointer()
return index_item.getData(index.column())
class MainWindow(QMainWindow):
def __init__(self, db, dbname, parent=None):
super(MainWindow, self).__init__(parent)
self.setObjectName("MainWindow")
self.setWindowTitle("Call Graph: " + dbname)
self.move(100, 100)
self.resize(800, 600)
style = self.style()
icon = style.standardIcon(QStyle.SP_MessageBoxInformation)
self.setWindowIcon(icon);
self.model = TreeModel(db)
self.view = QTreeView()
self.view.setModel(self.model)
self.setCentralWidget(self.view)
if __name__ == '__main__':
if (len(sys.argv) < 2):
print >> sys.stderr, "Usage is: call-graph-from-sql.py <database name>"
raise Exception("Too few arguments")
dbname = sys.argv[1]
is_sqlite3 = False
try:
f = open(dbname)
if f.read(15) == "SQLite format 3":
is_sqlite3 = True
f.close()
except:
pass
if is_sqlite3:
db = QSqlDatabase.addDatabase('QSQLITE')
else:
db = QSqlDatabase.addDatabase('QPSQL')
opts = dbname.split()
for opt in opts:
if '=' in opt:
opt = opt.split('=')
if opt[0] == 'hostname':
db.setHostName(opt[1])
elif opt[0] == 'port':
db.setPort(int(opt[1]))
elif opt[0] == 'username':
db.setUserName(opt[1])
elif opt[0] == 'password':
db.setPassword(opt[1])
elif opt[0] == 'dbname':
dbname = opt[1]
else:
dbname = opt
db.setDatabaseName(dbname)
if not db.open():
raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
app = QApplication(sys.argv)
window = MainWindow(db, dbname)
window.show()
err = app.exec_()
db.close()
sys.exit(err)


@ -59,7 +59,7 @@ import datetime
# pt_example=# \q
#
# An example of using the database is provided by the script
# call-graph-from-sql.py. Refer to that script for details.
# exported-sql-viewer.py. Refer to that script for details.
#
# Tables:
#


@ -40,7 +40,7 @@ import datetime
# sqlite> .quit
#
# An example of using the database is provided by the script
# call-graph-from-sql.py. Refer to that script for details.
# exported-sql-viewer.py. Refer to that script for details.
#
# The database structure is practically the same as created by the script
# export-to-postgresql.py. Refer to that script for details. A notable

File diff suppressed because it is too large.


@ -139,6 +139,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i
#include "arch/x86/annotate/instructions.c"
#include "arch/powerpc/annotate/instructions.c"
#include "arch/s390/annotate/instructions.c"
#include "arch/sparc/annotate/instructions.c"
static struct arch architectures[] = {
{
@ -170,6 +171,13 @@ static struct arch architectures[] = {
.comment_char = '#',
},
},
{
.name = "sparc",
.init = sparc__annotate_init,
.objdump = {
.comment_char = '#',
},
},
};
static void ins__delete(struct ins_operands *ops)


@ -962,16 +962,23 @@ s64 perf_event__process_auxtrace(struct perf_session *session,
#define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ 64
#define PERF_ITRACE_MAX_LAST_BRANCH_SZ 1024
void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
bool no_sample)
{
synth_opts->instructions = true;
synth_opts->branches = true;
synth_opts->transactions = true;
synth_opts->ptwrites = true;
synth_opts->pwr_events = true;
synth_opts->errors = true;
synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
if (no_sample) {
synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
synth_opts->period = 1;
synth_opts->calls = true;
} else {
synth_opts->instructions = true;
synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
}
synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
synth_opts->initial_skip = 0;
@ -999,7 +1006,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
}
if (!str) {
itrace_synth_opts__set_default(synth_opts);
itrace_synth_opts__set_default(synth_opts, false);
return 0;
}


@ -58,6 +58,7 @@ enum itrace_period_type {
/**
* struct itrace_synth_opts - AUX area tracing synthesis options.
* @set: indicates whether or not options have been set
* @default_no_sample: Default to no sampling.
* @inject: indicates the event (not just the sample) must be fully synthesized
* because 'perf inject' will write it out
* @instructions: whether to synthesize 'instructions' events
@ -82,6 +83,7 @@ enum itrace_period_type {
*/
struct itrace_synth_opts {
bool set;
bool default_no_sample;
bool inject;
bool instructions;
bool branches;
@ -528,7 +530,8 @@ int perf_event__process_auxtrace_error(struct perf_session *session,
union perf_event *event);
int itrace_parse_synth_opts(const struct option *opt, const char *str,
int unset);
void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts);
void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
bool no_sample);
size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp);
void perf_session__auxtrace_error_inc(struct perf_session *session,


@ -1432,7 +1432,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
etm->synth_opts = *session->itrace_synth_opts;
} else {
itrace_synth_opts__set_default(&etm->synth_opts);
itrace_synth_opts__set_default(&etm->synth_opts,
session->itrace_synth_opts->default_no_sample);
etm->synth_opts.callchain = false;
}


@ -63,6 +63,7 @@ struct perf_env {
struct numa_node *numa_nodes;
struct memory_node *memory_nodes;
unsigned long long memory_bsize;
u64 clockid_res_ns;
};
extern struct perf_env perf_env;


@ -358,7 +358,7 @@ void perf_evlist__disable(struct perf_evlist *evlist)
struct perf_evsel *pos;
evlist__for_each_entry(evlist, pos) {
if (!perf_evsel__is_group_leader(pos) || !pos->fd)
if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd)
continue;
perf_evsel__disable(pos);
}


@ -232,6 +232,7 @@ void perf_evsel__init(struct perf_evsel *evsel,
evsel->leader = evsel;
evsel->unit = "";
evsel->scale = 1.0;
evsel->max_events = ULONG_MAX;
evsel->evlist = NULL;
evsel->bpf_fd = -1;
INIT_LIST_HEAD(&evsel->node);
@ -793,6 +794,9 @@ static void apply_config_terms(struct perf_evsel *evsel,
case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
max_stack = term->val.max_stack;
break;
case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS:
evsel->max_events = term->val.max_events;
break;
case PERF_EVSEL__CONFIG_TERM_INHERIT:
/*
* attr->inherit should has already been set by
@ -1203,16 +1207,27 @@ int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter)
int perf_evsel__enable(struct perf_evsel *evsel)
{
return perf_evsel__run_ioctl(evsel,
PERF_EVENT_IOC_ENABLE,
0);
int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0);
if (!err)
evsel->disabled = false;
return err;
}
int perf_evsel__disable(struct perf_evsel *evsel)
{
return perf_evsel__run_ioctl(evsel,
PERF_EVENT_IOC_DISABLE,
0);
int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0);
/*
* We mark it disabled here so that tools that disable an event can
* ignore events after they disable it. I.e. the ring buffer may have
* already a few more events queued up before the kernel got the stop
* request.
*/
if (!err)
evsel->disabled = true;
return err;
}
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
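
perf_evsel__enable()/__disable() now update a 'disabled' flag only when the
ioctl succeeds, so handlers can drop samples that were already queued in the
ring buffer before the kernel stopped the event (as trace__event_handler does
above). A minimal sketch of that state-tracking idea, with the kernel ioctl
replaced by a stub that always succeeds:

#include <stdbool.h>
#include <stdio.h>

struct evsel {
    bool disabled;
};

/* Stand-in for the PERF_EVENT_IOC_DISABLE ioctl. */
static int kernel_disable(struct evsel *e)
{
    (void)e;
    return 0;
}

static int evsel_disable(struct evsel *e)
{
    int err = kernel_disable(e);

    /* Only remember the new state if the kernel accepted the request. */
    if (!err)
        e->disabled = true;
    return err;
}

static void handle_sample(struct evsel *e, int sample)
{
    if (e->disabled)    /* late sample from the ring buffer: drop it */
        return;
    printf("sample %d\n", sample);
}

int main(void)
{
    struct evsel e = { .disabled = false };

    handle_sample(&e, 1);
    evsel_disable(&e);
    handle_sample(&e, 2);    /* dropped */
    return 0;
}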


@ -46,6 +46,7 @@ enum term_type {
PERF_EVSEL__CONFIG_TERM_STACK_USER,
PERF_EVSEL__CONFIG_TERM_INHERIT,
PERF_EVSEL__CONFIG_TERM_MAX_STACK,
PERF_EVSEL__CONFIG_TERM_MAX_EVENTS,
PERF_EVSEL__CONFIG_TERM_OVERWRITE,
PERF_EVSEL__CONFIG_TERM_DRV_CFG,
PERF_EVSEL__CONFIG_TERM_BRANCH,
@ -65,6 +66,7 @@ struct perf_evsel_config_term {
bool inherit;
bool overwrite;
char *branch;
unsigned long max_events;
} val;
bool weak;
};
@ -99,6 +101,8 @@ struct perf_evsel {
struct perf_counts *prev_raw_counts;
int idx;
u32 ids;
unsigned long max_events;
unsigned long nr_events_printed;
char *name;
double scale;
const char *unit;
@ -119,6 +123,7 @@ struct perf_evsel {
bool snapshot;
bool supported;
bool needs_swap;
bool disabled;
bool no_aux_samples;
bool immediate;
bool system_wide;


@ -29,6 +29,12 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
#elif defined(__powerpc__)
#define GEN_ELF_ARCH EM_PPC
#define GEN_ELF_CLASS ELFCLASS32
#elif defined(__sparc__) && defined(__arch64__)
#define GEN_ELF_ARCH EM_SPARCV9
#define GEN_ELF_CLASS ELFCLASS64
#elif defined(__sparc__)
#define GEN_ELF_ARCH EM_SPARC
#define GEN_ELF_CLASS ELFCLASS32
#else
#error "unsupported architecture"
#endif


@ -1034,6 +1034,13 @@ static int write_auxtrace(struct feat_fd *ff,
return err;
}
static int write_clockid(struct feat_fd *ff,
struct perf_evlist *evlist __maybe_unused)
{
return do_write(ff, &ff->ph->env.clockid_res_ns,
sizeof(ff->ph->env.clockid_res_ns));
}
static int cpu_cache_level__sort(const void *a, const void *b)
{
struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
@ -1508,6 +1515,12 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
fprintf(fp, "# Core ID and Socket ID information is not available\n");
}
static void print_clockid(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n",
ff->ph->env.clockid_res_ns * 1000);
}
static void free_event_desc(struct perf_evsel *events)
{
struct perf_evsel *evsel;
@ -2531,6 +2544,15 @@ out:
return ret;
}
static int process_clockid(struct feat_fd *ff,
void *data __maybe_unused)
{
if (do_read_u64(ff, &ff->ph->env.clockid_res_ns))
return -1;
return 0;
}
struct feature_ops {
int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
void (*print)(struct feat_fd *ff, FILE *fp);
@ -2590,6 +2612,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPN(CACHE, cache, true),
FEAT_OPR(SAMPLE_TIME, sample_time, false),
FEAT_OPR(MEM_TOPOLOGY, mem_topology, true),
FEAT_OPR(CLOCKID, clockid, false)
};
struct header_print_data {


@ -38,6 +38,7 @@ enum {
HEADER_CACHE,
HEADER_SAMPLE_TIME,
HEADER_MEM_TOPOLOGY,
HEADER_CLOCKID,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};


@ -910,7 +910,8 @@ int intel_bts_process_auxtrace_info(union perf_event *event,
if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
bts->synth_opts = *session->itrace_synth_opts;
} else {
itrace_synth_opts__set_default(&bts->synth_opts);
itrace_synth_opts__set_default(&bts->synth_opts,
session->itrace_synth_opts->default_no_sample);
if (session->itrace_synth_opts)
bts->synth_opts.thread_stack =
session->itrace_synth_opts->thread_stack;


@ -2559,7 +2559,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
pt->synth_opts = *session->itrace_synth_opts;
} else {
itrace_synth_opts__set_default(&pt->synth_opts);
itrace_synth_opts__set_default(&pt->synth_opts,
session->itrace_synth_opts->default_no_sample);
if (use_browser != -1) {
pt->synth_opts.branches = false;
pt->synth_opts.callchain = true;


@ -926,6 +926,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
[PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit",
[PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit",
[PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack",
[PARSE_EVENTS__TERM_TYPE_MAX_EVENTS] = "nr",
[PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite",
[PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite",
[PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config",
@ -1037,6 +1038,9 @@ do { \
case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
CHECK_TYPE_VAL(NUM);
break;
case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
CHECK_TYPE_VAL(NUM);
break;
default:
err->str = strdup("unknown term");
err->idx = term->err_term;
@ -1084,6 +1088,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
case PARSE_EVENTS__TERM_TYPE_INHERIT:
case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
return config_term_common(attr, term, err);
@ -1162,6 +1167,9 @@ do { \
case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num);
break;
case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
ADD_CONFIG_TERM(MAX_EVENTS, max_events, term->val.num);
break;
case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0);
break;


@ -71,6 +71,7 @@ enum {
PARSE_EVENTS__TERM_TYPE_NOINHERIT,
PARSE_EVENTS__TERM_TYPE_INHERIT,
PARSE_EVENTS__TERM_TYPE_MAX_STACK,
PARSE_EVENTS__TERM_TYPE_MAX_EVENTS,
PARSE_EVENTS__TERM_TYPE_NOOVERWRITE,
PARSE_EVENTS__TERM_TYPE_OVERWRITE,
PARSE_EVENTS__TERM_TYPE_DRV_CFG,


@ -269,6 +269,7 @@ time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
max-stack { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); }
nr { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_EVENTS); }
inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); }


@ -324,7 +324,17 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss)
plt_entry_size = 16;
break;
default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */
case EM_SPARC:
plt_header_size = 48;
plt_entry_size = 12;
break;
case EM_SPARCV9:
plt_header_size = 128;
plt_entry_size = 32;
break;
default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */
plt_header_size = shdr_plt.sh_entsize;
plt_entry_size = shdr_plt.sh_entsize;
break;


@ -123,7 +123,8 @@ struct symbol_conf {
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,
*field_sep;
*field_sep,
*graph_function;
const char *default_guest_vmlinux_name,
*default_guest_kallsyms,
*default_guest_modules;


@ -42,6 +42,8 @@ struct thread {
void *addr_space;
struct unwind_libunwind_ops *unwind_libunwind_ops;
#endif
bool filter;
int filter_entry_depth;
};
struct machine;