2014-06-01 13:38:29 +00:00
|
|
|
#include <sys/mman.h>
|
2009-09-24 16:02:49 +00:00
|
|
|
#include "sort.h"
|
2010-07-20 17:42:52 +00:00
|
|
|
#include "hist.h"
|
2013-09-13 07:28:57 +00:00
|
|
|
#include "comm.h"
|
2013-04-03 12:26:19 +00:00
|
|
|
#include "symbol.h"
|
2014-03-03 02:46:55 +00:00
|
|
|
#include "evsel.h"
|
2015-12-22 17:07:01 +00:00
|
|
|
#include "evlist.h"
|
|
|
|
#include <traceevent/event-parse.h>
|
2009-09-24 16:02:49 +00:00
|
|
|
|
|
|
|
regex_t parent_regex;
|
2010-05-17 19:22:41 +00:00
|
|
|
const char default_parent_pattern[] = "^sys_|^do_page_fault";
|
|
|
|
const char *parent_pattern = default_parent_pattern;
|
|
|
|
const char default_sort_order[] = "comm,dso,symbol";
|
2015-07-18 15:24:53 +00:00
|
|
|
const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
|
2014-03-18 02:31:39 +00:00
|
|
|
const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
|
|
|
|
const char default_top_sort_order[] = "dso,symbol";
|
|
|
|
const char default_diff_sort_order[] = "dso,symbol";
|
2015-12-22 17:07:10 +00:00
|
|
|
const char default_tracepoint_sort_order[] = "trace";
|
2014-03-18 02:31:39 +00:00
|
|
|
const char *sort_order;
|
2014-03-04 01:46:34 +00:00
|
|
|
const char *field_order;
|
2012-12-07 05:48:05 +00:00
|
|
|
regex_t ignore_callees_regex;
|
|
|
|
int have_ignore_callees = 0;
|
2009-10-22 21:23:22 +00:00
|
|
|
int sort__need_collapse = 0;
|
|
|
|
int sort__has_parent = 0;
|
2012-09-14 08:35:27 +00:00
|
|
|
int sort__has_sym = 0;
|
2013-12-18 05:21:10 +00:00
|
|
|
int sort__has_dso = 0;
|
2015-09-04 14:45:43 +00:00
|
|
|
int sort__has_socket = 0;
|
2016-01-21 22:13:24 +00:00
|
|
|
int sort__has_thread = 0;
|
2013-04-01 11:35:20 +00:00
|
|
|
enum sort_mode sort__mode = SORT_MODE__NORMAL;
|
perf tools: Bind callchains to the first sort dimension column
Currently, the callchains are displayed using a constant left
margin. So depending on the current sort dimension
configuration, callchains may appear to be well attached to the
first sort dimension column field which is mostly the case,
except when the first dimension of sorting is done by comm,
because these are right aligned.
This patch binds the callchain to the first letter in the first
column, whatever type of column it is (dso, comm, symbol).
Before:
0.80% perf [k] __lock_acquire
__lock_acquire
lock_acquire
|
|--58.33%-- _spin_lock
| |
| |--28.57%-- inotify_should_send_event
| | fsnotify
| | __fsnotify_parent
After:
0.80% perf [k] __lock_acquire
__lock_acquire
lock_acquire
|
|--58.33%-- _spin_lock
| |
| |--28.57%-- inotify_should_send_event
| | fsnotify
| | __fsnotify_parent
Also, for clarity, we no longer print the callchain as is. Instead:
- If we have a top level ancestor in the callchain, start it
with a first ascii hook.
Before:
0.80% perf [kernel] [k] __lock_acquire
__lock_acquire
lock_acquire
|
|--58.33%-- _spin_lock
| |
| |--28.57%-- inotify_should_send_event
| | fsnotify
[..] [..]
After:
0.80% perf [kernel] [k] __lock_acquire
|
--- __lock_acquire
lock_acquire
|
|--58.33%-- _spin_lock
| |
| |--28.57%-- inotify_should_send_event
| | fsnotify
[..] [..]
- Otherwise, if we have several top level ancestors, then
display these like we did before:
1.69% Xorg
|
|--21.21%-- vread_hpet
| 0x7fffd85b46fc
| 0x7fffd85b494d
| 0x7f4fafb4e54d
|
|--15.15%-- exaOffscreenAlloc
|
|--9.09%-- I830WaitLpRing
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
LKML-Reference: <1256246604-17156-2-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-22 21:23:23 +00:00
|
|
|
|
2016-02-12 14:27:51 +00:00
|
|
|
/*
|
|
|
|
* Replaces all occurrences of a char used with the:
|
|
|
|
*
|
|
|
|
* -t, --field-separator
|
|
|
|
*
|
|
|
|
* option, that uses a special separator character and don't pad with spaces,
|
|
|
|
* replacing all occurances of this separator in symbol names (and other
|
|
|
|
* output) with a '.' character, that thus it's the only non valid separator.
|
|
|
|
*/
|
2010-03-31 14:33:40 +00:00
|
|
|
static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
|
2009-09-24 16:02:49 +00:00
|
|
|
{
|
|
|
|
int n;
|
|
|
|
va_list ap;
|
|
|
|
|
|
|
|
va_start(ap, fmt);
|
2010-03-31 14:33:40 +00:00
|
|
|
n = vsnprintf(bf, size, fmt, ap);
|
2012-09-06 15:46:56 +00:00
|
|
|
if (symbol_conf.field_sep && n > 0) {
|
2010-03-31 14:33:40 +00:00
|
|
|
char *sep = bf;
|
|
|
|
|
|
|
|
while (1) {
|
2012-09-06 15:46:56 +00:00
|
|
|
sep = strchr(sep, *symbol_conf.field_sep);
|
2010-03-31 14:33:40 +00:00
|
|
|
if (sep == NULL)
|
|
|
|
break;
|
|
|
|
*sep = '.';
|
2009-09-24 16:02:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
va_end(ap);
|
perf tools: Incorrect use of snprintf results in SEGV
I have a workload where perf top scribbles over the stack and we SEGV.
What makes it interesting is that an snprintf is causing this.
The workload is a c++ gem that has method names over 3000 characters
long, but snprintf is designed to avoid overrunning buffers. So what
went wrong?
The problem is we assume snprintf returns the number of characters
written:
ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", self->level);
...
ret += repsep_snprintf(bf + ret, size - ret, "%s", self->ms.sym->name);
Unfortunately this is not how snprintf works. snprintf returns the
number of characters that would have been written if there was enough
space. In the above case, if the first snprintf returns a value larger
than size, we pass a negative size into the second snprintf and happily
scribble over the stack. If you have 3000 character c++ methods thats a
lot of stack to trample.
This patch fixes repsep_snprintf by clamping the value at size - 1 which
is the maximum snprintf can write before adding the NULL terminator.
I get the sinking feeling that there are a lot of other uses of snprintf
that have this same bug, we should audit them all.
Cc: David Ahern <dsahern@gmail.com>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Cc: stable@kernel.org
Link: http://lkml.kernel.org/r/20120307114249.44275ca3@kryten
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2012-03-07 00:42:49 +00:00
|
|
|
|
|
|
|
if (n >= (int)size)
|
|
|
|
return size - 1;
|
2009-09-24 16:02:49 +00:00
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
2013-09-11 12:46:56 +00:00
|
|
|
/*
 * Order two pointers by NULL-ness only: 0 when both are NULL, -1 when only
 * @l is NULL, and 1 in every other case (including both being non-NULL).
 */
static int64_t cmp_null(const void *l, const void *r)
{
	if (l)
		return 1;

	return r ? -1 : 0;
}
|
|
|
|
|
|
|
|
/* --sort pid */
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2013-07-04 13:20:31 +00:00
|
|
|
return right->thread->tid - left->thread->tid;
|
2011-06-29 01:14:52 +00:00
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf,
|
2010-03-31 14:33:40 +00:00
|
|
|
size_t size, unsigned int width)
|
2009-09-24 16:02:49 +00:00
|
|
|
{
|
2013-09-11 12:46:56 +00:00
|
|
|
const char *comm = thread__comm_str(he->thread);
|
2014-07-31 05:47:38 +00:00
|
|
|
|
|
|
|
width = max(7U, width) - 6;
|
|
|
|
return repsep_snprintf(bf, size, "%5d:%-*.*s", he->thread->tid,
|
|
|
|
width, width, comm ?: "");
|
2009-09-24 16:02:49 +00:00
|
|
|
}
|
|
|
|
|
2011-06-29 01:14:52 +00:00
|
|
|
struct sort_entry sort_thread = {
|
2014-07-31 05:47:35 +00:00
|
|
|
.se_header = " Pid:Command",
|
2011-06-29 01:14:52 +00:00
|
|
|
.se_cmp = sort__thread_cmp,
|
|
|
|
.se_snprintf = hist_entry__thread_snprintf,
|
|
|
|
.se_width_idx = HISTC_THREAD,
|
|
|
|
};
|
|
|
|
|
|
|
|
/* --sort comm */
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2013-09-11 15:18:09 +00:00
|
|
|
/* Compare the addr that should be unique among comm */
|
2015-05-15 15:54:28 +00:00
|
|
|
return strcmp(comm__str(right->comm), comm__str(left->comm));
|
2011-06-29 01:14:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2013-09-13 07:28:57 +00:00
|
|
|
/* Compare the addr that should be unique among comm */
|
2015-05-15 15:54:28 +00:00
|
|
|
return strcmp(comm__str(right->comm), comm__str(left->comm));
|
2011-06-29 01:14:52 +00:00
|
|
|
}
|
|
|
|
|
2014-03-04 02:01:41 +00:00
|
|
|
static int64_t
|
|
|
|
sort__comm_sort(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
return strcmp(comm__str(right->comm), comm__str(left->comm));
|
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__comm_snprintf(struct hist_entry *he, char *bf,
|
2010-03-31 14:33:40 +00:00
|
|
|
size_t size, unsigned int width)
|
2009-09-24 16:02:49 +00:00
|
|
|
{
|
2014-07-31 05:47:38 +00:00
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, comm__str(he->comm));
|
2009-09-24 16:02:49 +00:00
|
|
|
}
|
|
|
|
|
2012-12-27 09:11:38 +00:00
|
|
|
struct sort_entry sort_comm = {
|
|
|
|
.se_header = "Command",
|
|
|
|
.se_cmp = sort__comm_cmp,
|
|
|
|
.se_collapse = sort__comm_collapse,
|
2014-03-04 02:01:41 +00:00
|
|
|
.se_sort = sort__comm_sort,
|
2012-12-27 09:11:38 +00:00
|
|
|
.se_snprintf = hist_entry__comm_snprintf,
|
|
|
|
.se_width_idx = HISTC_COMM,
|
|
|
|
};
|
|
|
|
|
|
|
|
/* --sort dso */
|
|
|
|
|
2012-02-09 22:21:01 +00:00
|
|
|
static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
|
|
|
|
{
|
|
|
|
struct dso *dso_l = map_l ? map_l->dso : NULL;
|
|
|
|
struct dso *dso_r = map_r ? map_r->dso : NULL;
|
|
|
|
const char *dso_name_l, *dso_name_r;
|
|
|
|
|
|
|
|
if (!dso_l || !dso_r)
|
2014-03-04 02:01:41 +00:00
|
|
|
return cmp_null(dso_r, dso_l);
|
2012-02-09 22:21:01 +00:00
|
|
|
|
|
|
|
if (verbose) {
|
|
|
|
dso_name_l = dso_l->long_name;
|
|
|
|
dso_name_r = dso_r->long_name;
|
|
|
|
} else {
|
|
|
|
dso_name_l = dso_l->short_name;
|
|
|
|
dso_name_r = dso_r->short_name;
|
|
|
|
}
|
|
|
|
|
|
|
|
return strcmp(dso_name_l, dso_name_r);
|
|
|
|
}
|
|
|
|
|
2011-06-29 01:14:52 +00:00
|
|
|
static int64_t
|
2009-09-24 16:02:49 +00:00
|
|
|
sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2014-03-04 02:01:41 +00:00
|
|
|
return _sort__dso_cmp(right->ms.map, left->ms.map);
|
2012-02-09 22:21:01 +00:00
|
|
|
}
|
2009-09-24 16:02:49 +00:00
|
|
|
|
2012-12-27 09:11:38 +00:00
|
|
|
static int _hist_entry__dso_snprintf(struct map *map, char *bf,
|
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
if (map && map->dso) {
|
|
|
|
const char *dso_name = !verbose ? map->dso->short_name :
|
|
|
|
map->dso->long_name;
|
2014-07-31 05:47:38 +00:00
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, dso_name);
|
2012-12-27 09:11:38 +00:00
|
|
|
}
|
|
|
|
|
2014-07-31 05:47:38 +00:00
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, "[unknown]");
|
2012-12-27 09:11:38 +00:00
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__dso_snprintf(struct hist_entry *he, char *bf,
|
2012-12-27 09:11:38 +00:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
2013-10-22 22:01:31 +00:00
|
|
|
return _hist_entry__dso_snprintf(he->ms.map, bf, size, width);
|
2012-12-27 09:11:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
struct sort_entry sort_dso = {
|
|
|
|
.se_header = "Shared Object",
|
|
|
|
.se_cmp = sort__dso_cmp,
|
|
|
|
.se_snprintf = hist_entry__dso_snprintf,
|
|
|
|
.se_width_idx = HISTC_DSO,
|
|
|
|
};
|
|
|
|
|
|
|
|
/* --sort symbol */
|
2009-09-24 16:02:49 +00:00
|
|
|
|
2013-12-18 05:21:09 +00:00
|
|
|
static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
|
|
|
|
{
|
|
|
|
return (int64_t)(right_ip - left_ip);
|
|
|
|
}
|
|
|
|
|
2013-02-06 05:57:15 +00:00
|
|
|
static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
|
2012-02-09 22:21:01 +00:00
|
|
|
{
|
|
|
|
if (!sym_l || !sym_r)
|
|
|
|
return cmp_null(sym_l, sym_r);
|
|
|
|
|
|
|
|
if (sym_l == sym_r)
|
|
|
|
return 0;
|
|
|
|
|
2015-06-17 23:41:10 +00:00
|
|
|
if (sym_l->start != sym_r->start)
|
|
|
|
return (int64_t)(sym_r->start - sym_l->start);
|
2012-02-09 22:21:01 +00:00
|
|
|
|
2015-06-17 23:41:10 +00:00
|
|
|
return (int64_t)(sym_r->end - sym_l->end);
|
2012-02-09 22:21:01 +00:00
|
|
|
}
|
|
|
|
|
2012-12-27 09:11:38 +00:00
|
|
|
static int64_t
|
|
|
|
sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
|
2012-02-09 22:21:01 +00:00
|
|
|
{
|
2013-10-15 02:01:56 +00:00
|
|
|
int64_t ret;
|
|
|
|
|
2012-12-27 09:11:38 +00:00
|
|
|
if (!left->ms.sym && !right->ms.sym)
|
2013-12-18 05:21:09 +00:00
|
|
|
return _sort__addr_cmp(left->ip, right->ip);
|
2009-09-24 16:02:49 +00:00
|
|
|
|
2013-10-15 02:01:56 +00:00
|
|
|
/*
|
|
|
|
* comparing symbol address alone is not enough since it's a
|
|
|
|
* relative address within a dso.
|
|
|
|
*/
|
2013-12-18 05:21:10 +00:00
|
|
|
if (!sort__has_dso) {
|
|
|
|
ret = sort__dso_cmp(left, right);
|
|
|
|
if (ret != 0)
|
|
|
|
return ret;
|
|
|
|
}
|
2013-10-15 02:01:56 +00:00
|
|
|
|
2013-02-06 05:57:15 +00:00
|
|
|
return _sort__sym_cmp(left->ms.sym, right->ms.sym);
|
2012-02-09 22:21:01 +00:00
|
|
|
}
|
|
|
|
|
2014-03-04 02:01:41 +00:00
|
|
|
static int64_t
|
|
|
|
sort__sym_sort(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
if (!left->ms.sym || !right->ms.sym)
|
|
|
|
return cmp_null(left->ms.sym, right->ms.sym);
|
|
|
|
|
|
|
|
return strcmp(right->ms.sym->name, left->ms.sym->name);
|
|
|
|
}
|
|
|
|
|
2012-02-09 22:21:01 +00:00
|
|
|
static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
|
|
|
|
u64 ip, char level, char *bf, size_t size,
|
2012-12-27 09:11:39 +00:00
|
|
|
unsigned int width)
|
2012-02-09 22:21:01 +00:00
|
|
|
{
|
|
|
|
size_t ret = 0;
|
|
|
|
|
|
|
|
if (verbose) {
|
|
|
|
char o = map ? dso__symtab_origin(map->dso) : '!';
|
|
|
|
ret += repsep_snprintf(bf, size, "%-#*llx %c ",
|
2013-04-01 11:35:19 +00:00
|
|
|
BITS_PER_LONG / 4 + 2, ip, o);
|
2009-10-02 06:29:58 +00:00
|
|
|
}
|
2009-09-24 16:02:49 +00:00
|
|
|
|
2012-02-09 22:21:01 +00:00
|
|
|
ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
|
2013-01-24 15:10:35 +00:00
|
|
|
if (sym && map) {
|
|
|
|
if (map->type == MAP__VARIABLE) {
|
|
|
|
ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
|
|
|
|
ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
|
2013-01-24 15:10:42 +00:00
|
|
|
ip - map->unmap_ip(map, sym->start));
|
2013-01-24 15:10:35 +00:00
|
|
|
ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
|
|
|
|
width - ret, "");
|
|
|
|
} else {
|
|
|
|
ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
|
|
|
|
width - ret,
|
|
|
|
sym->name);
|
|
|
|
}
|
|
|
|
} else {
|
2012-02-09 22:21:01 +00:00
|
|
|
size_t len = BITS_PER_LONG / 4;
|
|
|
|
ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
|
|
|
|
len, ip);
|
|
|
|
ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
|
|
|
|
width - ret, "");
|
|
|
|
}
|
|
|
|
|
2014-07-31 05:47:38 +00:00
|
|
|
if (ret > width)
|
|
|
|
bf[width] = '\0';
|
|
|
|
|
|
|
|
return width;
|
2009-09-24 16:02:49 +00:00
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
|
2012-12-27 09:11:39 +00:00
|
|
|
size_t size, unsigned int width)
|
2012-02-09 22:21:01 +00:00
|
|
|
{
|
2013-10-22 22:01:31 +00:00
|
|
|
return _hist_entry__sym_snprintf(he->ms.map, he->ms.sym, he->ip,
|
|
|
|
he->level, bf, size, width);
|
2012-02-09 22:21:01 +00:00
|
|
|
}
|
2009-09-24 16:02:49 +00:00
|
|
|
|
2011-06-29 01:14:52 +00:00
|
|
|
struct sort_entry sort_sym = {
|
|
|
|
.se_header = "Symbol",
|
|
|
|
.se_cmp = sort__sym_cmp,
|
2014-03-04 02:01:41 +00:00
|
|
|
.se_sort = sort__sym_sort,
|
2011-06-29 01:14:52 +00:00
|
|
|
.se_snprintf = hist_entry__sym_snprintf,
|
|
|
|
.se_width_idx = HISTC_SYMBOL,
|
|
|
|
};
|
2009-09-24 16:02:49 +00:00
|
|
|
|
2012-05-30 13:33:24 +00:00
|
|
|
/* --sort srcline */
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2013-09-11 05:09:33 +00:00
|
|
|
if (!left->srcline) {
|
|
|
|
if (!left->ms.map)
|
|
|
|
left->srcline = SRCLINE_UNKNOWN;
|
|
|
|
else {
|
|
|
|
struct map *map = left->ms.map;
|
|
|
|
left->srcline = get_srcline(map->dso,
|
2014-11-13 02:05:27 +00:00
|
|
|
map__rip_2objdump(map, left->ip),
|
|
|
|
left->ms.sym, true);
|
2013-09-11 05:09:33 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!right->srcline) {
|
|
|
|
if (!right->ms.map)
|
|
|
|
right->srcline = SRCLINE_UNKNOWN;
|
|
|
|
else {
|
|
|
|
struct map *map = right->ms.map;
|
|
|
|
right->srcline = get_srcline(map->dso,
|
2014-11-13 02:05:27 +00:00
|
|
|
map__rip_2objdump(map, right->ip),
|
|
|
|
right->ms.sym, true);
|
2013-09-11 05:09:33 +00:00
|
|
|
}
|
|
|
|
}
|
2014-03-04 02:01:41 +00:00
|
|
|
return strcmp(right->srcline, left->srcline);
|
2012-05-30 13:33:24 +00:00
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf,
|
2014-07-31 05:47:38 +00:00
|
|
|
size_t size, unsigned int width)
|
2012-05-30 13:33:24 +00:00
|
|
|
{
|
2014-11-18 21:02:51 +00:00
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcline);
|
2012-05-30 13:33:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
struct sort_entry sort_srcline = {
|
|
|
|
.se_header = "Source:Line",
|
|
|
|
.se_cmp = sort__srcline_cmp,
|
|
|
|
.se_snprintf = hist_entry__srcline_snprintf,
|
|
|
|
.se_width_idx = HISTC_SRCLINE,
|
|
|
|
};
|
|
|
|
|
2015-08-07 22:54:24 +00:00
|
|
|
/* --sort srcfile */
|
|
|
|
|
|
|
|
static char no_srcfile[1];
|
|
|
|
|
|
|
|
static char *get_srcfile(struct hist_entry *e)
|
|
|
|
{
|
|
|
|
char *sf, *p;
|
|
|
|
struct map *map = e->ms.map;
|
|
|
|
|
2015-09-01 18:47:19 +00:00
|
|
|
sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip),
|
|
|
|
e->ms.sym, false, true);
|
2015-08-11 13:36:55 +00:00
|
|
|
if (!strcmp(sf, SRCLINE_UNKNOWN))
|
|
|
|
return no_srcfile;
|
2015-08-07 22:54:24 +00:00
|
|
|
p = strchr(sf, ':');
|
|
|
|
if (p && *sf) {
|
|
|
|
*p = 0;
|
|
|
|
return sf;
|
|
|
|
}
|
|
|
|
free(sf);
|
|
|
|
return no_srcfile;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
if (!left->srcfile) {
|
|
|
|
if (!left->ms.map)
|
|
|
|
left->srcfile = no_srcfile;
|
|
|
|
else
|
|
|
|
left->srcfile = get_srcfile(left);
|
|
|
|
}
|
|
|
|
if (!right->srcfile) {
|
|
|
|
if (!right->ms.map)
|
|
|
|
right->srcfile = no_srcfile;
|
|
|
|
else
|
|
|
|
right->srcfile = get_srcfile(right);
|
|
|
|
}
|
|
|
|
return strcmp(right->srcfile, left->srcfile);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf,
|
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcfile);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct sort_entry sort_srcfile = {
|
|
|
|
.se_header = "Source File",
|
|
|
|
.se_cmp = sort__srcfile_cmp,
|
|
|
|
.se_snprintf = hist_entry__srcfile_snprintf,
|
|
|
|
.se_width_idx = HISTC_SRCFILE,
|
|
|
|
};
|
|
|
|
|
2009-09-24 16:02:49 +00:00
|
|
|
/* --sort parent */
|
|
|
|
|
2011-06-29 01:14:52 +00:00
|
|
|
static int64_t
|
2009-09-24 16:02:49 +00:00
|
|
|
sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
struct symbol *sym_l = left->parent;
|
|
|
|
struct symbol *sym_r = right->parent;
|
|
|
|
|
|
|
|
if (!sym_l || !sym_r)
|
|
|
|
return cmp_null(sym_l, sym_r);
|
|
|
|
|
2014-03-04 02:01:41 +00:00
|
|
|
return strcmp(sym_r->name, sym_l->name);
|
2009-09-24 16:02:49 +00:00
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__parent_snprintf(struct hist_entry *he, char *bf,
|
2010-03-31 14:33:40 +00:00
|
|
|
size_t size, unsigned int width)
|
2009-09-24 16:02:49 +00:00
|
|
|
{
|
2014-07-31 05:47:38 +00:00
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width,
|
2013-10-22 22:01:31 +00:00
|
|
|
he->parent ? he->parent->name : "[other]");
|
2009-09-24 16:02:49 +00:00
|
|
|
}
|
|
|
|
|
2011-06-29 01:14:52 +00:00
|
|
|
struct sort_entry sort_parent = {
|
|
|
|
.se_header = "Parent symbol",
|
|
|
|
.se_cmp = sort__parent_cmp,
|
|
|
|
.se_snprintf = hist_entry__parent_snprintf,
|
|
|
|
.se_width_idx = HISTC_PARENT,
|
|
|
|
};
|
|
|
|
|
2010-06-04 14:27:10 +00:00
|
|
|
/* --sort cpu */
|
|
|
|
|
2011-06-29 01:14:52 +00:00
|
|
|
static int64_t
|
2010-06-04 14:27:10 +00:00
|
|
|
sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
return right->cpu - left->cpu;
|
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__cpu_snprintf(struct hist_entry *he, char *bf,
|
|
|
|
size_t size, unsigned int width)
|
2010-06-04 14:27:10 +00:00
|
|
|
{
|
2014-07-31 05:47:38 +00:00
|
|
|
return repsep_snprintf(bf, size, "%*.*d", width, width, he->cpu);
|
2010-06-04 14:27:10 +00:00
|
|
|
}
|
|
|
|
|
2011-06-29 01:14:52 +00:00
|
|
|
struct sort_entry sort_cpu = {
|
|
|
|
.se_header = "CPU",
|
|
|
|
.se_cmp = sort__cpu_cmp,
|
|
|
|
.se_snprintf = hist_entry__cpu_snprintf,
|
|
|
|
.se_width_idx = HISTC_CPU,
|
|
|
|
};
|
|
|
|
|
2015-09-04 14:45:43 +00:00
|
|
|
/* --sort socket */
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__socket_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
return right->socket - left->socket;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int hist_entry__socket_snprintf(struct hist_entry *he, char *bf,
|
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
return repsep_snprintf(bf, size, "%*.*d", width, width-3, he->socket);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct sort_entry sort_socket = {
|
|
|
|
.se_header = "Socket",
|
|
|
|
.se_cmp = sort__socket_cmp,
|
|
|
|
.se_snprintf = hist_entry__socket_snprintf,
|
|
|
|
.se_width_idx = HISTC_SOCKET,
|
|
|
|
};
|
|
|
|
|
2015-12-22 17:07:04 +00:00
|
|
|
/* --sort trace */
|
|
|
|
|
|
|
|
static char *get_trace_output(struct hist_entry *he)
|
|
|
|
{
|
|
|
|
struct trace_seq seq;
|
|
|
|
struct perf_evsel *evsel;
|
|
|
|
struct pevent_record rec = {
|
|
|
|
.data = he->raw_data,
|
|
|
|
.size = he->raw_size,
|
|
|
|
};
|
|
|
|
|
|
|
|
evsel = hists_to_evsel(he->hists);
|
|
|
|
|
|
|
|
trace_seq_init(&seq);
|
2015-12-22 17:07:05 +00:00
|
|
|
if (symbol_conf.raw_trace) {
|
|
|
|
pevent_print_fields(&seq, he->raw_data, he->raw_size,
|
|
|
|
evsel->tp_format);
|
|
|
|
} else {
|
|
|
|
pevent_event_info(&seq, evsel->tp_format, &rec);
|
|
|
|
}
|
2015-12-22 17:07:04 +00:00
|
|
|
return seq.buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__trace_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
struct perf_evsel *evsel;
|
|
|
|
|
|
|
|
evsel = hists_to_evsel(left->hists);
|
|
|
|
if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (left->trace_output == NULL)
|
|
|
|
left->trace_output = get_trace_output(left);
|
|
|
|
if (right->trace_output == NULL)
|
|
|
|
right->trace_output = get_trace_output(right);
|
|
|
|
|
|
|
|
hists__new_col_len(left->hists, HISTC_TRACE, strlen(left->trace_output));
|
|
|
|
hists__new_col_len(right->hists, HISTC_TRACE, strlen(right->trace_output));
|
|
|
|
|
|
|
|
return strcmp(right->trace_output, left->trace_output);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int hist_entry__trace_snprintf(struct hist_entry *he, char *bf,
|
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
struct perf_evsel *evsel;
|
|
|
|
|
|
|
|
evsel = hists_to_evsel(he->hists);
|
|
|
|
if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
|
|
|
|
return scnprintf(bf, size, "%-*.*s", width, width, "N/A");
|
|
|
|
|
|
|
|
if (he->trace_output == NULL)
|
|
|
|
he->trace_output = get_trace_output(he);
|
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, he->trace_output);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct sort_entry sort_trace = {
|
|
|
|
.se_header = "Trace output",
|
|
|
|
.se_cmp = sort__trace_cmp,
|
|
|
|
.se_snprintf = hist_entry__trace_snprintf,
|
|
|
|
.se_width_idx = HISTC_TRACE,
|
|
|
|
};
|
|
|
|
|
2012-12-27 09:11:38 +00:00
|
|
|
/* sort keys for branch stacks */
|
|
|
|
|
2012-02-09 22:21:01 +00:00
|
|
|
static int64_t
|
|
|
|
sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2014-10-16 14:07:07 +00:00
|
|
|
if (!left->branch_info || !right->branch_info)
|
|
|
|
return cmp_null(left->branch_info, right->branch_info);
|
|
|
|
|
2012-02-09 22:21:01 +00:00
|
|
|
return _sort__dso_cmp(left->branch_info->from.map,
|
|
|
|
right->branch_info->from.map);
|
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__dso_from_snprintf(struct hist_entry *he, char *bf,
|
2012-02-09 22:21:01 +00:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
2014-10-16 14:07:07 +00:00
|
|
|
if (he->branch_info)
|
|
|
|
return _hist_entry__dso_snprintf(he->branch_info->from.map,
|
|
|
|
bf, size, width);
|
|
|
|
else
|
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
|
2012-02-09 22:21:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2014-10-16 14:07:06 +00:00
|
|
|
if (!left->branch_info || !right->branch_info)
|
|
|
|
return cmp_null(left->branch_info, right->branch_info);
|
|
|
|
|
2012-02-09 22:21:01 +00:00
|
|
|
return _sort__dso_cmp(left->branch_info->to.map,
|
|
|
|
right->branch_info->to.map);
|
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__dso_to_snprintf(struct hist_entry *he, char *bf,
|
2012-02-09 22:21:01 +00:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
2014-10-16 14:07:06 +00:00
|
|
|
if (he->branch_info)
|
|
|
|
return _hist_entry__dso_snprintf(he->branch_info->to.map,
|
|
|
|
bf, size, width);
|
|
|
|
else
|
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
|
2012-02-09 22:21:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
struct addr_map_symbol *from_l = &left->branch_info->from;
|
|
|
|
struct addr_map_symbol *from_r = &right->branch_info->from;
|
|
|
|
|
2014-10-16 14:07:05 +00:00
|
|
|
if (!left->branch_info || !right->branch_info)
|
|
|
|
return cmp_null(left->branch_info, right->branch_info);
|
|
|
|
|
|
|
|
from_l = &left->branch_info->from;
|
|
|
|
from_r = &right->branch_info->from;
|
|
|
|
|
2012-02-09 22:21:01 +00:00
|
|
|
if (!from_l->sym && !from_r->sym)
|
2013-12-18 05:21:09 +00:00
|
|
|
return _sort__addr_cmp(from_l->addr, from_r->addr);
|
2012-02-09 22:21:01 +00:00
|
|
|
|
2013-02-06 05:57:15 +00:00
|
|
|
return _sort__sym_cmp(from_l->sym, from_r->sym);
|
2012-02-09 22:21:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2014-10-16 14:07:04 +00:00
|
|
|
struct addr_map_symbol *to_l, *to_r;
|
|
|
|
|
|
|
|
if (!left->branch_info || !right->branch_info)
|
|
|
|
return cmp_null(left->branch_info, right->branch_info);
|
|
|
|
|
|
|
|
to_l = &left->branch_info->to;
|
|
|
|
to_r = &right->branch_info->to;
|
2012-02-09 22:21:01 +00:00
|
|
|
|
|
|
|
if (!to_l->sym && !to_r->sym)
|
2013-12-18 05:21:09 +00:00
|
|
|
return _sort__addr_cmp(to_l->addr, to_r->addr);
|
2012-02-09 22:21:01 +00:00
|
|
|
|
2013-02-06 05:57:15 +00:00
|
|
|
return _sort__sym_cmp(to_l->sym, to_r->sym);
|
2012-02-09 22:21:01 +00:00
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf,
|
2012-12-27 09:11:39 +00:00
|
|
|
size_t size, unsigned int width)
|
2012-02-09 22:21:01 +00:00
|
|
|
{
|
2014-10-16 14:07:05 +00:00
|
|
|
if (he->branch_info) {
|
|
|
|
struct addr_map_symbol *from = &he->branch_info->from;
|
2012-02-09 22:21:01 +00:00
|
|
|
|
2014-10-16 14:07:05 +00:00
|
|
|
return _hist_entry__sym_snprintf(from->map, from->sym, from->addr,
|
|
|
|
he->level, bf, size, width);
|
|
|
|
}
|
|
|
|
|
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
|
2012-02-09 22:21:01 +00:00
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf,
|
2012-12-27 09:11:39 +00:00
|
|
|
size_t size, unsigned int width)
|
2012-02-09 22:21:01 +00:00
|
|
|
{
|
2014-10-16 14:07:04 +00:00
|
|
|
if (he->branch_info) {
|
|
|
|
struct addr_map_symbol *to = &he->branch_info->to;
|
|
|
|
|
|
|
|
return _hist_entry__sym_snprintf(to->map, to->sym, to->addr,
|
|
|
|
he->level, bf, size, width);
|
|
|
|
}
|
2012-02-09 22:21:01 +00:00
|
|
|
|
2014-10-16 14:07:04 +00:00
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
|
2012-02-09 22:21:01 +00:00
|
|
|
}
|
|
|
|
|
2012-12-27 09:11:38 +00:00
|
|
|
struct sort_entry sort_dso_from = {
|
|
|
|
.se_header = "Source Shared Object",
|
|
|
|
.se_cmp = sort__dso_from_cmp,
|
|
|
|
.se_snprintf = hist_entry__dso_from_snprintf,
|
|
|
|
.se_width_idx = HISTC_DSO_FROM,
|
|
|
|
};
|
|
|
|
|
2012-02-09 22:21:01 +00:00
|
|
|
struct sort_entry sort_dso_to = {
|
|
|
|
.se_header = "Target Shared Object",
|
|
|
|
.se_cmp = sort__dso_to_cmp,
|
|
|
|
.se_snprintf = hist_entry__dso_to_snprintf,
|
|
|
|
.se_width_idx = HISTC_DSO_TO,
|
|
|
|
};
|
|
|
|
|
|
|
|
struct sort_entry sort_sym_from = {
|
|
|
|
.se_header = "Source Symbol",
|
|
|
|
.se_cmp = sort__sym_from_cmp,
|
|
|
|
.se_snprintf = hist_entry__sym_from_snprintf,
|
|
|
|
.se_width_idx = HISTC_SYMBOL_FROM,
|
|
|
|
};
|
|
|
|
|
|
|
|
struct sort_entry sort_sym_to = {
|
|
|
|
.se_header = "Target Symbol",
|
|
|
|
.se_cmp = sort__sym_to_cmp,
|
|
|
|
.se_snprintf = hist_entry__sym_to_snprintf,
|
|
|
|
.se_width_idx = HISTC_SYMBOL_TO,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2014-10-16 14:07:03 +00:00
|
|
|
unsigned char mp, p;
|
2012-02-09 22:21:01 +00:00
|
|
|
|
2014-10-16 14:07:03 +00:00
|
|
|
if (!left->branch_info || !right->branch_info)
|
|
|
|
return cmp_null(left->branch_info, right->branch_info);
|
|
|
|
|
|
|
|
mp = left->branch_info->flags.mispred != right->branch_info->flags.mispred;
|
|
|
|
p = left->branch_info->flags.predicted != right->branch_info->flags.predicted;
|
2012-02-09 22:21:01 +00:00
|
|
|
return mp || p;
|
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf,
|
2012-02-09 22:21:01 +00:00
|
|
|
size_t size, unsigned int width){
|
|
|
|
static const char *out = "N/A";
|
|
|
|
|
2014-10-16 14:07:03 +00:00
|
|
|
if (he->branch_info) {
|
|
|
|
if (he->branch_info->flags.predicted)
|
|
|
|
out = "N";
|
|
|
|
else if (he->branch_info->flags.mispred)
|
|
|
|
out = "Y";
|
|
|
|
}
|
2012-02-09 22:21:01 +00:00
|
|
|
|
2014-07-31 05:47:38 +00:00
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, out);
|
2012-02-09 22:21:01 +00:00
|
|
|
}
|
|
|
|
|
2015-07-18 15:24:46 +00:00
|
|
|
static int64_t
|
|
|
|
sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
return left->branch_info->flags.cycles -
|
|
|
|
right->branch_info->flags.cycles;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf,
|
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
if (he->branch_info->flags.cycles == 0)
|
|
|
|
return repsep_snprintf(bf, size, "%-*s", width, "-");
|
|
|
|
return repsep_snprintf(bf, size, "%-*hd", width,
|
|
|
|
he->branch_info->flags.cycles);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct sort_entry sort_cycles = {
|
|
|
|
.se_header = "Basic Block Cycles",
|
|
|
|
.se_cmp = sort__cycles_cmp,
|
|
|
|
.se_snprintf = hist_entry__cycles_snprintf,
|
|
|
|
.se_width_idx = HISTC_CYCLES,
|
|
|
|
};
|
|
|
|
|
2013-01-24 15:10:35 +00:00
|
|
|
/* --sort daddr_sym */
|
|
|
|
static int64_t
|
|
|
|
sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
uint64_t l = 0, r = 0;
|
|
|
|
|
|
|
|
if (left->mem_info)
|
|
|
|
l = left->mem_info->daddr.addr;
|
|
|
|
if (right->mem_info)
|
|
|
|
r = right->mem_info->daddr.addr;
|
|
|
|
|
|
|
|
return (int64_t)(r - l);
|
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf,
|
2013-01-24 15:10:35 +00:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
uint64_t addr = 0;
|
|
|
|
struct map *map = NULL;
|
|
|
|
struct symbol *sym = NULL;
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
if (he->mem_info) {
|
|
|
|
addr = he->mem_info->daddr.addr;
|
|
|
|
map = he->mem_info->daddr.map;
|
|
|
|
sym = he->mem_info->daddr.sym;
|
2013-01-24 15:10:35 +00:00
|
|
|
}
|
2013-10-22 22:01:31 +00:00
|
|
|
return _hist_entry__sym_snprintf(map, sym, addr, he->level, bf, size,
|
2013-01-24 15:10:35 +00:00
|
|
|
width);
|
|
|
|
}
|
|
|
|
|
2015-10-05 18:06:07 +00:00
|
|
|
static int64_t
|
|
|
|
sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
uint64_t l = 0, r = 0;
|
|
|
|
|
|
|
|
if (left->mem_info)
|
|
|
|
l = left->mem_info->iaddr.addr;
|
|
|
|
if (right->mem_info)
|
|
|
|
r = right->mem_info->iaddr.addr;
|
|
|
|
|
|
|
|
return (int64_t)(r - l);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int hist_entry__iaddr_snprintf(struct hist_entry *he, char *bf,
|
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
uint64_t addr = 0;
|
|
|
|
struct map *map = NULL;
|
|
|
|
struct symbol *sym = NULL;
|
|
|
|
|
|
|
|
if (he->mem_info) {
|
|
|
|
addr = he->mem_info->iaddr.addr;
|
|
|
|
map = he->mem_info->iaddr.map;
|
|
|
|
sym = he->mem_info->iaddr.sym;
|
|
|
|
}
|
|
|
|
return _hist_entry__sym_snprintf(map, sym, addr, he->level, bf, size,
|
|
|
|
width);
|
|
|
|
}
|
|
|
|
|
2013-01-24 15:10:35 +00:00
|
|
|
static int64_t
|
|
|
|
sort__dso_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
struct map *map_l = NULL;
|
|
|
|
struct map *map_r = NULL;
|
|
|
|
|
|
|
|
if (left->mem_info)
|
|
|
|
map_l = left->mem_info->daddr.map;
|
|
|
|
if (right->mem_info)
|
|
|
|
map_r = right->mem_info->daddr.map;
|
|
|
|
|
|
|
|
return _sort__dso_cmp(map_l, map_r);
|
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__dso_daddr_snprintf(struct hist_entry *he, char *bf,
|
2013-01-24 15:10:35 +00:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
struct map *map = NULL;
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
if (he->mem_info)
|
|
|
|
map = he->mem_info->daddr.map;
|
2013-01-24 15:10:35 +00:00
|
|
|
|
|
|
|
return _hist_entry__dso_snprintf(map, bf, size, width);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__locked_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
union perf_mem_data_src data_src_l;
|
|
|
|
union perf_mem_data_src data_src_r;
|
|
|
|
|
|
|
|
if (left->mem_info)
|
|
|
|
data_src_l = left->mem_info->data_src;
|
|
|
|
else
|
|
|
|
data_src_l.mem_lock = PERF_MEM_LOCK_NA;
|
|
|
|
|
|
|
|
if (right->mem_info)
|
|
|
|
data_src_r = right->mem_info->data_src;
|
|
|
|
else
|
|
|
|
data_src_r.mem_lock = PERF_MEM_LOCK_NA;
|
|
|
|
|
|
|
|
return (int64_t)(data_src_r.mem_lock - data_src_l.mem_lock);
|
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf,
|
2013-01-24 15:10:35 +00:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
const char *out;
|
|
|
|
u64 mask = PERF_MEM_LOCK_NA;
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
if (he->mem_info)
|
|
|
|
mask = he->mem_info->data_src.mem_lock;
|
2013-01-24 15:10:35 +00:00
|
|
|
|
|
|
|
if (mask & PERF_MEM_LOCK_NA)
|
|
|
|
out = "N/A";
|
|
|
|
else if (mask & PERF_MEM_LOCK_LOCKED)
|
|
|
|
out = "Yes";
|
|
|
|
else
|
|
|
|
out = "No";
|
|
|
|
|
|
|
|
return repsep_snprintf(bf, size, "%-*s", width, out);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__tlb_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
union perf_mem_data_src data_src_l;
|
|
|
|
union perf_mem_data_src data_src_r;
|
|
|
|
|
|
|
|
if (left->mem_info)
|
|
|
|
data_src_l = left->mem_info->data_src;
|
|
|
|
else
|
|
|
|
data_src_l.mem_dtlb = PERF_MEM_TLB_NA;
|
|
|
|
|
|
|
|
if (right->mem_info)
|
|
|
|
data_src_r = right->mem_info->data_src;
|
|
|
|
else
|
|
|
|
data_src_r.mem_dtlb = PERF_MEM_TLB_NA;
|
|
|
|
|
|
|
|
return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Names for the TLB access bits; hist_entry__tlb_snprintf() prints entry i
 * when bit i of the mem_dtlb value is set.
 */
static const char * const tlb_access[] = {
	"N/A",		/* bit 0 */
	"HIT",		/* bit 1 */
	"MISS",		/* bit 2 */
	"L1",		/* bit 3 */
	"L2",		/* bit 4 */
	"Walker",	/* bit 5 */
	"Fault",	/* bit 6 */
};
#define NUM_TLB_ACCESS (sizeof(tlb_access) / sizeof(tlb_access[0]))
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf,
|
2013-01-24 15:10:35 +00:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
char out[64];
|
|
|
|
size_t sz = sizeof(out) - 1; /* -1 for null termination */
|
|
|
|
size_t l = 0, i;
|
|
|
|
u64 m = PERF_MEM_TLB_NA;
|
|
|
|
u64 hit, miss;
|
|
|
|
|
|
|
|
out[0] = '\0';
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
if (he->mem_info)
|
|
|
|
m = he->mem_info->data_src.mem_dtlb;
|
2013-01-24 15:10:35 +00:00
|
|
|
|
|
|
|
hit = m & PERF_MEM_TLB_HIT;
|
|
|
|
miss = m & PERF_MEM_TLB_MISS;
|
|
|
|
|
|
|
|
/* already taken care of */
|
|
|
|
m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
|
|
|
|
|
|
|
|
for (i = 0; m && i < NUM_TLB_ACCESS; i++, m >>= 1) {
|
|
|
|
if (!(m & 0x1))
|
|
|
|
continue;
|
|
|
|
if (l) {
|
|
|
|
strcat(out, " or ");
|
|
|
|
l += 4;
|
|
|
|
}
|
|
|
|
strncat(out, tlb_access[i], sz - l);
|
|
|
|
l += strlen(tlb_access[i]);
|
|
|
|
}
|
|
|
|
if (*out == '\0')
|
|
|
|
strcpy(out, "N/A");
|
|
|
|
if (hit)
|
|
|
|
strncat(out, " hit", sz - l);
|
|
|
|
if (miss)
|
|
|
|
strncat(out, " miss", sz - l);
|
|
|
|
|
|
|
|
return repsep_snprintf(bf, size, "%-*s", width, out);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__lvl_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
union perf_mem_data_src data_src_l;
|
|
|
|
union perf_mem_data_src data_src_r;
|
|
|
|
|
|
|
|
if (left->mem_info)
|
|
|
|
data_src_l = left->mem_info->data_src;
|
|
|
|
else
|
|
|
|
data_src_l.mem_lvl = PERF_MEM_LVL_NA;
|
|
|
|
|
|
|
|
if (right->mem_info)
|
|
|
|
data_src_r = right->mem_info->data_src;
|
|
|
|
else
|
|
|
|
data_src_r.mem_lvl = PERF_MEM_LVL_NA;
|
|
|
|
|
|
|
|
return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Names for the memory level bits; hist_entry__lvl_snprintf() prints entry i
 * when bit i of the mem_lvl value is set.
 */
static const char * const mem_lvl[] = {
	"N/A",				/* bit 0 */
	"HIT",				/* bit 1 */
	"MISS",				/* bit 2 */
	"L1",				/* bit 3 */
	"LFB",				/* bit 4 */
	"L2",				/* bit 5 */
	"L3",				/* bit 6 */
	"Local RAM",			/* bit 7 */
	"Remote RAM (1 hop)",		/* bit 8 */
	"Remote RAM (2 hops)",		/* bit 9 */
	"Remote Cache (1 hop)",		/* bit 10 */
	"Remote Cache (2 hops)",	/* bit 11 */
	"I/O",				/* bit 12 */
	"Uncached",			/* bit 13 */
};
#define NUM_MEM_LVL (sizeof(mem_lvl) / sizeof(mem_lvl[0]))
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf,
|
2013-01-24 15:10:35 +00:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
char out[64];
|
|
|
|
size_t sz = sizeof(out) - 1; /* -1 for null termination */
|
|
|
|
size_t i, l = 0;
|
|
|
|
u64 m = PERF_MEM_LVL_NA;
|
|
|
|
u64 hit, miss;
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
if (he->mem_info)
|
|
|
|
m = he->mem_info->data_src.mem_lvl;
|
2013-01-24 15:10:35 +00:00
|
|
|
|
|
|
|
out[0] = '\0';
|
|
|
|
|
|
|
|
hit = m & PERF_MEM_LVL_HIT;
|
|
|
|
miss = m & PERF_MEM_LVL_MISS;
|
|
|
|
|
|
|
|
/* already taken care of */
|
|
|
|
m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
|
|
|
|
|
|
|
|
for (i = 0; m && i < NUM_MEM_LVL; i++, m >>= 1) {
|
|
|
|
if (!(m & 0x1))
|
|
|
|
continue;
|
|
|
|
if (l) {
|
|
|
|
strcat(out, " or ");
|
|
|
|
l += 4;
|
|
|
|
}
|
|
|
|
strncat(out, mem_lvl[i], sz - l);
|
|
|
|
l += strlen(mem_lvl[i]);
|
|
|
|
}
|
|
|
|
if (*out == '\0')
|
|
|
|
strcpy(out, "N/A");
|
|
|
|
if (hit)
|
|
|
|
strncat(out, " hit", sz - l);
|
|
|
|
if (miss)
|
|
|
|
strncat(out, " miss", sz - l);
|
|
|
|
|
|
|
|
return repsep_snprintf(bf, size, "%-*s", width, out);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__snoop_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
union perf_mem_data_src data_src_l;
|
|
|
|
union perf_mem_data_src data_src_r;
|
|
|
|
|
|
|
|
if (left->mem_info)
|
|
|
|
data_src_l = left->mem_info->data_src;
|
|
|
|
else
|
|
|
|
data_src_l.mem_snoop = PERF_MEM_SNOOP_NA;
|
|
|
|
|
|
|
|
if (right->mem_info)
|
|
|
|
data_src_r = right->mem_info->data_src;
|
|
|
|
else
|
|
|
|
data_src_r.mem_snoop = PERF_MEM_SNOOP_NA;
|
|
|
|
|
|
|
|
return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Names for the snoop bits; hist_entry__snoop_snprintf() prints entry i when
 * bit i of the mem_snoop value is set.
 */
static const char * const snoop_access[] = {
	"N/A",		/* bit 0 */
	"None",		/* bit 1 */
	"Miss",		/* bit 2 */
	"Hit",		/* bit 3 */
	"HitM",		/* bit 4 */
};
#define NUM_SNOOP_ACCESS (sizeof(snoop_access) / sizeof(snoop_access[0]))
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
|
2013-01-24 15:10:35 +00:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
char out[64];
|
|
|
|
size_t sz = sizeof(out) - 1; /* -1 for null termination */
|
|
|
|
size_t i, l = 0;
|
|
|
|
u64 m = PERF_MEM_SNOOP_NA;
|
|
|
|
|
|
|
|
out[0] = '\0';
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
if (he->mem_info)
|
|
|
|
m = he->mem_info->data_src.mem_snoop;
|
2013-01-24 15:10:35 +00:00
|
|
|
|
|
|
|
for (i = 0; m && i < NUM_SNOOP_ACCESS; i++, m >>= 1) {
|
|
|
|
if (!(m & 0x1))
|
|
|
|
continue;
|
|
|
|
if (l) {
|
|
|
|
strcat(out, " or ");
|
|
|
|
l += 4;
|
|
|
|
}
|
|
|
|
strncat(out, snoop_access[i], sz - l);
|
|
|
|
l += strlen(snoop_access[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (*out == '\0')
|
|
|
|
strcpy(out, "N/A");
|
|
|
|
|
|
|
|
return repsep_snprintf(bf, size, "%-*s", width, out);
|
|
|
|
}
|
|
|
|
|
2014-06-01 13:38:29 +00:00
|
|
|
/*
 * Round 'address' down to the start of its cacheline by clearing the bits
 * selected by (cacheline_size - 1).
 */
static inline u64 cl_address(u64 address)
{
	return address & ~(cacheline_size - 1);
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
u64 l, r;
|
|
|
|
struct map *l_map, *r_map;
|
|
|
|
|
|
|
|
if (!left->mem_info) return -1;
|
|
|
|
if (!right->mem_info) return 1;
|
|
|
|
|
|
|
|
/* group event types together */
|
|
|
|
if (left->cpumode > right->cpumode) return -1;
|
|
|
|
if (left->cpumode < right->cpumode) return 1;
|
|
|
|
|
|
|
|
l_map = left->mem_info->daddr.map;
|
|
|
|
r_map = right->mem_info->daddr.map;
|
|
|
|
|
|
|
|
/* if both are NULL, jump to sort on al_addr instead */
|
|
|
|
if (!l_map && !r_map)
|
|
|
|
goto addr;
|
|
|
|
|
|
|
|
if (!l_map) return -1;
|
|
|
|
if (!r_map) return 1;
|
|
|
|
|
|
|
|
if (l_map->maj > r_map->maj) return -1;
|
|
|
|
if (l_map->maj < r_map->maj) return 1;
|
|
|
|
|
|
|
|
if (l_map->min > r_map->min) return -1;
|
|
|
|
if (l_map->min < r_map->min) return 1;
|
|
|
|
|
|
|
|
if (l_map->ino > r_map->ino) return -1;
|
|
|
|
if (l_map->ino < r_map->ino) return 1;
|
|
|
|
|
|
|
|
if (l_map->ino_generation > r_map->ino_generation) return -1;
|
|
|
|
if (l_map->ino_generation < r_map->ino_generation) return 1;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Addresses with no major/minor numbers are assumed to be
|
|
|
|
* anonymous in userspace. Sort those on pid then address.
|
|
|
|
*
|
|
|
|
* The kernel and non-zero major/minor mapped areas are
|
|
|
|
* assumed to be unity mapped. Sort those on address.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if ((left->cpumode != PERF_RECORD_MISC_KERNEL) &&
|
|
|
|
(!(l_map->flags & MAP_SHARED)) &&
|
|
|
|
!l_map->maj && !l_map->min && !l_map->ino &&
|
|
|
|
!l_map->ino_generation) {
|
|
|
|
/* userspace anonymous */
|
|
|
|
|
|
|
|
if (left->thread->pid_ > right->thread->pid_) return -1;
|
|
|
|
if (left->thread->pid_ < right->thread->pid_) return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
addr:
|
|
|
|
/* al_addr does all the right addr - start + offset calculations */
|
|
|
|
l = cl_address(left->mem_info->daddr.al_addr);
|
|
|
|
r = cl_address(right->mem_info->daddr.al_addr);
|
|
|
|
|
|
|
|
if (l > r) return -1;
|
|
|
|
if (l < r) return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
|
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
|
|
|
|
uint64_t addr = 0;
|
|
|
|
struct map *map = NULL;
|
|
|
|
struct symbol *sym = NULL;
|
|
|
|
char level = he->level;
|
|
|
|
|
|
|
|
if (he->mem_info) {
|
|
|
|
addr = cl_address(he->mem_info->daddr.al_addr);
|
|
|
|
map = he->mem_info->daddr.map;
|
|
|
|
sym = he->mem_info->daddr.sym;
|
|
|
|
|
|
|
|
/* print [s] for shared data mmaps */
|
|
|
|
if ((he->cpumode != PERF_RECORD_MISC_KERNEL) &&
|
|
|
|
map && (map->type == MAP__VARIABLE) &&
|
|
|
|
(map->flags & MAP_SHARED) &&
|
|
|
|
(map->maj || map->min || map->ino ||
|
|
|
|
map->ino_generation))
|
|
|
|
level = 's';
|
|
|
|
else if (!map)
|
|
|
|
level = 'X';
|
|
|
|
}
|
|
|
|
return _hist_entry__sym_snprintf(map, sym, addr, level, bf, size,
|
|
|
|
width);
|
|
|
|
}
|
|
|
|
|
2012-02-09 22:21:01 +00:00
|
|
|
struct sort_entry sort_mispredict = {
|
|
|
|
.se_header = "Branch Mispredicted",
|
|
|
|
.se_cmp = sort__mispredict_cmp,
|
|
|
|
.se_snprintf = hist_entry__mispredict_snprintf,
|
|
|
|
.se_width_idx = HISTC_MISPREDICT,
|
|
|
|
};
|
|
|
|
|
2013-01-24 15:10:29 +00:00
|
|
|
static u64 he_weight(struct hist_entry *he)
|
|
|
|
{
|
|
|
|
return he->stat.nr_events ? he->stat.weight / he->stat.nr_events : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
return he_weight(left) - he_weight(right);
|
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf,
|
2013-01-24 15:10:29 +00:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
2013-10-22 22:01:31 +00:00
|
|
|
return repsep_snprintf(bf, size, "%-*llu", width, he_weight(he));
|
2013-01-24 15:10:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
struct sort_entry sort_local_weight = {
|
|
|
|
.se_header = "Local Weight",
|
|
|
|
.se_cmp = sort__local_weight_cmp,
|
|
|
|
.se_snprintf = hist_entry__local_weight_snprintf,
|
|
|
|
.se_width_idx = HISTC_LOCAL_WEIGHT,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
return left->stat.weight - right->stat.weight;
|
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf,
|
2013-01-24 15:10:29 +00:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
2013-10-22 22:01:31 +00:00
|
|
|
return repsep_snprintf(bf, size, "%-*llu", width, he->stat.weight);
|
2013-01-24 15:10:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
struct sort_entry sort_global_weight = {
|
|
|
|
.se_header = "Weight",
|
|
|
|
.se_cmp = sort__global_weight_cmp,
|
|
|
|
.se_snprintf = hist_entry__global_weight_snprintf,
|
|
|
|
.se_width_idx = HISTC_GLOBAL_WEIGHT,
|
|
|
|
};
|
|
|
|
|
2013-01-24 15:10:35 +00:00
|
|
|
struct sort_entry sort_mem_daddr_sym = {
|
|
|
|
.se_header = "Data Symbol",
|
|
|
|
.se_cmp = sort__daddr_cmp,
|
|
|
|
.se_snprintf = hist_entry__daddr_snprintf,
|
|
|
|
.se_width_idx = HISTC_MEM_DADDR_SYMBOL,
|
|
|
|
};
|
|
|
|
|
2015-10-05 18:06:07 +00:00
|
|
|
struct sort_entry sort_mem_iaddr_sym = {
|
|
|
|
.se_header = "Code Symbol",
|
|
|
|
.se_cmp = sort__iaddr_cmp,
|
|
|
|
.se_snprintf = hist_entry__iaddr_snprintf,
|
|
|
|
.se_width_idx = HISTC_MEM_IADDR_SYMBOL,
|
|
|
|
};
|
|
|
|
|
2013-01-24 15:10:35 +00:00
|
|
|
struct sort_entry sort_mem_daddr_dso = {
|
|
|
|
.se_header = "Data Object",
|
|
|
|
.se_cmp = sort__dso_daddr_cmp,
|
|
|
|
.se_snprintf = hist_entry__dso_daddr_snprintf,
|
|
|
|
.se_width_idx = HISTC_MEM_DADDR_SYMBOL,
|
|
|
|
};
|
|
|
|
|
|
|
|
struct sort_entry sort_mem_locked = {
|
|
|
|
.se_header = "Locked",
|
|
|
|
.se_cmp = sort__locked_cmp,
|
|
|
|
.se_snprintf = hist_entry__locked_snprintf,
|
|
|
|
.se_width_idx = HISTC_MEM_LOCKED,
|
|
|
|
};
|
|
|
|
|
|
|
|
struct sort_entry sort_mem_tlb = {
|
|
|
|
.se_header = "TLB access",
|
|
|
|
.se_cmp = sort__tlb_cmp,
|
|
|
|
.se_snprintf = hist_entry__tlb_snprintf,
|
|
|
|
.se_width_idx = HISTC_MEM_TLB,
|
|
|
|
};
|
|
|
|
|
|
|
|
struct sort_entry sort_mem_lvl = {
|
|
|
|
.se_header = "Memory access",
|
|
|
|
.se_cmp = sort__lvl_cmp,
|
|
|
|
.se_snprintf = hist_entry__lvl_snprintf,
|
|
|
|
.se_width_idx = HISTC_MEM_LVL,
|
|
|
|
};
|
|
|
|
|
|
|
|
struct sort_entry sort_mem_snoop = {
|
|
|
|
.se_header = "Snoop",
|
|
|
|
.se_cmp = sort__snoop_cmp,
|
|
|
|
.se_snprintf = hist_entry__snoop_snprintf,
|
|
|
|
.se_width_idx = HISTC_MEM_SNOOP,
|
|
|
|
};
|
|
|
|
|
2014-06-01 13:38:29 +00:00
|
|
|
struct sort_entry sort_mem_dcacheline = {
|
|
|
|
.se_header = "Data Cacheline",
|
|
|
|
.se_cmp = sort__dcacheline_cmp,
|
|
|
|
.se_snprintf = hist_entry__dcacheline_snprintf,
|
|
|
|
.se_width_idx = HISTC_MEM_DCACHELINE,
|
|
|
|
};
|
|
|
|
|
2013-09-20 14:40:41 +00:00
|
|
|
static int64_t
|
|
|
|
sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2014-10-16 14:07:01 +00:00
|
|
|
if (!left->branch_info || !right->branch_info)
|
|
|
|
return cmp_null(left->branch_info, right->branch_info);
|
|
|
|
|
2013-09-20 14:40:41 +00:00
|
|
|
return left->branch_info->flags.abort !=
|
|
|
|
right->branch_info->flags.abort;
|
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__abort_snprintf(struct hist_entry *he, char *bf,
|
2013-09-20 14:40:41 +00:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
2014-10-16 14:07:01 +00:00
|
|
|
static const char *out = "N/A";
|
|
|
|
|
|
|
|
if (he->branch_info) {
|
|
|
|
if (he->branch_info->flags.abort)
|
|
|
|
out = "A";
|
|
|
|
else
|
|
|
|
out = ".";
|
|
|
|
}
|
2013-09-20 14:40:41 +00:00
|
|
|
|
|
|
|
return repsep_snprintf(bf, size, "%-*s", width, out);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct sort_entry sort_abort = {
|
|
|
|
.se_header = "Transaction abort",
|
|
|
|
.se_cmp = sort__abort_cmp,
|
|
|
|
.se_snprintf = hist_entry__abort_snprintf,
|
|
|
|
.se_width_idx = HISTC_ABORT,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__in_tx_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2014-10-16 14:07:02 +00:00
|
|
|
if (!left->branch_info || !right->branch_info)
|
|
|
|
return cmp_null(left->branch_info, right->branch_info);
|
|
|
|
|
2013-09-20 14:40:41 +00:00
|
|
|
return left->branch_info->flags.in_tx !=
|
|
|
|
right->branch_info->flags.in_tx;
|
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__in_tx_snprintf(struct hist_entry *he, char *bf,
|
2013-09-20 14:40:41 +00:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
2014-10-16 14:07:02 +00:00
|
|
|
static const char *out = "N/A";
|
2013-09-20 14:40:41 +00:00
|
|
|
|
2014-10-16 14:07:02 +00:00
|
|
|
if (he->branch_info) {
|
|
|
|
if (he->branch_info->flags.in_tx)
|
|
|
|
out = "T";
|
|
|
|
else
|
|
|
|
out = ".";
|
|
|
|
}
|
2013-09-20 14:40:41 +00:00
|
|
|
|
|
|
|
return repsep_snprintf(bf, size, "%-*s", width, out);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct sort_entry sort_in_tx = {
|
|
|
|
.se_header = "Branch in transaction",
|
|
|
|
.se_cmp = sort__in_tx_cmp,
|
|
|
|
.se_snprintf = hist_entry__in_tx_snprintf,
|
|
|
|
.se_width_idx = HISTC_IN_TX,
|
|
|
|
};
|
|
|
|
|
2013-09-20 14:40:43 +00:00
|
|
|
static int64_t
|
|
|
|
sort__transaction_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
return left->transaction - right->transaction;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Copy 'str' (including its terminator) to 'p' and return a pointer to the
 * terminator just written, i.e. the place where the next piece goes.
 * The caller must provide enough room at 'p'.
 */
static inline char *add_str(char *p, const char *str)
{
	size_t len = strlen(str);

	strcpy(p, str);
	return p + len;
}
|
|
|
|
|
|
|
|
static struct txbit {
|
|
|
|
unsigned flag;
|
|
|
|
const char *name;
|
|
|
|
int skip_for_len;
|
|
|
|
} txbits[] = {
|
|
|
|
{ PERF_TXN_ELISION, "EL ", 0 },
|
|
|
|
{ PERF_TXN_TRANSACTION, "TX ", 1 },
|
|
|
|
{ PERF_TXN_SYNC, "SYNC ", 1 },
|
|
|
|
{ PERF_TXN_ASYNC, "ASYNC ", 0 },
|
|
|
|
{ PERF_TXN_RETRY, "RETRY ", 0 },
|
|
|
|
{ PERF_TXN_CONFLICT, "CON ", 0 },
|
|
|
|
{ PERF_TXN_CAPACITY_WRITE, "CAP-WRITE ", 1 },
|
|
|
|
{ PERF_TXN_CAPACITY_READ, "CAP-READ ", 0 },
|
|
|
|
{ 0, NULL, 0 }
|
|
|
|
};
|
|
|
|
|
|
|
|
int hist_entry__transaction_len(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int len = 0;
|
|
|
|
|
|
|
|
for (i = 0; txbits[i].name; i++) {
|
|
|
|
if (!txbits[i].skip_for_len)
|
|
|
|
len += strlen(txbits[i].name);
|
|
|
|
}
|
|
|
|
len += 4; /* :XX<space> */
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
2013-10-22 22:01:31 +00:00
|
|
|
static int hist_entry__transaction_snprintf(struct hist_entry *he, char *bf,
|
2013-09-20 14:40:43 +00:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
2013-10-22 22:01:31 +00:00
|
|
|
u64 t = he->transaction;
|
2013-09-20 14:40:43 +00:00
|
|
|
char buf[128];
|
|
|
|
char *p = buf;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
buf[0] = 0;
|
|
|
|
for (i = 0; txbits[i].name; i++)
|
|
|
|
if (txbits[i].flag & t)
|
|
|
|
p = add_str(p, txbits[i].name);
|
|
|
|
if (t && !(t & (PERF_TXN_SYNC|PERF_TXN_ASYNC)))
|
|
|
|
p = add_str(p, "NEITHER ");
|
|
|
|
if (t & PERF_TXN_ABORT_MASK) {
|
|
|
|
sprintf(p, ":%" PRIx64,
|
|
|
|
(t & PERF_TXN_ABORT_MASK) >>
|
|
|
|
PERF_TXN_ABORT_SHIFT);
|
|
|
|
p += strlen(p);
|
|
|
|
}
|
|
|
|
|
|
|
|
return repsep_snprintf(bf, size, "%-*s", width, buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct sort_entry sort_transaction = {
|
|
|
|
.se_header = "Transaction ",
|
|
|
|
.se_cmp = sort__transaction_cmp,
|
|
|
|
.se_snprintf = hist_entry__transaction_snprintf,
|
|
|
|
.se_width_idx = HISTC_TRANSACTION,
|
|
|
|
};
|
|
|
|
|
2011-06-29 01:14:52 +00:00
|
|
|
/* One row of a sort key table: a key name bound to its sort_entry. */
struct sort_dimension {
	const char		*name;	/* key name, e.g. "pid" */
	struct sort_entry	*entry;	/* sort_entry implementing the key */
	int			taken;	/* not set by the DIM() table initializers */
};
|
|
|
|
|
2012-02-09 22:21:01 +00:00
|
|
|
#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
|
|
|
|
|
2012-12-27 09:11:46 +00:00
|
|
|
static struct sort_dimension common_sort_dimensions[] = {
|
2012-02-09 22:21:01 +00:00
|
|
|
DIM(SORT_PID, "pid", sort_thread),
|
|
|
|
DIM(SORT_COMM, "comm", sort_comm),
|
|
|
|
DIM(SORT_DSO, "dso", sort_dso),
|
|
|
|
DIM(SORT_SYM, "symbol", sort_sym),
|
|
|
|
DIM(SORT_PARENT, "parent", sort_parent),
|
|
|
|
DIM(SORT_CPU, "cpu", sort_cpu),
|
2015-09-04 14:45:43 +00:00
|
|
|
DIM(SORT_SOCKET, "socket", sort_socket),
|
2012-05-30 13:33:24 +00:00
|
|
|
DIM(SORT_SRCLINE, "srcline", sort_srcline),
|
2015-08-07 22:54:24 +00:00
|
|
|
DIM(SORT_SRCFILE, "srcfile", sort_srcfile),
|
2013-07-18 22:58:53 +00:00
|
|
|
DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
|
|
|
|
DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
|
2013-09-20 14:40:43 +00:00
|
|
|
DIM(SORT_TRANSACTION, "transaction", sort_transaction),
|
2015-12-22 17:07:04 +00:00
|
|
|
DIM(SORT_TRACE, "trace", sort_trace),
|
2011-06-29 01:14:52 +00:00
|
|
|
};
|
|
|
|
|
2012-12-27 09:11:46 +00:00
|
|
|
#undef DIM
|
|
|
|
|
|
|
|
#define DIM(d, n, func) [d - __SORT_BRANCH_STACK] = { .name = n, .entry = &(func) }
|
|
|
|
|
|
|
|
static struct sort_dimension bstack_sort_dimensions[] = {
|
|
|
|
DIM(SORT_DSO_FROM, "dso_from", sort_dso_from),
|
|
|
|
DIM(SORT_DSO_TO, "dso_to", sort_dso_to),
|
|
|
|
DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from),
|
|
|
|
DIM(SORT_SYM_TO, "symbol_to", sort_sym_to),
|
|
|
|
DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
|
2013-09-20 14:40:41 +00:00
|
|
|
DIM(SORT_IN_TX, "in_tx", sort_in_tx),
|
|
|
|
DIM(SORT_ABORT, "abort", sort_abort),
|
2015-07-18 15:24:46 +00:00
|
|
|
DIM(SORT_CYCLES, "cycles", sort_cycles),
|
2012-12-27 09:11:46 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
#undef DIM
|
|
|
|
|
2013-04-03 12:26:11 +00:00
|
|
|
#define DIM(d, n, func) [d - __SORT_MEMORY_MODE] = { .name = n, .entry = &(func) }
|
|
|
|
|
|
|
|
static struct sort_dimension memory_sort_dimensions[] = {
|
|
|
|
DIM(SORT_MEM_DADDR_SYMBOL, "symbol_daddr", sort_mem_daddr_sym),
|
2015-10-05 18:06:07 +00:00
|
|
|
DIM(SORT_MEM_IADDR_SYMBOL, "symbol_iaddr", sort_mem_iaddr_sym),
|
2013-04-03 12:26:11 +00:00
|
|
|
DIM(SORT_MEM_DADDR_DSO, "dso_daddr", sort_mem_daddr_dso),
|
|
|
|
DIM(SORT_MEM_LOCKED, "locked", sort_mem_locked),
|
|
|
|
DIM(SORT_MEM_TLB, "tlb", sort_mem_tlb),
|
|
|
|
DIM(SORT_MEM_LVL, "mem", sort_mem_lvl),
|
|
|
|
DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
|
2014-06-01 13:38:29 +00:00
|
|
|
DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
|
2013-04-03 12:26:11 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
#undef DIM
|
|
|
|
|
2014-03-04 00:06:42 +00:00
|
|
|
/* One row of the hpp key table: a key name bound to a perf_hpp_fmt. */
struct hpp_dimension {
	const char		*name;	/* key name, e.g. "overhead" */
	struct perf_hpp_fmt	*fmt;	/* format template for the key */
	int			taken;	/* not set by the DIM() table initializers */
};
|
|
|
|
|
|
|
|
#define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], }
|
|
|
|
|
|
|
|
static struct hpp_dimension hpp_sort_dimensions[] = {
|
|
|
|
DIM(PERF_HPP__OVERHEAD, "overhead"),
|
|
|
|
DIM(PERF_HPP__OVERHEAD_SYS, "overhead_sys"),
|
|
|
|
DIM(PERF_HPP__OVERHEAD_US, "overhead_us"),
|
|
|
|
DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"),
|
|
|
|
DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"),
|
2013-10-30 07:06:59 +00:00
|
|
|
DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
|
2014-03-04 00:06:42 +00:00
|
|
|
DIM(PERF_HPP__SAMPLES, "sample"),
|
|
|
|
DIM(PERF_HPP__PERIOD, "period"),
|
|
|
|
};
|
|
|
|
|
|
|
|
#undef DIM
|
|
|
|
|
2014-03-03 02:46:55 +00:00
|
|
|
struct hpp_sort_entry {
|
|
|
|
struct perf_hpp_fmt hpp;
|
|
|
|
struct sort_entry *se;
|
|
|
|
};
|
|
|
|
|
2014-07-31 05:47:37 +00:00
|
|
|
void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists)
|
2014-03-20 02:18:54 +00:00
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse;
|
|
|
|
|
|
|
|
if (!perf_hpp__is_sort_entry(fmt))
|
|
|
|
return;
|
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
2014-07-31 05:47:40 +00:00
|
|
|
hists__new_col_len(hists, hse->se->se_width_idx, strlen(fmt->name));
|
2014-03-20 02:18:54 +00:00
|
|
|
}
|
|
|
|
|
2014-03-03 02:46:55 +00:00
|
|
|
static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
|
|
|
|
struct perf_evsel *evsel)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse;
|
2014-07-31 05:47:38 +00:00
|
|
|
size_t len = fmt->user_len;
|
2014-03-03 02:46:55 +00:00
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
|
|
|
|
2014-07-31 05:47:38 +00:00
|
|
|
if (!len)
|
2014-10-09 16:13:41 +00:00
|
|
|
len = hists__col_len(evsel__hists(evsel), hse->se->se_width_idx);
|
2014-07-31 05:47:38 +00:00
|
|
|
|
2014-07-31 05:47:40 +00:00
|
|
|
return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, fmt->name);
|
2014-03-03 02:46:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int __sort__hpp_width(struct perf_hpp_fmt *fmt,
|
|
|
|
struct perf_hpp *hpp __maybe_unused,
|
|
|
|
struct perf_evsel *evsel)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse;
|
2014-07-31 05:47:38 +00:00
|
|
|
size_t len = fmt->user_len;
|
2014-03-03 02:46:55 +00:00
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
|
|
|
|
2014-07-31 05:47:38 +00:00
|
|
|
if (!len)
|
2014-10-09 16:13:41 +00:00
|
|
|
len = hists__col_len(evsel__hists(evsel), hse->se->se_width_idx);
|
2014-07-31 05:47:38 +00:00
|
|
|
|
|
|
|
return len;
|
2014-03-03 02:46:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int __sort__hpp_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
|
|
|
|
struct hist_entry *he)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse;
|
2014-07-31 05:47:38 +00:00
|
|
|
size_t len = fmt->user_len;
|
2014-03-03 02:46:55 +00:00
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
2014-07-31 05:47:38 +00:00
|
|
|
|
|
|
|
if (!len)
|
|
|
|
len = hists__col_len(he->hists, hse->se->se_width_idx);
|
2014-03-03 02:46:55 +00:00
|
|
|
|
|
|
|
return hse->se->se_snprintf(he, hpp->buf, hpp->size, len);
|
|
|
|
}
|
|
|
|
|
2015-01-08 00:45:46 +00:00
|
|
|
static int64_t __sort__hpp_cmp(struct perf_hpp_fmt *fmt,
|
|
|
|
struct hist_entry *a, struct hist_entry *b)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse;
|
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
|
|
|
return hse->se->se_cmp(a, b);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t __sort__hpp_collapse(struct perf_hpp_fmt *fmt,
|
|
|
|
struct hist_entry *a, struct hist_entry *b)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse;
|
|
|
|
int64_t (*collapse_fn)(struct hist_entry *, struct hist_entry *);
|
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
|
|
|
collapse_fn = hse->se->se_collapse ?: hse->se->se_cmp;
|
|
|
|
return collapse_fn(a, b);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t __sort__hpp_sort(struct perf_hpp_fmt *fmt,
|
|
|
|
struct hist_entry *a, struct hist_entry *b)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse;
|
|
|
|
int64_t (*sort_fn)(struct hist_entry *, struct hist_entry *);
|
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
|
|
|
sort_fn = hse->se->se_sort ?: hse->se->se_cmp;
|
|
|
|
return sort_fn(a, b);
|
|
|
|
}
|
|
|
|
|
2016-01-18 09:24:03 +00:00
|
|
|
bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format)
|
|
|
|
{
|
|
|
|
return format->header == __sort__hpp_header;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse_a;
|
|
|
|
struct hpp_sort_entry *hse_b;
|
|
|
|
|
|
|
|
if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
hse_a = container_of(a, struct hpp_sort_entry, hpp);
|
|
|
|
hse_b = container_of(b, struct hpp_sort_entry, hpp);
|
|
|
|
|
|
|
|
return hse_a->se == hse_b->se;
|
|
|
|
}
|
|
|
|
|
2016-01-18 09:24:09 +00:00
|
|
|
static void hse_free(struct perf_hpp_fmt *fmt)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse;
|
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
|
|
|
free(hse);
|
|
|
|
}
|
|
|
|
|
2014-03-04 01:46:34 +00:00
|
|
|
static struct hpp_sort_entry *
|
|
|
|
__sort_dimension__alloc_hpp(struct sort_dimension *sd)
|
2014-03-03 02:46:55 +00:00
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse;
|
|
|
|
|
|
|
|
hse = malloc(sizeof(*hse));
|
|
|
|
if (hse == NULL) {
|
|
|
|
pr_err("Memory allocation failed\n");
|
2014-03-04 01:46:34 +00:00
|
|
|
return NULL;
|
2014-03-03 02:46:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
hse->se = sd->entry;
|
2014-07-31 05:47:40 +00:00
|
|
|
hse->hpp.name = sd->entry->se_header;
|
2014-03-03 02:46:55 +00:00
|
|
|
hse->hpp.header = __sort__hpp_header;
|
|
|
|
hse->hpp.width = __sort__hpp_width;
|
|
|
|
hse->hpp.entry = __sort__hpp_entry;
|
|
|
|
hse->hpp.color = NULL;
|
|
|
|
|
2015-01-08 00:45:46 +00:00
|
|
|
hse->hpp.cmp = __sort__hpp_cmp;
|
|
|
|
hse->hpp.collapse = __sort__hpp_collapse;
|
|
|
|
hse->hpp.sort = __sort__hpp_sort;
|
2016-01-18 09:24:03 +00:00
|
|
|
hse->hpp.equal = __sort__hpp_equal;
|
2016-01-18 09:24:09 +00:00
|
|
|
hse->hpp.free = hse_free;
|
2014-03-03 02:46:55 +00:00
|
|
|
|
|
|
|
INIT_LIST_HEAD(&hse->hpp.list);
|
|
|
|
INIT_LIST_HEAD(&hse->hpp.sort_list);
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 15:15:47 +00:00
|
|
|
hse->hpp.elide = false;
|
2014-07-31 05:47:37 +00:00
|
|
|
hse->hpp.len = 0;
|
2014-07-31 05:47:38 +00:00
|
|
|
hse->hpp.user_len = 0;
|
2014-03-03 02:46:55 +00:00
|
|
|
|
2014-03-04 01:46:34 +00:00
|
|
|
return hse;
|
|
|
|
}
|
|
|
|
|
2016-01-18 09:24:09 +00:00
|
|
|
/*
 * Free callback for a perf_hpp_fmt: the format itself is the whole
 * allocation, so releasing it is a single free().  Installed as
 * fmt->free by __hpp_dimension__alloc_hpp().
 */
static void hpp_free(struct perf_hpp_fmt *fmt)
{
	free(fmt);
}
|
|
|
|
|
2016-01-18 09:24:07 +00:00
|
|
|
static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd)
|
|
|
|
{
|
|
|
|
struct perf_hpp_fmt *fmt;
|
|
|
|
|
|
|
|
fmt = memdup(hd->fmt, sizeof(*fmt));
|
|
|
|
if (fmt) {
|
|
|
|
INIT_LIST_HEAD(&fmt->list);
|
|
|
|
INIT_LIST_HEAD(&fmt->sort_list);
|
2016-01-18 09:24:09 +00:00
|
|
|
fmt->free = hpp_free;
|
2016-01-18 09:24:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return fmt;
|
|
|
|
}
|
|
|
|
|
2014-03-04 01:46:34 +00:00
|
|
|
static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd);
|
|
|
|
|
|
|
|
if (hse == NULL)
|
|
|
|
return -1;
|
|
|
|
|
2014-03-03 02:46:55 +00:00
|
|
|
perf_hpp__register_sort_field(&hse->hpp);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-01-18 09:24:16 +00:00
|
|
|
static int __sort_dimension__add_hpp_output(struct perf_hpp_list *list,
|
|
|
|
struct sort_dimension *sd)
|
2014-03-04 01:46:34 +00:00
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd);
|
|
|
|
|
|
|
|
if (hse == NULL)
|
|
|
|
return -1;
|
|
|
|
|
2016-01-18 09:24:16 +00:00
|
|
|
perf_hpp_list__column_register(list, &hse->hpp);
|
2014-03-04 01:46:34 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
perf tools: Add dynamic sort key for tracepoint events
The existing sort keys are less useful for tracepoint events in that
they are always sampled at the same place, the function where the
tracepoint is located.
For example, a 'perf report' on sched:sched_switch event looks like the
following:
# Overhead Command Shared Object Symbol
# ........ ............... ................ ..............
#
47.22% swapper [kernel.vmlinux] [k] __schedule
21.67% transmission-gt [kernel.vmlinux] [k] __schedule
8.23% netctl-auto [kernel.vmlinux] [k] __schedule
5.53% kworker/0:1H [kernel.vmlinux] [k] __schedule
1.98% Xephyr [kernel.vmlinux] [k] __schedule
1.33% irq/33-iwlwifi [kernel.vmlinux] [k] __schedule
1.17% wpa_cli [kernel.vmlinux] [k] __schedule
1.13% rcu_preempt [kernel.vmlinux] [k] __schedule
0.85% ksoftirqd/0 [kernel.vmlinux] [k] __schedule
0.77% Timer [kernel.vmlinux] [k] __schedule
In fact, tracepoints have meaningful information in their fields but
there's no way to use in 'perf report' currently. The dynamic sort keys
are introduced in this patch to overcome this limitation.
The sched:sched_switch events have following fields:
# sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 268
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;
print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
next_comm=%s next_pid=%d next_prio=%d",
REC->prev_comm, REC->prev_pid, REC->prev_prio,
REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
"|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
{ 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
With dynamic sort keys, you can use <event.field> as a sort key. Those
dynamic keys are checked and created on demand. For instance, below is
to sort by next_pid field output on the same data file:
$ perf report -s comm,sched:sched_switch.next_pid --stdio
...
# Overhead Command next_pid
# ........ ............... ..........
#
21.23% transmission-gt 0
20.86% swapper 17773
6.62% netctl-auto 0
5.25% swapper 109
5.21% kworker/0:1H 0
1.98% Xephyr 0
1.98% swapper 6524
1.98% swapper 27478
1.37% swapper 27476
1.17% swapper 233
Multiple dynamic sort keys are also supported:
$ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
...
# Overhead Command next_pid next_comm
# ........ ............... .......... ................
#
20.86% swapper 17773 transmission-gt
9.64% transmission-gt 0 swapper/0
9.16% transmission-gt 0 swapper/2
5.25% swapper 109 kworker/0:1H
5.21% kworker/0:1H 0 swapper/0
2.14% netctl-auto 0 swapper/2
1.98% netctl-auto 0 swapper/0
1.98% swapper 6524 Xephyr
1.98% swapper 27478 netctl-auto
1.78% transmission-gt 0 swapper/3
1.53% Xephyr 0 swapper/0
1.29% netctl-auto 0 swapper/1
1.29% swapper 27476 netctl-auto
1.21% netctl-auto 0 swapper/3
1.17% swapper 233 irq/33-iwlwifi
Note that pid 0 exists for each cpu so have comm of 'swapper/N'.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1450804030-29193-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-12-22 17:07:02 +00:00
|
|
|
struct hpp_dynamic_entry {
|
|
|
|
struct perf_hpp_fmt hpp;
|
|
|
|
struct perf_evsel *evsel;
|
|
|
|
struct format_field *field;
|
|
|
|
unsigned dynamic_len;
|
2015-12-22 17:07:05 +00:00
|
|
|
bool raw_trace;
|
perf tools: Add dynamic sort key for tracepoint events
The existing sort keys are less useful for tracepoint events in that
they are always sampled at the same place, the function where the
tracepoint is located.
For example, a 'perf report' on sched:sched_switch event looks like the
following:
# Overhead Command Shared Object Symbol
# ........ ............... ................ ..............
#
47.22% swapper [kernel.vmlinux] [k] __schedule
21.67% transmission-gt [kernel.vmlinux] [k] __schedule
8.23% netctl-auto [kernel.vmlinux] [k] __schedule
5.53% kworker/0:1H [kernel.vmlinux] [k] __schedule
1.98% Xephyr [kernel.vmlinux] [k] __schedule
1.33% irq/33-iwlwifi [kernel.vmlinux] [k] __schedule
1.17% wpa_cli [kernel.vmlinux] [k] __schedule
1.13% rcu_preempt [kernel.vmlinux] [k] __schedule
0.85% ksoftirqd/0 [kernel.vmlinux] [k] __schedule
0.77% Timer [kernel.vmlinux] [k] __schedule
In fact, tracepoints have meaningful information in their fields but
there's no way to use in 'perf report' currently. The dynamic sort keys
are introduced in this patc to overcome this limitation.
The sched:sched_switch events have following fields:
# sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 268
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;
print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
next_comm=%s next_pid=%d next_prio=%d",
REC->prev_comm, REC->prev_pid, REC->prev_prio,
REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
"|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
{ 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
With dynamic sort keys, you can use <event.field> as a sort key. Those
dynamic keys are checked and created on demand. For instance, below is
to sort by next_pid field output on the same data file:
$ perf report -s comm,sched:sched_switch.next_pid --stdio
...
# Overhead Command next_pid
# ........ ............... ..........
#
21.23% transmission-gt 0
20.86% swapper 17773
6.62% netctl-auto 0
5.25% swapper 109
5.21% kworker/0:1H 0
1.98% Xephyr 0
1.98% swapper 6524
1.98% swapper 27478
1.37% swapper 27476
1.17% swapper 233
Multiple dynamic sort keys are also supported:
$ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
...
# Overhead Command next_pid next_comm
# ........ ............... .......... ................
#
20.86% swapper 17773 transmission-gt
9.64% transmission-gt 0 swapper/0
9.16% transmission-gt 0 swapper/2
5.25% swapper 109 kworker/0:1H
5.21% kworker/0:1H 0 swapper/0
2.14% netctl-auto 0 swapper/2
1.98% netctl-auto 0 swapper/0
1.98% swapper 6524 Xephyr
1.98% swapper 27478 netctl-auto
1.78% transmission-gt 0 swapper/3
1.53% Xephyr 0 swapper/0
1.29% netctl-auto 0 swapper/1
1.29% swapper 27476 netctl-auto
1.21% netctl-auto 0 swapper/3
1.17% swapper 233 irq/33-iwlwifi
Note that pid 0 exists for each cpu so have comm of 'swapper/N'.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1450804030-29193-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-12-22 17:07:02 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
static int hde_width(struct hpp_dynamic_entry *hde)
|
|
|
|
{
|
|
|
|
if (!hde->hpp.len) {
|
|
|
|
int len = hde->dynamic_len;
|
|
|
|
int namelen = strlen(hde->field->name);
|
|
|
|
int fieldlen = hde->field->size;
|
|
|
|
|
|
|
|
if (namelen > len)
|
|
|
|
len = namelen;
|
|
|
|
|
|
|
|
if (!(hde->field->flags & FIELD_IS_STRING)) {
|
|
|
|
/* length for print hex numbers */
|
|
|
|
fieldlen = hde->field->size * 2 + 2;
|
|
|
|
}
|
|
|
|
if (fieldlen > len)
|
|
|
|
len = fieldlen;
|
|
|
|
|
|
|
|
hde->hpp.len = len;
|
|
|
|
}
|
|
|
|
return hde->hpp.len;
|
|
|
|
}
|
|
|
|
|
2015-12-22 17:07:03 +00:00
|
|
|
static void update_dynamic_len(struct hpp_dynamic_entry *hde,
|
|
|
|
struct hist_entry *he)
|
|
|
|
{
|
|
|
|
char *str, *pos;
|
|
|
|
struct format_field *field = hde->field;
|
|
|
|
size_t namelen;
|
|
|
|
bool last = false;
|
|
|
|
|
2015-12-22 17:07:05 +00:00
|
|
|
if (hde->raw_trace)
|
|
|
|
return;
|
|
|
|
|
2015-12-22 17:07:03 +00:00
|
|
|
/* parse pretty print result and update max length */
|
|
|
|
if (!he->trace_output)
|
|
|
|
he->trace_output = get_trace_output(he);
|
|
|
|
|
|
|
|
namelen = strlen(field->name);
|
|
|
|
str = he->trace_output;
|
|
|
|
|
|
|
|
while (str) {
|
|
|
|
pos = strchr(str, ' ');
|
|
|
|
if (pos == NULL) {
|
|
|
|
last = true;
|
|
|
|
pos = str + strlen(str);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!strncmp(str, field->name, namelen)) {
|
|
|
|
size_t len;
|
|
|
|
|
|
|
|
str += namelen + 1;
|
|
|
|
len = pos - str;
|
|
|
|
|
|
|
|
if (len > hde->dynamic_len)
|
|
|
|
hde->dynamic_len = len;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (last)
|
|
|
|
str = NULL;
|
|
|
|
else
|
|
|
|
str = pos + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
perf tools: Add dynamic sort key for tracepoint events
The existing sort keys are less useful for tracepoint events in that
they are always sampled at the same place, the function where the
tracepoint is located.
For example, a 'perf report' on sched:sched_switch event looks like the
following:
# Overhead Command Shared Object Symbol
# ........ ............... ................ ..............
#
47.22% swapper [kernel.vmlinux] [k] __schedule
21.67% transmission-gt [kernel.vmlinux] [k] __schedule
8.23% netctl-auto [kernel.vmlinux] [k] __schedule
5.53% kworker/0:1H [kernel.vmlinux] [k] __schedule
1.98% Xephyr [kernel.vmlinux] [k] __schedule
1.33% irq/33-iwlwifi [kernel.vmlinux] [k] __schedule
1.17% wpa_cli [kernel.vmlinux] [k] __schedule
1.13% rcu_preempt [kernel.vmlinux] [k] __schedule
0.85% ksoftirqd/0 [kernel.vmlinux] [k] __schedule
0.77% Timer [kernel.vmlinux] [k] __schedule
In fact, tracepoints have meaningful information in their fields but
there's no way to use in 'perf report' currently. The dynamic sort keys
are introduced in this patch to overcome this limitation.
The sched:sched_switch events have following fields:
# sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 268
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;
print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
next_comm=%s next_pid=%d next_prio=%d",
REC->prev_comm, REC->prev_pid, REC->prev_prio,
REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
"|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
{ 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
With dynamic sort keys, you can use <event.field> as a sort key. Those
dynamic keys are checked and created on demand. For instance, below is
to sort by next_pid field output on the same data file:
$ perf report -s comm,sched:sched_switch.next_pid --stdio
...
# Overhead Command next_pid
# ........ ............... ..........
#
21.23% transmission-gt 0
20.86% swapper 17773
6.62% netctl-auto 0
5.25% swapper 109
5.21% kworker/0:1H 0
1.98% Xephyr 0
1.98% swapper 6524
1.98% swapper 27478
1.37% swapper 27476
1.17% swapper 233
Multiple dynamic sort keys are also supported:
$ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
...
# Overhead Command next_pid next_comm
# ........ ............... .......... ................
#
20.86% swapper 17773 transmission-gt
9.64% transmission-gt 0 swapper/0
9.16% transmission-gt 0 swapper/2
5.25% swapper 109 kworker/0:1H
5.21% kworker/0:1H 0 swapper/0
2.14% netctl-auto 0 swapper/2
1.98% netctl-auto 0 swapper/0
1.98% swapper 6524 Xephyr
1.98% swapper 27478 netctl-auto
1.78% transmission-gt 0 swapper/3
1.53% Xephyr 0 swapper/0
1.29% netctl-auto 0 swapper/1
1.29% swapper 27476 netctl-auto
1.21% netctl-auto 0 swapper/3
1.17% swapper 233 irq/33-iwlwifi
Note that pid 0 exists for each cpu so have comm of 'swapper/N'.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1450804030-29193-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-12-22 17:07:02 +00:00
|
|
|
static int __sort__hde_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
|
|
|
|
struct perf_evsel *evsel __maybe_unused)
|
|
|
|
{
|
|
|
|
struct hpp_dynamic_entry *hde;
|
|
|
|
size_t len = fmt->user_len;
|
|
|
|
|
|
|
|
hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
|
|
|
|
|
|
|
|
if (!len)
|
|
|
|
len = hde_width(hde);
|
|
|
|
|
|
|
|
return scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, hde->field->name);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int __sort__hde_width(struct perf_hpp_fmt *fmt,
|
|
|
|
struct perf_hpp *hpp __maybe_unused,
|
|
|
|
struct perf_evsel *evsel __maybe_unused)
|
|
|
|
{
|
|
|
|
struct hpp_dynamic_entry *hde;
|
|
|
|
size_t len = fmt->user_len;
|
|
|
|
|
|
|
|
hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
|
|
|
|
|
|
|
|
if (!len)
|
|
|
|
len = hde_width(hde);
|
|
|
|
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
2015-12-22 17:07:08 +00:00
|
|
|
bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists)
|
|
|
|
{
|
|
|
|
struct hpp_dynamic_entry *hde;
|
|
|
|
|
|
|
|
hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
|
|
|
|
|
|
|
|
return hists_to_evsel(hists) == hde->evsel;
|
|
|
|
}
|
|
|
|
|
perf tools: Add dynamic sort key for tracepoint events
The existing sort keys are less useful for tracepoint events in that
they are always sampled at the same place, the function where the
tracepoint is located.
For example, a 'perf report' on sched:sched_switch event looks like the
following:
# Overhead Command Shared Object Symbol
# ........ ............... ................ ..............
#
47.22% swapper [kernel.vmlinux] [k] __schedule
21.67% transmission-gt [kernel.vmlinux] [k] __schedule
8.23% netctl-auto [kernel.vmlinux] [k] __schedule
5.53% kworker/0:1H [kernel.vmlinux] [k] __schedule
1.98% Xephyr [kernel.vmlinux] [k] __schedule
1.33% irq/33-iwlwifi [kernel.vmlinux] [k] __schedule
1.17% wpa_cli [kernel.vmlinux] [k] __schedule
1.13% rcu_preempt [kernel.vmlinux] [k] __schedule
0.85% ksoftirqd/0 [kernel.vmlinux] [k] __schedule
0.77% Timer [kernel.vmlinux] [k] __schedule
In fact, tracepoints have meaningful information in their fields but
there's no way to use in 'perf report' currently. The dynamic sort keys
are introduced in this patch to overcome this limitation.
The sched:sched_switch events have following fields:
# sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 268
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;
print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
next_comm=%s next_pid=%d next_prio=%d",
REC->prev_comm, REC->prev_pid, REC->prev_prio,
REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
"|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
{ 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
With dynamic sort keys, you can use <event.field> as a sort key. Those
dynamic keys are checked and created on demand. For instance, below is
to sort by next_pid field output on the same data file:
$ perf report -s comm,sched:sched_switch.next_pid --stdio
...
# Overhead Command next_pid
# ........ ............... ..........
#
21.23% transmission-gt 0
20.86% swapper 17773
6.62% netctl-auto 0
5.25% swapper 109
5.21% kworker/0:1H 0
1.98% Xephyr 0
1.98% swapper 6524
1.98% swapper 27478
1.37% swapper 27476
1.17% swapper 233
Multiple dynamic sort keys are also supported:
$ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
...
# Overhead Command next_pid next_comm
# ........ ............... .......... ................
#
20.86% swapper 17773 transmission-gt
9.64% transmission-gt 0 swapper/0
9.16% transmission-gt 0 swapper/2
5.25% swapper 109 kworker/0:1H
5.21% kworker/0:1H 0 swapper/0
2.14% netctl-auto 0 swapper/2
1.98% netctl-auto 0 swapper/0
1.98% swapper 6524 Xephyr
1.98% swapper 27478 netctl-auto
1.78% transmission-gt 0 swapper/3
1.53% Xephyr 0 swapper/0
1.29% netctl-auto 0 swapper/1
1.29% swapper 27476 netctl-auto
1.21% netctl-auto 0 swapper/3
1.17% swapper 233 irq/33-iwlwifi
Note that pid 0 exists for each cpu so have comm of 'swapper/N'.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1450804030-29193-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-12-22 17:07:02 +00:00
|
|
|
static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
|
|
|
|
struct hist_entry *he)
|
|
|
|
{
|
|
|
|
struct hpp_dynamic_entry *hde;
|
|
|
|
size_t len = fmt->user_len;
|
2015-12-22 17:07:03 +00:00
|
|
|
char *str, *pos;
|
|
|
|
struct format_field *field;
|
|
|
|
size_t namelen;
|
|
|
|
bool last = false;
|
perf tools: Add dynamic sort key for tracepoint events
The existing sort keys are less useful for tracepoint events in that
they are always sampled at the same place, the function where the
tracepoint is located.
For example, a 'perf report' on sched:sched_switch event looks like the
following:
# Overhead Command Shared Object Symbol
# ........ ............... ................ ..............
#
47.22% swapper [kernel.vmlinux] [k] __schedule
21.67% transmission-gt [kernel.vmlinux] [k] __schedule
8.23% netctl-auto [kernel.vmlinux] [k] __schedule
5.53% kworker/0:1H [kernel.vmlinux] [k] __schedule
1.98% Xephyr [kernel.vmlinux] [k] __schedule
1.33% irq/33-iwlwifi [kernel.vmlinux] [k] __schedule
1.17% wpa_cli [kernel.vmlinux] [k] __schedule
1.13% rcu_preempt [kernel.vmlinux] [k] __schedule
0.85% ksoftirqd/0 [kernel.vmlinux] [k] __schedule
0.77% Timer [kernel.vmlinux] [k] __schedule
In fact, tracepoints have meaningful information in their fields but
there's no way to use in 'perf report' currently. The dynamic sort keys
are introduced in this patc to overcome this limitation.
The sched:sched_switch events have following fields:
# sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 268
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;
print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
next_comm=%s next_pid=%d next_prio=%d",
REC->prev_comm, REC->prev_pid, REC->prev_prio,
REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
"|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
{ 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
With dynamic sort keys, you can use <event.field> as a sort key. Those
dynamic keys are checked and created on demand. For instance, below is
to sort by next_pid field output on the same data file:
$ perf report -s comm,sched:sched_switch.next_pid --stdio
...
# Overhead Command next_pid
# ........ ............... ..........
#
21.23% transmission-gt 0
20.86% swapper 17773
6.62% netctl-auto 0
5.25% swapper 109
5.21% kworker/0:1H 0
1.98% Xephyr 0
1.98% swapper 6524
1.98% swapper 27478
1.37% swapper 27476
1.17% swapper 233
Multiple dynamic sort keys are also supported:
$ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
...
# Overhead Command next_pid next_comm
# ........ ............... .......... ................
#
20.86% swapper 17773 transmission-gt
9.64% transmission-gt 0 swapper/0
9.16% transmission-gt 0 swapper/2
5.25% swapper 109 kworker/0:1H
5.21% kworker/0:1H 0 swapper/0
2.14% netctl-auto 0 swapper/2
1.98% netctl-auto 0 swapper/0
1.98% swapper 6524 Xephyr
1.98% swapper 27478 netctl-auto
1.78% transmission-gt 0 swapper/3
1.53% Xephyr 0 swapper/0
1.29% netctl-auto 0 swapper/1
1.29% swapper 27476 netctl-auto
1.21% netctl-auto 0 swapper/3
1.17% swapper 233 irq/33-iwlwifi
Note that pid 0 exists for each cpu so have comm of 'swapper/N'.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1450804030-29193-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-12-22 17:07:02 +00:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
|
|
|
|
|
|
|
|
if (!len)
|
|
|
|
len = hde_width(hde);
|
|
|
|
|
2015-12-22 17:07:05 +00:00
|
|
|
if (hde->raw_trace)
|
|
|
|
goto raw_field;
|
2015-12-22 17:07:03 +00:00
|
|
|
|
2015-12-22 17:07:05 +00:00
|
|
|
field = hde->field;
|
2015-12-22 17:07:03 +00:00
|
|
|
namelen = strlen(field->name);
|
|
|
|
str = he->trace_output;
|
|
|
|
|
|
|
|
while (str) {
|
|
|
|
pos = strchr(str, ' ');
|
|
|
|
if (pos == NULL) {
|
|
|
|
last = true;
|
|
|
|
pos = str + strlen(str);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!strncmp(str, field->name, namelen)) {
|
|
|
|
str += namelen + 1;
|
|
|
|
str = strndup(str, pos - str);
|
|
|
|
|
|
|
|
if (str == NULL)
|
|
|
|
return scnprintf(hpp->buf, hpp->size,
|
|
|
|
"%*.*s", len, len, "ERROR");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (last)
|
|
|
|
str = NULL;
|
|
|
|
else
|
|
|
|
str = pos + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (str == NULL) {
|
|
|
|
struct trace_seq seq;
|
2015-12-22 17:07:05 +00:00
|
|
|
raw_field:
|
2015-12-22 17:07:03 +00:00
|
|
|
trace_seq_init(&seq);
|
|
|
|
pevent_print_field(&seq, he->raw_data, hde->field);
|
|
|
|
str = seq.buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, str);
|
|
|
|
free(str);
|
perf tools: Add dynamic sort key for tracepoint events
The existing sort keys are less useful for tracepoint events in that
they are always sampled at the same place, the function where the
tracepoint is located.
For example, a 'perf report' on sched:sched_switch event looks like the
following:
# Overhead Command Shared Object Symbol
# ........ ............... ................ ..............
#
47.22% swapper [kernel.vmlinux] [k] __schedule
21.67% transmission-gt [kernel.vmlinux] [k] __schedule
8.23% netctl-auto [kernel.vmlinux] [k] __schedule
5.53% kworker/0:1H [kernel.vmlinux] [k] __schedule
1.98% Xephyr [kernel.vmlinux] [k] __schedule
1.33% irq/33-iwlwifi [kernel.vmlinux] [k] __schedule
1.17% wpa_cli [kernel.vmlinux] [k] __schedule
1.13% rcu_preempt [kernel.vmlinux] [k] __schedule
0.85% ksoftirqd/0 [kernel.vmlinux] [k] __schedule
0.77% Timer [kernel.vmlinux] [k] __schedule
In fact, tracepoints have meaningful information in their fields but
there's no way to use in 'perf report' currently. The dynamic sort keys
are introduced in this patc to overcome this limitation.
The sched:sched_switch events have following fields:
# sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 268
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;
print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
next_comm=%s next_pid=%d next_prio=%d",
REC->prev_comm, REC->prev_pid, REC->prev_prio,
REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
"|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
{ 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
With dynamic sort keys, you can use <event.field> as a sort key. Those
dynamic keys are checked and created on demand. For instance, below is
to sort by next_pid field output on the same data file:
$ perf report -s comm,sched:sched_switch.next_pid --stdio
...
# Overhead Command next_pid
# ........ ............... ..........
#
21.23% transmission-gt 0
20.86% swapper 17773
6.62% netctl-auto 0
5.25% swapper 109
5.21% kworker/0:1H 0
1.98% Xephyr 0
1.98% swapper 6524
1.98% swapper 27478
1.37% swapper 27476
1.17% swapper 233
Multiple dynamic sort keys are also supported:
$ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
...
# Overhead Command next_pid next_comm
# ........ ............... .......... ................
#
20.86% swapper 17773 transmission-gt
9.64% transmission-gt 0 swapper/0
9.16% transmission-gt 0 swapper/2
5.25% swapper 109 kworker/0:1H
5.21% kworker/0:1H 0 swapper/0
2.14% netctl-auto 0 swapper/2
1.98% netctl-auto 0 swapper/0
1.98% swapper 6524 Xephyr
1.98% swapper 27478 netctl-auto
1.78% transmission-gt 0 swapper/3
1.53% Xephyr 0 swapper/0
1.29% netctl-auto 0 swapper/1
1.29% swapper 27476 netctl-auto
1.21% netctl-auto 0 swapper/3
1.17% swapper 233 irq/33-iwlwifi
Note that pid 0 exists for each cpu so have comm of 'swapper/N'.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1450804030-29193-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-12-22 17:07:02 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
|
|
|
|
struct hist_entry *a, struct hist_entry *b)
|
|
|
|
{
|
|
|
|
struct hpp_dynamic_entry *hde;
|
|
|
|
struct format_field *field;
|
|
|
|
unsigned offset, size;
|
|
|
|
|
|
|
|
hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
|
|
|
|
|
|
|
|
field = hde->field;
|
|
|
|
if (field->flags & FIELD_IS_DYNAMIC) {
|
|
|
|
unsigned long long dyn;
|
|
|
|
|
|
|
|
pevent_read_number_field(field, a->raw_data, &dyn);
|
|
|
|
offset = dyn & 0xffff;
|
|
|
|
size = (dyn >> 16) & 0xffff;
|
|
|
|
|
|
|
|
/* record max width for output */
|
|
|
|
if (size > hde->dynamic_len)
|
|
|
|
hde->dynamic_len = size;
|
|
|
|
} else {
|
|
|
|
offset = field->offset;
|
|
|
|
size = field->size;
|
2015-12-22 17:07:03 +00:00
|
|
|
|
|
|
|
update_dynamic_len(hde, a);
|
|
|
|
update_dynamic_len(hde, b);
|
perf tools: Add dynamic sort key for tracepoint events
The existing sort keys are less useful for tracepoint events in that
they are always sampled at the same place, the function where the
tracepoint is located.
For example, a 'perf report' on sched:sched_switch event looks like the
following:
# Overhead Command Shared Object Symbol
# ........ ............... ................ ..............
#
47.22% swapper [kernel.vmlinux] [k] __schedule
21.67% transmission-gt [kernel.vmlinux] [k] __schedule
8.23% netctl-auto [kernel.vmlinux] [k] __schedule
5.53% kworker/0:1H [kernel.vmlinux] [k] __schedule
1.98% Xephyr [kernel.vmlinux] [k] __schedule
1.33% irq/33-iwlwifi [kernel.vmlinux] [k] __schedule
1.17% wpa_cli [kernel.vmlinux] [k] __schedule
1.13% rcu_preempt [kernel.vmlinux] [k] __schedule
0.85% ksoftirqd/0 [kernel.vmlinux] [k] __schedule
0.77% Timer [kernel.vmlinux] [k] __schedule
In fact, tracepoints have meaningful information in their fields but
there's no way to use in 'perf report' currently. The dynamic sort keys
are introduced in this patc to overcome this limitation.
The sched:sched_switch events have following fields:
# sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 268
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;
print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
next_comm=%s next_pid=%d next_prio=%d",
REC->prev_comm, REC->prev_pid, REC->prev_prio,
REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
"|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
{ 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
With dynamic sort keys, you can use <event.field> as a sort key. Those
dynamic keys are checked and created on demand. For instance, below is
to sort by next_pid field output on the same data file:
$ perf report -s comm,sched:sched_switch.next_pid --stdio
...
# Overhead Command next_pid
# ........ ............... ..........
#
21.23% transmission-gt 0
20.86% swapper 17773
6.62% netctl-auto 0
5.25% swapper 109
5.21% kworker/0:1H 0
1.98% Xephyr 0
1.98% swapper 6524
1.98% swapper 27478
1.37% swapper 27476
1.17% swapper 233
Multiple dynamic sort keys are also supported:
$ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
...
# Overhead Command next_pid next_comm
# ........ ............... .......... ................
#
20.86% swapper 17773 transmission-gt
9.64% transmission-gt 0 swapper/0
9.16% transmission-gt 0 swapper/2
5.25% swapper 109 kworker/0:1H
5.21% kworker/0:1H 0 swapper/0
2.14% netctl-auto 0 swapper/2
1.98% netctl-auto 0 swapper/0
1.98% swapper 6524 Xephyr
1.98% swapper 27478 netctl-auto
1.78% transmission-gt 0 swapper/3
1.53% Xephyr 0 swapper/0
1.29% netctl-auto 0 swapper/1
1.29% swapper 27476 netctl-auto
1.21% netctl-auto 0 swapper/3
1.17% swapper 233 irq/33-iwlwifi
Note that pid 0 exists for each cpu so have comm of 'swapper/N'.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1450804030-29193-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-12-22 17:07:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return memcmp(a->raw_data + offset, b->raw_data + offset, size);
|
|
|
|
}
|
|
|
|
|
2015-12-22 17:07:08 +00:00
|
|
|
bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt)
|
|
|
|
{
|
|
|
|
return fmt->cmp == __sort__hde_cmp;
|
|
|
|
}
|
|
|
|
|
2016-01-18 09:24:09 +00:00
|
|
|
static void hde_free(struct perf_hpp_fmt *fmt)
|
|
|
|
{
|
|
|
|
struct hpp_dynamic_entry *hde;
|
|
|
|
|
|
|
|
hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
|
|
|
|
free(hde);
|
|
|
|
}
|
|
|
|
|
perf tools: Add dynamic sort key for tracepoint events
The existing sort keys are less useful for tracepoint events in that
they are always sampled at the same place, the function where the
tracepoint is located.
For example, a 'perf report' on sched:sched_switch event looks like the
following:
# Overhead Command Shared Object Symbol
# ........ ............... ................ ..............
#
47.22% swapper [kernel.vmlinux] [k] __schedule
21.67% transmission-gt [kernel.vmlinux] [k] __schedule
8.23% netctl-auto [kernel.vmlinux] [k] __schedule
5.53% kworker/0:1H [kernel.vmlinux] [k] __schedule
1.98% Xephyr [kernel.vmlinux] [k] __schedule
1.33% irq/33-iwlwifi [kernel.vmlinux] [k] __schedule
1.17% wpa_cli [kernel.vmlinux] [k] __schedule
1.13% rcu_preempt [kernel.vmlinux] [k] __schedule
0.85% ksoftirqd/0 [kernel.vmlinux] [k] __schedule
0.77% Timer [kernel.vmlinux] [k] __schedule
In fact, tracepoints have meaningful information in their fields but
there's no way to use it in 'perf report' currently. The dynamic sort keys
are introduced in this patch to overcome this limitation.
The sched:sched_switch events have following fields:
# sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 268
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;
print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
next_comm=%s next_pid=%d next_prio=%d",
REC->prev_comm, REC->prev_pid, REC->prev_prio,
REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
"|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
{ 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
With dynamic sort keys, you can use <event.field> as a sort key. Those
dynamic keys are checked and created on demand. For instance, below is
to sort by next_pid field output on the same data file:
$ perf report -s comm,sched:sched_switch.next_pid --stdio
...
# Overhead Command next_pid
# ........ ............... ..........
#
21.23% transmission-gt 0
20.86% swapper 17773
6.62% netctl-auto 0
5.25% swapper 109
5.21% kworker/0:1H 0
1.98% Xephyr 0
1.98% swapper 6524
1.98% swapper 27478
1.37% swapper 27476
1.17% swapper 233
Multiple dynamic sort keys are also supported:
$ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
...
# Overhead Command next_pid next_comm
# ........ ............... .......... ................
#
20.86% swapper 17773 transmission-gt
9.64% transmission-gt 0 swapper/0
9.16% transmission-gt 0 swapper/2
5.25% swapper 109 kworker/0:1H
5.21% kworker/0:1H 0 swapper/0
2.14% netctl-auto 0 swapper/2
1.98% netctl-auto 0 swapper/0
1.98% swapper 6524 Xephyr
1.98% swapper 27478 netctl-auto
1.78% transmission-gt 0 swapper/3
1.53% Xephyr 0 swapper/0
1.29% netctl-auto 0 swapper/1
1.29% swapper 27476 netctl-auto
1.21% netctl-auto 0 swapper/3
1.17% swapper 233 irq/33-iwlwifi
Note that pid 0 exists for each cpu so have comm of 'swapper/N'.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1450804030-29193-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-12-22 17:07:02 +00:00
|
|
|
static struct hpp_dynamic_entry *
|
|
|
|
__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field)
|
|
|
|
{
|
|
|
|
struct hpp_dynamic_entry *hde;
|
|
|
|
|
|
|
|
hde = malloc(sizeof(*hde));
|
|
|
|
if (hde == NULL) {
|
|
|
|
pr_debug("Memory allocation failed\n");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
hde->evsel = evsel;
|
|
|
|
hde->field = field;
|
|
|
|
hde->dynamic_len = 0;
|
|
|
|
|
|
|
|
hde->hpp.name = field->name;
|
|
|
|
hde->hpp.header = __sort__hde_header;
|
|
|
|
hde->hpp.width = __sort__hde_width;
|
|
|
|
hde->hpp.entry = __sort__hde_entry;
|
|
|
|
hde->hpp.color = NULL;
|
|
|
|
|
|
|
|
hde->hpp.cmp = __sort__hde_cmp;
|
|
|
|
hde->hpp.collapse = __sort__hde_cmp;
|
|
|
|
hde->hpp.sort = __sort__hde_cmp;
|
2016-01-18 09:24:09 +00:00
|
|
|
hde->hpp.free = hde_free;
|
perf tools: Add dynamic sort key for tracepoint events
The existing sort keys are less useful for tracepoint events in that
they are always sampled at the same place, the function where the
tracepoint is located.
For example, a 'perf report' on sched:sched_switch event looks like the
following:
# Overhead Command Shared Object Symbol
# ........ ............... ................ ..............
#
47.22% swapper [kernel.vmlinux] [k] __schedule
21.67% transmission-gt [kernel.vmlinux] [k] __schedule
8.23% netctl-auto [kernel.vmlinux] [k] __schedule
5.53% kworker/0:1H [kernel.vmlinux] [k] __schedule
1.98% Xephyr [kernel.vmlinux] [k] __schedule
1.33% irq/33-iwlwifi [kernel.vmlinux] [k] __schedule
1.17% wpa_cli [kernel.vmlinux] [k] __schedule
1.13% rcu_preempt [kernel.vmlinux] [k] __schedule
0.85% ksoftirqd/0 [kernel.vmlinux] [k] __schedule
0.77% Timer [kernel.vmlinux] [k] __schedule
In fact, tracepoints have meaningful information in their fields but
there's no way to use in 'perf report' currently. The dynamic sort keys
are introduced in this patc to overcome this limitation.
The sched:sched_switch events have following fields:
# sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 268
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;
print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
next_comm=%s next_pid=%d next_prio=%d",
REC->prev_comm, REC->prev_pid, REC->prev_prio,
REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
"|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
{ 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
With dynamic sort keys, you can use <event.field> as a sort key. Those
dynamic keys are checked and created on demand. For instance, below is
to sort by next_pid field output on the same data file:
$ perf report -s comm,sched:sched_switch.next_pid --stdio
...
# Overhead Command next_pid
# ........ ............... ..........
#
21.23% transmission-gt 0
20.86% swapper 17773
6.62% netctl-auto 0
5.25% swapper 109
5.21% kworker/0:1H 0
1.98% Xephyr 0
1.98% swapper 6524
1.98% swapper 27478
1.37% swapper 27476
1.17% swapper 233
Multiple dynamic sort keys are also supported:
$ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
...
# Overhead Command next_pid next_comm
# ........ ............... .......... ................
#
20.86% swapper 17773 transmission-gt
9.64% transmission-gt 0 swapper/0
9.16% transmission-gt 0 swapper/2
5.25% swapper 109 kworker/0:1H
5.21% kworker/0:1H 0 swapper/0
2.14% netctl-auto 0 swapper/2
1.98% netctl-auto 0 swapper/0
1.98% swapper 6524 Xephyr
1.98% swapper 27478 netctl-auto
1.78% transmission-gt 0 swapper/3
1.53% Xephyr 0 swapper/0
1.29% netctl-auto 0 swapper/1
1.29% swapper 27476 netctl-auto
1.21% netctl-auto 0 swapper/3
1.17% swapper 233 irq/33-iwlwifi
Note that pid 0 exists for each cpu so have comm of 'swapper/N'.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1450804030-29193-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-12-22 17:07:02 +00:00
|
|
|
|
|
|
|
INIT_LIST_HEAD(&hde->hpp.list);
|
|
|
|
INIT_LIST_HEAD(&hde->hpp.sort_list);
|
|
|
|
hde->hpp.elide = false;
|
|
|
|
hde->hpp.len = 0;
|
|
|
|
hde->hpp.user_len = 0;
|
|
|
|
|
|
|
|
return hde;
|
|
|
|
}
|
|
|
|
|
2015-12-22 17:07:06 +00:00
|
|
|
/*
 * Split a sort-key token of the form "[event.]field[/opt]" in place.
 *
 * @str is modified: the first '.' and the first '/' after the field start
 * are overwritten with NUL terminators.  On return:
 *   *event - text before the first '.', or NULL when there is no '.'
 *   *field - text after the '.', or the whole string when there is no '.'
 *   *opt   - text after the '/', or NULL when there is no '/'
 *
 * Always returns 0.
 */
static int parse_field_name(char *str, char **event, char **field, char **opt)
{
	char *ev = NULL;
	char *fld = str;
	char *option;
	char *dot = strchr(str, '.');

	if (dot != NULL) {
		*dot = '\0';
		ev = str;
		fld = dot + 1;
	}

	option = strchr(fld, '/');
	if (option != NULL) {
		*option = '\0';
		option++;
	}

	*event = ev;
	*field = fld;
	*opt = option;

	return 0;
}
|
|
|
|
|
|
|
|
/* find match evsel using a given event name. The event name can be:
|
2016-01-05 10:58:35 +00:00
|
|
|
* 1. '%' + event index (e.g. '%1' for first event)
|
|
|
|
* 2. full event name (e.g. sched:sched_switch)
|
|
|
|
* 3. partial event name (should not contain ':')
|
2015-12-22 17:07:06 +00:00
|
|
|
*/
|
|
|
|
static struct perf_evsel *find_evsel(struct perf_evlist *evlist, char *event_name)
|
|
|
|
{
|
|
|
|
struct perf_evsel *evsel = NULL;
|
|
|
|
struct perf_evsel *pos;
|
|
|
|
bool full_name;
|
|
|
|
|
|
|
|
/* case 1 */
|
|
|
|
if (event_name[0] == '%') {
|
|
|
|
int nr = strtol(event_name+1, NULL, 0);
|
|
|
|
|
|
|
|
if (nr > evlist->nr_entries)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
evsel = perf_evlist__first(evlist);
|
|
|
|
while (--nr > 0)
|
|
|
|
evsel = perf_evsel__next(evsel);
|
|
|
|
|
|
|
|
return evsel;
|
|
|
|
}
|
|
|
|
|
|
|
|
full_name = !!strchr(event_name, ':');
|
|
|
|
evlist__for_each(evlist, pos) {
|
2016-01-05 10:58:35 +00:00
|
|
|
/* case 2 */
|
2015-12-22 17:07:06 +00:00
|
|
|
if (full_name && !strcmp(pos->name, event_name))
|
|
|
|
return pos;
|
2016-01-05 10:58:35 +00:00
|
|
|
/* case 3 */
|
2015-12-22 17:07:06 +00:00
|
|
|
if (!full_name && strstr(pos->name, event_name)) {
|
|
|
|
if (evsel) {
|
|
|
|
pr_debug("'%s' event is ambiguous: it can be %s or %s\n",
|
|
|
|
event_name, evsel->name, pos->name);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
evsel = pos;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return evsel;
|
|
|
|
}
|
|
|
|
|
2015-12-22 17:07:07 +00:00
|
|
|
static int __dynamic_dimension__add(struct perf_evsel *evsel,
|
|
|
|
struct format_field *field,
|
|
|
|
bool raw_trace)
|
|
|
|
{
|
|
|
|
struct hpp_dynamic_entry *hde;
|
|
|
|
|
|
|
|
hde = __alloc_dynamic_entry(evsel, field);
|
|
|
|
if (hde == NULL)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
hde->raw_trace = raw_trace;
|
|
|
|
|
|
|
|
perf_hpp__register_sort_field(&hde->hpp);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-12-22 17:07:09 +00:00
|
|
|
static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
struct format_field *field;
|
|
|
|
|
|
|
|
field = evsel->tp_format->format.fields;
|
|
|
|
while (field) {
|
|
|
|
ret = __dynamic_dimension__add(evsel, field, raw_trace);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
field = field->next;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
struct perf_evsel *evsel;
|
|
|
|
|
|
|
|
evlist__for_each(evlist, evsel) {
|
|
|
|
if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
ret = add_evsel_fields(evsel, raw_trace);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-01-05 10:58:35 +00:00
|
|
|
static int add_all_matching_fields(struct perf_evlist *evlist,
|
|
|
|
char *field_name, bool raw_trace)
|
|
|
|
{
|
|
|
|
int ret = -ESRCH;
|
|
|
|
struct perf_evsel *evsel;
|
|
|
|
struct format_field *field;
|
|
|
|
|
|
|
|
evlist__for_each(evlist, evsel) {
|
|
|
|
if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
field = pevent_find_any_field(evsel->tp_format, field_name);
|
|
|
|
if (field == NULL)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
ret = __dynamic_dimension__add(evsel, field, raw_trace);
|
|
|
|
if (ret < 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
perf tools: Add dynamic sort key for tracepoint events
The existing sort keys are less useful for tracepoint events in that
they are always sampled at the same place, the function where the
tracepoint is located.
For example, a 'perf report' on sched:sched_switch event looks like the
following:
# Overhead Command Shared Object Symbol
# ........ ............... ................ ..............
#
47.22% swapper [kernel.vmlinux] [k] __schedule
21.67% transmission-gt [kernel.vmlinux] [k] __schedule
8.23% netctl-auto [kernel.vmlinux] [k] __schedule
5.53% kworker/0:1H [kernel.vmlinux] [k] __schedule
1.98% Xephyr [kernel.vmlinux] [k] __schedule
1.33% irq/33-iwlwifi [kernel.vmlinux] [k] __schedule
1.17% wpa_cli [kernel.vmlinux] [k] __schedule
1.13% rcu_preempt [kernel.vmlinux] [k] __schedule
0.85% ksoftirqd/0 [kernel.vmlinux] [k] __schedule
0.77% Timer [kernel.vmlinux] [k] __schedule
In fact, tracepoints have meaningful information in their fields but
there's no way to use it in 'perf report' currently. The dynamic sort keys
are introduced in this patch to overcome this limitation.
The sched:sched_switch events have following fields:
# sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 268
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;
print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
next_comm=%s next_pid=%d next_prio=%d",
REC->prev_comm, REC->prev_pid, REC->prev_prio,
REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
"|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
{ 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
With dynamic sort keys, you can use <event.field> as a sort key. Those
dynamic keys are checked and created on demand. For instance, below is
to sort by next_pid field output on the same data file:
$ perf report -s comm,sched:sched_switch.next_pid --stdio
...
# Overhead Command next_pid
# ........ ............... ..........
#
21.23% transmission-gt 0
20.86% swapper 17773
6.62% netctl-auto 0
5.25% swapper 109
5.21% kworker/0:1H 0
1.98% Xephyr 0
1.98% swapper 6524
1.98% swapper 27478
1.37% swapper 27476
1.17% swapper 233
Multiple dynamic sort keys are also supported:
$ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
...
# Overhead Command next_pid next_comm
# ........ ............... .......... ................
#
20.86% swapper 17773 transmission-gt
9.64% transmission-gt 0 swapper/0
9.16% transmission-gt 0 swapper/2
5.25% swapper 109 kworker/0:1H
5.21% kworker/0:1H 0 swapper/0
2.14% netctl-auto 0 swapper/2
1.98% netctl-auto 0 swapper/0
1.98% swapper 6524 Xephyr
1.98% swapper 27478 netctl-auto
1.78% transmission-gt 0 swapper/3
1.53% Xephyr 0 swapper/0
1.29% netctl-auto 0 swapper/1
1.29% swapper 27476 netctl-auto
1.21% netctl-auto 0 swapper/3
1.17% swapper 233 irq/33-iwlwifi
Note that pid 0 exists for each cpu so have comm of 'swapper/N'.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1450804030-29193-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-12-22 17:07:02 +00:00
|
|
|
static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok)
|
|
|
|
{
|
2015-12-22 17:07:06 +00:00
|
|
|
char *str, *event_name, *field_name, *opt_name;
|
|
|
|
struct perf_evsel *evsel;
|
perf tools: Add dynamic sort key for tracepoint events
The existing sort keys are less useful for tracepoint events in that
they are always sampled at the same place, the function where the
tracepoint is located.
For example, a 'perf report' on sched:sched_switch event looks like the
following:
# Overhead Command Shared Object Symbol
# ........ ............... ................ ..............
#
47.22% swapper [kernel.vmlinux] [k] __schedule
21.67% transmission-gt [kernel.vmlinux] [k] __schedule
8.23% netctl-auto [kernel.vmlinux] [k] __schedule
5.53% kworker/0:1H [kernel.vmlinux] [k] __schedule
1.98% Xephyr [kernel.vmlinux] [k] __schedule
1.33% irq/33-iwlwifi [kernel.vmlinux] [k] __schedule
1.17% wpa_cli [kernel.vmlinux] [k] __schedule
1.13% rcu_preempt [kernel.vmlinux] [k] __schedule
0.85% ksoftirqd/0 [kernel.vmlinux] [k] __schedule
0.77% Timer [kernel.vmlinux] [k] __schedule
In fact, tracepoints have meaningful information in their fields but
there's no way to use it in 'perf report' currently. The dynamic sort keys
are introduced in this patch to overcome this limitation.
The sched:sched_switch events have following fields:
# sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 268
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;
print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
next_comm=%s next_pid=%d next_prio=%d",
REC->prev_comm, REC->prev_pid, REC->prev_prio,
REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
"|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
{ 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
With dynamic sort keys, you can use <event.field> as a sort key. Those
dynamic keys are checked and created on demand. For instance, below is
to sort by next_pid field output on the same data file:
$ perf report -s comm,sched:sched_switch.next_pid --stdio
...
# Overhead Command next_pid
# ........ ............... ..........
#
21.23% transmission-gt 0
20.86% swapper 17773
6.62% netctl-auto 0
5.25% swapper 109
5.21% kworker/0:1H 0
1.98% Xephyr 0
1.98% swapper 6524
1.98% swapper 27478
1.37% swapper 27476
1.17% swapper 233
Multiple dynamic sort keys are also supported:
$ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
...
# Overhead Command next_pid next_comm
# ........ ............... .......... ................
#
20.86% swapper 17773 transmission-gt
9.64% transmission-gt 0 swapper/0
9.16% transmission-gt 0 swapper/2
5.25% swapper 109 kworker/0:1H
5.21% kworker/0:1H 0 swapper/0
2.14% netctl-auto 0 swapper/2
1.98% netctl-auto 0 swapper/0
1.98% swapper 6524 Xephyr
1.98% swapper 27478 netctl-auto
1.78% transmission-gt 0 swapper/3
1.53% Xephyr 0 swapper/0
1.29% netctl-auto 0 swapper/1
1.29% swapper 27476 netctl-auto
1.21% netctl-auto 0 swapper/3
1.17% swapper 233 irq/33-iwlwifi
Note that pid 0 exists for each cpu so have comm of 'swapper/N'.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1450804030-29193-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-12-22 17:07:02 +00:00
|
|
|
struct format_field *field;
|
2015-12-22 17:07:05 +00:00
|
|
|
bool raw_trace = symbol_conf.raw_trace;
|
perf tools: Add dynamic sort key for tracepoint events
The existing sort keys are less useful for tracepoint events in that
they are always sampled at the same place, the function where the
tracepoint is located.
For example, a 'perf report' on sched:sched_switch event looks like the
following:
# Overhead Command Shared Object Symbol
# ........ ............... ................ ..............
#
47.22% swapper [kernel.vmlinux] [k] __schedule
21.67% transmission-gt [kernel.vmlinux] [k] __schedule
8.23% netctl-auto [kernel.vmlinux] [k] __schedule
5.53% kworker/0:1H [kernel.vmlinux] [k] __schedule
1.98% Xephyr [kernel.vmlinux] [k] __schedule
1.33% irq/33-iwlwifi [kernel.vmlinux] [k] __schedule
1.17% wpa_cli [kernel.vmlinux] [k] __schedule
1.13% rcu_preempt [kernel.vmlinux] [k] __schedule
0.85% ksoftirqd/0 [kernel.vmlinux] [k] __schedule
0.77% Timer [kernel.vmlinux] [k] __schedule
In fact, tracepoints have meaningful information in their fields but
there's no way to use it in 'perf report' currently. The dynamic sort keys
are introduced in this patch to overcome this limitation.
The sched:sched_switch events have following fields:
# sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 268
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;
print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
next_comm=%s next_pid=%d next_prio=%d",
REC->prev_comm, REC->prev_pid, REC->prev_prio,
REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
"|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
{ 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
With dynamic sort keys, you can use <event.field> as a sort key. Those
dynamic keys are checked and created on demand. For instance, below is
to sort by next_pid field output on the same data file:
$ perf report -s comm,sched:sched_switch.next_pid --stdio
...
# Overhead Command next_pid
# ........ ............... ..........
#
21.23% transmission-gt 0
20.86% swapper 17773
6.62% netctl-auto 0
5.25% swapper 109
5.21% kworker/0:1H 0
1.98% Xephyr 0
1.98% swapper 6524
1.98% swapper 27478
1.37% swapper 27476
1.17% swapper 233
Multiple dynamic sort keys are also supported:
$ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
...
# Overhead Command next_pid next_comm
# ........ ............... .......... ................
#
20.86% swapper 17773 transmission-gt
9.64% transmission-gt 0 swapper/0
9.16% transmission-gt 0 swapper/2
5.25% swapper 109 kworker/0:1H
5.21% kworker/0:1H 0 swapper/0
2.14% netctl-auto 0 swapper/2
1.98% netctl-auto 0 swapper/0
1.98% swapper 6524 Xephyr
1.98% swapper 27478 netctl-auto
1.78% transmission-gt 0 swapper/3
1.53% Xephyr 0 swapper/0
1.29% netctl-auto 0 swapper/1
1.29% swapper 27476 netctl-auto
1.21% netctl-auto 0 swapper/3
1.17% swapper 233 irq/33-iwlwifi
Note that pid 0 exists for each cpu so have comm of 'swapper/N'.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1450804030-29193-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-12-22 17:07:02 +00:00
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
if (evlist == NULL)
|
|
|
|
return -ENOENT;
|
|
|
|
|
|
|
|
str = strdup(tok);
|
|
|
|
if (str == NULL)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2015-12-22 17:07:06 +00:00
|
|
|
if (parse_field_name(str, &event_name, &field_name, &opt_name) < 0) {
|
perf tools: Add dynamic sort key for tracepoint events
The existing sort keys are less useful for tracepoint events in that
they are always sampled at the same place, the function where the
tracepoint is located.
For example, a 'perf report' on sched:sched_switch event looks like the
following:
# Overhead Command Shared Object Symbol
# ........ ............... ................ ..............
#
47.22% swapper [kernel.vmlinux] [k] __schedule
21.67% transmission-gt [kernel.vmlinux] [k] __schedule
8.23% netctl-auto [kernel.vmlinux] [k] __schedule
5.53% kworker/0:1H [kernel.vmlinux] [k] __schedule
1.98% Xephyr [kernel.vmlinux] [k] __schedule
1.33% irq/33-iwlwifi [kernel.vmlinux] [k] __schedule
1.17% wpa_cli [kernel.vmlinux] [k] __schedule
1.13% rcu_preempt [kernel.vmlinux] [k] __schedule
0.85% ksoftirqd/0 [kernel.vmlinux] [k] __schedule
0.77% Timer [kernel.vmlinux] [k] __schedule
In fact, tracepoints have meaningful information in their fields but
there's no way to use them in 'perf report' currently. The dynamic sort keys
are introduced in this patch to overcome this limitation.
The sched:sched_switch events have following fields:
# sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 268
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;
print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
next_comm=%s next_pid=%d next_prio=%d",
REC->prev_comm, REC->prev_pid, REC->prev_prio,
REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
"|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
{ 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
With dynamic sort keys, you can use <event.field> as a sort key. Those
dynamic keys are checked and created on demand. For instance, below is
to sort by next_pid field output on the same data file:
$ perf report -s comm,sched:sched_switch.next_pid --stdio
...
# Overhead Command next_pid
# ........ ............... ..........
#
21.23% transmission-gt 0
20.86% swapper 17773
6.62% netctl-auto 0
5.25% swapper 109
5.21% kworker/0:1H 0
1.98% Xephyr 0
1.98% swapper 6524
1.98% swapper 27478
1.37% swapper 27476
1.17% swapper 233
Multiple dynamic sort keys are also supported:
$ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
...
# Overhead Command next_pid next_comm
# ........ ............... .......... ................
#
20.86% swapper 17773 transmission-gt
9.64% transmission-gt 0 swapper/0
9.16% transmission-gt 0 swapper/2
5.25% swapper 109 kworker/0:1H
5.21% kworker/0:1H 0 swapper/0
2.14% netctl-auto 0 swapper/2
1.98% netctl-auto 0 swapper/0
1.98% swapper 6524 Xephyr
1.98% swapper 27478 netctl-auto
1.78% transmission-gt 0 swapper/3
1.53% Xephyr 0 swapper/0
1.29% netctl-auto 0 swapper/1
1.29% swapper 27476 netctl-auto
1.21% netctl-auto 0 swapper/3
1.17% swapper 233 irq/33-iwlwifi
Note that pid 0 exists for each cpu so have comm of 'swapper/N'.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1450804030-29193-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-12-22 17:07:02 +00:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2015-12-22 17:07:06 +00:00
|
|
|
if (opt_name) {
|
|
|
|
if (strcmp(opt_name, "raw")) {
|
|
|
|
pr_debug("unsupported field option %s\n", opt_name);
|
2015-12-22 17:07:05 +00:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
raw_trace = true;
|
|
|
|
}
|
|
|
|
|
2015-12-22 17:07:09 +00:00
|
|
|
if (!strcmp(field_name, "trace_fields")) {
|
|
|
|
ret = add_all_dynamic_fields(evlist, raw_trace);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2016-01-05 10:58:35 +00:00
|
|
|
if (event_name == NULL) {
|
|
|
|
ret = add_all_matching_fields(evlist, field_name, raw_trace);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2015-12-22 17:07:06 +00:00
|
|
|
evsel = find_evsel(evlist, event_name);
|
perf tools: Add dynamic sort key for tracepoint events
The existing sort keys are less useful for tracepoint events in that
they are always sampled at the same place, the function where the
tracepoint is located.
For example, a 'perf report' on sched:sched_switch event looks like the
following:
# Overhead Command Shared Object Symbol
# ........ ............... ................ ..............
#
47.22% swapper [kernel.vmlinux] [k] __schedule
21.67% transmission-gt [kernel.vmlinux] [k] __schedule
8.23% netctl-auto [kernel.vmlinux] [k] __schedule
5.53% kworker/0:1H [kernel.vmlinux] [k] __schedule
1.98% Xephyr [kernel.vmlinux] [k] __schedule
1.33% irq/33-iwlwifi [kernel.vmlinux] [k] __schedule
1.17% wpa_cli [kernel.vmlinux] [k] __schedule
1.13% rcu_preempt [kernel.vmlinux] [k] __schedule
0.85% ksoftirqd/0 [kernel.vmlinux] [k] __schedule
0.77% Timer [kernel.vmlinux] [k] __schedule
In fact, tracepoints have meaningful information in their fields but
there's no way to use them in 'perf report' currently. The dynamic sort keys
are introduced in this patch to overcome this limitation.
The sched:sched_switch events have following fields:
# sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 268
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;
print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
next_comm=%s next_pid=%d next_prio=%d",
REC->prev_comm, REC->prev_pid, REC->prev_prio,
REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
"|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
{ 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
With dynamic sort keys, you can use <event.field> as a sort key. Those
dynamic keys are checked and created on demand. For instance, below is
to sort by next_pid field output on the same data file:
$ perf report -s comm,sched:sched_switch.next_pid --stdio
...
# Overhead Command next_pid
# ........ ............... ..........
#
21.23% transmission-gt 0
20.86% swapper 17773
6.62% netctl-auto 0
5.25% swapper 109
5.21% kworker/0:1H 0
1.98% Xephyr 0
1.98% swapper 6524
1.98% swapper 27478
1.37% swapper 27476
1.17% swapper 233
Multiple dynamic sort keys are also supported:
$ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
...
# Overhead Command next_pid next_comm
# ........ ............... .......... ................
#
20.86% swapper 17773 transmission-gt
9.64% transmission-gt 0 swapper/0
9.16% transmission-gt 0 swapper/2
5.25% swapper 109 kworker/0:1H
5.21% kworker/0:1H 0 swapper/0
2.14% netctl-auto 0 swapper/2
1.98% netctl-auto 0 swapper/0
1.98% swapper 6524 Xephyr
1.98% swapper 27478 netctl-auto
1.78% transmission-gt 0 swapper/3
1.53% Xephyr 0 swapper/0
1.29% netctl-auto 0 swapper/1
1.29% swapper 27476 netctl-auto
1.21% netctl-auto 0 swapper/3
1.17% swapper 233 irq/33-iwlwifi
Note that pid 0 exists for each cpu so have comm of 'swapper/N'.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1450804030-29193-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-12-22 17:07:02 +00:00
|
|
|
if (evsel == NULL) {
|
|
|
|
pr_debug("Cannot find event: %s\n", event_name);
|
|
|
|
ret = -ENOENT;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
|
|
|
|
pr_debug("%s is not a tracepoint event\n", event_name);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2015-12-22 17:07:07 +00:00
|
|
|
if (!strcmp(field_name, "*")) {
|
2015-12-22 17:07:09 +00:00
|
|
|
ret = add_evsel_fields(evsel, raw_trace);
|
2015-12-22 17:07:07 +00:00
|
|
|
} else {
|
|
|
|
field = pevent_find_any_field(evsel->tp_format, field_name);
|
|
|
|
if (field == NULL) {
|
|
|
|
pr_debug("Cannot find event field for %s.%s\n",
|
|
|
|
event_name, field_name);
|
|
|
|
return -ENOENT;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = __dynamic_dimension__add(evsel, field, raw_trace);
|
|
|
|
}
|
perf tools: Add dynamic sort key for tracepoint events
The existing sort keys are less useful for tracepoint events in that
they are always sampled at the same place, the function where the
tracepoint is located.
For example, a 'perf report' on sched:sched_switch event looks like the
following:
# Overhead Command Shared Object Symbol
# ........ ............... ................ ..............
#
47.22% swapper [kernel.vmlinux] [k] __schedule
21.67% transmission-gt [kernel.vmlinux] [k] __schedule
8.23% netctl-auto [kernel.vmlinux] [k] __schedule
5.53% kworker/0:1H [kernel.vmlinux] [k] __schedule
1.98% Xephyr [kernel.vmlinux] [k] __schedule
1.33% irq/33-iwlwifi [kernel.vmlinux] [k] __schedule
1.17% wpa_cli [kernel.vmlinux] [k] __schedule
1.13% rcu_preempt [kernel.vmlinux] [k] __schedule
0.85% ksoftirqd/0 [kernel.vmlinux] [k] __schedule
0.77% Timer [kernel.vmlinux] [k] __schedule
In fact, tracepoints have meaningful information in their fields but
there's no way to use them in 'perf report' currently. The dynamic sort keys
are introduced in this patch to overcome this limitation.
The sched:sched_switch events have following fields:
# sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 268
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;
print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
next_comm=%s next_pid=%d next_prio=%d",
REC->prev_comm, REC->prev_pid, REC->prev_prio,
REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
"|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
{ 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
With dynamic sort keys, you can use <event.field> as a sort key. Those
dynamic keys are checked and created on demand. For instance, below is
to sort by next_pid field output on the same data file:
$ perf report -s comm,sched:sched_switch.next_pid --stdio
...
# Overhead Command next_pid
# ........ ............... ..........
#
21.23% transmission-gt 0
20.86% swapper 17773
6.62% netctl-auto 0
5.25% swapper 109
5.21% kworker/0:1H 0
1.98% Xephyr 0
1.98% swapper 6524
1.98% swapper 27478
1.37% swapper 27476
1.17% swapper 233
Multiple dynamic sort keys are also supported:
$ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
...
# Overhead Command next_pid next_comm
# ........ ............... .......... ................
#
20.86% swapper 17773 transmission-gt
9.64% transmission-gt 0 swapper/0
9.16% transmission-gt 0 swapper/2
5.25% swapper 109 kworker/0:1H
5.21% kworker/0:1H 0 swapper/0
2.14% netctl-auto 0 swapper/2
1.98% netctl-auto 0 swapper/0
1.98% swapper 6524 Xephyr
1.98% swapper 27478 netctl-auto
1.78% transmission-gt 0 swapper/3
1.53% Xephyr 0 swapper/0
1.29% netctl-auto 0 swapper/1
1.29% swapper 27476 netctl-auto
1.21% netctl-auto 0 swapper/3
1.17% swapper 233 irq/33-iwlwifi
Note that pid 0 exists for each cpu so have comm of 'swapper/N'.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1450804030-29193-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-12-22 17:07:02 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
free(str);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-05-19 05:19:30 +00:00
|
|
|
static int __sort_dimension__add(struct sort_dimension *sd)
|
2013-04-03 12:26:10 +00:00
|
|
|
{
|
|
|
|
if (sd->taken)
|
2014-03-03 02:46:55 +00:00
|
|
|
return 0;
|
|
|
|
|
2014-03-04 01:46:34 +00:00
|
|
|
if (__sort_dimension__add_hpp_sort(sd) < 0)
|
2014-03-03 02:46:55 +00:00
|
|
|
return -1;
|
2013-04-03 12:26:10 +00:00
|
|
|
|
|
|
|
if (sd->entry->se_collapse)
|
|
|
|
sort__need_collapse = 1;
|
|
|
|
|
|
|
|
sd->taken = 1;
|
2014-03-03 02:46:55 +00:00
|
|
|
|
|
|
|
return 0;
|
2013-04-03 12:26:10 +00:00
|
|
|
}
|
|
|
|
|
2014-03-04 00:06:42 +00:00
|
|
|
static int __hpp_dimension__add(struct hpp_dimension *hd)
|
|
|
|
{
|
2016-01-18 09:24:07 +00:00
|
|
|
struct perf_hpp_fmt *fmt;
|
2014-03-04 00:06:42 +00:00
|
|
|
|
2016-01-18 09:24:07 +00:00
|
|
|
if (hd->taken)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
fmt = __hpp_dimension__alloc_hpp(hd);
|
|
|
|
if (!fmt)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
hd->taken = 1;
|
|
|
|
perf_hpp__register_sort_field(fmt);
|
2014-03-04 00:06:42 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-01-18 09:24:16 +00:00
|
|
|
static int __sort_dimension__add_output(struct perf_hpp_list *list,
|
|
|
|
struct sort_dimension *sd)
|
2014-03-04 01:46:34 +00:00
|
|
|
{
|
|
|
|
if (sd->taken)
|
|
|
|
return 0;
|
|
|
|
|
2016-01-18 09:24:16 +00:00
|
|
|
if (__sort_dimension__add_hpp_output(list, sd) < 0)
|
2014-03-04 01:46:34 +00:00
|
|
|
return -1;
|
|
|
|
|
|
|
|
sd->taken = 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-01-18 09:24:16 +00:00
|
|
|
static int __hpp_dimension__add_output(struct perf_hpp_list *list,
|
|
|
|
struct hpp_dimension *hd)
|
2014-03-04 01:46:34 +00:00
|
|
|
{
|
2016-01-18 09:24:07 +00:00
|
|
|
struct perf_hpp_fmt *fmt;
|
2014-03-04 01:46:34 +00:00
|
|
|
|
2016-01-18 09:24:07 +00:00
|
|
|
if (hd->taken)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
fmt = __hpp_dimension__alloc_hpp(hd);
|
|
|
|
if (!fmt)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
hd->taken = 1;
|
2016-01-18 09:24:16 +00:00
|
|
|
perf_hpp_list__column_register(list, fmt);
|
2014-03-04 01:46:34 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-10-06 12:25:11 +00:00
|
|
|
int hpp_dimension__add_output(unsigned col)
|
|
|
|
{
|
|
|
|
BUG_ON(col >= PERF_HPP__MAX_INDEX);
|
2016-01-18 09:24:16 +00:00
|
|
|
return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
|
2015-10-06 12:25:11 +00:00
|
|
|
}
|
|
|
|
|
2015-12-22 17:07:01 +00:00
|
|
|
static int sort_dimension__add(const char *tok,
|
|
|
|
struct perf_evlist *evlist __maybe_unused)
|
2009-09-24 16:02:49 +00:00
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
|
2012-12-27 09:11:46 +00:00
|
|
|
for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
|
|
|
|
struct sort_dimension *sd = &common_sort_dimensions[i];
|
2009-09-24 16:02:49 +00:00
|
|
|
|
|
|
|
if (strncasecmp(tok, sd->name, strlen(tok)))
|
|
|
|
continue;
|
2012-12-27 09:11:46 +00:00
|
|
|
|
2009-09-24 16:02:49 +00:00
|
|
|
if (sd->entry == &sort_parent) {
|
|
|
|
int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
|
|
|
|
if (ret) {
|
|
|
|
char err[BUFSIZ];
|
|
|
|
|
|
|
|
regerror(ret, &parent_regex, err, sizeof(err));
|
2010-04-02 15:30:57 +00:00
|
|
|
pr_err("Invalid regex: %s\n%s", parent_pattern, err);
|
|
|
|
return -EINVAL;
|
2009-09-24 16:02:49 +00:00
|
|
|
}
|
|
|
|
sort__has_parent = 1;
|
2013-04-05 01:26:36 +00:00
|
|
|
} else if (sd->entry == &sort_sym) {
|
2012-09-14 08:35:27 +00:00
|
|
|
sort__has_sym = 1;
|
perf diff: Support for different binaries
Currently, the perf diff only works with same binaries. That's because
it compares the symbol start address. It doesn't work if the perf.data
comes from different binaries. This patch matches the symbol names.
Actually, perf diff once intended to compare the symbol names. The
commit as below can look for a pair by name.
604c5c92972d (perf diff: Change the default sort order to "dso,symbol")
However, at that time, perf diff used a global list of dsos. That means
the binaries which has same name can only be loaded once. That's a
problem for comparing different binaries.
For example, we have an old binary and an updated binary. They very
likely have same name and most of the functions, so only dsos from old
binary will be loaded. When processing the data from updated binary,
perf still use the symbol information from old binary. That's wrong.
Then the commit as below used IP to replace symbol name.
9c443dfdd31e ("perf diff: Fix support for all --sort combinations")
>From that time, perf diff starts to compare the symbol address.
The global dsos is discarded from a patch in 2010.
a1645ce12adb ("perf: 'perf kvm' tool for monitoring guest performance
from host")
However, at that time, perf diff already compared by address. So perf
diff cannot work for different binaries as well.
This patch actually rolls back the perf diff to original design. The
document is also changed, so everybody knows the original design is to
compare the symbol names.
Here are some examples:
The only difference between example_v1.c and example_v2.c is the
location of f2 and f3. There is no change in behavior, but the previous
perf diff display the wrong differential profile.
example_v1.c
noinline void f3(void)
{
volatile int i;
for (i = 0; i < 10000;) {
if(i%2)
i++;
else
i++;
}
}
noinline void f2(void)
{
volatile int a = 100, b, c;
for (b = 0; b < 10000; b++)
c = a * b;
}
noinline void f1(void)
{
f2();
f3();
}
int main()
{
int i;
for (i = 0; i < 100000; i++)
f1();
}
example_v2.c
noinline void f2(void)
{
volatile int a = 100, b, c;
for (b = 0; b < 10000; b++)
c = a * b;
}
noinline void f3(void)
{
volatile int i;
for (i = 0; i < 10000;) {
if(i%2)
i++;
else
i++;
}
}
noinline void f1(void)
{
f2();
f3();
}
int main()
{
int i;
for (i = 0; i < 100000; i++)
f1();
}
[lk@localhost perf_diff]$ gcc example_v1.c -o example
[lk@localhost perf_diff]$ perf record -o example_v1.data ./example
[ perf record: Woken up 4 times to write data ]
[ perf record: Captured and wrote 0.813 MB example_v1.data (~35522 samples) ]
[lk@localhost perf_diff]$ gcc example_v2.c -o example
[lk@localhost perf_diff]$ perf record -o example_v2.data ./example
[ perf record: Woken up 4 times to write data ]
[ perf record: Captured and wrote 0.824 MB example_v2.data (~36015 samples) ]
Old perf diff result:
[lk@localhost perf_diff]$ perf diff example_v1.data example_v2.data
Event 'cycles'
Baseline Delta Shared Object Symbol
........ ....... ................ ...............................
[kernel.vmlinux] [k] __perf_event_task_sched_out
0.00% [kernel.vmlinux] [k] apic_timer_interrupt
[kernel.vmlinux] [k] idle_cpu
[kernel.vmlinux] [k] intel_pstate_timer_func
[kernel.vmlinux] [k] native_read_msr_safe
0.00% [kernel.vmlinux] [k] native_read_tsc
0.00% [kernel.vmlinux] [k] native_write_msr_safe
[kernel.vmlinux] [k] ntp_tick_length
0.00% [kernel.vmlinux] [k] rb_erase
0.00% [kernel.vmlinux] [k] tick_sched_timer
0.00% [kernel.vmlinux] [k] unmap_single_vma
0.00% [kernel.vmlinux] [k] update_wall_time
0.00% example [.] f1
46.24% example [.] f2
53.71% -7.55% example [.] f3
+53.81% example [.] f3
0.02% example [.] main
New perf diff result:
[lk@localhost perf_diff]$ perf diff example_v1.data example_v2.data
[kernel.vmlinux] [k] __perf_event_task_sched_out
0.00% [kernel.vmlinux] [k] apic_timer_interrupt
[kernel.vmlinux] [k] idle_cpu
[kernel.vmlinux] [k] intel_pstate_timer_func
[kernel.vmlinux] [k] native_read_msr_safe
0.00% [kernel.vmlinux] [k] native_read_tsc
0.00% [kernel.vmlinux] [k] native_write_msr_safe
[kernel.vmlinux] [k] ntp_tick_length
0.00% [kernel.vmlinux] [k] rb_erase
0.00% [kernel.vmlinux] [k] tick_sched_timer
0.00% [kernel.vmlinux] [k] unmap_single_vma
0.00% [kernel.vmlinux] [k] update_wall_time
0.00% example [.] f1
46.24% -0.08% example [.] f2
53.71% +0.11% example [.] f3
0.02% example [.] main
Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/1423460384-11645-1-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-02-09 05:39:44 +00:00
|
|
|
/*
|
|
|
|
* perf diff displays the performance difference amongst
|
|
|
|
* two or more perf.data files. Those files could come
|
|
|
|
* from different binaries. So we should not compare
|
|
|
|
* their ips, but the name of symbol.
|
|
|
|
*/
|
|
|
|
if (sort__mode == SORT_MODE__DIFF)
|
|
|
|
sd->entry->se_collapse = sort__sym_sort;
|
|
|
|
|
2013-12-18 05:21:10 +00:00
|
|
|
} else if (sd->entry == &sort_dso) {
|
|
|
|
sort__has_dso = 1;
|
2015-09-04 14:45:43 +00:00
|
|
|
} else if (sd->entry == &sort_socket) {
|
|
|
|
sort__has_socket = 1;
|
2016-01-21 22:13:24 +00:00
|
|
|
} else if (sd->entry == &sort_thread) {
|
|
|
|
sort__has_thread = 1;
|
2009-09-24 16:02:49 +00:00
|
|
|
}
|
|
|
|
|
2014-05-19 05:19:30 +00:00
|
|
|
return __sort_dimension__add(sd);
|
2009-09-24 16:02:49 +00:00
|
|
|
}
|
2012-12-27 09:11:46 +00:00
|
|
|
|
2014-03-04 00:06:42 +00:00
|
|
|
for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
|
|
|
|
struct hpp_dimension *hd = &hpp_sort_dimensions[i];
|
|
|
|
|
|
|
|
if (strncasecmp(tok, hd->name, strlen(tok)))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
return __hpp_dimension__add(hd);
|
|
|
|
}
|
|
|
|
|
2012-12-27 09:11:46 +00:00
|
|
|
for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
|
|
|
|
struct sort_dimension *sd = &bstack_sort_dimensions[i];
|
|
|
|
|
|
|
|
if (strncasecmp(tok, sd->name, strlen(tok)))
|
|
|
|
continue;
|
|
|
|
|
2013-04-01 11:35:20 +00:00
|
|
|
if (sort__mode != SORT_MODE__BRANCH)
|
2012-12-27 09:11:46 +00:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
|
|
|
|
sort__has_sym = 1;
|
|
|
|
|
2014-05-19 05:19:30 +00:00
|
|
|
__sort_dimension__add(sd);
|
2012-12-27 09:11:46 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-04-03 12:26:11 +00:00
|
|
|
for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
|
|
|
|
struct sort_dimension *sd = &memory_sort_dimensions[i];
|
|
|
|
|
|
|
|
if (strncasecmp(tok, sd->name, strlen(tok)))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (sort__mode != SORT_MODE__MEMORY)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (sd->entry == &sort_mem_daddr_sym)
|
|
|
|
sort__has_sym = 1;
|
|
|
|
|
2014-05-19 05:19:30 +00:00
|
|
|
__sort_dimension__add(sd);
|
2013-04-03 12:26:11 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
perf tools: Add dynamic sort key for tracepoint events
The existing sort keys are less useful for tracepoint events in that
they are always sampled at the same place, the function where the
tracepoint is located.
For example, a 'perf report' on sched:sched_switch event looks like the
following:
# Overhead Command Shared Object Symbol
# ........ ............... ................ ..............
#
47.22% swapper [kernel.vmlinux] [k] __schedule
21.67% transmission-gt [kernel.vmlinux] [k] __schedule
8.23% netctl-auto [kernel.vmlinux] [k] __schedule
5.53% kworker/0:1H [kernel.vmlinux] [k] __schedule
1.98% Xephyr [kernel.vmlinux] [k] __schedule
1.33% irq/33-iwlwifi [kernel.vmlinux] [k] __schedule
1.17% wpa_cli [kernel.vmlinux] [k] __schedule
1.13% rcu_preempt [kernel.vmlinux] [k] __schedule
0.85% ksoftirqd/0 [kernel.vmlinux] [k] __schedule
0.77% Timer [kernel.vmlinux] [k] __schedule
In fact, tracepoints have meaningful information in their fields but
there's no way to use in 'perf report' currently. The dynamic sort keys
are introduced in this patc to overcome this limitation.
The sched:sched_switch events have following fields:
# sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
name: sched_switch
ID: 268
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char prev_comm[16]; offset:8; size:16; signed:1;
field:pid_t prev_pid; offset:24; size:4; signed:1;
field:int prev_prio; offset:28; size:4; signed:1;
field:long prev_state; offset:32; size:8; signed:1;
field:char next_comm[16]; offset:40; size:16; signed:1;
field:pid_t next_pid; offset:56; size:4; signed:1;
field:int next_prio; offset:60; size:4; signed:1;
print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
next_comm=%s next_pid=%d next_prio=%d",
REC->prev_comm, REC->prev_pid, REC->prev_prio,
REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
"|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
{ 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio
With dynamic sort keys, you can use <event.field> as a sort key. Those
dynamic keys are checked and created on demand. For instance, below is
to sort by next_pid field output on the same data file:
$ perf report -s comm,sched:sched_switch.next_pid --stdio
...
# Overhead Command next_pid
# ........ ............... ..........
#
21.23% transmission-gt 0
20.86% swapper 17773
6.62% netctl-auto 0
5.25% swapper 109
5.21% kworker/0:1H 0
1.98% Xephyr 0
1.98% swapper 6524
1.98% swapper 27478
1.37% swapper 27476
1.17% swapper 233
Multiple dynamic sort keys are also supported:
$ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
...
# Overhead Command next_pid next_comm
# ........ ............... .......... ................
#
20.86% swapper 17773 transmission-gt
9.64% transmission-gt 0 swapper/0
9.16% transmission-gt 0 swapper/2
5.25% swapper 109 kworker/0:1H
5.21% kworker/0:1H 0 swapper/0
2.14% netctl-auto 0 swapper/2
1.98% netctl-auto 0 swapper/0
1.98% swapper 6524 Xephyr
1.98% swapper 27478 netctl-auto
1.78% transmission-gt 0 swapper/3
1.53% Xephyr 0 swapper/0
1.29% netctl-auto 0 swapper/1
1.29% swapper 27476 netctl-auto
1.21% netctl-auto 0 swapper/3
1.17% swapper 233 irq/33-iwlwifi
Note that pid 0 exists for each cpu so have comm of 'swapper/N'.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1450804030-29193-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-12-22 17:07:02 +00:00
|
|
|
if (!add_dynamic_entry(evlist, tok))
|
|
|
|
return 0;
|
|
|
|
|
2009-09-24 16:02:49 +00:00
|
|
|
return -ESRCH;
|
|
|
|
}
|
2009-12-14 22:09:29 +00:00
|
|
|
|
2016-01-18 09:24:10 +00:00
|
|
|
/*
 * Split 'str' on commas and spaces and register every token as a sort key.
 *
 * 'str' is modified in place by strtok_r().  Processing stops at the first
 * key that fails: -EINVAL and -ESRCH get a user-visible message, and any
 * other negative result is returned as is instead of being overwritten by
 * the result of a later token.
 *
 * Returns 0 when every key was added (or 'str' holds no token), otherwise
 * the negative result of the failing sort_dimension__add() call.
 */
static int setup_sort_list(char *str, struct perf_evlist *evlist)
{
	char *tmp, *tok;
	int ret = 0;

	for (tok = strtok_r(str, ", ", &tmp);
	     tok; tok = strtok_r(NULL, ", ", &tmp)) {
		ret = sort_dimension__add(tok, evlist);
		if (ret == -EINVAL)
			error("Invalid --sort key: `%s'", tok);
		else if (ret == -ESRCH)
			error("Unknown --sort key: `%s'", tok);

		/* Do not let a later key hide an earlier failure. */
		if (ret < 0)
			break;
	}

	return ret;
}
|
|
|
|
|
2015-12-22 17:07:10 +00:00
|
|
|
static const char *get_default_sort_order(struct perf_evlist *evlist)
|
2014-03-18 02:31:39 +00:00
|
|
|
{
|
|
|
|
const char *default_sort_orders[] = {
|
|
|
|
default_sort_order,
|
|
|
|
default_branch_sort_order,
|
|
|
|
default_mem_sort_order,
|
|
|
|
default_top_sort_order,
|
|
|
|
default_diff_sort_order,
|
2015-12-22 17:07:10 +00:00
|
|
|
default_tracepoint_sort_order,
|
2014-03-18 02:31:39 +00:00
|
|
|
};
|
2015-12-22 17:07:10 +00:00
|
|
|
bool use_trace = true;
|
|
|
|
struct perf_evsel *evsel;
|
2014-03-18 02:31:39 +00:00
|
|
|
|
|
|
|
BUG_ON(sort__mode >= ARRAY_SIZE(default_sort_orders));
|
|
|
|
|
2015-12-22 17:07:10 +00:00
|
|
|
if (evlist == NULL)
|
|
|
|
goto out_no_evlist;
|
|
|
|
|
|
|
|
evlist__for_each(evlist, evsel) {
|
|
|
|
if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
|
|
|
|
use_trace = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (use_trace) {
|
|
|
|
sort__mode = SORT_MODE__TRACEPOINT;
|
|
|
|
if (symbol_conf.raw_trace)
|
|
|
|
return "trace_fields";
|
|
|
|
}
|
|
|
|
out_no_evlist:
|
2014-03-18 02:31:39 +00:00
|
|
|
return default_sort_orders[sort__mode];
|
|
|
|
}
|
|
|
|
|
2015-12-22 17:07:10 +00:00
|
|
|
static int setup_sort_order(struct perf_evlist *evlist)
|
2014-08-23 12:59:48 +00:00
|
|
|
{
|
|
|
|
char *new_sort_order;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Append '+'-prefixed sort order to the default sort
|
|
|
|
* order string.
|
|
|
|
*/
|
|
|
|
if (!sort_order || is_strict_order(sort_order))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (sort_order[1] == '\0') {
|
|
|
|
error("Invalid --sort key: `+'");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We allocate new sort_order string, but we never free it,
|
|
|
|
* because it's checked over the rest of the code.
|
|
|
|
*/
|
|
|
|
if (asprintf(&new_sort_order, "%s,%s",
|
2015-12-22 17:07:10 +00:00
|
|
|
get_default_sort_order(evlist), sort_order + 1) < 0) {
|
2014-08-23 12:59:48 +00:00
|
|
|
error("Not enough memory to set up --sort");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
sort_order = new_sort_order;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-01-07 09:14:08 +00:00
|
|
|
/*
 * Adds a 'pre,' prefix to 'str' if 'pre' is not already part of 'str'.
 *
 * The check is a plain substring search (strstr), so 'pre' also counts
 * as present when it occurs inside a longer key.
 *
 * 'str' must be heap-allocated (or NULL); ownership passes to this
 * function.  When a prefix is needed, 'str' is always freed -- even if
 * building the new string fails -- so the caller must only use the
 * returned pointer afterwards.
 *
 * Returns 'str' itself when it is NULL or already contains 'pre', a
 * newly allocated "pre,str" string otherwise, or NULL when that
 * allocation fails.
 */
static char *prefix_if_not_in(const char *pre, char *str)
{
	char *n;

	if (!str || strstr(str, pre))
		return str;

	/* asprintf() leaves 'n' undefined on failure; normalize to NULL. */
	if (asprintf(&n, "%s,%s", pre, str) < 0)
		n = NULL;

	free(str);
	return n;
}
|
|
|
|
|
|
|
|
static char *setup_overhead(char *keys)
|
|
|
|
{
|
|
|
|
keys = prefix_if_not_in("overhead", keys);
|
|
|
|
|
|
|
|
if (symbol_conf.cumulate_callchain)
|
|
|
|
keys = prefix_if_not_in("overhead_children", keys);
|
|
|
|
|
|
|
|
return keys;
|
|
|
|
}
|
|
|
|
|
2015-12-22 17:07:01 +00:00
|
|
|
static int __setup_sorting(struct perf_evlist *evlist)
|
2009-12-14 22:09:29 +00:00
|
|
|
{
|
2016-01-18 09:24:10 +00:00
|
|
|
char *str;
|
2014-08-23 12:59:48 +00:00
|
|
|
const char *sort_keys;
|
2013-02-06 05:57:16 +00:00
|
|
|
int ret = 0;
|
2009-12-14 22:09:29 +00:00
|
|
|
|
2015-12-22 17:07:10 +00:00
|
|
|
ret = setup_sort_order(evlist);
|
2014-08-23 12:59:48 +00:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
sort_keys = sort_order;
|
2014-03-04 01:46:34 +00:00
|
|
|
if (sort_keys == NULL) {
|
2014-08-22 13:58:38 +00:00
|
|
|
if (is_strict_order(field_order)) {
|
2014-03-04 01:46:34 +00:00
|
|
|
/*
|
|
|
|
* If user specified field order but no sort order,
|
|
|
|
* we'll honor it and not add default sort orders.
|
|
|
|
*/
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-12-22 17:07:10 +00:00
|
|
|
sort_keys = get_default_sort_order(evlist);
|
2014-03-04 01:46:34 +00:00
|
|
|
}
|
2014-03-18 02:31:39 +00:00
|
|
|
|
|
|
|
str = strdup(sort_keys);
|
2013-02-06 05:57:17 +00:00
|
|
|
if (str == NULL) {
|
|
|
|
error("Not enough memory to setup sort keys");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
2016-01-07 09:14:08 +00:00
|
|
|
/*
|
|
|
|
* Prepend overhead fields for backward compatibility.
|
|
|
|
*/
|
|
|
|
if (!is_strict_order(field_order)) {
|
|
|
|
str = setup_overhead(str);
|
|
|
|
if (str == NULL) {
|
|
|
|
error("Not enough memory to setup overhead keys");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-18 09:24:10 +00:00
|
|
|
ret = setup_sort_list(str, evlist);
|
2009-12-14 22:09:29 +00:00
|
|
|
|
|
|
|
free(str);
|
2013-02-06 05:57:16 +00:00
|
|
|
return ret;
|
2009-12-14 22:09:29 +00:00
|
|
|
}
|
perf diff: Use perf_session__fprintf_hists just like 'perf record'
That means that almost everything you can do with 'perf report'
can be done with 'perf diff', for instance:
$ perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2699
samples) ] $ perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2687
samples) ] perf diff | head -8
9.02% +1.00% find libc-2.10.1.so [.] _IO_vfprintf_internal
2.91% -1.00% find [kernel] [k] __kmalloc
2.85% -1.00% find [kernel] [k] ext4_htree_store_dirent
1.99% -1.00% find [kernel] [k] _atomic_dec_and_lock
2.44% find [kernel] [k] half_md4_transform
$
So if you want to zoom into libc:
$ perf diff --dsos libc-2.10.1.so | head -8
37.34% find [.] _IO_vfprintf_internal
10.34% find [.] __GI_memmove
8.25% +2.00% find [.] _int_malloc
5.07% -1.00% find [.] __GI_mempcpy
7.62% +2.00% find [.] _int_free
$
And if there were multiple commands using libc, it is also
possible to aggregate them all by using --sort symbol:
$ perf diff --dsos libc-2.10.1.so --sort symbol | head -8
37.34% [.] _IO_vfprintf_internal
10.34% [.] __GI_memmove
8.25% +2.00% [.] _int_malloc
5.07% -1.00% [.] __GI_mempcpy
7.62% +2.00% [.] _int_free
$
The displacement column now is off by default, to use it:
perf diff -m --dsos libc-2.10.1.so --sort symbol | head -8
37.34% [.] _IO_vfprintf_internal
10.34% [.] __GI_memmove
8.25% +2.00% [.] _int_malloc
5.07% -1.00% +2 [.] __GI_mempcpy
7.62% +2.00% -1 [.] _int_free
$
Using -t/--field-separator can be used for scripting:
$ perf diff -t, -m --dsos libc-2.10.1.so --sort symbol | head -8
37.34, , ,[.] _IO_vfprintf_internal
10.34, , ,[.] __GI_memmove
8.25,+2.00%, ,[.] _int_malloc
5.07,-1.00%, +2,[.] __GI_mempcpy
7.62,+2.00%, -1,[.] _int_free
6.99,+1.00%, -1,[.] _IO_new_file_xsputn
1.89,-2.00%, +4,[.] __readdir64
$
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1260978567-550-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-12-16 15:49:27 +00:00
|
|
|
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticeable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 15:15:47 +00:00
|
|
|
void perf_hpp__set_elide(int idx, bool elide)
|
2014-03-18 04:00:59 +00:00
|
|
|
{
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 15:15:47 +00:00
|
|
|
struct perf_hpp_fmt *fmt;
|
|
|
|
struct hpp_sort_entry *hse;
|
2014-03-18 04:00:59 +00:00
|
|
|
|
2016-01-18 09:24:17 +00:00
|
|
|
perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 15:15:47 +00:00
|
|
|
if (!perf_hpp__is_sort_entry(fmt))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
|
|
|
if (hse->se->se_width_idx == idx) {
|
|
|
|
fmt->elide = elide;
|
|
|
|
break;
|
|
|
|
}
|
2014-03-18 04:00:59 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticeable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 15:15:47 +00:00
|
|
|
static bool __get_elide(struct strlist *list, const char *list_name, FILE *fp)
|
perf diff: Use perf_session__fprintf_hists just like 'perf record'
That means that almost everything you can do with 'perf report'
can be done with 'perf diff', for instance:
$ perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2699
samples) ] $ perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2687
samples) ] perf diff | head -8
9.02% +1.00% find libc-2.10.1.so [.] _IO_vfprintf_internal
2.91% -1.00% find [kernel] [k] __kmalloc
2.85% -1.00% find [kernel] [k] ext4_htree_store_dirent
1.99% -1.00% find [kernel] [k] _atomic_dec_and_lock
2.44% find [kernel] [k] half_md4_transform
$
So if you want to zoom into libc:
$ perf diff --dsos libc-2.10.1.so | head -8
37.34% find [.] _IO_vfprintf_internal
10.34% find [.] __GI_memmove
8.25% +2.00% find [.] _int_malloc
5.07% -1.00% find [.] __GI_mempcpy
7.62% +2.00% find [.] _int_free
$
And if there were multiple commands using libc, it is also
possible to aggregate them all by using --sort symbol:
$ perf diff --dsos libc-2.10.1.so --sort symbol | head -8
37.34% [.] _IO_vfprintf_internal
10.34% [.] __GI_memmove
8.25% +2.00% [.] _int_malloc
5.07% -1.00% [.] __GI_mempcpy
7.62% +2.00% [.] _int_free
$
The displacement column now is off by default, to use it:
perf diff -m --dsos libc-2.10.1.so --sort symbol | head -8
37.34% [.] _IO_vfprintf_internal
10.34% [.] __GI_memmove
8.25% +2.00% [.] _int_malloc
5.07% -1.00% +2 [.] __GI_mempcpy
7.62% +2.00% -1 [.] _int_free
$
Using -t/--field-separator can be used for scripting:
$ perf diff -t, -m --dsos libc-2.10.1.so --sort symbol | head -8
37.34, , ,[.] _IO_vfprintf_internal
10.34, , ,[.] __GI_memmove
8.25,+2.00%, ,[.] _int_malloc
5.07,-1.00%, +2,[.] __GI_mempcpy
7.62,+2.00%, -1,[.] _int_free
6.99,+1.00%, -1,[.] _IO_new_file_xsputn
1.89,-2.00%, +4,[.] __readdir64
$
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1260978567-550-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-12-16 15:49:27 +00:00
|
|
|
{
|
|
|
|
if (list && strlist__nr_entries(list) == 1) {
|
|
|
|
if (fp != NULL)
|
|
|
|
fprintf(fp, "# %s: %s\n", list_name,
|
|
|
|
strlist__entry(list, 0)->s);
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 15:15:47 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool get_elide(int idx, FILE *output)
|
|
|
|
{
|
|
|
|
switch (idx) {
|
|
|
|
case HISTC_SYMBOL:
|
|
|
|
return __get_elide(symbol_conf.sym_list, "symbol", output);
|
|
|
|
case HISTC_DSO:
|
|
|
|
return __get_elide(symbol_conf.dso_list, "dso", output);
|
|
|
|
case HISTC_COMM:
|
|
|
|
return __get_elide(symbol_conf.comm_list, "comm", output);
|
|
|
|
default:
|
|
|
|
break;
|
perf diff: Use perf_session__fprintf_hists just like 'perf record'
That means that almost everything you can do with 'perf report'
can be done with 'perf diff', for instance:
$ perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2699
samples) ] $ perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2687
samples) ] perf diff | head -8
9.02% +1.00% find libc-2.10.1.so [.] _IO_vfprintf_internal
2.91% -1.00% find [kernel] [k] __kmalloc
2.85% -1.00% find [kernel] [k] ext4_htree_store_dirent
1.99% -1.00% find [kernel] [k] _atomic_dec_and_lock
2.44% find [kernel] [k] half_md4_transform
$
So if you want to zoom into libc:
$ perf diff --dsos libc-2.10.1.so | head -8
37.34% find [.] _IO_vfprintf_internal
10.34% find [.] __GI_memmove
8.25% +2.00% find [.] _int_malloc
5.07% -1.00% find [.] __GI_mempcpy
7.62% +2.00% find [.] _int_free
$
And if there were multiple commands using libc, it is also
possible to aggregate them all by using --sort symbol:
$ perf diff --dsos libc-2.10.1.so --sort symbol | head -8
37.34% [.] _IO_vfprintf_internal
10.34% [.] __GI_memmove
8.25% +2.00% [.] _int_malloc
5.07% -1.00% [.] __GI_mempcpy
7.62% +2.00% [.] _int_free
$
The displacement column now is off by default, to use it:
perf diff -m --dsos libc-2.10.1.so --sort symbol | head -8
37.34% [.] _IO_vfprintf_internal
10.34% [.] __GI_memmove
8.25% +2.00% [.] _int_malloc
5.07% -1.00% +2 [.] __GI_mempcpy
7.62% +2.00% -1 [.] _int_free
$
Using -t/--field-separator can be used for scripting:
$ perf diff -t, -m --dsos libc-2.10.1.so --sort symbol | head -8
37.34, , ,[.] _IO_vfprintf_internal
10.34, , ,[.] __GI_memmove
8.25,+2.00%, ,[.] _int_malloc
5.07,-1.00%, +2,[.] __GI_mempcpy
7.62,+2.00%, -1,[.] _int_free
6.99,+1.00%, -1,[.] _IO_new_file_xsputn
1.89,-2.00%, +4,[.] __readdir64
$
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1260978567-550-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-12-16 15:49:27 +00:00
|
|
|
}
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 15:15:47 +00:00
|
|
|
|
|
|
|
if (sort__mode != SORT_MODE__BRANCH)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
switch (idx) {
|
|
|
|
case HISTC_SYMBOL_FROM:
|
|
|
|
return __get_elide(symbol_conf.sym_from_list, "sym_from", output);
|
|
|
|
case HISTC_SYMBOL_TO:
|
|
|
|
return __get_elide(symbol_conf.sym_to_list, "sym_to", output);
|
|
|
|
case HISTC_DSO_FROM:
|
|
|
|
return __get_elide(symbol_conf.dso_from_list, "dso_from", output);
|
|
|
|
case HISTC_DSO_TO:
|
|
|
|
return __get_elide(symbol_conf.dso_to_list, "dso_to", output);
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
perf diff: Use perf_session__fprintf_hists just like 'perf record'
That means that almost everything you can do with 'perf report'
can be done with 'perf diff', for instance:
$ perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2699
samples) ] $ perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2687
samples) ] perf diff | head -8
9.02% +1.00% find libc-2.10.1.so [.] _IO_vfprintf_internal
2.91% -1.00% find [kernel] [k] __kmalloc
2.85% -1.00% find [kernel] [k] ext4_htree_store_dirent
1.99% -1.00% find [kernel] [k] _atomic_dec_and_lock
2.44% find [kernel] [k] half_md4_transform
$
So if you want to zoom into libc:
$ perf diff --dsos libc-2.10.1.so | head -8
37.34% find [.] _IO_vfprintf_internal
10.34% find [.] __GI_memmove
8.25% +2.00% find [.] _int_malloc
5.07% -1.00% find [.] __GI_mempcpy
7.62% +2.00% find [.] _int_free
$
And if there were multiple commands using libc, it is also
possible to aggregate them all by using --sort symbol:
$ perf diff --dsos libc-2.10.1.so --sort symbol | head -8
37.34% [.] _IO_vfprintf_internal
10.34% [.] __GI_memmove
8.25% +2.00% [.] _int_malloc
5.07% -1.00% [.] __GI_mempcpy
7.62% +2.00% [.] _int_free
$
The displacement column now is off by default, to use it:
perf diff -m --dsos libc-2.10.1.so --sort symbol | head -8
37.34% [.] _IO_vfprintf_internal
10.34% [.] __GI_memmove
8.25% +2.00% [.] _int_malloc
5.07% -1.00% +2 [.] __GI_mempcpy
7.62% +2.00% -1 [.] _int_free
$
Using -t/--field-separator can be used for scripting:
$ perf diff -t, -m --dsos libc-2.10.1.so --sort symbol | head -8
37.34, , ,[.] _IO_vfprintf_internal
10.34, , ,[.] __GI_memmove
8.25,+2.00%, ,[.] _int_malloc
5.07,-1.00%, +2,[.] __GI_mempcpy
7.62,+2.00%, -1,[.] _int_free
6.99,+1.00%, -1,[.] _IO_new_file_xsputn
1.89,-2.00%, +4,[.] __readdir64
$
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1260978567-550-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-12-16 15:49:27 +00:00
|
|
|
}
|
2013-04-03 12:26:19 +00:00
|
|
|
|
|
|
|
void sort__setup_elide(FILE *output)
|
|
|
|
{
|
2014-05-19 05:19:30 +00:00
|
|
|
struct perf_hpp_fmt *fmt;
|
|
|
|
struct hpp_sort_entry *hse;
|
2013-11-08 08:53:42 +00:00
|
|
|
|
2016-01-18 09:24:17 +00:00
|
|
|
perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 15:15:47 +00:00
|
|
|
if (!perf_hpp__is_sort_entry(fmt))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
|
|
|
fmt->elide = get_elide(hse->se->se_width_idx, output);
|
2013-04-03 12:26:19 +00:00
|
|
|
}
|
|
|
|
|
2013-11-08 08:53:42 +00:00
|
|
|
/*
|
|
|
|
* It makes no sense to elide all of sort entries.
|
|
|
|
* Just revert them to show up again.
|
|
|
|
*/
|
2016-01-18 09:24:17 +00:00
|
|
|
perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
|
2014-05-19 05:19:30 +00:00
|
|
|
if (!perf_hpp__is_sort_entry(fmt))
|
|
|
|
continue;
|
|
|
|
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 15:15:47 +00:00
|
|
|
if (!fmt->elide)
|
2013-11-08 08:53:42 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2016-01-18 09:24:17 +00:00
|
|
|
perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
|
2014-05-19 05:19:30 +00:00
|
|
|
if (!perf_hpp__is_sort_entry(fmt))
|
|
|
|
continue;
|
|
|
|
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 15:15:47 +00:00
|
|
|
fmt->elide = false;
|
2014-05-19 05:19:30 +00:00
|
|
|
}
|
2013-04-03 12:26:19 +00:00
|
|
|
}
|
2014-03-04 01:46:34 +00:00
|
|
|
|
2016-01-18 09:24:16 +00:00
|
|
|
static int output_field_add(struct perf_hpp_list *list, char *tok)
|
2014-03-04 01:46:34 +00:00
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
|
|
|
|
struct sort_dimension *sd = &common_sort_dimensions[i];
|
|
|
|
|
|
|
|
if (strncasecmp(tok, sd->name, strlen(tok)))
|
|
|
|
continue;
|
|
|
|
|
2016-01-18 09:24:16 +00:00
|
|
|
return __sort_dimension__add_output(list, sd);
|
2014-03-04 01:46:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
|
|
|
|
struct hpp_dimension *hd = &hpp_sort_dimensions[i];
|
|
|
|
|
|
|
|
if (strncasecmp(tok, hd->name, strlen(tok)))
|
|
|
|
continue;
|
|
|
|
|
2016-01-18 09:24:16 +00:00
|
|
|
return __hpp_dimension__add_output(list, hd);
|
2014-03-04 01:46:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
|
|
|
|
struct sort_dimension *sd = &bstack_sort_dimensions[i];
|
|
|
|
|
|
|
|
if (strncasecmp(tok, sd->name, strlen(tok)))
|
|
|
|
continue;
|
|
|
|
|
2016-01-18 09:24:16 +00:00
|
|
|
return __sort_dimension__add_output(list, sd);
|
2014-03-04 01:46:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
|
|
|
|
struct sort_dimension *sd = &memory_sort_dimensions[i];
|
|
|
|
|
|
|
|
if (strncasecmp(tok, sd->name, strlen(tok)))
|
|
|
|
continue;
|
|
|
|
|
2016-01-18 09:24:16 +00:00
|
|
|
return __sort_dimension__add_output(list, sd);
|
2014-03-04 01:46:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return -ESRCH;
|
|
|
|
}
|
|
|
|
|
2016-01-18 09:24:16 +00:00
|
|
|
/*
 * Split 'str' on commas and spaces and add each token to 'list' as an
 * output field.  'str' is modified in place by strtok_r().
 *
 * Parsing stops at the first token for which output_field_add() returns
 * -EINVAL or -ESRCH, after reporting it.  Returns the result of the last
 * output_field_add() call, or 0 when 'str' holds no tokens.
 */
static int setup_output_list(struct perf_hpp_list *list, char *str)
{
	char *saveptr;
	char *tok;
	int ret = 0;

	tok = strtok_r(str, ", ", &saveptr);
	while (tok != NULL) {
		ret = output_field_add(list, tok);

		if (ret == -EINVAL) {
			error("Invalid --fields key: `%s'", tok);
			break;
		}
		if (ret == -ESRCH) {
			error("Unknown --fields key: `%s'", tok);
			break;
		}

		tok = strtok_r(NULL, ", ", &saveptr);
	}

	return ret;
}
|
|
|
|
|
2014-03-04 01:46:34 +00:00
|
|
|
static void reset_dimensions(void)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++)
|
|
|
|
common_sort_dimensions[i].taken = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++)
|
|
|
|
hpp_sort_dimensions[i].taken = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++)
|
|
|
|
bstack_sort_dimensions[i].taken = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++)
|
|
|
|
memory_sort_dimensions[i].taken = 0;
|
|
|
|
}
|
|
|
|
|
2014-08-22 13:58:38 +00:00
|
|
|
/*
 * An order string is "strict" when it is non-NULL and does not start
 * with '+'.  An empty string therefore counts as strict; NULL does not.
 */
bool is_strict_order(const char *order)
{
	if (order == NULL)
		return false;

	return order[0] != '+';
}
|
|
|
|
|
2014-03-04 01:46:34 +00:00
|
|
|
static int __setup_output_field(void)
|
|
|
|
{
|
2016-01-18 09:24:11 +00:00
|
|
|
char *str, *strp;
|
2014-08-22 13:58:38 +00:00
|
|
|
int ret = -EINVAL;
|
2014-03-04 01:46:34 +00:00
|
|
|
|
|
|
|
if (field_order == NULL)
|
|
|
|
return 0;
|
|
|
|
|
2014-08-22 13:58:38 +00:00
|
|
|
strp = str = strdup(field_order);
|
2014-03-04 01:46:34 +00:00
|
|
|
if (str == NULL) {
|
|
|
|
error("Not enough memory to setup output fields");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
2014-08-22 13:58:38 +00:00
|
|
|
if (!is_strict_order(field_order))
|
|
|
|
strp++;
|
|
|
|
|
|
|
|
if (!strlen(strp)) {
|
|
|
|
error("Invalid --fields key: `+'");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2016-01-18 09:24:16 +00:00
|
|
|
ret = setup_output_list(&perf_hpp_list, strp);
|
2014-03-04 01:46:34 +00:00
|
|
|
|
2014-08-22 13:58:38 +00:00
|
|
|
out:
|
2014-03-04 01:46:34 +00:00
|
|
|
free(str);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2015-12-22 17:07:01 +00:00
|
|
|
int setup_sorting(struct perf_evlist *evlist)
|
2014-03-04 01:46:34 +00:00
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
2015-12-22 17:07:01 +00:00
|
|
|
err = __setup_sorting(evlist);
|
2014-03-04 01:46:34 +00:00
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
if (parent_pattern != default_parent_pattern) {
|
2015-12-22 17:07:01 +00:00
|
|
|
err = sort_dimension__add("parent", evlist);
|
2014-03-04 01:46:34 +00:00
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
reset_dimensions();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* perf diff doesn't use default hpp output fields.
|
|
|
|
*/
|
|
|
|
if (sort__mode != SORT_MODE__DIFF)
|
|
|
|
perf_hpp__init();
|
|
|
|
|
|
|
|
err = __setup_output_field();
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* copy sort keys to output fields */
|
2016-01-18 09:24:21 +00:00
|
|
|
perf_hpp__setup_output_field(&perf_hpp_list);
|
2014-03-04 01:46:34 +00:00
|
|
|
/* and then copy output fields to sort keys */
|
2016-01-18 09:24:21 +00:00
|
|
|
perf_hpp__append_sort_keys(&perf_hpp_list);
|
2014-03-04 01:46:34 +00:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2014-05-07 09:42:24 +00:00
|
|
|
|
|
|
|
void reset_output_field(void)
|
|
|
|
{
|
|
|
|
sort__need_collapse = 0;
|
|
|
|
sort__has_parent = 0;
|
|
|
|
sort__has_sym = 0;
|
|
|
|
sort__has_dso = 0;
|
|
|
|
|
2014-05-23 01:59:01 +00:00
|
|
|
field_order = NULL;
|
|
|
|
sort_order = NULL;
|
|
|
|
|
2014-05-07 09:42:24 +00:00
|
|
|
reset_dimensions();
|
2016-01-18 09:24:21 +00:00
|
|
|
perf_hpp__reset_output_field(&perf_hpp_list);
|
2014-05-07 09:42:24 +00:00
|
|
|
}
|