linux/tools/perf/util/sort.c

578 lines
14 KiB
C
Raw Normal View History

#include "sort.h"
#include "hist.h"
/* Compiled form of parent_pattern; filled in by regcomp() in sort_dimension__add(). */
regex_t parent_regex;
/* Built-in regex used for the "parent" sort key unless parent_pattern is changed. */
const char default_parent_pattern[] = "^sys_|^do_page_fault";
/* Pattern that gets compiled into parent_regex; starts out as the default. */
const char *parent_pattern = default_parent_pattern;
/* Built-in comma separated list of sort keys. */
const char default_sort_order[] = "comm,dso,symbol";
/* Sort key list tokenized by setup_sorting(); starts out as the default. */
const char *sort_order = default_sort_order;
/* Set to 1 once a registered sort key provides an se_collapse callback. */
int sort__need_collapse = 0;
/* Set to 1 once the "parent" sort key has been registered. */
int sort__has_parent = 0;
int sort__branch_mode = -1; /* -1 = means not set */
perf tools: Bind callchains to the first sort dimension column Currently, the callchains are displayed using a constant left margin. So depending on the current sort dimension configuration, callchains may appear to be well attached to the first sort dimension column field which is mostly the case, except when the first dimension of sorting is done by comm, because these are right aligned. This patch binds the callchain to the first letter in the first column, whatever type of column it is (dso, comm, symbol). Before: 0.80% perf [k] __lock_acquire __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify | | __fsnotify_parent After: 0.80% perf [k] __lock_acquire __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify | | __fsnotify_parent Also, for clarity, we don't put anymore the callchain as is but: - If we have a top level ancestor in the callchain, start it with a first ascii hook. Before: 0.80% perf [kernel] [k] __lock_acquire __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify [..] [..] After: 0.80% perf [kernel] [k] __lock_acquire | --- __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify [..] [..] - Otherwise, if we have several top level ancestors, then display these like we did before: 1.69% Xorg | |--21.21%-- vread_hpet | 0x7fffd85b46fc | 0x7fffd85b494d | 0x7f4fafb4e54d | |--15.15%-- exaOffscreenAlloc | |--9.09%-- I830WaitLpRing Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Anton Blanchard <anton@samba.org> LKML-Reference: <1256246604-17156-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-22 21:23:23 +00:00
/* Sort key that was registered first; assigned in sort_dimension__add(). */
enum sort_type sort__first_dimension;
/* When non-NULL, its first character is replaced by '.' in formatted column text. */
char * field_sep;
/* List of the sort entries registered so far, in registration order. */
LIST_HEAD(hist_entry__sort_list);
/*
 * snprintf() wrapper used by the column formatters in this file.
 *
 * Formats into @bf (at most @size bytes including the terminating NUL) and,
 * when field_sep is set, replaces every occurrence of the separator
 * character in the formatted text with '.'.
 *
 * Returns the number of characters actually stored in @bf, excluding the
 * NUL: the vsnprintf() result clamped to @size - 1, so callers can add the
 * return value to an offset into @bf without running past the buffer.
 * Returns 0 when @size is 0 and the (negative) vsnprintf() result on error.
 */
static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
{
	int n;
	va_list ap;

	va_start(ap, fmt);
	n = vsnprintf(bf, size, fmt, ap);
	va_end(ap);

	/* Nothing was stored: there is no text to scan and nothing to clamp. */
	if (size == 0)
		return n < 0 ? n : 0;

	/* An empty separator string would make strchr() match the NUL itself. */
	if (field_sep && *field_sep && n > 0) {
		char *sep = bf;

		/*
		 * Step past each replaced character.  When the separator is
		 * '.' itself, searching again from the same position would
		 * find the same character forever.
		 */
		while ((sep = strchr(sep, *field_sep)) != NULL)
			*sep++ = '.';
	}

	/* vsnprintf() reports the untruncated length; report what was stored. */
	if (n >= (int)size)
		return size - 1;
	return n;
}
/*
 * Orders two pointers by NULL-ness only.
 *
 * Returns 0 when both are NULL, -1 when only @l is NULL and 1 whenever @l is
 * non-NULL (including the case where @r is non-NULL as well).
 */
static int64_t cmp_null(void *l, void *r)
{
	if (l)
		return 1;

	return r ? -1 : 0;
}
/* --sort pid */

/* Compares two entries by thread pid; the result is right's pid minus left's. */
static int64_t
sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
{
	int64_t delta = right->thread->pid - left->thread->pid;

	return delta;
}
/*
 * Formats the "Command: Pid" column: the thread's command name right-aligned
 * in @width columns (empty when the thread has no comm), a colon, then the
 * pid in a field of at least five characters.
 */
static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf,
				       size_t size, unsigned int width)
{
	const char *comm = self->thread->comm ?: "";

	return repsep_snprintf(bf, size, "%*s:%5d", width, comm,
			       self->thread->pid);
}
/* Sort entry registered under the "pid" key: header, compare and format callbacks. */
struct sort_entry sort_thread = {
.se_header = "Command: Pid",
.se_cmp = sort__thread_cmp,
.se_snprintf = hist_entry__thread_snprintf,
.se_width_idx = HISTC_THREAD,
};
/* --sort comm */

/*
 * Primary comparison for the "comm" key.  It compares thread pids, not
 * command names; the name based comparison is sort__comm_collapse().
 */
static int64_t
sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
{
	int64_t delta = right->thread->pid - left->thread->pid;

	return delta;
}
/*
 * Collapse comparison for the "comm" key: strcmp() of the two command names,
 * falling back to cmp_null() when either thread has no comm.
 */
static int64_t
sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
{
	char *lhs = left->thread->comm;
	char *rhs = right->thread->comm;

	if (lhs && rhs)
		return strcmp(lhs, rhs);

	return cmp_null(lhs, rhs);
}
/*
 * Formats the "Command" column: the thread's command name right-aligned in
 * @width columns.
 *
 * A thread without a comm is printed as an empty string, the same way
 * hist_entry__thread_snprintf() does it, instead of handing a NULL pointer
 * to "%s" (which is undefined behaviour).
 */
static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,
				     size_t size, unsigned int width)
{
	return repsep_snprintf(bf, size, "%*s", width,
			       self->thread->comm ?: "");
}
/*
 * Compares the DSOs behind two maps by name.
 *
 * A NULL map counts as having no DSO; when either DSO is missing the result
 * comes from cmp_null().  Otherwise the long names are compared in verbose
 * mode and the short names in normal mode.
 */
static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
{
	struct dso *dso_l = NULL;
	struct dso *dso_r = NULL;

	if (map_l)
		dso_l = map_l->dso;
	if (map_r)
		dso_r = map_r->dso;

	if (!dso_l || !dso_r)
		return cmp_null(dso_l, dso_r);

	if (verbose)
		return strcmp(dso_l->long_name, dso_r->long_name);

	return strcmp(dso_l->short_name, dso_r->short_name);
}
/*
 * Sort entry registered under the "comm" key.  It is the only entry in this
 * part of the file that sets se_collapse.
 */
struct sort_entry sort_comm = {
.se_header = "Command",
.se_cmp = sort__comm_cmp,
.se_collapse = sort__comm_collapse,
.se_snprintf = hist_entry__comm_snprintf,
.se_width_idx = HISTC_COMM,
};
/* --sort dso */

/* Compares two entries by the DSO of their sampled map. */
static int64_t
sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
{
	struct map *map_l = left->ms.map;
	struct map *map_r = right->ms.map;

	return _sort__dso_cmp(map_l, map_r);
}
/*
 * Compares two symbols by start address.
 *
 * When either symbol is NULL the result comes from cmp_null(); identical
 * symbols compare equal.  Otherwise the result is right's start minus left's
 * start.  The @ip_l / @ip_r values passed in are never used as-is: by the
 * time they are read both symbols are known to be non-NULL, so they are
 * always overwritten with the symbol start addresses.
 */
static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r,
			      u64 ip_l, u64 ip_r)
{
	if (!sym_l || !sym_r)
		return cmp_null(sym_l, sym_r);

	if (sym_l == sym_r)
		return 0;

	ip_l = sym_l->start;
	ip_r = sym_r->start;

	return (int64_t)(ip_r - ip_l);
}
/*
 * Formats a DSO name left-aligned in @width columns: the long name in
 * verbose mode, the short name otherwise, or "[unknown]" when @map is NULL
 * or has no DSO.
 */
static int _hist_entry__dso_snprintf(struct map *map, char *bf,
				     size_t size, unsigned int width)
{
	const char *dso_name = "[unknown]";

	if (map && map->dso)
		dso_name = verbose ? map->dso->long_name :
				     map->dso->short_name;

	return repsep_snprintf(bf, size, "%-*s", width, dso_name);
}
/* Formats the "Shared Object" column from the entry's sampled map. */
static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
				    size_t size, unsigned int width)
{
	struct map *map = self->ms.map;

	return _hist_entry__dso_snprintf(map, bf, size, width);
}
/*
 * Formats a symbol column into @bf.
 *
 * Layout: in verbose mode the address @ip and the symtab origin character of
 * the map's DSO ('!' when @map is NULL) come first; then "[<level>] "; then
 * either the symbol name or, when @sym is NULL, the address @ip.  The result
 * is padded with spaces up to @width columns.
 *
 * The `*` and `.*` printf arguments must be of type int, so the width and
 * precision values are converted explicitly.  The padding is clamped at zero
 * when the text written so far is already wider than @width, rather than
 * letting the unsigned subtraction wrap around.
 *
 * Returns the number of characters stored in @bf.
 */
static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
				     u64 ip, char level, char *bf, size_t size,
				     unsigned int width __used)
{
	const int addr_width = (int)(BITS_PER_LONG / 4);
	const char *name = "";
	size_t ret = 0;
	int pad;

	if (verbose) {
		char o = map ? dso__symtab_origin(map->dso) : '!';

		ret += repsep_snprintf(bf, size, "%-#*llx %c ",
				       addr_width, (unsigned long long)ip, o);
	}

	ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);

	if (sym)
		name = sym->name;
	else
		/* Unresolved sample: show the raw address instead of a name. */
		ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
				       addr_width, (unsigned long long)ip);

	/* Remaining columns, if any, are filled by left-aligning the name. */
	pad = width > ret ? (int)(width - ret) : 0;
	ret += repsep_snprintf(bf + ret, size - ret, "%-*s", pad, name);

	return ret;
}
/* Sort entry registered under the "dso" key: header, compare and format callbacks. */
struct sort_entry sort_dso = {
.se_header = "Shared Object",
.se_cmp = sort__dso_cmp,
.se_snprintf = hist_entry__dso_snprintf,
.se_width_idx = HISTC_DSO,
};
/*
 * Formats the "Symbol" column from the entry's own map, symbol, ip and
 * level.  @width is forwarded to the shared symbol formatter.
 */
static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
				    size_t size, unsigned int width __used)
{
	struct map *map = self->ms.map;
	struct symbol *sym = self->ms.sym;

	return _hist_entry__sym_snprintf(map, sym, self->ip, self->level,
					 bf, size, width);
}
/* --sort symbol */
static int64_t
sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
{
u64 ip_l, ip_r;
perf sort: Fix symbol sort output by separating unresolved samples by type I took a profile that suggested 60% of total CPU time was in the hypervisor: ... 60.20% [H] 0x33d43c 4.43% [k] ._spin_lock_irqsave 1.07% [k] ._spin_lock Using perf stat to get the user/kernel/hypervisor breakdown contradicted this. The problem is we merge all unresolved samples into the one unknown bucket. If add a comparison by sample type to sort__sym_cmp we get the real picture: ... 57.11% [.] 0x80fbf63c 4.43% [k] ._spin_lock_irqsave 1.07% [k] ._spin_lock 0.65% [H] 0x33d43c So it was almost all userspace, not hypervisor as the initial profile suggested. I found another issue while adding this. Symbol sorting sometimes shows multiple entries for the unknown bucket: ... 16.65% [.] 0x6cd3a8 7.25% [.] 0x422460 5.37% [.] yylex 4.79% [.] malloc 4.78% [.] _int_malloc 4.03% [.] _int_free 3.95% [.] hash_source_code_string 2.82% [.] 0x532908 2.64% [.] 0x36b538 0.94% [H] 0x8000000000e132a4 0.82% [H] 0x800000000000e8b0 This happens because we aren't consistent with our sorting. On one hand we check to see if both symbols match and for two unresolved samples sym is NULL so we match: if (left->ms.sym == right->ms.sym) return 0; On the other hand we use sample IP for unresolved samples when comparing against a symbol: ip_l = left->ms.sym ? left->ms.sym->start : left->ip; ip_r = right->ms.sym ? right->ms.sym->start : right->ip; This means unresolved samples end up spread across the rbtree and we can't merge them all. If we use cmp_null all unresolved samples will end up in the one bucket and the output makes more sense: ... 39.12% [.] 0x36b538 5.37% [.] yylex 4.79% [.] malloc 4.78% [.] _int_malloc 4.03% [.] _int_free 3.95% [.] 
hash_source_code_string 2.26% [H] 0x800000000000e8b0 Acked-by: Eric B Munson <emunson@mgebm.net> Cc: Eric B Munson <emunson@mgebm.net> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Ian Munsie <imunsie@au1.ibm.com> Link: http://lkml.kernel.org/r/20110831115145.4f598ab2@kryten Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2011-08-31 01:51:45 +00:00
if (!left->ms.sym && !right->ms.sym)
return right->level - left->level;
if (!left->ms.sym || !right->ms.sym)
return cmp_null(left->ms.sym, right->ms.sym);
if (left->ms.sym == right->ms.sym)
return 0;
perf sort: Fix symbol sort output by separating unresolved samples by type I took a profile that suggested 60% of total CPU time was in the hypervisor: ... 60.20% [H] 0x33d43c 4.43% [k] ._spin_lock_irqsave 1.07% [k] ._spin_lock Using perf stat to get the user/kernel/hypervisor breakdown contradicted this. The problem is we merge all unresolved samples into the one unknown bucket. If add a comparison by sample type to sort__sym_cmp we get the real picture: ... 57.11% [.] 0x80fbf63c 4.43% [k] ._spin_lock_irqsave 1.07% [k] ._spin_lock 0.65% [H] 0x33d43c So it was almost all userspace, not hypervisor as the initial profile suggested. I found another issue while adding this. Symbol sorting sometimes shows multiple entries for the unknown bucket: ... 16.65% [.] 0x6cd3a8 7.25% [.] 0x422460 5.37% [.] yylex 4.79% [.] malloc 4.78% [.] _int_malloc 4.03% [.] _int_free 3.95% [.] hash_source_code_string 2.82% [.] 0x532908 2.64% [.] 0x36b538 0.94% [H] 0x8000000000e132a4 0.82% [H] 0x800000000000e8b0 This happens because we aren't consistent with our sorting. On one hand we check to see if both symbols match and for two unresolved samples sym is NULL so we match: if (left->ms.sym == right->ms.sym) return 0; On the other hand we use sample IP for unresolved samples when comparing against a symbol: ip_l = left->ms.sym ? left->ms.sym->start : left->ip; ip_r = right->ms.sym ? right->ms.sym->start : right->ip; This means unresolved samples end up spread across the rbtree and we can't merge them all. If we use cmp_null all unresolved samples will end up in the one bucket and the output makes more sense: ... 39.12% [.] 0x36b538 5.37% [.] yylex 4.79% [.] malloc 4.78% [.] _int_malloc 4.03% [.] _int_free 3.95% [.] 
hash_source_code_string 2.26% [H] 0x800000000000e8b0 Acked-by: Eric B Munson <emunson@mgebm.net> Cc: Eric B Munson <emunson@mgebm.net> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Ian Munsie <imunsie@au1.ibm.com> Link: http://lkml.kernel.org/r/20110831115145.4f598ab2@kryten Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2011-08-31 01:51:45 +00:00
ip_l = left->ms.sym->start;
ip_r = right->ms.sym->start;
return _sort__sym_cmp(left->ms.sym, right->ms.sym, ip_l, ip_r);
}
/* Sort entry registered under the "symbol" key: header, compare and format callbacks. */
struct sort_entry sort_sym = {
.se_header = "Symbol",
.se_cmp = sort__sym_cmp,
.se_snprintf = hist_entry__sym_snprintf,
.se_width_idx = HISTC_SYMBOL,
};
perf tools: Add sort by src line/number Using addr2line for now, requires debuginfo, needs more work to support detached debuginfo, aka foo-debuginfo packages. Example: [root@sandy ~]# perf record -a sleep 3 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.555 MB perf.data (~24236 samples) ] [root@sandy ~]# perf report -s dso,srcline 2>&1 | grep -v ^# | head -5 22.41% [kernel.kallsyms] /home/git/linux/drivers/idle/intel_idle.c:280 4.79% [kernel.kallsyms] /home/git/linux/drivers/cpuidle/cpuidle.c:148 4.78% [kernel.kallsyms] /home/git/linux/arch/x86/include/asm/atomic64_64.h:121 4.49% [kernel.kallsyms] /home/git/linux/kernel/sched/core.c:1690 4.30% [kernel.kallsyms] /home/git/linux/include/linux/seqlock.h:90 [root@sandy ~]# [root@sandy ~]# perf top -U -s dso,symbol,srcline Samples: 1K of event 'cycles', Event count (approx.): 589617389 18.66% [kernel] [k] copy_user_generic_unrolled /home/git/linux/arch/x86/lib/copy_user_64.S:143 7.83% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:39 6.59% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:38 3.66% [kernel] [k] page_fault /home/git/linux/arch/x86/kernel/entry_64.S:1379 3.25% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:40 3.12% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:37 2.74% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:36 2.39% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:43 2.12% [kernel] [k] ioread32 /home/git/linux/lib/iomap.c:90 1.51% [kernel] [k] copy_user_generic_unrolled /home/git/linux/arch/x86/lib/copy_user_64.S:144 1.19% [kernel] [k] copy_user_generic_unrolled /home/git/linux/arch/x86/lib/copy_user_64.S:154 Suggested-by: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Namhyung Kim <namhyung@gmail.com> Cc: Paul Mackerras 
<paulus@samba.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/n/tip-pdmqbng9twz06jzkbgtuwbp8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2012-05-30 13:33:24 +00:00
/* --sort srcline */

/*
 * Compares two entries by sample address (right's ip minus left's ip); the
 * source line text itself is not consulted here.
 */
static int64_t
sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
{
	int64_t delta = (int64_t)(right->ip - left->ip);

	return delta;
}
/*
 * Formats the "Source:Line" column for @self into @bf.
 *
 * The text is the first output line of `addr2line -e <dso long name> <ip>`.
 * It is looked up once and cached in self->srcline; later calls reuse the
 * cached string.  Whenever the lookup is not possible (no map or DSO, the
 * command does not fit the buffer, popen() fails, or no line can be read)
 * the sample address is printed instead and nothing is cached.
 *
 * The cached string is the buffer allocated by getline(), so it can be
 * released with free().
 *
 * NOTE(review): the DSO path is pasted unquoted into a shell command; a path
 * containing shell metacharacters would be interpreted by the shell.
 *
 * Returns the number of characters stored in @bf.
 */
static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf,
					size_t size, unsigned int width __used)
{
	FILE *fp;
	char cmd[PATH_MAX + 2], *line = NULL, *nl;
	size_t line_len = 0;
	int n;

	if (self->srcline != NULL)
		goto out_path;

	if (!self->ms.map || !self->ms.map->dso)
		goto out_ip;

	n = snprintf(cmd, sizeof(cmd), "addr2line -e %s %016" PRIx64,
		     self->ms.map->dso->long_name, self->ip);
	/* A truncated command line would query the wrong file or address. */
	if (n < 0 || (size_t)n >= sizeof(cmd))
		goto out_ip;

	fp = popen(cmd, "r");
	if (!fp)
		goto out_ip;

	if (getline(&line, &line_len, fp) < 0) {
		/* getline() may have allocated a buffer even though it failed. */
		free(line);
		pclose(fp);
		goto out_ip;
	}
	/* Streams opened with popen() must be closed with pclose(). */
	pclose(fp);

	nl = strchr(line, '\n');
	if (nl != NULL)
		*nl = '\0';

	/* Hand the getline() buffer over to the entry as its cached text. */
	self->srcline = line;
out_path:
	return repsep_snprintf(bf, size, "%s", self->srcline);
out_ip:
	return repsep_snprintf(bf, size, "%-#*llx", BITS_PER_LONG / 4, self->ip);
}
/* Sort entry registered under the "srcline" key: header, compare and format callbacks. */
struct sort_entry sort_srcline = {
.se_header = "Source:Line",
.se_cmp = sort__srcline_cmp,
.se_snprintf = hist_entry__srcline_snprintf,
.se_width_idx = HISTC_SRCLINE,
};
/* --sort parent */

/*
 * Compares two entries by the name of their parent symbol, falling back to
 * cmp_null() when either entry has no parent.
 */
static int64_t
sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
{
	struct symbol *lhs = left->parent;
	struct symbol *rhs = right->parent;

	if (lhs && rhs)
		return strcmp(lhs->name, rhs->name);

	return cmp_null(lhs, rhs);
}
/*
 * Formats the "Parent symbol" column: the parent's name left-aligned in
 * @width columns, or "[other]" when the entry has no parent.
 */
static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf,
				       size_t size, unsigned int width)
{
	const char *name = "[other]";

	if (self->parent)
		name = self->parent->name;

	return repsep_snprintf(bf, size, "%-*s", width, name);
}
/*
 * Sort entry registered under the "parent" key.  sort_dimension__add()
 * recognises it by address in order to compile parent_regex.
 */
struct sort_entry sort_parent = {
.se_header = "Parent symbol",
.se_cmp = sort__parent_cmp,
.se_snprintf = hist_entry__parent_snprintf,
.se_width_idx = HISTC_PARENT,
};
/* --sort cpu */

/* Compares two entries by cpu number; the result is right's cpu minus left's. */
static int64_t
sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
{
	int64_t delta = right->cpu - left->cpu;

	return delta;
}
/* Formats the "CPU" column: the cpu number left-aligned in @width columns. */
static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf,
				    size_t size, unsigned int width)
{
	static const char fmt[] = "%-*d";

	return repsep_snprintf(bf, size, fmt, width, self->cpu);
}
/* Sort entry registered under the "cpu" key: header, compare and format callbacks. */
struct sort_entry sort_cpu = {
.se_header = "CPU",
.se_cmp = sort__cpu_cmp,
.se_snprintf = hist_entry__cpu_snprintf,
.se_width_idx = HISTC_CPU,
};
/* Compares two entries by the DSO of their branch source map. */
static int64_t
sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right)
{
	struct map *map_l = left->branch_info->from.map;
	struct map *map_r = right->branch_info->from.map;

	return _sort__dso_cmp(map_l, map_r);
}
/* Formats the "Source Shared Object" column from the branch source map. */
static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf,
					 size_t size, unsigned int width)
{
	struct map *map = self->branch_info->from.map;

	return _hist_entry__dso_snprintf(map, bf, size, width);
}
/* Sort entry registered under the "dso_from" key: header, compare and format callbacks. */
struct sort_entry sort_dso_from = {
.se_header = "Source Shared Object",
.se_cmp = sort__dso_from_cmp,
.se_snprintf = hist_entry__dso_from_snprintf,
.se_width_idx = HISTC_DSO_FROM,
};
/* Compares two entries by the DSO of their branch target map. */
static int64_t
sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
{
	struct map *map_l = left->branch_info->to.map;
	struct map *map_r = right->branch_info->to.map;

	return _sort__dso_cmp(map_l, map_r);
}
/* Formats the "Target Shared Object" column from the branch target map. */
static int hist_entry__dso_to_snprintf(struct hist_entry *self, char *bf,
				       size_t size, unsigned int width)
{
	struct map *map = self->branch_info->to.map;

	return _hist_entry__dso_snprintf(map, bf, size, width);
}
/*
 * Compares two entries by branch source symbol.  When neither side resolved
 * to a symbol the entries are ordered by level character (right minus left);
 * every other case is decided by _sort__sym_cmp().
 */
static int64_t
sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
{
	struct addr_map_symbol *lhs = &left->branch_info->from;
	struct addr_map_symbol *rhs = &right->branch_info->from;

	if (lhs->sym || rhs->sym)
		return _sort__sym_cmp(lhs->sym, rhs->sym,
				      lhs->addr, rhs->addr);

	return right->level - left->level;
}
/*
 * Compares two entries by branch target symbol.  When neither side resolved
 * to a symbol the entries are ordered by level character (right minus left);
 * every other case is decided by _sort__sym_cmp().
 */
static int64_t
sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
{
	struct addr_map_symbol *lhs = &left->branch_info->to;
	struct addr_map_symbol *rhs = &right->branch_info->to;

	if (lhs->sym || rhs->sym)
		return _sort__sym_cmp(lhs->sym, rhs->sym,
				      lhs->addr, rhs->addr);

	return right->level - left->level;
}
/*
 * Formats the "Source Symbol" column from the branch source map, symbol and
 * address, tagged with the entry's level character.
 */
static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf,
					 size_t size, unsigned int width __used)
{
	return _hist_entry__sym_snprintf(self->branch_info->from.map,
					 self->branch_info->from.sym,
					 self->branch_info->from.addr,
					 self->level, bf, size, width);
}
/*
 * Formats the "Target Symbol" column from the branch target map, symbol and
 * address, tagged with the entry's level character.
 */
static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf,
				       size_t size, unsigned int width __used)
{
	return _hist_entry__sym_snprintf(self->branch_info->to.map,
					 self->branch_info->to.sym,
					 self->branch_info->to.addr,
					 self->level, bf, size, width);
}
/* Sort entry registered under the "dso_to" key: header, compare and format callbacks. */
struct sort_entry sort_dso_to = {
.se_header = "Target Shared Object",
.se_cmp = sort__dso_to_cmp,
.se_snprintf = hist_entry__dso_to_snprintf,
.se_width_idx = HISTC_DSO_TO,
};
/* Sort entry for the branch source symbol: header, compare and format callbacks. */
struct sort_entry sort_sym_from = {
.se_header = "Source Symbol",
.se_cmp = sort__sym_from_cmp,
.se_snprintf = hist_entry__sym_from_snprintf,
.se_width_idx = HISTC_SYMBOL_FROM,
};
/* Sort entry for the branch target symbol: header, compare and format callbacks. */
struct sort_entry sort_sym_to = {
.se_header = "Target Symbol",
.se_cmp = sort__sym_to_cmp,
.se_snprintf = hist_entry__sym_to_snprintf,
.se_width_idx = HISTC_SYMBOL_TO,
};
/*
 * Equality-style comparison on the branch prediction flags: returns 0 when
 * both the mispred and the predicted flag match between the two entries and
 * 1 otherwise.  It never returns a negative value.
 */
static int64_t
sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right)
{
	if (left->branch_info->flags.mispred !=
	    right->branch_info->flags.mispred)
		return 1;

	if (left->branch_info->flags.predicted !=
	    right->branch_info->flags.predicted)
		return 1;

	return 0;
}
/*
 * Formats the "Branch Mispredicted" column, left-aligned in @width columns:
 * "N" when the branch was predicted, "Y" when it was mispredicted and "N/A"
 * when neither flag is set.
 *
 * The text is chosen afresh for every entry.  It must not be kept in a
 * static variable, otherwise a "Y" or "N" from an earlier entry would be
 * printed for later entries that have neither flag set.
 */
static int hist_entry__mispredict_snprintf(struct hist_entry *self, char *bf,
					size_t size, unsigned int width){
	const char *out = "N/A";

	if (self->branch_info->flags.predicted)
		out = "N";
	else if (self->branch_info->flags.mispred)
		out = "Y";

	return repsep_snprintf(bf, size, "%-*s", width, out);
}
/* Sort entry registered under the "mispredict" key: header, compare and format callbacks. */
struct sort_entry sort_mispredict = {
.se_header = "Branch Mispredicted",
.se_cmp = sort__mispredict_cmp,
.se_snprintf = hist_entry__mispredict_snprintf,
.se_width_idx = HISTC_MISPREDICT,
};
/* One selectable --sort key. */
struct sort_dimension {
/* Key name matched against the user supplied token. */
const char *name;
/* Sort entry that implements the key. */
struct sort_entry *entry;
/* Non-zero once the entry has been put on hist_entry__sort_list. */
int taken;
};
#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
static struct sort_dimension sort_dimensions[] = {
DIM(SORT_PID, "pid", sort_thread),
DIM(SORT_COMM, "comm", sort_comm),
DIM(SORT_DSO, "dso", sort_dso),
DIM(SORT_DSO_FROM, "dso_from", sort_dso_from),
DIM(SORT_DSO_TO, "dso_to", sort_dso_to),
DIM(SORT_SYM, "symbol", sort_sym),
DIM(SORT_SYM_TO, "symbol_from", sort_sym_from),
DIM(SORT_SYM_FROM, "symbol_to", sort_sym_to),
DIM(SORT_PARENT, "parent", sort_parent),
DIM(SORT_CPU, "cpu", sort_cpu),
DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
perf tools: Add sort by src line/number Using addr2line for now, requires debuginfo, needs more work to support detached debuginfo, aka foo-debuginfo packages. Example: [root@sandy ~]# perf record -a sleep 3 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.555 MB perf.data (~24236 samples) ] [root@sandy ~]# perf report -s dso,srcline 2>&1 | grep -v ^# | head -5 22.41% [kernel.kallsyms] /home/git/linux/drivers/idle/intel_idle.c:280 4.79% [kernel.kallsyms] /home/git/linux/drivers/cpuidle/cpuidle.c:148 4.78% [kernel.kallsyms] /home/git/linux/arch/x86/include/asm/atomic64_64.h:121 4.49% [kernel.kallsyms] /home/git/linux/kernel/sched/core.c:1690 4.30% [kernel.kallsyms] /home/git/linux/include/linux/seqlock.h:90 [root@sandy ~]# [root@sandy ~]# perf top -U -s dso,symbol,srcline Samples: 1K of event 'cycles', Event count (approx.): 589617389 18.66% [kernel] [k] copy_user_generic_unrolled /home/git/linux/arch/x86/lib/copy_user_64.S:143 7.83% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:39 6.59% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:38 3.66% [kernel] [k] page_fault /home/git/linux/arch/x86/kernel/entry_64.S:1379 3.25% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:40 3.12% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:37 2.74% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:36 2.39% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:43 2.12% [kernel] [k] ioread32 /home/git/linux/lib/iomap.c:90 1.51% [kernel] [k] copy_user_generic_unrolled /home/git/linux/arch/x86/lib/copy_user_64.S:144 1.19% [kernel] [k] copy_user_generic_unrolled /home/git/linux/arch/x86/lib/copy_user_64.S:154 Suggested-by: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Namhyung Kim <namhyung@gmail.com> Cc: Paul Mackerras 
<paulus@samba.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/n/tip-pdmqbng9twz06jzkbgtuwbp8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2012-05-30 13:33:24 +00:00
DIM(SORT_SRCLINE, "srcline", sort_srcline),
};
int sort_dimension__add(const char *tok)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
struct sort_dimension *sd = &sort_dimensions[i];
if (strncasecmp(tok, sd->name, strlen(tok)))
continue;
if (sd->entry == &sort_parent) {
int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
if (ret) {
char err[BUFSIZ];
regerror(ret, &parent_regex, err, sizeof(err));
pr_err("Invalid regex: %s\n%s", parent_pattern, err);
return -EINVAL;
}
sort__has_parent = 1;
}
if (sd->taken)
return 0;
if (sd->entry->se_collapse)
sort__need_collapse = 1;
perf tools: Bind callchains to the first sort dimension column Currently, the callchains are displayed using a constant left margin. So depending on the current sort dimension configuration, callchains may appear to be well attached to the first sort dimension column field which is mostly the case, except when the first dimension of sorting is done by comm, because these are right aligned. This patch binds the callchain to the first letter in the first column, whatever type of column it is (dso, comm, symbol). Before: 0.80% perf [k] __lock_acquire __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify | | __fsnotify_parent After: 0.80% perf [k] __lock_acquire __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify | | __fsnotify_parent Also, for clarity, we don't put anymore the callchain as is but: - If we have a top level ancestor in the callchain, start it with a first ascii hook. Before: 0.80% perf [kernel] [k] __lock_acquire __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify [..] [..] After: 0.80% perf [kernel] [k] __lock_acquire | --- __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify [..] [..] - Otherwise, if we have several top level ancestors, then display these like we did before: 1.69% Xorg | |--21.21%-- vread_hpet | 0x7fffd85b46fc | 0x7fffd85b494d | 0x7f4fafb4e54d | |--15.15%-- exaOffscreenAlloc | |--9.09%-- I830WaitLpRing Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Anton Blanchard <anton@samba.org> LKML-Reference: <1256246604-17156-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-22 21:23:23 +00:00
if (list_empty(&hist_entry__sort_list)) {
if (!strcmp(sd->name, "pid"))
sort__first_dimension = SORT_PID;
else if (!strcmp(sd->name, "comm"))
sort__first_dimension = SORT_COMM;
else if (!strcmp(sd->name, "dso"))
sort__first_dimension = SORT_DSO;
else if (!strcmp(sd->name, "symbol"))
sort__first_dimension = SORT_SYM;
else if (!strcmp(sd->name, "parent"))
sort__first_dimension = SORT_PARENT;
else if (!strcmp(sd->name, "cpu"))
sort__first_dimension = SORT_CPU;
else if (!strcmp(sd->name, "symbol_from"))
sort__first_dimension = SORT_SYM_FROM;
else if (!strcmp(sd->name, "symbol_to"))
sort__first_dimension = SORT_SYM_TO;
else if (!strcmp(sd->name, "dso_from"))
sort__first_dimension = SORT_DSO_FROM;
else if (!strcmp(sd->name, "dso_to"))
sort__first_dimension = SORT_DSO_TO;
else if (!strcmp(sd->name, "mispredict"))
sort__first_dimension = SORT_MISPREDICT;
perf tools: Bind callchains to the first sort dimension column Currently, the callchains are displayed using a constant left margin. So depending on the current sort dimension configuration, callchains may appear to be well attached to the first sort dimension column field which is mostly the case, except when the first dimension of sorting is done by comm, because these are right aligned. This patch binds the callchain to the first letter in the first column, whatever type of column it is (dso, comm, symbol). Before: 0.80% perf [k] __lock_acquire __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify | | __fsnotify_parent After: 0.80% perf [k] __lock_acquire __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify | | __fsnotify_parent Also, for clarity, we don't put anymore the callchain as is but: - If we have a top level ancestor in the callchain, start it with a first ascii hook. Before: 0.80% perf [kernel] [k] __lock_acquire __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify [..] [..] After: 0.80% perf [kernel] [k] __lock_acquire | --- __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify [..] [..] - Otherwise, if we have several top level ancestors, then display these like we did before: 1.69% Xorg | |--21.21%-- vread_hpet | 0x7fffd85b46fc | 0x7fffd85b494d | 0x7f4fafb4e54d | |--15.15%-- exaOffscreenAlloc | |--9.09%-- I830WaitLpRing Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Anton Blanchard <anton@samba.org> LKML-Reference: <1256246604-17156-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-22 21:23:23 +00:00
}
list_add_tail(&sd->entry->list, &hist_entry__sort_list);
sd->taken = 1;
return 0;
}
return -ESRCH;
}
void setup_sorting(const char * const usagestr[], const struct option *opts)
{
char *tmp, *tok, *str = strdup(sort_order);
for (tok = strtok_r(str, ", ", &tmp);
tok; tok = strtok_r(NULL, ", ", &tmp)) {
if (sort_dimension__add(tok) < 0) {
error("Unknown --sort key: `%s'", tok);
usage_with_options(usagestr, opts);
}
}
free(str);
}
perf diff: Use perf_session__fprintf_hists just like 'perf record' That means that almost everything you can do with 'perf report' can be done with 'perf diff', for instance: $ perf record -f find / > /dev/null [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.062 MB perf.data (~2699 samples) ] $ perf record -f find / > /dev/null [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.062 MB perf.data (~2687 samples) ] perf diff | head -8 9.02% +1.00% find libc-2.10.1.so [.] _IO_vfprintf_internal 2.91% -1.00% find [kernel] [k] __kmalloc 2.85% -1.00% find [kernel] [k] ext4_htree_store_dirent 1.99% -1.00% find [kernel] [k] _atomic_dec_and_lock 2.44% find [kernel] [k] half_md4_transform $ So if you want to zoom into libc: $ perf diff --dsos libc-2.10.1.so | head -8 37.34% find [.] _IO_vfprintf_internal 10.34% find [.] __GI_memmove 8.25% +2.00% find [.] _int_malloc 5.07% -1.00% find [.] __GI_mempcpy 7.62% +2.00% find [.] _int_free $ And if there were multiple commands using libc, it is also possible to aggregate them all by using --sort symbol: $ perf diff --dsos libc-2.10.1.so --sort symbol | head -8 37.34% [.] _IO_vfprintf_internal 10.34% [.] __GI_memmove 8.25% +2.00% [.] _int_malloc 5.07% -1.00% [.] __GI_mempcpy 7.62% +2.00% [.] _int_free $ The displacement column now is off by default, to use it: perf diff -m --dsos libc-2.10.1.so --sort symbol | head -8 37.34% [.] _IO_vfprintf_internal 10.34% [.] __GI_memmove 8.25% +2.00% [.] _int_malloc 5.07% -1.00% +2 [.] __GI_mempcpy 7.62% +2.00% -1 [.] _int_free $ Using -t/--field-separator can be used for scripting: $ perf diff -t, -m --dsos libc-2.10.1.so --sort symbol | head -8 37.34, , ,[.] _IO_vfprintf_internal 10.34, , ,[.] __GI_memmove 8.25,+2.00%, ,[.] _int_malloc 5.07,-1.00%, +2,[.] __GI_mempcpy 7.62,+2.00%, -1,[.] _int_free 6.99,+1.00%, -1,[.] _IO_new_file_xsputn 1.89,-2.00%, +4,[.] 
__readdir64 $ Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1260978567-550-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-12-16 15:49:27 +00:00
/*
 * Flag a sort entry as elided when @list holds exactly one entry.
 *
 * @self:      sort entry whose ->elide flag is set
 * @list:      string list to inspect; may be NULL, in which case nothing
 *             happens
 * @list_name: label printed in front of the single entry
 * @fp:        optional stream; when non-NULL a "# <list_name>: <entry>"
 *             line naming the single entry is written to it
 *
 * Lists that are NULL or whose entry count differs from one leave @self
 * untouched and produce no output.
 */
void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list,
			     const char *list_name, FILE *fp)
{
	if (list == NULL || strlist__nr_entries(list) != 1)
		return;

	if (fp) {
		const char *only = strlist__entry(list, 0)->s;

		fprintf(fp, "# %s: %s\n", list_name, only);
	}

	self->elide = true;
}