linux/tools/perf/util/data_map.c

292 lines
6.6 KiB
C
Raw Normal View History

#include "data_map.h"
#include "symbol.h"
#include "util.h"
#include "debug.h"
static struct perf_file_handler *curr_handler;
static unsigned long mmap_window = 32;
static char __cwd[PATH_MAX];
static int process_event_stub(event_t *event __used)
{
dump_printf(": unhandled!\n");
return 0;
}
void register_perf_file_handler(struct perf_file_handler *handler)
{
if (!handler->process_sample_event)
handler->process_sample_event = process_event_stub;
if (!handler->process_mmap_event)
handler->process_mmap_event = process_event_stub;
if (!handler->process_comm_event)
handler->process_comm_event = process_event_stub;
if (!handler->process_fork_event)
handler->process_fork_event = process_event_stub;
if (!handler->process_exit_event)
handler->process_exit_event = process_event_stub;
if (!handler->process_lost_event)
handler->process_lost_event = process_event_stub;
if (!handler->process_read_event)
handler->process_read_event = process_event_stub;
if (!handler->process_throttle_event)
handler->process_throttle_event = process_event_stub;
if (!handler->process_unthrottle_event)
handler->process_unthrottle_event = process_event_stub;
curr_handler = handler;
}
static const char *event__name[] = {
[0] = "TOTAL",
[PERF_RECORD_MMAP] = "MMAP",
[PERF_RECORD_LOST] = "LOST",
[PERF_RECORD_COMM] = "COMM",
[PERF_RECORD_EXIT] = "EXIT",
[PERF_RECORD_THROTTLE] = "THROTTLE",
[PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
[PERF_RECORD_FORK] = "FORK",
[PERF_RECORD_READ] = "READ",
[PERF_RECORD_SAMPLE] = "SAMPLE",
};
unsigned long event__total[PERF_RECORD_MAX];
void event__print_totals(void)
{
int i;
for (i = 0; i < PERF_RECORD_MAX; ++i)
pr_info("%10s events: %10ld\n",
event__name[i], event__total[i]);
}
static int
process_event(event_t *event, unsigned long offset, unsigned long head)
{
trace_event(event);
if (event->header.type < PERF_RECORD_MAX) {
dump_printf("%p [%p]: PERF_RECORD_%s",
(void *)(offset + head),
(void *)(long)(event->header.size),
event__name[event->header.type]);
++event__total[0];
++event__total[event->header.type];
}
switch (event->header.type) {
case PERF_RECORD_SAMPLE:
return curr_handler->process_sample_event(event);
case PERF_RECORD_MMAP:
return curr_handler->process_mmap_event(event);
case PERF_RECORD_COMM:
return curr_handler->process_comm_event(event);
case PERF_RECORD_FORK:
return curr_handler->process_fork_event(event);
case PERF_RECORD_EXIT:
return curr_handler->process_exit_event(event);
case PERF_RECORD_LOST:
return curr_handler->process_lost_event(event);
case PERF_RECORD_READ:
return curr_handler->process_read_event(event);
case PERF_RECORD_THROTTLE:
return curr_handler->process_throttle_event(event);
case PERF_RECORD_UNTHROTTLE:
return curr_handler->process_unthrottle_event(event);
default:
curr_handler->total_unknown++;
return -1;
}
}
int perf_header__read_build_ids(int input, off_t offset, off_t size)
perf symbols: Use the buildids if present With this change 'perf record' will intercept PERF_RECORD_MMAP calls, creating a linked list of DSOs, then when the session finishes, it will traverse this list and read the buildids, stashing them at the end of the file and will set up a new feature bit in the header bitmask. 'perf report' will then notice this feature and populate the 'dsos' list and set the build ids. When reading the symtabs it will refuse to load from a file that doesn't have the same build id. This improves the reliability of the profiler output, as symbols and profiling data is more guaranteed to match. Example: [root@doppio ~]# perf report | head /home/acme/bin/perf with build id b1ea544ac3746e7538972548a09aadecc5753868 not found, continuing without symbols # Samples: 2621434559 # # Overhead Command Shared Object Symbol # ........ ............... ............................. ...... # 7.91% init [kernel] [k] read_hpet 7.64% init [kernel] [k] mwait_idle_with_hints 7.60% swapper [kernel] [k] read_hpet 7.60% swapper [kernel] [k] mwait_idle_with_hints 3.65% init [kernel] [k] 0xffffffffa02339d9 [root@doppio ~]# In this case the 'perf' binary was an older one, vanished, so its symbols probably wouldn't match or would cause subtly different (and misleading) output. Next patches will support the kernel as well, reading the build id notes for it and the modules from /sys. Another patch should also introduce a new plumbing command: 'perf list-buildids' that will then be used in porcelain that is distro specific to fetch -debuginfo packages where such buildids are present. This will in turn allow for one to run 'perf record' in one machine and 'perf report' in another. Future work on having the buildid sent directly from the kernel in the PERF_RECORD_MMAP event is needed to close races, as the DSO can be changed during a 'perf record' session, but this patch at least helps with non-corner cases and current/older kernels. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Frank Ch. Eigler <fche@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Jason Baron <jbaron@redhat.com> Cc: Jim Keniston <jkenisto@us.ibm.com> Cc: K. Prasad <prasad@linux.vnet.ibm.com> Cc: Masami Hiramatsu <mhiramat@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roland McGrath <roland@redhat.com> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: Steven Rostedt <rostedt@goodmis.org> LKML-Reference: <1257367843-26224-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-04 20:50:43 +00:00
{
struct build_id_event bev;
char filename[PATH_MAX];
perf tools: Bring linear set of section headers for features Build a set of section headers for features right after the datas. Each implemented feature will have one of such section header that provides the offset and the size of the data manipulated by the feature. The trace informations have moved after the data and are recorded on exit time. The new layout is as follows: ----------------------- ___ [ magic ] | [ header size ] | [ attr size ] | [ attr content offset ] | [ attr content size ] | [ data offset ] File Headers [ data size ] | [ event_types offset ] | [ event_types size ] | [ feature bitmap ] v [ attr section ] [ events section ] ___ [ X ] | [ X ] | [ X ] Datas [ X ] | [ X ] v ___ [ Feature 1 offset ] | [ Feature 1 size ] Features headers [ Feature 2 offset ] | [ Feature 2 size ] v [ Feature 1 content ] [ Feature 2 content ] ----------------------- We have as many feature's section headers as we have features in use for the current file. Say Feat 1 and Feat 3 are used by the file, but not Feat 2. Then the feature headers will be like follows: [ Feature 1 offset ] | [ Feature 1 size ] Features headers [ Feature 3 offset ] | [ Feature 3 size ] v There is no hole to cover Feature 2 that is not in use here. We only need to cover the needed headers in order, from the lowest feature bit to the highest. Currently we have two features: HEADER_TRACE_INFO and HEADER_BUILD_ID. Both have their contents that follow the feature headers. Putting the contents right after the feature headers is not mandatory though. While we keep the feature headers right after the data and in order, their offsets can point everywhere. We have just put the two above feature contents in the end of the file for convenience. The purpose of this layout change is to have a file format that scales while keeping it simple: having such linear feature headers is less error prone wrt forward/backward compatibility as the content of a feature can be put anywhere, its location can even change by the time, it's fine because its headers will tell where it is. And we know how to find these headers, following the above rules. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> LKML-Reference: <1257911467-28276-6-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-11 03:51:07 +00:00
off_t limit = offset + size;
perf symbols: Use the buildids if present With this change 'perf record' will intercept PERF_RECORD_MMAP calls, creating a linked list of DSOs, then when the session finishes, it will traverse this list and read the buildids, stashing them at the end of the file and will set up a new feature bit in the header bitmask. 'perf report' will then notice this feature and populate the 'dsos' list and set the build ids. When reading the symtabs it will refuse to load from a file that doesn't have the same build id. This improves the reliability of the profiler output, as symbols and profiling data is more guaranteed to match. Example: [root@doppio ~]# perf report | head /home/acme/bin/perf with build id b1ea544ac3746e7538972548a09aadecc5753868 not found, continuing without symbols # Samples: 2621434559 # # Overhead Command Shared Object Symbol # ........ ............... ............................. ...... # 7.91% init [kernel] [k] read_hpet 7.64% init [kernel] [k] mwait_idle_with_hints 7.60% swapper [kernel] [k] read_hpet 7.60% swapper [kernel] [k] mwait_idle_with_hints 3.65% init [kernel] [k] 0xffffffffa02339d9 [root@doppio ~]# In this case the 'perf' binary was an older one, vanished, so its symbols probably wouldn't match or would cause subtly different (and misleading) output. Next patches will support the kernel as well, reading the build id notes for it and the modules from /sys. Another patch should also introduce a new plumbing command: 'perf list-buildids' that will then be used in porcelain that is distro specific to fetch -debuginfo packages where such buildids are present. This will in turn allow for one to run 'perf record' in one machine and 'perf report' in another. Future work on having the buildid sent directly from the kernel in the PERF_RECORD_MMAP event is needed to close races, as the DSO can be changed during a 'perf record' session, but this patch at least helps with non-corner cases and current/older kernels. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Frank Ch. Eigler <fche@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Jason Baron <jbaron@redhat.com> Cc: Jim Keniston <jkenisto@us.ibm.com> Cc: K. Prasad <prasad@linux.vnet.ibm.com> Cc: Masami Hiramatsu <mhiramat@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roland McGrath <roland@redhat.com> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: Steven Rostedt <rostedt@goodmis.org> LKML-Reference: <1257367843-26224-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-04 20:50:43 +00:00
int err = -1;
perf tools: Bring linear set of section headers for features Build a set of section headers for features right after the datas. Each implemented feature will have one of such section header that provides the offset and the size of the data manipulated by the feature. The trace informations have moved after the data and are recorded on exit time. The new layout is as follows: ----------------------- ___ [ magic ] | [ header size ] | [ attr size ] | [ attr content offset ] | [ attr content size ] | [ data offset ] File Headers [ data size ] | [ event_types offset ] | [ event_types size ] | [ feature bitmap ] v [ attr section ] [ events section ] ___ [ X ] | [ X ] | [ X ] Datas [ X ] | [ X ] v ___ [ Feature 1 offset ] | [ Feature 1 size ] Features headers [ Feature 2 offset ] | [ Feature 2 size ] v [ Feature 1 content ] [ Feature 2 content ] ----------------------- We have as many feature's section headers as we have features in use for the current file. Say Feat 1 and Feat 3 are used by the file, but not Feat 2. Then the feature headers will be like follows: [ Feature 1 offset ] | [ Feature 1 size ] Features headers [ Feature 3 offset ] | [ Feature 3 size ] v There is no hole to cover Feature 2 that is not in use here. We only need to cover the needed headers in order, from the lowest feature bit to the highest. Currently we have two features: HEADER_TRACE_INFO and HEADER_BUILD_ID. Both have their contents that follow the feature headers. Putting the contents right after the feature headers is not mandatory though. While we keep the feature headers right after the data and in order, their offsets can point everywhere. We have just put the two above feature contents in the end of the file for convenience. The purpose of this layout change is to have a file format that scales while keeping it simple: having such linear feature headers is less error prone wrt forward/backward compatibility as the content of a feature can be put anywhere, its location can even change by the time, it's fine because its headers will tell where it is. And we know how to find these headers, following the above rules. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> LKML-Reference: <1257911467-28276-6-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-11 03:51:07 +00:00
while (offset < limit) {
perf symbols: Use the buildids if present With this change 'perf record' will intercept PERF_RECORD_MMAP calls, creating a linked list of DSOs, then when the session finishes, it will traverse this list and read the buildids, stashing them at the end of the file and will set up a new feature bit in the header bitmask. 'perf report' will then notice this feature and populate the 'dsos' list and set the build ids. When reading the symtabs it will refuse to load from a file that doesn't have the same build id. This improves the reliability of the profiler output, as symbols and profiling data is more guaranteed to match. Example: [root@doppio ~]# perf report | head /home/acme/bin/perf with build id b1ea544ac3746e7538972548a09aadecc5753868 not found, continuing without symbols # Samples: 2621434559 # # Overhead Command Shared Object Symbol # ........ ............... ............................. ...... # 7.91% init [kernel] [k] read_hpet 7.64% init [kernel] [k] mwait_idle_with_hints 7.60% swapper [kernel] [k] read_hpet 7.60% swapper [kernel] [k] mwait_idle_with_hints 3.65% init [kernel] [k] 0xffffffffa02339d9 [root@doppio ~]# In this case the 'perf' binary was an older one, vanished, so its symbols probably wouldn't match or would cause subtly different (and misleading) output. Next patches will support the kernel as well, reading the build id notes for it and the modules from /sys. Another patch should also introduce a new plumbing command: 'perf list-buildids' that will then be used in porcelain that is distro specific to fetch -debuginfo packages where such buildids are present. This will in turn allow for one to run 'perf record' in one machine and 'perf report' in another. Future work on having the buildid sent directly from the kernel in the PERF_RECORD_MMAP event is needed to close races, as the DSO can be changed during a 'perf record' session, but this patch at least helps with non-corner cases and current/older kernels. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Frank Ch. Eigler <fche@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Jason Baron <jbaron@redhat.com> Cc: Jim Keniston <jkenisto@us.ibm.com> Cc: K. Prasad <prasad@linux.vnet.ibm.com> Cc: Masami Hiramatsu <mhiramat@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roland McGrath <roland@redhat.com> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: Steven Rostedt <rostedt@goodmis.org> LKML-Reference: <1257367843-26224-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-04 20:50:43 +00:00
struct dso *dso;
ssize_t len;
if (read(input, &bev, sizeof(bev)) != sizeof(bev))
goto out;
len = bev.header.size - sizeof(bev);
if (read(input, filename, len) != len)
goto out;
dso = dsos__findnew(filename);
if (dso != NULL)
dso__set_build_id(dso, &bev.build_id);
offset += bev.header.size;
}
err = 0;
out:
return err;
}
int mmap_dispatch_perf_file(struct perf_header **pheader,
const char *input_name,
int force,
int full_paths,
int *cwdlen,
char **cwd)
{
int err;
struct perf_header *header;
unsigned long head, shift;
unsigned long offset = 0;
struct stat input_stat;
size_t page_size;
u64 sample_type;
event_t *event;
uint32_t size;
int input;
char *buf;
if (curr_handler == NULL) {
pr_debug("Forgot to register perf file handler\n");
return -EINVAL;
}
page_size = getpagesize();
input = open(input_name, O_RDONLY);
if (input < 0) {
pr_err("Failed to open file: %s", input_name);
if (!strcmp(input_name, "perf.data"))
pr_err(" (try 'perf record' first)");
pr_err("\n");
return -errno;
}
if (fstat(input, &input_stat) < 0) {
pr_err("failed to stat file");
err = -errno;
goto out_close;
}
err = -EACCES;
if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
pr_err("file: %s not owned by current user or root\n",
input_name);
goto out_close;
}
if (input_stat.st_size == 0) {
pr_info("zero-sized file, nothing to do!\n");
goto done;
}
err = -ENOMEM;
header = perf_header__new();
if (header == NULL)
goto out_close;
err = perf_header__read(header, input);
if (err < 0)
goto out_delete;
*pheader = header;
head = header->data_offset;
sample_type = perf_header__sample_type(header);
err = -EINVAL;
if (curr_handler->sample_type_check &&
curr_handler->sample_type_check(sample_type) < 0)
goto out_delete;
if (!full_paths) {
if (getcwd(__cwd, sizeof(__cwd)) == NULL) {
pr_err("failed to get the current directory\n");
err = -errno;
goto out_delete;
}
*cwd = __cwd;
*cwdlen = strlen(*cwd);
} else {
*cwd = NULL;
*cwdlen = 0;
}
shift = page_size * (head / page_size);
offset += shift;
head -= shift;
remap:
buf = mmap(NULL, page_size * mmap_window, PROT_READ,
MAP_SHARED, input, offset);
if (buf == MAP_FAILED) {
pr_err("failed to mmap file\n");
err = -errno;
goto out_delete;
}
more:
event = (event_t *)(buf + head);
size = event->header.size;
if (!size)
size = 8;
if (head + event->header.size >= page_size * mmap_window) {
int munmap_ret;
shift = page_size * (head / page_size);
munmap_ret = munmap(buf, page_size * mmap_window);
assert(munmap_ret == 0);
offset += shift;
head -= shift;
goto remap;
}
size = event->header.size;
dump_printf("\n%p [%p]: event: %d\n",
(void *)(offset + head),
(void *)(long)event->header.size,
event->header.type);
if (!size || process_event(event, offset, head) < 0) {
dump_printf("%p [%p]: skipping unknown header type: %d\n",
(void *)(offset + head),
(void *)(long)(event->header.size),
event->header.type);
/*
* assume we lost track of the stream, check alignment, and
* increment a single u64 in the hope to catch on again 'soon'.
*/
if (unlikely(head & 7))
head &= ~7ULL;
size = 8;
}
head += size;
if (offset + head >= header->data_offset + header->data_size)
goto done;
if (offset + head < (unsigned long)input_stat.st_size)
goto more;
done:
err = 0;
out_close:
close(input);
return err;
out_delete:
perf_header__delete(header);
goto out_close;
}