linux/tools/perf/util/session.h
Frederic Weisbecker d6b17bebd7 perf: Provide a new deterministic events reordering algorithm
The current events reordering algorithm is based on a heuristic that
gets broken once we deal with a very fast flow of events.

Indeed the time period based flushing is not suitable anymore
in the following case, assuming we have a flush period of two
seconds.

    CPU 0           |        CPU 1
                    |
  cnt1 timestamps   |      cnt1 timestamps
                    |
    0               |         0
    1               |         1
    2               |         2
    3               |         3
    [...]           |        [...]
    4 seconds later

If we spend too much time reading the buffers (when there are a lot of
events to record in each buffer, or when we have a lot of CPU buffers
to read), in the next pass the CPU 0 buffer could contain a slice
of several seconds of events. We'll read them all and notice we've
reached the period to flush. In the above example we flush the first
half of the CPU 0 buffer, then we read the CPU 1 buffer, which
has events that belonged to the flushed slice, and then the reordering
fails.

It's simple to reproduce with:

	perf lock record perf bench sched messaging

To solve this, we use a new solution that no longer relies on a
heuristic time-slice period but on a deterministic basis
derived from how perf record does its job.

perf record saves the buffers in passes. A pass is a tour
of every buffer from every CPU. This is done in order: for
each CPU we read the buffers of every counter. So the more
buffers we visit, the later the timestamps of their events will be.

When perf record finishes a pass it records a
PERF_RECORD_FINISHED_ROUND pseudo event.
We record the max timestamp t found in pass n. Assuming these
timestamps are monotonic across CPUs, we know that if a buffer
still has events with timestamps below t, they will all be available
and then read in pass n + 1.
Hence when we start to read pass n + 2, we can safely flush all
events with timestamps below t.

      ============ PASS n =================
         CPU 0         |   CPU 1
                       |
      cnt1 timestamps  |   cnt2 timestamps
            1          |         2
            2          |         3
            -          |         4  <--- max recorded

      ============ PASS n + 1 ==============
         CPU 0         |   CPU 1
                       |
      cnt1 timestamps  |   cnt2 timestamps
            3          |         5
            4          |         6
            5          |         7 <---- max recorded

        Flush every events below timestamp 4

      ============ PASS n + 2 ==============
         CPU 0         |   CPU 1
                       |
      cnt1 timestamps  |   cnt2 timestamps
            6          |         8
            7          |         9
            -          |         10

        Flush every events below timestamp 7
        etc...

It also works on perf.data versions that don't have
PERF_RECORD_FINISHED_ROUND pseudo events. The difference is that
the events will only be flushed at the end of the perf.data
processing. It will then consume more memory and scale worse with
large perf.data files.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
2010-05-09 13:43:42 +02:00

151 lines
3.7 KiB
C

#ifndef __PERF_SESSION_H
#define __PERF_SESSION_H
#include "event.h"
#include "header.h"
#include "symbol.h"
#include "thread.h"
#include <linux/rbtree.h>
#include "../../../include/linux/perf_event.h"
struct sample_queue;
struct ip_callchain;
struct thread;
/*
 * Bookkeeping for reordering samples by timestamp before they are handed to
 * the event handlers. Embedded in struct perf_session as the
 * ordered_samples member.
 *
 * NOTE(review): the per-field notes below are inferred from the field names
 * and from the round-based flushing scheme driven by
 * PERF_RECORD_FINISHED_ROUND; confirm against the implementation.
 */
struct ordered_samples {
/* presumably the timestamp limit used by the most recent flush */
u64 last_flush;
/* presumably the timestamp limit the next flush will use */
u64 next_flush;
/* presumably the highest sample timestamp seen so far */
u64 max_timestamp;
/* list head; presumably anchors the queue of not-yet-flushed samples */
struct list_head samples_head;
/* presumably the most recently queued sample -- TODO confirm */
struct sample_queue *last_inserted;
};
/*
 * Per-session state shared by the perf tools.
 *
 * The structure ends with a variable-length filename, so it must be
 * allocated with extra room for the string and may only be handled through
 * pointers (never by value, in an array, or as a non-final member).
 *
 * NOTE(review): field notes marked "presumably" are inferred from names
 * only; confirm against the implementation.
 */
struct perf_session {
	struct perf_header	header;
	unsigned long		size;
	unsigned long		mmap_window;
	struct rb_root		threads;
	struct thread		*last_match;
	/* tree handed to the machines__*() helpers by the inline wrappers */
	struct rb_root		machines;
	struct events_stats	events_stats;
	struct rb_root		stats_by_id;
	/* one slot per record type, indexed below PERF_RECORD_MAX */
	unsigned long		event_total[PERF_RECORD_MAX];
	unsigned long		unknown_events;
	struct rb_root		hists;
	u64			sample_type;
	int			fd;
	/* presumably set when fd refers to a pipe -- TODO confirm */
	bool			fd_pipe;
	bool			repipe;
	/* presumably strlen(cwd) -- TODO confirm */
	int			cwdlen;
	char			*cwd;
	/* timestamp-reordering state, see struct ordered_samples */
	struct ordered_samples	ordered_samples;
	/*
	 * C99 flexible array member (was the GNU zero-length array
	 * "filename[0]"): same layout and sizeof, but standard C and
	 * understood by compiler bounds checking.
	 */
	char			filename[];
};
/* Forward declaration: event_op2 below takes a pointer to the ops table. */
struct perf_event_ops;
/*
 * Handler signature for a single event: receives the event and the session
 * it belongs to, returns an int status.
 * NOTE(review): the meaning of the return value is not visible in this
 * header; presumably 0 on success and negative on error -- confirm.
 */
typedef int (*event_op)(event_t *self, struct perf_session *session);
/*
 * Same as event_op, but the handler additionally receives the ops table
 * itself (used for the finished_round member of struct perf_event_ops).
 */
typedef int (*event_op2)(event_t *self, struct perf_session *session,
struct perf_event_ops *ops);
/*
 * Table of event callbacks passed to perf_session__process_events() and
 * __perf_session__process_events().
 *
 * NOTE(review): each event_op member is presumably invoked for the record
 * type it is named after; how unset (NULL) members are treated is not
 * visible in this header -- confirm against the implementation.
 */
struct perf_event_ops {
/* per-record-type handlers, all sharing the event_op signature */
event_op sample,
mmap,
comm,
fork,
exit,
lost,
read,
throttle,
unthrottle,
attr,
event_type,
tracing_data,
build_id;
/*
 * Handler for the end-of-round marker (PERF_RECORD_FINISHED_ROUND); it
 * also receives this ops table.
 */
event_op2 finished_round;
/*
 * presumably requests that samples be delivered in timestamp order
 * (see struct ordered_samples) -- TODO confirm
 */
bool ordered_samples;
};
/*
 * Session API prototypes. The definitions are not part of this header, so
 * the notes below are inferred from names and signatures only -- confirm
 * against the implementation before relying on them.
 */
/* Presumably creates a session for @filename; pair with perf_session__delete(). */
struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe);
void perf_session__delete(struct perf_session *self);
/* Presumably byte-swaps the header fields in place -- TODO confirm. */
void perf_event_header__bswap(struct perf_event_header *self);
/*
 * Lower-level variant of perf_session__process_events() that takes the data
 * region explicitly (@data_offset, @data_size, @size) along with @ops.
 */
int __perf_session__process_events(struct perf_session *self,
u64 data_offset, u64 data_size, u64 size,
struct perf_event_ops *ops);
/* Presumably feeds every event of the session to the callbacks in @event_ops. */
int perf_session__process_events(struct perf_session *self,
struct perf_event_ops *event_ops);
/* NOTE(review): ownership of the returned map_symbol array is not visible here. */
struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
struct thread *thread,
struct ip_callchain *chain,
struct symbol **parent);
/* NOTE(review): @msg is presumably used in a diagnostic when the answer is false. */
bool perf_session__has_traces(struct perf_session *self, const char *msg);
int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
const char *symbol_name,
u64 addr);
/* Presumably byte-swaps @byte_size bytes at @src as 64-bit words, in place. */
void mem_bswap_64(void *src, int byte_size);
int perf_session__create_kernel_maps(struct perf_session *self);
/* NOTE(review): generic name exported from a header; return convention not visible here. */
int do_read(int fd, void *buf, size_t size);
void perf_session__update_sample_type(struct perf_session *self);
/*
 * perf_session__browse_hists() comes in two flavours selected at build time:
 * with NO_NEWT_SUPPORT defined, an inline stub that ignores every argument
 * and returns 0; otherwise only the prototype is declared here and the
 * definition lives elsewhere.
 */
#ifdef NO_NEWT_SUPPORT
/* Stub: all parameters are marked __used because none of them is read. */
static inline int perf_session__browse_hists(struct rb_root *hists __used,
u64 nr_hists __used,
u64 session_total __used,
const char *helpline __used,
const char *input_name __used)
{
return 0;
}
#else
/* Real implementation, defined outside this header. */
int perf_session__browse_hists(struct rb_root *hists, u64 nr_hists,
u64 session_total, const char *helpline,
const char *input_name);
#endif
/*
 * Session-level convenience wrapper: hands this session's machines tree to
 * machines__find_host() and returns whatever that call returns.
 */
static inline
struct machine *perf_session__find_host_machine(struct perf_session *self)
{
	struct rb_root *machines = &self->machines;

	return machines__find_host(machines);
}
/*
 * Session-level convenience wrapper: forwards @pid together with this
 * session's machines tree to machines__find() and returns its result.
 */
static inline
struct machine *perf_session__find_machine(struct perf_session *self, pid_t pid)
{
	struct rb_root *machines = &self->machines;

	return machines__find(machines, pid);
}
/*
 * Session-level convenience wrapper: forwards @pid together with this
 * session's machines tree to machines__findnew() and returns its result.
 */
static inline
struct machine *perf_session__findnew_machine(struct perf_session *self, pid_t pid)
{
	struct rb_root *machines = &self->machines;

	return machines__findnew(machines, pid);
}
/*
 * Session-level convenience wrapper: calls machines__process() on this
 * session's machines tree, passing @process as the callback and the session
 * itself as the trailing data argument.
 *
 * This function returns void, so the call is a plain statement: ISO C
 * forbids "return <expression>;" in a void function (the previous form only
 * compiled as a compiler extension).
 */
static inline
void perf_session__process_machines(struct perf_session *self,
				    machine__process_t process)
{
	machines__process(&self->machines, process, self);
}
/*
 * Session-level convenience wrapper: passes this session's machines tree and
 * @fp to machines__fprintf_dsos() and returns that call's result.
 */
static inline
size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
{
	struct rb_root *machines = &self->machines;

	return machines__fprintf_dsos(machines, fp);
}
/*
 * Session-level convenience wrapper: passes this session's machines tree,
 * @fp and @with_hits to machines__fprintf_dsos_buildid() and returns that
 * call's result.
 */
static inline
size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
					  bool with_hits)
{
	struct rb_root *machines = &self->machines;

	return machines__fprintf_dsos_buildid(machines, fp, with_hits);
}
#endif /* __PERF_SESSION_H */