perf events: Precalculate the header space for PERF_SAMPLE_ fields

PERF_SAMPLE_{CALLCHAIN,RAW} have variable lenghts per sample, but the others
can be precalculated, reducing a bit the per sample cost.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Ian Munsie <imunsie@au1.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <new-submission>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Arnaldo Carvalho de Melo 2010-10-20 12:50:11 -02:00
parent 068ffaa8bf
commit c320c7b7d3
2 changed files with 93 additions and 59 deletions

View File

@ -758,6 +758,8 @@ struct perf_event {
u64 shadow_ctx_time;
struct perf_event_attr attr;
u16 header_size;
u16 read_size;
struct hw_perf_event hw;
struct perf_event_context *ctx;

View File

@ -312,9 +312,75 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
ctx->nr_stat++;
}
/*
* Called at perf_event creation and when events are attached/detached from a
* group.
*/
static void perf_event__read_size(struct perf_event *event)
{
int entry = sizeof(u64); /* value */
int size = 0;
int nr = 1;
if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
size += sizeof(u64);
if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
size += sizeof(u64);
if (event->attr.read_format & PERF_FORMAT_ID)
entry += sizeof(u64);
if (event->attr.read_format & PERF_FORMAT_GROUP) {
nr += event->group_leader->nr_siblings;
size += sizeof(u64);
}
size += entry * nr;
event->read_size = size;
}
static void perf_event__header_size(struct perf_event *event)
{
struct perf_sample_data *data;
u64 sample_type = event->attr.sample_type;
u16 size = 0;
perf_event__read_size(event);
if (sample_type & PERF_SAMPLE_IP)
size += sizeof(data->ip);
if (sample_type & PERF_SAMPLE_TID)
size += sizeof(data->tid_entry);
if (sample_type & PERF_SAMPLE_TIME)
size += sizeof(data->time);
if (sample_type & PERF_SAMPLE_ADDR)
size += sizeof(data->addr);
if (sample_type & PERF_SAMPLE_ID)
size += sizeof(data->id);
if (sample_type & PERF_SAMPLE_STREAM_ID)
size += sizeof(data->stream_id);
if (sample_type & PERF_SAMPLE_CPU)
size += sizeof(data->cpu_entry);
if (sample_type & PERF_SAMPLE_PERIOD)
size += sizeof(data->period);
if (sample_type & PERF_SAMPLE_READ)
size += event->read_size;
event->header_size = size;
}
static void perf_group_attach(struct perf_event *event)
{
struct perf_event *group_leader = event->group_leader;
struct perf_event *group_leader = event->group_leader, *pos;
/*
* We can have double attach due to group movement in perf_event_open.
@ -333,6 +399,11 @@ static void perf_group_attach(struct perf_event *event)
list_add_tail(&event->group_entry, &group_leader->sibling_list);
group_leader->nr_siblings++;
perf_event__header_size(group_leader);
list_for_each_entry(pos, &group_leader->sibling_list, group_entry)
perf_event__header_size(pos);
}
/*
@ -391,7 +462,7 @@ static void perf_group_detach(struct perf_event *event)
if (event->group_leader != event) {
list_del_init(&event->group_entry);
event->group_leader->nr_siblings--;
return;
goto out;
}
if (!list_empty(&event->group_entry))
@ -410,6 +481,12 @@ static void perf_group_detach(struct perf_event *event)
/* Inherit group flags from the previous leader */
sibling->group_flags = event->group_flags;
}
out:
perf_event__header_size(event->group_leader);
list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry)
perf_event__header_size(tmp);
}
static inline int
@ -2289,31 +2366,6 @@ static int perf_release(struct inode *inode, struct file *file)
return perf_event_release_kernel(event);
}
static int perf_event_read_size(struct perf_event *event)
{
int entry = sizeof(u64); /* value */
int size = 0;
int nr = 1;
if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
size += sizeof(u64);
if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
size += sizeof(u64);
if (event->attr.read_format & PERF_FORMAT_ID)
entry += sizeof(u64);
if (event->attr.read_format & PERF_FORMAT_GROUP) {
nr += event->group_leader->nr_siblings;
size += sizeof(u64);
}
size += entry * nr;
return size;
}
u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
{
struct perf_event *child;
@ -2428,7 +2480,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
if (event->state == PERF_EVENT_STATE_ERROR)
return 0;
if (count < perf_event_read_size(event))
if (count < event->read_size)
return -ENOSPC;
WARN_ON_ONCE(event->ctx->parent_ctx);
@ -3606,59 +3658,34 @@ void perf_prepare_sample(struct perf_event_header *header,
data->type = sample_type;
header->type = PERF_RECORD_SAMPLE;
header->size = sizeof(*header);
header->size = sizeof(*header) + event->header_size;
header->misc = 0;
header->misc |= perf_misc_flags(regs);
if (sample_type & PERF_SAMPLE_IP) {
if (sample_type & PERF_SAMPLE_IP)
data->ip = perf_instruction_pointer(regs);
header->size += sizeof(data->ip);
}
if (sample_type & PERF_SAMPLE_TID) {
/* namespace issues */
data->tid_entry.pid = perf_event_pid(event, current);
data->tid_entry.tid = perf_event_tid(event, current);
header->size += sizeof(data->tid_entry);
}
if (sample_type & PERF_SAMPLE_TIME) {
if (sample_type & PERF_SAMPLE_TIME)
data->time = perf_clock();
header->size += sizeof(data->time);
}
if (sample_type & PERF_SAMPLE_ADDR)
header->size += sizeof(data->addr);
if (sample_type & PERF_SAMPLE_ID) {
if (sample_type & PERF_SAMPLE_ID)
data->id = primary_event_id(event);
header->size += sizeof(data->id);
}
if (sample_type & PERF_SAMPLE_STREAM_ID) {
if (sample_type & PERF_SAMPLE_STREAM_ID)
data->stream_id = event->id;
header->size += sizeof(data->stream_id);
}
if (sample_type & PERF_SAMPLE_CPU) {
data->cpu_entry.cpu = raw_smp_processor_id();
data->cpu_entry.reserved = 0;
header->size += sizeof(data->cpu_entry);
}
if (sample_type & PERF_SAMPLE_PERIOD)
header->size += sizeof(data->period);
if (sample_type & PERF_SAMPLE_READ)
header->size += perf_event_read_size(event);
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
int size = 1;
@ -3726,7 +3753,7 @@ perf_event_read_event(struct perf_event *event,
.header = {
.type = PERF_RECORD_READ,
.misc = 0,
.size = sizeof(read_event) + perf_event_read_size(event),
.size = sizeof(read_event) + event->read_size,
},
.pid = perf_event_pid(event, task),
.tid = perf_event_tid(event, task),
@ -5714,6 +5741,11 @@ SYSCALL_DEFINE5(perf_event_open,
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(&current->perf_event_mutex);
/*
* Precalculate sample_data sizes
*/
perf_event__header_size(event);
/*
* Drop the reference on the group_event after placing the
* new event on the sibling_list. This ensures destruction