perf data: Allow to use stdio functions for pipe mode
When perf data comes from a pipe, perf reads each event separately using
the read(2) syscall. This is a huge performance bottleneck when
processing large data, as in perf inject. perf inject also has to
use the write(2) syscall for its output.
So convert it to use the buffered I/O functions of the stdio library for
pipe data. This makes the inject-build-id bench time drop from 20ms to 8ms.
$ perf bench internals inject-build-id
# Running 'internals/inject-build-id' benchmark:
Average build-id injection took: 8.074 msec (+- 0.013 msec)
Average time per event: 0.792 usec (+- 0.001 usec)
Average memory usage: 8328 KB (+- 0 KB)
Average build-id-all injection took: 5.490 msec (+- 0.008 msec)
Average time per event: 0.538 usec (+- 0.001 usec)
Average memory usage: 7563 KB (+- 0 KB)
This patch enables it only for perf inject when used with a pipe (which is
the default behavior). Maybe we could do the same for perf record and/or
report later.
Committer testing:
Before:
$ perf stat -r 5 perf bench internals inject-build-id
# Running 'internals/inject-build-id' benchmark:
Average build-id injection took: 13.605 msec (+- 0.064 msec)
Average time per event: 1.334 usec (+- 0.006 usec)
Average memory usage: 12220 KB (+- 7 KB)
Average build-id-all injection took: 11.458 msec (+- 0.058 msec)
Average time per event: 1.123 usec (+- 0.006 usec)
Average memory usage: 11546 KB (+- 8 KB)
# Running 'internals/inject-build-id' benchmark:
Average build-id injection took: 13.673 msec (+- 0.057 msec)
Average time per event: 1.341 usec (+- 0.006 usec)
Average memory usage: 12508 KB (+- 8 KB)
Average build-id-all injection took: 11.437 msec (+- 0.046 msec)
Average time per event: 1.121 usec (+- 0.004 usec)
Average memory usage: 11812 KB (+- 7 KB)
# Running 'internals/inject-build-id' benchmark:
Average build-id injection took: 13.641 msec (+- 0.069 msec)
Average time per event: 1.337 usec (+- 0.007 usec)
Average memory usage: 12302 KB (+- 8 KB)
Average build-id-all injection took: 10.820 msec (+- 0.106 msec)
Average time per event: 1.061 usec (+- 0.010 usec)
Average memory usage: 11616 KB (+- 7 KB)
# Running 'internals/inject-build-id' benchmark:
Average build-id injection took: 13.379 msec (+- 0.074 msec)
Average time per event: 1.312 usec (+- 0.007 usec)
Average memory usage: 12334 KB (+- 8 KB)
Average build-id-all injection took: 11.288 msec (+- 0.071 msec)
Average time per event: 1.107 usec (+- 0.007 usec)
Average memory usage: 11657 KB (+- 8 KB)
# Running 'internals/inject-build-id' benchmark:
Average build-id injection took: 13.534 msec (+- 0.058 msec)
Average time per event: 1.327 usec (+- 0.006 usec)
Average memory usage: 12264 KB (+- 8 KB)
Average build-id-all injection took: 11.557 msec (+- 0.076 msec)
Average time per event: 1.133 usec (+- 0.007 usec)
Average memory usage: 11593 KB (+- 8 KB)
Performance counter stats for 'perf bench internals inject-build-id' (5 runs):
4,060.05 msec task-clock:u # 1.566 CPUs utilized ( +- 0.65% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
101,888 page-faults:u # 0.025 M/sec ( +- 0.12% )
3,745,833,163 cycles:u # 0.923 GHz ( +- 0.10% ) (83.22%)
194,346,613 stalled-cycles-frontend:u # 5.19% frontend cycles idle ( +- 0.57% ) (83.30%)
708,495,034 stalled-cycles-backend:u # 18.91% backend cycles idle ( +- 0.48% ) (83.48%)
5,629,328,628 instructions:u # 1.50 insn per cycle
# 0.13 stalled cycles per insn ( +- 0.21% ) (83.57%)
1,236,697,927 branches:u # 304.602 M/sec ( +- 0.16% ) (83.44%)
17,564,877 branch-misses:u # 1.42% of all branches ( +- 0.23% ) (82.99%)
2.5934 +- 0.0128 seconds time elapsed ( +- 0.49% )
$
After:
$ perf stat -r 5 perf bench internals inject-build-id
# Running 'internals/inject-build-id' benchmark:
Average build-id injection took: 8.560 msec (+- 0.125 msec)
Average time per event: 0.839 usec (+- 0.012 usec)
Average memory usage: 12520 KB (+- 8 KB)
Average build-id-all injection took: 5.789 msec (+- 0.054 msec)
Average time per event: 0.568 usec (+- 0.005 usec)
Average memory usage: 11919 KB (+- 9 KB)
# Running 'internals/inject-build-id' benchmark:
Average build-id injection took: 8.639 msec (+- 0.111 msec)
Average time per event: 0.847 usec (+- 0.011 usec)
Average memory usage: 12732 KB (+- 8 KB)
Average build-id-all injection took: 5.647 msec (+- 0.069 msec)
Average time per event: 0.554 usec (+- 0.007 usec)
Average memory usage: 12093 KB (+- 7 KB)
# Running 'internals/inject-build-id' benchmark:
Average build-id injection took: 8.551 msec (+- 0.096 msec)
Average time per event: 0.838 usec (+- 0.009 usec)
Average memory usage: 12739 KB (+- 8 KB)
Average build-id-all injection took: 5.617 msec (+- 0.061 msec)
Average time per event: 0.551 usec (+- 0.006 usec)
Average memory usage: 12105 KB (+- 7 KB)
# Running 'internals/inject-build-id' benchmark:
Average build-id injection took: 8.403 msec (+- 0.097 msec)
Average time per event: 0.824 usec (+- 0.010 usec)
Average memory usage: 12770 KB (+- 8 KB)
Average build-id-all injection took: 5.611 msec (+- 0.085 msec)
Average time per event: 0.550 usec (+- 0.008 usec)
Average memory usage: 12134 KB (+- 8 KB)
# Running 'internals/inject-build-id' benchmark:
Average build-id injection took: 8.518 msec (+- 0.102 msec)
Average time per event: 0.835 usec (+- 0.010 usec)
Average memory usage: 12518 KB (+- 10 KB)
Average build-id-all injection took: 5.503 msec (+- 0.073 msec)
Average time per event: 0.540 usec (+- 0.007 usec)
Average memory usage: 11882 KB (+- 8 KB)
Performance counter stats for 'perf bench internals inject-build-id' (5 runs):
2,394.88 msec task-clock:u # 1.577 CPUs utilized ( +- 0.83% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
103,181 page-faults:u # 0.043 M/sec ( +- 0.11% )
3,548,172,030 cycles:u # 1.482 GHz ( +- 0.30% ) (83.26%)
81,537,700 stalled-cycles-frontend:u # 2.30% frontend cycles idle ( +- 1.54% ) (83.24%)
876,631,544 stalled-cycles-backend:u # 24.71% backend cycles idle ( +- 1.14% ) (83.45%)
5,960,361,707 instructions:u # 1.68 insn per cycle
# 0.15 stalled cycles per insn ( +- 0.27% ) (83.26%)
1,269,413,491 branches:u # 530.054 M/sec ( +- 0.10% ) (83.48%)
11,372,453 branch-misses:u # 0.90% of all branches ( +- 0.52% ) (83.31%)
1.51874 +- 0.00642 seconds time elapsed ( +- 0.42% )
$
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201030054742.87740-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
committed by
Arnaldo Carvalho de Melo
parent
0a04244cab
commit
601366678c
@@ -853,10 +853,12 @@ int cmd_inject(int argc, const char **argv)
|
|||||||
.output = {
|
.output = {
|
||||||
.path = "-",
|
.path = "-",
|
||||||
.mode = PERF_DATA_MODE_WRITE,
|
.mode = PERF_DATA_MODE_WRITE,
|
||||||
|
.use_stdio = true,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
struct perf_data data = {
|
struct perf_data data = {
|
||||||
.mode = PERF_DATA_MODE_READ,
|
.mode = PERF_DATA_MODE_READ,
|
||||||
|
.use_stdio = true,
|
||||||
};
|
};
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
|||||||
@@ -174,8 +174,21 @@ static bool check_pipe(struct perf_data *data)
|
|||||||
is_pipe = true;
|
is_pipe = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_pipe)
|
if (is_pipe) {
|
||||||
data->file.fd = fd;
|
if (data->use_stdio) {
|
||||||
|
const char *mode;
|
||||||
|
|
||||||
|
mode = perf_data__is_read(data) ? "r" : "w";
|
||||||
|
data->file.fptr = fdopen(fd, mode);
|
||||||
|
|
||||||
|
if (data->file.fptr == NULL) {
|
||||||
|
data->file.fd = fd;
|
||||||
|
data->use_stdio = false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
data->file.fd = fd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return data->is_pipe = is_pipe;
|
return data->is_pipe = is_pipe;
|
||||||
}
|
}
|
||||||
@@ -334,6 +347,9 @@ int perf_data__open(struct perf_data *data)
|
|||||||
if (check_pipe(data))
|
if (check_pipe(data))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
/* currently it allows stdio for pipe only */
|
||||||
|
data->use_stdio = false;
|
||||||
|
|
||||||
if (!data->path)
|
if (!data->path)
|
||||||
data->path = "perf.data";
|
data->path = "perf.data";
|
||||||
|
|
||||||
@@ -353,7 +369,21 @@ void perf_data__close(struct perf_data *data)
|
|||||||
perf_data__close_dir(data);
|
perf_data__close_dir(data);
|
||||||
|
|
||||||
zfree(&data->file.path);
|
zfree(&data->file.path);
|
||||||
close(data->file.fd);
|
|
||||||
|
if (data->use_stdio)
|
||||||
|
fclose(data->file.fptr);
|
||||||
|
else
|
||||||
|
close(data->file.fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size)
|
||||||
|
{
|
||||||
|
if (data->use_stdio) {
|
||||||
|
if (fread(buf, size, 1, data->file.fptr) == 1)
|
||||||
|
return size;
|
||||||
|
return feof(data->file.fptr) ? 0 : -1;
|
||||||
|
}
|
||||||
|
return readn(data->file.fd, buf, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
ssize_t perf_data_file__write(struct perf_data_file *file,
|
ssize_t perf_data_file__write(struct perf_data_file *file,
|
||||||
@@ -365,6 +395,11 @@ ssize_t perf_data_file__write(struct perf_data_file *file,
|
|||||||
ssize_t perf_data__write(struct perf_data *data,
|
ssize_t perf_data__write(struct perf_data *data,
|
||||||
void *buf, size_t size)
|
void *buf, size_t size)
|
||||||
{
|
{
|
||||||
|
if (data->use_stdio) {
|
||||||
|
if (fwrite(buf, size, 1, data->file.fptr) == 1)
|
||||||
|
return size;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
return perf_data_file__write(&data->file, buf, size);
|
return perf_data_file__write(&data->file, buf, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
#ifndef __PERF_DATA_H
|
#ifndef __PERF_DATA_H
|
||||||
#define __PERF_DATA_H
|
#define __PERF_DATA_H
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
|
||||||
enum perf_data_mode {
|
enum perf_data_mode {
|
||||||
@@ -16,7 +17,10 @@ enum perf_dir_version {
|
|||||||
|
|
||||||
struct perf_data_file {
|
struct perf_data_file {
|
||||||
char *path;
|
char *path;
|
||||||
int fd;
|
union {
|
||||||
|
int fd;
|
||||||
|
FILE *fptr;
|
||||||
|
};
|
||||||
unsigned long size;
|
unsigned long size;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -26,6 +30,7 @@ struct perf_data {
|
|||||||
bool is_pipe;
|
bool is_pipe;
|
||||||
bool is_dir;
|
bool is_dir;
|
||||||
bool force;
|
bool force;
|
||||||
|
bool use_stdio;
|
||||||
enum perf_data_mode mode;
|
enum perf_data_mode mode;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
@@ -62,11 +67,15 @@ static inline bool perf_data__is_single_file(struct perf_data *data)
|
|||||||
|
|
||||||
static inline int perf_data__fd(struct perf_data *data)
|
static inline int perf_data__fd(struct perf_data *data)
|
||||||
{
|
{
|
||||||
|
if (data->use_stdio)
|
||||||
|
return fileno(data->file.fptr);
|
||||||
|
|
||||||
return data->file.fd;
|
return data->file.fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
int perf_data__open(struct perf_data *data);
|
int perf_data__open(struct perf_data *data);
|
||||||
void perf_data__close(struct perf_data *data);
|
void perf_data__close(struct perf_data *data);
|
||||||
|
ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size);
|
||||||
ssize_t perf_data__write(struct perf_data *data,
|
ssize_t perf_data__write(struct perf_data *data,
|
||||||
void *buf, size_t size);
|
void *buf, size_t size);
|
||||||
ssize_t perf_data_file__write(struct perf_data_file *file,
|
ssize_t perf_data_file__write(struct perf_data_file *file,
|
||||||
|
|||||||
@@ -3647,7 +3647,8 @@ static int perf_file_section__process(struct perf_file_section *section,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
|
static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
|
||||||
struct perf_header *ph, int fd,
|
struct perf_header *ph,
|
||||||
|
struct perf_data* data,
|
||||||
bool repipe)
|
bool repipe)
|
||||||
{
|
{
|
||||||
struct feat_fd ff = {
|
struct feat_fd ff = {
|
||||||
@@ -3656,7 +3657,7 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
|
|||||||
};
|
};
|
||||||
ssize_t ret;
|
ssize_t ret;
|
||||||
|
|
||||||
ret = readn(fd, header, sizeof(*header));
|
ret = perf_data__read(data, header, sizeof(*header));
|
||||||
if (ret <= 0)
|
if (ret <= 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@@ -3679,8 +3680,7 @@ static int perf_header__read_pipe(struct perf_session *session)
|
|||||||
struct perf_header *header = &session->header;
|
struct perf_header *header = &session->header;
|
||||||
struct perf_pipe_file_header f_header;
|
struct perf_pipe_file_header f_header;
|
||||||
|
|
||||||
if (perf_file_header__read_pipe(&f_header, header,
|
if (perf_file_header__read_pipe(&f_header, header, session->data,
|
||||||
perf_data__fd(session->data),
|
|
||||||
session->repipe) < 0) {
|
session->repipe) < 0) {
|
||||||
pr_debug("incompatible file format\n");
|
pr_debug("incompatible file format\n");
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|||||||
@@ -1937,7 +1937,6 @@ static int __perf_session__process_pipe_events(struct perf_session *session)
|
|||||||
{
|
{
|
||||||
struct ordered_events *oe = &session->ordered_events;
|
struct ordered_events *oe = &session->ordered_events;
|
||||||
struct perf_tool *tool = session->tool;
|
struct perf_tool *tool = session->tool;
|
||||||
int fd = perf_data__fd(session->data);
|
|
||||||
union perf_event *event;
|
union perf_event *event;
|
||||||
uint32_t size, cur_size = 0;
|
uint32_t size, cur_size = 0;
|
||||||
void *buf = NULL;
|
void *buf = NULL;
|
||||||
@@ -1957,7 +1956,8 @@ static int __perf_session__process_pipe_events(struct perf_session *session)
|
|||||||
ordered_events__set_copy_on_queue(oe, true);
|
ordered_events__set_copy_on_queue(oe, true);
|
||||||
more:
|
more:
|
||||||
event = buf;
|
event = buf;
|
||||||
err = readn(fd, event, sizeof(struct perf_event_header));
|
err = perf_data__read(session->data, event,
|
||||||
|
sizeof(struct perf_event_header));
|
||||||
if (err <= 0) {
|
if (err <= 0) {
|
||||||
if (err == 0)
|
if (err == 0)
|
||||||
goto done;
|
goto done;
|
||||||
@@ -1989,7 +1989,8 @@ more:
|
|||||||
p += sizeof(struct perf_event_header);
|
p += sizeof(struct perf_event_header);
|
||||||
|
|
||||||
if (size - sizeof(struct perf_event_header)) {
|
if (size - sizeof(struct perf_event_header)) {
|
||||||
err = readn(fd, p, size - sizeof(struct perf_event_header));
|
err = perf_data__read(session->data, p,
|
||||||
|
size - sizeof(struct perf_event_header));
|
||||||
if (err <= 0) {
|
if (err <= 0) {
|
||||||
if (err == 0) {
|
if (err == 0) {
|
||||||
pr_err("unexpected end of event stream\n");
|
pr_err("unexpected end of event stream\n");
|
||||||
|
|||||||
Reference in New Issue
Block a user