Merge branch 'bpf-libbpf-perf-rb-api'
Andrii Nakryiko says:

====================
This patchset adds a high-level API for setting up and polling perf
buffers associated with a BPF_MAP_TYPE_PERF_EVENT_ARRAY map. Details of
the API are described in the corresponding commits.

Patch #1 adds a set of APIs to set up and work with a perf buffer.
Patch #2 enhances libbpf to support auto-setting PERF_EVENT_ARRAY map size.
Patch #3 adds a test.
Patch #4 converts bpftool map event_pipe to the new API.
Patch #5 updates the README to mention the perf_buffer_ prefix.

v6->v7:
- use the __x64_ syscall prefix (Yonghong);
v5->v6:
- fix C99 for-loop variable initialization usage (Yonghong);
v4->v5:
- initialize perf_buffer_raw_opts in bpftool map event_pipe (Jakub);
- add perf_buffer_ to the README;
v3->v4:
- fix bpftool event_pipe cmd error handling (Jakub);
v2->v3:
- add perf_buffer__new_raw for more low-level control;
- convert bpftool map event_pipe to the new API (Daniel);
- fix a bug with error handling in create_maps (Song);
v1->v2:
- add auto-sizing of PERF_EVENT_ARRAY maps.
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
commit d2850ce0bd
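As a quick orientation before the diffs below, here is a minimal sketch of the
high-level API this series introduces. The callback, function, and variable
names are hypothetical; perf_buffer__new(), perf_buffer__poll(),
perf_buffer__free(), and struct perf_buffer_opts are the interfaces actually
added by this patchset (see the libbpf.h hunk further down):

	#include <bpf/libbpf.h>

	/* hypothetical callback, invoked for every PERF_RECORD_SAMPLE;
	 * data/size are the raw bytes emitted via bpf_perf_event_output()
	 */
	static void on_sample(void *ctx, int cpu, void *data, __u32 size)
	{
		/* consume the sample */
	}

	/* map_fd is the FD of a BPF_MAP_TYPE_PERF_EVENT_ARRAY map */
	static int consume_events(int map_fd)
	{
		struct perf_buffer_opts pb_opts = {};
		struct perf_buffer *pb;
		int err;

		pb_opts.sample_cb = on_sample;
		/* 8 pages per CPU buffer; page_cnt must be a power of two */
		pb = perf_buffer__new(map_fd, 8, &pb_opts);
		err = libbpf_get_error(pb);
		if (err)
			return err;

		while ((err = perf_buffer__poll(pb, 100 /* ms */)) >= 0)
			; /* callbacks fire from inside poll */

		perf_buffer__free(pb);
		return err;
	}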
tools/bpf/bpftool/map_perf_ring.c

@@ -28,7 +28,7 @@
 
 #define MMAP_PAGE_CNT	16
 
-static bool stop;
+static volatile bool stop;
 
 struct event_ring_info {
 	int fd;
@@ -44,32 +44,44 @@ struct perf_event_sample {
 	unsigned char data[];
 };
 
+struct perf_event_lost {
+	struct perf_event_header header;
+	__u64 id;
+	__u64 lost;
+};
+
 static void int_exit(int signo)
 {
 	fprintf(stderr, "Stopping...\n");
 	stop = true;
 }
 
+struct event_pipe_ctx {
+	bool all_cpus;
+	int cpu;
+	int idx;
+};
+
 static enum bpf_perf_event_ret
-print_bpf_output(struct perf_event_header *event, void *private_data)
+print_bpf_output(void *private_data, int cpu, struct perf_event_header *event)
 {
-	struct perf_event_sample *e = container_of(event, struct perf_event_sample,
+	struct perf_event_sample *e = container_of(event,
+						   struct perf_event_sample,
 						   header);
-	struct event_ring_info *ring = private_data;
-	struct {
-		struct perf_event_header header;
-		__u64 id;
-		__u64 lost;
-	} *lost = (typeof(lost))event;
+	struct perf_event_lost *lost = container_of(event,
+						    struct perf_event_lost,
+						    header);
+	struct event_pipe_ctx *ctx = private_data;
+	int idx = ctx->all_cpus ? cpu : ctx->idx;
 
 	if (json_output) {
 		jsonw_start_object(json_wtr);
 		jsonw_name(json_wtr, "type");
 		jsonw_uint(json_wtr, e->header.type);
 		jsonw_name(json_wtr, "cpu");
-		jsonw_uint(json_wtr, ring->cpu);
+		jsonw_uint(json_wtr, cpu);
 		jsonw_name(json_wtr, "index");
-		jsonw_uint(json_wtr, ring->key);
+		jsonw_uint(json_wtr, idx);
 		if (e->header.type == PERF_RECORD_SAMPLE) {
 			jsonw_name(json_wtr, "timestamp");
 			jsonw_uint(json_wtr, e->time);
@@ -89,7 +101,7 @@ print_bpf_output(struct perf_event_header *event, void *private_data)
 		if (e->header.type == PERF_RECORD_SAMPLE) {
 			printf("== @%lld.%09lld CPU: %d index: %d =====\n",
 			       e->time / 1000000000ULL, e->time % 1000000000ULL,
-			       ring->cpu, ring->key);
+			       cpu, idx);
 			fprint_hex(stdout, e->data, e->size, " ");
 			printf("\n");
 		} else if (e->header.type == PERF_RECORD_LOST) {
@@ -103,87 +115,25 @@ print_bpf_output(struct perf_event_header *event, void *private_data)
 	return LIBBPF_PERF_EVENT_CONT;
 }
 
-static void
-perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len)
-{
-	enum bpf_perf_event_ret ret;
-
-	ret = bpf_perf_event_read_simple(ring->mem,
-					 MMAP_PAGE_CNT * get_page_size(),
-					 get_page_size(), buf, buf_len,
-					 print_bpf_output, ring);
-	if (ret != LIBBPF_PERF_EVENT_CONT) {
-		fprintf(stderr, "perf read loop failed with %d\n", ret);
-		stop = true;
-	}
-}
-
-static int perf_mmap_size(void)
-{
-	return get_page_size() * (MMAP_PAGE_CNT + 1);
-}
-
-static void *perf_event_mmap(int fd)
-{
-	int mmap_size = perf_mmap_size();
-	void *base;
-
-	base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-	if (base == MAP_FAILED) {
-		p_err("event mmap failed: %s\n", strerror(errno));
-		return NULL;
-	}
-
-	return base;
-}
-
-static void perf_event_unmap(void *mem)
-{
-	if (munmap(mem, perf_mmap_size()))
-		fprintf(stderr, "Can't unmap ring memory!\n");
-}
-
-static int bpf_perf_event_open(int map_fd, int key, int cpu)
+int do_event_pipe(int argc, char **argv)
 {
-	struct perf_event_attr attr = {
+	struct perf_event_attr perf_attr = {
 		.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_TIME,
 		.type = PERF_TYPE_SOFTWARE,
 		.config = PERF_COUNT_SW_BPF_OUTPUT,
 		.sample_period = 1,
 		.wakeup_events = 1,
 	};
-	int pmu_fd;
-
-	pmu_fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
-	if (pmu_fd < 0) {
-		p_err("failed to open perf event %d for CPU %d", key, cpu);
-		return -1;
-	}
-
-	if (bpf_map_update_elem(map_fd, &key, &pmu_fd, BPF_ANY)) {
-		p_err("failed to update map for event %d for CPU %d", key, cpu);
-		goto err_close;
-	}
-	if (ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
-		p_err("failed to enable event %d for CPU %d", key, cpu);
-		goto err_close;
-	}
-
-	return pmu_fd;
-
-err_close:
-	close(pmu_fd);
-	return -1;
-}
-
-int do_event_pipe(int argc, char **argv)
-{
-	int i, nfds, map_fd, index = -1, cpu = -1;
 	struct bpf_map_info map_info = {};
-	struct event_ring_info *rings;
-	size_t tmp_buf_sz = 0;
-	void *tmp_buf = NULL;
-	struct pollfd *pfds;
+	struct perf_buffer_raw_opts opts = {};
+	struct event_pipe_ctx ctx = {
+		.all_cpus = true,
+		.cpu = -1,
+		.idx = -1,
+	};
+	struct perf_buffer *pb;
 	__u32 map_info_len;
-	bool do_all = true;
+	int err, map_fd;
 
 	map_info_len = sizeof(map_info);
 	map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len);
@@ -205,7 +155,7 @@ int do_event_pipe(int argc, char **argv)
 			char *endptr;
 
 			NEXT_ARG();
-			cpu = strtoul(*argv, &endptr, 0);
+			ctx.cpu = strtoul(*argv, &endptr, 0);
 			if (*endptr) {
 				p_err("can't parse %s as CPU ID", **argv);
 				goto err_close_map;
@@ -216,7 +166,7 @@ int do_event_pipe(int argc, char **argv)
 			char *endptr;
 
 			NEXT_ARG();
-			index = strtoul(*argv, &endptr, 0);
+			ctx.idx = strtoul(*argv, &endptr, 0);
 			if (*endptr) {
 				p_err("can't parse %s as index", **argv);
 				goto err_close_map;
@@ -228,45 +178,32 @@ int do_event_pipe(int argc, char **argv)
 		goto err_close_map;
 	}
 
-	do_all = false;
+	ctx.all_cpus = false;
 	}
 
-	if (!do_all) {
-		if (index == -1 || cpu == -1) {
+	if (!ctx.all_cpus) {
+		if (ctx.idx == -1 || ctx.cpu == -1) {
 			p_err("cpu and index must be specified together");
 			goto err_close_map;
 		}
-
-		nfds = 1;
 	} else {
-		nfds = min(get_possible_cpus(), map_info.max_entries);
-		cpu = 0;
-		index = 0;
+		ctx.cpu = 0;
+		ctx.idx = 0;
 	}
 
-	rings = calloc(nfds, sizeof(rings[0]));
-	if (!rings)
+	opts.attr = &perf_attr;
+	opts.event_cb = print_bpf_output;
+	opts.ctx = &ctx;
+	opts.cpu_cnt = ctx.all_cpus ? 0 : 1;
+	opts.cpus = &ctx.cpu;
+	opts.map_keys = &ctx.idx;
+
+	pb = perf_buffer__new_raw(map_fd, MMAP_PAGE_CNT, &opts);
+	err = libbpf_get_error(pb);
+	if (err) {
+		p_err("failed to create perf buffer: %s (%d)",
+		      strerror(err), err);
 		goto err_close_map;
-
-	pfds = calloc(nfds, sizeof(pfds[0]));
-	if (!pfds)
-		goto err_free_rings;
-
-	for (i = 0; i < nfds; i++) {
-		rings[i].cpu = cpu + i;
-		rings[i].key = index + i;
-
-		rings[i].fd = bpf_perf_event_open(map_fd, rings[i].key,
-						  rings[i].cpu);
-		if (rings[i].fd < 0)
-			goto err_close_fds_prev;
-
-		rings[i].mem = perf_event_mmap(rings[i].fd);
-		if (!rings[i].mem)
-			goto err_close_fds_current;
-
-		pfds[i].fd = rings[i].fd;
-		pfds[i].events = POLLIN;
 	}
 
 	signal(SIGINT, int_exit);
@@ -277,34 +214,24 @@ int do_event_pipe(int argc, char **argv)
 		jsonw_start_array(json_wtr);
 
 	while (!stop) {
-		poll(pfds, nfds, 200);
-		for (i = 0; i < nfds; i++)
-			perf_event_read(&rings[i], &tmp_buf, &tmp_buf_sz);
+		err = perf_buffer__poll(pb, 200);
+		if (err < 0 && err != -EINTR) {
+			p_err("perf buffer polling failed: %s (%d)",
+			      strerror(err), err);
+			goto err_close_pb;
+		}
 	}
-	free(tmp_buf);
 
 	if (json_output)
 		jsonw_end_array(json_wtr);
 
-	for (i = 0; i < nfds; i++) {
-		perf_event_unmap(rings[i].mem);
-		close(rings[i].fd);
-	}
-	free(pfds);
-	free(rings);
+	perf_buffer__free(pb);
 	close(map_fd);
 
 	return 0;
 
-err_close_fds_prev:
-	while (i--) {
-		perf_event_unmap(rings[i].mem);
-err_close_fds_current:
-		close(rings[i].fd);
-	}
-	free(pfds);
-err_free_rings:
-	free(rings);
+err_close_pb:
+	perf_buffer__free(pb);
 err_close_map:
 	close(map_fd);
 	return -1;
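The conversion above is the reference use of the raw, low-level API: supply
your own perf_event_attr and a per-event callback through struct
perf_buffer_raw_opts, then hand both to perf_buffer__new_raw(). Distilled into
a standalone sketch (handle_event and pipe_events are hypothetical names; the
libbpf types and calls are the ones added by this series):

	#include <bpf/libbpf.h>
	#include <linux/perf_event.h>

	/* hypothetical raw callback: gets every perf_event_header unparsed */
	static enum bpf_perf_event_ret
	handle_event(void *ctx, int cpu, struct perf_event_header *event)
	{
		/* dispatch on event->type: PERF_RECORD_SAMPLE,
		 * PERF_RECORD_LOST, ...
		 */
		return LIBBPF_PERF_EVENT_CONT;
	}

	static int pipe_events(int map_fd)
	{
		struct perf_event_attr attr = {
			.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_TIME,
			.type = PERF_TYPE_SOFTWARE,
			.config = PERF_COUNT_SW_BPF_OUTPUT,
			.sample_period = 1,
			.wakeup_events = 1,
		};
		struct perf_buffer_raw_opts opts = {};
		struct perf_buffer *pb;
		int err;

		opts.attr = &attr;
		opts.event_cb = handle_event;
		opts.cpu_cnt = 0; /* 0 == one ring per possible CPU */

		pb = perf_buffer__new_raw(map_fd, 16, &opts);
		err = libbpf_get_error(pb);
		if (err)
			return err;

		while ((err = perf_buffer__poll(pb, 200)) >= 0)
			;

		perf_buffer__free(pb);
		return err;
	}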
tools/lib/bpf/README.rst

@@ -9,7 +9,8 @@ described here. It's recommended to follow these conventions whenever a
 new function or type is added to keep libbpf API clean and consistent.
 
 All types and functions provided by libbpf API should have one of the
-following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``.
+following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``,
+``perf_buffer_``.
 
 System call wrappers
 --------------------
tools/lib/bpf/libbpf.c

@@ -32,7 +32,9 @@
 #include <linux/limits.h>
 #include <linux/perf_event.h>
 #include <linux/ring_buffer.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/vfs.h>
@@ -2114,6 +2116,7 @@ static int
 bpf_object__create_maps(struct bpf_object *obj)
 {
 	struct bpf_create_map_attr create_attr = {};
+	int nr_cpus = 0;
 	unsigned int i;
 	int err;
 
@@ -2136,7 +2139,22 @@ bpf_object__create_maps(struct bpf_object *obj)
 		create_attr.map_flags = def->map_flags;
 		create_attr.key_size = def->key_size;
 		create_attr.value_size = def->value_size;
-		create_attr.max_entries = def->max_entries;
+		if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
+		    !def->max_entries) {
+			if (!nr_cpus)
+				nr_cpus = libbpf_num_possible_cpus();
+			if (nr_cpus < 0) {
+				pr_warning("failed to determine number of system CPUs: %d\n",
+					   nr_cpus);
+				err = nr_cpus;
+				goto err_out;
+			}
+			pr_debug("map '%s': setting size to %d\n",
+				 map->name, nr_cpus);
+			create_attr.max_entries = nr_cpus;
+		} else {
+			create_attr.max_entries = def->max_entries;
+		}
 		create_attr.btf_fd = 0;
 		create_attr.btf_key_type_id = 0;
 		create_attr.btf_value_type_id = 0;
@@ -2153,9 +2171,10 @@ bpf_object__create_maps(struct bpf_object *obj)
 		*pfd = bpf_create_map_xattr(&create_attr);
 		if (*pfd < 0 && (create_attr.btf_key_type_id ||
 				 create_attr.btf_value_type_id)) {
-			cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+			err = -errno;
+			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 			pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
-				   map->name, cp, errno);
+				   map->name, cp, err);
 			create_attr.btf_fd = 0;
 			create_attr.btf_key_type_id = 0;
 			create_attr.btf_value_type_id = 0;
@@ -2167,11 +2186,11 @@ bpf_object__create_maps(struct bpf_object *obj)
 		if (*pfd < 0) {
 			size_t j;
 
-			err = *pfd;
-			cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
-			pr_warning("failed to create map (name: '%s'): %s\n",
-				   map->name, cp);
+			err = -errno;
+err_out:
+			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+			pr_warning("failed to create map (name: '%s'): %s(%d)\n",
+				   map->name, cp, err);
 			for (j = 0; j < i; j++)
 				zclose(obj->maps[j].fd);
 			return err;
@@ -4354,6 +4373,370 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
 	return ret;
 }
 
+struct perf_buffer;
+
+struct perf_buffer_params {
+	struct perf_event_attr *attr;
+	/* if event_cb is specified, it takes precedence */
+	perf_buffer_event_fn event_cb;
+	/* sample_cb and lost_cb are higher-level common-case callbacks */
+	perf_buffer_sample_fn sample_cb;
+	perf_buffer_lost_fn lost_cb;
+	void *ctx;
+	int cpu_cnt;
+	int *cpus;
+	int *map_keys;
+};
+
+struct perf_cpu_buf {
+	struct perf_buffer *pb;
+	void *base; /* mmap()'ed memory */
+	void *buf; /* for reconstructing segmented data */
+	size_t buf_size;
+	int fd;
+	int cpu;
+	int map_key;
+};
+
+struct perf_buffer {
+	perf_buffer_event_fn event_cb;
+	perf_buffer_sample_fn sample_cb;
+	perf_buffer_lost_fn lost_cb;
+	void *ctx; /* passed into callbacks */
+
+	size_t page_size;
+	size_t mmap_size;
+	struct perf_cpu_buf **cpu_bufs;
+	struct epoll_event *events;
+	int cpu_cnt;
+	int epoll_fd; /* perf event FD */
+	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
+};
+
+static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
+				      struct perf_cpu_buf *cpu_buf)
+{
+	if (!cpu_buf)
+		return;
+	if (cpu_buf->base &&
+	    munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
+		pr_warning("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
+	if (cpu_buf->fd >= 0) {
+		ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
+		close(cpu_buf->fd);
+	}
+	free(cpu_buf->buf);
+	free(cpu_buf);
+}
+
+void perf_buffer__free(struct perf_buffer *pb)
+{
+	int i;
+
+	if (!pb)
+		return;
+	if (pb->cpu_bufs) {
+		for (i = 0; i < pb->cpu_cnt && pb->cpu_bufs[i]; i++) {
+			struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
+
+			bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
+			perf_buffer__free_cpu_buf(pb, cpu_buf);
+		}
+		free(pb->cpu_bufs);
+	}
+	if (pb->epoll_fd >= 0)
+		close(pb->epoll_fd);
+	free(pb->events);
+	free(pb);
+}
+
+static struct perf_cpu_buf *
+perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
+			  int cpu, int map_key)
+{
+	struct perf_cpu_buf *cpu_buf;
+	char msg[STRERR_BUFSIZE];
+	int err;
+
+	cpu_buf = calloc(1, sizeof(*cpu_buf));
+	if (!cpu_buf)
+		return ERR_PTR(-ENOMEM);
+
+	cpu_buf->pb = pb;
+	cpu_buf->cpu = cpu;
+	cpu_buf->map_key = map_key;
+
+	cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
+			      -1, PERF_FLAG_FD_CLOEXEC);
+	if (cpu_buf->fd < 0) {
+		err = -errno;
+		pr_warning("failed to open perf buffer event on cpu #%d: %s\n",
+			   cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto error;
+	}
+
+	cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
+			     PROT_READ | PROT_WRITE, MAP_SHARED,
+			     cpu_buf->fd, 0);
+	if (cpu_buf->base == MAP_FAILED) {
+		cpu_buf->base = NULL;
+		err = -errno;
+		pr_warning("failed to mmap perf buffer on cpu #%d: %s\n",
+			   cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto error;
+	}
+
+	if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
+		err = -errno;
+		pr_warning("failed to enable perf buffer event on cpu #%d: %s\n",
+			   cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto error;
+	}
+
+	return cpu_buf;
+
+error:
+	perf_buffer__free_cpu_buf(pb, cpu_buf);
+	return (struct perf_cpu_buf *)ERR_PTR(err);
+}
+
+static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
+					      struct perf_buffer_params *p);
+
+struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
+				     const struct perf_buffer_opts *opts)
+{
+	struct perf_buffer_params p = {};
+	struct perf_event_attr attr = {
+		.config = PERF_COUNT_SW_BPF_OUTPUT,
+		.type = PERF_TYPE_SOFTWARE,
+		.sample_type = PERF_SAMPLE_RAW,
+		.sample_period = 1,
+		.wakeup_events = 1,
+	};
+
+	p.attr = &attr;
+	p.sample_cb = opts ? opts->sample_cb : NULL;
+	p.lost_cb = opts ? opts->lost_cb : NULL;
+	p.ctx = opts ? opts->ctx : NULL;
+
+	return __perf_buffer__new(map_fd, page_cnt, &p);
+}
+
+struct perf_buffer *
+perf_buffer__new_raw(int map_fd, size_t page_cnt,
+		     const struct perf_buffer_raw_opts *opts)
+{
+	struct perf_buffer_params p = {};
+
+	p.attr = opts->attr;
+	p.event_cb = opts->event_cb;
+	p.ctx = opts->ctx;
+	p.cpu_cnt = opts->cpu_cnt;
+	p.cpus = opts->cpus;
+	p.map_keys = opts->map_keys;
+
+	return __perf_buffer__new(map_fd, page_cnt, &p);
+}
+
+static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
+					      struct perf_buffer_params *p)
+{
+	struct bpf_map_info map = {};
+	char msg[STRERR_BUFSIZE];
+	struct perf_buffer *pb;
+	__u32 map_info_len;
+	int err, i;
+
+	if (page_cnt & (page_cnt - 1)) {
+		pr_warning("page count should be power of two, but is %zu\n",
+			   page_cnt);
+		return ERR_PTR(-EINVAL);
+	}
+
+	map_info_len = sizeof(map);
+	err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
+	if (err) {
+		err = -errno;
+		pr_warning("failed to get map info for map FD %d: %s\n",
+			   map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
+		return ERR_PTR(err);
+	}
+
+	if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+		pr_warning("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+			   map.name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	pb = calloc(1, sizeof(*pb));
+	if (!pb)
+		return ERR_PTR(-ENOMEM);
+
+	pb->event_cb = p->event_cb;
+	pb->sample_cb = p->sample_cb;
+	pb->lost_cb = p->lost_cb;
+	pb->ctx = p->ctx;
+
+	pb->page_size = getpagesize();
+	pb->mmap_size = pb->page_size * page_cnt;
+	pb->map_fd = map_fd;
+
+	pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+	if (pb->epoll_fd < 0) {
+		err = -errno;
+		pr_warning("failed to create epoll instance: %s\n",
+			   libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto error;
+	}
+
+	if (p->cpu_cnt > 0) {
+		pb->cpu_cnt = p->cpu_cnt;
+	} else {
+		pb->cpu_cnt = libbpf_num_possible_cpus();
+		if (pb->cpu_cnt < 0) {
+			err = pb->cpu_cnt;
+			goto error;
+		}
+		if (map.max_entries < pb->cpu_cnt)
+			pb->cpu_cnt = map.max_entries;
+	}
+
+	pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
+	if (!pb->events) {
+		err = -ENOMEM;
+		pr_warning("failed to allocate events: out of memory\n");
+		goto error;
+	}
+	pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
+	if (!pb->cpu_bufs) {
+		err = -ENOMEM;
+		pr_warning("failed to allocate buffers: out of memory\n");
+		goto error;
+	}
+
+	for (i = 0; i < pb->cpu_cnt; i++) {
+		struct perf_cpu_buf *cpu_buf;
+		int cpu, map_key;
+
+		cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
+		map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
+
+		cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
+		if (IS_ERR(cpu_buf)) {
+			err = PTR_ERR(cpu_buf);
+			goto error;
+		}
+
+		pb->cpu_bufs[i] = cpu_buf;
+
+		err = bpf_map_update_elem(pb->map_fd, &map_key,
+					  &cpu_buf->fd, 0);
+		if (err) {
+			err = -errno;
+			pr_warning("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
+				   cpu, map_key, cpu_buf->fd,
+				   libbpf_strerror_r(err, msg, sizeof(msg)));
+			goto error;
+		}
+
+		pb->events[i].events = EPOLLIN;
+		pb->events[i].data.ptr = cpu_buf;
+		if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
+			      &pb->events[i]) < 0) {
+			err = -errno;
+			pr_warning("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
+				   cpu, cpu_buf->fd,
+				   libbpf_strerror_r(err, msg, sizeof(msg)));
+			goto error;
+		}
+	}
+
+	return pb;
+
+error:
+	if (pb)
+		perf_buffer__free(pb);
+	return ERR_PTR(err);
+}
+
+struct perf_sample_raw {
+	struct perf_event_header header;
+	uint32_t size;
+	char data[0];
+};
+
+struct perf_sample_lost {
+	struct perf_event_header header;
+	uint64_t id;
+	uint64_t lost;
+	uint64_t sample_id;
+};
+
+static enum bpf_perf_event_ret
+perf_buffer__process_record(struct perf_event_header *e, void *ctx)
+{
+	struct perf_cpu_buf *cpu_buf = ctx;
+	struct perf_buffer *pb = cpu_buf->pb;
+	void *data = e;
+
+	/* user wants full control over parsing perf event */
+	if (pb->event_cb)
+		return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
+
+	switch (e->type) {
+	case PERF_RECORD_SAMPLE: {
+		struct perf_sample_raw *s = data;
+
+		if (pb->sample_cb)
+			pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
+		break;
+	}
+	case PERF_RECORD_LOST: {
+		struct perf_sample_lost *s = data;
+
+		if (pb->lost_cb)
+			pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
+		break;
+	}
+	default:
+		pr_warning("unknown perf sample type %d\n", e->type);
+		return LIBBPF_PERF_EVENT_ERROR;
+	}
+	return LIBBPF_PERF_EVENT_CONT;
+}
+
+static int perf_buffer__process_records(struct perf_buffer *pb,
+					struct perf_cpu_buf *cpu_buf)
+{
+	enum bpf_perf_event_ret ret;
+
+	ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
+					 pb->page_size, &cpu_buf->buf,
+					 &cpu_buf->buf_size,
+					 perf_buffer__process_record, cpu_buf);
+	if (ret != LIBBPF_PERF_EVENT_CONT)
+		return ret;
+	return 0;
+}
+
+int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
+{
+	int i, cnt, err;
+
+	cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
+	for (i = 0; i < cnt; i++) {
+		struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
+
+		err = perf_buffer__process_records(pb, cpu_buf);
+		if (err) {
+			pr_warning("error while processing records: %d\n", err);
+			return err;
+		}
+	}
+	return cnt < 0 ? -errno : cnt;
+}
+
 struct bpf_prog_info_array_desc {
 	int array_offset;	/* e.g. offset of jited_prog_insns */
 	int count_offset;	/* e.g. offset of jited_prog_len */
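The PERF_EVENT_ARRAY auto-sizing added to bpf_object__create_maps() above
means a BPF-side map definition may now omit max_entries entirely and get one
slot per possible CPU at load time. A minimal sketch of such a definition (it
mirrors the perf_buf_map in the selftest added later in this commit; the map
name here is arbitrary):

	#include <linux/bpf.h>
	#include "bpf_helpers.h"

	/* no max_entries: libbpf now fills it in with
	 * libbpf_num_possible_cpus() when creating the map
	 */
	struct {
		__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
		__uint(key_size, sizeof(int));
		__uint(value_size, sizeof(int));
	} events SEC(".maps");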
tools/lib/bpf/libbpf.h

@@ -358,6 +358,26 @@ LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type,
 LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
 LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
 
+struct perf_buffer;
+
+typedef void (*perf_buffer_sample_fn)(void *ctx, int cpu,
+				      void *data, __u32 size);
+typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt);
+
+/* common use perf buffer options */
+struct perf_buffer_opts {
+	/* if specified, sample_cb is called for each sample */
+	perf_buffer_sample_fn sample_cb;
+	/* if specified, lost_cb is called for each batch of lost samples */
+	perf_buffer_lost_fn lost_cb;
+	/* ctx is provided to sample_cb and lost_cb */
+	void *ctx;
+};
+
+LIBBPF_API struct perf_buffer *
+perf_buffer__new(int map_fd, size_t page_cnt,
+		 const struct perf_buffer_opts *opts);
+
 enum bpf_perf_event_ret {
 	LIBBPF_PERF_EVENT_DONE	= 0,
 	LIBBPF_PERF_EVENT_ERROR	= -1,
@@ -365,6 +385,35 @@ enum bpf_perf_event_ret {
 };
 
 struct perf_event_header;
+
+typedef enum bpf_perf_event_ret
+(*perf_buffer_event_fn)(void *ctx, int cpu, struct perf_event_header *event);
+
+/* raw perf buffer options, giving most power and control */
+struct perf_buffer_raw_opts {
+	/* perf event attrs passed directly into perf_event_open() */
+	struct perf_event_attr *attr;
+	/* raw event callback */
+	perf_buffer_event_fn event_cb;
+	/* ctx is provided to event_cb */
+	void *ctx;
+	/* if cpu_cnt == 0, open all on all possible CPUs (up to the number of
+	 * max_entries of given PERF_EVENT_ARRAY map)
+	 */
+	int cpu_cnt;
+	/* if cpu_cnt > 0, cpus is an array of CPUs to open ring buffers on */
+	int *cpus;
+	/* if cpu_cnt > 0, map_keys specify map keys to set per-CPU FDs for */
+	int *map_keys;
+};
+
+LIBBPF_API struct perf_buffer *
+perf_buffer__new_raw(int map_fd, size_t page_cnt,
+		     const struct perf_buffer_raw_opts *opts);
+
+LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
+
 typedef enum bpf_perf_event_ret
 	(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
 				  void *private_data);
tools/lib/bpf/libbpf.map

@@ -179,4 +179,8 @@ LIBBPF_0.0.4 {
 		btf_dump__new;
 		btf__parse_elf;
 		libbpf_num_possible_cpus;
+		perf_buffer__free;
+		perf_buffer__new;
+		perf_buffer__new_raw;
+		perf_buffer__poll;
 } LIBBPF_0.0.3;
tools/testing/selftests/bpf/prog_tests/perf_buffer.c (new file, 100 lines)

@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <sys/socket.h>
+#include <test_progs.h>
+
+#ifdef __x86_64__
+#define SYS_KPROBE_NAME "__x64_sys_nanosleep"
+#else
+#define SYS_KPROBE_NAME "sys_nanosleep"
+#endif
+
+static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+{
+	int cpu_data = *(int *)data, duration = 0;
+	cpu_set_t *cpu_seen = ctx;
+
+	if (cpu_data != cpu)
+		CHECK(cpu_data != cpu, "check_cpu_data",
+		      "cpu_data %d != cpu %d\n", cpu_data, cpu);
+
+	CPU_SET(cpu, cpu_seen);
+}
+
+void test_perf_buffer(void)
+{
+	int err, prog_fd, nr_cpus, i, duration = 0;
+	const char *prog_name = "kprobe/sys_nanosleep";
+	const char *file = "./test_perf_buffer.o";
+	struct perf_buffer_opts pb_opts = {};
+	struct bpf_map *perf_buf_map;
+	cpu_set_t cpu_set, cpu_seen;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	struct perf_buffer *pb;
+	struct bpf_link *link;
+
+	nr_cpus = libbpf_num_possible_cpus();
+	if (CHECK(nr_cpus < 0, "nr_cpus", "err %d\n", nr_cpus))
+		return;
+
+	/* load program */
+	err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd);
+	if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
+		return;
+
+	prog = bpf_object__find_program_by_title(obj, prog_name);
+	if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
+		goto out_close;
+
+	/* load map */
+	perf_buf_map = bpf_object__find_map_by_name(obj, "perf_buf_map");
+	if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n"))
+		goto out_close;
+
+	/* attach kprobe */
+	link = bpf_program__attach_kprobe(prog, false /* retprobe */,
+					  SYS_KPROBE_NAME);
+	if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
+		goto out_close;
+
+	/* set up perf buffer */
+	pb_opts.sample_cb = on_sample;
+	pb_opts.ctx = &cpu_seen;
+	pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
+	if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+		goto out_detach;
+
+	/* trigger kprobe on every CPU */
+	CPU_ZERO(&cpu_seen);
+	for (i = 0; i < nr_cpus; i++) {
+		CPU_ZERO(&cpu_set);
+		CPU_SET(i, &cpu_set);
+
+		err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set),
+					     &cpu_set);
+		if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n",
+				 i, err))
+			goto out_detach;
+
+		usleep(1);
+	}
+
+	/* read perf buffer */
+	err = perf_buffer__poll(pb, 100);
+	if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
+		goto out_free_pb;
+
+	if (CHECK(CPU_COUNT(&cpu_seen) != nr_cpus, "seen_cpu_cnt",
+		  "expect %d, seen %d\n", nr_cpus, CPU_COUNT(&cpu_seen)))
+		goto out_free_pb;
+
+out_free_pb:
+	perf_buffer__free(pb);
+out_detach:
+	bpf_link__destroy(link);
+out_close:
+	bpf_object__close(obj);
+}
tools/testing/selftests/bpf/progs/test_perf_buffer.c (new file, 25 lines)

@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} perf_buf_map SEC(".maps");
+
+SEC("kprobe/sys_nanosleep")
+int handle_sys_nanosleep_entry(struct pt_regs *ctx)
+{
+	int cpu = bpf_get_smp_processor_id();
+
+	bpf_perf_event_output(ctx, &perf_buf_map, BPF_F_CURRENT_CPU,
+			      &cpu, sizeof(cpu));
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;