7630b3e28d
A new syntax is added to the parser so that the user can access
predefined perf events in BPF objects.
After this patch, BPF programs for perf are finally able to utilize
bpf_perf_event_read() introduced in commit 35578d7984 ("bpf: Implement
function bpf_perf_event_read() that get the selected hardware PMU
counter").
Test result:
# cat test_bpf_map_2.c
/************************ BEGIN **************************/
#include <uapi/linux/bpf.h>
#define SEC(NAME) __attribute__((section(NAME), used))
struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};
static int (*trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)BPF_FUNC_trace_printk;
static int (*get_smp_processor_id)(void) =
(void *)BPF_FUNC_get_smp_processor_id;
static int (*perf_event_read)(struct bpf_map_def *, int) =
(void *)BPF_FUNC_perf_event_read;
struct bpf_map_def SEC("maps") pmu_map = {
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = __NR_CPUS__,
};
SEC("func_write=sys_write")
int func_write(void *ctx)
{
unsigned long long val;
char fmt[] = "sys_write: pmu=%llu\n";
val = perf_event_read(&pmu_map, get_smp_processor_id());
trace_printk(fmt, sizeof(fmt), val);
return 0;
}
SEC("func_write_return=sys_write%return")
int func_write_return(void *ctx)
{
unsigned long long val = 0;
char fmt[] = "sys_write_return: pmu=%llu\n";
val = perf_event_read(&pmu_map, get_smp_processor_id());
trace_printk(fmt, sizeof(fmt), val);
return 0;
}
char _license[] SEC("license") = "GPL";
int _version SEC("version") = LINUX_VERSION_CODE;
/************************* END ***************************/
Normal case:
# echo "" > /sys/kernel/debug/tracing/trace
# perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls /
[SNIP]
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ]
# cat /sys/kernel/debug/tracing/trace | grep ls
ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327
ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218
ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922
ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445
Normal case (system wide):
# echo "" > /sys/kernel/debug/tracing/trace
# perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ]
# cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20
[SNIP]
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
# | | | |||| | |
gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373
gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696
gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658
gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572
Error case 1:
# perf record -e './test_bpf_map_2.c' ls /
[SNIP]
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.014 MB perf.data ]
# cat /sys/kernel/debug/tracing/trace | grep ls
ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614
ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614
ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614
ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614
(18446744073709551614 is 0xfffffffffffffffe (-2))
Error case 2:
# perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a
event syntax error: '..ps:pmu_map.event=evt/'
\___ Event not found for map setting
Hint: Valid config terms:
map:[<arraymap>].value=[value]
map:[<eventmap>].event=[event]
[SNIP]
Error case 3:
# ls /proc/2348/task/
2348 2505 2506 2507 2508
# perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348
ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing
Error case 4:
# perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls /
ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit)
Error case 5:
# perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls
ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map
Error case 6:
# perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls /
event syntax error: '.._map.event=123/'
\___ Incorrect value type for map
[SNIP]
Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Cody P Schafer <dev@codyps.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com>
Cc: Kirill Smelkov <kirr@nexedi.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com
Signed-off-by: He Kuang <hekuang@huawei.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
301 lines
9.8 KiB
C
301 lines
9.8 KiB
C
#ifndef __PERF_EVLIST_H
|
|
#define __PERF_EVLIST_H 1
|
|
|
|
#include <linux/atomic.h>
|
|
#include <linux/list.h>
|
|
#include <api/fd/array.h>
|
|
#include <stdio.h>
|
|
#include "../perf.h"
|
|
#include "event.h"
|
|
#include "evsel.h"
|
|
#include "util.h"
|
|
#include "auxtrace.h"
|
|
#include <unistd.h>
|
|
|
|
/* Opaque forward declarations; this header does not define these structs. */
struct pollfd;
struct thread_map;
struct cpu_map;
struct record_opts;
|
|
|
|
/* Number of bits used to size the perf_evlist::heads hash-list table. */
#define PERF_EVLIST__HLIST_BITS 8
/* Number of hash-list heads: 2^PERF_EVLIST__HLIST_BITS. */
#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
|
|
|
|
/**
|
|
* struct perf_mmap - perf's ring buffer mmap details
|
|
*
|
|
* @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
|
|
*/
|
|
struct perf_mmap {
|
|
void *base;
|
|
int mask;
|
|
atomic_t refcnt;
|
|
u64 prev;
|
|
struct auxtrace_mmap auxtrace_mmap;
|
|
char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8)));
|
|
};
|
|
|
|
struct perf_evlist {
|
|
struct list_head entries;
|
|
struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
|
|
int nr_entries;
|
|
int nr_groups;
|
|
int nr_mmaps;
|
|
bool overwrite;
|
|
bool enabled;
|
|
bool has_user_cpus;
|
|
size_t mmap_len;
|
|
int id_pos;
|
|
int is_pos;
|
|
u64 combined_sample_type;
|
|
struct {
|
|
int cork_fd;
|
|
pid_t pid;
|
|
} workload;
|
|
struct fdarray pollfd;
|
|
struct perf_mmap *mmap;
|
|
struct thread_map *threads;
|
|
struct cpu_map *cpus;
|
|
struct perf_evsel *selected;
|
|
struct events_stats stats;
|
|
struct perf_env *env;
|
|
};
|
|
|
|
/**
 * struct perf_evsel_str_handler - pairs a name string with a handler pointer
 *
 * @name: constant string
 * @handler: untyped pointer; the expected function type is not specified in
 *           this header
 */
struct perf_evsel_str_handler {
	const char *name;
	void	   *handler;
};
|
|
|
|
/*
 * evlist creation/initialisation/teardown and evsel add/remove entry points.
 * NOTE(review): only prototypes are visible here; ownership and error
 * conventions must be confirmed against the definitions.
 */
struct perf_evlist *perf_evlist__new(void);
struct perf_evlist *perf_evlist__new_default(void);
struct perf_evlist *perf_evlist__new_dummy(void);
void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
		       struct thread_map *threads);
void perf_evlist__exit(struct perf_evlist *evlist);
void perf_evlist__delete(struct perf_evlist *evlist);

void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry);
void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel);
int perf_evlist__add_default(struct perf_evlist *evlist);
/* Takes an array of @nr_attrs attributes; see perf_evlist__add_default_attrs(). */
int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs);
|
|
|
|
/*
 * Convenience wrapper around __perf_evlist__add_default_attrs() that passes
 * ARRAY_SIZE(array) as the element count. @array appears twice in the
 * expansion and must be something ARRAY_SIZE() accepts.
 */
#define perf_evlist__add_default_attrs(evlist, array) \
	__perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
|
|
|
|
int perf_evlist__add_dummy(struct perf_evlist *evlist);
|
|
|
|
int perf_evlist__add_newtp(struct perf_evlist *evlist,
|
|
const char *sys, const char *name, void *handler);
|
|
|
|
int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter);
|
|
int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid);
|
|
int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids);
|
|
|
|
struct perf_evsel *
|
|
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id);
|
|
|
|
struct perf_evsel *
|
|
perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
|
|
const char *name);
|
|
|
|
void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
|
|
int cpu, int thread, u64 id);
|
|
int perf_evlist__id_add_fd(struct perf_evlist *evlist,
|
|
struct perf_evsel *evsel,
|
|
int cpu, int thread, int fd);
|
|
|
|
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
|
|
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
|
|
int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask);
|
|
|
|
int perf_evlist__poll(struct perf_evlist *evlist, int timeout);
|
|
|
|
struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
|
|
struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
|
|
u64 id);
|
|
|
|
struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id);
|
|
|
|
union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx);
|
|
|
|
void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
|
|
|
|
int perf_evlist__open(struct perf_evlist *evlist);
|
|
void perf_evlist__close(struct perf_evlist *evlist);
|
|
|
|
void perf_evlist__set_id_pos(struct perf_evlist *evlist);
|
|
bool perf_can_sample_identifier(void);
|
|
bool perf_can_record_switch_events(void);
|
|
bool perf_can_record_cpu_wide(void);
|
|
void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts);
|
|
int record_opts__config(struct record_opts *opts);
|
|
|
|
int perf_evlist__prepare_workload(struct perf_evlist *evlist,
|
|
struct target *target,
|
|
const char *argv[], bool pipe_output,
|
|
void (*exec_error)(int signo, siginfo_t *info,
|
|
void *ucontext));
|
|
int perf_evlist__start_workload(struct perf_evlist *evlist);
|
|
|
|
/* Opaque here; only used through a const pointer below. */
struct option;

/*
 * mmap page-count parsing, mmap/munmap and enable/disable entry points.
 * NOTE(review): behaviour is defined outside this header; the grouping here
 * follows the identifiers only.
 */
int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str);
int perf_evlist__parse_mmap_pages(const struct option *opt,
				  const char *str,
				  int unset);

int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
			 bool overwrite, unsigned int auxtrace_pages,
			 bool auxtrace_overwrite);
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
		      bool overwrite);
void perf_evlist__munmap(struct perf_evlist *evlist);

void perf_evlist__disable(struct perf_evlist *evlist);
void perf_evlist__enable(struct perf_evlist *evlist);
void perf_evlist__toggle_enable(struct perf_evlist *evlist);
|
|
|
|
int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
|
|
struct perf_evsel *evsel, int idx);
|
|
|
|
void perf_evlist__set_selected(struct perf_evlist *evlist,
|
|
struct perf_evsel *evsel);
|
|
|
|
void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
|
|
struct thread_map *threads);
|
|
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target);
|
|
int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel);
|
|
|
|
void __perf_evlist__set_leader(struct list_head *list);
|
|
void perf_evlist__set_leader(struct perf_evlist *evlist);
|
|
|
|
u64 perf_evlist__read_format(struct perf_evlist *evlist);
|
|
u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist);
|
|
u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist);
|
|
u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist);
|
|
bool perf_evlist__sample_id_all(struct perf_evlist *evlist);
|
|
u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist);
|
|
|
|
int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
|
|
struct perf_sample *sample);
|
|
|
|
bool perf_evlist__valid_sample_type(struct perf_evlist *evlist);
|
|
bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist);
|
|
bool perf_evlist__valid_read_format(struct perf_evlist *evlist);
|
|
|
|
void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
|
|
struct list_head *list);
|
|
|
|
static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist)
|
|
{
|
|
return list_entry(evlist->entries.next, struct perf_evsel, node);
|
|
}
|
|
|
|
static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist)
|
|
{
|
|
return list_entry(evlist->entries.prev, struct perf_evsel, node);
|
|
}
|
|
|
|
/*
 * Printing and error-string helpers: the strerror variants take an error
 * value plus a caller-supplied buffer and its size.
 * NOTE(review): return-value conventions are not visible in this header.
 */
size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp);

int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size);
int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size);
|
|
|
|
static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
|
|
{
|
|
struct perf_event_mmap_page *pc = mm->base;
|
|
u64 head = ACCESS_ONCE(pc->data_head);
|
|
rmb();
|
|
return head;
|
|
}
|
|
|
|
/**
 * perf_mmap__write_tail - publish a new data_tail for a mapped perf buffer
 * @md: mapping whose ->base is treated as a struct perf_event_mmap_page
 * @tail: value stored into pc->data_tail
 *
 * Issues mb() and then stores @tail.
 */
static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
{
	struct perf_event_mmap_page *pc = md->base;

	/*
	 * ensure all reads are done before we write the tail out.
	 */
	mb();
	pc->data_tail = tail;
}
|
|
|
|
/*
 * NOTE(review): declarations only; behaviour is defined outside this header.
 */
bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str);
void perf_evlist__to_front(struct perf_evlist *evlist,
			   struct perf_evsel *move_evsel);
|
|
|
|
/**
 * __evlist__for_each - iterate thru all the evsels
 * @list: list_head instance to iterate
 * @evsel: struct evsel iterator
 *
 * Expands to list_for_each_entry() over @list, with 'node' as the linking
 * member of @evsel.
 */
#define __evlist__for_each(list, evsel) \
	list_for_each_entry(evsel, list, node)
|
|
|
|
/**
 * evlist__for_each - iterate thru all the evsels
 * @evlist: evlist instance to iterate
 * @evsel: struct evsel iterator
 *
 * Applies __evlist__for_each() to the ->entries list of @evlist.
 */
#define evlist__for_each(evlist, evsel) \
	__evlist__for_each(&(evlist)->entries, evsel)
|
|
|
|
/**
 * __evlist__for_each_continue - continue iteration thru all the evsels
 * @list: list_head instance to iterate
 * @evsel: struct evsel iterator
 *
 * Expands to list_for_each_entry_continue() over @list, with 'node' as the
 * linking member of @evsel.
 */
#define __evlist__for_each_continue(list, evsel) \
	list_for_each_entry_continue(evsel, list, node)
|
|
|
|
/**
 * evlist__for_each_continue - continue iteration thru all the evsels
 * @evlist: evlist instance to iterate
 * @evsel: struct evsel iterator
 *
 * Applies __evlist__for_each_continue() to the ->entries list of @evlist.
 */
#define evlist__for_each_continue(evlist, evsel) \
	__evlist__for_each_continue(&(evlist)->entries, evsel)
|
|
|
|
/**
 * __evlist__for_each_reverse - iterate thru all the evsels in reverse order
 * @list: list_head instance to iterate
 * @evsel: struct evsel iterator
 *
 * Expands to list_for_each_entry_reverse() over @list, with 'node' as the
 * linking member of @evsel.
 */
#define __evlist__for_each_reverse(list, evsel) \
	list_for_each_entry_reverse(evsel, list, node)
|
|
|
|
/**
 * evlist__for_each_reverse - iterate thru all the evsels in reverse order
 * @evlist: evlist instance to iterate
 * @evsel: struct evsel iterator
 *
 * Applies __evlist__for_each_reverse() to the ->entries list of @evlist.
 */
#define evlist__for_each_reverse(evlist, evsel) \
	__evlist__for_each_reverse(&(evlist)->entries, evsel)
|
|
|
|
/**
 * __evlist__for_each_safe - safely iterate thru all the evsels
 * @list: list_head instance to iterate
 * @tmp: struct evsel temp iterator
 * @evsel: struct evsel iterator
 *
 * Expands to list_for_each_entry_safe() over @list, with 'node' as the
 * linking member; note that @evsel and @tmp are passed to it in that order.
 */
#define __evlist__for_each_safe(list, tmp, evsel) \
	list_for_each_entry_safe(evsel, tmp, list, node)
|
|
|
|
/**
 * evlist__for_each_safe - safely iterate thru all the evsels
 * @evlist: evlist instance to iterate
 * @tmp: struct evsel temp iterator
 * @evsel: struct evsel iterator
 *
 * Applies __evlist__for_each_safe() to the ->entries list of @evlist.
 */
#define evlist__for_each_safe(evlist, tmp, evsel) \
	__evlist__for_each_safe(&(evlist)->entries, tmp, evsel)
|
|
|
|
/*
 * NOTE(review): declarations only; behaviour is defined outside this header.
 */
void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
				     struct perf_evsel *tracking_evsel);

void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr);

struct perf_evsel *
perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str);
|
|
#endif /* __PERF_EVLIST_H */
|