perf/core: Implement the 'perf_kprobe' PMU
A new PMU type, perf_kprobe, is added. Based on the attr from
perf_event_open(), perf_kprobe creates a kprobe (or kretprobe) for the
perf_event. This kprobe is private to the perf_event, and thus is not
added to the global lists and not available in tracefs.

Two functions, create_local_trace_kprobe() and
destroy_local_trace_kprobe(), are added to create and destroy these
local trace_kprobes.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Yonghong Song <yhs@fb.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Cc: <daniel@iogearbox.net>
Cc: <davem@davemloft.net>
Cc: <kernel-team@fb.com>
Cc: <rostedt@goodmis.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20171206224518.3598254-6-songliubraving@fb.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent 0d8dd67be0
commit e12f03d703
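For context (not part of this commit): a minimal userspace sketch of opening such a private kprobe event. It assumes the perf_event_attr fields this patch reads (kprobe_func, kprobe_addr, probe_offset), which are added to the uapi header elsewhere in the same series, and the retprobe bit declared below via PMU_FORMAT_ATTR(retprobe, "config:0"). The helper name open_kprobe() is hypothetical.

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

/*
 * Open a kprobe (or kretprobe) on a kernel symbol for the calling task.
 * pmu_type is the dynamically assigned id of the "kprobe" PMU (see the
 * sysfs note further down).
 */
static int open_kprobe(int pmu_type, const char *func, int is_retprobe)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = pmu_type;
	attr.kprobe_func = (uint64_t)(uintptr_t)func;	/* symbol name */
	attr.probe_offset = 0;				/* offset into the symbol */
	if (is_retprobe)
		attr.config |= 1;	/* retprobe bit, format "config:0" */
	attr.sample_period = 1;

	/* this process, any CPU */
	return perf_event_open(&attr, 0, -1, -1, 0);
}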
include/linux/trace_events.h
@@ -533,6 +533,10 @@ extern int perf_trace_init(struct perf_event *event);
 extern void perf_trace_destroy(struct perf_event *event);
 extern int perf_trace_add(struct perf_event *event, int flags);
 extern void perf_trace_del(struct perf_event *event, int flags);
+#ifdef CONFIG_KPROBE_EVENTS
+extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe);
+extern void perf_kprobe_destroy(struct perf_event *event);
+#endif
 extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
				     char *filter_str);
 extern void ftrace_profile_free_filter(struct perf_event *event);
kernel/events/core.c
@@ -7992,9 +7992,77 @@ static struct pmu perf_tracepoint = {
 	.read		= perf_swevent_read,
 };
 
+#ifdef CONFIG_KPROBE_EVENTS
+/*
+ * Flags in config, used by dynamic PMU kprobe and uprobe
+ * The flags should match following PMU_FORMAT_ATTR().
+ *
+ * PERF_PROBE_CONFIG_IS_RETPROBE if set, create kretprobe/uretprobe
+ *                               if not set, create kprobe/uprobe
+ */
+enum perf_probe_config {
+	PERF_PROBE_CONFIG_IS_RETPROBE = 1U << 0,  /* [k,u]retprobe */
+};
+
+PMU_FORMAT_ATTR(retprobe, "config:0");
+
+static struct attribute *probe_attrs[] = {
+	&format_attr_retprobe.attr,
+	NULL,
+};
+
+static struct attribute_group probe_format_group = {
+	.name = "format",
+	.attrs = probe_attrs,
+};
+
+static const struct attribute_group *probe_attr_groups[] = {
+	&probe_format_group,
+	NULL,
+};
+
+static int perf_kprobe_event_init(struct perf_event *event);
+static struct pmu perf_kprobe = {
+	.task_ctx_nr	= perf_sw_context,
+	.event_init	= perf_kprobe_event_init,
+	.add		= perf_trace_add,
+	.del		= perf_trace_del,
+	.start		= perf_swevent_start,
+	.stop		= perf_swevent_stop,
+	.read		= perf_swevent_read,
+	.attr_groups	= probe_attr_groups,
+};
+
+static int perf_kprobe_event_init(struct perf_event *event)
+{
+	int err;
+	bool is_retprobe;
+
+	if (event->attr.type != perf_kprobe.type)
+		return -ENOENT;
+	/*
+	 * no branch sampling for probe events
+	 */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
+	err = perf_kprobe_init(event, is_retprobe);
+	if (err)
+		return err;
+
+	event->destroy = perf_kprobe_destroy;
+
+	return 0;
+}
+#endif /* CONFIG_KPROBE_EVENTS */
+
 static inline void perf_tp_register(void)
 {
 	perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
+#ifdef CONFIG_KPROBE_EVENTS
+	perf_pmu_register(&perf_kprobe, "kprobe", -1);
+#endif
 }
 
 static void perf_event_free_filter(struct perf_event *event)
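Note that perf_kprobe is registered with type -1, so the core allocates a PMU id dynamically rather than using a fixed PERF_TYPE_* constant. For named PMUs that id is exported through sysfs; a sketch of how userspace could discover it, assuming the standard event_source sysfs layout:

#include <stdio.h>

/* Read the dynamically assigned PMU type for the "kprobe" PMU. */
static int kprobe_pmu_type(void)
{
	FILE *f = fopen("/sys/bus/event_source/devices/kprobe/type", "r");
	int type = -1;

	if (f) {
		if (fscanf(f, "%d", &type) != 1)
			type = -1;
		fclose(f);
	}
	return type;
}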
@@ -8071,13 +8139,28 @@ static void perf_event_free_bpf_handler(struct perf_event *event)
 }
 #endif
 
+/*
+ * returns true if the event is a tracepoint, or a kprobe/upprobe created
+ * with perf_event_open()
+ */
+static inline bool perf_event_is_tracing(struct perf_event *event)
+{
+	if (event->pmu == &perf_tracepoint)
+		return true;
+#ifdef CONFIG_KPROBE_EVENTS
+	if (event->pmu == &perf_kprobe)
+		return true;
+#endif
+	return false;
+}
+
 static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 {
 	bool is_kprobe, is_tracepoint, is_syscall_tp;
 	struct bpf_prog *prog;
 	int ret;
 
-	if (event->attr.type != PERF_TYPE_TRACEPOINT)
+	if (!perf_event_is_tracing(event))
 		return perf_event_set_bpf_handler(event, prog_fd);
 
 	is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
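Since perf_event_set_bpf_prog() now keys off perf_event_is_tracing() rather than attr.type, a BPF program can be attached to a perf_kprobe event fd the same way as to a tracepoint event. A sketch of the userspace side, assuming event_fd came from perf_event_open() on this PMU and bpf_prog_fd is a loaded kprobe-type BPF program:

#include <sys/ioctl.h>
#include <linux/perf_event.h>

static int attach_bpf(int event_fd, int bpf_prog_fd)
{
	/* lands in perf_event_set_bpf_prog() above */
	return ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, bpf_prog_fd);
}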
@@ -8116,7 +8199,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 
 static void perf_event_free_bpf_prog(struct perf_event *event)
 {
-	if (event->attr.type != PERF_TYPE_TRACEPOINT) {
+	if (!perf_event_is_tracing(event)) {
 		perf_event_free_bpf_handler(event);
 		return;
 	}
@@ -8535,47 +8618,36 @@ fail_clear_files:
 	return ret;
 }
 
-static int
-perf_tracepoint_set_filter(struct perf_event *event, char *filter_str)
-{
-	struct perf_event_context *ctx = event->ctx;
-	int ret;
-
-	/*
-	 * Beware, here be dragons!!
-	 *
-	 * the tracepoint muck will deadlock against ctx->mutex, but the tracepoint
-	 * stuff does not actually need it. So temporarily drop ctx->mutex. As per
-	 * perf_event_ctx_lock() we already have a reference on ctx.
-	 *
-	 * This can result in event getting moved to a different ctx, but that
-	 * does not affect the tracepoint state.
-	 */
-	mutex_unlock(&ctx->mutex);
-	ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
-	mutex_lock(&ctx->mutex);
-
-	return ret;
-}
-
 static int perf_event_set_filter(struct perf_event *event, void __user *arg)
 {
-	char *filter_str;
 	int ret = -EINVAL;
-
-	if ((event->attr.type != PERF_TYPE_TRACEPOINT ||
-	    !IS_ENABLED(CONFIG_EVENT_TRACING)) &&
-	    !has_addr_filter(event))
-		return -EINVAL;
+	char *filter_str;
 
 	filter_str = strndup_user(arg, PAGE_SIZE);
 	if (IS_ERR(filter_str))
 		return PTR_ERR(filter_str);
 
-	if (IS_ENABLED(CONFIG_EVENT_TRACING) &&
-	    event->attr.type == PERF_TYPE_TRACEPOINT)
-		ret = perf_tracepoint_set_filter(event, filter_str);
-	else if (has_addr_filter(event))
+#ifdef CONFIG_EVENT_TRACING
+	if (perf_event_is_tracing(event)) {
+		struct perf_event_context *ctx = event->ctx;
+
+		/*
+		 * Beware, here be dragons!!
+		 *
+		 * the tracepoint muck will deadlock against ctx->mutex, but
+		 * the tracepoint stuff does not actually need it. So
+		 * temporarily drop ctx->mutex. As per perf_event_ctx_lock() we
+		 * already have a reference on ctx.
+		 *
+		 * This can result in event getting moved to a different ctx,
+		 * but that does not affect the tracepoint state.
+		 */
+		mutex_unlock(&ctx->mutex);
+		ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
+		mutex_lock(&ctx->mutex);
+	} else
+#endif
+	if (has_addr_filter(event))
 		ret = perf_event_set_addr_filter(event, filter_str);
 
 	kfree(filter_str);
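perf_event_set_filter() is reached from the PERF_EVENT_IOC_SET_FILTER ioctl; after this change, any event for which perf_event_is_tracing() holds takes the ftrace_profile_set_filter() path. A sketch of the userspace side:

#include <sys/ioctl.h>
#include <linux/perf_event.h>

static int set_event_filter(int event_fd, const char *filter)
{
	/* filter is a tracing filter string, e.g. "common_pid != 0" */
	return ioctl(event_fd, PERF_EVENT_IOC_SET_FILTER, filter);
}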
kernel/trace/trace_event_perf.c
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/kprobes.h>
 #include "trace.h"
+#include "trace_probe.h"
 
 static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
 
@@ -237,6 +238,54 @@ void perf_trace_destroy(struct perf_event *p_event)
 	mutex_unlock(&event_mutex);
 }
 
+#ifdef CONFIG_KPROBE_EVENTS
+int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe)
+{
+	int ret;
+	char *func = NULL;
+	struct trace_event_call *tp_event;
+
+	if (p_event->attr.kprobe_func) {
+		func = kzalloc(KSYM_NAME_LEN, GFP_KERNEL);
+		if (!func)
+			return -ENOMEM;
+		ret = strncpy_from_user(
+			func, u64_to_user_ptr(p_event->attr.kprobe_func),
+			KSYM_NAME_LEN);
+		if (ret < 0)
+			goto out;
+
+		if (func[0] == '\0') {
+			kfree(func);
+			func = NULL;
+		}
+	}
+
+	tp_event = create_local_trace_kprobe(
+		func, (void *)(unsigned long)(p_event->attr.kprobe_addr),
+		p_event->attr.probe_offset, is_retprobe);
+	if (IS_ERR(tp_event)) {
+		ret = PTR_ERR(tp_event);
+		goto out;
+	}
+
+	ret = perf_trace_event_init(tp_event, p_event);
+	if (ret)
+		destroy_local_trace_kprobe(tp_event);
+out:
+	kfree(func);
+	return ret;
+}
+
+void perf_kprobe_destroy(struct perf_event *p_event)
+{
+	perf_trace_event_close(p_event);
+	perf_trace_event_unreg(p_event);
+
+	destroy_local_trace_kprobe(p_event->tp_event);
+}
+#endif /* CONFIG_KPROBE_EVENTS */
+
 int perf_trace_add(struct perf_event *p_event, int flags)
 {
 	struct trace_event_call *tp_event = p_event->tp_event;
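perf_kprobe_init() supports two addressing modes: a symbol name copied from attr.kprobe_func (optionally offset by attr.probe_offset), or a raw address in attr.kprobe_addr when no function name is given. A hypothetical attr-filling helper, again assuming the uapi fields from this series:

#include <stdint.h>
#include <string.h>
#include <linux/perf_event.h>

static void fill_kprobe_attr(struct perf_event_attr *attr,
			     const char *symbol, uint64_t addr)
{
	memset(attr, 0, sizeof(*attr));
	attr->size = sizeof(*attr);
	if (symbol) {
		/* probe symbol + offset */
		attr->kprobe_func = (uint64_t)(uintptr_t)symbol;
		attr->probe_offset = 0;
	} else {
		/* probe a raw kernel address */
		attr->kprobe_addr = addr;
	}
}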
kernel/trace/trace_kprobe.c
@@ -438,6 +438,14 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
 		disable_kprobe(&tk->rp.kp);
 		wait = 1;
 	}
+
+	/*
+	 * if tk is not added to any list, it must be a local trace_kprobe
+	 * created with perf_event_open. We don't need to wait for these
+	 * trace_kprobes
+	 */
+	if (list_empty(&tk->list))
+		wait = 0;
  out:
 	if (wait) {
 		/*
@@ -1313,12 +1321,9 @@ static struct trace_event_functions kprobe_funcs = {
 	.trace	= print_kprobe_event
 };
 
-static int register_kprobe_event(struct trace_kprobe *tk)
+static inline void init_trace_event_call(struct trace_kprobe *tk,
+					 struct trace_event_call *call)
 {
-	struct trace_event_call *call = &tk->tp.call;
-	int ret;
-
-	/* Initialize trace_event_call */
 	INIT_LIST_HEAD(&call->class->fields);
 	if (trace_kprobe_is_return(tk)) {
 		call->event.funcs = &kretprobe_funcs;
@@ -1327,6 +1332,19 @@ static int register_kprobe_event(struct trace_kprobe *tk)
 		call->event.funcs = &kprobe_funcs;
 		call->class->define_fields = kprobe_event_define_fields;
 	}
+
+	call->flags = TRACE_EVENT_FL_KPROBE;
+	call->class->reg = kprobe_register;
+	call->data = tk;
+}
+
+static int register_kprobe_event(struct trace_kprobe *tk)
+{
+	struct trace_event_call *call = &tk->tp.call;
+	int ret = 0;
+
+	init_trace_event_call(tk, call);
+
 	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
 		return -ENOMEM;
 	ret = register_trace_event(&call->event);
@@ -1334,9 +1352,6 @@ static int register_kprobe_event(struct trace_kprobe *tk)
 		kfree(call->print_fmt);
 		return -ENODEV;
 	}
-	call->flags = TRACE_EVENT_FL_KPROBE;
-	call->class->reg = kprobe_register;
-	call->data = tk;
 	ret = trace_add_event_call(call);
 	if (ret) {
 		pr_info("Failed to register kprobe event: %s\n",
@@ -1358,6 +1373,66 @@ static int unregister_kprobe_event(struct trace_kprobe *tk)
 	return ret;
 }
 
+#ifdef CONFIG_PERF_EVENTS
+/* create a trace_kprobe, but don't add it to global lists */
+struct trace_event_call *
+create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
+			  bool is_return)
+{
+	struct trace_kprobe *tk;
+	int ret;
+	char *event;
+
+	/*
+	 * local trace_kprobes are not added to probe_list, so they are never
+	 * searched in find_trace_kprobe(). Therefore, there is no concern of
+	 * duplicated name here.
+	 */
+	event = func ? func : "DUMMY_EVENT";
+
+	tk = alloc_trace_kprobe(KPROBE_EVENT_SYSTEM, event, (void *)addr, func,
+				offs, 0 /* maxactive */, 0 /* nargs */,
+				is_return);
+
+	if (IS_ERR(tk)) {
+		pr_info("Failed to allocate trace_probe.(%d)\n",
+			(int)PTR_ERR(tk));
+		return ERR_CAST(tk);
+	}
+
+	init_trace_event_call(tk, &tk->tp.call);
+
+	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	ret = __register_trace_kprobe(tk);
+	if (ret < 0)
+		goto error;
+
+	return &tk->tp.call;
+error:
+	free_trace_kprobe(tk);
+	return ERR_PTR(ret);
+}
+
+void destroy_local_trace_kprobe(struct trace_event_call *event_call)
+{
+	struct trace_kprobe *tk;
+
+	tk = container_of(event_call, struct trace_kprobe, tp.call);
+
+	if (trace_probe_is_enabled(&tk->tp)) {
+		WARN_ON(1);
+		return;
+	}
+
+	__unregister_trace_kprobe(tk);
+	free_trace_kprobe(tk);
+}
+#endif /* CONFIG_PERF_EVENTS */
+
 /* Make a tracefs interface for controlling probe points */
 static __init int init_kprobe_trace(void)
 {
kernel/trace/trace_probe.h
@@ -404,3 +404,10 @@ store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,
 }
 
 extern int set_print_fmt(struct trace_probe *tp, bool is_return);
+
+#ifdef CONFIG_PERF_EVENTS
+extern struct trace_event_call *
+create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
+			  bool is_return);
+extern void destroy_local_trace_kprobe(struct trace_event_call *event_call);
+#endif