mirror of
https://github.com/torvalds/linux.git
synced 2024-11-22 12:11:40 +00:00
bpf: support deferring bpf_link dealloc to after RCU grace period
BPF link for some program types is passed as a "context" which can be used by those BPF programs to look up additional information. E.g., for multi-kprobes and multi-uprobes, the link is used to fetch BPF cookie values.

Because of this runtime dependency, when the bpf_link refcnt drops to zero there could still be active BPF programs running and accessing link data.

This patch adds generic support to defer the bpf_link dealloc callback to after an RCU GP, if requested. This is done by exposing two different deallocation callbacks, one synchronous and one deferred. If the deferred one is provided, bpf_link_free() will schedule the dealloc_deferred() callback to happen after an RCU GP.

BPF is using two flavors of RCU: the "classic" non-sleepable one and the RCU tasks trace one. The latter is used when sleepable BPF programs are used. bpf_link_free() accommodates that by checking the underlying BPF program's sleepable flag, and goes either through a normal RCU GP only for non-sleepable programs, or through an RCU tasks trace GP *and* then a normal RCU GP (taking into account the rcu_trace_implies_rcu_gp() optimization), if the BPF program is sleepable.

We use this for multi-kprobe and multi-uprobe links, which dereference the link during program run. We also preventively switch the raw_tp link to use the deferred dealloc callback, as upcoming changes in the bpf-next tree expose raw_tp link data (specifically, the cookie value) to the BPF program at runtime as well.

Fixes: 0dcac27254 ("bpf: Add multi kprobe link")
Fixes: 89ae89f53d ("bpf: Add multi uprobe link")
Reported-by: syzbot+981935d9485a560bfbcb@syzkaller.appspotmail.com
Reported-by: syzbot+2cb5a6c573e98db598cc@syzkaller.appspotmail.com
Reported-by: syzbot+62d8b26793e8a2bd0516@syzkaller.appspotmail.com
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20240328052426.3042617-2-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
parent
e9c856cabe
commit
1a80dbcb2d
@ -1574,12 +1574,26 @@ struct bpf_link {
|
||||
enum bpf_link_type type;
|
||||
const struct bpf_link_ops *ops;
|
||||
struct bpf_prog *prog;
|
||||
struct work_struct work;
|
||||
/* rcu is used before freeing, work can be used to schedule that
|
||||
* RCU-based freeing before that, so they never overlap
|
||||
*/
|
||||
union {
|
||||
struct rcu_head rcu;
|
||||
struct work_struct work;
|
||||
};
|
||||
};
|
||||
|
||||
struct bpf_link_ops {
|
||||
void (*release)(struct bpf_link *link);
|
||||
/* deallocate link resources callback, called without RCU grace period
|
||||
* waiting
|
||||
*/
|
||||
void (*dealloc)(struct bpf_link *link);
|
||||
/* deallocate link resources callback, called after RCU grace period;
|
||||
* if underlying BPF program is sleepable we go through tasks trace
|
||||
* RCU GP and then "classic" RCU GP
|
||||
*/
|
||||
void (*dealloc_deferred)(struct bpf_link *link);
|
||||
int (*detach)(struct bpf_link *link);
|
||||
int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
|
||||
struct bpf_prog *old_prog);
|
||||
|
@ -3024,17 +3024,46 @@ void bpf_link_inc(struct bpf_link *link)
|
||||
atomic64_inc(&link->refcnt);
|
||||
}
|
||||
|
||||
static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
|
||||
{
|
||||
struct bpf_link *link = container_of(rcu, struct bpf_link, rcu);
|
||||
|
||||
/* free bpf_link and its containing memory */
|
||||
link->ops->dealloc_deferred(link);
|
||||
}
|
||||
|
||||
static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
|
||||
{
|
||||
if (rcu_trace_implies_rcu_gp())
|
||||
bpf_link_defer_dealloc_rcu_gp(rcu);
|
||||
else
|
||||
call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp);
|
||||
}
|
||||
|
||||
/* bpf_link_free is guaranteed to be called from process context */
|
||||
static void bpf_link_free(struct bpf_link *link)
|
||||
{
|
||||
bool sleepable = false;
|
||||
|
||||
bpf_link_free_id(link->id);
|
||||
if (link->prog) {
|
||||
sleepable = link->prog->sleepable;
|
||||
/* detach BPF program, clean up used resources */
|
||||
link->ops->release(link);
|
||||
bpf_prog_put(link->prog);
|
||||
}
|
||||
/* free bpf_link and its containing memory */
|
||||
link->ops->dealloc(link);
|
||||
if (link->ops->dealloc_deferred) {
|
||||
/* schedule BPF link deallocation; if underlying BPF program
|
||||
* is sleepable, we need to first wait for RCU tasks trace
|
||||
* sync, then go through "classic" RCU grace period
|
||||
*/
|
||||
if (sleepable)
|
||||
call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
|
||||
else
|
||||
call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
|
||||
}
|
||||
if (link->ops->dealloc)
|
||||
link->ops->dealloc(link);
|
||||
}
|
||||
|
||||
static void bpf_link_put_deferred(struct work_struct *work)
|
||||
@ -3544,7 +3573,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
|
||||
|
||||
/* bpf_link_ops callback table for raw tracepoint links. */
static const struct bpf_link_ops bpf_raw_tp_link_lops = {
|
||||
.release = bpf_raw_tp_link_release,
|
||||
/* NOTE(review): .dealloc and .dealloc_deferred below name the same
 * function. The commit description says this link is switched to the
 * deferred dealloc callback, so this .dealloc line is presumably the
 * pre-patch line of the diff -- confirm that only one of the two is
 * meant to remain.
 */
.dealloc = bpf_raw_tp_link_dealloc,
|
||||
.dealloc_deferred = bpf_raw_tp_link_dealloc,
|
||||
.show_fdinfo = bpf_raw_tp_link_show_fdinfo,
|
||||
.fill_link_info = bpf_raw_tp_link_fill_link_info,
|
||||
};
|
||||
|
@ -2728,7 +2728,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
|
||||
|
||||
/* bpf_link_ops callback table for multi-kprobe links. */
static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
|
||||
.release = bpf_kprobe_multi_link_release,
|
||||
/* NOTE(review): .dealloc and .dealloc_deferred below name the same
 * function. The commit description says multi-kprobe links use the
 * deferred dealloc callback, so this .dealloc line is presumably the
 * pre-patch line of the diff -- confirm that only one of the two is
 * meant to remain.
 */
.dealloc = bpf_kprobe_multi_link_dealloc,
|
||||
.dealloc_deferred = bpf_kprobe_multi_link_dealloc,
|
||||
.fill_link_info = bpf_kprobe_multi_link_fill_link_info,
|
||||
};
|
||||
|
||||
@ -3242,7 +3242,7 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
|
||||
|
||||
/* bpf_link_ops callback table for multi-uprobe links. */
static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
|
||||
.release = bpf_uprobe_multi_link_release,
|
||||
/* NOTE(review): .dealloc and .dealloc_deferred below name the same
 * function. The commit description says multi-uprobe links use the
 * deferred dealloc callback, so this .dealloc line is presumably the
 * pre-patch line of the diff -- confirm that only one of the two is
 * meant to remain.
 */
.dealloc = bpf_uprobe_multi_link_dealloc,
|
||||
.dealloc_deferred = bpf_uprobe_multi_link_dealloc,
|
||||
.fill_link_info = bpf_uprobe_multi_link_fill_link_info,
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user