mirror of
https://github.com/torvalds/linux.git
synced 2024-11-26 14:12:06 +00:00
bpf: Add poke dependency tracking for prog array maps
This work adds program tracking to prog array maps. This is needed so that upon prog array updates/deletions we can fix up all programs which make use of this tail call map. We add ops->map_poke_{un,}track() helpers to maps to maintain the list of programs and ops->map_poke_run() for triggering the actual update. bpf_array_aux is extended to contain the list head and poke_mutex in order to serialize program patching during updates/deletions. bpf_free_used_maps() will untrack the program shortly before dropping the reference to the map. For clearing out the prog array once all urefs are dropped we need to use schedule_work() to have a sleepable context. The prog_array_map_poke_run() is triggered during updates/deletions and walks the maintained prog list. It checks in their poke_tabs whether the map and key match and runs the actual bpf_arch_text_poke() for patching in the nop or new jmp location. Depending on the type of update, we use one of BPF_MOD_{NOP_TO_JUMP,JUMP_TO_NOP,JUMP_TO_JUMP}. Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Andrii Nakryiko <andriin@fb.com> Link: https://lore.kernel.org/bpf/1fb364bb3c565b3e415d5ea348f036ff379e779d.1574452833.git.daniel@iogearbox.net
This commit is contained in:
parent
a66886fe6c
commit
da765a2f59
@ -22,6 +22,7 @@ struct bpf_verifier_env;
|
||||
struct bpf_verifier_log;
|
||||
struct perf_event;
|
||||
struct bpf_prog;
|
||||
struct bpf_prog_aux;
|
||||
struct bpf_map;
|
||||
struct sock;
|
||||
struct seq_file;
|
||||
@ -64,6 +65,12 @@ struct bpf_map_ops {
|
||||
const struct btf_type *key_type,
|
||||
const struct btf_type *value_type);
|
||||
|
||||
/* Prog poke tracking helpers. */
|
||||
int (*map_poke_track)(struct bpf_map *map, struct bpf_prog_aux *aux);
|
||||
void (*map_poke_untrack)(struct bpf_map *map, struct bpf_prog_aux *aux);
|
||||
void (*map_poke_run)(struct bpf_map *map, u32 key, struct bpf_prog *old,
|
||||
struct bpf_prog *new);
|
||||
|
||||
/* Direct value access helpers. */
|
||||
int (*map_direct_value_addr)(const struct bpf_map *map,
|
||||
u64 *imm, u32 off);
|
||||
@ -588,6 +595,11 @@ struct bpf_array_aux {
|
||||
*/
|
||||
enum bpf_prog_type type;
|
||||
bool jited;
|
||||
/* Programs with direct jumps into programs part of this array. */
|
||||
struct list_head poke_progs;
|
||||
struct bpf_map *map;
|
||||
struct mutex poke_mutex;
|
||||
struct work_struct work;
|
||||
};
|
||||
|
||||
struct bpf_array {
|
||||
|
@ -586,10 +586,17 @@ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
|
||||
if (IS_ERR(new_ptr))
|
||||
return PTR_ERR(new_ptr);
|
||||
|
||||
if (map->ops->map_poke_run) {
|
||||
mutex_lock(&array->aux->poke_mutex);
|
||||
old_ptr = xchg(array->ptrs + index, new_ptr);
|
||||
map->ops->map_poke_run(map, index, old_ptr, new_ptr);
|
||||
mutex_unlock(&array->aux->poke_mutex);
|
||||
} else {
|
||||
old_ptr = xchg(array->ptrs + index, new_ptr);
|
||||
}
|
||||
|
||||
if (old_ptr)
|
||||
map->ops->map_fd_put_ptr(old_ptr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -602,7 +609,15 @@ static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
|
||||
if (index >= array->map.max_entries)
|
||||
return -E2BIG;
|
||||
|
||||
if (map->ops->map_poke_run) {
|
||||
mutex_lock(&array->aux->poke_mutex);
|
||||
old_ptr = xchg(array->ptrs + index, NULL);
|
||||
map->ops->map_poke_run(map, index, old_ptr, NULL);
|
||||
mutex_unlock(&array->aux->poke_mutex);
|
||||
} else {
|
||||
old_ptr = xchg(array->ptrs + index, NULL);
|
||||
}
|
||||
|
||||
if (old_ptr) {
|
||||
map->ops->map_fd_put_ptr(old_ptr);
|
||||
return 0;
|
||||
@ -671,6 +686,152 @@ static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
struct prog_poke_elem {
|
||||
struct list_head list;
|
||||
struct bpf_prog_aux *aux;
|
||||
};
|
||||
|
||||
static int prog_array_map_poke_track(struct bpf_map *map,
|
||||
struct bpf_prog_aux *prog_aux)
|
||||
{
|
||||
struct prog_poke_elem *elem;
|
||||
struct bpf_array_aux *aux;
|
||||
int ret = 0;
|
||||
|
||||
aux = container_of(map, struct bpf_array, map)->aux;
|
||||
mutex_lock(&aux->poke_mutex);
|
||||
list_for_each_entry(elem, &aux->poke_progs, list) {
|
||||
if (elem->aux == prog_aux)
|
||||
goto out;
|
||||
}
|
||||
|
||||
elem = kmalloc(sizeof(*elem), GFP_KERNEL);
|
||||
if (!elem) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&elem->list);
|
||||
/* We must track the program's aux info at this point in time
|
||||
* since the program pointer itself may not be stable yet, see
|
||||
* also comment in prog_array_map_poke_run().
|
||||
*/
|
||||
elem->aux = prog_aux;
|
||||
|
||||
list_add_tail(&elem->list, &aux->poke_progs);
|
||||
out:
|
||||
mutex_unlock(&aux->poke_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void prog_array_map_poke_untrack(struct bpf_map *map,
|
||||
struct bpf_prog_aux *prog_aux)
|
||||
{
|
||||
struct prog_poke_elem *elem, *tmp;
|
||||
struct bpf_array_aux *aux;
|
||||
|
||||
aux = container_of(map, struct bpf_array, map)->aux;
|
||||
mutex_lock(&aux->poke_mutex);
|
||||
list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
|
||||
if (elem->aux == prog_aux) {
|
||||
list_del_init(&elem->list);
|
||||
kfree(elem);
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&aux->poke_mutex);
|
||||
}
|
||||
|
||||
/*
 * Patch every tracked program's tail call sites for slot @key of @map.
 *
 * @old and @new are the programs previously and now stored in the slot;
 * either may be NULL. The pair selects the patch type:
 *   NULL -> prog : BPF_MOD_NOP_TO_JUMP
 *   prog -> NULL : BPF_MOD_JUMP_TO_NOP
 *   prog -> prog : BPF_MOD_JUMP_TO_JUMP
 * If both are NULL there is nothing to patch and the function returns.
 *
 * The caller is expected to hold poke_mutex; a one-time warning is emitted
 * if the mutex is not locked.
 */
static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
				    struct bpf_prog *old,
				    struct bpf_prog *new)
{
	struct bpf_array_aux *array_aux;
	enum bpf_text_poke_type type;
	struct prog_poke_elem *entry;

	if (!old && !new)
		return;

	if (!old)
		type = BPF_MOD_NOP_TO_JUMP;
	else if (!new)
		type = BPF_MOD_JUMP_TO_NOP;
	else
		type = BPF_MOD_JUMP_TO_JUMP;

	array_aux = container_of(map, struct bpf_array, map)->aux;
	WARN_ON_ONCE(!mutex_is_locked(&array_aux->poke_mutex));

	list_for_each_entry(entry, &array_aux->poke_progs, list) {
		struct bpf_prog_aux *prog_aux = entry->aux;
		int idx;

		for (idx = 0; idx < prog_aux->size_poke_tab; idx++) {
			struct bpf_jit_poke_descriptor *poke =
				&prog_aux->poke_tab[idx];
			u8 *old_addr = NULL, *new_addr = NULL;
			int ret;

			/* Things to keep in mind here:
			 *
			 * 1) Only aux may be accessed in this context, never
			 *    aux->prog: it might not be stable yet, and
			 *    touching it risks a use after free.
			 * 2) When tracking of aux starts, the program is not
			 *    JITed yet and has no kallsyms entry. Such
			 *    entries are skipped because poke->ip_stable is
			 *    not active yet; the JIT performs the final
			 *    fixup before marking it stable. The individual
			 *    poke->ip_stable flags are activated one after
			 *    another, so tail call updates may arrive here
			 *    while the JIT is still finishing its final
			 *    fixup for entries that are not yet activated.
			 * 3) On program teardown, the kallsyms entry is
			 *    removed from an RCU callback, whereas untracking
			 *    needs sleepable context. bpf_arch_text_poke()
			 *    may therefore no longer recognize the address
			 *    as BPF text and bail out with -EINVAL. Those
			 *    sites are unreachable since the RCU grace
			 *    period has already passed, so they are skipped.
			 * 4) A program hitting refcount zero while patching
			 *    is in progress is okay: we hold poke_mutex, and
			 *    programs are untracked before their JIT buffer
			 *    is freed. If the kallsyms entry disappears in
			 *    the middle of patching, the remainder is
			 *    skipped, which is fine due to point 3).
			 * 5) Any other error from bpf_arch_text_poke() below
			 *    is an unexpected bug.
			 */
			if (!READ_ONCE(poke->ip_stable))
				continue;
			if (poke->reason != BPF_POKE_REASON_TAIL_CALL ||
			    poke->tail_call.map != map ||
			    poke->tail_call.key != key)
				continue;

			if (old)
				old_addr = (u8 *)old->bpf_func + poke->adj_off;
			if (new)
				new_addr = (u8 *)new->bpf_func + poke->adj_off;

			ret = bpf_arch_text_poke(poke->ip, type,
						 old_addr, new_addr);
			BUG_ON(ret < 0 && ret != -EINVAL);
		}
	}
}
|
||||
|
||||
static void prog_array_map_clear_deferred(struct work_struct *work)
|
||||
{
|
||||
struct bpf_map *map = container_of(work, struct bpf_array_aux,
|
||||
work)->map;
|
||||
bpf_fd_array_map_clear(map);
|
||||
bpf_map_put(map);
|
||||
}
|
||||
|
||||
static void prog_array_map_clear(struct bpf_map *map)
|
||||
{
|
||||
struct bpf_array_aux *aux = container_of(map, struct bpf_array,
|
||||
map)->aux;
|
||||
bpf_map_inc(map);
|
||||
schedule_work(&aux->work);
|
||||
}
|
||||
|
||||
static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
|
||||
{
|
||||
struct bpf_array_aux *aux;
|
||||
@ -680,6 +841,10 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
|
||||
if (!aux)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
INIT_WORK(&aux->work, prog_array_map_clear_deferred);
|
||||
INIT_LIST_HEAD(&aux->poke_progs);
|
||||
mutex_init(&aux->poke_mutex);
|
||||
|
||||
map = array_map_alloc(attr);
|
||||
if (IS_ERR(map)) {
|
||||
kfree(aux);
|
||||
@ -687,14 +852,21 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
|
||||
}
|
||||
|
||||
container_of(map, struct bpf_array, map)->aux = aux;
|
||||
aux->map = map;
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
static void prog_array_map_free(struct bpf_map *map)
|
||||
{
|
||||
struct prog_poke_elem *elem, *tmp;
|
||||
struct bpf_array_aux *aux;
|
||||
|
||||
aux = container_of(map, struct bpf_array, map)->aux;
|
||||
list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
|
||||
list_del_init(&elem->list);
|
||||
kfree(elem);
|
||||
}
|
||||
kfree(aux);
|
||||
fd_array_map_free(map);
|
||||
}
|
||||
@ -703,13 +875,16 @@ const struct bpf_map_ops prog_array_map_ops = {
|
||||
.map_alloc_check = fd_array_map_alloc_check,
|
||||
.map_alloc = prog_array_map_alloc,
|
||||
.map_free = prog_array_map_free,
|
||||
.map_poke_track = prog_array_map_poke_track,
|
||||
.map_poke_untrack = prog_array_map_poke_untrack,
|
||||
.map_poke_run = prog_array_map_poke_run,
|
||||
.map_get_next_key = array_map_get_next_key,
|
||||
.map_lookup_elem = fd_array_map_lookup_elem,
|
||||
.map_delete_elem = fd_array_map_delete_elem,
|
||||
.map_fd_get_ptr = prog_fd_array_get_ptr,
|
||||
.map_fd_put_ptr = prog_fd_array_put_ptr,
|
||||
.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
|
||||
.map_release_uref = bpf_fd_array_map_clear,
|
||||
.map_release_uref = prog_array_map_clear,
|
||||
.map_seq_show_elem = prog_array_map_seq_show_elem,
|
||||
};
|
||||
|
||||
|
@ -2050,11 +2050,16 @@ static void bpf_free_cgroup_storage(struct bpf_prog_aux *aux)
|
||||
|
||||
static void bpf_free_used_maps(struct bpf_prog_aux *aux)
|
||||
{
|
||||
struct bpf_map *map;
|
||||
int i;
|
||||
|
||||
bpf_free_cgroup_storage(aux);
|
||||
for (i = 0; i < aux->used_map_cnt; i++)
|
||||
bpf_map_put(aux->used_maps[i]);
|
||||
for (i = 0; i < aux->used_map_cnt; i++) {
|
||||
map = aux->used_maps[i];
|
||||
if (map->ops->map_poke_untrack)
|
||||
map->ops->map_poke_untrack(map, aux);
|
||||
bpf_map_put(map);
|
||||
}
|
||||
kfree(aux->used_maps);
|
||||
}
|
||||
|
||||
|
@ -25,12 +25,13 @@
|
||||
#include <linux/nospec.h>
|
||||
#include <uapi/linux/btf.h>
|
||||
|
||||
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
|
||||
(map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
|
||||
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
|
||||
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
|
||||
(map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
|
||||
#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
|
||||
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
|
||||
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))
|
||||
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \
|
||||
IS_FD_HASH(map))
|
||||
|
||||
#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY)
|
||||
|
||||
@ -877,7 +878,7 @@ static int map_lookup_elem(union bpf_attr *attr)
|
||||
err = bpf_percpu_cgroup_storage_copy(map, key, value);
|
||||
} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
|
||||
err = bpf_stackmap_copy(map, key, value);
|
||||
} else if (IS_FD_ARRAY(map)) {
|
||||
} else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
|
||||
err = bpf_fd_array_map_lookup_elem(map, key, value);
|
||||
} else if (IS_FD_HASH(map)) {
|
||||
err = bpf_fd_htab_map_lookup_elem(map, key, value);
|
||||
@ -1004,6 +1005,10 @@ static int map_update_elem(union bpf_attr *attr)
|
||||
map->map_type == BPF_MAP_TYPE_SOCKMAP) {
|
||||
err = map->ops->map_update_elem(map, key, value, attr->flags);
|
||||
goto out;
|
||||
} else if (IS_FD_PROG_ARRAY(map)) {
|
||||
err = bpf_fd_array_map_update_elem(map, f.file, key, value,
|
||||
attr->flags);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
|
||||
@ -1086,6 +1091,9 @@ static int map_delete_elem(union bpf_attr *attr)
|
||||
if (bpf_map_is_dev_bound(map)) {
|
||||
err = bpf_map_offload_delete_elem(map, key);
|
||||
goto out;
|
||||
} else if (IS_FD_PROG_ARRAY(map)) {
|
||||
err = map->ops->map_delete_elem(map, key);
|
||||
goto out;
|
||||
}
|
||||
|
||||
preempt_disable();
|
||||
|
Loading…
Reference in New Issue
Block a user