libbpf: use stable map placeholder FDs

Move map creation to later during BPF object loading by pre-creating
stable placeholder FDs (utilizing memfd_create()). Use dup2()
syscall to then atomically make those placeholder FDs point to real
kernel BPF map objects.

This change allows BPF map creation to be delayed until after all the BPF
program relocations. That, in turn, allows BTF finalization and loading
into the kernel to be delayed until after all the relocations as well.
We'll take advantage of the latter in subsequent patches to allow libbpf
to adjust BTF in a way that helps with BPF global function usage.

Clean up a few places where we close map->fd, which now shouldn't
happen, because map->fd should be a valid FD regardless of whether map
was created or not. Surprisingly and nicely it simplifies a bunch of
error handling code. If this change doesn't backfire, I'm tempted to
pre-create such stable FDs for other entities (progs, maybe even BTF).
We previously did some manipulations to make gen_loader work with fake
map FDs; with stable map FDs this hack is no longer necessary for maps (we
still have it for BTF, but I left it as is for now).

Acked-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20240104013847.3875810-5-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Andrii Nakryiko 2024-01-03 17:38:42 -08:00 committed by Alexei Starovoitov
parent f08c18e083
commit dac645b950
2 changed files with 77 additions and 38 deletions

View File

@ -1503,6 +1503,16 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam
return ERR_PTR(-ENOENT);
}
/* Allocate a placeholder file descriptor backed by an anonymous memfd.
 *
 * The memfd is created with MFD_CLOEXEC and the resulting descriptor is
 * passed through ensure_good_fd() before being handed to the caller.
 *
 * Returns the descriptor (>= 0) on success, or -errno if the value that
 * comes back from ensure_good_fd() is negative.
 */
static int create_placeholder_fd(void)
{
	int placeholder = memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC);

	placeholder = ensure_good_fd(placeholder);

	return placeholder < 0 ? -errno : placeholder;
}
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
{
struct bpf_map *map;
@ -1515,7 +1525,21 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
map = &obj->maps[obj->nr_maps++];
map->obj = obj;
map->fd = -1;
/* Preallocate map FD without actually creating BPF map just yet.
* These map FD "placeholders" will be reused later without changing
* FD value when map is actually created in the kernel.
*
* This is useful to be able to perform BPF program relocations
* without having to create BPF maps before that step. This allows us
* to finalize and load BTF very late in BPF object's loading phase,
* right before BPF maps have to be created and BPF programs have to
* be loaded. By having these map FD placeholders we can perform all
* the sanitizations, relocations, and any other adjustments before we
* start creating actual BPF kernel objects (BTF, maps, progs).
*/
map->fd = create_placeholder_fd();
if (map->fd < 0)
return ERR_PTR(map->fd);
map->inner_map_fd = -1;
map->autocreate = true;
@ -2607,7 +2631,9 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
map->inner_map = calloc(1, sizeof(*map->inner_map));
if (!map->inner_map)
return -ENOMEM;
map->inner_map->fd = -1;
map->inner_map->fd = create_placeholder_fd();
if (map->inner_map->fd < 0)
return map->inner_map->fd;
map->inner_map->sec_idx = sec_idx;
map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
if (!map->inner_map->name)
@ -4549,14 +4575,12 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
goto err_free_new_name;
}
err = zclose(map->fd);
if (err) {
err = -errno;
goto err_close_new_fd;
}
err = reuse_fd(map->fd, new_fd);
if (err)
goto err_free_new_name;
free(map->name);
map->fd = new_fd;
map->name = new_name;
map->def.type = info.type;
map->def.key_size = info.key_size;
@ -4570,8 +4594,6 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
return 0;
err_close_new_fd:
close(new_fd);
err_free_new_name:
free(new_name);
return libbpf_err(err);
@ -5210,7 +5232,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
LIBBPF_OPTS(bpf_map_create_opts, create_attr);
struct bpf_map_def *def = &map->def;
const char *map_name = NULL;
int err = 0;
int err = 0, map_fd;
if (kernel_supports(obj, FEAT_PROG_NAME))
map_name = map->name;
@ -5269,17 +5291,19 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
bpf_gen__map_create(obj->gen_loader, def->type, map_name,
def->key_size, def->value_size, def->max_entries,
&create_attr, is_inner ? -1 : map - obj->maps);
/* Pretend to have valid FD to pass various fd >= 0 checks.
* This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
/* We keep pretending we have valid FD to pass various fd >= 0
* checks by just keeping original placeholder FDs in place.
* See bpf_object__add_map() comment.
* This placeholder fd will not be used with any syscall and
* will be reset to -1 eventually.
*/
map->fd = 0;
map_fd = map->fd;
} else {
map->fd = bpf_map_create(def->type, map_name,
def->key_size, def->value_size,
def->max_entries, &create_attr);
map_fd = bpf_map_create(def->type, map_name,
def->key_size, def->value_size,
def->max_entries, &create_attr);
}
if (map->fd < 0 && (create_attr.btf_key_type_id ||
create_attr.btf_value_type_id)) {
if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) {
char *cp, errmsg[STRERR_BUFSIZE];
err = -errno;
@ -5291,13 +5315,11 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
create_attr.btf_value_type_id = 0;
map->btf_key_type_id = 0;
map->btf_value_type_id = 0;
map->fd = bpf_map_create(def->type, map_name,
def->key_size, def->value_size,
def->max_entries, &create_attr);
map_fd = bpf_map_create(def->type, map_name,
def->key_size, def->value_size,
def->max_entries, &create_attr);
}
err = map->fd < 0 ? -errno : 0;
if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
if (obj->gen_loader)
map->inner_map->fd = -1;
@ -5305,7 +5327,19 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
zfree(&map->inner_map);
}
return err;
if (map_fd < 0)
return map_fd;
/* obj->gen_loader case, prevent reuse_fd() from closing map_fd */
if (map->fd == map_fd)
return 0;
/* Keep placeholder FD value but now point it to the BPF map object.
* This way everything that relied on this map's FD (e.g., relocated
* ldimm64 instructions) will stay valid and won't need adjustments.
* map->fd stays valid but now points to what map_fd points to.
*/
return reuse_fd(map->fd, map_fd);
}
static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
@ -5389,10 +5423,8 @@ static int bpf_object_init_prog_arrays(struct bpf_object *obj)
continue;
err = init_prog_array_slots(obj, map);
if (err < 0) {
zclose(map->fd);
if (err < 0)
return err;
}
}
return 0;
}
@ -5483,25 +5515,20 @@ retry:
if (bpf_map__is_internal(map)) {
err = bpf_object__populate_internal_map(obj, map);
if (err < 0) {
zclose(map->fd);
if (err < 0)
goto err_out;
}
}
if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
err = init_map_in_map_slots(obj, map);
if (err < 0) {
zclose(map->fd);
if (err < 0)
goto err_out;
}
}
}
if (map->pin_path && !map->pinned) {
err = bpf_map__pin(map, NULL);
if (err) {
zclose(map->fd);
if (!retried && err == -EEXIST) {
retried = true;
goto retry;
@ -8075,8 +8102,8 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
err = err ? : bpf_object__sanitize_and_load_btf(obj);
err = err ? : bpf_object__sanitize_maps(obj);
err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
err = err ? : bpf_object__create_maps(obj);
err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
err = err ? : bpf_object__create_maps(obj);
err = err ? : bpf_object__load_progs(obj, extra_log_level);
err = err ? : bpf_object_init_prog_arrays(obj);
err = err ? : bpf_object_prepare_struct_ops(obj);
@ -8085,8 +8112,6 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
/* reset FDs */
if (obj->btf)
btf__set_fd(obj->btf, -1);
for (i = 0; i < obj->nr_maps; i++)
obj->maps[i].fd = -1;
if (!err)
err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
}

View File

@ -555,6 +555,20 @@ static inline int ensure_good_fd(int fd)
return fd;
}
/* Point *fixed_fd* to the same file that *tmp_fd* points to.
 *
 * @fixed_fd: FD number that has to keep its numeric value; whatever it
 *            pointed to before is closed silently by dup2().
 * @tmp_fd:   temporary FD whose underlying file *fixed_fd* takes over.
 *
 * Returns 0 on success or a negative errno value if dup2() fails.
 *
 * Regardless of success, *tmp_fd* is closed, with one exception: if
 * *tmp_fd* and *fixed_fd* are the same number, dup2() is a no-op and
 * closing *tmp_fd* would close *fixed_fd* itself, so nothing is closed.
 *
 * NOTE(review): dup2() leaves FD_CLOEXEC cleared on *fixed_fd*; confirm
 * whether callers expect close-on-exec to be preserved.
 */
static inline int reuse_fd(int fixed_fd, int tmp_fd)
{
	int err = 0;

	/* capture errno before close() below gets a chance to overwrite it */
	if (dup2(tmp_fd, fixed_fd) < 0)
		err = -errno;

	/* never close the FD we were asked to keep stable */
	if (tmp_fd != fixed_fd)
		close(tmp_fd); /* clean up temporary FD */

	return err;
}
/* The following two functions are exposed to bpftool */
int bpf_core_add_cands(struct bpf_core_cand *local_cand,
size_t local_essent_len,