libbpf: move global data mmap()'ing into bpf_object__load()

Since the inception of BPF skeletons, libbpf has been mmap()'ing global
data ARRAY maps in the bpf_object__load_skeleton() API, which is used by
code-generated .skel.h files (i.e., by BPF skeletons only).

This is wrong because if a BPF object is loaded through the generic
bpf_object__load() API, global data maps won't be re-mmap()'ed after the
load step, and the memory pointers returned from bpf_map__initial_value()
will be wrong and won't reflect the actual memory shared between the BPF
program and user space.

The bpf_map__initial_value() return result is rarely used after load, so
this went unnoticed for a really long time, until the bpftrace project
attempted to load a BPF object through the generic bpf_object__load() API
and then used a BPF subskeleton instantiated from such a bpf_object. It
turned out that .data/.rodata/.bss data updates through such a
subskeleton were "blackholed", all because libbpf wouldn't re-mmap()
those maps during the bpf_object__load() phase.

Long story short, this step should be done by libbpf regardless of BPF
skeleton usage, right after the BPF map is created in the kernel. This
patch moves the functionality into bpf_object__populate_internal_map()
to achieve that. bpf_object__load_skeleton() is now simple and almost
trivial, only propagating the mmap()'ed pointers into the user-supplied
skeleton structs.
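
For reference, the remap trick in isolation (a sketch, not the exact
libbpf code; assumes "map_fd" is an mmap()'able BPF array map and
"old_addr" is a page-aligned anonymous mapping of the same size holding
the initialization image):

    #include <stddef.h>
    #include <sys/mman.h>

    static void *remap_over_image(void *old_addr, size_t sz, int map_fd)
    {
            /* MAP_FIXED replaces the anonymous pages in place: the address
             * (and current contents, identical at this point) stay stable,
             * while the backing becomes the kernel's map memory
             */
            void *p = mmap(old_addr, sz, PROT_READ | PROT_WRITE,
                           MAP_SHARED | MAP_FIXED, map_fd, 0);
            return p == MAP_FAILED ? NULL : p;
    }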

We also make trivial adjustments to the error reporting inside
bpf_object__populate_internal_map() for consistency with the rest of
libbpf's map-handling code.

Reported-by: Alastair Robertson <ajor@meta.com>
Reported-by: Jonathan Wiepert <jwiepert@meta.com>
Fixes: d66562fba1 ("libbpf: Add BPF object skeleton support")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20241023043908.3834423-3-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -5122,6 +5122,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 	enum libbpf_map_type map_type = map->libbpf_type;
 	char *cp, errmsg[STRERR_BUFSIZE];
 	int err, zero = 0;
+	size_t mmap_sz;

 	if (obj->gen_loader) {
 		bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
@@ -5135,8 +5136,8 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 	if (err) {
 		err = -errno;
 		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
-		pr_warn("Error setting initial map(%s) contents: %s\n",
-			map->name, cp);
+		pr_warn("map '%s': failed to set initial contents: %s\n",
+			bpf_map__name(map), cp);
 		return err;
 	}

@@ -5146,11 +5147,43 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 		if (err) {
 			err = -errno;
 			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
-			pr_warn("Error freezing map(%s) as read-only: %s\n",
-				map->name, cp);
+			pr_warn("map '%s': failed to freeze as read-only: %s\n",
+				bpf_map__name(map), cp);
 			return err;
 		}
 	}
+
+	/* Remap anonymous mmap()-ed "map initialization image" as
+	 * a BPF map-backed mmap()-ed memory, but preserving the same
+	 * memory address. This will cause kernel to change process'
+	 * page table to point to a different piece of kernel memory,
+	 * but from userspace point of view memory address (and its
+	 * contents, being identical at this point) will stay the
+	 * same. This mapping will be released by bpf_object__close()
+	 * as per normal clean up procedure.
+	 */
+	mmap_sz = bpf_map_mmap_sz(map);
+	if (map->def.map_flags & BPF_F_MMAPABLE) {
+		void *mmaped;
+		int prot;
+
+		if (map->def.map_flags & BPF_F_RDONLY_PROG)
+			prot = PROT_READ;
+		else
+			prot = PROT_READ | PROT_WRITE;
+		mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map->fd, 0);
+		if (mmaped == MAP_FAILED) {
+			err = -errno;
+			pr_warn("map '%s': failed to re-mmap() contents: %d\n",
+				bpf_map__name(map), err);
+			return err;
+		}
+		map->mmaped = mmaped;
+	} else if (map->mmaped) {
+		munmap(map->mmaped, mmap_sz);
+		map->mmaped = NULL;
+	}
+
 	return 0;
 }

@@ -5467,8 +5500,7 @@ retry:
 			err = bpf_object__populate_internal_map(obj, map);
 			if (err < 0)
 				goto err_out;
-		}
-		if (map->def.type == BPF_MAP_TYPE_ARENA) {
+		} else if (map->def.type == BPF_MAP_TYPE_ARENA) {
 			map->mmaped = mmap((void *)(long)map->map_extra,
 					   bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
 					   map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
@@ -13916,46 +13948,11 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
 	for (i = 0; i < s->map_cnt; i++) {
 		struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz;
 		struct bpf_map *map = *map_skel->map;
-		size_t mmap_sz = bpf_map_mmap_sz(map);
-		int prot, map_fd = map->fd;
-		void **mmaped = map_skel->mmaped;

-		if (!mmaped)
+		if (!map_skel->mmaped)
 			continue;

-		if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
-			*mmaped = NULL;
-			continue;
-		}
-
-		if (map->def.type == BPF_MAP_TYPE_ARENA) {
-			*mmaped = map->mmaped;
-			continue;
-		}
-
-		if (map->def.map_flags & BPF_F_RDONLY_PROG)
-			prot = PROT_READ;
-		else
-			prot = PROT_READ | PROT_WRITE;
-
-		/* Remap anonymous mmap()-ed "map initialization image" as
-		 * a BPF map-backed mmap()-ed memory, but preserving the same
-		 * memory address. This will cause kernel to change process'
-		 * page table to point to a different piece of kernel memory,
-		 * but from userspace point of view memory address (and its
-		 * contents, being identical at this point) will stay the
-		 * same. This mapping will be released by bpf_object__close()
-		 * as per normal clean up procedure, so we don't need to worry
-		 * about it from skeleton's clean up perspective.
-		 */
-		*mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0);
-		if (*mmaped == MAP_FAILED) {
-			err = -errno;
-			*mmaped = NULL;
-			pr_warn("failed to re-mmap() map '%s': %d\n",
-				bpf_map__name(map), err);
-			return libbpf_err(err);
-		}
+		*map_skel->mmaped = map->mmaped;
 	}

 	return 0;