forked from Minki/linux
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2019-11-02 The following pull-request contains BPF updates for your *net-next* tree. We've added 30 non-merge commits during the last 7 day(s) which contain a total of 41 files changed, 1864 insertions(+), 474 deletions(-). The main changes are: 1) Fix long standing user vs kernel access issue by introducing bpf_probe_read_user() and bpf_probe_read_kernel() helpers, from Daniel. 2) Accelerated xskmap lookup, from Björn and Maciej. 3) Support for automatic map pinning in libbpf, from Toke. 4) Cleanup of BTF-enabled raw tracepoints, from Alexei. 5) Various fixes to libbpf and selftests. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
ae8a76fb8b
@ -47,6 +47,15 @@ Program types
|
||||
prog_flow_dissector
|
||||
|
||||
|
||||
Testing BPF
|
||||
===========
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
s390
|
||||
|
||||
|
||||
.. Links:
|
||||
.. _Documentation/networking/filter.txt: ../networking/filter.txt
|
||||
.. _man-pages: https://www.kernel.org/doc/man-pages/
|
||||
|
205
Documentation/bpf/s390.rst
Normal file
205
Documentation/bpf/s390.rst
Normal file
@ -0,0 +1,205 @@
|
||||
===================
|
||||
Testing BPF on s390
|
||||
===================
|
||||
|
||||
1. Introduction
|
||||
***************
|
||||
|
||||
IBM Z mainframes are descendants of the IBM System/360, which dates back to the
|
||||
year 1964. They are supported by the Linux kernel under the name "s390". This
|
||||
document describes how to test BPF in an s390 QEMU guest.
|
||||
|
||||
2. One-time setup
|
||||
*****************
|
||||
|
||||
The following is required to build and run the test suite:
|
||||
|
||||
* s390 GCC
|
||||
* s390 development headers and libraries
|
||||
* Clang with BPF support
|
||||
* QEMU with s390 support
|
||||
* Disk image with s390 rootfs
|
||||
|
||||
Debian supports installing the compiler and libraries for s390 out of the box.
|
||||
Users of other distros may use debootstrap in order to set up a Debian chroot::
|
||||
|
||||
sudo debootstrap \
|
||||
--variant=minbase \
|
||||
--include=sudo \
|
||||
testing \
|
||||
./s390-toolchain
|
||||
sudo mount --rbind /dev ./s390-toolchain/dev
|
||||
sudo mount --rbind /proc ./s390-toolchain/proc
|
||||
sudo mount --rbind /sys ./s390-toolchain/sys
|
||||
sudo chroot ./s390-toolchain
|
||||
|
||||
Once on Debian, the build prerequisites can be installed as follows::
|
||||
|
||||
sudo dpkg --add-architecture s390x
|
||||
sudo apt-get update
|
||||
sudo apt-get install \
|
||||
bc \
|
||||
bison \
|
||||
cmake \
|
||||
debootstrap \
|
||||
dwarves \
|
||||
flex \
|
||||
g++ \
|
||||
gcc \
|
||||
g++-s390x-linux-gnu \
|
||||
gcc-s390x-linux-gnu \
|
||||
gdb-multiarch \
|
||||
git \
|
||||
make \
|
||||
python3 \
|
||||
qemu-system-misc \
|
||||
qemu-utils \
|
||||
rsync \
|
||||
libcap-dev:s390x \
|
||||
libelf-dev:s390x \
|
||||
libncurses-dev
|
||||
|
||||
Latest Clang targeting BPF can be installed as follows::
|
||||
|
||||
git clone https://github.com/llvm/llvm-project.git
|
||||
ln -s ../../clang llvm-project/llvm/tools/
|
||||
mkdir llvm-project-build
|
||||
cd llvm-project-build
|
||||
cmake \
|
||||
-DLLVM_TARGETS_TO_BUILD=BPF \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX=/opt/clang-bpf \
|
||||
../llvm-project/llvm
|
||||
make
|
||||
sudo make install
|
||||
export PATH=/opt/clang-bpf/bin:$PATH
|
||||
|
||||
The disk image can be prepared using a loopback mount and debootstrap::
|
||||
|
||||
qemu-img create -f raw ./s390.img 1G
|
||||
sudo losetup -f ./s390.img
|
||||
sudo mkfs.ext4 /dev/loopX
|
||||
mkdir ./s390.rootfs
|
||||
sudo mount /dev/loopX ./s390.rootfs
|
||||
sudo debootstrap \
|
||||
--foreign \
|
||||
--arch=s390x \
|
||||
--variant=minbase \
|
||||
--include=" \
|
||||
iproute2, \
|
||||
iputils-ping, \
|
||||
isc-dhcp-client, \
|
||||
kmod, \
|
||||
libcap2, \
|
||||
libelf1, \
|
||||
netcat, \
|
||||
procps" \
|
||||
testing \
|
||||
./s390.rootfs
|
||||
sudo umount ./s390.rootfs
|
||||
sudo losetup -d /dev/loopX
|
||||
|
||||
3. Compilation
|
||||
**************
|
||||
|
||||
In addition to the usual Kconfig options required to run the BPF test suite, it
|
||||
is also helpful to select::
|
||||
|
||||
CONFIG_NET_9P=y
|
||||
CONFIG_9P_FS=y
|
||||
CONFIG_NET_9P_VIRTIO=y
|
||||
CONFIG_VIRTIO_PCI=y
|
||||
|
||||
as that would enable a very easy way to share files with the s390 virtual
|
||||
machine.
|
||||
|
||||
Compiling the kernel, modules and test suite, as well as preparing gdb scripts to
|
||||
simplify debugging, can be done using the following commands::
|
||||
|
||||
make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- menuconfig
|
||||
make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- bzImage modules scripts_gdb
|
||||
make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- \
|
||||
-C tools/testing/selftests \
|
||||
TARGETS=bpf \
|
||||
INSTALL_PATH=$PWD/tools/testing/selftests/kselftest_install \
|
||||
install
|
||||
|
||||
4. Running the test suite
|
||||
*************************
|
||||
|
||||
The virtual machine can be started as follows::
|
||||
|
||||
qemu-system-s390x \
|
||||
-cpu max,zpci=on \
|
||||
-smp 2 \
|
||||
-m 4G \
|
||||
-kernel linux/arch/s390/boot/compressed/vmlinux \
|
||||
-drive file=./s390.img,if=virtio,format=raw \
|
||||
-nographic \
|
||||
-append 'root=/dev/vda rw console=ttyS1' \
|
||||
-virtfs local,path=./linux,security_model=none,mount_tag=linux \
|
||||
-object rng-random,filename=/dev/urandom,id=rng0 \
|
||||
-device virtio-rng-ccw,rng=rng0 \
|
||||
-netdev user,id=net0 \
|
||||
-device virtio-net-ccw,netdev=net0
|
||||
|
||||
When using this on a real IBM Z, ``-enable-kvm`` may be added for better
|
||||
performance. When starting the virtual machine for the first time, disk image
|
||||
setup must be finalized using the following command::
|
||||
|
||||
/debootstrap/debootstrap --second-stage
|
||||
|
||||
The directory with the code built on the host as well as ``/proc`` and ``/sys``
|
||||
need to be mounted as follows::
|
||||
|
||||
mkdir -p /linux
|
||||
mount -t 9p linux /linux
|
||||
mount -t proc proc /proc
|
||||
mount -t sysfs sys /sys
|
||||
|
||||
After that, the test suite can be run using the following commands::
|
||||
|
||||
cd /linux/tools/testing/selftests/kselftest_install
|
||||
./run_kselftest.sh
|
||||
|
||||
As usual, tests can also be run individually::
|
||||
|
||||
cd /linux/tools/testing/selftests/bpf
|
||||
./test_verifier
|
||||
|
||||
5. Debugging
|
||||
************
|
||||
|
||||
It is possible to debug the s390 kernel using the QEMU GDB stub, which is activated
|
||||
by passing ``-s`` to QEMU.
|
||||
|
||||
It is preferable to turn KASLR off, so that gdb would know where to find the
|
||||
kernel image in memory, by building the kernel with::
|
||||
|
||||
RANDOMIZE_BASE=n
|
||||
|
||||
GDB can then be attached using the following command::
|
||||
|
||||
gdb-multiarch -ex 'target remote localhost:1234' ./vmlinux
|
||||
|
||||
6. Network
|
||||
**********
|
||||
|
||||
In case one needs to use the network in the virtual machine in order to e.g.
|
||||
install additional packages, it can be configured using::
|
||||
|
||||
dhclient eth0
|
||||
|
||||
7. Links
|
||||
********
|
||||
|
||||
This document is a compilation of techniques, whose more comprehensive
|
||||
descriptions can be found by following these links:
|
||||
|
||||
- `Debootstrap <https://wiki.debian.org/EmDebian/CrossDebootstrap>`_
|
||||
- `Multiarch <https://wiki.debian.org/Multiarch/HOWTO>`_
|
||||
- `Building LLVM <https://llvm.org/docs/CMake.html>`_
|
||||
- `Cross-compiling the kernel <https://wiki.gentoo.org/wiki/Embedded_Handbook/General/Cross-compiling_the_kernel>`_
|
||||
- `QEMU s390x Guest Support <https://wiki.qemu.org/Documentation/Platforms/S390X>`_
|
||||
- `Plan 9 folder sharing over Virtio <https://wiki.qemu.org/Documentation/9psetup>`_
|
||||
- `Using GDB with QEMU <https://wiki.osdev.org/Kernel_Debugging#Use_GDB_with_QEMU>`_
|
@ -13,7 +13,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
|
||||
endif
|
||||
|
||||
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
|
||||
pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
|
||||
pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
|
||||
|
||||
# Make sure __phys_addr has no stackprotector
|
||||
nostackp := $(call cc-option, -fno-stack-protector)
|
||||
|
43
arch/x86/mm/maccess.c
Normal file
43
arch/x86/mm/maccess.c
Normal file
@ -0,0 +1,43 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
 * canonical_address() - sign-extend @vaddr from bit (@vaddr_bits - 1)
 * @vaddr: virtual address to canonicalize
 * @vaddr_bits: number of implemented virtual address bits
 *              (assumed to be in the range 1..64 - TODO confirm with callers)
 *
 * Returns @vaddr with every bit above bit (@vaddr_bits - 1) replaced by a
 * copy of that bit.
 *
 * The left shift is carried out on the unsigned value because left-shifting
 * a negative signed integer is undefined behaviour in ISO C. Only the right
 * shift uses the signed type, so that the top bit is replicated downwards
 * (this relies on the compiler implementing signed right shift as an
 * arithmetic shift, as the previous form of this expression also did).
 */
static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits)
{
	const unsigned int shift = 64 - vaddr_bits;

	return (u64)((s64)(vaddr << shift) >> shift);
}
|
||||
|
||||
/*
 * Tell whether @vaddr has to be rejected as the source of a strict kernel
 * probe read.
 *
 * Two classes of addresses are rejected:
 *  - everything below TASK_SIZE_MAX + PAGE_SIZE, i.e. the range up to the
 *    highest possible canonical userspace address plus the userspace guard
 *    page;
 *  - every address that is not canonical for the virtual address width
 *    reported in boot_cpu_data.x86_virt_bits.
 */
static __always_inline bool invalid_probe_range(u64 vaddr)
{
	if (vaddr < TASK_SIZE_MAX + PAGE_SIZE)
		return true;

	return vaddr != canonical_address(vaddr, boot_cpu_data.x86_virt_bits);
}
|
||||
#else
|
||||
/*
 * Variant used when CONFIG_X86_64 is not set: an address is rejected as the
 * source of a strict kernel probe read when it lies below TASK_SIZE_MAX.
 */
static __always_inline bool invalid_probe_range(u64 vaddr)
{
	if (vaddr < TASK_SIZE_MAX)
		return true;

	return false;
}
|
||||
#endif
|
||||
|
||||
/*
 * probe_kernel_read_strict() - probe read that refuses invalid source ranges
 * @dst: buffer receiving the data
 * @src: address to read from
 * @size: number of bytes to read
 *
 * The start address @src is first checked with invalid_probe_range(); when
 * that check flags it, -EFAULT is returned without touching @dst. Otherwise
 * the request is forwarded to __probe_kernel_read() and its result is
 * returned unchanged.
 */
long probe_kernel_read_strict(void *dst, const void *src, size_t size)
{
	const unsigned long addr = (unsigned long)src;

	if (likely(!invalid_probe_range(addr)))
		return __probe_kernel_read(dst, src, size);

	return -EFAULT;
}
|
||||
|
||||
/*
 * strncpy_from_unsafe_strict() - string copy that refuses invalid source ranges
 * @dst: destination buffer
 * @unsafe_addr: address of the string to copy
 * @count: passed through to __strncpy_from_unsafe()
 *
 * The start address @unsafe_addr is first checked with
 * invalid_probe_range(); when that check flags it, -EFAULT is returned.
 * Otherwise the request is forwarded to __strncpy_from_unsafe() and its
 * result is returned unchanged.
 */
long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, long count)
{
	const unsigned long addr = (unsigned long)unsafe_addr;

	if (likely(!invalid_probe_range(addr)))
		return __strncpy_from_unsafe(dst, unsafe_addr, count);

	return -EFAULT;
}
|
@ -373,6 +373,11 @@ enum bpf_cgroup_storage_type {
|
||||
|
||||
#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
|
||||
|
||||
/* The longest tracepoint has 12 args.
|
||||
* See include/trace/bpf_probe.h
|
||||
*/
|
||||
#define MAX_BPF_FUNC_ARGS 12
|
||||
|
||||
struct bpf_prog_stats {
|
||||
u64 cnt;
|
||||
u64 nsecs;
|
||||
@ -1004,31 +1009,6 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr,
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_XDP_SOCKETS)
|
||||
struct xdp_sock;
|
||||
struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key);
|
||||
int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
|
||||
struct xdp_sock *xs);
|
||||
void __xsk_map_flush(struct bpf_map *map);
|
||||
#else
|
||||
struct xdp_sock;
|
||||
static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
|
||||
u32 key)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
|
||||
struct xdp_sock *xs)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static inline void __xsk_map_flush(struct bpf_map *map)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
|
||||
void bpf_sk_reuseport_detach(struct sock *sk);
|
||||
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
|
||||
|
@ -26,6 +26,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
|
||||
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
|
||||
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
|
||||
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
|
||||
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing)
|
||||
#endif
|
||||
#ifdef CONFIG_CGROUP_BPF
|
||||
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
|
||||
|
@ -311,6 +311,7 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src,
|
||||
* happens, handle that and return -EFAULT.
|
||||
*/
|
||||
extern long probe_kernel_read(void *dst, const void *src, size_t size);
|
||||
extern long probe_kernel_read_strict(void *dst, const void *src, size_t size);
|
||||
extern long __probe_kernel_read(void *dst, const void *src, size_t size);
|
||||
|
||||
/*
|
||||
@ -337,7 +338,22 @@ extern long __probe_user_read(void *dst, const void __user *src, size_t size);
|
||||
extern long notrace probe_kernel_write(void *dst, const void *src, size_t size);
|
||||
extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size);
|
||||
|
||||
/*
|
||||
* probe_user_write(): safely attempt to write to a location in user space
|
||||
* @dst: address to write to
|
||||
* @src: pointer to the data that shall be written
|
||||
* @size: size of the data chunk
|
||||
*
|
||||
* Safely write to address @dst from the buffer at @src. If a kernel fault
|
||||
* happens, handle that and return -EFAULT.
|
||||
*/
|
||||
extern long notrace probe_user_write(void __user *dst, const void *src, size_t size);
|
||||
extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size);
|
||||
|
||||
extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
|
||||
extern long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr,
|
||||
long count);
|
||||
extern long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
|
||||
extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
|
||||
long count);
|
||||
extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
|
||||
|
@ -69,7 +69,14 @@ struct xdp_umem {
|
||||
/* Nodes are linked in the struct xdp_sock map_list field, and used to
|
||||
* track which maps a certain socket resides in.
|
||||
*/
|
||||
struct xsk_map;
|
||||
|
||||
/* XSKMAP instance: generic BPF map header followed by the socket slots. */
struct xsk_map {
|
||||
struct bpf_map map; /* embedded generic map; lookups recover the xsk_map from it via container_of() */
|
||||
struct list_head __percpu *flush_list; /* per-CPU list heads; presumably sockets awaiting a flush - TODO confirm */
|
||||
spinlock_t lock; /* Synchronize map updates */
|
||||
struct xdp_sock *xsk_map[]; /* flexible array of socket pointers, indexed by map key */
|
||||
};
|
||||
|
||||
struct xsk_map_node {
|
||||
struct list_head node;
|
||||
struct xsk_map *map;
|
||||
@ -109,8 +116,6 @@ struct xdp_sock {
|
||||
struct xdp_buff;
|
||||
#ifdef CONFIG_XDP_SOCKETS
|
||||
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
|
||||
int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
|
||||
void xsk_flush(struct xdp_sock *xs);
|
||||
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
|
||||
/* Used from netdev driver */
|
||||
bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
|
||||
@ -134,6 +139,22 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
|
||||
struct xdp_sock **map_entry);
|
||||
int xsk_map_inc(struct xsk_map *map);
|
||||
void xsk_map_put(struct xsk_map *map);
|
||||
int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
|
||||
struct xdp_sock *xs);
|
||||
void __xsk_map_flush(struct bpf_map *map);
|
||||
|
||||
static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
|
||||
u32 key)
|
||||
{
|
||||
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||
struct xdp_sock *xs;
|
||||
|
||||
if (key >= map->max_entries)
|
||||
return NULL;
|
||||
|
||||
xs = READ_ONCE(m->xsk_map[key]);
|
||||
return xs;
|
||||
}
|
||||
|
||||
static inline u64 xsk_umem_extract_addr(u64 addr)
|
||||
{
|
||||
@ -224,15 +245,6 @@ static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
||||
{
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
static inline void xsk_flush(struct xdp_sock *xs)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
|
||||
{
|
||||
return false;
|
||||
@ -357,6 +369,21 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Stub (presumably the !CONFIG_XDP_SOCKETS build - confirm against the enclosing #else): redirect is never possible, always fails with -EOPNOTSUPP. */
static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
|
||||
struct xdp_sock *xs)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* Stub (presumably the !CONFIG_XDP_SOCKETS build - confirm against the enclosing #else): nothing to flush, so this is a no-op. */
static inline void __xsk_map_flush(struct bpf_map *map)
|
||||
{
|
||||
}
|
||||
|
||||
/* Stub (presumably the !CONFIG_XDP_SOCKETS build - confirm against the enclosing #else): no socket can be found, always returns NULL. */
static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
|
||||
u32 key)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
#endif /* CONFIG_XDP_SOCKETS */
|
||||
|
||||
#endif /* _LINUX_XDP_SOCK_H */
|
||||
|
@ -173,6 +173,7 @@ enum bpf_prog_type {
|
||||
BPF_PROG_TYPE_CGROUP_SYSCTL,
|
||||
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
|
||||
BPF_PROG_TYPE_CGROUP_SOCKOPT,
|
||||
BPF_PROG_TYPE_TRACING,
|
||||
};
|
||||
|
||||
enum bpf_attach_type {
|
||||
@ -199,6 +200,7 @@ enum bpf_attach_type {
|
||||
BPF_CGROUP_UDP6_RECVMSG,
|
||||
BPF_CGROUP_GETSOCKOPT,
|
||||
BPF_CGROUP_SETSOCKOPT,
|
||||
BPF_TRACE_RAW_TP,
|
||||
__MAX_BPF_ATTACH_TYPE
|
||||
};
|
||||
|
||||
@ -561,10 +563,13 @@ union bpf_attr {
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* int bpf_probe_read(void *dst, u32 size, const void *src)
|
||||
* int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
|
||||
* Description
|
||||
* For tracing programs, safely attempt to read *size* bytes from
|
||||
* address *src* and store the data in *dst*.
|
||||
* kernel space address *unsafe_ptr* and store the data in *dst*.
|
||||
*
|
||||
* Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
|
||||
* instead.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
@ -1426,45 +1431,14 @@ union bpf_attr {
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
|
||||
* int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
|
||||
* Description
|
||||
* Copy a NUL terminated string from an unsafe address
|
||||
* *unsafe_ptr* to *dst*. The *size* should include the
|
||||
* terminating NUL byte. In case the string length is smaller than
|
||||
* *size*, the target is not padded with further NUL bytes. If the
|
||||
* string length is larger than *size*, just *size*-1 bytes are
|
||||
* copied and the last byte is set to NUL.
|
||||
* Copy a NUL terminated string from an unsafe kernel address
|
||||
* *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
|
||||
* more details.
|
||||
*
|
||||
* On success, the length of the copied string is returned. This
|
||||
* makes this helper useful in tracing programs for reading
|
||||
* strings, and more importantly to get its length at runtime. See
|
||||
* the following snippet:
|
||||
*
|
||||
* ::
|
||||
*
|
||||
* SEC("kprobe/sys_open")
|
||||
* void bpf_sys_open(struct pt_regs *ctx)
|
||||
* {
|
||||
* char buf[PATHLEN]; // PATHLEN is defined to 256
|
||||
* int res = bpf_probe_read_str(buf, sizeof(buf),
|
||||
* ctx->di);
|
||||
*
|
||||
* // Consume buf, for example push it to
|
||||
* // userspace via bpf_perf_event_output(); we
|
||||
* // can use res (the string length) as event
|
||||
* // size, after checking its boundaries.
|
||||
* }
|
||||
*
|
||||
* In comparison, using **bpf_probe_read()** helper here instead
|
||||
* to read the string would require to estimate the length at
|
||||
* compile time, and would often result in copying more memory
|
||||
* than necessary.
|
||||
*
|
||||
* Another useful use case is when parsing individual process
|
||||
* arguments or individual environment variables navigating
|
||||
* *current*\ **->mm->arg_start** and *current*\
|
||||
* **->mm->env_start**: using this helper and the return value,
|
||||
* one can quickly iterate at the right offset of the memory area.
|
||||
* Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
|
||||
* instead.
|
||||
* Return
|
||||
* On success, the strictly positive length of the string,
|
||||
* including the trailing NUL character. On error, a negative
|
||||
@ -2775,6 +2749,72 @@ union bpf_attr {
|
||||
* restricted to raw_tracepoint bpf programs.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
|
||||
* Description
|
||||
* Safely attempt to read *size* bytes from user space address
|
||||
* *unsafe_ptr* and store the data in *dst*.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
|
||||
* Description
|
||||
* Safely attempt to read *size* bytes from kernel space address
|
||||
* *unsafe_ptr* and store the data in *dst*.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
|
||||
* Description
|
||||
* Copy a NUL terminated string from an unsafe user address
|
||||
* *unsafe_ptr* to *dst*. The *size* should include the
|
||||
* terminating NUL byte. In case the string length is smaller than
|
||||
* *size*, the target is not padded with further NUL bytes. If the
|
||||
* string length is larger than *size*, just *size*-1 bytes are
|
||||
* copied and the last byte is set to NUL.
|
||||
*
|
||||
* On success, the length of the copied string is returned. This
|
||||
* makes this helper useful in tracing programs for reading
|
||||
* strings, and more importantly to get its length at runtime. See
|
||||
* the following snippet:
|
||||
*
|
||||
* ::
|
||||
*
|
||||
* SEC("kprobe/sys_open")
|
||||
* void bpf_sys_open(struct pt_regs *ctx)
|
||||
* {
|
||||
* char buf[PATHLEN]; // PATHLEN is defined to 256
|
||||
* int res = bpf_probe_read_user_str(buf, sizeof(buf),
|
||||
* ctx->di);
|
||||
*
|
||||
* // Consume buf, for example push it to
|
||||
* // userspace via bpf_perf_event_output(); we
|
||||
* // can use res (the string length) as event
|
||||
* // size, after checking its boundaries.
|
||||
* }
|
||||
*
|
||||
* In comparison, using **bpf_probe_read_user()** helper here
|
||||
* instead to read the string would require to estimate the length
|
||||
* at compile time, and would often result in copying more memory
|
||||
* than necessary.
|
||||
*
|
||||
* Another useful use case is when parsing individual process
|
||||
* arguments or individual environment variables navigating
|
||||
* *current*\ **->mm->arg_start** and *current*\
|
||||
* **->mm->env_start**: using this helper and the return value,
|
||||
* one can quickly iterate at the right offset of the memory area.
|
||||
* Return
|
||||
* On success, the strictly positive length of the string,
|
||||
* including the trailing NUL character. On error, a negative
|
||||
* value.
|
||||
*
|
||||
* int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
|
||||
* Description
|
||||
* Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
|
||||
* to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
|
||||
* Return
|
||||
* On success, the strictly positive length of the string, including
|
||||
* the trailing NUL character. On error, a negative value.
|
||||
*/
|
||||
#define __BPF_FUNC_MAPPER(FN) \
|
||||
FN(unspec), \
|
||||
@ -2888,7 +2928,11 @@ union bpf_attr {
|
||||
FN(sk_storage_delete), \
|
||||
FN(send_signal), \
|
||||
FN(tcp_gen_syncookie), \
|
||||
FN(skb_output),
|
||||
FN(skb_output), \
|
||||
FN(probe_read_user), \
|
||||
FN(probe_read_kernel), \
|
||||
FN(probe_read_user_str), \
|
||||
FN(probe_read_kernel_str),
|
||||
|
||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||
* function eBPF program intends to call
|
||||
|
@ -668,9 +668,6 @@ static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
|
||||
{
|
||||
struct latch_tree_node *n;
|
||||
|
||||
if (!bpf_jit_kallsyms_enabled())
|
||||
return NULL;
|
||||
|
||||
n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
|
||||
return n ?
|
||||
container_of(n, struct bpf_prog_aux, ksym_tnode)->prog :
|
||||
@ -1309,11 +1306,12 @@ bool bpf_opcode_in_insntable(u8 code)
|
||||
}
|
||||
|
||||
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
|
||||
u64 __weak bpf_probe_read(void * dst, u32 size, const void * unsafe_ptr)
|
||||
u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
|
||||
{
|
||||
memset(dst, 0, size);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
/**
|
||||
* __bpf_prog_run - run eBPF program on a given context
|
||||
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
|
||||
@ -1569,9 +1567,9 @@ out:
|
||||
LDST(W, u32)
|
||||
LDST(DW, u64)
|
||||
#undef LDST
|
||||
#define LDX_PROBE(SIZEOP, SIZE) \
|
||||
LDX_PROBE_MEM_##SIZEOP: \
|
||||
bpf_probe_read(&DST, SIZE, (const void *)(long) SRC); \
|
||||
#define LDX_PROBE(SIZEOP, SIZE) \
|
||||
LDX_PROBE_MEM_##SIZEOP: \
|
||||
bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) SRC); \
|
||||
CONT;
|
||||
LDX_PROBE(B, 1)
|
||||
LDX_PROBE(H, 2)
|
||||
|
@ -1579,7 +1579,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
|
||||
u32 btf_id)
|
||||
{
|
||||
switch (prog_type) {
|
||||
case BPF_PROG_TYPE_RAW_TRACEPOINT:
|
||||
case BPF_PROG_TYPE_TRACING:
|
||||
if (btf_id > BTF_MAX_TYPE)
|
||||
return -EINVAL;
|
||||
break;
|
||||
@ -1842,13 +1842,13 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
|
||||
return PTR_ERR(prog);
|
||||
|
||||
if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
|
||||
prog->type != BPF_PROG_TYPE_TRACING &&
|
||||
prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
|
||||
err = -EINVAL;
|
||||
goto out_put_prog;
|
||||
}
|
||||
|
||||
if (prog->type == BPF_PROG_TYPE_RAW_TRACEPOINT &&
|
||||
prog->aux->attach_btf_id) {
|
||||
if (prog->type == BPF_PROG_TYPE_TRACING) {
|
||||
if (attr->raw_tracepoint.name) {
|
||||
/* raw_tp name should not be specified in raw_tp
|
||||
* programs that were verified via in-kernel BTF info
|
||||
|
@ -6279,6 +6279,11 @@ static int check_return_code(struct bpf_verifier_env *env)
|
||||
case BPF_PROG_TYPE_CGROUP_SYSCTL:
|
||||
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
|
||||
break;
|
||||
case BPF_PROG_TYPE_RAW_TRACEPOINT:
|
||||
if (!env->prog->aux->attach_btf_id)
|
||||
return 0;
|
||||
range = tnum_const(0);
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
@ -9376,24 +9381,36 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
|
||||
{
|
||||
struct bpf_prog *prog = env->prog;
|
||||
u32 btf_id = prog->aux->attach_btf_id;
|
||||
const char prefix[] = "btf_trace_";
|
||||
const struct btf_type *t;
|
||||
const char *tname;
|
||||
|
||||
if (prog->type == BPF_PROG_TYPE_RAW_TRACEPOINT && btf_id) {
|
||||
const char prefix[] = "btf_trace_";
|
||||
if (prog->type != BPF_PROG_TYPE_TRACING)
|
||||
return 0;
|
||||
|
||||
t = btf_type_by_id(btf_vmlinux, btf_id);
|
||||
if (!t) {
|
||||
verbose(env, "attach_btf_id %u is invalid\n", btf_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (!btf_id) {
|
||||
verbose(env, "Tracing programs must provide btf_id\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
t = btf_type_by_id(btf_vmlinux, btf_id);
|
||||
if (!t) {
|
||||
verbose(env, "attach_btf_id %u is invalid\n", btf_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
tname = btf_name_by_offset(btf_vmlinux, t->name_off);
|
||||
if (!tname) {
|
||||
verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
switch (prog->expected_attach_type) {
|
||||
case BPF_TRACE_RAW_TP:
|
||||
if (!btf_type_is_typedef(t)) {
|
||||
verbose(env, "attach_btf_id %u is not a typedef\n",
|
||||
btf_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
tname = btf_name_by_offset(btf_vmlinux, t->name_off);
|
||||
if (!tname || strncmp(prefix, tname, sizeof(prefix) - 1)) {
|
||||
if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
|
||||
verbose(env, "attach_btf_id %u points to wrong type name %s\n",
|
||||
btf_id, tname);
|
||||
return -EINVAL;
|
||||
@ -9414,8 +9431,10 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
|
||||
prog->aux->attach_func_name = tname;
|
||||
prog->aux->attach_func_proto = t;
|
||||
prog->aux->attach_btf_trace = true;
|
||||
return 0;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
|
||||
|
@ -9,13 +9,6 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
struct xsk_map {
|
||||
struct bpf_map map;
|
||||
struct xdp_sock **xsk_map;
|
||||
struct list_head __percpu *flush_list;
|
||||
spinlock_t lock; /* Synchronize map updates */
|
||||
};
|
||||
|
||||
int xsk_map_inc(struct xsk_map *map)
|
||||
{
|
||||
struct bpf_map *m = &map->map;
|
||||
@ -80,9 +73,10 @@ static void xsk_map_sock_delete(struct xdp_sock *xs,
|
||||
|
||||
static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
|
||||
{
|
||||
struct bpf_map_memory mem;
|
||||
int cpu, err, numa_node;
|
||||
struct xsk_map *m;
|
||||
int cpu, err;
|
||||
u64 cost;
|
||||
u64 cost, size;
|
||||
|
||||
if (!capable(CAP_NET_ADMIN))
|
||||
return ERR_PTR(-EPERM);
|
||||
@ -92,44 +86,35 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
|
||||
attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
m = kzalloc(sizeof(*m), GFP_USER);
|
||||
if (!m)
|
||||
numa_node = bpf_map_attr_numa_node(attr);
|
||||
size = struct_size(m, xsk_map, attr->max_entries);
|
||||
cost = size + array_size(sizeof(*m->flush_list), num_possible_cpus());
|
||||
|
||||
err = bpf_map_charge_init(&mem, cost);
|
||||
if (err < 0)
|
||||
return ERR_PTR(err);
|
||||
|
||||
m = bpf_map_area_alloc(size, numa_node);
|
||||
if (!m) {
|
||||
bpf_map_charge_finish(&mem);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
bpf_map_init_from_attr(&m->map, attr);
|
||||
bpf_map_charge_move(&m->map.memory, &mem);
|
||||
spin_lock_init(&m->lock);
|
||||
|
||||
cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
|
||||
cost += sizeof(struct list_head) * num_possible_cpus();
|
||||
|
||||
/* Notice returns -EPERM on if map size is larger than memlock limit */
|
||||
err = bpf_map_charge_init(&m->map.memory, cost);
|
||||
if (err)
|
||||
goto free_m;
|
||||
|
||||
err = -ENOMEM;
|
||||
|
||||
m->flush_list = alloc_percpu(struct list_head);
|
||||
if (!m->flush_list)
|
||||
goto free_charge;
|
||||
if (!m->flush_list) {
|
||||
bpf_map_charge_finish(&m->map.memory);
|
||||
bpf_map_area_free(m);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
|
||||
|
||||
m->xsk_map = bpf_map_area_alloc(m->map.max_entries *
|
||||
sizeof(struct xdp_sock *),
|
||||
m->map.numa_node);
|
||||
if (!m->xsk_map)
|
||||
goto free_percpu;
|
||||
return &m->map;
|
||||
|
||||
free_percpu:
|
||||
free_percpu(m->flush_list);
|
||||
free_charge:
|
||||
bpf_map_charge_finish(&m->map.memory);
|
||||
free_m:
|
||||
kfree(m);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static void xsk_map_free(struct bpf_map *map)
|
||||
@ -139,8 +124,7 @@ static void xsk_map_free(struct bpf_map *map)
|
||||
bpf_clear_redirect_map(map);
|
||||
synchronize_net();
|
||||
free_percpu(m->flush_list);
|
||||
bpf_map_area_free(m->xsk_map);
|
||||
kfree(m);
|
||||
bpf_map_area_free(m);
|
||||
}
|
||||
|
||||
static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
|
||||
@ -160,45 +144,20 @@ static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
|
||||
static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
|
||||
{
|
||||
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||
struct xdp_sock *xs;
|
||||
const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
|
||||
struct bpf_insn *insn = insn_buf;
|
||||
|
||||
if (key >= map->max_entries)
|
||||
return NULL;
|
||||
|
||||
xs = READ_ONCE(m->xsk_map[key]);
|
||||
return xs;
|
||||
}
|
||||
|
||||
int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
|
||||
struct xdp_sock *xs)
|
||||
{
|
||||
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||
struct list_head *flush_list = this_cpu_ptr(m->flush_list);
|
||||
int err;
|
||||
|
||||
err = xsk_rcv(xs, xdp);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (!xs->flush_node.prev)
|
||||
list_add(&xs->flush_node, flush_list);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __xsk_map_flush(struct bpf_map *map)
|
||||
{
|
||||
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||
struct list_head *flush_list = this_cpu_ptr(m->flush_list);
|
||||
struct xdp_sock *xs, *tmp;
|
||||
|
||||
list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
|
||||
xsk_flush(xs);
|
||||
__list_del_clearprev(&xs->flush_node);
|
||||
}
|
||||
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
|
||||
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
|
||||
*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(sizeof(struct xsk_sock *)));
|
||||
*insn++ = BPF_ALU64_IMM(BPF_ADD, mp, offsetof(struct xsk_map, xsk_map));
|
||||
*insn++ = BPF_ALU64_REG(BPF_ADD, ret, mp);
|
||||
*insn++ = BPF_LDX_MEM(BPF_SIZEOF(struct xsk_sock *), ret, ret, 0);
|
||||
*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
|
||||
*insn++ = BPF_MOV64_IMM(ret, 0);
|
||||
return insn - insn_buf;
|
||||
}
|
||||
|
||||
static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
|
||||
@ -312,6 +271,7 @@ const struct bpf_map_ops xsk_map_ops = {
|
||||
.map_free = xsk_map_free,
|
||||
.map_get_next_key = xsk_map_get_next_key,
|
||||
.map_lookup_elem = xsk_map_lookup_elem,
|
||||
.map_gen_lookup = xsk_map_gen_lookup,
|
||||
.map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
|
||||
.map_update_elem = xsk_map_update_elem,
|
||||
.map_delete_elem = xsk_map_delete_elem,
|
||||
|
@ -138,24 +138,19 @@ static const struct bpf_func_proto bpf_override_return_proto = {
|
||||
};
|
||||
#endif
|
||||
|
||||
BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
|
||||
BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
|
||||
const void __user *, unsafe_ptr)
|
||||
{
|
||||
int ret;
|
||||
int ret = probe_user_read(dst, unsafe_ptr, size);
|
||||
|
||||
ret = security_locked_down(LOCKDOWN_BPF_READ);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = probe_kernel_read(dst, unsafe_ptr, size);
|
||||
if (unlikely(ret < 0))
|
||||
out:
|
||||
memset(dst, 0, size);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_probe_read_proto = {
|
||||
.func = bpf_probe_read,
|
||||
static const struct bpf_func_proto bpf_probe_read_user_proto = {
|
||||
.func = bpf_probe_read_user,
|
||||
.gpl_only = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
@ -163,7 +158,128 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
|
||||
/*
 * BPF helper body: copy a string from the __user pointer unsafe_ptr into
 * dst through strncpy_from_unsafe_user(), bounded by size.
 *
 * Returns whatever strncpy_from_unsafe_user() returned. When that value is
 * negative, all size bytes of dst are zeroed before returning.
 */
BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
	   const void __user *, unsafe_ptr)
{
	int ret;

	ret = strncpy_from_unsafe_user(dst, unsafe_ptr, size);
	if (unlikely(ret < 0)) {
		/* Failed copy: hand back a fully cleared buffer. */
		memset(dst, 0, size);
	}

	return ret;
}
|
||||
|
||||
/*
 * Helper prototype binding bpf_probe_read_user_str: marked GPL-only, return
 * type RET_INTEGER; argument types are ARG_PTR_TO_UNINIT_MEM (dst),
 * ARG_CONST_SIZE_OR_ZERO (size) and ARG_ANYTHING (unsafe_ptr).
 */
static const struct bpf_func_proto bpf_probe_read_user_str_proto = {
|
||||
.func = bpf_probe_read_user_str,
|
||||
.gpl_only = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
static __always_inline int
|
||||
bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr,
|
||||
const bool compat)
|
||||
{
|
||||
int ret = security_locked_down(LOCKDOWN_BPF_READ);
|
||||
|
||||
if (unlikely(ret < 0))
|
||||
goto out;
|
||||
ret = compat ? probe_kernel_read(dst, unsafe_ptr, size) :
|
||||
probe_kernel_read_strict(dst, unsafe_ptr, size);
|
||||
if (unlikely(ret < 0))
|
||||
out:
|
||||
memset(dst, 0, size);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * BPF helper body: forwards dst, size and unsafe_ptr to
 * bpf_probe_read_kernel_common() with compat == false and returns its
 * result unchanged.
 */
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
|
||||
const void *, unsafe_ptr)
|
||||
{
|
||||
return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, false);
|
||||
}
|
||||
|
||||
/*
 * Helper prototype binding bpf_probe_read_kernel: marked GPL-only, return
 * type RET_INTEGER; argument types are ARG_PTR_TO_UNINIT_MEM (dst),
 * ARG_CONST_SIZE_OR_ZERO (size) and ARG_ANYTHING (unsafe_ptr).
 */
static const struct bpf_func_proto bpf_probe_read_kernel_proto = {
|
||||
.func = bpf_probe_read_kernel,
|
||||
.gpl_only = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
/*
 * BPF helper body: forwards dst, size and unsafe_ptr to
 * bpf_probe_read_kernel_common() with compat == true and returns its
 * result unchanged.
 */
BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
|
||||
const void *, unsafe_ptr)
|
||||
{
|
||||
return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, true);
|
||||
}
|
||||
|
||||
/*
 * Helper prototype binding bpf_probe_read_compat: marked GPL-only, return
 * type RET_INTEGER; argument types are ARG_PTR_TO_UNINIT_MEM (dst),
 * ARG_CONST_SIZE_OR_ZERO (size) and ARG_ANYTHING (unsafe_ptr).
 */
static const struct bpf_func_proto bpf_probe_read_compat_proto = {
|
||||
.func = bpf_probe_read_compat,
|
||||
.gpl_only = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
static __always_inline int
|
||||
bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr,
|
||||
const bool compat)
|
||||
{
|
||||
int ret = security_locked_down(LOCKDOWN_BPF_READ);
|
||||
|
||||
if (unlikely(ret < 0))
|
||||
goto out;
|
||||
/*
|
||||
* The strncpy_from_unsafe_*() call will likely not fill the entire
|
||||
* buffer, but that's okay in this circumstance as we're probing
|
||||
* arbitrary memory anyway similar to bpf_probe_read_*() and might
|
||||
* as well probe the stack. Thus, memory is explicitly cleared
|
||||
* only in error case, so that improper users ignoring return
|
||||
* code altogether don't copy garbage; otherwise length of string
|
||||
* is returned that can be used for bpf_perf_event_output() et al.
|
||||
*/
|
||||
ret = compat ? strncpy_from_unsafe(dst, unsafe_ptr, size) :
|
||||
strncpy_from_unsafe_strict(dst, unsafe_ptr, size);
|
||||
if (unlikely(ret < 0))
|
||||
out:
|
||||
memset(dst, 0, size);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * BPF helper body: forwards dst, size and unsafe_ptr to
 * bpf_probe_read_kernel_str_common() with compat == false and returns its
 * result unchanged.
 */
BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
|
||||
const void *, unsafe_ptr)
|
||||
{
|
||||
return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, false);
|
||||
}
|
||||
|
||||
/*
 * Helper prototype binding bpf_probe_read_kernel_str: marked GPL-only,
 * return type RET_INTEGER; argument types are ARG_PTR_TO_UNINIT_MEM (dst),
 * ARG_CONST_SIZE_OR_ZERO (size) and ARG_ANYTHING (unsafe_ptr).
 */
static const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
|
||||
.func = bpf_probe_read_kernel_str,
|
||||
.gpl_only = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
/*
 * BPF helper body: forwards dst, size and unsafe_ptr to
 * bpf_probe_read_kernel_str_common() with compat == true and returns its
 * result unchanged.
 */
BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
|
||||
const void *, unsafe_ptr)
|
||||
{
|
||||
return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, true);
|
||||
}
|
||||
|
||||
/*
 * Helper prototype binding bpf_probe_read_compat_str: marked GPL-only,
 * return type RET_INTEGER; argument types are ARG_PTR_TO_UNINIT_MEM (dst),
 * ARG_CONST_SIZE_OR_ZERO (size) and ARG_ANYTHING (unsafe_ptr).
 */
static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
|
||||
.func = bpf_probe_read_compat_str,
|
||||
.gpl_only = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
|
||||
u32, size)
|
||||
{
|
||||
/*
|
||||
@ -186,10 +302,8 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
|
||||
return -EPERM;
|
||||
if (unlikely(!nmi_uaccess_okay()))
|
||||
return -EPERM;
|
||||
if (!access_ok(unsafe_ptr, size))
|
||||
return -EPERM;
|
||||
|
||||
return probe_kernel_write(unsafe_ptr, src, size);
|
||||
return probe_user_write(unsafe_ptr, src, size);
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_probe_write_user_proto = {
|
||||
@ -585,41 +699,6 @@ static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
|
||||
const void *, unsafe_ptr)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = security_locked_down(LOCKDOWN_BPF_READ);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* The strncpy_from_unsafe() call will likely not fill the entire
|
||||
* buffer, but that's okay in this circumstance as we're probing
|
||||
* arbitrary memory anyway similar to bpf_probe_read() and might
|
||||
* as well probe the stack. Thus, memory is explicitly cleared
|
||||
* only in error case, so that improper users ignoring return
|
||||
* code altogether don't copy garbage; otherwise length of string
|
||||
* is returned that can be used for bpf_perf_event_output() et al.
|
||||
*/
|
||||
ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
|
||||
if (unlikely(ret < 0))
|
||||
out:
|
||||
memset(dst, 0, size);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_probe_read_str_proto = {
|
||||
.func = bpf_probe_read_str,
|
||||
.gpl_only = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
struct send_signal_irq_work {
|
||||
struct irq_work irq_work;
|
||||
struct task_struct *task;
|
||||
@ -699,8 +778,6 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_map_pop_elem_proto;
|
||||
case BPF_FUNC_map_peek_elem:
|
||||
return &bpf_map_peek_elem_proto;
|
||||
case BPF_FUNC_probe_read:
|
||||
return &bpf_probe_read_proto;
|
||||
case BPF_FUNC_ktime_get_ns:
|
||||
return &bpf_ktime_get_ns_proto;
|
||||
case BPF_FUNC_tail_call:
|
||||
@ -727,8 +804,18 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_current_task_under_cgroup_proto;
|
||||
case BPF_FUNC_get_prandom_u32:
|
||||
return &bpf_get_prandom_u32_proto;
|
||||
case BPF_FUNC_probe_read_user:
|
||||
return &bpf_probe_read_user_proto;
|
||||
case BPF_FUNC_probe_read_kernel:
|
||||
return &bpf_probe_read_kernel_proto;
|
||||
case BPF_FUNC_probe_read:
|
||||
return &bpf_probe_read_compat_proto;
|
||||
case BPF_FUNC_probe_read_user_str:
|
||||
return &bpf_probe_read_user_str_proto;
|
||||
case BPF_FUNC_probe_read_kernel_str:
|
||||
return &bpf_probe_read_kernel_str_proto;
|
||||
case BPF_FUNC_probe_read_str:
|
||||
return &bpf_probe_read_str_proto;
|
||||
return &bpf_probe_read_compat_str_proto;
|
||||
#ifdef CONFIG_CGROUPS
|
||||
case BPF_FUNC_get_current_cgroup_id:
|
||||
return &bpf_get_current_cgroup_id_proto;
|
||||
@ -1055,10 +1142,6 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
switch (func_id) {
|
||||
case BPF_FUNC_perf_event_output:
|
||||
return &bpf_perf_event_output_proto_raw_tp;
|
||||
#ifdef CONFIG_NET
|
||||
case BPF_FUNC_skb_output:
|
||||
return &bpf_skb_output_proto;
|
||||
#endif
|
||||
case BPF_FUNC_get_stackid:
|
||||
return &bpf_get_stackid_proto_raw_tp;
|
||||
case BPF_FUNC_get_stack:
|
||||
@ -1068,20 +1151,44 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
}
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
{
|
||||
switch (func_id) {
|
||||
#ifdef CONFIG_NET
|
||||
case BPF_FUNC_skb_output:
|
||||
return &bpf_skb_output_proto;
|
||||
#endif
|
||||
default:
|
||||
return raw_tp_prog_func_proto(func_id, prog);
|
||||
}
|
||||
}
|
||||
|
||||
static bool raw_tp_prog_is_valid_access(int off, int size,
|
||||
enum bpf_access_type type,
|
||||
const struct bpf_prog *prog,
|
||||
struct bpf_insn_access_aux *info)
|
||||
{
|
||||
/* largest tracepoint in the kernel has 12 args */
|
||||
if (off < 0 || off >= sizeof(__u64) * 12)
|
||||
if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
|
||||
return false;
|
||||
if (type != BPF_READ)
|
||||
return false;
|
||||
if (off % size != 0)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool tracing_prog_is_valid_access(int off, int size,
|
||||
enum bpf_access_type type,
|
||||
const struct bpf_prog *prog,
|
||||
struct bpf_insn_access_aux *info)
|
||||
{
|
||||
if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
|
||||
return false;
|
||||
if (type != BPF_READ)
|
||||
return false;
|
||||
if (off % size != 0)
|
||||
return false;
|
||||
if (!prog->aux->attach_btf_id)
|
||||
return true;
|
||||
return btf_ctx_access(off, size, type, prog, info);
|
||||
}
|
||||
|
||||
@ -1093,6 +1200,14 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
|
||||
const struct bpf_prog_ops raw_tracepoint_prog_ops = {
|
||||
};
|
||||
|
||||
/*
 * Verifier callbacks: helper lookup goes through tracing_prog_func_proto()
 * and context-access validation through tracing_prog_is_valid_access().
 */
const struct bpf_verifier_ops tracing_verifier_ops = {
|
||||
.get_func_proto = tracing_prog_func_proto,
|
||||
.is_valid_access = tracing_prog_is_valid_access,
|
||||
};
|
||||
|
||||
/* No members are set here; every field keeps its zero-initialized default. */
const struct bpf_prog_ops tracing_prog_ops = {
|
||||
};
|
||||
|
||||
static bool raw_tp_writable_prog_is_valid_access(int off, int size,
|
||||
enum bpf_access_type type,
|
||||
const struct bpf_prog *prog,
|
||||
|
112
lib/test_bpf.c
112
lib/test_bpf.c
@ -6859,34 +6859,128 @@ err_page0:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static __init int test_skb_segment(void)
|
||||
static __init struct sk_buff *build_test_skb_linear_no_head_frag(void)
|
||||
{
|
||||
unsigned int alloc_size = 2000;
|
||||
unsigned int headroom = 102, doffset = 72, data_size = 1308;
|
||||
struct sk_buff *skb[2];
|
||||
int i;
|
||||
|
||||
/* skbs linked in a frag_list, both with linear data, with head_frag=0
|
||||
* (data allocated by kmalloc), both have tcp data of 1308 bytes
|
||||
* (total payload is 2616 bytes).
|
||||
* Data offset is 72 bytes (40 ipv6 hdr, 32 tcp hdr). Some headroom.
|
||||
*/
|
||||
for (i = 0; i < 2; i++) {
|
||||
skb[i] = alloc_skb(alloc_size, GFP_KERNEL);
|
||||
if (!skb[i]) {
|
||||
if (i == 0)
|
||||
goto err_skb0;
|
||||
else
|
||||
goto err_skb1;
|
||||
}
|
||||
|
||||
skb[i]->protocol = htons(ETH_P_IPV6);
|
||||
skb_reserve(skb[i], headroom);
|
||||
skb_put(skb[i], doffset + data_size);
|
||||
skb_reset_network_header(skb[i]);
|
||||
if (i == 0)
|
||||
skb_reset_mac_header(skb[i]);
|
||||
else
|
||||
skb_set_mac_header(skb[i], -ETH_HLEN);
|
||||
__skb_pull(skb[i], doffset);
|
||||
}
|
||||
|
||||
/* setup shinfo.
|
||||
* mimic bpf_skb_proto_4_to_6, which resets gso_segs and assigns a
|
||||
* reduced gso_size.
|
||||
*/
|
||||
skb_shinfo(skb[0])->gso_size = 1288;
|
||||
skb_shinfo(skb[0])->gso_type = SKB_GSO_TCPV6 | SKB_GSO_DODGY;
|
||||
skb_shinfo(skb[0])->gso_segs = 0;
|
||||
skb_shinfo(skb[0])->frag_list = skb[1];
|
||||
|
||||
/* adjust skb[0]'s len */
|
||||
skb[0]->len += skb[1]->len;
|
||||
skb[0]->data_len += skb[1]->len;
|
||||
skb[0]->truesize += skb[1]->truesize;
|
||||
|
||||
return skb[0];
|
||||
|
||||
err_skb1:
|
||||
kfree_skb(skb[0]);
|
||||
err_skb0:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * One skb_segment() test case: a label printed by the test runner, a
 * constructor for the skb under test, and the feature mask passed to
 * skb_segment().
 */
struct skb_segment_test {
|
||||
const char *descr;
|
||||
struct sk_buff *(*build_skb)(void);
|
||||
netdev_features_t features;
|
||||
};
|
||||
|
||||
/*
 * Table of skb_segment() test cases. Each entry names the case, supplies
 * the function that builds the skb, and gives the netdev feature mask the
 * segmentation is run with.
 *
 * .features is a bit mask, so every term must be a NETIF_F_<name> mask
 * constant. The NETIF_F_<name>_BIT constants are bit positions and must not
 * be OR-ed in directly: doing so sets unrelated low bits and leaves the
 * intended feature unset.
 */
static struct skb_segment_test skb_segment_tests[] __initconst = {
	{
		.descr = "gso_with_rx_frags",
		.build_skb = build_test_skb,
		.features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
			    NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM
	},
	{
		.descr = "gso_linear_no_head_frag",
		.build_skb = build_test_skb_linear_no_head_frag,
		.features = NETIF_F_SG | NETIF_F_FRAGLIST |
			    NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_GSO |
			    NETIF_F_LLTX | NETIF_F_GRO |
			    NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
			    NETIF_F_HW_VLAN_STAG_TX
	}
};
|
||||
|
||||
static __init int test_skb_segment_single(const struct skb_segment_test *test)
|
||||
{
|
||||
struct sk_buff *skb, *segs;
|
||||
int ret = -1;
|
||||
|
||||
features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
|
||||
NETIF_F_IPV6_CSUM;
|
||||
features |= NETIF_F_RXCSUM;
|
||||
skb = build_test_skb();
|
||||
skb = test->build_skb();
|
||||
if (!skb) {
|
||||
pr_info("%s: failed to build_test_skb", __func__);
|
||||
goto done;
|
||||
}
|
||||
|
||||
segs = skb_segment(skb, features);
|
||||
segs = skb_segment(skb, test->features);
|
||||
if (!IS_ERR(segs)) {
|
||||
kfree_skb_list(segs);
|
||||
ret = 0;
|
||||
pr_info("%s: success in skb_segment!", __func__);
|
||||
} else {
|
||||
pr_info("%s: failed in skb_segment!", __func__);
|
||||
}
|
||||
kfree_skb(skb);
|
||||
done:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __init int test_skb_segment(void)
|
||||
{
|
||||
int i, err_cnt = 0, pass_cnt = 0;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(skb_segment_tests); i++) {
|
||||
const struct skb_segment_test *test = &skb_segment_tests[i];
|
||||
|
||||
pr_info("#%d %s ", i, test->descr);
|
||||
|
||||
if (test_skb_segment_single(test)) {
|
||||
pr_cont("FAIL\n");
|
||||
err_cnt++;
|
||||
} else {
|
||||
pr_cont("PASS\n");
|
||||
pass_cnt++;
|
||||
}
|
||||
}
|
||||
|
||||
pr_info("%s: Summary: %d PASSED, %d FAILED\n", __func__,
|
||||
pass_cnt, err_cnt);
|
||||
return err_cnt ? -EINVAL : 0;
|
||||
}
|
||||
|
||||
static __init int test_bpf(void)
|
||||
{
|
||||
int i, err_cnt = 0, pass_cnt = 0;
|
||||
|
70
mm/maccess.c
70
mm/maccess.c
@ -18,6 +18,18 @@ probe_read_common(void *dst, const void __user *src, size_t size)
|
||||
return ret ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static __always_inline long
|
||||
probe_write_common(void __user *dst, const void *src, size_t size)
|
||||
{
|
||||
long ret;
|
||||
|
||||
pagefault_disable();
|
||||
ret = __copy_to_user_inatomic(dst, src, size);
|
||||
pagefault_enable();
|
||||
|
||||
return ret ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* probe_kernel_read(): safely attempt to read from a kernel-space location
|
||||
* @dst: pointer to the buffer that shall take the data
|
||||
@ -31,11 +43,20 @@ probe_read_common(void *dst, const void __user *src, size_t size)
|
||||
* do_page_fault() doesn't attempt to take mmap_sem. This makes
|
||||
* probe_kernel_read() suitable for use within regions where the caller
|
||||
* already holds mmap_sem, or other locks which nest inside mmap_sem.
|
||||
*
|
||||
* probe_kernel_read_strict() is the same as probe_kernel_read() except for
|
||||
* the case where architectures have non-overlapping user and kernel address
|
||||
* ranges: probe_kernel_read_strict() will additionally return -EFAULT for
|
||||
* probing memory on a user address range where probe_user_read() is supposed
|
||||
* to be used instead.
|
||||
*/
|
||||
|
||||
long __weak probe_kernel_read(void *dst, const void *src, size_t size)
|
||||
__attribute__((alias("__probe_kernel_read")));
|
||||
|
||||
long __weak probe_kernel_read_strict(void *dst, const void *src, size_t size)
|
||||
__attribute__((alias("__probe_kernel_read")));
|
||||
|
||||
long __probe_kernel_read(void *dst, const void *src, size_t size)
|
||||
{
|
||||
long ret;
|
||||
@ -85,6 +106,7 @@ EXPORT_SYMBOL_GPL(probe_user_read);
|
||||
* Safely write to address @dst from the buffer at @src. If a kernel fault
|
||||
* happens, handle that and return -EFAULT.
|
||||
*/
|
||||
|
||||
long __weak probe_kernel_write(void *dst, const void *src, size_t size)
|
||||
__attribute__((alias("__probe_kernel_write")));
|
||||
|
||||
@ -94,15 +116,39 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
|
||||
mm_segment_t old_fs = get_fs();
|
||||
|
||||
set_fs(KERNEL_DS);
|
||||
pagefault_disable();
|
||||
ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
|
||||
pagefault_enable();
|
||||
ret = probe_write_common((__force void __user *)dst, src, size);
|
||||
set_fs(old_fs);
|
||||
|
||||
return ret ? -EFAULT : 0;
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(probe_kernel_write);
|
||||
|
||||
/**
|
||||
* probe_user_write(): safely attempt to write to a user-space location
|
||||
* @dst: address to write to
|
||||
* @src: pointer to the data that shall be written
|
||||
* @size: size of the data chunk
|
||||
*
|
||||
* Safely write to address @dst from the buffer at @src. If a kernel fault
|
||||
* happens, handle that and return -EFAULT.
|
||||
*/
|
||||
|
||||
long __weak probe_user_write(void __user *dst, const void *src, size_t size)
|
||||
__attribute__((alias("__probe_user_write")));
|
||||
|
||||
/*
 * Write size bytes from src to the __user address dst.
 *
 * The address limit is switched to USER_DS for the duration of the call and
 * restored afterwards. The write itself goes through probe_write_common()
 * and is attempted only when access_ok(dst, size) holds.
 *
 * Returns the result of probe_write_common(), or -EFAULT when access_ok()
 * rejects the range.
 */
long __probe_user_write(void __user *dst, const void *src, size_t size)
{
	mm_segment_t saved_fs = get_fs();
	long ret;

	set_fs(USER_DS);
	ret = access_ok(dst, size) ? probe_write_common(dst, src, size)
				   : -EFAULT;
	set_fs(saved_fs);

	return ret;
}
|
||||
EXPORT_SYMBOL_GPL(probe_user_write);
|
||||
|
||||
/**
|
||||
* strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address.
|
||||
@ -120,8 +166,22 @@ EXPORT_SYMBOL_GPL(probe_kernel_write);
|
||||
*
|
||||
* If @count is smaller than the length of the string, copies @count-1 bytes,
|
||||
* sets the last byte of @dst buffer to NUL and returns @count.
|
||||
*
|
||||
* strncpy_from_unsafe_strict() is the same as strncpy_from_unsafe() except
|
||||
* for the case where architectures have non-overlapping user and kernel address
|
||||
* ranges: strncpy_from_unsafe_strict() will additionally return -EFAULT for
|
||||
* probing memory on a user address range where strncpy_from_unsafe_user() is
|
||||
* supposed to be used instead.
|
||||
*/
|
||||
long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
|
||||
|
||||
long __weak strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
|
||||
__attribute__((alias("__strncpy_from_unsafe")));
|
||||
|
||||
long __weak strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr,
|
||||
long count)
|
||||
__attribute__((alias("__strncpy_from_unsafe")));
|
||||
|
||||
long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
|
||||
{
|
||||
mm_segment_t old_fs = get_fs();
|
||||
const void *src = unsafe_addr;
|
||||
|
@ -196,7 +196,7 @@ static bool xsk_is_bound(struct xdp_sock *xs)
|
||||
return false;
|
||||
}
|
||||
|
||||
int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
||||
static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
||||
{
|
||||
u32 len;
|
||||
|
||||
@ -212,7 +212,7 @@ int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
||||
__xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
|
||||
}
|
||||
|
||||
void xsk_flush(struct xdp_sock *xs)
|
||||
static void xsk_flush(struct xdp_sock *xs)
|
||||
{
|
||||
xskq_produce_flush_desc(xs->rx);
|
||||
xs->sk.sk_data_ready(&xs->sk);
|
||||
@ -264,6 +264,35 @@ out_unlock:
|
||||
return err;
|
||||
}
|
||||
|
||||
int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
|
||||
struct xdp_sock *xs)
|
||||
{
|
||||
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||
struct list_head *flush_list = this_cpu_ptr(m->flush_list);
|
||||
int err;
|
||||
|
||||
err = xsk_rcv(xs, xdp);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (!xs->flush_node.prev)
|
||||
list_add(&xs->flush_node, flush_list);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __xsk_map_flush(struct bpf_map *map)
|
||||
{
|
||||
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||
struct list_head *flush_list = this_cpu_ptr(m->flush_list);
|
||||
struct xdp_sock *xs, *tmp;
|
||||
|
||||
list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
|
||||
xsk_flush(xs);
|
||||
__list_del_clearprev(&xs->flush_node);
|
||||
}
|
||||
}
|
||||
|
||||
void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
|
||||
{
|
||||
xskq_produce_flush_addr_n(umem->cq, nb_entries);
|
||||
|
@ -181,8 +181,8 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx)
|
||||
if (addrlen != sizeof(*in6))
|
||||
return 0;
|
||||
|
||||
ret = bpf_probe_read(test_params.dst6, sizeof(test_params.dst6),
|
||||
&in6->sin6_addr);
|
||||
ret = bpf_probe_read_user(test_params.dst6, sizeof(test_params.dst6),
|
||||
&in6->sin6_addr);
|
||||
if (ret)
|
||||
goto done;
|
||||
|
||||
|
@ -118,7 +118,7 @@ int trace_sys_connect(struct pt_regs *ctx)
|
||||
if (addrlen != sizeof(*in6))
|
||||
return 0;
|
||||
|
||||
ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr);
|
||||
ret = bpf_probe_read_user(dst6, sizeof(dst6), &in6->sin6_addr);
|
||||
if (ret) {
|
||||
inline_ret = ret;
|
||||
goto done;
|
||||
@ -129,7 +129,7 @@ int trace_sys_connect(struct pt_regs *ctx)
|
||||
|
||||
test_case = dst6[7];
|
||||
|
||||
ret = bpf_probe_read(&port, sizeof(port), &in6->sin6_port);
|
||||
ret = bpf_probe_read_user(&port, sizeof(port), &in6->sin6_port);
|
||||
if (ret) {
|
||||
inline_ret = ret;
|
||||
goto done;
|
||||
|
@ -37,7 +37,7 @@ int bpf_prog1(struct pt_regs *ctx)
|
||||
if (sockaddr_len > sizeof(orig_addr))
|
||||
return 0;
|
||||
|
||||
if (bpf_probe_read(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
|
||||
if (bpf_probe_read_user(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
|
||||
return 0;
|
||||
|
||||
mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr);
|
||||
|
@ -173,6 +173,7 @@ enum bpf_prog_type {
|
||||
BPF_PROG_TYPE_CGROUP_SYSCTL,
|
||||
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
|
||||
BPF_PROG_TYPE_CGROUP_SOCKOPT,
|
||||
BPF_PROG_TYPE_TRACING,
|
||||
};
|
||||
|
||||
enum bpf_attach_type {
|
||||
@ -199,6 +200,7 @@ enum bpf_attach_type {
|
||||
BPF_CGROUP_UDP6_RECVMSG,
|
||||
BPF_CGROUP_GETSOCKOPT,
|
||||
BPF_CGROUP_SETSOCKOPT,
|
||||
BPF_TRACE_RAW_TP,
|
||||
__MAX_BPF_ATTACH_TYPE
|
||||
};
|
||||
|
||||
@ -561,10 +563,13 @@ union bpf_attr {
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* int bpf_probe_read(void *dst, u32 size, const void *src)
|
||||
* int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
|
||||
* Description
|
||||
* For tracing programs, safely attempt to read *size* bytes from
|
||||
* address *src* and store the data in *dst*.
|
||||
* kernel space address *unsafe_ptr* and store the data in *dst*.
|
||||
*
|
||||
* Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
|
||||
* instead.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
@ -1426,45 +1431,14 @@ union bpf_attr {
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
|
||||
* int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
|
||||
* Description
|
||||
* Copy a NUL terminated string from an unsafe address
|
||||
* *unsafe_ptr* to *dst*. The *size* should include the
|
||||
* terminating NUL byte. In case the string length is smaller than
|
||||
* *size*, the target is not padded with further NUL bytes. If the
|
||||
* string length is larger than *size*, just *size*-1 bytes are
|
||||
* copied and the last byte is set to NUL.
|
||||
* Copy a NUL terminated string from an unsafe kernel address
|
||||
* *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
|
||||
* more details.
|
||||
*
|
||||
* On success, the length of the copied string is returned. This
|
||||
* makes this helper useful in tracing programs for reading
|
||||
* strings, and more importantly to get its length at runtime. See
|
||||
* the following snippet:
|
||||
*
|
||||
* ::
|
||||
*
|
||||
* SEC("kprobe/sys_open")
|
||||
* void bpf_sys_open(struct pt_regs *ctx)
|
||||
* {
|
||||
* char buf[PATHLEN]; // PATHLEN is defined to 256
|
||||
* int res = bpf_probe_read_str(buf, sizeof(buf),
|
||||
* ctx->di);
|
||||
*
|
||||
* // Consume buf, for example push it to
|
||||
* // userspace via bpf_perf_event_output(); we
|
||||
* // can use res (the string length) as event
|
||||
* // size, after checking its boundaries.
|
||||
* }
|
||||
*
|
||||
* In comparison, using **bpf_probe_read()** helper here instead
|
||||
* to read the string would require to estimate the length at
|
||||
* compile time, and would often result in copying more memory
|
||||
* than necessary.
|
||||
*
|
||||
* Another useful use case is when parsing individual process
|
||||
* arguments or individual environment variables navigating
|
||||
* *current*\ **->mm->arg_start** and *current*\
|
||||
* **->mm->env_start**: using this helper and the return value,
|
||||
* one can quickly iterate at the right offset of the memory area.
|
||||
* Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
|
||||
* instead.
|
||||
* Return
|
||||
* On success, the strictly positive length of the string,
|
||||
* including the trailing NUL character. On error, a negative
|
||||
@ -2775,6 +2749,72 @@ union bpf_attr {
|
||||
* restricted to raw_tracepoint bpf programs.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
|
||||
* Description
|
||||
* Safely attempt to read *size* bytes from user space address
|
||||
* *unsafe_ptr* and store the data in *dst*.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
|
||||
* Description
|
||||
* Safely attempt to read *size* bytes from kernel space address
|
||||
* *unsafe_ptr* and store the data in *dst*.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
|
||||
* Description
|
||||
* Copy a NUL terminated string from an unsafe user address
|
||||
* *unsafe_ptr* to *dst*. The *size* should include the
|
||||
* terminating NUL byte. In case the string length is smaller than
|
||||
* *size*, the target is not padded with further NUL bytes. If the
|
||||
* string length is larger than *size*, just *size*-1 bytes are
|
||||
* copied and the last byte is set to NUL.
|
||||
*
|
||||
* On success, the length of the copied string is returned. This
|
||||
* makes this helper useful in tracing programs for reading
|
||||
* strings, and more importantly to get its length at runtime. See
|
||||
* the following snippet:
|
||||
*
|
||||
* ::
|
||||
*
|
||||
* SEC("kprobe/sys_open")
|
||||
* void bpf_sys_open(struct pt_regs *ctx)
|
||||
* {
|
||||
* char buf[PATHLEN]; // PATHLEN is defined to 256
|
||||
* int res = bpf_probe_read_user_str(buf, sizeof(buf),
|
||||
* ctx->di);
|
||||
*
|
||||
* // Consume buf, for example push it to
|
||||
* // userspace via bpf_perf_event_output(); we
|
||||
* // can use res (the string length) as event
|
||||
* // size, after checking its boundaries.
|
||||
* }
|
||||
*
|
||||
* In comparison, using **bpf_probe_read_user()** helper here
|
||||
* instead to read the string would require estimating the length
|
||||
* at compile time, and would often result in copying more memory
|
||||
* than necessary.
|
||||
*
|
||||
* Another useful use case is when parsing individual process
|
||||
* arguments or individual environment variables navigating
|
||||
* *current*\ **->mm->arg_start** and *current*\
|
||||
* **->mm->env_start**: using this helper and the return value,
|
||||
* one can quickly iterate at the right offset of the memory area.
|
||||
* Return
|
||||
* On success, the strictly positive length of the string,
|
||||
* including the trailing NUL character. On error, a negative
|
||||
* value.
|
||||
*
|
||||
* int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
|
||||
* Description
|
||||
* Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
|
||||
* to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
|
||||
* Return
|
||||
* On success, the strictly positive length of the string, including
|
||||
* the trailing NUL character. On error, a negative value.
|
||||
*/
|
||||
#define __BPF_FUNC_MAPPER(FN) \
|
||||
FN(unspec), \
|
||||
@ -2888,7 +2928,11 @@ union bpf_attr {
|
||||
FN(sk_storage_delete), \
|
||||
FN(send_signal), \
|
||||
FN(tcp_gen_syncookie), \
|
||||
FN(skb_output),
|
||||
FN(skb_output), \
|
||||
FN(probe_read_user), \
|
||||
FN(probe_read_kernel), \
|
||||
FN(probe_read_user_str), \
|
||||
FN(probe_read_kernel_str),
|
||||
|
||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||
* function eBPF program intends to call
|
||||
|
@ -228,9 +228,10 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
|
||||
memset(&attr, 0, sizeof(attr));
|
||||
attr.prog_type = load_attr->prog_type;
|
||||
attr.expected_attach_type = load_attr->expected_attach_type;
|
||||
if (attr.prog_type == BPF_PROG_TYPE_RAW_TRACEPOINT)
|
||||
/* expected_attach_type is ignored for tracing progs */
|
||||
attr.attach_btf_id = attr.expected_attach_type;
|
||||
if (attr.prog_type == BPF_PROG_TYPE_TRACING)
|
||||
attr.attach_btf_id = load_attr->attach_btf_id;
|
||||
else
|
||||
attr.prog_ifindex = load_attr->prog_ifindex;
|
||||
attr.insn_cnt = (__u32)load_attr->insns_cnt;
|
||||
attr.insns = ptr_to_u64(load_attr->insns);
|
||||
attr.license = ptr_to_u64(load_attr->license);
|
||||
@ -245,7 +246,6 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
|
||||
}
|
||||
|
||||
attr.kern_version = load_attr->kern_version;
|
||||
attr.prog_ifindex = load_attr->prog_ifindex;
|
||||
attr.prog_btf_fd = load_attr->prog_btf_fd;
|
||||
attr.func_info_rec_size = load_attr->func_info_rec_size;
|
||||
attr.func_info_cnt = load_attr->func_info_cnt;
|
||||
|
@ -78,7 +78,10 @@ struct bpf_load_program_attr {
|
||||
size_t insns_cnt;
|
||||
const char *license;
|
||||
__u32 kern_version;
|
||||
__u32 prog_ifindex;
|
||||
union {
|
||||
__u32 prog_ifindex;
|
||||
__u32 attach_btf_id;
|
||||
};
|
||||
__u32 prog_btf_fd;
|
||||
__u32 func_info_rec_size;
|
||||
const void *func_info;
|
||||
|
@ -38,4 +38,10 @@ struct bpf_map_def {
|
||||
unsigned int map_flags;
|
||||
};
|
||||
|
||||
/* Values accepted for the 'pinning' field of a BTF-defined map. */
enum libbpf_pin_type {
|
||||
/* PIN_NONE: no pin path is derived for the map */
LIBBPF_PIN_NONE,
|
||||
/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
|
||||
LIBBPF_PIN_BY_NAME,
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -188,6 +188,7 @@ struct bpf_program {
|
||||
bpf_program_clear_priv_t clear_priv;
|
||||
|
||||
enum bpf_attach_type expected_attach_type;
|
||||
__u32 attach_btf_id;
|
||||
void *func_info;
|
||||
__u32 func_info_rec_size;
|
||||
__u32 func_info_cnt;
|
||||
@ -226,6 +227,8 @@ struct bpf_map {
|
||||
void *priv;
|
||||
bpf_map_clear_priv_t clear_priv;
|
||||
enum libbpf_map_type libbpf_type;
|
||||
char *pin_path;
|
||||
bool pinned;
|
||||
};
|
||||
|
||||
struct bpf_secdata {
|
||||
@ -1090,10 +1093,32 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
|
||||
return true;
|
||||
}
|
||||
|
||||
static int build_map_pin_path(struct bpf_map *map, const char *path)
|
||||
{
|
||||
char buf[PATH_MAX];
|
||||
int err, len;
|
||||
|
||||
if (!path)
|
||||
path = "/sys/fs/bpf";
|
||||
|
||||
len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
|
||||
if (len < 0)
|
||||
return -EINVAL;
|
||||
else if (len >= PATH_MAX)
|
||||
return -ENAMETOOLONG;
|
||||
|
||||
err = bpf_map__set_pin_path(map, buf);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bpf_object__init_user_btf_map(struct bpf_object *obj,
|
||||
const struct btf_type *sec,
|
||||
int var_idx, int sec_idx,
|
||||
const Elf_Data *data, bool strict)
|
||||
const Elf_Data *data, bool strict,
|
||||
const char *pin_root_path)
|
||||
{
|
||||
const struct btf_type *var, *def, *t;
|
||||
const struct btf_var_secinfo *vi;
|
||||
@ -1268,6 +1293,30 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
|
||||
}
|
||||
map->def.value_size = sz;
|
||||
map->btf_value_type_id = t->type;
|
||||
} else if (strcmp(name, "pinning") == 0) {
|
||||
__u32 val;
|
||||
int err;
|
||||
|
||||
if (!get_map_field_int(map_name, obj->btf, def, m,
|
||||
&val))
|
||||
return -EINVAL;
|
||||
pr_debug("map '%s': found pinning = %u.\n",
|
||||
map_name, val);
|
||||
|
||||
if (val != LIBBPF_PIN_NONE &&
|
||||
val != LIBBPF_PIN_BY_NAME) {
|
||||
pr_warn("map '%s': invalid pinning value %u.\n",
|
||||
map_name, val);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (val == LIBBPF_PIN_BY_NAME) {
|
||||
err = build_map_pin_path(map, pin_root_path);
|
||||
if (err) {
|
||||
pr_warn("map '%s': couldn't build pin path.\n",
|
||||
map_name);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (strict) {
|
||||
pr_warn("map '%s': unknown field '%s'.\n",
|
||||
@ -1287,7 +1336,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)
|
||||
static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
|
||||
const char *pin_root_path)
|
||||
{
|
||||
const struct btf_type *sec = NULL;
|
||||
int nr_types, i, vlen, err;
|
||||
@ -1329,7 +1379,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)
|
||||
for (i = 0; i < vlen; i++) {
|
||||
err = bpf_object__init_user_btf_map(obj, sec, i,
|
||||
obj->efile.btf_maps_shndx,
|
||||
data, strict);
|
||||
data, strict, pin_root_path);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
@ -1337,7 +1387,8 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps)
|
||||
static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps,
|
||||
const char *pin_root_path)
|
||||
{
|
||||
bool strict = !relaxed_maps;
|
||||
int err;
|
||||
@ -1346,7 +1397,7 @@ static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps)
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = bpf_object__init_user_btf_maps(obj, strict);
|
||||
err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
@ -1535,7 +1586,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps)
|
||||
static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps,
|
||||
const char *pin_root_path)
|
||||
{
|
||||
Elf *elf = obj->efile.elf;
|
||||
GElf_Ehdr *ep = &obj->efile.ehdr;
|
||||
@ -1664,13 +1716,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps)
|
||||
}
|
||||
}
|
||||
|
||||
if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {
|
||||
if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
|
||||
pr_warn("Corrupted ELF file: index of strtab invalid\n");
|
||||
return -LIBBPF_ERRNO__FORMAT;
|
||||
}
|
||||
err = bpf_object__init_btf(obj, btf_data, btf_ext_data);
|
||||
if (!err)
|
||||
err = bpf_object__init_maps(obj, relaxed_maps);
|
||||
err = bpf_object__init_maps(obj, relaxed_maps, pin_root_path);
|
||||
if (!err)
|
||||
err = bpf_object__sanitize_and_load_btf(obj);
|
||||
if (!err)
|
||||
@ -1916,16 +1968,22 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
|
||||
return -errno;
|
||||
|
||||
new_fd = open("/", O_RDONLY | O_CLOEXEC);
|
||||
if (new_fd < 0)
|
||||
if (new_fd < 0) {
|
||||
err = -errno;
|
||||
goto err_free_new_name;
|
||||
}
|
||||
|
||||
new_fd = dup3(fd, new_fd, O_CLOEXEC);
|
||||
if (new_fd < 0)
|
||||
if (new_fd < 0) {
|
||||
err = -errno;
|
||||
goto err_close_new_fd;
|
||||
}
|
||||
|
||||
err = zclose(map->fd);
|
||||
if (err)
|
||||
if (err) {
|
||||
err = -errno;
|
||||
goto err_close_new_fd;
|
||||
}
|
||||
free(map->name);
|
||||
|
||||
map->fd = new_fd;
|
||||
@ -1944,7 +2002,7 @@ err_close_new_fd:
|
||||
close(new_fd);
|
||||
err_free_new_name:
|
||||
free(new_name);
|
||||
return -errno;
|
||||
return err;
|
||||
}
|
||||
|
||||
int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
|
||||
@ -2120,6 +2178,66 @@ bpf_object__probe_caps(struct bpf_object *obj)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
|
||||
{
|
||||
struct bpf_map_info map_info = {};
|
||||
char msg[STRERR_BUFSIZE];
|
||||
__u32 map_info_len;
|
||||
|
||||
map_info_len = sizeof(map_info);
|
||||
|
||||
if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) {
|
||||
pr_warn("failed to get map info for map FD %d: %s\n",
|
||||
map_fd, libbpf_strerror_r(errno, msg, sizeof(msg)));
|
||||
return false;
|
||||
}
|
||||
|
||||
return (map_info.type == map->def.type &&
|
||||
map_info.key_size == map->def.key_size &&
|
||||
map_info.value_size == map->def.value_size &&
|
||||
map_info.max_entries == map->def.max_entries &&
|
||||
map_info.map_flags == map->def.map_flags);
|
||||
}
|
||||
|
||||
static int
|
||||
bpf_object__reuse_map(struct bpf_map *map)
|
||||
{
|
||||
char *cp, errmsg[STRERR_BUFSIZE];
|
||||
int err, pin_fd;
|
||||
|
||||
pin_fd = bpf_obj_get(map->pin_path);
|
||||
if (pin_fd < 0) {
|
||||
err = -errno;
|
||||
if (err == -ENOENT) {
|
||||
pr_debug("found no pinned map to reuse at '%s'\n",
|
||||
map->pin_path);
|
||||
return 0;
|
||||
}
|
||||
|
||||
cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
|
||||
pr_warn("couldn't retrieve pinned map '%s': %s\n",
|
||||
map->pin_path, cp);
|
||||
return err;
|
||||
}
|
||||
|
||||
if (!map_is_reuse_compat(map, pin_fd)) {
|
||||
pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
|
||||
map->pin_path);
|
||||
close(pin_fd);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = bpf_map__reuse_fd(map, pin_fd);
|
||||
if (err) {
|
||||
close(pin_fd);
|
||||
return err;
|
||||
}
|
||||
map->pinned = true;
|
||||
pr_debug("reused pinned map at '%s'\n", map->pin_path);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
|
||||
{
|
||||
@ -2162,6 +2280,15 @@ bpf_object__create_maps(struct bpf_object *obj)
|
||||
char *cp, errmsg[STRERR_BUFSIZE];
|
||||
int *pfd = &map->fd;
|
||||
|
||||
if (map->pin_path) {
|
||||
err = bpf_object__reuse_map(map);
|
||||
if (err) {
|
||||
pr_warn("error reusing pinned map %s\n",
|
||||
map->name);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
if (map->fd >= 0) {
|
||||
pr_debug("skip map create (preset) %s: fd=%d\n",
|
||||
map->name, map->fd);
|
||||
@ -2240,6 +2367,15 @@ err_out:
|
||||
}
|
||||
}
|
||||
|
||||
if (map->pin_path && !map->pinned) {
|
||||
err = bpf_map__pin(map, NULL);
|
||||
if (err) {
|
||||
pr_warn("failed to auto-pin map name '%s' at '%s'\n",
|
||||
map->name, map->pin_path);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
pr_debug("created map %s: fd=%d\n", map->name, *pfd);
|
||||
}
|
||||
|
||||
@ -3446,6 +3582,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
|
||||
load_attr.line_info_cnt = prog->line_info_cnt;
|
||||
load_attr.log_level = prog->log_level;
|
||||
load_attr.prog_flags = prog->prog_flags;
|
||||
load_attr.attach_btf_id = prog->attach_btf_id;
|
||||
|
||||
retry_load:
|
||||
log_buf = malloc(log_buf_size);
|
||||
@ -3607,10 +3744,13 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int libbpf_attach_btf_id_by_name(const char *name, __u32 *btf_id);
|
||||
|
||||
static struct bpf_object *
|
||||
__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
|
||||
struct bpf_object_open_opts *opts)
|
||||
{
|
||||
const char *pin_root_path;
|
||||
struct bpf_program *prog;
|
||||
struct bpf_object *obj;
|
||||
const char *obj_name;
|
||||
@ -3645,17 +3785,20 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
|
||||
|
||||
obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false);
|
||||
relaxed_maps = OPTS_GET(opts, relaxed_maps, false);
|
||||
pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
|
||||
|
||||
CHECK_ERR(bpf_object__elf_init(obj), err, out);
|
||||
CHECK_ERR(bpf_object__check_endianness(obj), err, out);
|
||||
CHECK_ERR(bpf_object__probe_caps(obj), err, out);
|
||||
CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps), err, out);
|
||||
CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps, pin_root_path),
|
||||
err, out);
|
||||
CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
|
||||
bpf_object__elf_finish(obj);
|
||||
|
||||
bpf_object__for_each_program(prog, obj) {
|
||||
enum bpf_prog_type prog_type;
|
||||
enum bpf_attach_type attach_type;
|
||||
__u32 btf_id;
|
||||
|
||||
err = libbpf_prog_type_by_name(prog->section_name, &prog_type,
|
||||
&attach_type);
|
||||
@ -3667,6 +3810,12 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
|
||||
|
||||
bpf_program__set_type(prog, prog_type);
|
||||
bpf_program__set_expected_attach_type(prog, attach_type);
|
||||
if (prog_type == BPF_PROG_TYPE_TRACING) {
|
||||
err = libbpf_attach_btf_id_by_name(prog->section_name, &btf_id);
|
||||
if (err)
|
||||
goto out;
|
||||
prog->attach_btf_id = btf_id;
|
||||
}
|
||||
}
|
||||
|
||||
return obj;
|
||||
@ -3797,6 +3946,28 @@ int bpf_object__load(struct bpf_object *obj)
|
||||
return bpf_object__load_xattr(&attr);
|
||||
}
|
||||
|
||||
static int make_parent_dir(const char *path)
|
||||
{
|
||||
char *cp, errmsg[STRERR_BUFSIZE];
|
||||
char *dname, *dir;
|
||||
int err = 0;
|
||||
|
||||
dname = strdup(path);
|
||||
if (dname == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
dir = dirname(dname);
|
||||
if (mkdir(dir, 0700) && errno != EEXIST)
|
||||
err = -errno;
|
||||
|
||||
free(dname);
|
||||
if (err) {
|
||||
cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
|
||||
pr_warn("failed to mkdir %s: %s\n", path, cp);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int check_path(const char *path)
|
||||
{
|
||||
char *cp, errmsg[STRERR_BUFSIZE];
|
||||
@ -3833,6 +4004,10 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
|
||||
char *cp, errmsg[STRERR_BUFSIZE];
|
||||
int err;
|
||||
|
||||
err = make_parent_dir(path);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = check_path(path);
|
||||
if (err)
|
||||
return err;
|
||||
@ -3886,25 +4061,14 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int make_dir(const char *path)
|
||||
{
|
||||
char *cp, errmsg[STRERR_BUFSIZE];
|
||||
int err = 0;
|
||||
|
||||
if (mkdir(path, 0700) && errno != EEXIST)
|
||||
err = -errno;
|
||||
|
||||
if (err) {
|
||||
cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
|
||||
pr_warn("failed to mkdir %s: %s\n", path, cp);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
int bpf_program__pin(struct bpf_program *prog, const char *path)
|
||||
{
|
||||
int i, err;
|
||||
|
||||
err = make_parent_dir(path);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = check_path(path);
|
||||
if (err)
|
||||
return err;
|
||||
@ -3925,10 +4089,6 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)
|
||||
return bpf_program__pin_instance(prog, path, 0);
|
||||
}
|
||||
|
||||
err = make_dir(path);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
for (i = 0; i < prog->instances.nr; i++) {
|
||||
char buf[PATH_MAX];
|
||||
int len;
|
||||
@ -4019,47 +4179,123 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
|
||||
char *cp, errmsg[STRERR_BUFSIZE];
|
||||
int err;
|
||||
|
||||
err = check_path(path);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (map == NULL) {
|
||||
pr_warn("invalid map pointer\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (bpf_obj_pin(map->fd, path)) {
|
||||
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
|
||||
pr_warn("failed to pin map: %s\n", cp);
|
||||
return -errno;
|
||||
if (map->pin_path) {
|
||||
if (path && strcmp(path, map->pin_path)) {
|
||||
pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
|
||||
bpf_map__name(map), map->pin_path, path);
|
||||
return -EINVAL;
|
||||
} else if (map->pinned) {
|
||||
pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
|
||||
bpf_map__name(map), map->pin_path);
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
if (!path) {
|
||||
pr_warn("missing a path to pin map '%s' at\n",
|
||||
bpf_map__name(map));
|
||||
return -EINVAL;
|
||||
} else if (map->pinned) {
|
||||
pr_warn("map '%s' already pinned\n", bpf_map__name(map));
|
||||
return -EEXIST;
|
||||
}
|
||||
|
||||
map->pin_path = strdup(path);
|
||||
if (!map->pin_path) {
|
||||
err = -errno;
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
|
||||
pr_debug("pinned map '%s'\n", path);
|
||||
err = make_parent_dir(map->pin_path);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = check_path(map->pin_path);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (bpf_obj_pin(map->fd, map->pin_path)) {
|
||||
err = -errno;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
map->pinned = true;
|
||||
pr_debug("pinned map '%s'\n", map->pin_path);
|
||||
|
||||
return 0;
|
||||
|
||||
out_err:
|
||||
cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
|
||||
pr_warn("failed to pin map: %s\n", cp);
|
||||
return err;
|
||||
}
|
||||
|
||||
int bpf_map__unpin(struct bpf_map *map, const char *path)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = check_path(path);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (map == NULL) {
|
||||
pr_warn("invalid map pointer\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (map->pin_path) {
|
||||
if (path && strcmp(path, map->pin_path)) {
|
||||
pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
|
||||
bpf_map__name(map), map->pin_path, path);
|
||||
return -EINVAL;
|
||||
}
|
||||
path = map->pin_path;
|
||||
} else if (!path) {
|
||||
pr_warn("no path to unpin map '%s' from\n",
|
||||
bpf_map__name(map));
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = check_path(path);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = unlink(path);
|
||||
if (err != 0)
|
||||
return -errno;
|
||||
pr_debug("unpinned map '%s'\n", path);
|
||||
|
||||
map->pinned = false;
|
||||
pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
|
||||
{
|
||||
char *new = NULL;
|
||||
|
||||
if (path) {
|
||||
new = strdup(path);
|
||||
if (!new)
|
||||
return -errno;
|
||||
}
|
||||
|
||||
free(map->pin_path);
|
||||
map->pin_path = new;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Return the pin path currently stored in @map, or NULL if none is set.
 * The string stays owned by the map; callers must not free it.
 */
const char *bpf_map__get_pin_path(const struct bpf_map *map)
|
||||
{
|
||||
return map->pin_path;
|
||||
}
|
||||
|
||||
/* Report the map's 'pinned' flag, i.e. whether libbpf currently considers @map pinned. */
bool bpf_map__is_pinned(const struct bpf_map *map)
|
||||
{
|
||||
return map->pinned;
|
||||
}
|
||||
|
||||
int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
|
||||
{
|
||||
struct bpf_map *map;
|
||||
@ -4073,25 +4309,28 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
err = make_dir(path);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
bpf_object__for_each_map(map, obj) {
|
||||
char *pin_path = NULL;
|
||||
char buf[PATH_MAX];
|
||||
int len;
|
||||
|
||||
len = snprintf(buf, PATH_MAX, "%s/%s", path,
|
||||
bpf_map__name(map));
|
||||
if (len < 0) {
|
||||
err = -EINVAL;
|
||||
goto err_unpin_maps;
|
||||
} else if (len >= PATH_MAX) {
|
||||
err = -ENAMETOOLONG;
|
||||
goto err_unpin_maps;
|
||||
if (path) {
|
||||
int len;
|
||||
|
||||
len = snprintf(buf, PATH_MAX, "%s/%s", path,
|
||||
bpf_map__name(map));
|
||||
if (len < 0) {
|
||||
err = -EINVAL;
|
||||
goto err_unpin_maps;
|
||||
} else if (len >= PATH_MAX) {
|
||||
err = -ENAMETOOLONG;
|
||||
goto err_unpin_maps;
|
||||
}
|
||||
pin_path = buf;
|
||||
} else if (!map->pin_path) {
|
||||
continue;
|
||||
}
|
||||
|
||||
err = bpf_map__pin(map, buf);
|
||||
err = bpf_map__pin(map, pin_path);
|
||||
if (err)
|
||||
goto err_unpin_maps;
|
||||
}
|
||||
@ -4100,17 +4339,10 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
|
||||
|
||||
err_unpin_maps:
|
||||
while ((map = bpf_map__prev(map, obj))) {
|
||||
char buf[PATH_MAX];
|
||||
int len;
|
||||
|
||||
len = snprintf(buf, PATH_MAX, "%s/%s", path,
|
||||
bpf_map__name(map));
|
||||
if (len < 0)
|
||||
continue;
|
||||
else if (len >= PATH_MAX)
|
||||
if (!map->pin_path)
|
||||
continue;
|
||||
|
||||
bpf_map__unpin(map, buf);
|
||||
bpf_map__unpin(map, NULL);
|
||||
}
|
||||
|
||||
return err;
|
||||
@ -4125,17 +4357,24 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
|
||||
return -ENOENT;
|
||||
|
||||
bpf_object__for_each_map(map, obj) {
|
||||
char *pin_path = NULL;
|
||||
char buf[PATH_MAX];
|
||||
int len;
|
||||
|
||||
len = snprintf(buf, PATH_MAX, "%s/%s", path,
|
||||
bpf_map__name(map));
|
||||
if (len < 0)
|
||||
return -EINVAL;
|
||||
else if (len >= PATH_MAX)
|
||||
return -ENAMETOOLONG;
|
||||
if (path) {
|
||||
int len;
|
||||
|
||||
err = bpf_map__unpin(map, buf);
|
||||
len = snprintf(buf, PATH_MAX, "%s/%s", path,
|
||||
bpf_map__name(map));
|
||||
if (len < 0)
|
||||
return -EINVAL;
|
||||
else if (len >= PATH_MAX)
|
||||
return -ENAMETOOLONG;
|
||||
pin_path = buf;
|
||||
} else if (!map->pin_path) {
|
||||
continue;
|
||||
}
|
||||
|
||||
err = bpf_map__unpin(map, pin_path);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
@ -4156,10 +4395,6 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
err = make_dir(path);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
bpf_object__for_each_program(prog, obj) {
|
||||
char buf[PATH_MAX];
|
||||
int len;
|
||||
@ -4260,6 +4495,7 @@ void bpf_object__close(struct bpf_object *obj)
|
||||
|
||||
for (i = 0; i < obj->nr_maps; i++) {
|
||||
zfree(&obj->maps[i].name);
|
||||
zfree(&obj->maps[i].pin_path);
|
||||
if (obj->maps[i].clear_priv)
|
||||
obj->maps[i].clear_priv(&obj->maps[i],
|
||||
obj->maps[i].priv);
|
||||
@ -4518,6 +4754,7 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
|
||||
BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
|
||||
BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
|
||||
BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
|
||||
BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
|
||||
|
||||
enum bpf_attach_type
|
||||
bpf_program__get_expected_attach_type(struct bpf_program *prog)
|
||||
@ -4546,7 +4783,8 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
|
||||
BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, 0, eatype)
|
||||
|
||||
/* Programs that use BTF to identify attach point */
|
||||
#define BPF_PROG_BTF(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 1, 0)
|
||||
#define BPF_PROG_BTF(string, ptype, eatype) \
|
||||
BPF_PROG_SEC_IMPL(string, ptype, eatype, 0, 1, 0)
|
||||
|
||||
/* Programs that can be attached but attach type can't be identified by section
|
||||
* name. Kept for backward compatibility.
|
||||
@ -4573,7 +4811,8 @@ static const struct {
|
||||
BPF_PROG_SEC("tp/", BPF_PROG_TYPE_TRACEPOINT),
|
||||
BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT),
|
||||
BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT),
|
||||
BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_RAW_TRACEPOINT),
|
||||
BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_TRACING,
|
||||
BPF_TRACE_RAW_TP),
|
||||
BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
|
||||
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
|
||||
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
|
||||
@ -4678,27 +4917,6 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
|
||||
continue;
|
||||
*prog_type = section_names[i].prog_type;
|
||||
*expected_attach_type = section_names[i].expected_attach_type;
|
||||
if (section_names[i].is_attach_btf) {
|
||||
struct btf *btf = bpf_core_find_kernel_btf();
|
||||
char raw_tp_btf_name[128] = "btf_trace_";
|
||||
char *dst = raw_tp_btf_name + sizeof("btf_trace_") - 1;
|
||||
int ret;
|
||||
|
||||
if (IS_ERR(btf)) {
|
||||
pr_warn("vmlinux BTF is not found\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
/* prepend "btf_trace_" prefix per kernel convention */
|
||||
strncat(dst, name + section_names[i].len,
|
||||
sizeof(raw_tp_btf_name) - sizeof("btf_trace_"));
|
||||
ret = btf__find_by_name(btf, raw_tp_btf_name);
|
||||
btf__free(btf);
|
||||
if (ret <= 0) {
|
||||
pr_warn("%s is not found in vmlinux BTF\n", dst);
|
||||
return -EINVAL;
|
||||
}
|
||||
*expected_attach_type = ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
pr_warn("failed to guess program type based on ELF section name '%s'\n", name);
|
||||
@ -4711,6 +4929,46 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
|
||||
return -ESRCH;
|
||||
}
|
||||
|
||||
#define BTF_PREFIX "btf_trace_"
|
||||
/*
 * Resolve the BTF type id to attach to from an ELF section name.
 *
 * @name is matched against the section_names[] entries flagged
 * is_attach_btf. The part of @name after the matched section prefix is
 * appended to "btf_trace_" and looked up in the kernel BTF.
 *
 * On success stores the id in *@btf_id and returns 0. Returns -EINVAL if
 * the kernel BTF is unavailable, @name is NULL, or the type is not found;
 * -ESRCH if no BTF-attach section prefix matches @name.
 */
static int libbpf_attach_btf_id_by_name(const char *name, __u32 *btf_id)
{
	char raw_tp_btf_name[128] = BTF_PREFIX;
	char *suffix = raw_tp_btf_name + sizeof(BTF_PREFIX) - 1;
	int id, i, err = -EINVAL;
	struct btf *btf;

	btf = bpf_core_find_kernel_btf();
	if (IS_ERR(btf)) {
		pr_warn("vmlinux BTF is not found\n");
		return -EINVAL;
	}

	if (!name)
		goto out;

	/* locate the first BTF-attach section prefix that matches */
	for (i = 0; i < ARRAY_SIZE(section_names); i++) {
		if (section_names[i].is_attach_btf &&
		    !strncmp(name, section_names[i].sec, section_names[i].len))
			break;
	}
	if (i == ARRAY_SIZE(section_names)) {
		pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
		err = -ESRCH;
		goto out;
	}

	/* prepend "btf_trace_" prefix per kernel convention */
	strncat(suffix, name + section_names[i].len,
		sizeof(raw_tp_btf_name) - sizeof(BTF_PREFIX));
	id = btf__find_by_name(btf, raw_tp_btf_name);
	if (id <= 0) {
		pr_warn("%s is not found in vmlinux BTF\n", suffix);
		goto out;
	}

	*btf_id = id;
	err = 0;
out:
	btf__free(btf);
	return err;
}
|
||||
|
||||
int libbpf_attach_type_by_name(const char *name,
|
||||
enum bpf_attach_type *attach_type)
|
||||
{
|
||||
|
@ -103,8 +103,13 @@ struct bpf_object_open_opts {
|
||||
bool relaxed_maps;
|
||||
/* process CO-RE relocations non-strictly, allowing them to fail */
|
||||
bool relaxed_core_relocs;
|
||||
/* maps that set the 'pinning' attribute in their definition will have
|
||||
* their pin_path attribute set to a file in this directory, and be
|
||||
* auto-pinned to that path on load; defaults to "/sys/fs/bpf".
|
||||
*/
|
||||
const char *pin_root_path;
|
||||
};
|
||||
#define bpf_object_open_opts__last_field relaxed_core_relocs
|
||||
#define bpf_object_open_opts__last_field pin_root_path
|
||||
|
||||
LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
|
||||
LIBBPF_API struct bpf_object *
|
||||
@ -124,6 +129,17 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
|
||||
__u32 *size);
|
||||
int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
|
||||
__u32 *off);
|
||||
|
||||
/* Values accepted for the 'pinning' field of a BTF-defined map. */
enum libbpf_pin_type {
|
||||
/* PIN_NONE: no pin path is derived for the map */
LIBBPF_PIN_NONE,
|
||||
/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
|
||||
LIBBPF_PIN_BY_NAME,
|
||||
};
|
||||
|
||||
/* pin_maps and unpin_maps can both be called with a NULL path, in which case
|
||||
* they will use the pin_path attribute of each map (and ignore all maps that
|
||||
* don't have a pin_path set).
|
||||
*/
|
||||
LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path);
|
||||
LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj,
|
||||
const char *path);
|
||||
@ -307,6 +323,7 @@ LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);
|
||||
LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
|
||||
LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
|
||||
LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
|
||||
LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
|
||||
|
||||
LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
|
||||
LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
|
||||
@ -326,6 +343,7 @@ LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
|
||||
LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
|
||||
LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
|
||||
LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
|
||||
LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
|
||||
|
||||
/*
|
||||
* No need for __attribute__((packed)), all members of 'bpf_map_def'
|
||||
@ -385,6 +403,9 @@ LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
|
||||
LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
|
||||
LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
|
||||
LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
|
||||
LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
|
||||
LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
|
||||
LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
|
||||
LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
|
||||
LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
|
||||
|
||||
|
@ -193,8 +193,13 @@ LIBBPF_0.0.5 {
|
||||
|
||||
LIBBPF_0.0.6 {
|
||||
global:
|
||||
bpf_map__get_pin_path;
|
||||
bpf_map__is_pinned;
|
||||
bpf_map__set_pin_path;
|
||||
bpf_object__open_file;
|
||||
bpf_object__open_mem;
|
||||
bpf_program__get_expected_attach_type;
|
||||
bpf_program__get_type;
|
||||
bpf_program__is_tracing;
|
||||
bpf_program__set_tracing;
|
||||
} LIBBPF_0.0.5;
|
||||
|
@ -102,6 +102,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
|
||||
case BPF_PROG_TYPE_FLOW_DISSECTOR:
|
||||
case BPF_PROG_TYPE_CGROUP_SYSCTL:
|
||||
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
|
||||
case BPF_PROG_TYPE_TRACING:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -73,6 +73,21 @@ struct xsk_nl_info {
|
||||
int fd;
|
||||
};
|
||||
|
||||
/* Up until and including Linux 5.3 */
|
||||
/*
 * Offsets of a single ring in the older XDP_MMAP_OFFSETS layout, which
 * has no 'flags' member.
 */
struct xdp_ring_offset_v1 {
|
||||
__u64 producer;
|
||||
__u64 consumer;
|
||||
__u64 desc;
|
||||
};
|
||||
|
||||
/* Up until and including Linux 5.3 */
|
||||
/*
 * Older XDP_MMAP_OFFSETS getsockopt() result: one xdp_ring_offset_v1 per
 * ring. A returned option length equal to sizeof() of this struct is how
 * the old layout is recognised.
 */
struct xdp_mmap_offsets_v1 {
|
||||
struct xdp_ring_offset_v1 rx;
|
||||
struct xdp_ring_offset_v1 tx;
|
||||
struct xdp_ring_offset_v1 fr;
|
||||
struct xdp_ring_offset_v1 cr;
|
||||
};
|
||||
|
||||
int xsk_umem__fd(const struct xsk_umem *umem)
|
||||
{
|
||||
return umem ? umem->fd : -EINVAL;
|
||||
@ -133,6 +148,58 @@ static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off)
|
||||
{
|
||||
struct xdp_mmap_offsets_v1 off_v1;
|
||||
|
||||
/* getsockopt on a kernel <= 5.3 has no flags fields.
|
||||
* Copy over the offsets to the correct places in the >=5.4 format
|
||||
* and put the flags where they would have been on that kernel.
|
||||
*/
|
||||
memcpy(&off_v1, off, sizeof(off_v1));
|
||||
|
||||
off->rx.producer = off_v1.rx.producer;
|
||||
off->rx.consumer = off_v1.rx.consumer;
|
||||
off->rx.desc = off_v1.rx.desc;
|
||||
off->rx.flags = off_v1.rx.consumer + sizeof(__u32);
|
||||
|
||||
off->tx.producer = off_v1.tx.producer;
|
||||
off->tx.consumer = off_v1.tx.consumer;
|
||||
off->tx.desc = off_v1.tx.desc;
|
||||
off->tx.flags = off_v1.tx.consumer + sizeof(__u32);
|
||||
|
||||
off->fr.producer = off_v1.fr.producer;
|
||||
off->fr.consumer = off_v1.fr.consumer;
|
||||
off->fr.desc = off_v1.fr.desc;
|
||||
off->fr.flags = off_v1.fr.consumer + sizeof(__u32);
|
||||
|
||||
off->cr.producer = off_v1.cr.producer;
|
||||
off->cr.consumer = off_v1.cr.consumer;
|
||||
off->cr.desc = off_v1.cr.desc;
|
||||
off->cr.flags = off_v1.cr.consumer + sizeof(__u32);
|
||||
}
|
||||
|
||||
static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
|
||||
{
|
||||
socklen_t optlen;
|
||||
int err;
|
||||
|
||||
optlen = sizeof(*off);
|
||||
err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (optlen == sizeof(*off))
|
||||
return 0;
|
||||
|
||||
if (optlen == sizeof(struct xdp_mmap_offsets_v1)) {
|
||||
xsk_mmap_offsets_v1(off);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
|
||||
__u64 size, struct xsk_ring_prod *fill,
|
||||
struct xsk_ring_cons *comp,
|
||||
@ -141,7 +208,6 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
|
||||
struct xdp_mmap_offsets off;
|
||||
struct xdp_umem_reg mr;
|
||||
struct xsk_umem *umem;
|
||||
socklen_t optlen;
|
||||
void *map;
|
||||
int err;
|
||||
|
||||
@ -190,8 +256,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
|
||||
goto out_socket;
|
||||
}
|
||||
|
||||
optlen = sizeof(off);
|
||||
err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
|
||||
err = xsk_get_mmap_offsets(umem->fd, &off);
|
||||
if (err) {
|
||||
err = -errno;
|
||||
goto out_socket;
|
||||
@ -514,7 +579,6 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
|
||||
struct sockaddr_xdp sxdp = {};
|
||||
struct xdp_mmap_offsets off;
|
||||
struct xsk_socket *xsk;
|
||||
socklen_t optlen;
|
||||
int err;
|
||||
|
||||
if (!umem || !xsk_ptr || !rx || !tx)
|
||||
@ -573,8 +637,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
|
||||
}
|
||||
}
|
||||
|
||||
optlen = sizeof(off);
|
||||
err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
|
||||
err = xsk_get_mmap_offsets(xsk->fd, &off);
|
||||
if (err) {
|
||||
err = -errno;
|
||||
goto out_socket;
|
||||
@ -660,7 +723,6 @@ out_xsk_alloc:
|
||||
int xsk_umem__delete(struct xsk_umem *umem)
|
||||
{
|
||||
struct xdp_mmap_offsets off;
|
||||
socklen_t optlen;
|
||||
int err;
|
||||
|
||||
if (!umem)
|
||||
@ -669,8 +731,7 @@ int xsk_umem__delete(struct xsk_umem *umem)
|
||||
if (umem->refcount)
|
||||
return -EBUSY;
|
||||
|
||||
optlen = sizeof(off);
|
||||
err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
|
||||
err = xsk_get_mmap_offsets(umem->fd, &off);
|
||||
if (!err) {
|
||||
munmap(umem->fill->ring - off.fr.desc,
|
||||
off.fr.desc + umem->config.fill_size * sizeof(__u64));
|
||||
@ -688,7 +749,6 @@ void xsk_socket__delete(struct xsk_socket *xsk)
|
||||
{
|
||||
size_t desc_sz = sizeof(struct xdp_desc);
|
||||
struct xdp_mmap_offsets off;
|
||||
socklen_t optlen;
|
||||
int err;
|
||||
|
||||
if (!xsk)
|
||||
@ -699,8 +759,7 @@ void xsk_socket__delete(struct xsk_socket *xsk)
|
||||
close(xsk->prog_fd);
|
||||
}
|
||||
|
||||
optlen = sizeof(off);
|
||||
err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
|
||||
err = xsk_get_mmap_offsets(xsk->fd, &off);
|
||||
if (!err) {
|
||||
if (xsk->rx) {
|
||||
munmap(xsk->rx->ring - off.rx.desc,
|
||||
|
@ -89,6 +89,9 @@ $(notdir $(TEST_GEN_PROGS) \
|
||||
$(OUTPUT)/urandom_read: urandom_read.c
|
||||
$(CC) -o $@ $< -Wl,--build-id
|
||||
|
||||
$(OUTPUT)/test_stub.o: test_stub.c
|
||||
$(CC) -c $(CFLAGS) -o $@ $<
|
||||
|
||||
BPFOBJ := $(OUTPUT)/libbpf.a
|
||||
|
||||
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
|
||||
@ -131,8 +134,13 @@ $(shell $(1) -v -E - </dev/null 2>&1 \
|
||||
| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
|
||||
endef
|
||||
|
||||
# Determine target endianness.
|
||||
IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
|
||||
grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
|
||||
MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
|
||||
|
||||
CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
|
||||
BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \
|
||||
BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
|
||||
-I. -I./include/uapi -I$(APIDIR) \
|
||||
-I$(BPFDIR) -I$(abspath $(OUTPUT)/../usr/include)
|
||||
|
||||
@ -271,12 +279,8 @@ $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
|
||||
|
||||
# Define test_progs BPF-GCC-flavored test runner.
|
||||
ifneq ($(BPF_GCC),)
|
||||
IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
|
||||
grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
|
||||
MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
|
||||
|
||||
TRUNNER_BPF_BUILD_RULE := GCC_BPF_BUILD_RULE
|
||||
TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc) $(MENDIAN)
|
||||
TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc)
|
||||
TRUNNER_BPF_LDFLAGS :=
|
||||
$(eval $(call DEFINE_TEST_RUNNER,test_progs,bpf_gcc))
|
||||
endif
|
||||
|
210
tools/testing/selftests/bpf/prog_tests/pinning.c
Normal file
210
tools/testing/selftests/bpf/prog_tests/pinning.c
Normal file
@ -0,0 +1,210 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <test_progs.h>
|
||||
|
||||
__u32 get_map_id(struct bpf_object *obj, const char *name)
|
||||
{
|
||||
struct bpf_map_info map_info = {};
|
||||
__u32 map_info_len, duration = 0;
|
||||
struct bpf_map *map;
|
||||
int err;
|
||||
|
||||
map_info_len = sizeof(map_info);
|
||||
|
||||
map = bpf_object__find_map_by_name(obj, name);
|
||||
if (CHECK(!map, "find map", "NULL map"))
|
||||
return 0;
|
||||
|
||||
err = bpf_obj_get_info_by_fd(bpf_map__fd(map),
|
||||
&map_info, &map_info_len);
|
||||
CHECK(err, "get map info", "err %d errno %d", err, errno);
|
||||
return map_info.id;
|
||||
}
|
||||
|
||||
void test_pinning(void)
|
||||
{
|
||||
const char *file_invalid = "./test_pinning_invalid.o";
|
||||
const char *custpinpath = "/sys/fs/bpf/custom/pinmap";
|
||||
const char *nopinpath = "/sys/fs/bpf/nopinmap";
|
||||
const char *nopinpath2 = "/sys/fs/bpf/nopinmap2";
|
||||
const char *custpath = "/sys/fs/bpf/custom";
|
||||
const char *pinpath = "/sys/fs/bpf/pinmap";
|
||||
const char *file = "./test_pinning.o";
|
||||
__u32 map_id, map_id2, duration = 0;
|
||||
struct stat statbuf = {};
|
||||
struct bpf_object *obj;
|
||||
struct bpf_map *map;
|
||||
int err;
|
||||
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
|
||||
.pin_root_path = custpath,
|
||||
);
|
||||
|
||||
/* check that opening fails with invalid pinning value in map def */
|
||||
obj = bpf_object__open_file(file_invalid, NULL);
|
||||
err = libbpf_get_error(obj);
|
||||
if (CHECK(err != -EINVAL, "invalid open", "err %d errno %d\n", err, errno)) {
|
||||
obj = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* open the valid object file */
|
||||
obj = bpf_object__open_file(file, NULL);
|
||||
err = libbpf_get_error(obj);
|
||||
if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) {
|
||||
obj = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = bpf_object__load(obj);
|
||||
if (CHECK(err, "default load", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
/* check that pinmap was pinned */
|
||||
err = stat(pinpath, &statbuf);
|
||||
if (CHECK(err, "stat pinpath", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
/* check that nopinmap was *not* pinned */
|
||||
err = stat(nopinpath, &statbuf);
|
||||
if (CHECK(!err || errno != ENOENT, "stat nopinpath",
|
||||
"err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
/* check that nopinmap2 was *not* pinned */
|
||||
err = stat(nopinpath2, &statbuf);
|
||||
if (CHECK(!err || errno != ENOENT, "stat nopinpath2",
|
||||
"err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
map_id = get_map_id(obj, "pinmap");
|
||||
if (!map_id)
|
||||
goto out;
|
||||
|
||||
bpf_object__close(obj);
|
||||
|
||||
obj = bpf_object__open_file(file, NULL);
|
||||
if (CHECK_FAIL(libbpf_get_error(obj))) {
|
||||
obj = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = bpf_object__load(obj);
|
||||
if (CHECK(err, "default load", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
/* check that same map ID was reused for second load */
|
||||
map_id2 = get_map_id(obj, "pinmap");
|
||||
if (CHECK(map_id != map_id2, "check reuse",
|
||||
"err %d errno %d id %d id2 %d\n", err, errno, map_id, map_id2))
|
||||
goto out;
|
||||
|
||||
/* should be no-op to re-pin same map */
|
||||
map = bpf_object__find_map_by_name(obj, "pinmap");
|
||||
if (CHECK(!map, "find map", "NULL map"))
|
||||
goto out;
|
||||
|
||||
err = bpf_map__pin(map, NULL);
|
||||
if (CHECK(err, "re-pin map", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
/* but error to pin at different location */
|
||||
err = bpf_map__pin(map, "/sys/fs/bpf/other");
|
||||
if (CHECK(!err, "pin map different", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
/* unpin maps with a pin_path set */
|
||||
err = bpf_object__unpin_maps(obj, NULL);
|
||||
if (CHECK(err, "unpin maps", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
/* and re-pin them... */
|
||||
err = bpf_object__pin_maps(obj, NULL);
|
||||
if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
/* set pinning path of other map and re-pin all */
|
||||
map = bpf_object__find_map_by_name(obj, "nopinmap");
|
||||
if (CHECK(!map, "find map", "NULL map"))
|
||||
goto out;
|
||||
|
||||
err = bpf_map__set_pin_path(map, custpinpath);
|
||||
if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
/* should only pin the one unpinned map */
|
||||
err = bpf_object__pin_maps(obj, NULL);
|
||||
if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
/* check that nopinmap was pinned at the custom path */
|
||||
err = stat(custpinpath, &statbuf);
|
||||
if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
/* remove the custom pin path to re-test it with auto-pinning below */
|
||||
err = unlink(custpinpath);
|
||||
if (CHECK(err, "unlink custpinpath", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
err = rmdir(custpath);
|
||||
if (CHECK(err, "rmdir custpindir", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
bpf_object__close(obj);
|
||||
|
||||
/* open the valid object file again */
|
||||
obj = bpf_object__open_file(file, NULL);
|
||||
err = libbpf_get_error(obj);
|
||||
if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) {
|
||||
obj = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* swap pin paths of the two maps */
|
||||
bpf_object__for_each_map(map, obj) {
|
||||
if (!strcmp(bpf_map__name(map), "nopinmap"))
|
||||
err = bpf_map__set_pin_path(map, pinpath);
|
||||
else if (!strcmp(bpf_map__name(map), "pinmap"))
|
||||
err = bpf_map__set_pin_path(map, NULL);
|
||||
else
|
||||
continue;
|
||||
|
||||
if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* should fail because of map parameter mismatch */
|
||||
err = bpf_object__load(obj);
|
||||
if (CHECK(err != -EINVAL, "param mismatch load", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
bpf_object__close(obj);
|
||||
|
||||
/* test auto-pinning at custom path with open opt */
|
||||
obj = bpf_object__open_file(file, &opts);
|
||||
if (CHECK_FAIL(libbpf_get_error(obj))) {
|
||||
obj = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = bpf_object__load(obj);
|
||||
if (CHECK(err, "custom load", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
/* check that pinmap was pinned at the custom path */
|
||||
err = stat(custpinpath, &statbuf);
|
||||
if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
|
||||
goto out;
|
||||
|
||||
out:
|
||||
unlink(pinpath);
|
||||
unlink(nopinpath);
|
||||
unlink(nopinpath2);
|
||||
unlink(custpinpath);
|
||||
rmdir(custpath);
|
||||
if (obj)
|
||||
bpf_object__close(obj);
|
||||
}
|
78
tools/testing/selftests/bpf/prog_tests/probe_user.c
Normal file
78
tools/testing/selftests/bpf/prog_tests/probe_user.c
Normal file
@ -0,0 +1,78 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <test_progs.h>
|
||||
|
||||
void test_probe_user(void)
|
||||
{
|
||||
#define kprobe_name "__sys_connect"
|
||||
const char *prog_name = "kprobe/" kprobe_name;
|
||||
const char *obj_file = "./test_probe_user.o";
|
||||
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, );
|
||||
int err, results_map_fd, sock_fd, duration = 0;
|
||||
struct sockaddr curr, orig, tmp;
|
||||
struct sockaddr_in *in = (struct sockaddr_in *)&curr;
|
||||
struct bpf_link *kprobe_link = NULL;
|
||||
struct bpf_program *kprobe_prog;
|
||||
struct bpf_object *obj;
|
||||
static const int zero = 0;
|
||||
|
||||
obj = bpf_object__open_file(obj_file, &opts);
|
||||
if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
|
||||
return;
|
||||
|
||||
kprobe_prog = bpf_object__find_program_by_title(obj, prog_name);
|
||||
if (CHECK(!kprobe_prog, "find_probe",
|
||||
"prog '%s' not found\n", prog_name))
|
||||
goto cleanup;
|
||||
|
||||
err = bpf_object__load(obj);
|
||||
if (CHECK(err, "obj_load", "err %d\n", err))
|
||||
goto cleanup;
|
||||
|
||||
results_map_fd = bpf_find_map(__func__, obj, "test_pro.bss");
|
||||
if (CHECK(results_map_fd < 0, "find_bss_map",
|
||||
"err %d\n", results_map_fd))
|
||||
goto cleanup;
|
||||
|
||||
kprobe_link = bpf_program__attach_kprobe(kprobe_prog, false,
|
||||
kprobe_name);
|
||||
if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
|
||||
"err %ld\n", PTR_ERR(kprobe_link))) {
|
||||
kprobe_link = NULL;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
memset(&curr, 0, sizeof(curr));
|
||||
in->sin_family = AF_INET;
|
||||
in->sin_port = htons(5555);
|
||||
in->sin_addr.s_addr = inet_addr("255.255.255.255");
|
||||
memcpy(&orig, &curr, sizeof(curr));
|
||||
|
||||
sock_fd = socket(AF_INET, SOCK_STREAM, 0);
|
||||
if (CHECK(sock_fd < 0, "create_sock_fd", "err %d\n", sock_fd))
|
||||
goto cleanup;
|
||||
|
||||
connect(sock_fd, &curr, sizeof(curr));
|
||||
close(sock_fd);
|
||||
|
||||
err = bpf_map_lookup_elem(results_map_fd, &zero, &tmp);
|
||||
if (CHECK(err, "get_kprobe_res",
|
||||
"failed to get kprobe res: %d\n", err))
|
||||
goto cleanup;
|
||||
|
||||
in = (struct sockaddr_in *)&tmp;
|
||||
if (CHECK(memcmp(&tmp, &orig, sizeof(orig)), "check_kprobe_res",
|
||||
"wrong kprobe res from probe read: %s:%u\n",
|
||||
inet_ntoa(in->sin_addr), ntohs(in->sin_port)))
|
||||
goto cleanup;
|
||||
|
||||
memset(&tmp, 0xab, sizeof(tmp));
|
||||
|
||||
in = (struct sockaddr_in *)&curr;
|
||||
if (CHECK(memcmp(&curr, &tmp, sizeof(tmp)), "check_kprobe_res",
|
||||
"wrong kprobe res from probe write: %s:%u\n",
|
||||
inet_ntoa(in->sin_addr), ntohs(in->sin_port)))
|
||||
goto cleanup;
|
||||
cleanup:
|
||||
bpf_link__destroy(kprobe_link);
|
||||
bpf_object__close(obj);
|
||||
}
|
@ -79,11 +79,11 @@ int trace_kfree_skb(struct trace_kfree_skb *ctx)
|
||||
func = ptr->func;
|
||||
}));
|
||||
|
||||
bpf_probe_read(&pkt_type, sizeof(pkt_type), _(&skb->__pkt_type_offset));
|
||||
bpf_probe_read_kernel(&pkt_type, sizeof(pkt_type), _(&skb->__pkt_type_offset));
|
||||
pkt_type &= 7;
|
||||
|
||||
/* read eth proto */
|
||||
bpf_probe_read(&pkt_data, sizeof(pkt_data), data + 12);
|
||||
bpf_probe_read_kernel(&pkt_data, sizeof(pkt_data), data + 12);
|
||||
|
||||
bpf_printk("rcuhead.next %llx func %llx\n", ptr, func);
|
||||
bpf_printk("skb->len %d users %d pkt_type %x\n",
|
||||
|
@ -72,9 +72,9 @@ static __always_inline void *get_thread_state(void *tls_base, PidData *pidData)
|
||||
void* thread_state;
|
||||
int key;
|
||||
|
||||
bpf_probe_read(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
|
||||
bpf_probe_read(&thread_state, sizeof(thread_state),
|
||||
tls_base + 0x310 + key * 0x10 + 0x08);
|
||||
bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
|
||||
bpf_probe_read_user(&thread_state, sizeof(thread_state),
|
||||
tls_base + 0x310 + key * 0x10 + 0x08);
|
||||
return thread_state;
|
||||
}
|
||||
|
||||
@ -82,31 +82,33 @@ static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
|
||||
FrameData *frame, Symbol *symbol)
|
||||
{
|
||||
// read data from PyFrameObject
|
||||
bpf_probe_read(&frame->f_back,
|
||||
sizeof(frame->f_back),
|
||||
frame_ptr + pidData->offsets.PyFrameObject_back);
|
||||
bpf_probe_read(&frame->f_code,
|
||||
sizeof(frame->f_code),
|
||||
frame_ptr + pidData->offsets.PyFrameObject_code);
|
||||
bpf_probe_read_user(&frame->f_back,
|
||||
sizeof(frame->f_back),
|
||||
frame_ptr + pidData->offsets.PyFrameObject_back);
|
||||
bpf_probe_read_user(&frame->f_code,
|
||||
sizeof(frame->f_code),
|
||||
frame_ptr + pidData->offsets.PyFrameObject_code);
|
||||
|
||||
// read data from PyCodeObject
|
||||
if (!frame->f_code)
|
||||
return false;
|
||||
bpf_probe_read(&frame->co_filename,
|
||||
sizeof(frame->co_filename),
|
||||
frame->f_code + pidData->offsets.PyCodeObject_filename);
|
||||
bpf_probe_read(&frame->co_name,
|
||||
sizeof(frame->co_name),
|
||||
frame->f_code + pidData->offsets.PyCodeObject_name);
|
||||
bpf_probe_read_user(&frame->co_filename,
|
||||
sizeof(frame->co_filename),
|
||||
frame->f_code + pidData->offsets.PyCodeObject_filename);
|
||||
bpf_probe_read_user(&frame->co_name,
|
||||
sizeof(frame->co_name),
|
||||
frame->f_code + pidData->offsets.PyCodeObject_name);
|
||||
// read actual names into symbol
|
||||
if (frame->co_filename)
|
||||
bpf_probe_read_str(&symbol->file,
|
||||
sizeof(symbol->file),
|
||||
frame->co_filename + pidData->offsets.String_data);
|
||||
bpf_probe_read_user_str(&symbol->file,
|
||||
sizeof(symbol->file),
|
||||
frame->co_filename +
|
||||
pidData->offsets.String_data);
|
||||
if (frame->co_name)
|
||||
bpf_probe_read_str(&symbol->name,
|
||||
sizeof(symbol->name),
|
||||
frame->co_name + pidData->offsets.String_data);
|
||||
bpf_probe_read_user_str(&symbol->name,
|
||||
sizeof(symbol->name),
|
||||
frame->co_name +
|
||||
pidData->offsets.String_data);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -174,9 +176,9 @@ static __always_inline int __on_event(struct pt_regs *ctx)
|
||||
event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
|
||||
|
||||
void* thread_state_current = (void*)0;
|
||||
bpf_probe_read(&thread_state_current,
|
||||
sizeof(thread_state_current),
|
||||
(void*)(long)pidData->current_state_addr);
|
||||
bpf_probe_read_user(&thread_state_current,
|
||||
sizeof(thread_state_current),
|
||||
(void*)(long)pidData->current_state_addr);
|
||||
|
||||
struct task_struct* task = (struct task_struct*)bpf_get_current_task();
|
||||
void* tls_base = (void*)task;
|
||||
@ -188,11 +190,13 @@ static __always_inline int __on_event(struct pt_regs *ctx)
|
||||
if (pidData->use_tls) {
|
||||
uint64_t pthread_created;
|
||||
uint64_t pthread_self;
|
||||
bpf_probe_read(&pthread_self, sizeof(pthread_self), tls_base + 0x10);
|
||||
bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
|
||||
tls_base + 0x10);
|
||||
|
||||
bpf_probe_read(&pthread_created,
|
||||
sizeof(pthread_created),
|
||||
thread_state + pidData->offsets.PyThreadState_thread);
|
||||
bpf_probe_read_user(&pthread_created,
|
||||
sizeof(pthread_created),
|
||||
thread_state +
|
||||
pidData->offsets.PyThreadState_thread);
|
||||
event->pthread_match = pthread_created == pthread_self;
|
||||
} else {
|
||||
event->pthread_match = 1;
|
||||
@ -204,9 +208,10 @@ static __always_inline int __on_event(struct pt_regs *ctx)
|
||||
Symbol sym = {};
|
||||
int cur_cpu = bpf_get_smp_processor_id();
|
||||
|
||||
bpf_probe_read(&frame_ptr,
|
||||
sizeof(frame_ptr),
|
||||
thread_state + pidData->offsets.PyThreadState_frame);
|
||||
bpf_probe_read_user(&frame_ptr,
|
||||
sizeof(frame_ptr),
|
||||
thread_state +
|
||||
pidData->offsets.PyThreadState_frame);
|
||||
|
||||
int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
|
||||
if (symbol_counter == NULL)
|
||||
|
@ -98,7 +98,7 @@ struct strobe_map_raw {
|
||||
/*
|
||||
* having volatile doesn't change anything on BPF side, but clang
|
||||
* emits warnings for passing `volatile const char *` into
|
||||
* bpf_probe_read_str that expects just `const char *`
|
||||
* bpf_probe_read_user_str that expects just `const char *`
|
||||
*/
|
||||
const char* tag;
|
||||
/*
|
||||
@ -309,18 +309,18 @@ static __always_inline void *calc_location(struct strobe_value_loc *loc,
|
||||
dtv_t *dtv;
|
||||
void *tls_ptr;
|
||||
|
||||
bpf_probe_read(&tls_index, sizeof(struct tls_index),
|
||||
(void *)loc->offset);
|
||||
bpf_probe_read_user(&tls_index, sizeof(struct tls_index),
|
||||
(void *)loc->offset);
|
||||
/* valid module index is always positive */
|
||||
if (tls_index.module > 0) {
|
||||
/* dtv = ((struct tcbhead *)tls_base)->dtv[tls_index.module] */
|
||||
bpf_probe_read(&dtv, sizeof(dtv),
|
||||
&((struct tcbhead *)tls_base)->dtv);
|
||||
bpf_probe_read_user(&dtv, sizeof(dtv),
|
||||
&((struct tcbhead *)tls_base)->dtv);
|
||||
dtv += tls_index.module;
|
||||
} else {
|
||||
dtv = NULL;
|
||||
}
|
||||
bpf_probe_read(&tls_ptr, sizeof(void *), dtv);
|
||||
bpf_probe_read_user(&tls_ptr, sizeof(void *), dtv);
|
||||
/* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
|
||||
return tls_ptr && tls_ptr != (void *)-1
|
||||
? tls_ptr + tls_index.offset
|
||||
@ -336,7 +336,7 @@ static __always_inline void read_int_var(struct strobemeta_cfg *cfg,
|
||||
if (!location)
|
||||
return;
|
||||
|
||||
bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
|
||||
bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
|
||||
data->int_vals[idx] = value->val;
|
||||
if (value->header.len)
|
||||
data->int_vals_set_mask |= (1 << idx);
|
||||
@ -356,13 +356,13 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
|
||||
if (!location)
|
||||
return 0;
|
||||
|
||||
bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
|
||||
len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, value->ptr);
|
||||
bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
|
||||
len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr);
|
||||
/*
|
||||
* if bpf_probe_read_str returns error (<0), due to casting to
|
||||
* if bpf_probe_read_user_str returns error (<0), due to casting to
|
||||
* unsigned int, it will become big number, so next check is
|
||||
* sufficient to check for errors AND prove to BPF verifier, that
|
||||
* bpf_probe_read_str won't return anything bigger than
|
||||
* bpf_probe_read_user_str won't return anything bigger than
|
||||
* STROBE_MAX_STR_LEN
|
||||
*/
|
||||
if (len > STROBE_MAX_STR_LEN)
|
||||
@ -391,8 +391,8 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
|
||||
if (!location)
|
||||
return payload;
|
||||
|
||||
bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
|
||||
if (bpf_probe_read(&map, sizeof(struct strobe_map_raw), value->ptr))
|
||||
bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
|
||||
if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
|
||||
return payload;
|
||||
|
||||
descr->id = map.id;
|
||||
@ -402,7 +402,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
|
||||
data->req_meta_valid = 1;
|
||||
}
|
||||
|
||||
len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, map.tag);
|
||||
len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag);
|
||||
if (len <= STROBE_MAX_STR_LEN) {
|
||||
descr->tag_len = len;
|
||||
payload += len;
|
||||
@ -418,15 +418,15 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
|
||||
break;
|
||||
|
||||
descr->key_lens[i] = 0;
|
||||
len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
|
||||
map.entries[i].key);
|
||||
len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
|
||||
map.entries[i].key);
|
||||
if (len <= STROBE_MAX_STR_LEN) {
|
||||
descr->key_lens[i] = len;
|
||||
payload += len;
|
||||
}
|
||||
descr->val_lens[i] = 0;
|
||||
len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
|
||||
map.entries[i].val);
|
||||
len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
|
||||
map.entries[i].val);
|
||||
if (len <= STROBE_MAX_STR_LEN) {
|
||||
descr->val_lens[i] = len;
|
||||
payload += len;
|
||||
|
31
tools/testing/selftests/bpf/progs/test_pinning.c
Normal file
31
tools/testing/selftests/bpf/progs/test_pinning.c
Normal file
@ -0,0 +1,31 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include "bpf_helpers.h"
|
||||
|
||||
int _version SEC("version") = 1;
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(max_entries, 1);
|
||||
__type(key, __u32);
|
||||
__type(value, __u64);
|
||||
__uint(pinning, LIBBPF_PIN_BY_NAME);
|
||||
} pinmap SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(max_entries, 1);
|
||||
__type(key, __u32);
|
||||
__type(value, __u64);
|
||||
} nopinmap SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(max_entries, 1);
|
||||
__type(key, __u32);
|
||||
__type(value, __u64);
|
||||
__uint(pinning, LIBBPF_PIN_NONE);
|
||||
} nopinmap2 SEC(".maps");
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
16
tools/testing/selftests/bpf/progs/test_pinning_invalid.c
Normal file
16
tools/testing/selftests/bpf/progs/test_pinning_invalid.c
Normal file
@ -0,0 +1,16 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include "bpf_helpers.h"
|
||||
|
||||
int _version SEC("version") = 1;
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(max_entries, 1);
|
||||
__type(key, __u32);
|
||||
__type(value, __u64);
|
||||
__uint(pinning, 2); /* invalid */
|
||||
} nopinmap3 SEC(".maps");
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
26
tools/testing/selftests/bpf/progs/test_probe_user.c
Normal file
26
tools/testing/selftests/bpf/progs/test_probe_user.c
Normal file
@ -0,0 +1,26 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/bpf.h>
|
||||
|
||||
#include <netinet/in.h>
|
||||
|
||||
#include "bpf_helpers.h"
|
||||
#include "bpf_tracing.h"
|
||||
|
||||
static struct sockaddr_in old;
|
||||
|
||||
/*
 * kprobe on __sys_connect: copy the buffer referenced by the probed
 * function's second argument into the file-scope 'old', then overwrite that
 * buffer with sizeof(struct sockaddr_in) bytes of 0xab.
 */
SEC("kprobe/__sys_connect")
int handle_sys_connect(struct pt_regs *ctx)
{
	struct sockaddr_in pattern;
	void *uaddr = (void *)PT_REGS_PARM2(ctx);

	__builtin_memset(&pattern, 0xab, sizeof(pattern));

	/* helper return values are not checked */
	bpf_probe_read_user(&old, sizeof(old), uaddr);
	bpf_probe_write_user(uaddr, &pattern, sizeof(pattern));

	return 0;
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
@ -38,7 +38,7 @@
|
||||
#include <sys/socket.h>
|
||||
#include "bpf_helpers.h"
|
||||
|
||||
#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
|
||||
#define _(P) ({typeof(P) val = 0; bpf_probe_read_kernel(&val, sizeof(val), &P); val;})
|
||||
#define TCP_ESTATS_MAGIC 0xBAADBEEF
|
||||
|
||||
/* This test case needs "sock" and "pt_regs" data structure.
|
||||
|
@ -314,9 +314,6 @@ class DebugfsDir:
|
||||
continue
|
||||
|
||||
p = os.path.join(path, f)
|
||||
if not os.stat(p).st_mode & stat.S_IRUSR:
|
||||
continue
|
||||
|
||||
if os.path.isfile(p) and os.access(p, os.R_OK):
|
||||
_, out = cmd('cat %s/%s' % (path, f))
|
||||
dfs[f] = out.strip()
|
||||
|
@ -120,6 +120,29 @@ static struct sysctl_test tests[] = {
|
||||
.newval = "(none)", /* same as default, should fail anyway */
|
||||
.result = OP_EPERM,
|
||||
},
|
||||
{
|
||||
.descr = "ctx:write sysctl:write read ok narrow",
|
||||
.insns = {
|
||||
/* u64 w = (u16)write & 1; */
|
||||
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1,
|
||||
offsetof(struct bpf_sysctl, write)),
|
||||
#else
|
||||
BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1,
|
||||
offsetof(struct bpf_sysctl, write) + 2),
|
||||
#endif
|
||||
BPF_ALU64_IMM(BPF_AND, BPF_REG_7, 1),
|
||||
/* return 1 - w; */
|
||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||
BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_7),
|
||||
BPF_EXIT_INSN(),
|
||||
},
|
||||
.attach_type = BPF_CGROUP_SYSCTL,
|
||||
.sysctl = "kernel/domainname",
|
||||
.open_flags = O_WRONLY,
|
||||
.newval = "(none)", /* same as default, should fail anyway */
|
||||
.result = OP_EPERM,
|
||||
},
|
||||
{
|
||||
.descr = "ctx:write sysctl:read write reject",
|
||||
.insns = {
|
||||
|
Loading…
Reference in New Issue
Block a user