Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2021-03-09

The following pull-request contains BPF updates for your *net-next* tree.

We've added 90 non-merge commits during the last 17 day(s) which contain
a total of 114 files changed, 5158 insertions(+), 1288 deletions(-).

The main changes are:

1) Faster bpf_redirect_map(), from Björn.

2) skmsg cleanup, from Cong.

3) Support for floating point types in BTF, from Ilya.

4) Documentation for sys_bpf commands, from Joe.

5) Support for sk_lookup in bpf_prog_test_run, from Lorenz.

6) Enable task local storage for tracing programs, from Song.

7) bpf_for_each_map_elem() helper, from Yonghong.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

This commit is contained in: commit c1acda9807
@@ -84,6 +84,7 @@ sequentially and type id is assigned to each recognized type starting from id
    #define BTF_KIND_FUNC_PROTO     13      /* Function Proto */
    #define BTF_KIND_VAR            14      /* Variable     */
    #define BTF_KIND_DATASEC        15      /* Section      */
    #define BTF_KIND_FLOAT          16      /* Floating point */

Note that the type section encodes debug info, not just pure types.
``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
@@ -95,8 +96,8 @@ Each type contains the following common data::
    /* "info" bits arrangement
     * bits 0-15: vlen (e.g. # of struct's members)
     * bits 16-23: unused
     * bits 24-27: kind (e.g. int, ptr, array...etc)
     * bits 28-30: unused
     * bits 24-28: kind (e.g. int, ptr, array...etc)
     * bits 29-30: unused
     * bit 31: kind_flag, currently used by
     *         struct, union and fwd
     */
@@ -452,6 +453,18 @@ map definition.
 * ``offset``: the in-section offset of the variable
 * ``size``: the size of the variable in bytes

2.2.16 BTF_KIND_FLOAT
~~~~~~~~~~~~~~~~~~~~~

``struct btf_type`` encoding requirement:
 * ``name_off``: any valid offset
 * ``info.kind_flag``: 0
 * ``info.kind``: BTF_KIND_FLOAT
 * ``info.vlen``: 0
 * ``size``: the size of the float type in bytes: 2, 4, 8, 12 or 16.

No additional type data follow ``btf_type``.

3. BTF Kernel API
*****************
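For reference, a minimal, hypothetical sketch of a hand-encoded BTF_KIND_FLOAT entry following the rules above (vlen = 0, kind_flag = 0, nothing after struct btf_type); the name offset and size are illustrative, not taken from the patch:

#include <linux/btf.h>

/* "double": BTF_KIND_FLOAT goes into bits 24-28 of info, vlen and
 * kind_flag stay zero, and size must be 2, 4, 8, 12 or 16 bytes.
 */
static const struct btf_type btf_float_double = {
        .name_off = 1,                        /* offset of "double" in the string section */
        .info     = (__u32)BTF_KIND_FLOAT << 24,
        .size     = 8,
};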
@@ -12,9 +12,6 @@ BPF instruction-set.
The Cilium project also maintains a `BPF and XDP Reference Guide`_
that goes into great technical depth about the BPF Architecture.

The primary info for the bpf syscall is available in the `man-pages`_
for `bpf(2)`_.

BPF Type Format (BTF)
=====================

@@ -35,6 +32,12 @@ Two sets of Questions and Answers (Q&A) are maintained.
   bpf_design_QA
   bpf_devel_QA

Syscall API
===========

The primary info for the bpf syscall is available in the `man-pages`_
for `bpf(2)`_. For more information about the userspace API, see
Documentation/userspace-api/ebpf/index.rst.

Helper functions
================
Documentation/userspace-api/ebpf/index.rst (new file, 17 lines)
@@ -0,0 +1,17 @@
.. SPDX-License-Identifier: GPL-2.0

eBPF Userspace API
==================

eBPF is a kernel mechanism to provide a sandboxed runtime environment in the
Linux kernel for runtime extension and instrumentation without changing kernel
source code or loading kernel modules. eBPF programs can be attached to various
kernel subsystems, including networking, tracing and Linux security modules
(LSM).

For internal kernel documentation on eBPF, see Documentation/bpf/index.rst.

.. toctree::
   :maxdepth: 1

   syscall
Documentation/userspace-api/ebpf/syscall.rst (new file, 24 lines)
@@ -0,0 +1,24 @@
.. SPDX-License-Identifier: GPL-2.0

eBPF Syscall
------------

:Authors: - Alexei Starovoitov <ast@kernel.org>
          - Joe Stringer <joe@wand.net.nz>
          - Michael Kerrisk <mtk.manpages@gmail.com>

The primary info for the bpf syscall is available in the `man-pages`_
for `bpf(2)`_.

bpf() subcommand reference
~~~~~~~~~~~~~~~~~~~~~~~~~~

.. kernel-doc:: include/uapi/linux/bpf.h
   :doc: eBPF Syscall Preamble

.. kernel-doc:: include/uapi/linux/bpf.h
   :doc: eBPF Syscall Commands

.. Links:
.. _man-pages: https://www.kernel.org/doc/man-pages/
.. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html
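As a rough, hypothetical usage sketch of the command reference this page pulls in (not part of the patch), a map can be created by invoking bpf(2) directly; the map type, sizes and entry count below are illustrative only:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

int main(void)
{
        union bpf_attr attr;
        int fd;

        /* BPF_MAP_CREATE: describe the map in bpf_attr and call bpf(2). */
        memset(&attr, 0, sizeof(attr));
        attr.map_type    = BPF_MAP_TYPE_ARRAY;
        attr.key_size    = sizeof(__u32);
        attr.value_size  = sizeof(__u64);
        attr.max_entries = 16;

        fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
        if (fd < 0) {
                fprintf(stderr, "BPF_MAP_CREATE failed: %s\n", strerror(errno));
                return 1;
        }
        printf("map fd = %d\n", fd);
        close(fd);      /* closing the last reference deletes the map */
        return 0;
}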
@@ -21,6 +21,7 @@ place where this information is gathered.
   unshare
   spec_ctrl
   accelerators/ocxl
   ebpf/index
   ioctl/index
   iommu
   media/index
@@ -3233,6 +3233,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
F: Documentation/bpf/
F: Documentation/networking/filter.rst
F: Documentation/userspace-api/ebpf/
F: arch/*/net/*
F: include/linux/bpf*
F: include/linux/filter.h
@@ -3247,6 +3248,7 @@ F: net/core/filter.c
F: net/sched/act_bpf.c
F: net/sched/cls_bpf.c
F: samples/bpf/
F: scripts/bpf_doc.py
F: tools/bpf/
F: tools/lib/bpf/
F: tools/testing/selftests/bpf/
@@ -2973,7 +2973,8 @@ static int virtnet_probe(struct virtio_device *vdev)
                return -ENOMEM;

        /* Set up network device as normal. */
        dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
        dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE |
                           IFF_TX_SKB_NO_LINEAR;
        dev->netdev_ops = &virtnet_netdev;
        dev->features = NETIF_F_HIGHDMA;
@ -39,6 +39,7 @@ struct bpf_local_storage;
|
||||
struct bpf_local_storage_map;
|
||||
struct kobject;
|
||||
struct mem_cgroup;
|
||||
struct bpf_func_state;
|
||||
|
||||
extern struct idr btf_idr;
|
||||
extern spinlock_t btf_idr_lock;
|
||||
@ -117,6 +118,9 @@ struct bpf_map_ops {
|
||||
void *owner, u32 size);
|
||||
struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
|
||||
|
||||
/* Misc helpers.*/
|
||||
int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags);
|
||||
|
||||
/* map_meta_equal must be implemented for maps that can be
|
||||
* used as an inner map. It is a runtime check to ensure
|
||||
* an inner map can be inserted to an outer map.
|
||||
@ -129,6 +133,13 @@ struct bpf_map_ops {
|
||||
bool (*map_meta_equal)(const struct bpf_map *meta0,
|
||||
const struct bpf_map *meta1);
|
||||
|
||||
|
||||
int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env,
|
||||
struct bpf_func_state *caller,
|
||||
struct bpf_func_state *callee);
|
||||
int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn,
|
||||
void *callback_ctx, u64 flags);
|
||||
|
||||
/* BTF name and id of struct allocated by map_alloc */
|
||||
const char * const map_btf_name;
|
||||
int *map_btf_id;
|
||||
@ -295,6 +306,8 @@ enum bpf_arg_type {
|
||||
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
|
||||
ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
|
||||
ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
|
||||
ARG_PTR_TO_FUNC, /* pointer to a bpf program function */
|
||||
ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */
|
||||
__BPF_ARG_TYPE_MAX,
|
||||
};
|
||||
|
||||
@ -411,6 +424,8 @@ enum bpf_reg_type {
|
||||
PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */
|
||||
PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
|
||||
PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */
|
||||
PTR_TO_FUNC, /* reg points to a bpf program function */
|
||||
PTR_TO_MAP_KEY, /* reg points to a map element key */
|
||||
};
|
||||
|
||||
/* The information passed from prog-specific *_is_valid_access
|
||||
@ -506,6 +521,11 @@ enum bpf_cgroup_storage_type {
|
||||
*/
|
||||
#define MAX_BPF_FUNC_ARGS 12
|
||||
|
||||
/* The maximum number of arguments passed through registers
|
||||
* a single function may have.
|
||||
*/
|
||||
#define MAX_BPF_FUNC_REG_ARGS 5
|
||||
|
||||
struct btf_func_model {
|
||||
u8 ret_size;
|
||||
u8 nr_args;
|
||||
@ -1380,6 +1400,10 @@ void bpf_iter_map_show_fdinfo(const struct bpf_iter_aux_info *aux,
|
||||
int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux,
|
||||
struct bpf_link_info *info);
|
||||
|
||||
int map_set_for_each_callback_args(struct bpf_verifier_env *env,
|
||||
struct bpf_func_state *caller,
|
||||
struct bpf_func_state *callee);
|
||||
|
||||
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
|
||||
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
|
||||
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
|
||||
@ -1429,9 +1453,9 @@ struct btf *bpf_get_btf_vmlinux(void);
|
||||
/* Map specifics */
|
||||
struct xdp_buff;
|
||||
struct sk_buff;
|
||||
struct bpf_dtab_netdev;
|
||||
struct bpf_cpu_map_entry;
|
||||
|
||||
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
|
||||
struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
|
||||
void __dev_flush(void);
|
||||
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
|
||||
struct net_device *dev_rx);
|
||||
@ -1441,7 +1465,6 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
|
||||
struct bpf_prog *xdp_prog);
|
||||
bool dev_map_can_have_prog(struct bpf_map *map);
|
||||
|
||||
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
|
||||
void __cpu_map_flush(void);
|
||||
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
|
||||
struct net_device *dev_rx);
|
||||
@ -1470,6 +1493,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
|
||||
int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
|
||||
const union bpf_attr *kattr,
|
||||
union bpf_attr __user *uattr);
|
||||
int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
|
||||
const union bpf_attr *kattr,
|
||||
union bpf_attr __user *uattr);
|
||||
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
|
||||
const struct bpf_prog *prog,
|
||||
struct bpf_insn_access_aux *info);
|
||||
@ -1499,6 +1525,7 @@ struct bpf_prog *bpf_prog_by_id(u32 id);
|
||||
struct bpf_link *bpf_link_by_id(u32 id);
|
||||
|
||||
const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
|
||||
void bpf_task_storage_free(struct task_struct *task);
|
||||
#else /* !CONFIG_BPF_SYSCALL */
|
||||
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
|
||||
{
|
||||
@ -1568,17 +1595,6 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags)
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map,
|
||||
u32 key)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map,
|
||||
u32 key)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
static inline bool dev_map_can_have_prog(struct bpf_map *map)
|
||||
{
|
||||
return false;
|
||||
@ -1590,6 +1606,7 @@ static inline void __dev_flush(void)
|
||||
|
||||
struct xdp_buff;
|
||||
struct bpf_dtab_netdev;
|
||||
struct bpf_cpu_map_entry;
|
||||
|
||||
static inline
|
||||
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
|
||||
@ -1614,12 +1631,6 @@ static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline
|
||||
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void __cpu_map_flush(void)
|
||||
{
|
||||
}
|
||||
@ -1670,6 +1681,13 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
|
||||
const union bpf_attr *kattr,
|
||||
union bpf_attr __user *uattr)
|
||||
{
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
static inline void bpf_map_put(struct bpf_map *map)
|
||||
{
|
||||
}
|
||||
@ -1684,6 +1702,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void bpf_task_storage_free(struct task_struct *task)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_BPF_SYSCALL */
|
||||
|
||||
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
|
||||
@ -1768,22 +1790,24 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
|
||||
}
|
||||
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
|
||||
|
||||
#if defined(CONFIG_BPF_STREAM_PARSER)
|
||||
int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
|
||||
struct bpf_prog *old, u32 which);
|
||||
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
|
||||
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
|
||||
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
|
||||
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
|
||||
void sock_map_unhash(struct sock *sk);
|
||||
void sock_map_close(struct sock *sk, long timeout);
|
||||
|
||||
void bpf_sk_reuseport_detach(struct sock *sk);
|
||||
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
|
||||
void *value);
|
||||
int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
|
||||
void *value, u64 map_flags);
|
||||
#else
|
||||
static inline int sock_map_prog_update(struct bpf_map *map,
|
||||
struct bpf_prog *prog,
|
||||
struct bpf_prog *old, u32 which)
|
||||
static inline void bpf_sk_reuseport_detach(struct sock *sk)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
static inline int sock_map_get_from_fd(const union bpf_attr *attr,
|
||||
struct bpf_prog *prog)
|
||||
{
|
||||
@ -1801,20 +1825,7 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
#endif /* CONFIG_BPF_STREAM_PARSER */
|
||||
|
||||
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
|
||||
void bpf_sk_reuseport_detach(struct sock *sk);
|
||||
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
|
||||
void *value);
|
||||
int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
|
||||
void *value, u64 map_flags);
|
||||
#else
|
||||
static inline void bpf_sk_reuseport_detach(struct sock *sk)
|
||||
{
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
|
||||
void *key, void *value)
|
||||
{
|
||||
@ -1886,6 +1897,9 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
|
||||
extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
|
||||
extern const struct bpf_func_proto bpf_sock_from_file_proto;
|
||||
extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
|
||||
extern const struct bpf_func_proto bpf_task_storage_get_proto;
|
||||
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
|
||||
extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
|
||||
|
||||
const struct bpf_func_proto *bpf_tracing_func_proto(
|
||||
enum bpf_func_id func_id, const struct bpf_prog *prog);
|
||||
|
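The map_set_for_each_callback_args/map_for_each_callback ops added to struct bpf_map_ops above back the new bpf_for_each_map_elem() helper. A hypothetical usage sketch; the map name, map type and attach point are illustrative, not from the patch:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 32);
        __type(key, __u32);
        __type(value, __u64);
} counters SEC(".maps");

/* Called once per element; return 0 to continue, 1 to stop early. */
static long sum_elem(struct bpf_map *map, __u32 *key, __u64 *val, void *ctx)
{
        *(__u64 *)ctx += *val;
        return 0;
}

SEC("tc")
int sum_counters(struct __sk_buff *skb)
{
        __u64 total = 0;

        bpf_for_each_map_elem(&counters, sum_elem, &total, 0);
        return 0;
}

char _license[] SEC("license") = "GPL";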
@@ -126,7 +126,8 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
                         struct bpf_local_storage_map *smap,
                         bool cacheit_lockit);

void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
                                int __percpu *busy_counter);

int bpf_local_storage_map_check_btf(const struct bpf_map *map,
                                    const struct btf *btf,
@@ -38,21 +38,9 @@ static inline struct bpf_storage_blob *bpf_inode(
        return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
}

static inline struct bpf_storage_blob *bpf_task(
        const struct task_struct *task)
{
        if (unlikely(!task->security))
                return NULL;

        return task->security + bpf_lsm_blob_sizes.lbs_task;
}

extern const struct bpf_func_proto bpf_inode_storage_get_proto;
extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
extern const struct bpf_func_proto bpf_task_storage_get_proto;
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
void bpf_inode_storage_free(struct inode *inode);
void bpf_task_storage_free(struct task_struct *task);

#else /* !CONFIG_BPF_LSM */

@@ -73,20 +61,10 @@ static inline struct bpf_storage_blob *bpf_inode(
        return NULL;
}

static inline struct bpf_storage_blob *bpf_task(
        const struct task_struct *task)
{
        return NULL;
}

static inline void bpf_inode_storage_free(struct inode *inode)
{
}

static inline void bpf_task_storage_free(struct task_struct *task)
{
}

#endif /* CONFIG_BPF_LSM */

#endif /* _LINUX_BPF_LSM_H */
@@ -103,19 +103,17 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
#if defined(CONFIG_BPF_STREAM_PARSER)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
#endif
#ifdef CONFIG_BPF_LSM
BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
#if defined(CONFIG_XDP_SOCKETS)
BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
#endif
#ifdef CONFIG_INET
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
#endif
#endif
@@ -68,6 +68,8 @@ struct bpf_reg_state {
                        unsigned long raw1;
                        unsigned long raw2;
                } raw;

                u32 subprogno; /* for PTR_TO_FUNC */
        };
        /* For PTR_TO_PACKET, used to find other pointers with the same variable
         * offset, so they can share range knowledge.
@@ -204,6 +206,7 @@ struct bpf_func_state {
        int acquired_refs;
        struct bpf_reference_state *refs;
        int allocated_stack;
        bool in_callback_fn;
        struct bpf_stack_state *stack;
};
@@ -646,7 +646,8 @@ struct bpf_redirect_info {
        u32 flags;
        u32 tgt_index;
        void *tgt_value;
        struct bpf_map *map;
        u32 map_id;
        enum bpf_map_type map_type;
        u32 kern_flags;
        struct bpf_nh_params nh;
};
@@ -1472,4 +1473,32 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
}
#endif /* IS_ENABLED(CONFIG_IPV6) */

static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, u64 flags,
                                                  void *lookup_elem(struct bpf_map *map, u32 key))
{
        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);

        /* Lower bits of the flags are used as return code on lookup failure */
        if (unlikely(flags > XDP_TX))
                return XDP_ABORTED;

        ri->tgt_value = lookup_elem(map, ifindex);
        if (unlikely(!ri->tgt_value)) {
                /* If the lookup fails we want to clear out the state in the
                 * redirect_info struct completely, so that if an eBPF program
                 * performs multiple lookups, the last one always takes
                 * precedence.
                 */
                ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */
                ri->map_type = BPF_MAP_TYPE_UNSPEC;
                return flags;
        }

        ri->tgt_index = ifindex;
        ri->map_id = map->id;
        ri->map_type = map->map_type;

        return XDP_REDIRECT;
}

#endif /* __LINUX_FILTER_H__ */
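A hypothetical XDP-side sketch of the helper this fast path serves, bpf_redirect_map(); the devmap name and sizes are illustrative only:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
        __uint(type, BPF_MAP_TYPE_DEVMAP);
        __uint(max_entries, 8);
        __type(key, __u32);
        __type(value, __u32);
} tx_ports SEC(".maps");

SEC("xdp")
int xdp_redirect_prog(struct xdp_md *ctx)
{
        __u32 port = 0;

        /* If the lookup in tx_ports fails, the lower bits of the flags
         * argument (here XDP_PASS) are returned instead of XDP_REDIRECT,
         * exactly as __bpf_xdp_redirect_map() above describes.
         */
        return bpf_redirect_map(&tx_ports, port, XDP_PASS);
}

char _license[] SEC("license") = "GPL";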
@@ -1518,6 +1518,8 @@ struct net_device_ops {
 * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
 * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device
 * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running
 * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with
 *      skb_headlen(skb) == 0 (data starts from frag0)
 */
enum netdev_priv_flags {
        IFF_802_1Q_VLAN = 1<<0,
@@ -1551,6 +1553,7 @@ enum netdev_priv_flags {
        IFF_FAILOVER_SLAVE = 1<<28,
        IFF_L3MDEV_RX_HANDLER = 1<<29,
        IFF_LIVE_RENAME_OK = 1<<30,
        IFF_TX_SKB_NO_LINEAR = 1<<31,
};

#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
@@ -1577,12 +1580,14 @@ enum netdev_priv_flags {
#define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE
#define IFF_TEAM IFF_TEAM
#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED
#define IFF_PHONY_HEADROOM IFF_PHONY_HEADROOM
#define IFF_MACSEC IFF_MACSEC
#define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER
#define IFF_FAILOVER IFF_FAILOVER
#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE
#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER
#define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK
#define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR

/* Specifies the type of the struct net_device::ml_priv pointer */
enum netdev_ml_priv_type {
@@ -42,6 +42,7 @@ struct audit_context;
struct backing_dev_info;
struct bio_list;
struct blk_plug;
struct bpf_local_storage;
struct capture_control;
struct cfs_rq;
struct fs_struct;
@@ -1351,6 +1352,10 @@ struct task_struct {
        /* Used by LSM modules for access restriction: */
        void *security;
#endif
#ifdef CONFIG_BPF_SYSCALL
        /* Used by BPF task local storage */
        struct bpf_local_storage __rcu *bpf_storage;
#endif

#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
        unsigned long lowest_stack;
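The new task_struct->bpf_storage member backs BPF_MAP_TYPE_TASK_STORAGE, which this series also opens up to tracing programs. A hypothetical sketch; the attach point and counter semantics are made up for illustration:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct {
        __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
        __uint(map_flags, BPF_F_NO_PREALLOC);
        __type(key, int);
        __type(value, __u64);
} task_ctr SEC(".maps");

SEC("tp_btf/sched_switch")
int BPF_PROG(on_switch, bool preempt, struct task_struct *prev,
             struct task_struct *next)
{
        __u64 *cnt;

        /* Create (or fetch) the per-task counter and bump it. */
        cnt = bpf_task_storage_get(&task_ctr, next, 0,
                                   BPF_LOCAL_STORAGE_GET_F_CREATE);
        if (cnt)
                __sync_fetch_and_add(cnt, 1);
        return 0;
}

char _license[] SEC("license") = "GPL";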
@@ -656,6 +656,7 @@ typedef unsigned char *sk_buff_data_t;
 *      @protocol: Packet protocol from driver
 *      @destructor: Destruct function
 *      @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue)
 *      @_sk_redir: socket redirection information for skmsg
 *      @_nfct: Associated connection, if any (with nfctinfo bits)
 *      @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
 *      @skb_iif: ifindex of device we arrived on
@@ -755,6 +756,9 @@ struct sk_buff {
                        void (*destructor)(struct sk_buff *skb);
                };
                struct list_head tcp_tsorted_anchor;
#ifdef CONFIG_NET_SOCK_MSG
                unsigned long _sk_redir;
#endif
        };

#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@ -56,8 +56,8 @@ struct sk_msg {
|
||||
|
||||
struct sk_psock_progs {
|
||||
struct bpf_prog *msg_parser;
|
||||
struct bpf_prog *skb_parser;
|
||||
struct bpf_prog *skb_verdict;
|
||||
struct bpf_prog *stream_parser;
|
||||
struct bpf_prog *stream_verdict;
|
||||
};
|
||||
|
||||
enum sk_psock_state_bits {
|
||||
@ -70,12 +70,6 @@ struct sk_psock_link {
|
||||
void *link_raw;
|
||||
};
|
||||
|
||||
struct sk_psock_parser {
|
||||
struct strparser strp;
|
||||
bool enabled;
|
||||
void (*saved_data_ready)(struct sock *sk);
|
||||
};
|
||||
|
||||
struct sk_psock_work_state {
|
||||
struct sk_buff *skb;
|
||||
u32 len;
|
||||
@ -90,7 +84,9 @@ struct sk_psock {
|
||||
u32 eval;
|
||||
struct sk_msg *cork;
|
||||
struct sk_psock_progs progs;
|
||||
struct sk_psock_parser parser;
|
||||
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
|
||||
struct strparser strp;
|
||||
#endif
|
||||
struct sk_buff_head ingress_skb;
|
||||
struct list_head ingress_msg;
|
||||
unsigned long state;
|
||||
@ -100,6 +96,7 @@ struct sk_psock {
|
||||
void (*saved_unhash)(struct sock *sk);
|
||||
void (*saved_close)(struct sock *sk, long timeout);
|
||||
void (*saved_write_space)(struct sock *sk);
|
||||
void (*saved_data_ready)(struct sock *sk);
|
||||
struct proto *sk_proto;
|
||||
struct sk_psock_work_state work_state;
|
||||
struct work_struct work;
|
||||
@ -305,9 +302,25 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
|
||||
|
||||
struct sk_psock *sk_psock_init(struct sock *sk, int node);
|
||||
|
||||
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
|
||||
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
|
||||
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
|
||||
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
|
||||
#else
|
||||
static inline int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static inline void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock);
|
||||
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
|
||||
|
||||
@ -327,8 +340,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link)
|
||||
|
||||
struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);
|
||||
|
||||
void __sk_psock_purge_ingress_msg(struct sk_psock *psock);
|
||||
|
||||
static inline void sk_psock_cork_free(struct sk_psock *psock)
|
||||
{
|
||||
if (psock->cork) {
|
||||
@ -389,7 +400,6 @@ static inline struct sk_psock *sk_psock_get(struct sock *sk)
|
||||
return psock;
|
||||
}
|
||||
|
||||
void sk_psock_stop(struct sock *sk, struct sk_psock *psock);
|
||||
void sk_psock_drop(struct sock *sk, struct sk_psock *psock);
|
||||
|
||||
static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
|
||||
@ -400,8 +410,8 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
|
||||
|
||||
static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
if (psock->parser.enabled)
|
||||
psock->parser.saved_data_ready(sk);
|
||||
if (psock->saved_data_ready)
|
||||
psock->saved_data_ready(sk);
|
||||
else
|
||||
sk->sk_data_ready(sk);
|
||||
}
|
||||
@ -430,8 +440,8 @@ static inline int psock_replace_prog(struct bpf_prog **pprog,
|
||||
static inline void psock_progs_drop(struct sk_psock_progs *progs)
|
||||
{
|
||||
psock_set_prog(&progs->msg_parser, NULL);
|
||||
psock_set_prog(&progs->skb_parser, NULL);
|
||||
psock_set_prog(&progs->skb_verdict, NULL);
|
||||
psock_set_prog(&progs->stream_parser, NULL);
|
||||
psock_set_prog(&progs->stream_verdict, NULL);
|
||||
}
|
||||
|
||||
int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb);
|
||||
@ -440,6 +450,44 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
|
||||
{
|
||||
if (!psock)
|
||||
return false;
|
||||
return psock->parser.enabled;
|
||||
return !!psock->saved_data_ready;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_NET_SOCK_MSG)
|
||||
|
||||
/* We only have one bit so far. */
|
||||
#define BPF_F_PTR_MASK ~(BPF_F_INGRESS)
|
||||
|
||||
static inline bool skb_bpf_ingress(const struct sk_buff *skb)
|
||||
{
|
||||
unsigned long sk_redir = skb->_sk_redir;
|
||||
|
||||
return sk_redir & BPF_F_INGRESS;
|
||||
}
|
||||
|
||||
static inline void skb_bpf_set_ingress(struct sk_buff *skb)
|
||||
{
|
||||
skb->_sk_redir |= BPF_F_INGRESS;
|
||||
}
|
||||
|
||||
static inline void skb_bpf_set_redir(struct sk_buff *skb, struct sock *sk_redir,
|
||||
bool ingress)
|
||||
{
|
||||
skb->_sk_redir = (unsigned long)sk_redir;
|
||||
if (ingress)
|
||||
skb->_sk_redir |= BPF_F_INGRESS;
|
||||
}
|
||||
|
||||
static inline struct sock *skb_bpf_redirect_fetch(const struct sk_buff *skb)
|
||||
{
|
||||
unsigned long sk_redir = skb->_sk_redir;
|
||||
|
||||
return (struct sock *)(sk_redir & BPF_F_PTR_MASK);
|
||||
}
|
||||
|
||||
static inline void skb_bpf_redirect_clear(struct sk_buff *skb)
|
||||
{
|
||||
skb->_sk_redir = 0;
|
||||
}
|
||||
#endif /* CONFIG_NET_SOCK_MSG */
|
||||
#endif /* _LINUX_SKMSG_H */
|
||||
|
@ -883,36 +883,11 @@ struct tcp_skb_cb {
|
||||
struct inet6_skb_parm h6;
|
||||
#endif
|
||||
} header; /* For incoming skbs */
|
||||
struct {
|
||||
__u32 flags;
|
||||
struct sock *sk_redir;
|
||||
void *data_end;
|
||||
} bpf;
|
||||
};
|
||||
};
|
||||
|
||||
#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
|
||||
|
||||
static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
|
||||
{
|
||||
TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
|
||||
}
|
||||
|
||||
static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
|
||||
{
|
||||
return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
|
||||
}
|
||||
|
||||
static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb)
|
||||
{
|
||||
return TCP_SKB_CB(skb)->bpf.sk_redir;
|
||||
}
|
||||
|
||||
static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
|
||||
{
|
||||
TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
|
||||
}
|
||||
|
||||
extern const struct inet_connection_sock_af_ops ipv4_specific;
|
||||
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
@ -2222,25 +2197,27 @@ void tcp_update_ulp(struct sock *sk, struct proto *p,
|
||||
__MODULE_INFO(alias, alias_userspace, name); \
|
||||
__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
|
||||
|
||||
#ifdef CONFIG_NET_SOCK_MSG
|
||||
struct sk_msg;
|
||||
struct sk_psock;
|
||||
|
||||
#ifdef CONFIG_BPF_STREAM_PARSER
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
|
||||
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
|
||||
#else
|
||||
static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_BPF_STREAM_PARSER */
|
||||
#endif /* CONFIG_BPF_SYSCALL */
|
||||
|
||||
#ifdef CONFIG_NET_SOCK_MSG
|
||||
int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
|
||||
int flags);
|
||||
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
|
||||
struct msghdr *msg, int len, int flags);
|
||||
#endif /* CONFIG_NET_SOCK_MSG */
|
||||
|
||||
#if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG)
|
||||
static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CGROUP_BPF
|
||||
static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
|
||||
struct sk_buff *skb,
|
||||
|
@@ -515,9 +515,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
        return segs;
}

#ifdef CONFIG_BPF_STREAM_PARSER
#ifdef CONFIG_BPF_SYSCALL
struct sk_psock;
struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
#endif /* BPF_STREAM_PARSER */
#endif

#endif /* _UDP_H */
@@ -80,19 +80,6 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
void __xsk_map_flush(void);

static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
                                                     u32 key)
{
        struct xsk_map *m = container_of(map, struct xsk_map, map);
        struct xdp_sock *xs;

        if (key >= map->max_entries)
                return NULL;

        xs = READ_ONCE(m->xsk_map[key]);
        return xs;
}

#else

static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
@@ -109,12 +96,6 @@ static inline void __xsk_map_flush(void)
{
}

static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
                                                     u32 key)
{
        return NULL;
}

#endif /* CONFIG_XDP_SOCKETS */

#endif /* _LINUX_XDP_SOCK_H */
@ -86,19 +86,15 @@ struct _bpf_dtab_netdev {
|
||||
};
|
||||
#endif /* __DEVMAP_OBJ_TYPE */
|
||||
|
||||
#define devmap_ifindex(tgt, map) \
|
||||
(((map->map_type == BPF_MAP_TYPE_DEVMAP || \
|
||||
map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)) ? \
|
||||
((struct _bpf_dtab_netdev *)tgt)->dev->ifindex : 0)
|
||||
|
||||
DECLARE_EVENT_CLASS(xdp_redirect_template,
|
||||
|
||||
TP_PROTO(const struct net_device *dev,
|
||||
const struct bpf_prog *xdp,
|
||||
const void *tgt, int err,
|
||||
const struct bpf_map *map, u32 index),
|
||||
enum bpf_map_type map_type,
|
||||
u32 map_id, u32 index),
|
||||
|
||||
TP_ARGS(dev, xdp, tgt, err, map, index),
|
||||
TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, prog_id)
|
||||
@ -111,14 +107,22 @@ DECLARE_EVENT_CLASS(xdp_redirect_template,
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
u32 ifindex = 0, map_index = index;
|
||||
|
||||
if (map_type == BPF_MAP_TYPE_DEVMAP || map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
|
||||
ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex;
|
||||
} else if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
|
||||
ifindex = index;
|
||||
map_index = 0;
|
||||
}
|
||||
|
||||
__entry->prog_id = xdp->aux->id;
|
||||
__entry->act = XDP_REDIRECT;
|
||||
__entry->ifindex = dev->ifindex;
|
||||
__entry->err = err;
|
||||
__entry->to_ifindex = map ? devmap_ifindex(tgt, map) :
|
||||
index;
|
||||
__entry->map_id = map ? map->id : 0;
|
||||
__entry->map_index = map ? index : 0;
|
||||
__entry->to_ifindex = ifindex;
|
||||
__entry->map_id = map_id;
|
||||
__entry->map_index = map_index;
|
||||
),
|
||||
|
||||
TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
|
||||
@ -133,45 +137,49 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect,
|
||||
TP_PROTO(const struct net_device *dev,
|
||||
const struct bpf_prog *xdp,
|
||||
const void *tgt, int err,
|
||||
const struct bpf_map *map, u32 index),
|
||||
TP_ARGS(dev, xdp, tgt, err, map, index)
|
||||
enum bpf_map_type map_type,
|
||||
u32 map_id, u32 index),
|
||||
TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
|
||||
TP_PROTO(const struct net_device *dev,
|
||||
const struct bpf_prog *xdp,
|
||||
const void *tgt, int err,
|
||||
const struct bpf_map *map, u32 index),
|
||||
TP_ARGS(dev, xdp, tgt, err, map, index)
|
||||
enum bpf_map_type map_type,
|
||||
u32 map_id, u32 index),
|
||||
TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
|
||||
);
|
||||
|
||||
#define _trace_xdp_redirect(dev, xdp, to) \
|
||||
trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to)
|
||||
#define _trace_xdp_redirect(dev, xdp, to) \
|
||||
trace_xdp_redirect(dev, xdp, NULL, 0, BPF_MAP_TYPE_UNSPEC, INT_MAX, to)
|
||||
|
||||
#define _trace_xdp_redirect_err(dev, xdp, to, err) \
|
||||
trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to)
|
||||
#define _trace_xdp_redirect_err(dev, xdp, to, err) \
|
||||
trace_xdp_redirect_err(dev, xdp, NULL, err, BPF_MAP_TYPE_UNSPEC, INT_MAX, to)
|
||||
|
||||
#define _trace_xdp_redirect_map(dev, xdp, to, map, index) \
|
||||
trace_xdp_redirect(dev, xdp, to, 0, map, index)
|
||||
#define _trace_xdp_redirect_map(dev, xdp, to, map_type, map_id, index) \
|
||||
trace_xdp_redirect(dev, xdp, to, 0, map_type, map_id, index)
|
||||
|
||||
#define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err) \
|
||||
trace_xdp_redirect_err(dev, xdp, to, err, map, index)
|
||||
#define _trace_xdp_redirect_map_err(dev, xdp, to, map_type, map_id, index, err) \
|
||||
trace_xdp_redirect_err(dev, xdp, to, err, map_type, map_id, index)
|
||||
|
||||
/* not used anymore, but kept around so as not to break old programs */
|
||||
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map,
|
||||
TP_PROTO(const struct net_device *dev,
|
||||
const struct bpf_prog *xdp,
|
||||
const void *tgt, int err,
|
||||
const struct bpf_map *map, u32 index),
|
||||
TP_ARGS(dev, xdp, tgt, err, map, index)
|
||||
enum bpf_map_type map_type,
|
||||
u32 map_id, u32 index),
|
||||
TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
|
||||
TP_PROTO(const struct net_device *dev,
|
||||
const struct bpf_prog *xdp,
|
||||
const void *tgt, int err,
|
||||
const struct bpf_map *map, u32 index),
|
||||
TP_ARGS(dev, xdp, tgt, err, map, index)
|
||||
enum bpf_map_type map_type,
|
||||
u32 map_id, u32 index),
|
||||
TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
|
||||
);
|
||||
|
||||
TRACE_EVENT(xdp_cpumap_kthread,
|
||||
|
@ -93,7 +93,717 @@ union bpf_iter_link_info {
|
||||
} map;
|
||||
};
|
||||
|
||||
/* BPF syscall commands, see bpf(2) man-page for details. */
|
||||
/* BPF syscall commands, see bpf(2) man-page for more details. */
|
||||
/**
|
||||
* DOC: eBPF Syscall Preamble
|
||||
*
|
||||
* The operation to be performed by the **bpf**\ () system call is determined
|
||||
* by the *cmd* argument. Each operation takes an accompanying argument,
|
||||
* provided via *attr*, which is a pointer to a union of type *bpf_attr* (see
|
||||
* below). The size argument is the size of the union pointed to by *attr*.
|
||||
*/
|
||||
/**
|
||||
* DOC: eBPF Syscall Commands
|
||||
*
|
||||
* BPF_MAP_CREATE
|
||||
* Description
|
||||
* Create a map and return a file descriptor that refers to the
|
||||
* map. The close-on-exec file descriptor flag (see **fcntl**\ (2))
|
||||
* is automatically enabled for the new file descriptor.
|
||||
*
|
||||
* Applying **close**\ (2) to the file descriptor returned by
|
||||
* **BPF_MAP_CREATE** will delete the map (but see NOTES).
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_MAP_LOOKUP_ELEM
|
||||
* Description
|
||||
* Look up an element with a given *key* in the map referred to
|
||||
* by the file descriptor *map_fd*.
|
||||
*
|
||||
* The *flags* argument may be specified as one of the
|
||||
* following:
|
||||
*
|
||||
* **BPF_F_LOCK**
|
||||
* Look up the value of a spin-locked map without
|
||||
* returning the lock. This must be specified if the
|
||||
* elements contain a spinlock.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_MAP_UPDATE_ELEM
|
||||
* Description
|
||||
* Create or update an element (key/value pair) in a specified map.
|
||||
*
|
||||
* The *flags* argument should be specified as one of the
|
||||
* following:
|
||||
*
|
||||
* **BPF_ANY**
|
||||
* Create a new element or update an existing element.
|
||||
* **BPF_NOEXIST**
|
||||
* Create a new element only if it did not exist.
|
||||
* **BPF_EXIST**
|
||||
* Update an existing element.
|
||||
* **BPF_F_LOCK**
|
||||
* Update a spin_lock-ed map element.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**,
|
||||
* **E2BIG**, **EEXIST**, or **ENOENT**.
|
||||
*
|
||||
* **E2BIG**
|
||||
* The number of elements in the map reached the
|
||||
* *max_entries* limit specified at map creation time.
|
||||
* **EEXIST**
|
||||
* If *flags* specifies **BPF_NOEXIST** and the element
|
||||
* with *key* already exists in the map.
|
||||
* **ENOENT**
|
||||
* If *flags* specifies **BPF_EXIST** and the element with
|
||||
* *key* does not exist in the map.
|
||||
*
|
||||
* BPF_MAP_DELETE_ELEM
|
||||
* Description
|
||||
* Look up and delete an element by key in a specified map.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_MAP_GET_NEXT_KEY
|
||||
* Description
|
||||
* Look up an element by key in a specified map and return the key
|
||||
* of the next element. Can be used to iterate over all elements
|
||||
* in the map.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* The following cases can be used to iterate over all elements of
|
||||
* the map:
|
||||
*
|
||||
* * If *key* is not found, the operation returns zero and sets
|
||||
* the *next_key* pointer to the key of the first element.
|
||||
* * If *key* is found, the operation returns zero and sets the
|
||||
* *next_key* pointer to the key of the next element.
|
||||
* * If *key* is the last element, returns -1 and *errno* is set
|
||||
* to **ENOENT**.
|
||||
*
|
||||
* May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or
|
||||
* **EINVAL** on error.
|
||||
*
|
||||
* BPF_PROG_LOAD
|
||||
* Description
|
||||
* Verify and load an eBPF program, returning a new file
|
||||
* descriptor associated with the program.
|
||||
*
|
||||
* Applying **close**\ (2) to the file descriptor returned by
|
||||
* **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES).
|
||||
*
|
||||
* The close-on-exec file descriptor flag (see **fcntl**\ (2)) is
|
||||
* automatically enabled for the new file descriptor.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_OBJ_PIN
|
||||
* Description
|
||||
* Pin an eBPF program or map referred by the specified *bpf_fd*
|
||||
* to the provided *pathname* on the filesystem.
|
||||
*
|
||||
* The *pathname* argument must not contain a dot (".").
|
||||
*
|
||||
* On success, *pathname* retains a reference to the eBPF object,
|
||||
* preventing deallocation of the object when the original
|
||||
* *bpf_fd* is closed. This allows the eBPF object to live beyond
|
||||
* **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent
|
||||
* process.
|
||||
*
|
||||
* Applying **unlink**\ (2) or similar calls to the *pathname*
|
||||
* unpins the object from the filesystem, removing the reference.
|
||||
* If no other file descriptors or filesystem nodes refer to the
|
||||
* same object, it will be deallocated (see NOTES).
|
||||
*
|
||||
* The filesystem type for the parent directory of *pathname* must
|
||||
* be **BPF_FS_MAGIC**.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_OBJ_GET
|
||||
* Description
|
||||
* Open a file descriptor for the eBPF object pinned to the
|
||||
* specified *pathname*.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_PROG_ATTACH
|
||||
* Description
|
||||
* Attach an eBPF program to a *target_fd* at the specified
|
||||
* *attach_type* hook.
|
||||
*
|
||||
* The *attach_type* specifies the eBPF attachment point to
|
||||
* attach the program to, and must be one of *bpf_attach_type*
|
||||
* (see below).
|
||||
*
|
||||
* The *attach_bpf_fd* must be a valid file descriptor for a
|
||||
* loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap
|
||||
* or sock_ops type corresponding to the specified *attach_type*.
|
||||
*
|
||||
* The *target_fd* must be a valid file descriptor for a kernel
|
||||
* object which depends on the attach type of *attach_bpf_fd*:
|
||||
*
|
||||
* **BPF_PROG_TYPE_CGROUP_DEVICE**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SKB**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SOCK**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SYSCTL**,
|
||||
* **BPF_PROG_TYPE_SOCK_OPS**
|
||||
*
|
||||
* Control Group v2 hierarchy with the eBPF controller
|
||||
* enabled. Requires the kernel to be compiled with
|
||||
* **CONFIG_CGROUP_BPF**.
|
||||
*
|
||||
* **BPF_PROG_TYPE_FLOW_DISSECTOR**
|
||||
*
|
||||
* Network namespace (eg /proc/self/ns/net).
|
||||
*
|
||||
* **BPF_PROG_TYPE_LIRC_MODE2**
|
||||
*
|
||||
* LIRC device path (eg /dev/lircN). Requires the kernel
|
||||
* to be compiled with **CONFIG_BPF_LIRC_MODE2**.
|
||||
*
|
||||
* **BPF_PROG_TYPE_SK_SKB**,
|
||||
* **BPF_PROG_TYPE_SK_MSG**
|
||||
*
|
||||
* eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**).
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_PROG_DETACH
|
||||
* Description
|
||||
* Detach the eBPF program associated with the *target_fd* at the
|
||||
* hook specified by *attach_type*. The program must have been
|
||||
* previously attached using **BPF_PROG_ATTACH**.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_PROG_TEST_RUN
|
||||
* Description
|
||||
* Run the eBPF program associated with the *prog_fd* a *repeat*
|
||||
* number of times against a provided program context *ctx_in* and
|
||||
* data *data_in*, and return the modified program context
|
||||
* *ctx_out*, *data_out* (for example, packet data), result of the
|
||||
* execution *retval*, and *duration* of the test run.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* **ENOSPC**
|
||||
* Either *data_size_out* or *ctx_size_out* is too small.
|
||||
* **ENOTSUPP**
|
||||
* This command is not supported by the program type of
|
||||
* the program referred to by *prog_fd*.
|
||||
*
|
||||
* BPF_PROG_GET_NEXT_ID
|
||||
* Description
|
||||
* Fetch the next eBPF program currently loaded into the kernel.
|
||||
*
|
||||
* Looks for the eBPF program with an id greater than *start_id*
|
||||
* and updates *next_id* on success. If no other eBPF programs
|
||||
* remain with ids higher than *start_id*, returns -1 and sets
|
||||
* *errno* to **ENOENT**.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, or when no id remains, -1
|
||||
* is returned and *errno* is set appropriately.
|
||||
*
|
||||
* BPF_MAP_GET_NEXT_ID
|
||||
* Description
|
||||
* Fetch the next eBPF map currently loaded into the kernel.
|
||||
*
|
||||
* Looks for the eBPF map with an id greater than *start_id*
|
||||
* and updates *next_id* on success. If no other eBPF maps
|
||||
* remain with ids higher than *start_id*, returns -1 and sets
|
||||
* *errno* to **ENOENT**.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, or when no id remains, -1
|
||||
* is returned and *errno* is set appropriately.
|
||||
*
|
||||
* BPF_PROG_GET_FD_BY_ID
|
||||
* Description
|
||||
* Open a file descriptor for the eBPF program corresponding to
|
||||
* *prog_id*.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_MAP_GET_FD_BY_ID
|
||||
* Description
|
||||
* Open a file descriptor for the eBPF map corresponding to
|
||||
* *map_id*.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_OBJ_GET_INFO_BY_FD
|
||||
* Description
|
||||
* Obtain information about the eBPF object corresponding to
|
||||
* *bpf_fd*.
|
||||
*
|
||||
* Populates up to *info_len* bytes of *info*, which will be in
|
||||
* one of the following formats depending on the eBPF object type
|
||||
* of *bpf_fd*:
|
||||
*
|
||||
* * **struct bpf_prog_info**
|
||||
* * **struct bpf_map_info**
|
||||
* * **struct bpf_btf_info**
|
||||
* * **struct bpf_link_info**
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_PROG_QUERY
|
||||
* Description
|
||||
* Obtain information about eBPF programs associated with the
|
||||
* specified *attach_type* hook.
|
||||
*
|
||||
* The *target_fd* must be a valid file descriptor for a kernel
|
||||
* object which depends on the attach type of *attach_bpf_fd*:
|
||||
*
|
||||
* **BPF_PROG_TYPE_CGROUP_DEVICE**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SKB**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SOCK**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SYSCTL**,
|
||||
* **BPF_PROG_TYPE_SOCK_OPS**
|
||||
*
|
||||
* Control Group v2 hierarchy with the eBPF controller
|
||||
* enabled. Requires the kernel to be compiled with
|
||||
* **CONFIG_CGROUP_BPF**.
|
||||
*
|
||||
* **BPF_PROG_TYPE_FLOW_DISSECTOR**
|
||||
*
|
||||
* Network namespace (eg /proc/self/ns/net).
|
||||
*
|
||||
* **BPF_PROG_TYPE_LIRC_MODE2**
|
||||
*
|
||||
* LIRC device path (eg /dev/lircN). Requires the kernel
|
||||
* to be compiled with **CONFIG_BPF_LIRC_MODE2**.
|
||||
*
|
||||
* **BPF_PROG_QUERY** always fetches the number of programs
|
||||
* attached and the *attach_flags* which were used to attach those
|
||||
* programs. Additionally, if *prog_ids* is nonzero and the number
|
||||
* of attached programs is less than *prog_cnt*, populates
|
||||
* *prog_ids* with the eBPF program ids of the programs attached
|
||||
* at *target_fd*.
|
||||
*
|
||||
* The following flags may alter the result:
|
||||
*
|
||||
* **BPF_F_QUERY_EFFECTIVE**
|
||||
* Only return information regarding programs which are
|
||||
* currently effective at the specified *target_fd*.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_RAW_TRACEPOINT_OPEN
|
||||
* Description
|
||||
* Attach an eBPF program to a tracepoint *name* to access kernel
|
||||
* internal arguments of the tracepoint in their raw form.
|
||||
*
|
||||
* The *prog_fd* must be a valid file descriptor associated with
|
||||
* a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**.
|
||||
*
|
||||
* No ABI guarantees are made about the content of tracepoint
|
||||
* arguments exposed to the corresponding eBPF program.
|
||||
*
|
||||
* Applying **close**\ (2) to the file descriptor returned by
|
||||
* **BPF_RAW_TRACEPOINT_OPEN** will detach the eBPF program from the tracepoint (but see NOTES).
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_BTF_LOAD
|
||||
* Description
|
||||
* Verify and load BPF Type Format (BTF) metadata into the kernel,
|
||||
* returning a new file descriptor associated with the metadata.
|
||||
* BTF is described in more detail at
|
||||
* https://www.kernel.org/doc/html/latest/bpf/btf.html.
|
||||
*
|
||||
* The *btf* parameter must point to valid memory providing
|
||||
* *btf_size* bytes of BTF binary metadata.
|
||||
*
|
||||
* The returned file descriptor can be passed to other **bpf**\ ()
|
||||
* subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to
|
||||
* associate the BTF with those objects.
|
||||
*
|
||||
* Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional
|
||||
* parameters to specify a *btf_log_buf*, *btf_log_size* and
|
||||
* *btf_log_level* which allow the kernel to return freeform log
|
||||
* output regarding the BTF verification process.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_BTF_GET_FD_BY_ID
|
||||
* Description
|
||||
* Open a file descriptor for the BPF Type Format (BTF)
|
||||
* corresponding to *btf_id*.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_TASK_FD_QUERY
|
||||
* Description
|
||||
* Obtain information about eBPF programs associated with the
|
||||
* target process identified by *pid* and *fd*.
|
||||
*
|
||||
* If the *pid* and *fd* are associated with a tracepoint, kprobe
|
||||
* or uprobe perf event, then the *prog_id* and *fd_type* will
|
||||
* be populated with the eBPF program id and file descriptor type
|
||||
* of type **bpf_task_fd_type**. If associated with a kprobe or
|
||||
* uprobe, the *probe_offset* and *probe_addr* will also be
|
||||
* populated. Optionally, if *buf* is provided, then up to
|
||||
* *buf_len* bytes of *buf* will be populated with the name of
|
||||
* the tracepoint, kprobe or uprobe.
|
||||
*
|
||||
* The resulting *prog_id* may be introspected in deeper detail
|
||||
* using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_MAP_LOOKUP_AND_DELETE_ELEM
|
||||
* Description
|
||||
* Look up an element with the given *key* in the map referred to
|
||||
* by the file descriptor *fd*, and if found, delete the element.
|
||||
*
|
||||
* The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
|
||||
* implement this command as a "pop" operation, deleting the top
|
||||
* element rather than one corresponding to *key*.
|
||||
* The *key* and *key_len* parameters should be zeroed when
|
||||
* issuing this operation for these map types.
|
||||
*
|
||||
* This command is only valid for the following map types:
|
||||
* * **BPF_MAP_TYPE_QUEUE**
|
||||
* * **BPF_MAP_TYPE_STACK**
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_MAP_FREEZE
|
||||
* Description
|
||||
* Freeze the permissions of the specified map.
|
||||
*
|
||||
* Write permissions may be frozen by passing zero *flags*.
|
||||
* Upon success, no future syscall invocations may alter the
|
||||
* map state of *map_fd*. Write operations from eBPF programs
|
||||
* are still possible for a frozen map.
|
||||
*
|
||||
* Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_BTF_GET_NEXT_ID
|
||||
* Description
|
||||
* Fetch the next BPF Type Format (BTF) object currently loaded
|
||||
* into the kernel.
|
||||
*
|
||||
* Looks for the BTF object with an id greater than *start_id*
|
||||
* and updates *next_id* on success. If no other BTF objects
|
||||
* remain with ids higher than *start_id*, returns -1 and sets
|
||||
* *errno* to **ENOENT**.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, or when no id remains, -1
|
||||
* is returned and *errno* is set appropriately.
|
||||
*
|
||||
* BPF_MAP_LOOKUP_BATCH
|
||||
* Description
|
||||
* Iterate and fetch multiple elements in a map.
|
||||
*
|
||||
* Two opaque values are used to manage batch operations,
|
||||
* *in_batch* and *out_batch*. Initially, *in_batch* must be set
|
||||
* to NULL to begin the batched operation. After each subsequent
|
||||
* **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
|
||||
* *out_batch* as the *in_batch* for the next operation to
|
||||
* continue iteration from the current point.
|
||||
*
|
||||
* The *keys* and *values* are output parameters which must point
|
||||
* to memory large enough to hold *count* items based on the key
|
||||
* and value size of the map *map_fd*. The *keys* buffer must be
|
||||
* of *key_size* * *count*. The *values* buffer must be of
|
||||
* *value_size* * *count*.
|
||||
*
|
||||
* The *elem_flags* argument may be specified as one of the
|
||||
* following:
|
||||
*
|
||||
* **BPF_F_LOCK**
|
||||
* Look up the value of a spin-locked map without
|
||||
* returning the lock. This must be specified if the
|
||||
* elements contain a spinlock.
|
||||
*
|
||||
* On success, *count* elements from the map are copied into the
|
||||
* user buffer, with the keys copied into *keys* and the values
|
||||
* copied into the corresponding indices in *values*.
|
||||
*
|
||||
* If an error is returned and *errno* is not **EFAULT**, *count*
|
||||
* is set to the number of successfully processed elements.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* May set *errno* to **ENOSPC** to indicate that *keys* or
|
||||
* *values* is too small to dump an entire bucket during
|
||||
* iteration of a hash-based map type.
|
||||
*
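An illustrative iteration loop for the batched lookup described above, assuming libbpf's bpf_map_lookup_batch() wrapper with its usual signature and a hash map of __u32 keys and __u64 values (the batch size and map fd are example assumptions):

#include <errno.h>
#include <linux/types.h>
#include <bpf/bpf.h>

#define BATCH_SZ 64

/* Dump all elements of a __u32 -> __u64 hash map, BATCH_SZ at a time. */
static int dump_map(int map_fd)
{
	__u32 in_batch = 0, out_batch = 0, count;
	__u32 keys[BATCH_SZ];
	__u64 vals[BATCH_SZ];
	void *in = NULL;	/* NULL starts a new iteration */
	int err;

	do {
		count = BATCH_SZ;
		err = bpf_map_lookup_batch(map_fd, in, &out_batch,
					   keys, vals, &count, NULL);
		if (err && errno != ENOENT)
			return -1;
		/* keys[0..count-1] / vals[0..count-1] are valid here */
		in_batch = out_batch;
		in = &in_batch;
	} while (!err);		/* ENOENT: the whole map has been read */

	return 0;
}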
|
||||
* BPF_MAP_LOOKUP_AND_DELETE_BATCH
|
||||
* Description
|
||||
* Iterate and delete all elements in a map.
|
||||
*
|
||||
* This operation has the same behavior as
|
||||
* **BPF_MAP_LOOKUP_BATCH** with two exceptions:
|
||||
*
|
||||
* * Every element that is successfully returned is also deleted
|
||||
* from the map. This is at least *count* elements. Note that
|
||||
* *count* is both an input and an output parameter.
|
||||
* * Upon returning with *errno* set to **EFAULT**, up to
|
||||
* *count* elements may be deleted without returning the keys
|
||||
* and values of the deleted elements.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_MAP_UPDATE_BATCH
|
||||
* Description
|
||||
* Update multiple elements in a map by *key*.
|
||||
*
|
||||
* The *keys* and *values* are input parameters which must point
|
||||
* to memory large enough to hold *count* items based on the key
|
||||
* and value size of the map *map_fd*. The *keys* buffer must be
|
||||
* of *key_size* * *count*. The *values* buffer must be of
|
||||
* *value_size* * *count*.
|
||||
*
|
||||
* Each element specified in *keys* is sequentially updated to the
|
||||
* value in the corresponding index in *values*. The *in_batch*
|
||||
* and *out_batch* parameters are ignored and should be zeroed.
|
||||
*
|
||||
* The *elem_flags* argument should be specified as one of the
|
||||
* following:
|
||||
*
|
||||
* **BPF_ANY**
|
||||
* Create new elements or update existing elements.
|
||||
* **BPF_NOEXIST**
|
||||
* Create new elements only if they do not exist.
|
||||
* **BPF_EXIST**
|
||||
* Update existing elements.
|
||||
* **BPF_F_LOCK**
|
||||
* Update spin_lock-ed map elements. This must be
|
||||
* specified if the map value contains a spinlock.
|
||||
*
|
||||
* On success, *count* elements from the map are updated.
|
||||
*
|
||||
* If an error is returned and *errno* is not **EFAULT**, *count*
|
||||
* is set to the number of successfully processed elements.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or
|
||||
* **E2BIG**. **E2BIG** indicates that the number of elements in
|
||||
* the map reached the *max_entries* limit specified at map
|
||||
* creation time.
|
||||
*
|
||||
* May set *errno* to one of the following error codes under
|
||||
* specific circumstances:
|
||||
*
|
||||
* **EEXIST**
|
||||
* If *flags* specifies **BPF_NOEXIST** and the element
|
||||
* with *key* already exists in the map.
|
||||
* **ENOENT**
|
||||
* If *flags* specifies **BPF_EXIST** and the element with
|
||||
* *key* does not exist in the map.
|
||||
*
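A companion sketch for the batched update path, again assuming libbpf's bpf_map_update_batch() wrapper and a __u32/__u64 map; the element count is an arbitrary example:

#include <linux/types.h>
#include <bpf/bpf.h>

/* Populate 16 elements of a __u32 -> __u64 map in one syscall. */
static int fill_map(int map_fd)
{
	__u32 keys[16];
	__u64 vals[16];
	__u32 count = 16;

	for (__u32 i = 0; i < 16; i++) {
		keys[i] = i;
		vals[i] = i * 100;
	}
	/* On error, 'count' is updated to the number of elements processed. */
	return bpf_map_update_batch(map_fd, keys, vals, &count, NULL);
}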
|
||||
* BPF_MAP_DELETE_BATCH
|
||||
* Description
|
||||
* Delete multiple elements in a map by *key*.
|
||||
*
|
||||
* The *keys* parameter is an input parameter which must point
|
||||
* to memory large enough to hold *count* items based on the key
|
||||
* size of the map *map_fd*, that is, *key_size* * *count*.
|
||||
*
|
||||
* Each element specified in *keys* is sequentially deleted. The
|
||||
* *in_batch*, *out_batch*, and *values* parameters are ignored
|
||||
* and should be zeroed.
|
||||
*
|
||||
* The *elem_flags* argument may be specified as one of the
|
||||
* following:
|
||||
*
|
||||
* **BPF_F_LOCK**
|
||||
* Look up the value of a spin-locked map without
|
||||
* returning the lock. This must be specified if the
|
||||
* elements contain a spinlock.
|
||||
*
|
||||
* On success, *count* elements from the map are deleted.
|
||||
*
|
||||
* If an error is returned and *errno* is not **EFAULT**, *count*
|
||||
* is set to the number of successfully processed elements. If
|
||||
* *errno* is **EFAULT**, up to *count* elements may have been
|
||||
* deleted.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_LINK_CREATE
|
||||
* Description
|
||||
* Attach an eBPF program to a *target_fd* at the specified
|
||||
* *attach_type* hook and return a file descriptor handle for
|
||||
* managing the link.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_LINK_UPDATE
|
||||
* Description
|
||||
* Update the eBPF program in the specified *link_fd* to
|
||||
* *new_prog_fd*.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_LINK_GET_FD_BY_ID
|
||||
* Description
|
||||
* Open a file descriptor for the eBPF Link corresponding to
|
||||
* *link_id*.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_LINK_GET_NEXT_ID
|
||||
* Description
|
||||
* Fetch the next eBPF link currently loaded into the kernel.
|
||||
*
|
||||
* Looks for the eBPF link with an id greater than *start_id*
|
||||
* and updates *next_id* on success. If no other eBPF links
|
||||
* remain with ids higher than *start_id*, returns -1 and sets
|
||||
* *errno* to **ENOENT**.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, or when no id remains, -1
|
||||
* is returned and *errno* is set appropriately.
|
||||
*
|
||||
* BPF_ENABLE_STATS
|
||||
* Description
|
||||
* Enable eBPF runtime statistics gathering.
|
||||
*
|
||||
* Runtime statistics gathering for the eBPF runtime is disabled
|
||||
* by default to minimize the corresponding performance overhead.
|
||||
* This command enables statistics globally.
|
||||
*
|
||||
* Multiple programs may independently enable statistics.
|
||||
* After gathering the desired statistics, eBPF runtime statistics
|
||||
* may be disabled again by calling **close**\ (2) for the file
|
||||
* descriptor returned by this function. Statistics will only be
|
||||
* disabled system-wide when all outstanding file descriptors
|
||||
* returned by prior calls for this subcommand are closed.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
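A sketch of the lifetime described above, assuming libbpf's bpf_enable_stats() helper and the BPF_STATS_RUN_TIME type; statistics stay enabled only while at least one returned fd is open:

#include <unistd.h>
#include <bpf/bpf.h>

static void profile_for_a_while(void)
{
	int stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);

	if (stats_fd < 0)
		return;

	/* ... run the workload; run_time_ns/run_cnt now accumulate and can
	 * be read back per program via BPF_OBJ_GET_INFO_BY_FD ...
	 */

	close(stats_fd);	/* stats disable once all such fds are closed */
}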
|
||||
* BPF_ITER_CREATE
|
||||
* Description
|
||||
* Create an iterator on top of the specified *link_fd* (as
|
||||
* previously created using **BPF_LINK_CREATE**) and return a
|
||||
* file descriptor that can be used to trigger the iteration.
|
||||
*
|
||||
* If the resulting file descriptor is pinned to the filesystem
|
||||
* using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls
|
||||
* for that path will trigger the iterator to read kernel state
|
||||
* using the eBPF program attached to *link_fd*.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
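The iterator flow described above, sketched with libbpf's bpf_link_create() and bpf_iter_create() wrappers; prog_fd is assumed to be an already-loaded iterator (BPF_TRACE_ITER) program:

#include <unistd.h>
#include <stdio.h>
#include <bpf/bpf.h>

static void run_iter(int prog_fd)
{
	char buf[4096];
	int link_fd, iter_fd;
	ssize_t n;

	link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_ITER, NULL);
	if (link_fd < 0)
		return;

	iter_fd = bpf_iter_create(link_fd);
	if (iter_fd < 0)
		goto out;

	/* Each read() triggers the iterator program over kernel state. */
	while ((n = read(iter_fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);

	close(iter_fd);
out:
	close(link_fd);
}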
|
||||
* BPF_LINK_DETACH
|
||||
* Description
|
||||
* Forcefully detach the specified *link_fd* from its
|
||||
* corresponding attachment point.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_PROG_BIND_MAP
|
||||
* Description
|
||||
* Bind a map to the lifetime of an eBPF program.
|
||||
*
|
||||
* The map identified by *map_fd* is bound to the program
|
||||
* identified by *prog_fd* and only released when *prog_fd* is
|
||||
* released. This may be used in cases where metadata should be
|
||||
* associated with a program which otherwise does not contain any
|
||||
* references to the map (for example, embedded in the eBPF
|
||||
* program instructions).
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
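A brief sketch, assuming libbpf's bpf_prog_bind_map() wrapper: the map stays alive for as long as the program does, even though the program's instructions never reference it:

#include <bpf/bpf.h>

/* Keep a metadata map alive for as long as prog_fd stays loaded. */
static int attach_metadata(int prog_fd, int metadata_map_fd)
{
	return bpf_prog_bind_map(prog_fd, metadata_map_fd, NULL);
}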
|
||||
* NOTES
|
||||
* eBPF objects (maps and programs) can be shared between processes.
|
||||
*
|
||||
* * After **fork**\ (2), the child inherits file descriptors
|
||||
* referring to the same eBPF objects.
|
||||
* * File descriptors referring to eBPF objects can be transferred over
|
||||
* **unix**\ (7) domain sockets.
|
||||
* * File descriptors referring to eBPF objects can be duplicated in the
|
||||
* usual way, using **dup**\ (2) and similar calls.
|
||||
* * File descriptors referring to eBPF objects can be pinned to the
|
||||
* filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2).
|
||||
*
|
||||
* An eBPF object is deallocated only after all file descriptors referring
|
||||
* to the object have been closed and no references remain pinned to the
|
||||
* filesystem or attached (for example, bound to a program or device).
|
||||
*/
|
||||
enum bpf_cmd {
|
||||
BPF_MAP_CREATE,
|
||||
BPF_MAP_LOOKUP_ELEM,
|
||||
@ -393,6 +1103,15 @@ enum bpf_link_type {
|
||||
* is struct/union.
|
||||
*/
|
||||
#define BPF_PSEUDO_BTF_ID 3
|
||||
/* insn[0].src_reg: BPF_PSEUDO_FUNC
|
||||
* insn[0].imm: insn offset to the func
|
||||
* insn[1].imm: 0
|
||||
* insn[0].off: 0
|
||||
* insn[1].off: 0
|
||||
* ldimm64 rewrite: address of the function
|
||||
* verifier type: PTR_TO_FUNC.
|
||||
*/
|
||||
#define BPF_PSEUDO_FUNC 4
|
||||
|
||||
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
|
||||
* offset to another bpf function
|
||||
@ -720,7 +1439,7 @@ union bpf_attr {
|
||||
* parsed and used to produce a manual page. The workflow is the following,
|
||||
* and requires the rst2man utility:
|
||||
*
|
||||
* $ ./scripts/bpf_helpers_doc.py \
|
||||
* $ ./scripts/bpf_doc.py \
|
||||
* --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
|
||||
* $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
|
||||
* $ man /tmp/bpf-helpers.7
|
||||
@ -1765,6 +2484,10 @@ union bpf_attr {
|
||||
* Use with ENCAP_L3/L4 flags to further specify the tunnel
|
||||
* type; *len* is the length of the inner MAC header.
|
||||
*
|
||||
* * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
|
||||
* Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
|
||||
* L2 type as Ethernet.
|
||||
*
|
||||
* A call to this helper is susceptible to change the underlying
|
||||
* packet buffer. Therefore, at load time, all checks on pointers
|
||||
* previously done by the verifier are invalidated and must be
|
||||
@ -3909,6 +4632,34 @@ union bpf_attr {
|
||||
* * **BPF_MTU_CHK_RET_FRAG_NEEDED**
|
||||
* * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
|
||||
*
|
||||
* long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
|
||||
* Description
|
||||
* For each element in **map**, call **callback_fn** function with
|
||||
* **map**, **callback_ctx** and other map-specific parameters.
|
||||
* The **callback_fn** should be a static function and
|
||||
* the **callback_ctx** should be a pointer to the stack.
|
||||
* The **flags** argument is used to control certain aspects of the helper.
|
||||
* Currently, the **flags** must be 0.
|
||||
*
|
||||
* The following are a list of supported map types and their
|
||||
* respective expected callback signatures:
|
||||
*
|
||||
* BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
|
||||
* BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
|
||||
* BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
|
||||
*
|
||||
* long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
|
||||
*
|
||||
* For per_cpu maps, the map_value is the value on the cpu where the
|
||||
* bpf_prog is running.
|
||||
*
|
||||
* If **callback_fn** returns 0, the helper will continue to the next
|
||||
* element. If the return value is 1, the helper will skip the rest of the
|
||||
* elements and return. Other return values are not used now.
|
||||
*
|
||||
* Return
|
||||
* The number of traversed map elements for success, **-EINVAL** for
|
||||
* invalid **flags**.
|
||||
*/
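To make the calling convention above concrete, a minimal BPF-side sketch modelled on the selftests; the map layout, section name and callback name are illustrative assumptions:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 128);
	__type(key, __u32);
	__type(value, __u64);
} counters SEC(".maps");

struct cb_ctx {
	__u64 sum;
};

/* Called once per element; returning 0 continues, 1 stops early. */
static __u64 sum_elem(struct bpf_map *map, __u32 *key, __u64 *val,
		      struct cb_ctx *ctx)
{
	ctx->sum += *val;
	return 0;
}

SEC("tracepoint/syscalls/sys_enter_getpid")
int sum_counters(void *ctx)
{
	struct cb_ctx data = { .sum = 0 };

	bpf_for_each_map_elem(&counters, sum_elem, &data, 0);
	return 0;
}

char _license[] SEC("license") = "GPL";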
|
||||
#define __BPF_FUNC_MAPPER(FN) \
|
||||
FN(unspec), \
|
||||
@ -4075,6 +4826,7 @@ union bpf_attr {
|
||||
FN(ima_inode_hash), \
|
||||
FN(sock_from_file), \
|
||||
FN(check_mtu), \
|
||||
FN(for_each_map_elem), \
|
||||
/* */
|
||||
|
||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||
@ -4168,6 +4920,7 @@ enum {
|
||||
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
|
||||
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
|
||||
BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
|
||||
BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
|
||||
};
|
||||
|
||||
enum {
|
||||
@ -5205,7 +5958,10 @@ struct bpf_pidns_info {
|
||||
|
||||
/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
|
||||
struct bpf_sk_lookup {
|
||||
__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
|
||||
union {
|
||||
__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
|
||||
__u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
|
||||
};
|
||||
|
||||
__u32 family; /* Protocol family (AF_INET, AF_INET6) */
|
||||
__u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
|
||||
|
@ -52,7 +52,7 @@ struct btf_type {
|
||||
};
|
||||
};
|
||||
|
||||
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
|
||||
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
|
||||
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
|
||||
#define BTF_INFO_KFLAG(info) ((info) >> 31)
|
||||
|
||||
@ -72,7 +72,8 @@ struct btf_type {
|
||||
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
|
||||
#define BTF_KIND_VAR 14 /* Variable */
|
||||
#define BTF_KIND_DATASEC 15 /* Section */
|
||||
#define BTF_KIND_MAX BTF_KIND_DATASEC
|
||||
#define BTF_KIND_FLOAT 16 /* Floating point */
|
||||
#define BTF_KIND_MAX BTF_KIND_FLOAT
|
||||
#define NR_BTF_KINDS (BTF_KIND_MAX + 1)
|
||||
|
||||
/* For some specific BTF_KIND, "struct btf_type" is immediately
|
||||
|
@ -1709,6 +1709,7 @@ config BPF_SYSCALL
|
||||
select BPF
|
||||
select IRQ_WORK
|
||||
select TASKS_TRACE_RCU
|
||||
select NET_SOCK_MSG if INET
|
||||
default n
|
||||
help
|
||||
Enable the bpf() system call that allows to manipulate eBPF
|
||||
|
@ -9,8 +9,8 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
|
||||
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
|
||||
obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
|
||||
obj-${CONFIG_BPF_LSM} += bpf_task_storage.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
|
||||
obj-$(CONFIG_BPF_JIT) += trampoline.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += btf.o
|
||||
@ -18,7 +18,6 @@ obj-$(CONFIG_BPF_JIT) += dispatcher.o
|
||||
ifeq ($(CONFIG_NET),y)
|
||||
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += offload.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += net_namespace.o
|
||||
endif
|
||||
|
@ -625,6 +625,42 @@ static const struct bpf_iter_seq_info iter_seq_info = {
|
||||
.seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
|
||||
};
|
||||
|
||||
static int bpf_for_each_array_elem(struct bpf_map *map, void *callback_fn,
|
||||
void *callback_ctx, u64 flags)
|
||||
{
|
||||
u32 i, key, num_elems = 0;
|
||||
struct bpf_array *array;
|
||||
bool is_percpu;
|
||||
u64 ret = 0;
|
||||
void *val;
|
||||
|
||||
if (flags != 0)
|
||||
return -EINVAL;
|
||||
|
||||
is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
|
||||
array = container_of(map, struct bpf_array, map);
|
||||
if (is_percpu)
|
||||
migrate_disable();
|
||||
for (i = 0; i < map->max_entries; i++) {
|
||||
if (is_percpu)
|
||||
val = this_cpu_ptr(array->pptrs[i]);
|
||||
else
|
||||
val = array->value + array->elem_size * i;
|
||||
num_elems++;
|
||||
key = i;
|
||||
ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
|
||||
(u64)(long)&key, (u64)(long)val,
|
||||
(u64)(long)callback_ctx, 0);
|
||||
/* return value: 0 - continue, 1 - stop and return */
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
if (is_percpu)
|
||||
migrate_enable();
|
||||
return num_elems;
|
||||
}
|
||||
|
||||
static int array_map_btf_id;
|
||||
const struct bpf_map_ops array_map_ops = {
|
||||
.map_meta_equal = array_map_meta_equal,
|
||||
@ -643,6 +679,8 @@ const struct bpf_map_ops array_map_ops = {
|
||||
.map_check_btf = array_map_check_btf,
|
||||
.map_lookup_batch = generic_map_lookup_batch,
|
||||
.map_update_batch = generic_map_update_batch,
|
||||
.map_set_for_each_callback_args = map_set_for_each_callback_args,
|
||||
.map_for_each_callback = bpf_for_each_array_elem,
|
||||
.map_btf_name = "bpf_array",
|
||||
.map_btf_id = &array_map_btf_id,
|
||||
.iter_seq_info = &iter_seq_info,
|
||||
@ -660,6 +698,8 @@ const struct bpf_map_ops percpu_array_map_ops = {
|
||||
.map_delete_elem = array_map_delete_elem,
|
||||
.map_seq_show_elem = percpu_array_map_seq_show_elem,
|
||||
.map_check_btf = array_map_check_btf,
|
||||
.map_set_for_each_callback_args = map_set_for_each_callback_args,
|
||||
.map_for_each_callback = bpf_for_each_array_elem,
|
||||
.map_btf_name = "bpf_array",
|
||||
.map_btf_id = &percpu_array_map_btf_id,
|
||||
.iter_seq_info = &iter_seq_info,
|
||||
|
@ -237,7 +237,7 @@ static void inode_storage_map_free(struct bpf_map *map)
|
||||
|
||||
smap = (struct bpf_local_storage_map *)map;
|
||||
bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx);
|
||||
bpf_local_storage_map_free(smap);
|
||||
bpf_local_storage_map_free(smap, NULL);
|
||||
}
|
||||
|
||||
static int inode_storage_map_btf_id;
|
||||
|
@ -675,3 +675,19 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
|
||||
*/
|
||||
return ret == 0 ? 0 : -EAGAIN;
|
||||
}
|
||||
|
||||
BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn,
|
||||
void *, callback_ctx, u64, flags)
|
||||
{
|
||||
return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags);
|
||||
}
|
||||
|
||||
const struct bpf_func_proto bpf_for_each_map_elem_proto = {
|
||||
.func = bpf_for_each_map_elem,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_CONST_MAP_PTR,
|
||||
.arg2_type = ARG_PTR_TO_FUNC,
|
||||
.arg3_type = ARG_PTR_TO_STACK_OR_NULL,
|
||||
.arg4_type = ARG_ANYTHING,
|
||||
};
|
||||
|
@ -140,17 +140,18 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
|
||||
{
|
||||
struct bpf_local_storage *local_storage;
|
||||
bool free_local_storage = false;
|
||||
unsigned long flags;
|
||||
|
||||
if (unlikely(!selem_linked_to_storage(selem)))
|
||||
/* selem has already been unlinked from sk */
|
||||
return;
|
||||
|
||||
local_storage = rcu_dereference(selem->local_storage);
|
||||
raw_spin_lock_bh(&local_storage->lock);
|
||||
raw_spin_lock_irqsave(&local_storage->lock, flags);
|
||||
if (likely(selem_linked_to_storage(selem)))
|
||||
free_local_storage = bpf_selem_unlink_storage_nolock(
|
||||
local_storage, selem, true);
|
||||
raw_spin_unlock_bh(&local_storage->lock);
|
||||
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
|
||||
|
||||
if (free_local_storage)
|
||||
kfree_rcu(local_storage, rcu);
|
||||
@ -167,6 +168,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
|
||||
{
|
||||
struct bpf_local_storage_map *smap;
|
||||
struct bpf_local_storage_map_bucket *b;
|
||||
unsigned long flags;
|
||||
|
||||
if (unlikely(!selem_linked_to_map(selem)))
|
||||
/* selem has already be unlinked from smap */
|
||||
@ -174,21 +176,22 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
|
||||
|
||||
smap = rcu_dereference(SDATA(selem)->smap);
|
||||
b = select_bucket(smap, selem);
|
||||
raw_spin_lock_bh(&b->lock);
|
||||
raw_spin_lock_irqsave(&b->lock, flags);
|
||||
if (likely(selem_linked_to_map(selem)))
|
||||
hlist_del_init_rcu(&selem->map_node);
|
||||
raw_spin_unlock_bh(&b->lock);
|
||||
raw_spin_unlock_irqrestore(&b->lock, flags);
|
||||
}
|
||||
|
||||
void bpf_selem_link_map(struct bpf_local_storage_map *smap,
|
||||
struct bpf_local_storage_elem *selem)
|
||||
{
|
||||
struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_bh(&b->lock);
|
||||
raw_spin_lock_irqsave(&b->lock, flags);
|
||||
RCU_INIT_POINTER(SDATA(selem)->smap, smap);
|
||||
hlist_add_head_rcu(&selem->map_node, &b->list);
|
||||
raw_spin_unlock_bh(&b->lock);
|
||||
raw_spin_unlock_irqrestore(&b->lock, flags);
|
||||
}
|
||||
|
||||
void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
|
||||
@ -224,16 +227,18 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
|
||||
|
||||
sdata = SDATA(selem);
|
||||
if (cacheit_lockit) {
|
||||
unsigned long flags;
|
||||
|
||||
/* spinlock is needed to avoid racing with the
|
||||
* parallel delete. Otherwise, publishing an already
|
||||
* deleted sdata to the cache will become a use-after-free
|
||||
* problem in the next bpf_local_storage_lookup().
|
||||
*/
|
||||
raw_spin_lock_bh(&local_storage->lock);
|
||||
raw_spin_lock_irqsave(&local_storage->lock, flags);
|
||||
if (selem_linked_to_storage(selem))
|
||||
rcu_assign_pointer(local_storage->cache[smap->cache_idx],
|
||||
sdata);
|
||||
raw_spin_unlock_bh(&local_storage->lock);
|
||||
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
|
||||
}
|
||||
|
||||
return sdata;
|
||||
@ -327,6 +332,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
|
||||
struct bpf_local_storage_data *old_sdata = NULL;
|
||||
struct bpf_local_storage_elem *selem;
|
||||
struct bpf_local_storage *local_storage;
|
||||
unsigned long flags;
|
||||
int err;
|
||||
|
||||
/* BPF_EXIST and BPF_NOEXIST cannot be both set */
|
||||
@ -374,7 +380,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
|
||||
}
|
||||
}
|
||||
|
||||
raw_spin_lock_bh(&local_storage->lock);
|
||||
raw_spin_lock_irqsave(&local_storage->lock, flags);
|
||||
|
||||
/* Recheck local_storage->list under local_storage->lock */
|
||||
if (unlikely(hlist_empty(&local_storage->list))) {
|
||||
@ -428,11 +434,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
|
||||
}
|
||||
|
||||
unlock:
|
||||
raw_spin_unlock_bh(&local_storage->lock);
|
||||
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
|
||||
return SDATA(selem);
|
||||
|
||||
unlock_err:
|
||||
raw_spin_unlock_bh(&local_storage->lock);
|
||||
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
@ -468,7 +474,8 @@ void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
|
||||
spin_unlock(&cache->idx_lock);
|
||||
}
|
||||
|
||||
void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
|
||||
void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
|
||||
int __percpu *busy_counter)
|
||||
{
|
||||
struct bpf_local_storage_elem *selem;
|
||||
struct bpf_local_storage_map_bucket *b;
|
||||
@ -497,7 +504,15 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
|
||||
while ((selem = hlist_entry_safe(
|
||||
rcu_dereference_raw(hlist_first_rcu(&b->list)),
|
||||
struct bpf_local_storage_elem, map_node))) {
|
||||
if (busy_counter) {
|
||||
migrate_disable();
|
||||
__this_cpu_inc(*busy_counter);
|
||||
}
|
||||
bpf_selem_unlink(selem);
|
||||
if (busy_counter) {
|
||||
__this_cpu_dec(*busy_counter);
|
||||
migrate_enable();
|
||||
}
|
||||
cond_resched_rcu();
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
@ -115,10 +115,6 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_spin_lock_proto;
|
||||
case BPF_FUNC_spin_unlock:
|
||||
return &bpf_spin_unlock_proto;
|
||||
case BPF_FUNC_task_storage_get:
|
||||
return &bpf_task_storage_get_proto;
|
||||
case BPF_FUNC_task_storage_delete:
|
||||
return &bpf_task_storage_delete_proto;
|
||||
case BPF_FUNC_bprm_opts_set:
|
||||
return &bpf_bprm_opts_set_proto;
|
||||
case BPF_FUNC_ima_inode_hash:
|
||||
|
@ -15,21 +15,41 @@
|
||||
#include <linux/bpf_local_storage.h>
|
||||
#include <linux/filter.h>
|
||||
#include <uapi/linux/btf.h>
|
||||
#include <linux/bpf_lsm.h>
|
||||
#include <linux/btf_ids.h>
|
||||
#include <linux/fdtable.h>
|
||||
|
||||
DEFINE_BPF_STORAGE_CACHE(task_cache);
|
||||
|
||||
DEFINE_PER_CPU(int, bpf_task_storage_busy);
|
||||
|
||||
static void bpf_task_storage_lock(void)
|
||||
{
|
||||
migrate_disable();
|
||||
__this_cpu_inc(bpf_task_storage_busy);
|
||||
}
|
||||
|
||||
static void bpf_task_storage_unlock(void)
|
||||
{
|
||||
__this_cpu_dec(bpf_task_storage_busy);
|
||||
migrate_enable();
|
||||
}
|
||||
|
||||
static bool bpf_task_storage_trylock(void)
|
||||
{
|
||||
migrate_disable();
|
||||
if (unlikely(__this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
|
||||
__this_cpu_dec(bpf_task_storage_busy);
|
||||
migrate_enable();
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
|
||||
{
|
||||
struct task_struct *task = owner;
|
||||
struct bpf_storage_blob *bsb;
|
||||
|
||||
bsb = bpf_task(task);
|
||||
if (!bsb)
|
||||
return NULL;
|
||||
return &bsb->storage;
|
||||
return &task->bpf_storage;
|
||||
}
|
||||
|
||||
static struct bpf_local_storage_data *
|
||||
@ -38,13 +58,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
|
||||
{
|
||||
struct bpf_local_storage *task_storage;
|
||||
struct bpf_local_storage_map *smap;
|
||||
struct bpf_storage_blob *bsb;
|
||||
|
||||
bsb = bpf_task(task);
|
||||
if (!bsb)
|
||||
return NULL;
|
||||
|
||||
task_storage = rcu_dereference(bsb->storage);
|
||||
task_storage = rcu_dereference(task->bpf_storage);
|
||||
if (!task_storage)
|
||||
return NULL;
|
||||
|
||||
@ -57,16 +72,12 @@ void bpf_task_storage_free(struct task_struct *task)
|
||||
struct bpf_local_storage_elem *selem;
|
||||
struct bpf_local_storage *local_storage;
|
||||
bool free_task_storage = false;
|
||||
struct bpf_storage_blob *bsb;
|
||||
struct hlist_node *n;
|
||||
|
||||
bsb = bpf_task(task);
|
||||
if (!bsb)
|
||||
return;
|
||||
unsigned long flags;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
local_storage = rcu_dereference(bsb->storage);
|
||||
local_storage = rcu_dereference(task->bpf_storage);
|
||||
if (!local_storage) {
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
@ -81,7 +92,8 @@ void bpf_task_storage_free(struct task_struct *task)
|
||||
* when unlinking elem from the local_storage->list and
|
||||
* the map's bucket->list.
|
||||
*/
|
||||
raw_spin_lock_bh(&local_storage->lock);
|
||||
bpf_task_storage_lock();
|
||||
raw_spin_lock_irqsave(&local_storage->lock, flags);
|
||||
hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
|
||||
/* Always unlink from map before unlinking from
|
||||
* local_storage.
|
||||
@ -90,7 +102,8 @@ void bpf_task_storage_free(struct task_struct *task)
|
||||
free_task_storage = bpf_selem_unlink_storage_nolock(
|
||||
local_storage, selem, false);
|
||||
}
|
||||
raw_spin_unlock_bh(&local_storage->lock);
|
||||
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
|
||||
bpf_task_storage_unlock();
|
||||
rcu_read_unlock();
|
||||
|
||||
/* free_task_storage should always be true as long as
|
||||
@ -123,7 +136,9 @@ static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
|
||||
goto out;
|
||||
}
|
||||
|
||||
bpf_task_storage_lock();
|
||||
sdata = task_storage_lookup(task, map, true);
|
||||
bpf_task_storage_unlock();
|
||||
put_pid(pid);
|
||||
return sdata ? sdata->data : NULL;
|
||||
out:
|
||||
@ -150,13 +165,15 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
|
||||
*/
|
||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
task = pid_task(pid, PIDTYPE_PID);
|
||||
if (!task || !task_storage_ptr(task)) {
|
||||
if (!task) {
|
||||
err = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
bpf_task_storage_lock();
|
||||
sdata = bpf_local_storage_update(
|
||||
task, (struct bpf_local_storage_map *)map, value, map_flags);
|
||||
bpf_task_storage_unlock();
|
||||
|
||||
err = PTR_ERR_OR_ZERO(sdata);
|
||||
out:
|
||||
@ -199,7 +216,9 @@ static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
|
||||
goto out;
|
||||
}
|
||||
|
||||
bpf_task_storage_lock();
|
||||
err = task_storage_delete(task, map);
|
||||
bpf_task_storage_unlock();
|
||||
out:
|
||||
put_pid(pid);
|
||||
return err;
|
||||
@ -213,44 +232,47 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
|
||||
if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
|
||||
return (unsigned long)NULL;
|
||||
|
||||
/* explicitly check that the task_storage_ptr is not
|
||||
* NULL as task_storage_lookup returns NULL in this case and
|
||||
* bpf_local_storage_update expects the owner to have a
|
||||
* valid storage pointer.
|
||||
*/
|
||||
if (!task || !task_storage_ptr(task))
|
||||
if (!task)
|
||||
return (unsigned long)NULL;
|
||||
|
||||
if (!bpf_task_storage_trylock())
|
||||
return (unsigned long)NULL;
|
||||
|
||||
sdata = task_storage_lookup(task, map, true);
|
||||
if (sdata)
|
||||
return (unsigned long)sdata->data;
|
||||
goto unlock;
|
||||
|
||||
/* This helper must only be called from places where the lifetime of the task
|
||||
* is guaranteed. Either by being refcounted or by being protected
|
||||
* by an RCU read-side critical section.
|
||||
*/
|
||||
if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
|
||||
/* only allocate new storage, when the task is refcounted */
|
||||
if (refcount_read(&task->usage) &&
|
||||
(flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
|
||||
sdata = bpf_local_storage_update(
|
||||
task, (struct bpf_local_storage_map *)map, value,
|
||||
BPF_NOEXIST);
|
||||
return IS_ERR(sdata) ? (unsigned long)NULL :
|
||||
(unsigned long)sdata->data;
|
||||
}
|
||||
|
||||
return (unsigned long)NULL;
|
||||
unlock:
|
||||
bpf_task_storage_unlock();
|
||||
return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL :
|
||||
(unsigned long)sdata->data;
|
||||
}
|
||||
|
||||
BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
|
||||
task)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!task)
|
||||
return -EINVAL;
|
||||
|
||||
if (!bpf_task_storage_trylock())
|
||||
return -EBUSY;
|
||||
|
||||
/* This helper must only be called from places where the lifetime of the task
|
||||
* is guaranteed. Either by being refcounted or by being protected
|
||||
* by an RCU read-side critical section.
|
||||
*/
|
||||
return task_storage_delete(task, map);
|
||||
ret = task_storage_delete(task, map);
|
||||
bpf_task_storage_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
|
||||
@ -276,7 +298,7 @@ static void task_storage_map_free(struct bpf_map *map)
|
||||
|
||||
smap = (struct bpf_local_storage_map *)map;
|
||||
bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
|
||||
bpf_local_storage_map_free(smap);
|
||||
bpf_local_storage_map_free(smap, &bpf_task_storage_busy);
|
||||
}
|
||||
|
||||
static int task_storage_map_btf_id;
|
||||
|
kernel/bpf/btf.c
@ -173,7 +173,7 @@
|
||||
#define BITS_ROUNDUP_BYTES(bits) \
|
||||
(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
|
||||
|
||||
#define BTF_INFO_MASK 0x8f00ffff
|
||||
#define BTF_INFO_MASK 0x9f00ffff
|
||||
#define BTF_INT_MASK 0x0fffffff
|
||||
#define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE)
|
||||
#define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET)
|
||||
@ -280,6 +280,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
|
||||
[BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
|
||||
[BTF_KIND_VAR] = "VAR",
|
||||
[BTF_KIND_DATASEC] = "DATASEC",
|
||||
[BTF_KIND_FLOAT] = "FLOAT",
|
||||
};
|
||||
|
||||
static const char *btf_type_str(const struct btf_type *t)
|
||||
@ -574,6 +575,7 @@ static bool btf_type_has_size(const struct btf_type *t)
|
||||
case BTF_KIND_UNION:
|
||||
case BTF_KIND_ENUM:
|
||||
case BTF_KIND_DATASEC:
|
||||
case BTF_KIND_FLOAT:
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1704,6 +1706,7 @@ __btf_resolve_size(const struct btf *btf, const struct btf_type *type,
|
||||
case BTF_KIND_STRUCT:
|
||||
case BTF_KIND_UNION:
|
||||
case BTF_KIND_ENUM:
|
||||
case BTF_KIND_FLOAT:
|
||||
size = type->size;
|
||||
goto resolved;
|
||||
|
||||
@ -1849,7 +1852,7 @@ static int btf_df_check_kflag_member(struct btf_verifier_env *env,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Used for ptr, array and struct/union type members.
|
||||
/* Used for ptr, array struct/union and float type members.
|
||||
* int, enum and modifier types have their specific callback functions.
|
||||
*/
|
||||
static int btf_generic_check_kflag_member(struct btf_verifier_env *env,
|
||||
@ -3675,6 +3678,81 @@ static const struct btf_kind_operations datasec_ops = {
|
||||
.show = btf_datasec_show,
|
||||
};
|
||||
|
||||
static s32 btf_float_check_meta(struct btf_verifier_env *env,
|
||||
const struct btf_type *t,
|
||||
u32 meta_left)
|
||||
{
|
||||
if (btf_type_vlen(t)) {
|
||||
btf_verifier_log_type(env, t, "vlen != 0");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (btf_type_kflag(t)) {
|
||||
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (t->size != 2 && t->size != 4 && t->size != 8 && t->size != 12 &&
|
||||
t->size != 16) {
|
||||
btf_verifier_log_type(env, t, "Invalid type_size");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
btf_verifier_log_type(env, t, NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int btf_float_check_member(struct btf_verifier_env *env,
|
||||
const struct btf_type *struct_type,
|
||||
const struct btf_member *member,
|
||||
const struct btf_type *member_type)
|
||||
{
|
||||
u64 start_offset_bytes;
|
||||
u64 end_offset_bytes;
|
||||
u64 misalign_bits;
|
||||
u64 align_bytes;
|
||||
u64 align_bits;
|
||||
|
||||
/* Different architectures have different alignment requirements, so
|
||||
* here we check only for the reasonable minimum. This way we ensure
|
||||
* that types after CO-RE can pass the kernel BTF verifier.
|
||||
*/
|
||||
align_bytes = min_t(u64, sizeof(void *), member_type->size);
|
||||
align_bits = align_bytes * BITS_PER_BYTE;
|
||||
div64_u64_rem(member->offset, align_bits, &misalign_bits);
|
||||
if (misalign_bits) {
|
||||
btf_verifier_log_member(env, struct_type, member,
|
||||
"Member is not properly aligned");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
start_offset_bytes = member->offset / BITS_PER_BYTE;
|
||||
end_offset_bytes = start_offset_bytes + member_type->size;
|
||||
if (end_offset_bytes > struct_type->size) {
|
||||
btf_verifier_log_member(env, struct_type, member,
|
||||
"Member exceeds struct_size");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void btf_float_log(struct btf_verifier_env *env,
|
||||
const struct btf_type *t)
|
||||
{
|
||||
btf_verifier_log(env, "size=%u", t->size);
|
||||
}
|
||||
|
||||
static const struct btf_kind_operations float_ops = {
|
||||
.check_meta = btf_float_check_meta,
|
||||
.resolve = btf_df_resolve,
|
||||
.check_member = btf_float_check_member,
|
||||
.check_kflag_member = btf_generic_check_kflag_member,
|
||||
.log_details = btf_float_log,
|
||||
.show = btf_df_show,
|
||||
};
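For context, a userspace sketch of emitting the new kind, assuming the btf__add_float() API added to libbpf in the same series; the kernel-side checks above then accept sizes 2, 4, 8, 12 and 16:

#include <bpf/btf.h>
#include <bpf/libbpf.h>

/* Build an in-memory BTF object containing a 4-byte FLOAT type. */
static struct btf *make_float_btf(void)
{
	struct btf *btf = btf__new_empty();
	int id;

	if (libbpf_get_error(btf))
		return NULL;

	id = btf__add_float(btf, "float", 4);	/* returns the new type id */
	if (id < 0) {
		btf__free(btf);
		return NULL;
	}
	return btf;
}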
|
||||
|
||||
static int btf_func_proto_check(struct btf_verifier_env *env,
|
||||
const struct btf_type *t)
|
||||
{
|
||||
@ -3808,6 +3886,7 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
|
||||
[BTF_KIND_FUNC_PROTO] = &func_proto_ops,
|
||||
[BTF_KIND_VAR] = &var_ops,
|
||||
[BTF_KIND_DATASEC] = &datasec_ops,
|
||||
[BTF_KIND_FLOAT] = &float_ops,
|
||||
};
|
||||
|
||||
static s32 btf_check_meta(struct btf_verifier_env *env,
|
||||
@ -4592,8 +4671,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
|
||||
}
|
||||
arg = off / 8;
|
||||
args = (const struct btf_param *)(t + 1);
|
||||
/* if (t == NULL) Fall back to default BPF prog with 5 u64 arguments */
|
||||
nr_args = t ? btf_type_vlen(t) : 5;
|
||||
/* if (t == NULL) Fall back to default BPF prog with
|
||||
* MAX_BPF_FUNC_REG_ARGS u64 arguments.
|
||||
*/
|
||||
nr_args = t ? btf_type_vlen(t) : MAX_BPF_FUNC_REG_ARGS;
|
||||
if (prog->aux->attach_btf_trace) {
|
||||
/* skip first 'void *__data' argument in btf_trace_##name typedef */
|
||||
args++;
|
||||
@ -4649,7 +4730,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
|
||||
}
|
||||
} else {
|
||||
if (!t)
|
||||
/* Default prog with 5 args */
|
||||
/* Default prog with MAX_BPF_FUNC_REG_ARGS args */
|
||||
return true;
|
||||
t = btf_type_by_id(btf, args[arg].type);
|
||||
}
|
||||
@ -5100,12 +5181,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
|
||||
|
||||
if (!func) {
|
||||
/* BTF function prototype doesn't match the verifier types.
|
||||
* Fall back to 5 u64 args.
|
||||
* Fall back to MAX_BPF_FUNC_REG_ARGS u64 args.
|
||||
*/
|
||||
for (i = 0; i < 5; i++)
|
||||
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
|
||||
m->arg_size[i] = 8;
|
||||
m->ret_size = 8;
|
||||
m->nr_args = 5;
|
||||
m->nr_args = MAX_BPF_FUNC_REG_ARGS;
|
||||
return 0;
|
||||
}
|
||||
args = (const struct btf_param *)(func + 1);
|
||||
@ -5328,8 +5409,9 @@ int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
|
||||
}
|
||||
args = (const struct btf_param *)(t + 1);
|
||||
nargs = btf_type_vlen(t);
|
||||
if (nargs > 5) {
|
||||
bpf_log(log, "Function %s has %d > 5 args\n", tname, nargs);
|
||||
if (nargs > MAX_BPF_FUNC_REG_ARGS) {
|
||||
bpf_log(log, "Function %s has %d > %d args\n", tname, nargs,
|
||||
MAX_BPF_FUNC_REG_ARGS);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -5458,9 +5540,9 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
|
||||
}
|
||||
args = (const struct btf_param *)(t + 1);
|
||||
nargs = btf_type_vlen(t);
|
||||
if (nargs > 5) {
|
||||
bpf_log(log, "Global function %s() with %d > 5 args. Buggy compiler.\n",
|
||||
tname, nargs);
|
||||
if (nargs > MAX_BPF_FUNC_REG_ARGS) {
|
||||
bpf_log(log, "Global function %s() with %d > %d args. Buggy compiler.\n",
|
||||
tname, nargs, MAX_BPF_FUNC_REG_ARGS);
|
||||
return -EINVAL;
|
||||
}
|
||||
/* check that function returns int */
|
||||
|
@ -543,7 +543,6 @@ static void cpu_map_free(struct bpf_map *map)
|
||||
* complete.
|
||||
*/
|
||||
|
||||
bpf_clear_redirect_map(map);
|
||||
synchronize_rcu();
|
||||
|
||||
/* For cpu_map the remote CPUs can still be using the entries
|
||||
@ -563,7 +562,7 @@ static void cpu_map_free(struct bpf_map *map)
|
||||
kfree(cmap);
|
||||
}
|
||||
|
||||
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
|
||||
static void *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
|
||||
{
|
||||
struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
|
||||
struct bpf_cpu_map_entry *rcpu;
|
||||
@ -600,6 +599,11 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cpu_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
|
||||
{
|
||||
return __bpf_xdp_redirect_map(map, ifindex, flags, __cpu_map_lookup_elem);
|
||||
}
|
||||
|
||||
static int cpu_map_btf_id;
|
||||
const struct bpf_map_ops cpu_map_ops = {
|
||||
.map_meta_equal = bpf_map_meta_equal,
|
||||
@ -612,6 +616,7 @@ const struct bpf_map_ops cpu_map_ops = {
|
||||
.map_check_btf = map_check_no_btf,
|
||||
.map_btf_name = "bpf_cpu_map",
|
||||
.map_btf_id = &cpu_map_btf_id,
|
||||
.map_redirect = cpu_map_redirect,
|
||||
};
|
||||
|
||||
static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
|
||||
|
@ -197,7 +197,6 @@ static void dev_map_free(struct bpf_map *map)
|
||||
list_del_rcu(&dtab->list);
|
||||
spin_unlock(&dev_map_lock);
|
||||
|
||||
bpf_clear_redirect_map(map);
|
||||
synchronize_rcu();
|
||||
|
||||
/* Make sure prior __dev_map_entry_free() have completed. */
|
||||
@ -258,7 +257,7 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
|
||||
static void *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
|
||||
{
|
||||
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
|
||||
struct hlist_head *head = dev_map_index_hash(dtab, key);
|
||||
@ -392,7 +391,7 @@ void __dev_flush(void)
|
||||
* update happens in parallel here a dev_put wont happen until after reading the
|
||||
* ifindex.
|
||||
*/
|
||||
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
|
||||
static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
|
||||
{
|
||||
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
|
||||
struct bpf_dtab_netdev *obj;
|
||||
@ -735,6 +734,16 @@ static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
map, key, value, map_flags);
|
||||
}
|
||||
|
||||
static int dev_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
|
||||
{
|
||||
return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_lookup_elem);
|
||||
}
|
||||
|
||||
static int dev_hash_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
|
||||
{
|
||||
return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_hash_lookup_elem);
|
||||
}
|
||||
|
||||
static int dev_map_btf_id;
|
||||
const struct bpf_map_ops dev_map_ops = {
|
||||
.map_meta_equal = bpf_map_meta_equal,
|
||||
@ -747,6 +756,7 @@ const struct bpf_map_ops dev_map_ops = {
|
||||
.map_check_btf = map_check_no_btf,
|
||||
.map_btf_name = "bpf_dtab",
|
||||
.map_btf_id = &dev_map_btf_id,
|
||||
.map_redirect = dev_map_redirect,
|
||||
};
|
||||
|
||||
static int dev_map_hash_map_btf_id;
|
||||
@ -761,6 +771,7 @@ const struct bpf_map_ops dev_map_hash_ops = {
|
||||
.map_check_btf = map_check_no_btf,
|
||||
.map_btf_name = "bpf_dtab",
|
||||
.map_btf_id = &dev_map_hash_map_btf_id,
|
||||
.map_redirect = dev_hash_map_redirect,
|
||||
};
|
||||
|
||||
static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab,
|
||||
|
@ -1869,6 +1869,63 @@ static const struct bpf_iter_seq_info iter_seq_info = {
|
||||
.seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info),
|
||||
};
|
||||
|
||||
static int bpf_for_each_hash_elem(struct bpf_map *map, void *callback_fn,
|
||||
void *callback_ctx, u64 flags)
|
||||
{
|
||||
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
|
||||
struct hlist_nulls_head *head;
|
||||
struct hlist_nulls_node *n;
|
||||
struct htab_elem *elem;
|
||||
u32 roundup_key_size;
|
||||
int i, num_elems = 0;
|
||||
void __percpu *pptr;
|
||||
struct bucket *b;
|
||||
void *key, *val;
|
||||
bool is_percpu;
|
||||
u64 ret = 0;
|
||||
|
||||
if (flags != 0)
|
||||
return -EINVAL;
|
||||
|
||||
is_percpu = htab_is_percpu(htab);
|
||||
|
||||
roundup_key_size = round_up(map->key_size, 8);
|
||||
/* disable migration so percpu value prepared here will be the
|
||||
* same as the one seen by the bpf program with bpf_map_lookup_elem().
|
||||
*/
|
||||
if (is_percpu)
|
||||
migrate_disable();
|
||||
for (i = 0; i < htab->n_buckets; i++) {
|
||||
b = &htab->buckets[i];
|
||||
rcu_read_lock();
|
||||
head = &b->head;
|
||||
hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
|
||||
key = elem->key;
|
||||
if (is_percpu) {
|
||||
/* current cpu value for percpu map */
|
||||
pptr = htab_elem_get_ptr(elem, map->key_size);
|
||||
val = this_cpu_ptr(pptr);
|
||||
} else {
|
||||
val = elem->key + roundup_key_size;
|
||||
}
|
||||
num_elems++;
|
||||
ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
|
||||
(u64)(long)key, (u64)(long)val,
|
||||
(u64)(long)callback_ctx, 0);
|
||||
/* return value: 0 - continue, 1 - stop and return */
|
||||
if (ret) {
|
||||
rcu_read_unlock();
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
out:
|
||||
if (is_percpu)
|
||||
migrate_enable();
|
||||
return num_elems;
|
||||
}
|
||||
|
||||
static int htab_map_btf_id;
|
||||
const struct bpf_map_ops htab_map_ops = {
|
||||
.map_meta_equal = bpf_map_meta_equal,
|
||||
@ -1881,6 +1938,8 @@ const struct bpf_map_ops htab_map_ops = {
|
||||
.map_delete_elem = htab_map_delete_elem,
|
||||
.map_gen_lookup = htab_map_gen_lookup,
|
||||
.map_seq_show_elem = htab_map_seq_show_elem,
|
||||
.map_set_for_each_callback_args = map_set_for_each_callback_args,
|
||||
.map_for_each_callback = bpf_for_each_hash_elem,
|
||||
BATCH_OPS(htab),
|
||||
.map_btf_name = "bpf_htab",
|
||||
.map_btf_id = &htab_map_btf_id,
|
||||
@ -1900,6 +1959,8 @@ const struct bpf_map_ops htab_lru_map_ops = {
|
||||
.map_delete_elem = htab_lru_map_delete_elem,
|
||||
.map_gen_lookup = htab_lru_map_gen_lookup,
|
||||
.map_seq_show_elem = htab_map_seq_show_elem,
|
||||
.map_set_for_each_callback_args = map_set_for_each_callback_args,
|
||||
.map_for_each_callback = bpf_for_each_hash_elem,
|
||||
BATCH_OPS(htab_lru),
|
||||
.map_btf_name = "bpf_htab",
|
||||
.map_btf_id = &htab_lru_map_btf_id,
|
||||
@ -2019,6 +2080,8 @@ const struct bpf_map_ops htab_percpu_map_ops = {
|
||||
.map_update_elem = htab_percpu_map_update_elem,
|
||||
.map_delete_elem = htab_map_delete_elem,
|
||||
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
|
||||
.map_set_for_each_callback_args = map_set_for_each_callback_args,
|
||||
.map_for_each_callback = bpf_for_each_hash_elem,
|
||||
BATCH_OPS(htab_percpu),
|
||||
.map_btf_name = "bpf_htab",
|
||||
.map_btf_id = &htab_percpu_map_btf_id,
|
||||
@ -2036,6 +2099,8 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
|
||||
.map_update_elem = htab_lru_percpu_map_update_elem,
|
||||
.map_delete_elem = htab_lru_map_delete_elem,
|
||||
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
|
||||
.map_set_for_each_callback_args = map_set_for_each_callback_args,
|
||||
.map_for_each_callback = bpf_for_each_hash_elem,
|
||||
BATCH_OPS(htab_lru_percpu),
|
||||
.map_btf_name = "bpf_htab",
|
||||
.map_btf_id = &htab_lru_percpu_map_btf_id,
|
||||
|
@ -708,6 +708,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
|
||||
return &bpf_ringbuf_discard_proto;
|
||||
case BPF_FUNC_ringbuf_query:
|
||||
return &bpf_ringbuf_query_proto;
|
||||
case BPF_FUNC_for_each_map_elem:
|
||||
return &bpf_for_each_map_elem_proto;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -234,6 +234,12 @@ static bool bpf_pseudo_call(const struct bpf_insn *insn)
|
||||
insn->src_reg == BPF_PSEUDO_CALL;
|
||||
}
|
||||
|
||||
static bool bpf_pseudo_func(const struct bpf_insn *insn)
|
||||
{
|
||||
return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
|
||||
insn->src_reg == BPF_PSEUDO_FUNC;
|
||||
}
|
||||
|
||||
struct bpf_call_arg_meta {
|
||||
struct bpf_map *map_ptr;
|
||||
bool raw_mode;
|
||||
@ -248,6 +254,7 @@ struct bpf_call_arg_meta {
|
||||
u32 btf_id;
|
||||
struct btf *ret_btf;
|
||||
u32 ret_btf_id;
|
||||
u32 subprogno;
|
||||
};
|
||||
|
||||
struct btf *btf_vmlinux;
|
||||
@ -390,6 +397,24 @@ __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
|
||||
env->prev_linfo = linfo;
|
||||
}
|
||||
|
||||
static void verbose_invalid_scalar(struct bpf_verifier_env *env,
|
||||
struct bpf_reg_state *reg,
|
||||
struct tnum *range, const char *ctx,
|
||||
const char *reg_name)
|
||||
{
|
||||
char tn_buf[48];
|
||||
|
||||
verbose(env, "At %s the register %s ", ctx, reg_name);
|
||||
if (!tnum_is_unknown(reg->var_off)) {
|
||||
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
|
||||
verbose(env, "has value %s", tn_buf);
|
||||
} else {
|
||||
verbose(env, "has unknown scalar value");
|
||||
}
|
||||
tnum_strn(tn_buf, sizeof(tn_buf), *range);
|
||||
verbose(env, " should have been in %s\n", tn_buf);
|
||||
}
|
||||
|
||||
static bool type_is_pkt_pointer(enum bpf_reg_type type)
|
||||
{
|
||||
return type == PTR_TO_PACKET ||
|
||||
@ -409,6 +434,7 @@ static bool reg_type_not_null(enum bpf_reg_type type)
|
||||
return type == PTR_TO_SOCKET ||
|
||||
type == PTR_TO_TCP_SOCK ||
|
||||
type == PTR_TO_MAP_VALUE ||
|
||||
type == PTR_TO_MAP_KEY ||
|
||||
type == PTR_TO_SOCK_COMMON;
|
||||
}
|
||||
|
||||
@ -451,7 +477,8 @@ static bool arg_type_may_be_null(enum bpf_arg_type type)
|
||||
type == ARG_PTR_TO_MEM_OR_NULL ||
|
||||
type == ARG_PTR_TO_CTX_OR_NULL ||
|
||||
type == ARG_PTR_TO_SOCKET_OR_NULL ||
|
||||
type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
|
||||
type == ARG_PTR_TO_ALLOC_MEM_OR_NULL ||
|
||||
type == ARG_PTR_TO_STACK_OR_NULL;
|
||||
}
|
||||
|
||||
/* Determine whether the function releases some resources allocated by another
|
||||
@ -541,6 +568,8 @@ static const char * const reg_type_str[] = {
|
||||
[PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
|
||||
[PTR_TO_RDWR_BUF] = "rdwr_buf",
|
||||
[PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
|
||||
[PTR_TO_FUNC] = "func",
|
||||
[PTR_TO_MAP_KEY] = "map_key",
|
||||
};
|
||||
|
||||
static char slot_type_char[] = {
|
||||
@ -612,6 +641,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
|
||||
if (type_is_pkt_pointer(t))
|
||||
verbose(env, ",r=%d", reg->range);
|
||||
else if (t == CONST_PTR_TO_MAP ||
|
||||
t == PTR_TO_MAP_KEY ||
|
||||
t == PTR_TO_MAP_VALUE ||
|
||||
t == PTR_TO_MAP_VALUE_OR_NULL)
|
||||
verbose(env, ",ks=%d,vs=%d",
|
||||
@ -1519,7 +1549,7 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
|
||||
}
|
||||
ret = find_subprog(env, off);
|
||||
if (ret >= 0)
|
||||
return 0;
|
||||
return ret;
|
||||
if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
|
||||
verbose(env, "too many subprograms\n");
|
||||
return -E2BIG;
|
||||
@ -1527,7 +1557,7 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
|
||||
env->subprog_info[env->subprog_cnt++].start = off;
|
||||
sort(env->subprog_info, env->subprog_cnt,
|
||||
sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
|
||||
return 0;
|
||||
return env->subprog_cnt - 1;
|
||||
}
|
||||
|
||||
static int check_subprogs(struct bpf_verifier_env *env)
|
||||
@ -1544,6 +1574,19 @@ static int check_subprogs(struct bpf_verifier_env *env)
|
||||
|
||||
/* determine subprog starts. The end is one before the next starts */
|
||||
for (i = 0; i < insn_cnt; i++) {
|
||||
if (bpf_pseudo_func(insn + i)) {
|
||||
if (!env->bpf_capable) {
|
||||
verbose(env,
|
||||
"function pointers are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
|
||||
return -EPERM;
|
||||
}
|
||||
ret = add_subprog(env, i + insn[i].imm + 1);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
/* remember subprog */
|
||||
insn[i + 1].imm = ret;
|
||||
continue;
|
||||
}
|
||||
if (!bpf_pseudo_call(insn + i))
|
||||
continue;
|
||||
if (!env->bpf_capable) {
|
||||
@ -2295,6 +2338,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
|
||||
case PTR_TO_PERCPU_BTF_ID:
|
||||
case PTR_TO_MEM:
|
||||
case PTR_TO_MEM_OR_NULL:
|
||||
case PTR_TO_FUNC:
|
||||
case PTR_TO_MAP_KEY:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
@ -2899,6 +2944,10 @@ static int __check_mem_access(struct bpf_verifier_env *env, int regno,
|
||||
|
||||
reg = &cur_regs(env)[regno];
|
||||
switch (reg->type) {
|
||||
case PTR_TO_MAP_KEY:
|
||||
verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
|
||||
mem_size, off, size);
|
||||
break;
|
||||
case PTR_TO_MAP_VALUE:
|
||||
verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
|
||||
mem_size, off, size);
|
||||
@ -3304,6 +3353,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
|
||||
case PTR_TO_FLOW_KEYS:
|
||||
pointer_desc = "flow keys ";
|
||||
break;
|
||||
case PTR_TO_MAP_KEY:
|
||||
pointer_desc = "key ";
|
||||
break;
|
||||
case PTR_TO_MAP_VALUE:
|
||||
pointer_desc = "value ";
|
||||
break;
|
||||
@ -3405,7 +3457,7 @@ process_func:
|
||||
continue_func:
|
||||
subprog_end = subprog[idx + 1].start;
|
||||
for (; i < subprog_end; i++) {
|
||||
if (!bpf_pseudo_call(insn + i))
|
||||
if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
|
||||
continue;
|
||||
/* remember insn and function to return to */
|
||||
ret_insn[frame] = i + 1;
|
||||
@ -3842,7 +3894,19 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
|
||||
/* for access checks, reg->off is just part of off */
|
||||
off += reg->off;
|
||||
|
||||
if (reg->type == PTR_TO_MAP_VALUE) {
|
||||
if (reg->type == PTR_TO_MAP_KEY) {
|
||||
if (t == BPF_WRITE) {
|
||||
verbose(env, "write to change key R%d not allowed\n", regno);
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
err = check_mem_region_access(env, regno, off, size,
|
||||
reg->map_ptr->key_size, false);
|
||||
if (err)
|
||||
return err;
|
||||
if (value_regno >= 0)
|
||||
mark_reg_unknown(env, regs, value_regno);
|
||||
} else if (reg->type == PTR_TO_MAP_VALUE) {
|
||||
if (t == BPF_WRITE && value_regno >= 0 &&
|
||||
is_pointer_value(env, value_regno)) {
|
||||
verbose(env, "R%d leaks addr into map\n", value_regno);
|
||||
@ -4258,6 +4322,9 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
|
||||
case PTR_TO_PACKET_META:
|
||||
return check_packet_access(env, regno, reg->off, access_size,
|
||||
zero_size_allowed);
|
||||
case PTR_TO_MAP_KEY:
|
||||
return check_mem_region_access(env, regno, reg->off, access_size,
|
||||
reg->map_ptr->key_size, false);
|
||||
case PTR_TO_MAP_VALUE:
|
||||
if (check_map_access_type(env, regno, reg->off, access_size,
|
||||
meta && meta->raw_mode ? BPF_WRITE :
|
||||
@ -4474,6 +4541,7 @@ static const struct bpf_reg_types map_key_value_types = {
|
||||
PTR_TO_STACK,
|
||||
PTR_TO_PACKET,
|
||||
PTR_TO_PACKET_META,
|
||||
PTR_TO_MAP_KEY,
|
||||
PTR_TO_MAP_VALUE,
|
||||
},
|
||||
};
|
||||
@ -4505,6 +4573,7 @@ static const struct bpf_reg_types mem_types = {
|
||||
PTR_TO_STACK,
|
||||
PTR_TO_PACKET,
|
||||
PTR_TO_PACKET_META,
|
||||
PTR_TO_MAP_KEY,
|
||||
PTR_TO_MAP_VALUE,
|
||||
PTR_TO_MEM,
|
||||
PTR_TO_RDONLY_BUF,
|
||||
@ -4517,6 +4586,7 @@ static const struct bpf_reg_types int_ptr_types = {
|
||||
PTR_TO_STACK,
|
||||
PTR_TO_PACKET,
|
||||
PTR_TO_PACKET_META,
|
||||
PTR_TO_MAP_KEY,
|
||||
PTR_TO_MAP_VALUE,
|
||||
},
|
||||
};
|
||||
@ -4529,6 +4599,8 @@ static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_T
|
||||
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
|
||||
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
|
||||
static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
|
||||
static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
|
||||
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
|
||||
|
||||
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
|
||||
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
|
||||
@ -4557,6 +4629,8 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
|
||||
[ARG_PTR_TO_INT] = &int_ptr_types,
|
||||
[ARG_PTR_TO_LONG] = &int_ptr_types,
|
||||
[ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
|
||||
[ARG_PTR_TO_FUNC] = &func_ptr_types,
|
||||
[ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types,
|
||||
};
|
||||
|
||||
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
|
||||
@ -4738,6 +4812,8 @@ skip_type_check:
|
||||
verbose(env, "verifier internal error\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
} else if (arg_type == ARG_PTR_TO_FUNC) {
|
||||
meta->subprogno = reg->subprogno;
|
||||
} else if (arg_type_is_mem_ptr(arg_type)) {
|
||||
/* The access to this pointer is only checked when we hit the
|
||||
* next is_mem_size argument below.
|
||||
@ -5258,13 +5334,19 @@ static void clear_caller_saved_regs(struct bpf_verifier_env *env,
|
||||
}
|
||||
}
|
||||
|
||||
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
|
||||
int *insn_idx)
|
||||
typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
|
||||
struct bpf_func_state *caller,
|
||||
struct bpf_func_state *callee,
|
||||
int insn_idx);
|
||||
|
||||
static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
|
||||
int *insn_idx, int subprog,
|
||||
set_callee_state_fn set_callee_state_cb)
|
||||
{
|
||||
struct bpf_verifier_state *state = env->cur_state;
|
||||
struct bpf_func_info_aux *func_info_aux;
|
||||
struct bpf_func_state *caller, *callee;
|
||||
int i, err, subprog, target_insn;
|
||||
int err;
|
||||
bool is_global = false;
|
||||
|
||||
if (state->curframe + 1 >= MAX_CALL_FRAMES) {
|
||||
@ -5273,14 +5355,6 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
|
||||
return -E2BIG;
|
||||
}
|
||||
|
||||
target_insn = *insn_idx + insn->imm;
|
||||
subprog = find_subprog(env, target_insn + 1);
|
||||
if (subprog < 0) {
|
||||
verbose(env, "verifier bug. No program starts at insn %d\n",
|
||||
target_insn + 1);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
caller = state->frame[state->curframe];
|
||||
if (state->frame[state->curframe + 1]) {
|
||||
verbose(env, "verifier bug. Frame %d already allocated\n",
|
||||
@ -5335,11 +5409,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* copy r1 - r5 args that callee can access. The copy includes parent
|
||||
* pointers, which connects us up to the liveness chain
|
||||
*/
|
||||
for (i = BPF_REG_1; i <= BPF_REG_5; i++)
|
||||
callee->regs[i] = caller->regs[i];
|
||||
err = set_callee_state_cb(env, caller, callee, *insn_idx);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
clear_caller_saved_regs(env, caller->regs);
|
||||
|
||||
@ -5347,7 +5419,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
|
||||
state->curframe++;
|
||||
|
||||
/* and go analyze first insn of the callee */
|
||||
*insn_idx = target_insn;
|
||||
*insn_idx = env->subprog_info[subprog].start - 1;
|
||||
|
||||
if (env->log.level & BPF_LOG_LEVEL) {
|
||||
verbose(env, "caller:\n");
|
||||
@ -5358,6 +5430,92 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
|
||||
return 0;
|
||||
}

int map_set_for_each_callback_args(struct bpf_verifier_env *env,
				   struct bpf_func_state *caller,
				   struct bpf_func_state *callee)
{
	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
	 *                       void *callback_ctx, u64 flags);
	 * callback_fn(struct bpf_map *map, void *key, void *value,
	 *             void *callback_ctx);
	 */
	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];

	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;

	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;

	/* pointer to stack or null */
	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];

	/* unused */
	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
	return 0;
}

static int set_callee_state(struct bpf_verifier_env *env,
			    struct bpf_func_state *caller,
			    struct bpf_func_state *callee, int insn_idx)
{
	int i;

	/* copy r1 - r5 args that callee can access. The copy includes parent
	 * pointers, which connects us up to the liveness chain
	 */
	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
		callee->regs[i] = caller->regs[i];
	return 0;
}

static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			   int *insn_idx)
{
	int subprog, target_insn;

	target_insn = *insn_idx + insn->imm + 1;
	subprog = find_subprog(env, target_insn);
	if (subprog < 0) {
		verbose(env, "verifier bug. No program starts at insn %d\n",
			target_insn);
		return -EFAULT;
	}

	return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
}

static int set_map_elem_callback_state(struct bpf_verifier_env *env,
				       struct bpf_func_state *caller,
				       struct bpf_func_state *callee,
				       int insn_idx)
{
	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
	struct bpf_map *map;
	int err;

	if (bpf_map_ptr_poisoned(insn_aux)) {
		verbose(env, "tail_call abusing map_ptr\n");
		return -EINVAL;
	}

	map = BPF_MAP_PTR(insn_aux->map_ptr_state);
	if (!map->ops->map_set_for_each_callback_args ||
	    !map->ops->map_for_each_callback) {
		verbose(env, "callback function not allowed for map\n");
		return -ENOTSUPP;
	}

	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
	if (err)
		return err;

	callee->in_callback_fn = true;
	return 0;
}

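To make the register setup above concrete, here is a minimal BPF-side sketch of the new helper; the map, section and program names are hypothetical, and it assumes headers and a clang/libbpf new enough to expose bpf_for_each_map_elem and emit the callback as a separate static subprogram. The callback mirrors the callback_fn() signature from the comment above, must be a static function, and returns 0 to keep iterating or 1 to stop early (the verifier enforces the [0, 1] range when the callback frame exits).

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 128);
	__type(key, __u32);
	__type(value, __u64);
} counters SEC(".maps");

struct cb_ctx {
	__u64 total;
};

/* callback_fn(map, key, value, callback_ctx): sum all values, never stop early */
static __u64 sum_values(struct bpf_map *map, __u32 *key, __u64 *val,
			struct cb_ctx *data)
{
	data->total += *val;
	return 0;
}

SEC("tp/syscalls/sys_enter_getpid")
int sum_all(void *ctx)
{
	struct cb_ctx data = { .total = 0 };

	/* the flags argument must currently be 0 */
	bpf_for_each_map_elem(&counters, sum_values, &data, 0);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
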
static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
|
||||
{
|
||||
struct bpf_verifier_state *state = env->cur_state;
|
||||
@ -5380,8 +5538,22 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
|
||||
|
||||
state->curframe--;
|
||||
caller = state->frame[state->curframe];
|
||||
/* return to the caller whatever r0 had in the callee */
|
||||
caller->regs[BPF_REG_0] = *r0;
|
||||
if (callee->in_callback_fn) {
|
||||
/* enforce R0 return value range [0, 1]. */
|
||||
struct tnum range = tnum_range(0, 1);
|
||||
|
||||
if (r0->type != SCALAR_VALUE) {
|
||||
verbose(env, "R0 not a scalar value\n");
|
||||
return -EACCES;
|
||||
}
|
||||
if (!tnum_in(range, r0->var_off)) {
|
||||
verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
|
||||
return -EINVAL;
|
||||
}
|
||||
} else {
|
||||
/* return to the caller whatever r0 had in the callee */
|
||||
caller->regs[BPF_REG_0] = *r0;
|
||||
}
|
||||
|
||||
/* Transfer references to the caller */
|
||||
err = transfer_reference_state(caller, callee);
|
||||
@ -5436,7 +5608,9 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
|
||||
func_id != BPF_FUNC_map_delete_elem &&
|
||||
func_id != BPF_FUNC_map_push_elem &&
|
||||
func_id != BPF_FUNC_map_pop_elem &&
|
||||
func_id != BPF_FUNC_map_peek_elem)
|
||||
func_id != BPF_FUNC_map_peek_elem &&
|
||||
func_id != BPF_FUNC_for_each_map_elem &&
|
||||
func_id != BPF_FUNC_redirect_map)
|
||||
return 0;
|
||||
|
||||
if (map == NULL) {
|
||||
@ -5517,15 +5691,18 @@ static int check_reference_leak(struct bpf_verifier_env *env)
|
||||
return state->acquired_refs ? -EINVAL : 0;
|
||||
}
|
||||
|
||||
static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
|
||||
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
|
||||
int *insn_idx_p)
|
||||
{
|
||||
const struct bpf_func_proto *fn = NULL;
|
||||
struct bpf_reg_state *regs;
|
||||
struct bpf_call_arg_meta meta;
|
||||
int insn_idx = *insn_idx_p;
|
||||
bool changes_data;
|
||||
int i, err;
|
||||
int i, err, func_id;
|
||||
|
||||
/* find function prototype */
|
||||
func_id = insn->imm;
|
||||
if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
|
||||
verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
|
||||
func_id);
|
||||
@ -5571,7 +5748,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
|
||||
|
||||
meta.func_id = func_id;
|
||||
/* check args */
|
||||
for (i = 0; i < 5; i++) {
|
||||
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
|
||||
err = check_func_arg(env, i, &meta, fn);
|
||||
if (err)
|
||||
return err;
|
||||
@ -5621,6 +5798,13 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (func_id == BPF_FUNC_for_each_map_elem) {
|
||||
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
|
||||
set_map_elem_callback_state);
|
||||
if (err < 0)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* reset caller saved regs */
|
||||
for (i = 0; i < CALLER_SAVED_REGS; i++) {
|
||||
mark_reg_not_init(env, regs, caller_saved[i]);
|
||||
@ -5874,6 +6058,19 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
|
||||
else
|
||||
*ptr_limit = -off;
|
||||
return 0;
|
||||
case PTR_TO_MAP_KEY:
|
||||
/* Currently, this code is not exercised as the only use
|
||||
* is bpf_for_each_map_elem() helper which requires
|
||||
* bpf_capable. The code has been tested manually for
|
||||
* future use.
|
||||
*/
|
||||
if (mask_to_left) {
|
||||
*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
|
||||
} else {
|
||||
off = ptr_reg->smin_value + ptr_reg->off;
|
||||
*ptr_limit = ptr_reg->map_ptr->key_size - off;
|
||||
}
|
||||
return 0;
|
||||
case PTR_TO_MAP_VALUE:
|
||||
if (mask_to_left) {
|
||||
*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
|
||||
@ -5904,7 +6101,7 @@ static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
|
||||
aux->alu_limit != alu_limit))
|
||||
return -EACCES;
|
||||
|
||||
/* Corresponding fixup done in fixup_bpf_calls(). */
|
||||
/* Corresponding fixup done in do_misc_fixups(). */
|
||||
aux->alu_state = alu_state;
|
||||
aux->alu_limit = alu_limit;
|
||||
return 0;
|
||||
@ -6075,6 +6272,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
|
||||
verbose(env, "R%d pointer arithmetic on %s prohibited\n",
|
||||
dst, reg_type_str[ptr_reg->type]);
|
||||
return -EACCES;
|
||||
case PTR_TO_MAP_KEY:
|
||||
case PTR_TO_MAP_VALUE:
|
||||
if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
|
||||
verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
|
||||
@ -8254,6 +8452,24 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (insn->src_reg == BPF_PSEUDO_FUNC) {
|
||||
struct bpf_prog_aux *aux = env->prog->aux;
|
||||
u32 subprogno = insn[1].imm;
|
||||
|
||||
if (!aux->func_info) {
|
||||
verbose(env, "missing btf func_info\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
|
||||
verbose(env, "callback function not static\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
dst_reg->type = PTR_TO_FUNC;
|
||||
dst_reg->subprogno = subprogno;
|
||||
return 0;
|
||||
}
|
||||
|
||||
map = env->used_maps[aux->map_index];
|
||||
mark_reg_known_zero(env, regs, insn->dst_reg);
|
||||
dst_reg->map_ptr = map;
|
||||
@ -8482,17 +8698,7 @@ static int check_return_code(struct bpf_verifier_env *env)
|
||||
}
|
||||
|
||||
if (!tnum_in(range, reg->var_off)) {
|
||||
char tn_buf[48];
|
||||
|
||||
verbose(env, "At program exit the register R0 ");
|
||||
if (!tnum_is_unknown(reg->var_off)) {
|
||||
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
|
||||
verbose(env, "has value %s", tn_buf);
|
||||
} else {
|
||||
verbose(env, "has unknown scalar value");
|
||||
}
|
||||
tnum_strn(tn_buf, sizeof(tn_buf), range);
|
||||
verbose(env, " should have been in %s\n", tn_buf);
|
||||
verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@ -8619,6 +8825,27 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
|
||||
return DONE_EXPLORING;
|
||||
}
|
||||
|
||||
static int visit_func_call_insn(int t, int insn_cnt,
|
||||
struct bpf_insn *insns,
|
||||
struct bpf_verifier_env *env,
|
||||
bool visit_callee)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (t + 1 < insn_cnt)
|
||||
init_explored_state(env, t + 1);
|
||||
if (visit_callee) {
|
||||
init_explored_state(env, t);
|
||||
ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
|
||||
env, false);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Visits the instruction at index t and returns one of the following:
|
||||
* < 0 - an error occurred
|
||||
* DONE_EXPLORING - the instruction was fully explored
|
||||
@ -8629,6 +8856,9 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
|
||||
struct bpf_insn *insns = env->prog->insnsi;
|
||||
int ret;
|
||||
|
||||
if (bpf_pseudo_func(insns + t))
|
||||
return visit_func_call_insn(t, insn_cnt, insns, env, true);
|
||||
|
||||
/* All non-branch instructions have a single fall-through edge. */
|
||||
if (BPF_CLASS(insns[t].code) != BPF_JMP &&
|
||||
BPF_CLASS(insns[t].code) != BPF_JMP32)
|
||||
@ -8639,18 +8869,8 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
|
||||
return DONE_EXPLORING;
|
||||
|
||||
case BPF_CALL:
|
||||
ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (t + 1 < insn_cnt)
|
||||
init_explored_state(env, t + 1);
|
||||
if (insns[t].src_reg == BPF_PSEUDO_CALL) {
|
||||
init_explored_state(env, t);
|
||||
ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
|
||||
env, false);
|
||||
}
|
||||
return ret;
|
||||
return visit_func_call_insn(t, insn_cnt, insns, env,
|
||||
insns[t].src_reg == BPF_PSEUDO_CALL);
|
||||
|
||||
case BPF_JA:
|
||||
if (BPF_SRC(insns[t].code) != BPF_K)
|
||||
@ -9259,6 +9479,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
case PTR_TO_MAP_KEY:
|
||||
case PTR_TO_MAP_VALUE:
|
||||
/* If the new min/max/var_off satisfy the old ones and
|
||||
* everything else matches, we are OK.
|
||||
@ -10105,10 +10326,9 @@ static int do_check(struct bpf_verifier_env *env)
|
||||
if (insn->src_reg == BPF_PSEUDO_CALL)
|
||||
err = check_func_call(env, insn, &env->insn_idx);
|
||||
else
|
||||
err = check_helper_call(env, insn->imm, env->insn_idx);
|
||||
err = check_helper_call(env, insn, &env->insn_idx);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
} else if (opcode == BPF_JA) {
|
||||
if (BPF_SRC(insn->code) != BPF_K ||
|
||||
insn->imm != 0 ||
|
||||
@ -10537,6 +10757,12 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
|
||||
goto next_insn;
|
||||
}
|
||||
|
||||
if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
|
||||
aux = &env->insn_aux_data[i];
|
||||
aux->ptr_type = PTR_TO_FUNC;
|
||||
goto next_insn;
|
||||
}
|
||||
|
||||
/* In final convert_pseudo_ld_imm64() step, this is
|
||||
* converted into regular 64-bit imm load insn.
|
||||
*/
|
||||
@ -10669,9 +10895,13 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
|
||||
int insn_cnt = env->prog->len;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < insn_cnt; i++, insn++)
|
||||
if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
|
||||
insn->src_reg = 0;
|
||||
for (i = 0; i < insn_cnt; i++, insn++) {
|
||||
if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
|
||||
continue;
|
||||
if (insn->src_reg == BPF_PSEUDO_FUNC)
|
||||
continue;
|
||||
insn->src_reg = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* single env->prog->insni[off] instruction was replaced with the range
|
||||
@ -11310,6 +11540,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
|
||||
return 0;
|
||||
|
||||
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
|
||||
if (bpf_pseudo_func(insn)) {
|
||||
env->insn_aux_data[i].call_imm = insn->imm;
|
||||
/* subprog is encoded in insn[1].imm */
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!bpf_pseudo_call(insn))
|
||||
continue;
|
||||
/* Upon error here we cannot fall back to interpreter but
|
||||
@ -11439,6 +11675,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
|
||||
for (i = 0; i < env->subprog_cnt; i++) {
|
||||
insn = func[i]->insnsi;
|
||||
for (j = 0; j < func[i]->len; j++, insn++) {
|
||||
if (bpf_pseudo_func(insn)) {
|
||||
subprog = insn[1].imm;
|
||||
insn[0].imm = (u32)(long)func[subprog]->bpf_func;
|
||||
insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
|
||||
continue;
|
||||
}
|
||||
if (!bpf_pseudo_call(insn))
|
||||
continue;
|
||||
subprog = insn->off;
|
||||
@ -11484,6 +11726,11 @@ static int jit_subprogs(struct bpf_verifier_env *env)
|
||||
* later look the same as if they were interpreted only.
|
||||
*/
|
||||
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
|
||||
if (bpf_pseudo_func(insn)) {
|
||||
insn[0].imm = env->insn_aux_data[i].call_imm;
|
||||
insn[1].imm = find_subprog(env, i + insn[0].imm + 1);
|
||||
continue;
|
||||
}
|
||||
if (!bpf_pseudo_call(insn))
|
||||
continue;
|
||||
insn->off = env->insn_aux_data[i].call_imm;
|
||||
@ -11548,6 +11795,14 @@ static int fixup_call_args(struct bpf_verifier_env *env)
|
||||
return -EINVAL;
|
||||
}
|
||||
for (i = 0; i < prog->len; i++, insn++) {
|
||||
if (bpf_pseudo_func(insn)) {
|
||||
/* When JIT fails the progs with callback calls
|
||||
* have to be rejected, since interpreter doesn't support them yet.
|
||||
*/
|
||||
verbose(env, "callbacks are not allowed in non-JITed programs\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!bpf_pseudo_call(insn))
|
||||
continue;
|
||||
depth = get_callee_stack_depth(env, insn, i);
|
||||
@ -11560,12 +11815,10 @@ static int fixup_call_args(struct bpf_verifier_env *env)
|
||||
return err;
|
||||
}
|
||||
|
||||
/* fixup insn->imm field of bpf_call instructions
|
||||
* and inline eligible helpers as explicit sequence of BPF instructions
|
||||
*
|
||||
* this function is called after eBPF program passed verification
|
||||
/* Do various post-verification rewrites in a single program pass.
|
||||
* These rewrites simplify JIT and interpreter implementations.
|
||||
*/
|
||||
static int fixup_bpf_calls(struct bpf_verifier_env *env)
|
||||
static int do_misc_fixups(struct bpf_verifier_env *env)
|
||||
{
|
||||
struct bpf_prog *prog = env->prog;
|
||||
bool expect_blinding = bpf_jit_blinding_enabled(prog);
|
||||
@ -11580,6 +11833,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
|
||||
int i, ret, cnt, delta = 0;
|
||||
|
||||
for (i = 0; i < insn_cnt; i++, insn++) {
|
||||
/* Make divide-by-zero exceptions impossible. */
|
||||
if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
|
||||
insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
|
||||
insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
|
||||
@ -11620,6 +11874,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
|
||||
if (BPF_CLASS(insn->code) == BPF_LD &&
|
||||
(BPF_MODE(insn->code) == BPF_ABS ||
|
||||
BPF_MODE(insn->code) == BPF_IND)) {
|
||||
@ -11639,6 +11894,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Rewrite pointer arithmetic to mitigate speculation attacks. */
|
||||
if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
|
||||
insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
|
||||
const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
|
||||
@ -11787,7 +12043,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
|
||||
insn->imm == BPF_FUNC_map_delete_elem ||
|
||||
insn->imm == BPF_FUNC_map_push_elem ||
|
||||
insn->imm == BPF_FUNC_map_pop_elem ||
|
||||
insn->imm == BPF_FUNC_map_peek_elem)) {
|
||||
insn->imm == BPF_FUNC_map_peek_elem ||
|
||||
insn->imm == BPF_FUNC_redirect_map)) {
|
||||
aux = &env->insn_aux_data[i + delta];
|
||||
if (bpf_map_ptr_poisoned(aux))
|
||||
goto patch_call_imm;
|
||||
@ -11829,6 +12086,9 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
|
||||
(int (*)(struct bpf_map *map, void *value))NULL));
|
||||
BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
|
||||
(int (*)(struct bpf_map *map, void *value))NULL));
|
||||
BUILD_BUG_ON(!__same_type(ops->map_redirect,
|
||||
(int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL));
|
||||
|
||||
patch_map_ops_generic:
|
||||
switch (insn->imm) {
|
||||
case BPF_FUNC_map_lookup_elem:
|
||||
@ -11855,11 +12115,16 @@ patch_map_ops_generic:
|
||||
insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
|
||||
__bpf_call_base;
|
||||
continue;
|
||||
case BPF_FUNC_redirect_map:
|
||||
insn->imm = BPF_CAST_CALL(ops->map_redirect) -
|
||||
__bpf_call_base;
|
||||
continue;
|
||||
}
|
||||
|
||||
goto patch_call_imm;
|
||||
}
|
||||
|
||||
/* Implement bpf_jiffies64 inline. */
|
||||
if (prog->jit_requested && BITS_PER_LONG == 64 &&
|
||||
insn->imm == BPF_FUNC_jiffies64) {
|
||||
struct bpf_insn ld_jiffies_addr[2] = {
|
||||
@ -12670,7 +12935,7 @@ skip_full_check:
|
||||
ret = convert_ctx_accesses(env);
|
||||
|
||||
if (ret == 0)
|
||||
ret = fixup_bpf_calls(env);
|
||||
ret = do_misc_fixups(env);
|
||||
|
||||
/* do 32-bit optimization after insn patching has done so those patched
|
||||
* insns could be handled correctly.
|
||||
|
@ -96,6 +96,7 @@
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/scs.h>
|
||||
#include <linux/io_uring.h>
|
||||
#include <linux/bpf.h>
|
||||
|
||||
#include <asm/pgalloc.h>
|
||||
#include <linux/uaccess.h>
|
||||
@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk)
|
||||
cgroup_free(tsk);
|
||||
task_numa_free(tsk, true);
|
||||
security_task_free(tsk);
|
||||
bpf_task_storage_free(tsk);
|
||||
exit_creds(tsk);
|
||||
delayacct_tsk_free(tsk);
|
||||
put_signal_struct(tsk->signal);
|
||||
@ -2064,6 +2066,9 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
p->sequential_io = 0;
|
||||
p->sequential_io_avg = 0;
|
||||
#endif
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
RCU_INIT_POINTER(p->bpf_storage, NULL);
|
||||
#endif
|
||||
|
||||
/* Perform scheduler related setup. Assign this task to a CPU. */
|
||||
retval = sched_fork(clone_flags, p);
|
||||
|
@ -1367,6 +1367,12 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_per_cpu_ptr_proto;
|
||||
case BPF_FUNC_this_cpu_ptr:
|
||||
return &bpf_this_cpu_ptr_proto;
|
||||
case BPF_FUNC_task_storage_get:
|
||||
return &bpf_task_storage_get_proto;
|
||||
case BPF_FUNC_task_storage_delete:
|
||||
return &bpf_task_storage_delete_proto;
|
||||
case BPF_FUNC_for_each_map_elem:
|
||||
return &bpf_for_each_map_elem_proto;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
|
@ -317,13 +317,9 @@ config BPF_STREAM_PARSER
|
||||
select STREAM_PARSER
|
||||
select NET_SOCK_MSG
|
||||
help
|
||||
Enabling this allows a stream parser to be used with
|
||||
Enabling this allows a TCP stream parser to be used with
|
||||
BPF_MAP_TYPE_SOCKMAP.
|
||||
|
||||
BPF_MAP_TYPE_SOCKMAP provides a map type to use with network sockets.
|
||||
It can be used to enforce socket policy, implement socket redirects,
|
||||
etc.
|
||||
|
||||
config NET_FLOW_LIMIT
|
||||
bool
|
||||
depends on RPS
|
||||
|
@ -10,20 +10,86 @@
|
||||
#include <net/bpf_sk_storage.h>
|
||||
#include <net/sock.h>
|
||||
#include <net/tcp.h>
|
||||
#include <net/net_namespace.h>
|
||||
#include <linux/error-injection.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/sock_diag.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/bpf_test_run.h>
|
||||
|
||||
struct bpf_test_timer {
	enum { NO_PREEMPT, NO_MIGRATE } mode;
	u32 i;
	u64 time_start, time_spent;
};

static void bpf_test_timer_enter(struct bpf_test_timer *t)
	__acquires(rcu)
{
	rcu_read_lock();
	if (t->mode == NO_PREEMPT)
		preempt_disable();
	else
		migrate_disable();

	t->time_start = ktime_get_ns();
}

static void bpf_test_timer_leave(struct bpf_test_timer *t)
	__releases(rcu)
{
	t->time_start = 0;

	if (t->mode == NO_PREEMPT)
		preempt_enable();
	else
		migrate_enable();
	rcu_read_unlock();
}

static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration)
	__must_hold(rcu)
{
	t->i++;
	if (t->i >= repeat) {
		/* We're done. */
		t->time_spent += ktime_get_ns() - t->time_start;
		do_div(t->time_spent, t->i);
		*duration = t->time_spent > U32_MAX ? U32_MAX : (u32)t->time_spent;
		*err = 0;
		goto reset;
	}

	if (signal_pending(current)) {
		/* During iteration: we've been cancelled, abort. */
		*err = -EINTR;
		goto reset;
	}

	if (need_resched()) {
		/* During iteration: we need to reschedule between runs. */
		t->time_spent += ktime_get_ns() - t->time_start;
		bpf_test_timer_leave(t);
		cond_resched();
		bpf_test_timer_enter(t);
	}

	/* Do another round. */
	return true;

reset:
	t->i = 0;
	return false;
}

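For orientation, the converted callers below all follow the same shape; this is only a schematic of the intended calling pattern (repeat and the program invocation are placeholders), not an additional change:

	struct bpf_test_timer t = { NO_MIGRATE };	/* or NO_PREEMPT */
	u32 duration = 0;
	int err = 0;

	bpf_test_timer_enter(&t);
	do {
		/* run the program under test once */
	} while (bpf_test_timer_continue(&t, repeat, &err, &duration));
	bpf_test_timer_leave(&t);
	/* err: 0 on success or -EINTR if interrupted;
	 * duration: average ns per iteration, capped at U32_MAX.
	 */
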
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
|
||||
u32 *retval, u32 *time, bool xdp)
|
||||
{
|
||||
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
|
||||
struct bpf_test_timer t = { NO_MIGRATE };
|
||||
enum bpf_cgroup_storage_type stype;
|
||||
u64 time_start, time_spent = 0;
|
||||
int ret = 0;
|
||||
u32 i;
|
||||
int ret;
|
||||
|
||||
for_each_cgroup_storage_type(stype) {
|
||||
storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
|
||||
@ -38,40 +104,16 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
|
||||
if (!repeat)
|
||||
repeat = 1;
|
||||
|
||||
rcu_read_lock();
|
||||
migrate_disable();
|
||||
time_start = ktime_get_ns();
|
||||
for (i = 0; i < repeat; i++) {
|
||||
bpf_test_timer_enter(&t);
|
||||
do {
|
||||
bpf_cgroup_storage_set(storage);
|
||||
|
||||
if (xdp)
|
||||
*retval = bpf_prog_run_xdp(prog, ctx);
|
||||
else
|
||||
*retval = BPF_PROG_RUN(prog, ctx);
|
||||
|
||||
if (signal_pending(current)) {
|
||||
ret = -EINTR;
|
||||
break;
|
||||
}
|
||||
|
||||
if (need_resched()) {
|
||||
time_spent += ktime_get_ns() - time_start;
|
||||
migrate_enable();
|
||||
rcu_read_unlock();
|
||||
|
||||
cond_resched();
|
||||
|
||||
rcu_read_lock();
|
||||
migrate_disable();
|
||||
time_start = ktime_get_ns();
|
||||
}
|
||||
}
|
||||
time_spent += ktime_get_ns() - time_start;
|
||||
migrate_enable();
|
||||
rcu_read_unlock();
|
||||
|
||||
do_div(time_spent, repeat);
|
||||
*time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
|
||||
} while (bpf_test_timer_continue(&t, repeat, &ret, time));
|
||||
bpf_test_timer_leave(&t);
|
||||
|
||||
for_each_cgroup_storage_type(stype)
|
||||
bpf_cgroup_storage_free(storage[stype]);
|
||||
@ -674,18 +716,17 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
|
||||
const union bpf_attr *kattr,
|
||||
union bpf_attr __user *uattr)
|
||||
{
|
||||
struct bpf_test_timer t = { NO_PREEMPT };
|
||||
u32 size = kattr->test.data_size_in;
|
||||
struct bpf_flow_dissector ctx = {};
|
||||
u32 repeat = kattr->test.repeat;
|
||||
struct bpf_flow_keys *user_ctx;
|
||||
struct bpf_flow_keys flow_keys;
|
||||
u64 time_start, time_spent = 0;
|
||||
const struct ethhdr *eth;
|
||||
unsigned int flags = 0;
|
||||
u32 retval, duration;
|
||||
void *data;
|
||||
int ret;
|
||||
u32 i;
|
||||
|
||||
if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
|
||||
return -EINVAL;
|
||||
@ -721,39 +762,15 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
|
||||
ctx.data = data;
|
||||
ctx.data_end = (__u8 *)data + size;
|
||||
|
||||
rcu_read_lock();
|
||||
preempt_disable();
|
||||
time_start = ktime_get_ns();
|
||||
for (i = 0; i < repeat; i++) {
|
||||
bpf_test_timer_enter(&t);
|
||||
do {
|
||||
retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
|
||||
size, flags);
|
||||
} while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
|
||||
bpf_test_timer_leave(&t);
|
||||
|
||||
if (signal_pending(current)) {
|
||||
preempt_enable();
|
||||
rcu_read_unlock();
|
||||
|
||||
ret = -EINTR;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (need_resched()) {
|
||||
time_spent += ktime_get_ns() - time_start;
|
||||
preempt_enable();
|
||||
rcu_read_unlock();
|
||||
|
||||
cond_resched();
|
||||
|
||||
rcu_read_lock();
|
||||
preempt_disable();
|
||||
time_start = ktime_get_ns();
|
||||
}
|
||||
}
|
||||
time_spent += ktime_get_ns() - time_start;
|
||||
preempt_enable();
|
||||
rcu_read_unlock();
|
||||
|
||||
do_div(time_spent, repeat);
|
||||
duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
|
||||
retval, duration);
|
||||
@ -766,3 +783,106 @@ out:
|
||||
kfree(data);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr,
|
||||
union bpf_attr __user *uattr)
|
||||
{
|
||||
struct bpf_test_timer t = { NO_PREEMPT };
|
||||
struct bpf_prog_array *progs = NULL;
|
||||
struct bpf_sk_lookup_kern ctx = {};
|
||||
u32 repeat = kattr->test.repeat;
|
||||
struct bpf_sk_lookup *user_ctx;
|
||||
u32 retval, duration;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (prog->type != BPF_PROG_TYPE_SK_LOOKUP)
|
||||
return -EINVAL;
|
||||
|
||||
if (kattr->test.flags || kattr->test.cpu)
|
||||
return -EINVAL;
|
||||
|
||||
if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out ||
|
||||
kattr->test.data_size_out)
|
||||
return -EINVAL;
|
||||
|
||||
if (!repeat)
|
||||
repeat = 1;
|
||||
|
||||
user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx));
|
||||
if (IS_ERR(user_ctx))
|
||||
return PTR_ERR(user_ctx);
|
||||
|
||||
if (!user_ctx)
|
||||
return -EINVAL;
|
||||
|
||||
if (user_ctx->sk)
|
||||
goto out;
|
||||
|
||||
if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
|
||||
goto out;
|
||||
|
||||
if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
|
||||
ret = -ERANGE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ctx.family = (u16)user_ctx->family;
|
||||
ctx.protocol = (u16)user_ctx->protocol;
|
||||
ctx.dport = (u16)user_ctx->local_port;
|
||||
ctx.sport = (__force __be16)user_ctx->remote_port;
|
||||
|
||||
switch (ctx.family) {
|
||||
case AF_INET:
|
||||
ctx.v4.daddr = (__force __be32)user_ctx->local_ip4;
|
||||
ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4;
|
||||
break;
|
||||
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
case AF_INET6:
|
||||
ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6;
|
||||
ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6;
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
ret = -EAFNOSUPPORT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
progs = bpf_prog_array_alloc(1, GFP_KERNEL);
|
||||
if (!progs) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
	progs->items[0].prog = prog;

	bpf_test_timer_enter(&t);
	do {
		ctx.selected_sk = NULL;
		retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN);
	} while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
	bpf_test_timer_leave(&t);

	if (ret < 0)
		goto out;

	user_ctx->cookie = 0;
	if (ctx.selected_sk) {
		if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) {
			ret = -EOPNOTSUPP;
			goto out;
		}

		user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
	}

	ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
	if (!ret)
		ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));

out:
	bpf_prog_array_free(progs);
	kfree(user_ctx);
	return ret;
}

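A userspace caller can exercise the new sk_lookup test-run path roughly as follows. This is only a sketch using the raw bpf(2) syscall: prog_fd is assumed to be an already-loaded BPF_PROG_TYPE_SK_LOOKUP program, it assumes uapi headers that already carry the cookie member added by this series, addresses and the remote port follow the usual network-byte-order conventions of the sk_lookup context, and the cookie read-back is only meaningful if the program selected a socket (e.g. via bpf_sk_assign()).

#include <linux/bpf.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <unistd.h>

static int test_run_sk_lookup(int prog_fd)
{
	struct bpf_sk_lookup ctx = {};
	union bpf_attr attr = {};

	ctx.family	= AF_INET;
	ctx.protocol	= IPPROTO_TCP;
	ctx.remote_ip4	= htonl(0x7f000001);	/* 127.0.0.1 */
	ctx.remote_port	= htons(12345);
	ctx.local_ip4	= htonl(0x7f000001);
	ctx.local_port	= 80;			/* host byte order */

	attr.test.prog_fd	= prog_fd;
	attr.test.repeat	= 1;
	attr.test.ctx_in	= (unsigned long)&ctx;
	attr.test.ctx_size_in	= sizeof(ctx);
	attr.test.ctx_out	= (unsigned long)&ctx;
	attr.test.ctx_size_out	= sizeof(ctx);

	if (syscall(__NR_bpf, BPF_PROG_TEST_RUN, &attr, sizeof(attr)))
		return -1;

	/* attr.test.retval is the program's verdict (SK_PASS/SK_DROP);
	 * ctx.cookie is the cookie of the selected socket, if any.
	 */
	return attr.test.retval;
}
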
@ -16,7 +16,6 @@ obj-y += dev.o dev_addr_lists.o dst.o netevent.o \
|
||||
obj-y += net-sysfs.o
|
||||
obj-$(CONFIG_PAGE_POOL) += page_pool.o
|
||||
obj-$(CONFIG_PROC_FS) += net-procfs.o
|
||||
obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
|
||||
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
|
||||
obj-$(CONFIG_NETPOLL) += netpoll.o
|
||||
obj-$(CONFIG_FIB_RULES) += fib_rules.o
|
||||
@ -28,10 +27,13 @@ obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
|
||||
obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
|
||||
obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
|
||||
obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
|
||||
obj-$(CONFIG_BPF_STREAM_PARSER) += sock_map.o
|
||||
obj-$(CONFIG_DST_CACHE) += dst_cache.o
|
||||
obj-$(CONFIG_HWBM) += hwbm.o
|
||||
obj-$(CONFIG_NET_DEVLINK) += devlink.o
|
||||
obj-$(CONFIG_GRO_CELLS) += gro_cells.o
|
||||
obj-$(CONFIG_FAILOVER) += failover.o
|
||||
ifeq ($(CONFIG_INET),y)
|
||||
obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
|
||||
endif
|
||||
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
|
||||
|
@ -89,7 +89,7 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
|
||||
|
||||
smap = (struct bpf_local_storage_map *)map;
|
||||
bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
|
||||
bpf_local_storage_map_free(smap);
|
||||
bpf_local_storage_map_free(smap, NULL);
|
||||
}
|
||||
|
||||
static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
|
||||
|
@ -1863,10 +1863,7 @@ static const struct bpf_func_proto bpf_sk_fullsock_proto = {
|
||||
static inline int sk_skb_try_make_writable(struct sk_buff *skb,
|
||||
unsigned int write_len)
|
||||
{
|
||||
int err = __bpf_try_make_writable(skb, write_len);
|
||||
|
||||
bpf_compute_data_end_sk_skb(skb);
|
||||
return err;
|
||||
return __bpf_try_make_writable(skb, write_len);
|
||||
}
|
||||
|
||||
BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
|
||||
@ -3412,6 +3409,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
|
||||
BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
|
||||
BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
|
||||
BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
|
||||
BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
|
||||
BPF_F_ADJ_ROOM_ENCAP_L2( \
|
||||
BPF_ADJ_ROOM_ENCAP_L2_MASK))
|
||||
|
||||
@ -3448,6 +3446,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
|
||||
flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
|
||||
return -EINVAL;
|
||||
|
||||
if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH &&
|
||||
inner_mac_len < ETH_HLEN)
|
||||
return -EINVAL;
|
||||
|
||||
if (skb->encapsulation)
|
||||
return -EALREADY;
|
||||
|
||||
@ -3466,7 +3468,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
|
||||
skb->inner_mac_header = inner_net - inner_mac_len;
|
||||
skb->inner_network_header = inner_net;
|
||||
skb->inner_transport_header = inner_trans;
|
||||
skb_set_inner_protocol(skb, skb->protocol);
|
||||
|
||||
if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH)
|
||||
skb_set_inner_protocol(skb, htons(ETH_P_TEB));
|
||||
else
|
||||
skb_set_inner_protocol(skb, skb->protocol);
|
||||
|
||||
skb->encapsulation = 1;
|
||||
skb_set_network_header(skb, mac_len);
|
||||
@ -3577,7 +3583,6 @@ BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
|
||||
return -ENOMEM;
|
||||
__skb_pull(skb, len_diff_abs);
|
||||
}
|
||||
bpf_compute_data_end_sk_skb(skb);
|
||||
if (tls_sw_has_ctx_rx(skb->sk)) {
|
||||
struct strp_msg *rxm = strp_msg(skb);
|
||||
|
||||
@ -3742,10 +3747,7 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
|
||||
BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
|
||||
u64, flags)
|
||||
{
|
||||
int ret = __bpf_skb_change_tail(skb, new_len, flags);
|
||||
|
||||
bpf_compute_data_end_sk_skb(skb);
|
||||
return ret;
|
||||
return __bpf_skb_change_tail(skb, new_len, flags);
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto sk_skb_change_tail_proto = {
|
||||
@ -3808,10 +3810,7 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
|
||||
BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
|
||||
u64, flags)
|
||||
{
|
||||
int ret = __bpf_skb_change_head(skb, head_room, flags);
|
||||
|
||||
bpf_compute_data_end_sk_skb(skb);
|
||||
return ret;
|
||||
return __bpf_skb_change_head(skb, head_room, flags);
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto sk_skb_change_head_proto = {
|
||||
@ -3919,23 +3918,6 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
|
||||
struct bpf_map *map, struct xdp_buff *xdp)
|
||||
{
|
||||
switch (map->map_type) {
|
||||
case BPF_MAP_TYPE_DEVMAP:
|
||||
case BPF_MAP_TYPE_DEVMAP_HASH:
|
||||
return dev_map_enqueue(fwd, xdp, dev_rx);
|
||||
case BPF_MAP_TYPE_CPUMAP:
|
||||
return cpu_map_enqueue(fwd, xdp, dev_rx);
|
||||
case BPF_MAP_TYPE_XSKMAP:
|
||||
return __xsk_map_redirect(fwd, xdp);
|
||||
default:
|
||||
return -EBADRQC;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void xdp_do_flush(void)
|
||||
{
|
||||
__dev_flush();
|
||||
@ -3944,71 +3926,52 @@ void xdp_do_flush(void)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdp_do_flush);
|
||||
|
||||
static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
|
||||
{
|
||||
switch (map->map_type) {
|
||||
case BPF_MAP_TYPE_DEVMAP:
|
||||
return __dev_map_lookup_elem(map, index);
|
||||
case BPF_MAP_TYPE_DEVMAP_HASH:
|
||||
return __dev_map_hash_lookup_elem(map, index);
|
||||
case BPF_MAP_TYPE_CPUMAP:
|
||||
return __cpu_map_lookup_elem(map, index);
|
||||
case BPF_MAP_TYPE_XSKMAP:
|
||||
return __xsk_map_lookup_elem(map, index);
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void bpf_clear_redirect_map(struct bpf_map *map)
|
||||
{
|
||||
struct bpf_redirect_info *ri;
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
ri = per_cpu_ptr(&bpf_redirect_info, cpu);
|
||||
/* Avoid polluting remote cacheline due to writes if
|
||||
* not needed. Once we pass this test, we need the
|
||||
* cmpxchg() to make sure it hasn't been changed in
|
||||
* the meantime by remote CPU.
|
||||
*/
|
||||
if (unlikely(READ_ONCE(ri->map) == map))
|
||||
cmpxchg(&ri->map, map, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
|
||||
struct bpf_prog *xdp_prog)
|
||||
{
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
struct bpf_map *map = READ_ONCE(ri->map);
|
||||
u32 index = ri->tgt_index;
|
||||
enum bpf_map_type map_type = ri->map_type;
|
||||
void *fwd = ri->tgt_value;
|
||||
u32 map_id = ri->map_id;
|
||||
int err;
|
||||
|
||||
ri->tgt_index = 0;
|
||||
ri->tgt_value = NULL;
|
||||
WRITE_ONCE(ri->map, NULL);
|
||||
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
|
||||
ri->map_type = BPF_MAP_TYPE_UNSPEC;
|
||||
|
||||
if (unlikely(!map)) {
|
||||
fwd = dev_get_by_index_rcu(dev_net(dev), index);
|
||||
if (unlikely(!fwd)) {
|
||||
err = -EINVAL;
|
||||
goto err;
|
||||
switch (map_type) {
|
||||
case BPF_MAP_TYPE_DEVMAP:
|
||||
fallthrough;
|
||||
case BPF_MAP_TYPE_DEVMAP_HASH:
|
||||
err = dev_map_enqueue(fwd, xdp, dev);
|
||||
break;
|
||||
case BPF_MAP_TYPE_CPUMAP:
|
||||
err = cpu_map_enqueue(fwd, xdp, dev);
|
||||
break;
|
||||
case BPF_MAP_TYPE_XSKMAP:
|
||||
err = __xsk_map_redirect(fwd, xdp);
|
||||
break;
|
||||
case BPF_MAP_TYPE_UNSPEC:
|
||||
if (map_id == INT_MAX) {
|
||||
fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
|
||||
if (unlikely(!fwd)) {
|
||||
err = -EINVAL;
|
||||
break;
|
||||
}
|
||||
err = dev_xdp_enqueue(fwd, xdp, dev);
|
||||
break;
|
||||
}
|
||||
|
||||
err = dev_xdp_enqueue(fwd, xdp, dev);
|
||||
} else {
|
||||
err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
|
||||
fallthrough;
|
||||
default:
|
||||
err = -EBADRQC;
|
||||
}
|
||||
|
||||
if (unlikely(err))
|
||||
goto err;
|
||||
|
||||
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
|
||||
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
|
||||
return 0;
|
||||
err:
|
||||
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
|
||||
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdp_do_redirect);
|
||||
@ -4017,41 +3980,36 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
|
||||
struct sk_buff *skb,
|
||||
struct xdp_buff *xdp,
|
||||
struct bpf_prog *xdp_prog,
|
||||
struct bpf_map *map)
|
||||
void *fwd,
|
||||
enum bpf_map_type map_type, u32 map_id)
|
||||
{
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
u32 index = ri->tgt_index;
|
||||
void *fwd = ri->tgt_value;
|
||||
int err = 0;
|
||||
int err;
|
||||
|
||||
ri->tgt_index = 0;
|
||||
ri->tgt_value = NULL;
|
||||
WRITE_ONCE(ri->map, NULL);
|
||||
|
||||
if (map->map_type == BPF_MAP_TYPE_DEVMAP ||
|
||||
map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
|
||||
struct bpf_dtab_netdev *dst = fwd;
|
||||
|
||||
err = dev_map_generic_redirect(dst, skb, xdp_prog);
|
||||
switch (map_type) {
|
||||
case BPF_MAP_TYPE_DEVMAP:
|
||||
fallthrough;
|
||||
case BPF_MAP_TYPE_DEVMAP_HASH:
|
||||
err = dev_map_generic_redirect(fwd, skb, xdp_prog);
|
||||
if (unlikely(err))
|
||||
goto err;
|
||||
} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
|
||||
struct xdp_sock *xs = fwd;
|
||||
|
||||
err = xsk_generic_rcv(xs, xdp);
|
||||
break;
|
||||
case BPF_MAP_TYPE_XSKMAP:
|
||||
err = xsk_generic_rcv(fwd, xdp);
|
||||
if (err)
|
||||
goto err;
|
||||
consume_skb(skb);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
/* TODO: Handle BPF_MAP_TYPE_CPUMAP */
|
||||
err = -EBADRQC;
|
||||
goto err;
|
||||
}
|
||||
|
||||
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
|
||||
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
|
||||
return 0;
|
||||
err:
|
||||
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
|
||||
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -4059,31 +4017,34 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
|
||||
struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
|
||||
{
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
struct bpf_map *map = READ_ONCE(ri->map);
|
||||
u32 index = ri->tgt_index;
|
||||
struct net_device *fwd;
|
||||
int err = 0;
|
||||
enum bpf_map_type map_type = ri->map_type;
|
||||
void *fwd = ri->tgt_value;
|
||||
u32 map_id = ri->map_id;
|
||||
int err;
|
||||
|
||||
if (map)
|
||||
return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
|
||||
map);
|
||||
ri->tgt_index = 0;
|
||||
fwd = dev_get_by_index_rcu(dev_net(dev), index);
|
||||
if (unlikely(!fwd)) {
|
||||
err = -EINVAL;
|
||||
goto err;
|
||||
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
|
||||
ri->map_type = BPF_MAP_TYPE_UNSPEC;
|
||||
|
||||
if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
|
||||
fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
|
||||
if (unlikely(!fwd)) {
|
||||
err = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
err = xdp_ok_fwd_dev(fwd, skb->len);
|
||||
if (unlikely(err))
|
||||
goto err;
|
||||
|
||||
skb->dev = fwd;
|
||||
_trace_xdp_redirect(dev, xdp_prog, ri->tgt_index);
|
||||
generic_xdp_tx(skb, xdp_prog);
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = xdp_ok_fwd_dev(fwd, skb->len);
|
||||
if (unlikely(err))
|
||||
goto err;
|
||||
|
||||
skb->dev = fwd;
|
||||
_trace_xdp_redirect(dev, xdp_prog, index);
|
||||
generic_xdp_tx(skb, xdp_prog);
|
||||
return 0;
|
||||
return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id);
|
||||
err:
|
||||
_trace_xdp_redirect_err(dev, xdp_prog, index, err);
|
||||
_trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -4094,10 +4055,12 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
|
||||
if (unlikely(flags))
|
||||
return XDP_ABORTED;
|
||||
|
||||
ri->flags = flags;
|
||||
/* NB! Map type UNSPEC and map_id == INT_MAX (never generated
|
||||
* by map_idr) is used for ifindex based XDP redirect.
|
||||
*/
|
||||
ri->tgt_index = ifindex;
|
||||
ri->tgt_value = NULL;
|
||||
WRITE_ONCE(ri->map, NULL);
|
||||
ri->map_id = INT_MAX;
|
||||
ri->map_type = BPF_MAP_TYPE_UNSPEC;
|
||||
|
||||
return XDP_REDIRECT;
|
||||
}
|
||||
@ -4113,28 +4076,7 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
	   u64, flags)
{
	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);

	/* Lower bits of the flags are used as return code on lookup failure */
	if (unlikely(flags > XDP_TX))
		return XDP_ABORTED;

	ri->tgt_value = __xdp_map_lookup_elem(map, ifindex);
	if (unlikely(!ri->tgt_value)) {
		/* If the lookup fails we want to clear out the state in the
		 * redirect_info struct completely, so that if an eBPF program
		 * performs multiple lookups, the last one always takes
		 * precedence.
		 */
		WRITE_ONCE(ri->map, NULL);
		return flags;
	}

	ri->flags = flags;
	ri->tgt_index = ifindex;
	WRITE_ONCE(ri->map, map);

	return XDP_REDIRECT;
	return map->ops->map_redirect(map, ifindex, flags);
}

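From a program's point of view nothing changes here: bpf_redirect_map() keeps its signature and flags semantics, the lookup and enqueue simply move behind the map's new map_redirect operation. A minimal XDP sketch (the map name and key choice are hypothetical):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(max_entries, 64);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} tx_ports SEC(".maps");

SEC("xdp")
int xdp_redirect_devmap(struct xdp_md *ctx)
{
	__u32 key = ctx->rx_queue_index;

	/* Lower bits of the flags argument are returned if the lookup fails. */
	return bpf_redirect_map(&tx_ports, key, XDP_PASS);
}

char LICENSE[] SEC("license") = "GPL";
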
static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
|
||||
@ -9655,22 +9597,40 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
|
||||
return insn - insn_buf;
|
||||
}
|
||||
|
||||
/* data_end = skb->data + skb_headlen() */
|
||||
static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,
|
||||
struct bpf_insn *insn)
|
||||
{
|
||||
/* si->dst_reg = skb->data */
|
||||
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
|
||||
si->dst_reg, si->src_reg,
|
||||
offsetof(struct sk_buff, data));
|
||||
/* AX = skb->len */
|
||||
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
|
||||
BPF_REG_AX, si->src_reg,
|
||||
offsetof(struct sk_buff, len));
|
||||
/* si->dst_reg = skb->data + skb->len */
|
||||
*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
|
||||
/* AX = skb->data_len */
|
||||
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),
|
||||
BPF_REG_AX, si->src_reg,
|
||||
offsetof(struct sk_buff, data_len));
|
||||
/* si->dst_reg = skb->data + skb->len - skb->data_len */
|
||||
*insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX);
|
||||
|
||||
return insn;
|
||||
}
|
||||
|
||||
static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
|
||||
const struct bpf_insn *si,
|
||||
struct bpf_insn *insn_buf,
|
||||
struct bpf_prog *prog, u32 *target_size)
|
||||
{
|
||||
struct bpf_insn *insn = insn_buf;
|
||||
int off;
|
||||
|
||||
switch (si->off) {
|
||||
case offsetof(struct __sk_buff, data_end):
|
||||
off = si->off;
|
||||
off -= offsetof(struct __sk_buff, data_end);
|
||||
off += offsetof(struct sk_buff, cb);
|
||||
off += offsetof(struct tcp_skb_cb, bpf.data_end);
|
||||
*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
|
||||
si->src_reg, off);
|
||||
insn = bpf_convert_data_end_access(si, insn);
|
||||
break;
|
||||
default:
|
||||
return bpf_convert_ctx_access(type, si, insn_buf, prog,
|
||||
@ -10449,6 +10409,7 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
|
||||
}
|
||||
|
||||
const struct bpf_prog_ops sk_lookup_prog_ops = {
|
||||
.test_run = bpf_prog_test_run_sk_lookup,
|
||||
};
|
||||
|
||||
const struct bpf_verifier_ops sk_lookup_verifier_ops = {
|
||||
|
net/core/skmsg.c | 212
@ -525,7 +525,8 @@ static void sk_psock_backlog(struct work_struct *work)
|
||||
len = skb->len;
|
||||
off = 0;
|
||||
start:
|
||||
ingress = tcp_skb_bpf_ingress(skb);
|
||||
ingress = skb_bpf_ingress(skb);
|
||||
skb_bpf_redirect_clear(skb);
|
||||
do {
|
||||
ret = -EIO;
|
||||
if (likely(psock->sk->sk_socket))
|
||||
@ -618,7 +619,7 @@ struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock)
|
||||
return link;
|
||||
}
|
||||
|
||||
void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
|
||||
static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
|
||||
{
|
||||
struct sk_msg *msg, *tmp;
|
||||
|
||||
@ -631,7 +632,12 @@ void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
|
||||
|
||||
static void sk_psock_zap_ingress(struct sk_psock *psock)
|
||||
{
|
||||
__skb_queue_purge(&psock->ingress_skb);
|
||||
struct sk_buff *skb;
|
||||
|
||||
while ((skb = __skb_dequeue(&psock->ingress_skb)) != NULL) {
|
||||
skb_bpf_redirect_clear(skb);
|
||||
kfree_skb(skb);
|
||||
}
|
||||
__sk_psock_purge_ingress_msg(psock);
|
||||
}
|
||||
|
||||
@ -645,15 +651,15 @@ static void sk_psock_link_destroy(struct sk_psock *psock)
|
||||
}
|
||||
}
|
||||
|
||||
static void sk_psock_done_strp(struct sk_psock *psock);
|
||||
|
||||
static void sk_psock_destroy_deferred(struct work_struct *gc)
|
||||
{
|
||||
struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
|
||||
|
||||
/* No sk_callback_lock since already detached. */
|
||||
|
||||
/* Parser has been stopped */
|
||||
if (psock->progs.skb_parser)
|
||||
strp_done(&psock->parser.strp);
|
||||
sk_psock_done_strp(psock);
|
||||
|
||||
cancel_work_sync(&psock->work);
|
||||
|
||||
@ -685,9 +691,9 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
|
||||
write_lock_bh(&sk->sk_callback_lock);
|
||||
sk_psock_restore_proto(sk, psock);
|
||||
rcu_assign_sk_user_data(sk, NULL);
|
||||
if (psock->progs.skb_parser)
|
||||
if (psock->progs.stream_parser)
|
||||
sk_psock_stop_strp(sk, psock);
|
||||
else if (psock->progs.skb_verdict)
|
||||
else if (psock->progs.stream_verdict)
|
||||
sk_psock_stop_verdict(sk, psock);
|
||||
write_unlock_bh(&sk->sk_callback_lock);
|
||||
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
|
||||
@ -743,27 +749,12 @@ out:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
|
||||
|
||||
static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
bpf_compute_data_end_sk_skb(skb);
|
||||
return bpf_prog_run_pin_on_cpu(prog, skb);
|
||||
}
|
||||
|
||||
static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
|
||||
{
|
||||
struct sk_psock_parser *parser;
|
||||
|
||||
parser = container_of(strp, struct sk_psock_parser, strp);
|
||||
return container_of(parser, struct sk_psock, parser);
|
||||
}
|
||||
|
||||
static void sk_psock_skb_redirect(struct sk_buff *skb)
|
||||
{
|
||||
struct sk_psock *psock_other;
|
||||
struct sock *sk_other;
|
||||
|
||||
sk_other = tcp_skb_bpf_redirect_fetch(skb);
|
||||
sk_other = skb_bpf_redirect_fetch(skb);
|
||||
/* This error is a buggy BPF program, it returned a redirect
|
||||
* return code, but then didn't set a redirect interface.
|
||||
*/
|
||||
@ -806,16 +797,17 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
|
||||
int ret = __SK_PASS;
|
||||
|
||||
rcu_read_lock();
|
||||
prog = READ_ONCE(psock->progs.skb_verdict);
|
||||
prog = READ_ONCE(psock->progs.stream_verdict);
|
||||
if (likely(prog)) {
|
||||
/* We skip full set_owner_r here because if we do a SK_PASS
|
||||
* or SK_DROP we can skip skb memory accounting and use the
|
||||
* TLS context.
|
||||
*/
|
||||
skb->sk = psock->sk;
|
||||
tcp_skb_bpf_redirect_clear(skb);
|
||||
ret = sk_psock_bpf_run(psock, prog, skb);
|
||||
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
|
||||
skb_dst_drop(skb);
|
||||
skb_bpf_redirect_clear(skb);
|
||||
ret = bpf_prog_run_pin_on_cpu(prog, skb);
|
||||
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
|
||||
skb->sk = NULL;
|
||||
}
|
||||
sk_psock_tls_verdict_apply(skb, psock->sk, ret);
|
||||
@ -827,7 +819,6 @@ EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
|
||||
static void sk_psock_verdict_apply(struct sk_psock *psock,
|
||||
struct sk_buff *skb, int verdict)
|
||||
{
|
||||
struct tcp_skb_cb *tcp;
|
||||
struct sock *sk_other;
|
||||
int err = -EIO;
|
||||
|
||||
@ -839,8 +830,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
tcp = TCP_SKB_CB(skb);
|
||||
tcp->bpf.flags |= BPF_F_INGRESS;
|
||||
skb_bpf_set_ingress(skb);
|
||||
|
||||
/* If the queue is empty then we can submit directly
|
||||
* into the msg queue. If its not empty we have to
|
||||
@ -866,6 +856,24 @@ out_free:
|
||||
}
|
||||
}
|
||||
|
||||
static void sk_psock_write_space(struct sock *sk)
|
||||
{
|
||||
struct sk_psock *psock;
|
||||
void (*write_space)(struct sock *sk) = NULL;
|
||||
|
||||
rcu_read_lock();
|
||||
psock = sk_psock(sk);
|
||||
if (likely(psock)) {
|
||||
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
|
||||
schedule_work(&psock->work);
|
||||
write_space = psock->saved_write_space;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
if (write_space)
|
||||
write_space(sk);
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
|
||||
static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
|
||||
{
|
||||
struct sk_psock *psock;
|
||||
@ -881,11 +889,12 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
|
||||
goto out;
|
||||
}
|
||||
skb_set_owner_r(skb, sk);
|
||||
prog = READ_ONCE(psock->progs.skb_verdict);
|
||||
prog = READ_ONCE(psock->progs.stream_verdict);
|
||||
if (likely(prog)) {
|
||||
tcp_skb_bpf_redirect_clear(skb);
|
||||
ret = sk_psock_bpf_run(psock, prog, skb);
|
||||
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
|
||||
skb_dst_drop(skb);
|
||||
skb_bpf_redirect_clear(skb);
|
||||
ret = bpf_prog_run_pin_on_cpu(prog, skb);
|
||||
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
|
||||
}
|
||||
sk_psock_verdict_apply(psock, skb, ret);
|
||||
out:
|
||||
@ -899,15 +908,15 @@ static int sk_psock_strp_read_done(struct strparser *strp, int err)
|
||||
|
||||
static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
|
||||
{
|
||||
struct sk_psock *psock = sk_psock_from_strp(strp);
|
||||
struct sk_psock *psock = container_of(strp, struct sk_psock, strp);
|
||||
struct bpf_prog *prog;
|
||||
int ret = skb->len;
|
||||
|
||||
rcu_read_lock();
|
||||
prog = READ_ONCE(psock->progs.skb_parser);
|
||||
prog = READ_ONCE(psock->progs.stream_parser);
|
||||
if (likely(prog)) {
|
||||
skb->sk = psock->sk;
|
||||
ret = sk_psock_bpf_run(psock, prog, skb);
|
||||
ret = bpf_prog_run_pin_on_cpu(prog, skb);
|
||||
skb->sk = NULL;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
@ -923,16 +932,59 @@ static void sk_psock_strp_data_ready(struct sock *sk)
|
||||
psock = sk_psock(sk);
|
||||
if (likely(psock)) {
|
||||
if (tls_sw_has_ctx_rx(sk)) {
|
||||
psock->parser.saved_data_ready(sk);
|
||||
psock->saved_data_ready(sk);
|
||||
} else {
|
||||
write_lock_bh(&sk->sk_callback_lock);
|
||||
strp_data_ready(&psock->parser.strp);
|
||||
strp_data_ready(&psock->strp);
|
||||
write_unlock_bh(&sk->sk_callback_lock);
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
static const struct strp_callbacks cb = {
|
||||
.rcv_msg = sk_psock_strp_read,
|
||||
.read_sock_done = sk_psock_strp_read_done,
|
||||
.parse_msg = sk_psock_strp_parse,
|
||||
};
|
||||
|
||||
return strp_init(&psock->strp, sk, &cb);
|
||||
}
|
||||
|
||||
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
if (psock->saved_data_ready)
|
||||
return;
|
||||
|
||||
psock->saved_data_ready = sk->sk_data_ready;
|
||||
sk->sk_data_ready = sk_psock_strp_data_ready;
|
||||
sk->sk_write_space = sk_psock_write_space;
|
||||
}
|
||||
|
||||
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
if (!psock->saved_data_ready)
|
||||
return;
|
||||
|
||||
sk->sk_data_ready = psock->saved_data_ready;
|
||||
psock->saved_data_ready = NULL;
|
||||
strp_stop(&psock->strp);
|
||||
}
|
||||
|
||||
static void sk_psock_done_strp(struct sk_psock *psock)
|
||||
{
|
||||
/* Parser has been stopped */
|
||||
if (psock->progs.stream_parser)
|
||||
strp_done(&psock->strp);
|
||||
}
|
||||
#else
|
||||
static void sk_psock_done_strp(struct sk_psock *psock)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_BPF_STREAM_PARSER */
|
||||
|
||||
static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
|
||||
unsigned int offset, size_t orig_len)
|
||||
{
|
||||
@ -957,11 +1009,12 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
|
||||
goto out;
|
||||
}
|
||||
skb_set_owner_r(skb, sk);
|
||||
prog = READ_ONCE(psock->progs.skb_verdict);
|
||||
prog = READ_ONCE(psock->progs.stream_verdict);
|
||||
if (likely(prog)) {
|
||||
tcp_skb_bpf_redirect_clear(skb);
|
||||
ret = sk_psock_bpf_run(psock, prog, skb);
|
||||
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
|
||||
skb_dst_drop(skb);
|
||||
skb_bpf_redirect_clear(skb);
|
||||
ret = bpf_prog_run_pin_on_cpu(prog, skb);
|
||||
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
|
||||
}
|
||||
sk_psock_verdict_apply(psock, skb, ret);
|
||||
out:
|
||||
@ -984,82 +1037,21 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
|
||||
sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
|
||||
}
|
||||
|
||||
static void sk_psock_write_space(struct sock *sk)
|
||||
{
|
||||
struct sk_psock *psock;
|
||||
void (*write_space)(struct sock *sk) = NULL;
|
||||
|
||||
rcu_read_lock();
|
||||
psock = sk_psock(sk);
|
||||
if (likely(psock)) {
|
||||
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
|
||||
schedule_work(&psock->work);
|
||||
write_space = psock->saved_write_space;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
if (write_space)
|
||||
write_space(sk);
|
||||
}
|
||||
|
||||
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
static const struct strp_callbacks cb = {
|
||||
.rcv_msg = sk_psock_strp_read,
|
||||
.read_sock_done = sk_psock_strp_read_done,
|
||||
.parse_msg = sk_psock_strp_parse,
|
||||
};
|
||||
|
||||
psock->parser.enabled = false;
|
||||
return strp_init(&psock->parser.strp, sk, &cb);
|
||||
}
|
||||
|
||||
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
struct sk_psock_parser *parser = &psock->parser;
|
||||
|
||||
if (parser->enabled)
|
||||
if (psock->saved_data_ready)
|
||||
return;
|
||||
|
||||
parser->saved_data_ready = sk->sk_data_ready;
|
||||
psock->saved_data_ready = sk->sk_data_ready;
|
||||
sk->sk_data_ready = sk_psock_verdict_data_ready;
|
||||
sk->sk_write_space = sk_psock_write_space;
|
||||
parser->enabled = true;
|
||||
}
|
||||
|
||||
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
struct sk_psock_parser *parser = &psock->parser;
|
||||
|
||||
if (parser->enabled)
|
||||
return;
|
||||
|
||||
parser->saved_data_ready = sk->sk_data_ready;
|
||||
sk->sk_data_ready = sk_psock_strp_data_ready;
|
||||
sk->sk_write_space = sk_psock_write_space;
|
||||
parser->enabled = true;
|
||||
}
|
||||
|
||||
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
struct sk_psock_parser *parser = &psock->parser;
|
||||
|
||||
if (!parser->enabled)
|
||||
return;
|
||||
|
||||
sk->sk_data_ready = parser->saved_data_ready;
|
||||
parser->saved_data_ready = NULL;
|
||||
strp_stop(&parser->strp);
|
||||
parser->enabled = false;
|
||||
}
|
||||
|
||||
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
struct sk_psock_parser *parser = &psock->parser;
|
||||
|
||||
if (!parser->enabled)
|
||||
if (!psock->saved_data_ready)
|
||||
return;
|
||||
|
||||
sk->sk_data_ready = parser->saved_data_ready;
|
||||
parser->saved_data_ready = NULL;
|
||||
parser->enabled = false;
|
||||
sk->sk_data_ready = psock->saved_data_ready;
|
||||
psock->saved_data_ready = NULL;
|
||||
}
|
||||
|
@ -24,6 +24,9 @@ struct bpf_stab {
|
||||
#define SOCK_CREATE_FLAG_MASK \
|
||||
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
|
||||
|
||||
static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
|
||||
struct bpf_prog *old, u32 which);
|
||||
|
||||
static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
|
||||
{
|
||||
struct bpf_stab *stab;
|
||||
@ -148,9 +151,9 @@ static void sock_map_del_link(struct sock *sk,
|
||||
struct bpf_map *map = link->map;
|
||||
struct bpf_stab *stab = container_of(map, struct bpf_stab,
|
||||
map);
|
||||
if (psock->parser.enabled && stab->progs.skb_parser)
|
||||
if (psock->saved_data_ready && stab->progs.stream_parser)
|
||||
strp_stop = true;
|
||||
if (psock->parser.enabled && stab->progs.skb_verdict)
|
||||
if (psock->saved_data_ready && stab->progs.stream_verdict)
|
||||
verdict_stop = true;
|
||||
list_del(&link->list);
|
||||
sk_psock_free_link(link);
|
||||
@ -224,23 +227,23 @@ out:
|
||||
static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
|
||||
struct sock *sk)
|
||||
{
|
||||
struct bpf_prog *msg_parser, *skb_parser, *skb_verdict;
|
||||
struct bpf_prog *msg_parser, *stream_parser, *stream_verdict;
|
||||
struct sk_psock *psock;
|
||||
int ret;
|
||||
|
||||
skb_verdict = READ_ONCE(progs->skb_verdict);
|
||||
if (skb_verdict) {
|
||||
skb_verdict = bpf_prog_inc_not_zero(skb_verdict);
|
||||
if (IS_ERR(skb_verdict))
|
||||
return PTR_ERR(skb_verdict);
|
||||
stream_verdict = READ_ONCE(progs->stream_verdict);
|
||||
if (stream_verdict) {
|
||||
stream_verdict = bpf_prog_inc_not_zero(stream_verdict);
|
||||
if (IS_ERR(stream_verdict))
|
||||
return PTR_ERR(stream_verdict);
|
||||
}
|
||||
|
||||
skb_parser = READ_ONCE(progs->skb_parser);
|
||||
if (skb_parser) {
|
||||
skb_parser = bpf_prog_inc_not_zero(skb_parser);
|
||||
if (IS_ERR(skb_parser)) {
|
||||
ret = PTR_ERR(skb_parser);
|
||||
goto out_put_skb_verdict;
|
||||
stream_parser = READ_ONCE(progs->stream_parser);
|
||||
if (stream_parser) {
|
||||
stream_parser = bpf_prog_inc_not_zero(stream_parser);
|
||||
if (IS_ERR(stream_parser)) {
|
||||
ret = PTR_ERR(stream_parser);
|
||||
goto out_put_stream_verdict;
|
||||
}
|
||||
}
|
||||
|
||||
@ -249,7 +252,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
|
||||
msg_parser = bpf_prog_inc_not_zero(msg_parser);
|
||||
if (IS_ERR(msg_parser)) {
|
||||
ret = PTR_ERR(msg_parser);
|
||||
goto out_put_skb_parser;
|
||||
goto out_put_stream_parser;
|
||||
}
|
||||
}
|
||||
|
||||
@ -261,8 +264,8 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
|
||||
|
||||
if (psock) {
|
||||
if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
|
||||
(skb_parser && READ_ONCE(psock->progs.skb_parser)) ||
|
||||
(skb_verdict && READ_ONCE(psock->progs.skb_verdict))) {
|
||||
(stream_parser && READ_ONCE(psock->progs.stream_parser)) ||
|
||||
(stream_verdict && READ_ONCE(psock->progs.stream_verdict))) {
|
||||
sk_psock_put(sk, psock);
|
||||
ret = -EBUSY;
|
||||
goto out_progs;
|
||||
@ -283,15 +286,15 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
|
||||
goto out_drop;
|
||||
|
||||
write_lock_bh(&sk->sk_callback_lock);
|
||||
if (skb_parser && skb_verdict && !psock->parser.enabled) {
|
||||
if (stream_parser && stream_verdict && !psock->saved_data_ready) {
|
||||
ret = sk_psock_init_strp(sk, psock);
|
||||
if (ret)
|
||||
goto out_unlock_drop;
|
||||
psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
|
||||
psock_set_prog(&psock->progs.skb_parser, skb_parser);
|
||||
psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
|
||||
psock_set_prog(&psock->progs.stream_parser, stream_parser);
|
||||
sk_psock_start_strp(sk, psock);
|
||||
} else if (!skb_parser && skb_verdict && !psock->parser.enabled) {
|
||||
psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
|
||||
} else if (!stream_parser && stream_verdict && !psock->saved_data_ready) {
|
||||
psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
|
||||
sk_psock_start_verdict(sk, psock);
|
||||
}
|
||||
write_unlock_bh(&sk->sk_callback_lock);
|
||||
@ -303,12 +306,12 @@ out_drop:
|
||||
out_progs:
|
||||
if (msg_parser)
|
||||
bpf_prog_put(msg_parser);
|
||||
out_put_skb_parser:
|
||||
if (skb_parser)
|
||||
bpf_prog_put(skb_parser);
|
||||
out_put_skb_verdict:
|
||||
if (skb_verdict)
|
||||
bpf_prog_put(skb_verdict);
|
||||
out_put_stream_parser:
|
||||
if (stream_parser)
|
||||
bpf_prog_put(stream_parser);
|
||||
out_put_stream_verdict:
|
||||
if (stream_verdict)
|
||||
bpf_prog_put(stream_verdict);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -657,7 +660,6 @@ const struct bpf_func_proto bpf_sock_map_update_proto = {
|
||||
BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
|
||||
struct bpf_map *, map, u32, key, u64, flags)
|
||||
{
|
||||
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
|
||||
struct sock *sk;
|
||||
|
||||
if (unlikely(flags & ~(BPF_F_INGRESS)))
|
||||
@ -667,8 +669,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
|
||||
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
|
||||
return SK_DROP;
|
||||
|
||||
tcb->bpf.flags = flags;
|
||||
tcb->bpf.sk_redir = sk;
|
||||
skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS);
|
||||
return SK_PASS;
|
||||
}
|
||||
|
||||
@ -1250,7 +1251,6 @@ const struct bpf_func_proto bpf_sock_hash_update_proto = {
|
||||
BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
|
||||
struct bpf_map *, map, void *, key, u64, flags)
|
||||
{
|
||||
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
|
||||
struct sock *sk;
|
||||
|
||||
if (unlikely(flags & ~(BPF_F_INGRESS)))
|
||||
@ -1260,8 +1260,7 @@ BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
|
||||
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
|
||||
return SK_DROP;
|
||||
|
||||
tcb->bpf.flags = flags;
|
||||
tcb->bpf.sk_redir = sk;
|
||||
skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS);
|
||||
return SK_PASS;
|
||||
}
|
||||
|
||||
@ -1448,8 +1447,8 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
|
||||
struct bpf_prog *old, u32 which)
|
||||
static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
|
||||
struct bpf_prog *old, u32 which)
|
||||
{
|
||||
struct sk_psock_progs *progs = sock_map_progs(map);
|
||||
struct bpf_prog **pprog;
|
||||
@ -1461,11 +1460,13 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
|
||||
case BPF_SK_MSG_VERDICT:
|
||||
pprog = &progs->msg_parser;
|
||||
break;
|
||||
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
|
||||
case BPF_SK_SKB_STREAM_PARSER:
|
||||
pprog = &progs->skb_parser;
|
||||
pprog = &progs->stream_parser;
|
||||
break;
|
||||
#endif
|
||||
case BPF_SK_SKB_STREAM_VERDICT:
|
||||
pprog = &progs->skb_verdict;
|
||||
pprog = &progs->stream_verdict;
|
||||
break;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
|
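For orientation, a hedged user-space sketch of how the renamed stream_parser/stream_verdict hooks above end up being exercised. The function and variable names (setup_sockmap, parser_fd, verdict_fd, sock_fd) are illustrative and not part of this patch; the calls use libbpf's wrappers rather than the raw syscall.

/* Attach verdict/parser programs to a sockmap and add a connected TCP
 * socket; adding the socket is what reaches sock_map_link() above. */
#include <bpf/bpf.h>
#include <linux/bpf.h>

static int setup_sockmap(int map_fd, int parser_fd, int verdict_fd, int sock_fd)
{
	int key = 0, err;

	/* The parser attach is optional: a verdict-only setup (the
	 * "!stream_parser && stream_verdict" branch above) skips it. */
	err = bpf_prog_attach(parser_fd, map_fd, BPF_SK_SKB_STREAM_PARSER, 0);
	if (err)
		return err;
	err = bpf_prog_attach(verdict_fd, map_fd, BPF_SK_SKB_STREAM_VERDICT, 0);
	if (err)
		return err;
	/* Installs the psock callbacks (saved_data_ready etc.) on the socket. */
	return bpf_map_update_elem(map_fd, &key, &sock_fd, BPF_ANY);
}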
@ -62,7 +62,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
|
||||
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
|
||||
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
|
||||
obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
|
||||
obj-$(CONFIG_BPF_STREAM_PARSER) += udp_bpf.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += udp_bpf.o
|
||||
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
|
||||
|
||||
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
|
||||
|
@ -229,7 +229,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
|
||||
|
||||
#ifdef CONFIG_BPF_STREAM_PARSER
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
static bool tcp_bpf_stream_read(const struct sock *sk)
|
||||
{
|
||||
struct sk_psock *psock;
|
||||
@ -629,4 +629,4 @@ void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
|
||||
if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
|
||||
newsk->sk_prot = sk->sk_prot_creator;
|
||||
}
|
||||
#endif /* CONFIG_BPF_STREAM_PARSER */
|
||||
#endif /* CONFIG_BPF_SYSCALL */
|
||||
|
net/xdp/xsk.c
@ -445,6 +445,97 @@ static void xsk_destruct_skb(struct sk_buff *skb)
|
||||
sock_wfree(skb);
|
||||
}
|
||||
|
||||
static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
|
||||
struct xdp_desc *desc)
|
||||
{
|
||||
struct xsk_buff_pool *pool = xs->pool;
|
||||
u32 hr, len, ts, offset, copy, copied;
|
||||
struct sk_buff *skb;
|
||||
struct page *page;
|
||||
void *buffer;
|
||||
int err, i;
|
||||
u64 addr;
|
||||
|
||||
hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
|
||||
|
||||
skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
|
||||
if (unlikely(!skb))
|
||||
return ERR_PTR(err);
|
||||
|
||||
skb_reserve(skb, hr);
|
||||
|
||||
addr = desc->addr;
|
||||
len = desc->len;
|
||||
ts = pool->unaligned ? len : pool->chunk_size;
|
||||
|
||||
buffer = xsk_buff_raw_get_data(pool, addr);
|
||||
offset = offset_in_page(buffer);
|
||||
addr = buffer - pool->addrs;
|
||||
|
||||
for (copied = 0, i = 0; copied < len; i++) {
|
||||
page = pool->umem->pgs[addr >> PAGE_SHIFT];
|
||||
get_page(page);
|
||||
|
||||
copy = min_t(u32, PAGE_SIZE - offset, len - copied);
|
||||
skb_fill_page_desc(skb, i, page, offset, copy);
|
||||
|
||||
copied += copy;
|
||||
addr += copy;
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
skb->len += len;
|
||||
skb->data_len += len;
|
||||
skb->truesize += ts;
|
||||
|
||||
refcount_add(ts, &xs->sk.sk_wmem_alloc);
|
||||
|
||||
return skb;
|
||||
}
|
||||
|
||||
static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
|
||||
struct xdp_desc *desc)
|
||||
{
|
||||
struct net_device *dev = xs->dev;
|
||||
struct sk_buff *skb;
|
||||
|
||||
if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
|
||||
skb = xsk_build_skb_zerocopy(xs, desc);
|
||||
if (IS_ERR(skb))
|
||||
return skb;
|
||||
} else {
|
||||
u32 hr, tr, len;
|
||||
void *buffer;
|
||||
int err;
|
||||
|
||||
hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
|
||||
tr = dev->needed_tailroom;
|
||||
len = desc->len;
|
||||
|
||||
skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
|
||||
if (unlikely(!skb))
|
||||
return ERR_PTR(err);
|
||||
|
||||
skb_reserve(skb, hr);
|
||||
skb_put(skb, len);
|
||||
|
||||
buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
|
||||
err = skb_store_bits(skb, 0, buffer, len);
|
||||
if (unlikely(err)) {
|
||||
kfree_skb(skb);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
}
|
||||
|
||||
skb->dev = dev;
|
||||
skb->priority = xs->sk.sk_priority;
|
||||
skb->mark = xs->sk.sk_mark;
|
||||
skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
|
||||
skb->destructor = xsk_destruct_skb;
|
||||
|
||||
return skb;
|
||||
}
|
||||
|
||||
static int xsk_generic_xmit(struct sock *sk)
|
||||
{
|
||||
struct xdp_sock *xs = xdp_sk(sk);
|
||||
@ -461,43 +552,30 @@ static int xsk_generic_xmit(struct sock *sk)
|
||||
goto out;
|
||||
|
||||
while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
|
||||
char *buffer;
|
||||
u64 addr;
|
||||
u32 len;
|
||||
|
||||
if (max_batch-- == 0) {
|
||||
err = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
len = desc.len;
|
||||
skb = sock_alloc_send_skb(sk, len, 1, &err);
|
||||
if (unlikely(!skb))
|
||||
skb = xsk_build_skb(xs, &desc);
|
||||
if (IS_ERR(skb)) {
|
||||
err = PTR_ERR(skb);
|
||||
goto out;
|
||||
}
|
||||
|
||||
skb_put(skb, len);
|
||||
addr = desc.addr;
|
||||
buffer = xsk_buff_raw_get_data(xs->pool, addr);
|
||||
err = skb_store_bits(skb, 0, buffer, len);
|
||||
/* This is the backpressure mechanism for the Tx path.
|
||||
* Reserve space in the completion queue and only proceed
|
||||
* if there is space in it. This avoids having to implement
|
||||
* any buffering in the Tx path.
|
||||
*/
|
||||
spin_lock_irqsave(&xs->pool->cq_lock, flags);
|
||||
if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
|
||||
if (xskq_prod_reserve(xs->pool->cq)) {
|
||||
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
|
||||
kfree_skb(skb);
|
||||
goto out;
|
||||
}
|
||||
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
|
||||
|
||||
skb->dev = xs->dev;
|
||||
skb->priority = sk->sk_priority;
|
||||
skb->mark = sk->sk_mark;
|
||||
skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
|
||||
skb->destructor = xsk_destruct_skb;
|
||||
|
||||
err = __dev_direct_xmit(skb, xs->queue_id);
|
||||
if (err == NETDEV_TX_BUSY) {
|
||||
/* Tell user-space to retry the send */
|
||||
|
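As a rough user-space counterpart to xsk_generic_xmit() above, the sketch below shows the copy-mode TX flow with libbpf's xsk.h helpers: reserve a TX descriptor, submit it, kick the kernel with sendto(), then reap the completion entry that the backpressure logic reserved. The xsk, tx, cq and frame_addr names are illustrative; this assumes an AF_XDP socket and UMEM already set up.

#include <errno.h>
#include <sys/socket.h>
#include <bpf/xsk.h>

static int tx_one(struct xsk_socket *xsk, struct xsk_ring_prod *tx,
		  struct xsk_ring_cons *cq, __u64 frame_addr, __u32 len)
{
	__u32 idx;

	if (xsk_ring_prod__reserve(tx, 1, &idx) != 1)
		return -EAGAIN;				/* TX ring full */

	xsk_ring_prod__tx_desc(tx, idx)->addr = frame_addr;
	xsk_ring_prod__tx_desc(tx, idx)->len = len;
	xsk_ring_prod__submit(tx, 1);

	/* Kick the kernel; for copy mode this lands in xsk_generic_xmit(). */
	sendto(xsk_socket__fd(xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);

	/* Reap one completion so the frame can be reused. */
	if (xsk_ring_cons__peek(cq, 1, &idx) == 1)
		xsk_ring_cons__release(cq, 1);
	return 0;
}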
@ -47,19 +47,18 @@ struct xsk_queue {
|
||||
u64 queue_empty_descs;
|
||||
};
|
||||
|
||||
/* The structure of the shared state of the rings are the same as the
|
||||
* ring buffer in kernel/events/ring_buffer.c. For the Rx and completion
|
||||
* ring, the kernel is the producer and user space is the consumer. For
|
||||
* the Tx and fill rings, the kernel is the consumer and user space is
|
||||
* the producer.
|
||||
/* The structure of the shared state of the rings is a simple
|
||||
* circular buffer, as outlined in
|
||||
* Documentation/core-api/circular-buffers.rst. For the Rx and
|
||||
* completion ring, the kernel is the producer and user space is the
|
||||
* consumer. For the Tx and fill rings, the kernel is the consumer and
|
||||
* user space is the producer.
|
||||
*
|
||||
* producer consumer
|
||||
*
|
||||
* if (LOAD ->consumer) { LOAD ->producer
|
||||
* (A) smp_rmb() (C)
|
||||
* if (LOAD ->consumer) { (A) LOAD.acq ->producer (C)
|
||||
* STORE $data LOAD $data
|
||||
* smp_wmb() (B) smp_mb() (D)
|
||||
* STORE ->producer STORE ->consumer
|
||||
* STORE.rel ->producer (B) STORE.rel ->consumer (D)
|
||||
* }
|
||||
*
|
||||
* (A) pairs with (D), and (B) pairs with (C).
|
||||
@ -78,7 +77,8 @@ struct xsk_queue {
|
||||
*
|
||||
* (A) is a control dependency that separates the load of ->consumer
|
||||
* from the stores of $data. In case ->consumer indicates there is no
|
||||
* room in the buffer to store $data we do not. So no barrier is needed.
|
||||
* room in the buffer to store $data we do not. The dependency will
|
||||
* order both of the stores after the loads. So no barrier is needed.
|
||||
*
|
||||
* (D) protects the load of the data to be observed to happen after the
|
||||
* store of the consumer pointer. If we did not have this memory
|
||||
@ -227,15 +227,13 @@ static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q,
|
||||
|
||||
static inline void __xskq_cons_release(struct xsk_queue *q)
|
||||
{
|
||||
smp_mb(); /* D, matches A */
|
||||
WRITE_ONCE(q->ring->consumer, q->cached_cons);
|
||||
smp_store_release(&q->ring->consumer, q->cached_cons); /* D, matches A */
|
||||
}
|
||||
|
||||
static inline void __xskq_cons_peek(struct xsk_queue *q)
|
||||
{
|
||||
/* Refresh the local pointer */
|
||||
q->cached_prod = READ_ONCE(q->ring->producer);
|
||||
smp_rmb(); /* C, matches B */
|
||||
q->cached_prod = smp_load_acquire(&q->ring->producer); /* C, matches B */
|
||||
}
|
||||
|
||||
static inline void xskq_cons_get_entries(struct xsk_queue *q)
|
||||
@ -397,9 +395,7 @@ static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
|
||||
|
||||
static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx)
|
||||
{
|
||||
smp_wmb(); /* B, matches C */
|
||||
|
||||
WRITE_ONCE(q->ring->producer, idx);
|
||||
smp_store_release(&q->ring->producer, idx); /* B, matches C */
|
||||
}
|
||||
|
||||
static inline void xskq_prod_submit(struct xsk_queue *q)
|
||||
|
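The pairing described in the comment above (B with C, D with A) is the usual acquire/release single-producer/single-consumer protocol. Below is a small, self-contained C11 sketch of the same scheme, not the kernel implementation: the kernel relies on a control dependency at (A), while this sketch conservatively uses a load-acquire there, which is stronger but pairs the same way. RING_SIZE, ring_produce and ring_consume are illustrative names.

#include <stdatomic.h>
#include <stdbool.h>

#define RING_SIZE 64	/* assumed power of two */

struct ring {
	_Atomic unsigned int producer;
	_Atomic unsigned int consumer;
	int slots[RING_SIZE];
};

static bool ring_produce(struct ring *r, int val)
{
	unsigned int prod = atomic_load_explicit(&r->producer, memory_order_relaxed);
	unsigned int cons = atomic_load_explicit(&r->consumer, memory_order_acquire); /* A */

	if (prod - cons == RING_SIZE)
		return false;				/* full */
	r->slots[prod & (RING_SIZE - 1)] = val;		/* STORE $data */
	atomic_store_explicit(&r->producer, prod + 1, memory_order_release); /* B */
	return true;
}

static bool ring_consume(struct ring *r, int *val)
{
	unsigned int cons = atomic_load_explicit(&r->consumer, memory_order_relaxed);
	unsigned int prod = atomic_load_explicit(&r->producer, memory_order_acquire); /* C */

	if (cons == prod)
		return false;				/* empty */
	*val = r->slots[cons & (RING_SIZE - 1)];	/* LOAD $data */
	atomic_store_explicit(&r->consumer, cons + 1, memory_order_release); /* D */
	return true;
}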
@ -87,7 +87,6 @@ static void xsk_map_free(struct bpf_map *map)
|
||||
{
|
||||
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||
|
||||
bpf_clear_redirect_map(map);
|
||||
synchronize_net();
|
||||
bpf_map_area_free(m);
|
||||
}
|
||||
@ -125,6 +124,16 @@ static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
|
||||
return insn - insn_buf;
|
||||
}
|
||||
|
||||
static void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
|
||||
{
|
||||
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||
|
||||
if (key >= map->max_entries)
|
||||
return NULL;
|
||||
|
||||
return READ_ONCE(m->xsk_map[key]);
|
||||
}
|
||||
|
||||
static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
|
||||
{
|
||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
@ -215,6 +224,11 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int xsk_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
|
||||
{
|
||||
return __bpf_xdp_redirect_map(map, ifindex, flags, __xsk_map_lookup_elem);
|
||||
}
|
||||
|
||||
void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
|
||||
struct xdp_sock **map_entry)
|
||||
{
|
||||
@ -247,4 +261,5 @@ const struct bpf_map_ops xsk_map_ops = {
|
||||
.map_check_btf = map_check_no_btf,
|
||||
.map_btf_name = "xsk_map",
|
||||
.map_btf_id = &xsk_map_btf_id,
|
||||
.map_redirect = xsk_map_redirect,
|
||||
};
|
||||
|
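The new .map_redirect op above is what lets bpf_redirect_map() resolve an XSKMAP lookup directly. A hedged BPF-side sketch of the usual consumer of that path follows; the map and program names are illustrative, and the third argument to bpf_redirect_map() is used as the fallback action when no socket is bound to the queue.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_XSKMAP);
	__uint(max_entries, 64);
	__type(key, __u32);
	__type(value, __u32);
} xsks_map SEC(".maps");

SEC("xdp")
int xdp_redirect_xsk(struct xdp_md *ctx)
{
	__u32 qid = ctx->rx_queue_index;

	/* Redirect to the AF_XDP socket bound to this RX queue, or pass. */
	return bpf_redirect_map(&xsks_map, qid, XDP_PASS);
}

char LICENSE[] SEC("license") = "GPL";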
@ -2,6 +2,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
#
|
||||
# Copyright (C) 2018-2019 Netronome Systems, Inc.
|
||||
# Copyright (C) 2021 Isovalent, Inc.
|
||||
|
||||
# In case user attempts to run with Python 2.
|
||||
from __future__ import print_function
|
||||
@ -13,6 +14,9 @@ import sys, os
|
||||
class NoHelperFound(BaseException):
|
||||
pass
|
||||
|
||||
class NoSyscallCommandFound(BaseException):
|
||||
pass
|
||||
|
||||
class ParsingError(BaseException):
|
||||
def __init__(self, line='<line not provided>', reader=None):
|
||||
if reader:
|
||||
@ -22,18 +26,27 @@ class ParsingError(BaseException):
|
||||
else:
|
||||
BaseException.__init__(self, 'Error parsing line: %s' % line)
|
||||
|
||||
class Helper(object):
|
||||
|
||||
class APIElement(object):
|
||||
"""
|
||||
An object representing the description of an eBPF helper function.
|
||||
@proto: function prototype of the helper function
|
||||
@desc: textual description of the helper function
|
||||
@ret: description of the return value of the helper function
|
||||
An object representing the description of an aspect of the eBPF API.
|
||||
@proto: prototype of the API symbol
|
||||
@desc: textual description of the symbol
|
||||
@ret: (optional) description of any associated return value
|
||||
"""
|
||||
def __init__(self, proto='', desc='', ret=''):
|
||||
self.proto = proto
|
||||
self.desc = desc
|
||||
self.ret = ret
|
||||
|
||||
|
||||
class Helper(APIElement):
|
||||
"""
|
||||
An object representing the description of an eBPF helper function.
|
||||
@proto: function prototype of the helper function
|
||||
@desc: textual description of the helper function
|
||||
@ret: description of the return value of the helper function
|
||||
"""
|
||||
def proto_break_down(self):
|
||||
"""
|
||||
Break down helper function protocol into smaller chunks: return type,
|
||||
@ -60,6 +73,7 @@ class Helper(object):
|
||||
|
||||
return res
|
||||
|
||||
|
||||
class HeaderParser(object):
|
||||
"""
|
||||
An object used to parse a file in order to extract the documentation of a
|
||||
@ -72,6 +86,13 @@ class HeaderParser(object):
|
||||
self.reader = open(filename, 'r')
|
||||
self.line = ''
|
||||
self.helpers = []
|
||||
self.commands = []
|
||||
|
||||
def parse_element(self):
|
||||
proto = self.parse_symbol()
|
||||
desc = self.parse_desc()
|
||||
ret = self.parse_ret()
|
||||
return APIElement(proto=proto, desc=desc, ret=ret)
|
||||
|
||||
def parse_helper(self):
|
||||
proto = self.parse_proto()
|
||||
@ -79,6 +100,18 @@ class HeaderParser(object):
|
||||
ret = self.parse_ret()
|
||||
return Helper(proto=proto, desc=desc, ret=ret)
|
||||
|
||||
def parse_symbol(self):
|
||||
p = re.compile(' \* ?(.+)$')
|
||||
capture = p.match(self.line)
|
||||
if not capture:
|
||||
raise NoSyscallCommandFound
|
||||
end_re = re.compile(' \* ?NOTES$')
|
||||
end = end_re.match(self.line)
|
||||
if end:
|
||||
raise NoSyscallCommandFound
|
||||
self.line = self.reader.readline()
|
||||
return capture.group(1)
|
||||
|
||||
def parse_proto(self):
|
||||
# Argument can be of shape:
|
||||
# - "void"
|
||||
@ -140,16 +173,29 @@ class HeaderParser(object):
|
||||
break
|
||||
return ret
|
||||
|
||||
def run(self):
|
||||
# Advance to start of helper function descriptions.
|
||||
offset = self.reader.read().find('* Start of BPF helper function descriptions:')
|
||||
def seek_to(self, target, help_message):
|
||||
self.reader.seek(0)
|
||||
offset = self.reader.read().find(target)
|
||||
if offset == -1:
|
||||
raise Exception('Could not find start of eBPF helper descriptions list')
|
||||
raise Exception(help_message)
|
||||
self.reader.seek(offset)
|
||||
self.reader.readline()
|
||||
self.reader.readline()
|
||||
self.line = self.reader.readline()
|
||||
|
||||
def parse_syscall(self):
|
||||
self.seek_to('* DOC: eBPF Syscall Commands',
|
||||
'Could not find start of eBPF syscall descriptions list')
|
||||
while True:
|
||||
try:
|
||||
command = self.parse_element()
|
||||
self.commands.append(command)
|
||||
except NoSyscallCommandFound:
|
||||
break
|
||||
|
||||
def parse_helpers(self):
|
||||
self.seek_to('* Start of BPF helper function descriptions:',
|
||||
'Could not find start of eBPF helper descriptions list')
|
||||
while True:
|
||||
try:
|
||||
helper = self.parse_helper()
|
||||
@ -157,6 +203,9 @@ class HeaderParser(object):
|
||||
except NoHelperFound:
|
||||
break
|
||||
|
||||
def run(self):
|
||||
self.parse_syscall()
|
||||
self.parse_helpers()
|
||||
self.reader.close()
|
||||
|
||||
###############################################################################
|
||||
@ -165,10 +214,11 @@ class Printer(object):
|
||||
"""
|
||||
A generic class for printers. Printers should be created with an array of
|
||||
Helper objects, and implement a way to print them in the desired fashion.
|
||||
@helpers: array of Helper objects to print to standard output
|
||||
@parser: A HeaderParser with objects to print to standard output
|
||||
"""
|
||||
def __init__(self, helpers):
|
||||
self.helpers = helpers
|
||||
def __init__(self, parser):
|
||||
self.parser = parser
|
||||
self.elements = []
|
||||
|
||||
def print_header(self):
|
||||
pass
|
||||
@ -181,19 +231,23 @@ class Printer(object):
|
||||
|
||||
def print_all(self):
|
||||
self.print_header()
|
||||
for helper in self.helpers:
|
||||
self.print_one(helper)
|
||||
for elem in self.elements:
|
||||
self.print_one(elem)
|
||||
self.print_footer()
|
||||
|
||||
|
||||
class PrinterRST(Printer):
|
||||
"""
|
||||
A printer for dumping collected information about helpers as a ReStructured
|
||||
Text page compatible with the rst2man program, which can be used to
|
||||
generate a manual page for the helpers.
|
||||
@helpers: array of Helper objects to print to standard output
|
||||
A generic class for printers that print ReStructured Text. Printers should
|
||||
be created with a HeaderParser object, and implement a way to print API
|
||||
elements in the desired fashion.
|
||||
@parser: A HeaderParser with objects to print to standard output
|
||||
"""
|
||||
def print_header(self):
|
||||
header = '''\
|
||||
def __init__(self, parser):
|
||||
self.parser = parser
|
||||
|
||||
def print_license(self):
|
||||
license = '''\
|
||||
.. Copyright (C) All BPF authors and contributors from 2014 to present.
|
||||
.. See git log include/uapi/linux/bpf.h in kernel tree for details.
|
||||
..
|
||||
@ -221,9 +275,39 @@ class PrinterRST(Printer):
|
||||
..
|
||||
.. Please do not edit this file. It was generated from the documentation
|
||||
.. located in file include/uapi/linux/bpf.h of the Linux kernel sources
|
||||
.. (helpers description), and from scripts/bpf_helpers_doc.py in the same
|
||||
.. (helpers description), and from scripts/bpf_doc.py in the same
|
||||
.. repository (header and footer).
|
||||
'''
|
||||
print(license)
|
||||
|
||||
def print_elem(self, elem):
|
||||
if (elem.desc):
|
||||
print('\tDescription')
|
||||
# Do not strip all newline characters: formatted code at the end of
|
||||
# a section must be followed by a blank line.
|
||||
for line in re.sub('\n$', '', elem.desc, count=1).split('\n'):
|
||||
print('{}{}'.format('\t\t' if line else '', line))
|
||||
|
||||
if (elem.ret):
|
||||
print('\tReturn')
|
||||
for line in elem.ret.rstrip().split('\n'):
|
||||
print('{}{}'.format('\t\t' if line else '', line))
|
||||
|
||||
print('')
|
||||
|
||||
|
||||
class PrinterHelpersRST(PrinterRST):
|
||||
"""
|
||||
A printer for dumping collected information about helpers as a ReStructured
|
||||
Text page compatible with the rst2man program, which can be used to
|
||||
generate a manual page for the helpers.
|
||||
@parser: A HeaderParser with Helper objects to print to standard output
|
||||
"""
|
||||
def __init__(self, parser):
|
||||
self.elements = parser.helpers
|
||||
|
||||
def print_header(self):
|
||||
header = '''\
|
||||
===========
|
||||
BPF-HELPERS
|
||||
===========
|
||||
@ -264,6 +348,7 @@ kernel at the top).
|
||||
HELPERS
|
||||
=======
|
||||
'''
|
||||
PrinterRST.print_license(self)
|
||||
print(header)
|
||||
|
||||
def print_footer(self):
|
||||
@ -380,27 +465,50 @@ SEE ALSO
|
||||
|
||||
def print_one(self, helper):
|
||||
self.print_proto(helper)
|
||||
self.print_elem(helper)
|
||||
|
||||
if (helper.desc):
|
||||
print('\tDescription')
|
||||
# Do not strip all newline characters: formatted code at the end of
|
||||
# a section must be followed by a blank line.
|
||||
for line in re.sub('\n$', '', helper.desc, count=1).split('\n'):
|
||||
print('{}{}'.format('\t\t' if line else '', line))
|
||||
|
||||
if (helper.ret):
|
||||
print('\tReturn')
|
||||
for line in helper.ret.rstrip().split('\n'):
|
||||
print('{}{}'.format('\t\t' if line else '', line))
|
||||
class PrinterSyscallRST(PrinterRST):
|
||||
"""
|
||||
A printer for dumping collected information about the syscall API as a
|
||||
ReStructured Text page compatible with the rst2man program, which can be
|
||||
used to generate a manual page for the syscall.
|
||||
@parser: A HeaderParser with APIElement objects to print to standard
|
||||
output
|
||||
"""
|
||||
def __init__(self, parser):
|
||||
self.elements = parser.commands
|
||||
|
||||
def print_header(self):
|
||||
header = '''\
|
||||
===
|
||||
bpf
|
||||
===
|
||||
-------------------------------------------------------------------------------
|
||||
Perform a command on an extended BPF object
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
:Manual section: 2
|
||||
|
||||
COMMANDS
|
||||
========
|
||||
'''
|
||||
PrinterRST.print_license(self)
|
||||
print(header)
|
||||
|
||||
def print_one(self, command):
|
||||
print('**%s**' % (command.proto))
|
||||
self.print_elem(command)
|
||||
|
||||
print('')
|
||||
|
||||
class PrinterHelpers(Printer):
|
||||
"""
|
||||
A printer for dumping collected information about helpers as C header to
|
||||
be included from BPF program.
|
||||
@helpers: array of Helper objects to print to standard output
|
||||
@parser: A HeaderParser with Helper objects to print to standard output
|
||||
"""
|
||||
def __init__(self, parser):
|
||||
self.elements = parser.helpers
|
||||
|
||||
type_fwds = [
|
||||
'struct bpf_fib_lookup',
|
||||
@ -511,7 +619,7 @@ class PrinterHelpers(Printer):
|
||||
|
||||
def print_header(self):
|
||||
header = '''\
|
||||
/* This is auto-generated file. See bpf_helpers_doc.py for details. */
|
||||
/* This is auto-generated file. See bpf_doc.py for details. */
|
||||
|
||||
/* Forward declarations of BPF structs */'''
|
||||
|
||||
@ -589,8 +697,13 @@ script = os.path.abspath(sys.argv[0])
|
||||
linuxRoot = os.path.dirname(os.path.dirname(script))
|
||||
bpfh = os.path.join(linuxRoot, 'include/uapi/linux/bpf.h')
|
||||
|
||||
printers = {
|
||||
'helpers': PrinterHelpersRST,
|
||||
'syscall': PrinterSyscallRST,
|
||||
}
|
||||
|
||||
argParser = argparse.ArgumentParser(description="""
|
||||
Parse eBPF header file and generate documentation for eBPF helper functions.
|
||||
Parse eBPF header file and generate documentation for the eBPF API.
|
||||
The RST-formatted output produced can be turned into a manual page with the
|
||||
rst2man utility.
|
||||
""")
|
||||
@ -601,6 +714,8 @@ if (os.path.isfile(bpfh)):
|
||||
default=bpfh)
|
||||
else:
|
||||
argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h')
|
||||
argParser.add_argument('target', nargs='?', default='helpers',
|
||||
choices=printers.keys(), help='eBPF API target')
|
||||
args = argParser.parse_args()
|
||||
|
||||
# Parse file.
|
||||
@ -609,7 +724,9 @@ headerParser.run()
|
||||
|
||||
# Print formatted output to standard output.
|
||||
if args.header:
|
||||
printer = PrinterHelpers(headerParser.helpers)
|
||||
if args.target != 'helpers':
|
||||
raise NotImplementedError('Only helpers header generation is supported')
|
||||
printer = PrinterHelpers(headerParser)
|
||||
else:
|
||||
printer = PrinterRST(headerParser.helpers)
|
||||
printer = printers[args.target](headerParser)
|
||||
printer.print_all()
|
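As a usage sketch (assuming the renamed script is run from the kernel tree), something like ./scripts/bpf_doc.py syscall | rst2man > bpf.2 would render the new syscall documentation as a man page, while the default helpers target keeps producing the bpf-helpers output exactly as the old bpf_helpers_doc.py did.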
@ -1,60 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
ifndef allow-override
|
||||
include ../scripts/Makefile.include
|
||||
include ../scripts/utilities.mak
|
||||
else
|
||||
# Assume Makefile.helpers is being run from bpftool/Documentation
|
||||
# subdirectory. Go up two more directories to fetch bpf.h header and
|
||||
# associated script.
|
||||
UP2DIR := ../../
|
||||
endif
|
||||
|
||||
INSTALL ?= install
|
||||
RM ?= rm -f
|
||||
RMDIR ?= rmdir --ignore-fail-on-non-empty
|
||||
|
||||
ifeq ($(V),1)
|
||||
Q =
|
||||
else
|
||||
Q = @
|
||||
endif
|
||||
|
||||
prefix ?= /usr/local
|
||||
mandir ?= $(prefix)/man
|
||||
man7dir = $(mandir)/man7
|
||||
|
||||
HELPERS_RST = bpf-helpers.rst
|
||||
MAN7_RST = $(HELPERS_RST)
|
||||
|
||||
_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
|
||||
DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
|
||||
|
||||
helpers: man7
|
||||
man7: $(DOC_MAN7)
|
||||
|
||||
RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
|
||||
|
||||
$(OUTPUT)$(HELPERS_RST): $(UP2DIR)../../include/uapi/linux/bpf.h
|
||||
$(QUIET_GEN)$(UP2DIR)../../scripts/bpf_helpers_doc.py --filename $< > $@
|
||||
|
||||
$(OUTPUT)%.7: $(OUTPUT)%.rst
|
||||
ifndef RST2MAN_DEP
|
||||
$(error "rst2man not found, but required to generate man pages")
|
||||
endif
|
||||
$(QUIET_GEN)rst2man $< > $@
|
||||
|
||||
helpers-clean:
|
||||
$(call QUIET_CLEAN, eBPF_helpers-manpage)
|
||||
$(Q)$(RM) $(DOC_MAN7) $(OUTPUT)$(HELPERS_RST)
|
||||
|
||||
helpers-install: helpers
|
||||
$(call QUIET_INSTALL, eBPF_helpers-manpage)
|
||||
$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
|
||||
$(Q)$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
|
||||
|
||||
helpers-uninstall:
|
||||
$(call QUIET_UNINST, eBPF_helpers-manpage)
|
||||
$(Q)$(RM) $(addprefix $(DESTDIR)$(man7dir)/,$(_DOC_MAN7))
|
||||
$(Q)$(RMDIR) $(DESTDIR)$(man7dir)
|
||||
|
||||
.PHONY: helpers helpers-clean helpers-install helpers-uninstall
|
@ -1198,7 +1198,7 @@ static int cmd_run(char *num)
|
||||
else
|
||||
return CMD_OK;
|
||||
bpf_reset();
|
||||
} while (pcap_next_pkt() && (!has_limit || (has_limit && ++i < pkts)));
|
||||
} while (pcap_next_pkt() && (!has_limit || (++i < pkts)));
|
||||
|
||||
rl_printf("bpf passes:%u fails:%u\n", pass, fail);
|
||||
|
||||
|
@ -185,13 +185,13 @@ ldx
|
||||
| OP_LDXB number '*' '(' '[' number ']' '&' number ')' {
|
||||
if ($2 != 4 || $9 != 0xf) {
|
||||
fprintf(stderr, "ldxb offset not supported!\n");
|
||||
exit(0);
|
||||
exit(1);
|
||||
} else {
|
||||
bpf_set_curr_instr(BPF_LDX | BPF_MSH | BPF_B, 0, 0, $6); } }
|
||||
| OP_LDX number '*' '(' '[' number ']' '&' number ')' {
|
||||
if ($2 != 4 || $9 != 0xf) {
|
||||
fprintf(stderr, "ldxb offset not supported!\n");
|
||||
exit(0);
|
||||
exit(1);
|
||||
} else {
|
||||
bpf_set_curr_instr(BPF_LDX | BPF_MSH | BPF_B, 0, 0, $6); } }
|
||||
;
|
||||
@ -472,7 +472,7 @@ static void bpf_assert_max(void)
|
||||
{
|
||||
if (curr_instr >= BPF_MAXINSNS) {
|
||||
fprintf(stderr, "only max %u insns allowed!\n", BPF_MAXINSNS);
|
||||
exit(0);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -522,7 +522,7 @@ static int bpf_find_insns_offset(const char *label)
|
||||
|
||||
if (ret == -ENOENT) {
|
||||
fprintf(stderr, "no such label \'%s\'!\n", label);
|
||||
exit(0);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@ -549,9 +549,11 @@ static uint8_t bpf_encode_jt_jf_offset(int off, int i)
|
||||
{
|
||||
int delta = off - i - 1;
|
||||
|
||||
if (delta < 0 || delta > 255)
|
||||
fprintf(stderr, "warning: insn #%d jumps to insn #%d, "
|
||||
if (delta < 0 || delta > 255) {
|
||||
fprintf(stderr, "error: insn #%d jumps to insn #%d, "
|
||||
"which is out of range\n", i, off);
|
||||
exit(1);
|
||||
}
|
||||
return (uint8_t) delta;
|
||||
}
|
||||
|
||||
|
tools/bpf/bpftool/.gitignore
@ -3,7 +3,6 @@
|
||||
/bootstrap/
|
||||
/bpftool
|
||||
bpftool*.8
|
||||
bpf-helpers.*
|
||||
FEATURE-DUMP.bpftool
|
||||
feature
|
||||
libbpf
|
||||
|
@ -16,15 +16,12 @@ prefix ?= /usr/local
|
||||
mandir ?= $(prefix)/man
|
||||
man8dir = $(mandir)/man8
|
||||
|
||||
# Load targets for building eBPF helpers man page.
|
||||
include ../../Makefile.helpers
|
||||
|
||||
MAN8_RST = $(wildcard bpftool*.rst)
|
||||
|
||||
_DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST))
|
||||
DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8))
|
||||
|
||||
man: man8 helpers
|
||||
man: man8
|
||||
man8: $(DOC_MAN8)
|
||||
|
||||
RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
|
||||
@ -46,16 +43,16 @@ ifndef RST2MAN_DEP
|
||||
endif
|
||||
$(QUIET_GEN)( cat $< ; printf "%b" $(call see_also,$<) ) | rst2man $(RST2MAN_OPTS) > $@
|
||||
|
||||
clean: helpers-clean
|
||||
clean:
|
||||
$(call QUIET_CLEAN, Documentation)
|
||||
$(Q)$(RM) $(DOC_MAN8)
|
||||
|
||||
install: man helpers-install
|
||||
install: man
|
||||
$(call QUIET_INSTALL, Documentation-man)
|
||||
$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
|
||||
$(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir)
|
||||
|
||||
uninstall: helpers-uninstall
|
||||
uninstall:
|
||||
$(call QUIET_UNINST, Documentation-man)
|
||||
$(Q)$(RM) $(addprefix $(DESTDIR)$(man8dir)/,$(_DOC_MAN8))
|
||||
$(Q)$(RMDIR) $(DESTDIR)$(man8dir)
|
||||
|
@ -36,6 +36,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
|
||||
[BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
|
||||
[BTF_KIND_VAR] = "VAR",
|
||||
[BTF_KIND_DATASEC] = "DATASEC",
|
||||
[BTF_KIND_FLOAT] = "FLOAT",
|
||||
};
|
||||
|
||||
struct btf_attach_table {
|
||||
@ -327,6 +328,13 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
|
||||
jsonw_end_array(w);
|
||||
break;
|
||||
}
|
||||
case BTF_KIND_FLOAT: {
|
||||
if (json_output)
|
||||
jsonw_uint_field(w, "size", t->size);
|
||||
else
|
||||
printf(" size=%u", t->size);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
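A small sketch of how such a FLOAT record gets into BTF in the first place, assuming the libbpf side of this series (btf__add_float()); the function name add_double is illustrative.

#include <bpf/btf.h>

/* Adds one BTF_KIND_FLOAT record: name "double", vlen 0, size 8 bytes.
 * dump_btf_type() above prints that record with its size. */
static int add_double(struct btf *btf)
{
	int id = btf__add_float(btf, "double", 8);

	return id < 0 ? id : 0;
}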
@ -596,6 +596,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
|
||||
switch (BTF_INFO_KIND(t->info)) {
|
||||
case BTF_KIND_INT:
|
||||
case BTF_KIND_TYPEDEF:
|
||||
case BTF_KIND_FLOAT:
|
||||
BTF_PRINT_ARG("%s ", btf__name_by_offset(btf, t->name_off));
|
||||
break;
|
||||
case BTF_KIND_STRUCT:
|
||||
|
@ -336,6 +336,10 @@ static void probe_kernel_image_config(const char *define_prefix)
|
||||
{ "CONFIG_BPF_JIT", },
|
||||
/* Avoid compiling eBPF interpreter (use JIT only) */
|
||||
{ "CONFIG_BPF_JIT_ALWAYS_ON", },
|
||||
/* Kernel BTF debug information available */
|
||||
{ "CONFIG_DEBUG_INFO_BTF", },
|
||||
/* Kernel module BTF debug information available */
|
||||
{ "CONFIG_DEBUG_INFO_BTF_MODULES", },
|
||||
|
||||
/* cgroups */
|
||||
{ "CONFIG_CGROUPS", },
|
||||
|
@ -196,6 +196,9 @@ static const char *print_imm(void *private_data,
|
||||
else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
|
||||
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
|
||||
"map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
|
||||
else if (insn->src_reg == BPF_PSEUDO_FUNC)
|
||||
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
|
||||
"subprog[%+d]", insn->imm);
|
||||
else
|
||||
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
|
||||
"0x%llx", (unsigned long long)full_imm);
|
||||
|
@ -16,7 +16,10 @@ CFLAGS := -g -Wall
|
||||
|
||||
# Try to detect best kernel BTF source
|
||||
KERNEL_REL := $(shell uname -r)
|
||||
VMLINUX_BTF_PATHS := /sys/kernel/btf/vmlinux /boot/vmlinux-$(KERNEL_REL)
|
||||
VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \
|
||||
$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
|
||||
../../../vmlinux /sys/kernel/btf/vmlinux \
|
||||
/boot/vmlinux-$(KERNEL_REL)
|
||||
VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \
|
||||
$(wildcard $(VMLINUX_BTF_PATHS))))
|
||||
|
||||
@ -66,12 +69,16 @@ $(OUTPUT) $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT):
|
||||
$(QUIET_MKDIR)mkdir -p $@
|
||||
|
||||
$(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
|
||||
ifeq ($(VMLINUX_H),)
|
||||
$(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \
|
||||
echo "Couldn't find kernel BTF; set VMLINUX_BTF to" \
|
||||
"specify its location." >&2; \
|
||||
exit 1;\
|
||||
fi
|
||||
$(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
|
||||
else
|
||||
$(Q)cp "$(VMLINUX_H)" $@
|
||||
endif
|
||||
|
||||
$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT)
|
||||
$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@
|
||||
|
@ -11,9 +11,9 @@ const volatile __u64 min_us = 0;
|
||||
const volatile pid_t targ_pid = 0;
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(max_entries, 10240);
|
||||
__type(key, u32);
|
||||
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
|
||||
__uint(map_flags, BPF_F_NO_PREALLOC);
|
||||
__type(key, int);
|
||||
__type(value, u64);
|
||||
} start SEC(".maps");
|
||||
|
||||
@ -25,15 +25,20 @@ struct {
|
||||
|
||||
/* record enqueue timestamp */
|
||||
__always_inline
|
||||
static int trace_enqueue(u32 tgid, u32 pid)
|
||||
static int trace_enqueue(struct task_struct *t)
|
||||
{
|
||||
u64 ts;
|
||||
u32 pid = t->pid;
|
||||
u64 *ptr;
|
||||
|
||||
if (!pid || (targ_pid && targ_pid != pid))
|
||||
return 0;
|
||||
|
||||
ts = bpf_ktime_get_ns();
|
||||
bpf_map_update_elem(&start, &pid, &ts, 0);
|
||||
ptr = bpf_task_storage_get(&start, t, 0,
|
||||
BPF_LOCAL_STORAGE_GET_F_CREATE);
|
||||
if (!ptr)
|
||||
return 0;
|
||||
|
||||
*ptr = bpf_ktime_get_ns();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -43,7 +48,7 @@ int handle__sched_wakeup(u64 *ctx)
|
||||
/* TP_PROTO(struct task_struct *p) */
|
||||
struct task_struct *p = (void *)ctx[0];
|
||||
|
||||
return trace_enqueue(p->tgid, p->pid);
|
||||
return trace_enqueue(p);
|
||||
}
|
||||
|
||||
SEC("tp_btf/sched_wakeup_new")
|
||||
@ -52,7 +57,7 @@ int handle__sched_wakeup_new(u64 *ctx)
|
||||
/* TP_PROTO(struct task_struct *p) */
|
||||
struct task_struct *p = (void *)ctx[0];
|
||||
|
||||
return trace_enqueue(p->tgid, p->pid);
|
||||
return trace_enqueue(p);
|
||||
}
|
||||
|
||||
SEC("tp_btf/sched_switch")
|
||||
@ -70,12 +75,16 @@ int handle__sched_switch(u64 *ctx)
|
||||
|
||||
/* ivcsw: treat like an enqueue event and store timestamp */
|
||||
if (prev->state == TASK_RUNNING)
|
||||
trace_enqueue(prev->tgid, prev->pid);
|
||||
trace_enqueue(prev);
|
||||
|
||||
pid = next->pid;
|
||||
|
||||
/* For pid mismatch, save a bpf_task_storage_get */
|
||||
if (!pid || (targ_pid && targ_pid != pid))
|
||||
return 0;
|
||||
|
||||
/* fetch timestamp and calculate delta */
|
||||
tsp = bpf_map_lookup_elem(&start, &pid);
|
||||
tsp = bpf_task_storage_get(&start, next, 0, 0);
|
||||
if (!tsp)
|
||||
return 0; /* missed enqueue */
|
||||
|
||||
@ -91,7 +100,7 @@ int handle__sched_switch(u64 *ctx)
|
||||
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
|
||||
&event, sizeof(event));
|
||||
|
||||
bpf_map_delete_elem(&start, &pid);
|
||||
bpf_task_storage_delete(&start, next);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -93,7 +93,717 @@ union bpf_iter_link_info {
|
||||
} map;
|
||||
};
|
||||
|
||||
/* BPF syscall commands, see bpf(2) man-page for details. */
|
||||
/* BPF syscall commands, see bpf(2) man-page for more details. */
|
||||
/**
|
||||
* DOC: eBPF Syscall Preamble
|
||||
*
|
||||
* The operation to be performed by the **bpf**\ () system call is determined
|
||||
* by the *cmd* argument. Each operation takes an accompanying argument,
|
||||
* provided via *attr*, which is a pointer to a union of type *bpf_attr* (see
|
||||
* below). The size argument is the size of the union pointed to by *attr*.
|
||||
*/
|
||||
/**
|
||||
* DOC: eBPF Syscall Commands
|
||||
*
|
||||
* BPF_MAP_CREATE
|
||||
* Description
|
||||
* Create a map and return a file descriptor that refers to the
|
||||
* map. The close-on-exec file descriptor flag (see **fcntl**\ (2))
|
||||
* is automatically enabled for the new file descriptor.
|
||||
*
|
||||
* Applying **close**\ (2) to the file descriptor returned by
|
||||
* **BPF_MAP_CREATE** will delete the map (but see NOTES).
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_MAP_LOOKUP_ELEM
|
||||
* Description
|
||||
* Look up an element with a given *key* in the map referred to
|
||||
* by the file descriptor *map_fd*.
|
||||
*
|
||||
* The *flags* argument may be specified as one of the
|
||||
* following:
|
||||
*
|
||||
* **BPF_F_LOCK**
|
||||
* Look up the value of a spin-locked map without
|
||||
* returning the lock. This must be specified if the
|
||||
* elements contain a spinlock.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_MAP_UPDATE_ELEM
|
||||
* Description
|
||||
* Create or update an element (key/value pair) in a specified map.
|
||||
*
|
||||
* The *flags* argument should be specified as one of the
|
||||
* following:
|
||||
*
|
||||
* **BPF_ANY**
|
||||
* Create a new element or update an existing element.
|
||||
* **BPF_NOEXIST**
|
||||
* Create a new element only if it did not exist.
|
||||
* **BPF_EXIST**
|
||||
* Update an existing element.
|
||||
* **BPF_F_LOCK**
|
||||
* Update a spin_lock-ed map element.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**,
|
||||
* **E2BIG**, **EEXIST**, or **ENOENT**.
|
||||
*
|
||||
* **E2BIG**
|
||||
* The number of elements in the map reached the
|
||||
* *max_entries* limit specified at map creation time.
|
||||
* **EEXIST**
|
||||
* If *flags* specifies **BPF_NOEXIST** and the element
|
||||
* with *key* already exists in the map.
|
||||
* **ENOENT**
|
||||
* If *flags* specifies **BPF_EXIST** and the element with
|
||||
* *key* does not exist in the map.
|
||||
*
|
||||
* BPF_MAP_DELETE_ELEM
|
||||
* Description
|
||||
* Look up and delete an element by key in a specified map.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_MAP_GET_NEXT_KEY
|
||||
* Description
|
||||
* Look up an element by key in a specified map and return the key
|
||||
* of the next element. Can be used to iterate over all elements
|
||||
* in the map.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* The following cases can be used to iterate over all elements of
|
||||
* the map:
|
||||
*
|
||||
* * If *key* is not found, the operation returns zero and sets
|
||||
* the *next_key* pointer to the key of the first element.
|
||||
* * If *key* is found, the operation returns zero and sets the
|
||||
* *next_key* pointer to the key of the next element.
|
||||
* * If *key* is the last element, returns -1 and *errno* is set
|
||||
* to **ENOENT**.
|
||||
*
|
||||
* May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or
|
||||
* **EINVAL** on error.
|
||||
*
|
||||
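To make the map commands above (BPF_MAP_CREATE through BPF_MAP_GET_NEXT_KEY) concrete, here is an illustrative raw-syscall sketch; sys_bpf and map_demo are hypothetical names, error handling is trimmed, and libbpf wraps the same commands.

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

int map_demo(void)
{
	long long value = 42, out = 0;
	union bpf_attr attr;
	int map_fd, key = 1;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_HASH;
	attr.key_size = sizeof(key);
	attr.value_size = sizeof(value);
	attr.max_entries = 16;
	map_fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
	if (map_fd < 0)
		return -1;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = (__u64)(unsigned long)&key;
	attr.value = (__u64)(unsigned long)&value;
	attr.flags = BPF_NOEXIST;		/* fail if the key already exists */
	sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));

	attr.value = (__u64)(unsigned long)&out;
	attr.flags = 0;
	sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));	/* out == 42 */
	return map_fd;
}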
* BPF_PROG_LOAD
|
||||
* Description
|
||||
* Verify and load an eBPF program, returning a new file
|
||||
* descriptor associated with the program.
|
||||
*
|
||||
* Applying **close**\ (2) to the file descriptor returned by
|
||||
* **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES).
|
||||
*
|
||||
* The close-on-exec file descriptor flag (see **fcntl**\ (2)) is
|
||||
* automatically enabled for the new file descriptor.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_OBJ_PIN
|
||||
* Description
|
||||
* Pin an eBPF program or map referred by the specified *bpf_fd*
|
||||
* to the provided *pathname* on the filesystem.
|
||||
*
|
||||
* The *pathname* argument must not contain a dot (".").
|
||||
*
|
||||
* On success, *pathname* retains a reference to the eBPF object,
|
||||
* preventing deallocation of the object when the original
|
||||
* *bpf_fd* is closed. This allow the eBPF object to live beyond
|
||||
* **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent
|
||||
* process.
|
||||
*
|
||||
* Applying **unlink**\ (2) or similar calls to the *pathname*
|
||||
* unpins the object from the filesystem, removing the reference.
|
||||
* If no other file descriptors or filesystem nodes refer to the
|
||||
* same object, it will be deallocated (see NOTES).
|
||||
*
|
||||
* The filesystem type for the parent directory of *pathname* must
|
||||
* be **BPF_FS_MAGIC**.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_OBJ_GET
|
||||
* Description
|
||||
* Open a file descriptor for the eBPF object pinned to the
|
||||
* specified *pathname*.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_PROG_ATTACH
|
||||
* Description
|
||||
* Attach an eBPF program to a *target_fd* at the specified
|
||||
* *attach_type* hook.
|
||||
*
|
||||
* The *attach_type* specifies the eBPF attachment point to
|
||||
* attach the program to, and must be one of *bpf_attach_type*
|
||||
* (see below).
|
||||
*
|
||||
* The *attach_bpf_fd* must be a valid file descriptor for a
|
||||
* loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap
|
||||
* or sock_ops type corresponding to the specified *attach_type*.
|
||||
*
|
||||
* The *target_fd* must be a valid file descriptor for a kernel
|
||||
* object which depends on the attach type of *attach_bpf_fd*:
|
||||
*
|
||||
* **BPF_PROG_TYPE_CGROUP_DEVICE**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SKB**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SOCK**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SYSCTL**,
|
||||
* **BPF_PROG_TYPE_SOCK_OPS**
|
||||
*
|
||||
* Control Group v2 hierarchy with the eBPF controller
|
||||
* enabled. Requires the kernel to be compiled with
|
||||
* **CONFIG_CGROUP_BPF**.
|
||||
*
|
||||
* **BPF_PROG_TYPE_FLOW_DISSECTOR**
|
||||
*
|
||||
* Network namespace (eg /proc/self/ns/net).
|
||||
*
|
||||
* **BPF_PROG_TYPE_LIRC_MODE2**
|
||||
*
|
||||
* LIRC device path (eg /dev/lircN). Requires the kernel
|
||||
* to be compiled with **CONFIG_BPF_LIRC_MODE2**.
|
||||
*
|
||||
* **BPF_PROG_TYPE_SK_SKB**,
|
||||
* **BPF_PROG_TYPE_SK_MSG**
|
||||
*
|
||||
* eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**).
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_PROG_DETACH
|
||||
* Description
|
||||
* Detach the eBPF program associated with the *target_fd* at the
|
||||
* hook specified by *attach_type*. The program must have been
|
||||
* previously attached using **BPF_PROG_ATTACH**.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_PROG_TEST_RUN
|
||||
* Description
|
||||
* Run the eBPF program associated with the *prog_fd* a *repeat*
|
||||
* number of times against a provided program context *ctx_in* and
|
||||
* data *data_in*, and return the modified program context
|
||||
* *ctx_out*, *data_out* (for example, packet data), result of the
|
||||
* execution *retval*, and *duration* of the test run.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* **ENOSPC**
|
||||
* Either *data_size_out* or *ctx_size_out* is too small.
|
||||
* **ENOTSUPP**
|
||||
* This command is not supported by the program type of
|
||||
* the program referred to by *prog_fd*.
|
||||
*
|
||||
* BPF_PROG_GET_NEXT_ID
|
||||
* Description
|
||||
* Fetch the next eBPF program currently loaded into the kernel.
|
||||
*
|
||||
* Looks for the eBPF program with an id greater than *start_id*
|
||||
* and updates *next_id* on success. If no other eBPF programs
|
||||
* remain with ids higher than *start_id*, returns -1 and sets
|
||||
* *errno* to **ENOENT**.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, or when no id remains, -1
|
||||
* is returned and *errno* is set appropriately.
|
||||
*
|
||||
* BPF_MAP_GET_NEXT_ID
|
||||
* Description
|
||||
* Fetch the next eBPF map currently loaded into the kernel.
|
||||
*
|
||||
* Looks for the eBPF map with an id greater than *start_id*
|
||||
* and updates *next_id* on success. If no other eBPF maps
|
||||
* remain with ids higher than *start_id*, returns -1 and sets
|
||||
* *errno* to **ENOENT**.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, or when no id remains, -1
|
||||
* is returned and *errno* is set appropriately.
|
||||
*
|
||||
* BPF_PROG_GET_FD_BY_ID
|
||||
* Description
|
||||
* Open a file descriptor for the eBPF program corresponding to
|
||||
* *prog_id*.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_MAP_GET_FD_BY_ID
|
||||
* Description
|
||||
* Open a file descriptor for the eBPF map corresponding to
|
||||
* *map_id*.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_OBJ_GET_INFO_BY_FD
|
||||
* Description
|
||||
* Obtain information about the eBPF object corresponding to
|
||||
* *bpf_fd*.
|
||||
*
|
||||
* Populates up to *info_len* bytes of *info*, which will be in
|
||||
* one of the following formats depending on the eBPF object type
|
||||
* of *bpf_fd*:
|
||||
*
|
||||
* * **struct bpf_prog_info**
|
||||
* * **struct bpf_map_info**
|
||||
* * **struct bpf_btf_info**
|
||||
* * **struct bpf_link_info**
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_PROG_QUERY
|
||||
* Description
|
||||
* Obtain information about eBPF programs associated with the
|
||||
* specified *attach_type* hook.
|
||||
*
|
||||
* The *target_fd* must be a valid file descriptor for a kernel
|
||||
* object which depends on the attach type of *attach_bpf_fd*:
|
||||
*
|
||||
* **BPF_PROG_TYPE_CGROUP_DEVICE**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SKB**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SOCK**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
|
||||
* **BPF_PROG_TYPE_CGROUP_SYSCTL**,
|
||||
* **BPF_PROG_TYPE_SOCK_OPS**
|
||||
*
|
||||
* Control Group v2 hierarchy with the eBPF controller
|
||||
* enabled. Requires the kernel to be compiled with
|
||||
* **CONFIG_CGROUP_BPF**.
|
||||
*
|
||||
* **BPF_PROG_TYPE_FLOW_DISSECTOR**
|
||||
*
|
||||
* Network namespace (eg /proc/self/ns/net).
|
||||
*
|
||||
* **BPF_PROG_TYPE_LIRC_MODE2**
|
||||
*
|
||||
* LIRC device path (eg /dev/lircN). Requires the kernel
|
||||
* to be compiled with **CONFIG_BPF_LIRC_MODE2**.
|
||||
*
|
||||
* **BPF_PROG_QUERY** always fetches the number of programs
|
||||
* attached and the *attach_flags* which were used to attach those
|
||||
* programs. Additionally, if *prog_ids* is nonzero and the number
|
||||
* of attached programs is less than *prog_cnt*, populates
|
||||
* *prog_ids* with the eBPF program ids of the programs attached
|
||||
* at *target_fd*.
|
||||
*
|
||||
* The following flags may alter the result:
|
||||
*
|
||||
* **BPF_F_QUERY_EFFECTIVE**
|
||||
* Only return information regarding programs which are
|
||||
* currently effective at the specified *target_fd*.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_RAW_TRACEPOINT_OPEN
|
||||
* Description
|
||||
* Attach an eBPF program to a tracepoint *name* to access kernel
|
||||
* internal arguments of the tracepoint in their raw form.
|
||||
*
|
||||
* The *prog_fd* must be a valid file descriptor associated with
|
||||
* a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**.
|
||||
*
|
||||
* No ABI guarantees are made about the content of tracepoint
|
||||
* arguments exposed to the corresponding eBPF program.
|
||||
*
|
||||
* Applying **close**\ (2) to the file descriptor returned by
|
||||
* **BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES).
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_BTF_LOAD
|
||||
* Description
|
||||
* Verify and load BPF Type Format (BTF) metadata into the kernel,
|
||||
* returning a new file descriptor associated with the metadata.
|
||||
* BTF is described in more detail at
|
||||
* https://www.kernel.org/doc/html/latest/bpf/btf.html.
|
||||
*
|
||||
* The *btf* parameter must point to valid memory providing
|
||||
* *btf_size* bytes of BTF binary metadata.
|
||||
*
|
||||
* The returned file descriptor can be passed to other **bpf**\ ()
|
||||
* subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to
|
||||
* associate the BTF with those objects.
|
||||
*
|
||||
* Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional
|
||||
* parameters to specify a *btf_log_buf*, *btf_log_size* and
|
||||
* *btf_log_level* which allow the kernel to return freeform log
|
||||
* output regarding the BTF verification process.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_BTF_GET_FD_BY_ID
|
||||
* Description
|
||||
* Open a file descriptor for the BPF Type Format (BTF)
|
||||
* corresponding to *btf_id*.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_TASK_FD_QUERY
|
||||
* Description
|
||||
* Obtain information about eBPF programs associated with the
|
||||
* target process identified by *pid* and *fd*.
|
||||
*
|
||||
* If the *pid* and *fd* are associated with a tracepoint, kprobe
|
||||
* or uprobe perf event, then the *prog_id* and *fd_type* will
|
||||
* be populated with the eBPF program id and file descriptor type
|
||||
* of type **bpf_task_fd_type**. If associated with a kprobe or
|
||||
* uprobe, the *probe_offset* and *probe_addr* will also be
|
||||
* populated. Optionally, if *buf* is provided, then up to
|
||||
* *buf_len* bytes of *buf* will be populated with the name of
|
||||
* the tracepoint, kprobe or uprobe.
|
||||
*
|
||||
* The resulting *prog_id* may be introspected in deeper detail
|
||||
* using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_MAP_LOOKUP_AND_DELETE_ELEM
|
||||
* Description
|
||||
* Look up an element with the given *key* in the map referred to
|
||||
* by the file descriptor *fd*, and if found, delete the element.
|
||||
*
|
||||
* The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
|
||||
* implement this command as a "pop" operation, deleting the top
|
||||
* element rather than one corresponding to *key*.
|
||||
* The *key* and *key_len* parameters should be zeroed when
|
||||
* issuing this operation for these map types.
|
||||
*
|
||||
* This command is only valid for the following map types:
|
||||
* * **BPF_MAP_TYPE_QUEUE**
|
||||
* * **BPF_MAP_TYPE_STACK**
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_MAP_FREEZE
|
||||
* Description
|
||||
* Freeze the permissions of the specified map.
|
||||
*
|
||||
* Write permissions may be frozen by passing zero *flags*.
|
||||
* Upon success, no future syscall invocations may alter the
|
||||
* map state of *map_fd*. Write operations from eBPF programs
|
||||
* are still possible for a frozen map.
|
||||
*
|
||||
* Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_BTF_GET_NEXT_ID
|
||||
* Description
|
||||
* Fetch the next BPF Type Format (BTF) object currently loaded
|
||||
* into the kernel.
|
||||
*
|
||||
* Looks for the BTF object with an id greater than *start_id*
|
||||
* and updates *next_id* on success. If no other BTF objects
|
||||
* remain with ids higher than *start_id*, returns -1 and sets
|
||||
* *errno* to **ENOENT**.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, or when no id remains, -1
|
||||
* is returned and *errno* is set appropriately.
|
||||
*
|
||||
* BPF_MAP_LOOKUP_BATCH
|
||||
* Description
|
||||
* Iterate and fetch multiple elements in a map.
|
||||
*
|
||||
* Two opaque values are used to manage batch operations,
|
||||
* *in_batch* and *out_batch*. Initially, *in_batch* must be set
|
||||
* to NULL to begin the batched operation. After each subsequent
|
||||
* **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
|
||||
* *out_batch* as the *in_batch* for the next operation to
|
||||
* continue iteration from the current point.
|
||||
*
|
||||
* The *keys* and *values* are output parameters which must point
|
||||
* to memory large enough to hold *count* items based on the key
|
||||
* and value size of the map *map_fd*. The *keys* buffer must be
|
||||
* of *key_size* * *count*. The *values* buffer must be of
|
||||
* *value_size* * *count*.
|
||||
*
|
||||
* The *elem_flags* argument may be specified as one of the
|
||||
* following:
|
||||
*
|
||||
* **BPF_F_LOCK**
|
||||
* Look up the value of a spin-locked map without
|
||||
* returning the lock. This must be specified if the
|
||||
* elements contain a spinlock.
|
||||
*
|
||||
* On success, *count* elements from the map are copied into the
|
||||
* user buffer, with the keys copied into *keys* and the values
|
||||
* copied into the corresponding indices in *values*.
|
||||
*
|
||||
* If an error is returned and *errno* is not **EFAULT**, *count*
|
||||
* is set to the number of successfully processed elements.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* May set *errno* to **ENOSPC** to indicate that *keys* or
|
||||
* *values* is too small to dump an entire bucket during
|
||||
* iteration of a hash-based map type.
|
||||
*
|
||||
* BPF_MAP_LOOKUP_AND_DELETE_BATCH
|
||||
* Description
|
||||
* Iterate and delete all elements in a map.
|
||||
*
|
||||
* This operation has the same behavior as
|
||||
* **BPF_MAP_LOOKUP_BATCH** with two exceptions:
|
||||
*
|
||||
* * Every element that is successfully returned is also deleted
|
||||
* from the map. This is at least *count* elements. Note that
|
||||
* *count* is both an input and an output parameter.
|
||||
* * Upon returning with *errno* set to **EFAULT**, up to
|
||||
* *count* elements may be deleted without returning the keys
|
||||
* and values of the deleted elements.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_MAP_UPDATE_BATCH
|
||||
* Description
|
||||
* Update multiple elements in a map by *key*.
|
||||
*
|
||||
* The *keys* and *values* are input parameters which must point
|
||||
* to memory large enough to hold *count* items based on the key
|
||||
* and value size of the map *map_fd*. The *keys* buffer must be
|
||||
* of *key_size* * *count*. The *values* buffer must be of
|
||||
* *value_size* * *count*.
|
||||
*
|
||||
* Each element specified in *keys* is sequentially updated to the
|
||||
* value in the corresponding index in *values*. The *in_batch*
|
||||
* and *out_batch* parameters are ignored and should be zeroed.
|
||||
*
|
||||
* The *elem_flags* argument should be specified as one of the
|
||||
* following:
|
||||
*
|
||||
* **BPF_ANY**
|
||||
 *		Create new elements or update existing elements.
|
||||
* **BPF_NOEXIST**
|
||||
* Create new elements only if they do not exist.
|
||||
* **BPF_EXIST**
|
||||
* Update existing elements.
|
||||
* **BPF_F_LOCK**
|
||||
* Update spin_lock-ed map elements. This must be
|
||||
* specified if the map value contains a spinlock.
|
||||
*
|
||||
* On success, *count* elements from the map are updated.
|
||||
*
|
||||
* If an error is returned and *errno* is not **EFAULT**, *count*
|
||||
* is set to the number of successfully processed elements.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or
|
||||
* **E2BIG**. **E2BIG** indicates that the number of elements in
|
||||
* the map reached the *max_entries* limit specified at map
|
||||
* creation time.
|
||||
*
|
||||
* May set *errno* to one of the following error codes under
|
||||
* specific circumstances:
|
||||
*
|
||||
* **EEXIST**
|
||||
* If *flags* specifies **BPF_NOEXIST** and the element
|
||||
* with *key* already exists in the map.
|
||||
* **ENOENT**
|
||||
* If *flags* specifies **BPF_EXIST** and the element with
|
||||
* *key* does not exist in the map.
|
||||
*
|
||||
* BPF_MAP_DELETE_BATCH
|
||||
* Description
|
||||
* Delete multiple elements in a map by *key*.
|
||||
*
|
||||
* The *keys* parameter is an input parameter which must point
|
||||
* to memory large enough to hold *count* items based on the key
|
||||
* size of the map *map_fd*, that is, *key_size* * *count*.
|
||||
*
|
||||
* Each element specified in *keys* is sequentially deleted. The
|
||||
* *in_batch*, *out_batch*, and *values* parameters are ignored
|
||||
* and should be zeroed.
|
||||
*
|
||||
* The *elem_flags* argument may be specified as one of the
|
||||
* following:
|
||||
*
|
||||
* **BPF_F_LOCK**
|
||||
* Look up the value of a spin-locked map without
|
||||
* returning the lock. This must be specified if the
|
||||
* elements contain a spinlock.
|
||||
*
|
||||
 *		On success, *count* elements from the map are deleted.
|
||||
*
|
||||
* If an error is returned and *errno* is not **EFAULT**, *count*
|
||||
* is set to the number of successfully processed elements. If
|
||||
 *		*errno* is **EFAULT**, up to *count* elements may have been
 *		deleted.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_LINK_CREATE
|
||||
* Description
|
||||
* Attach an eBPF program to a *target_fd* at the specified
|
||||
* *attach_type* hook and return a file descriptor handle for
|
||||
* managing the link.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_LINK_UPDATE
|
||||
* Description
|
||||
* Update the eBPF program in the specified *link_fd* to
|
||||
* *new_prog_fd*.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_LINK_GET_FD_BY_ID
|
||||
* Description
|
||||
* Open a file descriptor for the eBPF Link corresponding to
|
||||
* *link_id*.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_LINK_GET_NEXT_ID
|
||||
* Description
|
||||
* Fetch the next eBPF link currently loaded into the kernel.
|
||||
*
|
||||
* Looks for the eBPF link with an id greater than *start_id*
|
||||
* and updates *next_id* on success. If no other eBPF links
|
||||
* remain with ids higher than *start_id*, returns -1 and sets
|
||||
* *errno* to **ENOENT**.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, or when no id remains, -1
|
||||
* is returned and *errno* is set appropriately.
|
||||
*
|
||||
* BPF_ENABLE_STATS
|
||||
* Description
|
||||
* Enable eBPF runtime statistics gathering.
|
||||
*
|
||||
* Runtime statistics gathering for the eBPF runtime is disabled
|
||||
* by default to minimize the corresponding performance overhead.
|
||||
* This command enables statistics globally.
|
||||
*
|
||||
* Multiple programs may independently enable statistics.
|
||||
* After gathering the desired statistics, eBPF runtime statistics
|
||||
* may be disabled again by calling **close**\ (2) for the file
|
||||
* descriptor returned by this function. Statistics will only be
|
||||
* disabled system-wide when all outstanding file descriptors
|
||||
* returned by prior calls for this subcommand are closed.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_ITER_CREATE
|
||||
* Description
|
||||
* Create an iterator on top of the specified *link_fd* (as
|
||||
* previously created using **BPF_LINK_CREATE**) and return a
|
||||
* file descriptor that can be used to trigger the iteration.
|
||||
*
|
||||
* If the resulting file descriptor is pinned to the filesystem
|
||||
* using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls
|
||||
* for that path will trigger the iterator to read kernel state
|
||||
* using the eBPF program attached to *link_fd*.
|
||||
*
|
||||
* Return
|
||||
* A new file descriptor (a nonnegative integer), or -1 if an
|
||||
* error occurred (in which case, *errno* is set appropriately).
|
||||
*
|
||||
* BPF_LINK_DETACH
|
||||
* Description
|
||||
* Forcefully detach the specified *link_fd* from its
|
||||
* corresponding attachment point.
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
*
|
||||
* BPF_PROG_BIND_MAP
|
||||
* Description
|
||||
* Bind a map to the lifetime of an eBPF program.
|
||||
*
|
||||
* The map identified by *map_fd* is bound to the program
|
||||
* identified by *prog_fd* and only released when *prog_fd* is
|
||||
* released. This may be used in cases where metadata should be
|
||||
* associated with a program which otherwise does not contain any
|
||||
* references to the map (for example, embedded in the eBPF
|
||||
* program instructions).
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
* is set appropriately.
|
||||
 *
 * NOTES
 *	eBPF objects (maps and programs) can be shared between processes.
 *
 *	* After **fork**\ (2), the child inherits file descriptors
 *	  referring to the same eBPF objects.
 *	* File descriptors referring to eBPF objects can be transferred over
 *	  **unix**\ (7) domain sockets.
 *	* File descriptors referring to eBPF objects can be duplicated in the
 *	  usual way, using **dup**\ (2) and similar calls.
 *	* File descriptors referring to eBPF objects can be pinned to the
 *	  filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2).
 *
 * An eBPF object is deallocated only after all file descriptors referring
 * to the object have been closed and no references remain pinned to the
 * filesystem or attached (for example, bound to a program or device).
 */
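As a rough illustration of the **BPF_OBJ_GET_INFO_BY_FD** flow documented above, a userspace caller could wrap the raw syscall as in the sketch below. The helper name and the minimal error handling are assumptions made for the example only; such code belongs in an application, not in this header.

	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/bpf.h>

	/* Fill *info with bpf_prog_info for an already-loaded program fd.
	 * Returns 0 on success; on error, -1 is returned and errno is set.
	 */
	static int get_prog_info(int prog_fd, struct bpf_prog_info *info)
	{
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		memset(info, 0, sizeof(*info));
		attr.info.bpf_fd = prog_fd;
		attr.info.info_len = sizeof(*info);
		attr.info.info = (__u64)(unsigned long)info;

		return syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
	}
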
enum bpf_cmd {
|
||||
BPF_MAP_CREATE,
|
||||
BPF_MAP_LOOKUP_ELEM,
|
||||
@ -393,6 +1103,15 @@ enum bpf_link_type {
|
||||
* is struct/union.
|
||||
*/
|
||||
#define BPF_PSEUDO_BTF_ID 3
|
||||
/* insn[0].src_reg: BPF_PSEUDO_FUNC
|
||||
* insn[0].imm: insn offset to the func
|
||||
* insn[1].imm: 0
|
||||
* insn[0].off: 0
|
||||
* insn[1].off: 0
|
||||
* ldimm64 rewrite: address of the function
|
||||
* verifier type: PTR_TO_FUNC.
|
||||
*/
|
||||
#define BPF_PSEUDO_FUNC 4
|
||||
|
||||
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
|
||||
* offset to another bpf function
|
||||
@ -720,7 +1439,7 @@ union bpf_attr {
|
||||
* parsed and used to produce a manual page. The workflow is the following,
|
||||
* and requires the rst2man utility:
|
||||
*
|
||||
* $ ./scripts/bpf_helpers_doc.py \
|
||||
* $ ./scripts/bpf_doc.py \
|
||||
* --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
|
||||
* $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
|
||||
* $ man /tmp/bpf-helpers.7
|
||||
@ -1765,6 +2484,10 @@ union bpf_attr {
|
||||
* Use with ENCAP_L3/L4 flags to further specify the tunnel
|
||||
* type; *len* is the length of the inner MAC header.
|
||||
*
|
||||
* * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
|
||||
* Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
|
||||
* L2 type as Ethernet.
|
||||
*
|
||||
 *		A call to this helper may change the underlying
|
||||
* packet buffer. Therefore, at load time, all checks on pointers
|
||||
* previously done by the verifier are invalidated and must be
|
||||
@ -3909,6 +4632,34 @@ union bpf_attr {
|
||||
* * **BPF_MTU_CHK_RET_FRAG_NEEDED**
|
||||
* * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
|
||||
*
|
||||
 * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
 *	Description
 *		For each element in **map**, call **callback_fn** function with
 *		**map**, **callback_ctx** and other map-specific parameters.
 *		The **callback_fn** should be a static function and
 *		the **callback_ctx** should be a pointer to the stack.
 *		**flags** is used to control certain aspects of the helper.
 *		Currently, **flags** must be 0.
 *
 *		The following is a list of supported map types and their
 *		respective expected callback signatures:
 *
 *		BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
 *		BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
 *		BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
 *
 *		long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
 *
 *		For per-CPU maps, the map value is the value on the CPU where the
 *		bpf_prog is running.
 *
 *		If **callback_fn** returns 0, the helper continues to the next
 *		element. If it returns 1, the helper skips the rest of the
 *		elements and returns. Other return values are not used now.
 *
 *	Return
 *		The number of traversed map elements on success, **-EINVAL** for
 *		invalid **flags**.
 */
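To make the calling convention above concrete, a BPF-side sketch might look like the following. The map, section name, and callback are hypothetical and assume libbpf's bpf_helpers.h; this would live in a separate .bpf.c object, not in this header.

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	struct bpf_map;	/* opaque, only passed through to the callback */

	struct {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__uint(max_entries, 32);
		__type(key, __u32);
		__type(value, __u64);
	} counters SEC(".maps");

	struct cb_ctx {
		__u64 sum;
	};

	/* The callback must be a static function; returning 0 continues the
	 * iteration, returning 1 stops it early.
	 */
	static long sum_elem(struct bpf_map *map, __u32 *key, __u64 *val,
			     struct cb_ctx *ctx)
	{
		ctx->sum += *val;
		return 0;
	}

	SEC("tracepoint/syscalls/sys_enter_getpid")
	int sum_counters(void *tp_ctx)
	{
		struct cb_ctx data = { .sum = 0 };

		/* callback_ctx must point to the stack; flags must be 0 */
		bpf_for_each_map_elem(&counters, sum_elem, &data, 0);
		return 0;
	}

	char _license[] SEC("license") = "GPL";
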
#define __BPF_FUNC_MAPPER(FN) \
|
||||
FN(unspec), \
|
||||
@ -4075,6 +4826,7 @@ union bpf_attr {
|
||||
FN(ima_inode_hash), \
|
||||
FN(sock_from_file), \
|
||||
FN(check_mtu), \
|
||||
FN(for_each_map_elem), \
|
||||
/* */
|
||||
|
||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||
@ -4168,6 +4920,7 @@ enum {
|
||||
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
|
||||
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
|
||||
BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
|
||||
BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
|
||||
};
|
||||
|
||||
enum {
|
||||
@ -5205,7 +5958,10 @@ struct bpf_pidns_info {
|
||||
|
||||
/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
|
||||
struct bpf_sk_lookup {
|
||||
__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
|
||||
union {
|
||||
__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
|
||||
__u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
|
||||
};
|
||||
|
||||
__u32 family; /* Protocol family (AF_INET, AF_INET6) */
|
||||
__u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
|
||||
|
@ -52,7 +52,7 @@ struct btf_type {
|
||||
};
|
||||
};
|
||||
|
||||
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
|
||||
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
|
||||
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
|
||||
#define BTF_INFO_KFLAG(info) ((info) >> 31)
|
||||
|
||||
@ -72,7 +72,8 @@ struct btf_type {
|
||||
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
|
||||
#define BTF_KIND_VAR 14 /* Variable */
|
||||
#define BTF_KIND_DATASEC 15 /* Section */
|
||||
#define BTF_KIND_MAX BTF_KIND_DATASEC
|
||||
#define BTF_KIND_FLOAT 16 /* Floating point */
|
||||
#define BTF_KIND_MAX BTF_KIND_FLOAT
|
||||
#define NR_BTF_KINDS (BTF_KIND_MAX + 1)
|
||||
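A small sanity-check sketch of why the kind mask had to grow from 0x0f to 0x1f in the hunk above: with only four bits, kind 16 (**BTF_KIND_FLOAT**) would alias kind 0. The program below is illustrative only and assumes these uapi definitions are visible.

	#include <linux/btf.h>
	#include <assert.h>

	int main(void)
	{
		struct btf_type t = {
			.info = (BTF_KIND_FLOAT << 24),	/* vlen = 0, kind_flag = 0 */
			.size = 4,
		};

		assert(BTF_INFO_KIND(t.info) == BTF_KIND_FLOAT);	/* 5-bit mask */
		assert(((t.info >> 24) & 0x0f) == 0);	/* old 4-bit mask loses the kind */
		assert(BTF_INFO_VLEN(t.info) == 0);
		return 0;
	}
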
|
||||
/* For some specific BTF_KIND, "struct btf_type" is immediately
|
||||
|
@ -158,7 +158,7 @@ $(BPF_IN_STATIC): force $(BPF_HELPER_DEFS)
|
||||
$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
|
||||
|
||||
$(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
|
||||
$(QUIET_GEN)$(srctree)/scripts/bpf_helpers_doc.py --header \
|
||||
$(QUIET_GEN)$(srctree)/scripts/bpf_doc.py --header \
|
||||
--file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS)
|
||||
|
||||
$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
|
||||
|
@ -291,6 +291,7 @@ static int btf_type_size(const struct btf_type *t)
|
||||
case BTF_KIND_PTR:
|
||||
case BTF_KIND_TYPEDEF:
|
||||
case BTF_KIND_FUNC:
|
||||
case BTF_KIND_FLOAT:
|
||||
return base_size;
|
||||
case BTF_KIND_INT:
|
||||
return base_size + sizeof(__u32);
|
||||
@ -338,6 +339,7 @@ static int btf_bswap_type_rest(struct btf_type *t)
|
||||
case BTF_KIND_PTR:
|
||||
case BTF_KIND_TYPEDEF:
|
||||
case BTF_KIND_FUNC:
|
||||
case BTF_KIND_FLOAT:
|
||||
return 0;
|
||||
case BTF_KIND_INT:
|
||||
*(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1));
|
||||
@ -578,6 +580,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
|
||||
case BTF_KIND_UNION:
|
||||
case BTF_KIND_ENUM:
|
||||
case BTF_KIND_DATASEC:
|
||||
case BTF_KIND_FLOAT:
|
||||
size = t->size;
|
||||
goto done;
|
||||
case BTF_KIND_PTR:
|
||||
@ -621,6 +624,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
|
||||
switch (kind) {
|
||||
case BTF_KIND_INT:
|
||||
case BTF_KIND_ENUM:
|
||||
case BTF_KIND_FLOAT:
|
||||
return min(btf_ptr_sz(btf), (size_t)t->size);
|
||||
case BTF_KIND_PTR:
|
||||
return btf_ptr_sz(btf);
|
||||
@ -1756,6 +1760,47 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
|
||||
return btf_commit_type(btf, sz);
|
||||
}
|
||||
|
||||
/*
 * Append new BTF_KIND_FLOAT type with:
 *   - *name* - non-empty, non-NULL type name;
 *   - *sz* - size of the type, in bytes;
 * Returns:
 *   - >0, type ID of newly added BTF type;
 *   - <0, on error.
 */
int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
{
	struct btf_type *t;
	int sz, name_off;

	/* non-empty name */
	if (!name || !name[0])
		return -EINVAL;

	/* byte_sz must be one of the explicitly allowed values */
	if (byte_sz != 2 && byte_sz != 4 && byte_sz != 8 && byte_sz != 12 &&
	    byte_sz != 16)
		return -EINVAL;

	if (btf_ensure_modifiable(btf))
		return -ENOMEM;

	sz = sizeof(struct btf_type);
	t = btf_add_type_mem(btf, sz);
	if (!t)
		return -ENOMEM;

	name_off = btf__add_str(btf, name);
	if (name_off < 0)
		return name_off;

	t->name_off = name_off;
	t->info = btf_type_info(BTF_KIND_FLOAT, 0, 0);
	t->size = byte_sz;

	return btf_commit_type(btf, sz);
}

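A minimal userspace usage sketch for the new API follows; the program name and error handling are illustrative, and it would live in an application linked against a libbpf that exports btf__add_float (LIBBPF_0.4.0).

	#include <stdio.h>
	#include <bpf/btf.h>

	int main(void)
	{
		struct btf *btf = btf__new_empty();
		int id;

		if (!btf)
			return 1;

		/* append a 4-byte BTF_KIND_FLOAT named "float" */
		id = btf__add_float(btf, "float", 4);
		if (id < 0)
			fprintf(stderr, "btf__add_float: %d\n", id);
		else
			printf("float type id: %d\n", id);

		btf__free(btf);
		return id < 0;
	}
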
/* it's completely legal to append BTF types with type IDs pointing forward to
|
||||
* types that haven't been appended yet, so we only make sure that id looks
|
||||
* sane, we can't guarantee that ID will always be valid
|
||||
@ -1883,7 +1928,7 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
|
||||
* - *byte_sz* - size of the struct, in bytes;
|
||||
*
|
||||
* Struct initially has no fields in it. Fields can be added by
|
||||
* btf__add_field() right after btf__add_struct() succeeds.
|
||||
* btf__add_field() right after btf__add_struct() succeeds.
|
||||
*
|
||||
* Returns:
|
||||
* - >0, type ID of newly added BTF type;
|
||||
@ -3626,6 +3671,7 @@ static int btf_dedup_prep(struct btf_dedup *d)
|
||||
case BTF_KIND_FWD:
|
||||
case BTF_KIND_TYPEDEF:
|
||||
case BTF_KIND_FUNC:
|
||||
case BTF_KIND_FLOAT:
|
||||
h = btf_hash_common(t);
|
||||
break;
|
||||
case BTF_KIND_INT:
|
||||
@ -3722,6 +3768,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
|
||||
break;
|
||||
|
||||
case BTF_KIND_FWD:
|
||||
case BTF_KIND_FLOAT:
|
||||
h = btf_hash_common(t);
|
||||
for_each_dedup_cand(d, hash_entry, h) {
|
||||
cand_id = (__u32)(long)hash_entry->value;
|
||||
@ -3983,6 +4030,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
|
||||
return btf_compat_enum(cand_type, canon_type);
|
||||
|
||||
case BTF_KIND_FWD:
|
||||
case BTF_KIND_FLOAT:
|
||||
return btf_equal_common(cand_type, canon_type);
|
||||
|
||||
case BTF_KIND_CONST:
|
||||
@ -4479,6 +4527,7 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
|
||||
switch (btf_kind(t)) {
|
||||
case BTF_KIND_INT:
|
||||
case BTF_KIND_ENUM:
|
||||
case BTF_KIND_FLOAT:
|
||||
break;
|
||||
|
||||
case BTF_KIND_FWD:
|
||||
|
@ -95,6 +95,7 @@ LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
|
||||
LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
|
||||
|
||||
LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding);
|
||||
LIBBPF_API int btf__add_float(struct btf *btf, const char *name, size_t byte_sz);
|
||||
LIBBPF_API int btf__add_ptr(struct btf *btf, int ref_type_id);
|
||||
LIBBPF_API int btf__add_array(struct btf *btf,
|
||||
int index_type_id, int elem_type_id, __u32 nr_elems);
|
||||
@ -294,6 +295,11 @@ static inline bool btf_is_datasec(const struct btf_type *t)
|
||||
return btf_kind(t) == BTF_KIND_DATASEC;
|
||||
}
|
||||
|
||||
static inline bool btf_is_float(const struct btf_type *t)
|
||||
{
|
||||
return btf_kind(t) == BTF_KIND_FLOAT;
|
||||
}
|
||||
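For illustration, the new accessor can be combined with the existing iteration APIs to list FLOAT types in a loaded BTF object. The function below is a sketch under that assumption, not part of the library.

	#include <stdio.h>
	#include <bpf/btf.h>

	static void print_float_types(const struct btf *btf)
	{
		__u32 id, n = btf__get_nr_types(btf);

		for (id = 1; id <= n; id++) {
			const struct btf_type *t = btf__type_by_id(btf, id);

			if (t && btf_is_float(t))
				printf("[%u] FLOAT '%s' size=%u\n", id,
				       btf__name_by_offset(btf, t->name_off),
				       t->size);
		}
	}
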
|
||||
static inline __u8 btf_int_encoding(const struct btf_type *t)
|
||||
{
|
||||
return BTF_INT_ENCODING(*(__u32 *)(t + 1));
|
||||
|
@ -279,6 +279,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
|
||||
case BTF_KIND_INT:
|
||||
case BTF_KIND_ENUM:
|
||||
case BTF_KIND_FWD:
|
||||
case BTF_KIND_FLOAT:
|
||||
break;
|
||||
|
||||
case BTF_KIND_VOLATILE:
|
||||
@ -453,6 +454,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
|
||||
|
||||
switch (btf_kind(t)) {
|
||||
case BTF_KIND_INT:
|
||||
case BTF_KIND_FLOAT:
|
||||
tstate->order_state = ORDERED;
|
||||
return 0;
|
||||
|
||||
@ -1133,6 +1135,7 @@ skip_mod:
|
||||
case BTF_KIND_STRUCT:
|
||||
case BTF_KIND_UNION:
|
||||
case BTF_KIND_TYPEDEF:
|
||||
case BTF_KIND_FLOAT:
|
||||
goto done;
|
||||
default:
|
||||
pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n",
|
||||
@ -1247,6 +1250,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
|
||||
|
||||
switch (kind) {
|
||||
case BTF_KIND_INT:
|
||||
case BTF_KIND_FLOAT:
|
||||
btf_dump_emit_mods(d, decls);
|
||||
name = btf_name_of(d, t->name_off);
|
||||
btf_dump_printf(d, "%s", name);
|
||||
|
@ -178,6 +178,8 @@ enum kern_feature_id {
|
||||
FEAT_PROG_BIND_MAP,
|
||||
/* Kernel support for module BTFs */
|
||||
FEAT_MODULE_BTF,
|
||||
/* BTF_KIND_FLOAT support */
|
||||
FEAT_BTF_FLOAT,
|
||||
__FEAT_CNT,
|
||||
};
|
||||
|
||||
@ -188,6 +190,7 @@ enum reloc_type {
|
||||
RELO_CALL,
|
||||
RELO_DATA,
|
||||
RELO_EXTERN,
|
||||
RELO_SUBPROG_ADDR,
|
||||
};
|
||||
|
||||
struct reloc_desc {
|
||||
@ -574,6 +577,16 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)
|
||||
insn->off == 0;
|
||||
}
|
||||
|
||||
static bool is_ldimm64(struct bpf_insn *insn)
|
||||
{
|
||||
return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
|
||||
}
|
||||
|
||||
static bool insn_is_pseudo_func(struct bpf_insn *insn)
|
||||
{
|
||||
return is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
|
||||
}
|
||||
|
||||
static int
|
||||
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
|
||||
const char *name, size_t sec_idx, const char *sec_name,
|
||||
@ -1935,6 +1948,7 @@ static const char *btf_kind_str(const struct btf_type *t)
|
||||
case BTF_KIND_FUNC_PROTO: return "func_proto";
|
||||
case BTF_KIND_VAR: return "var";
|
||||
case BTF_KIND_DATASEC: return "datasec";
|
||||
case BTF_KIND_FLOAT: return "float";
|
||||
default: return "unknown";
|
||||
}
|
||||
}
|
||||
@ -2384,15 +2398,17 @@ static bool btf_needs_sanitization(struct bpf_object *obj)
|
||||
{
|
||||
bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
|
||||
bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
|
||||
bool has_float = kernel_supports(FEAT_BTF_FLOAT);
|
||||
bool has_func = kernel_supports(FEAT_BTF_FUNC);
|
||||
|
||||
return !has_func || !has_datasec || !has_func_global;
|
||||
return !has_func || !has_datasec || !has_func_global || !has_float;
|
||||
}
|
||||
|
||||
static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
|
||||
{
|
||||
bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
|
||||
bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
|
||||
bool has_float = kernel_supports(FEAT_BTF_FLOAT);
|
||||
bool has_func = kernel_supports(FEAT_BTF_FUNC);
|
||||
struct btf_type *t;
|
||||
int i, j, vlen;
|
||||
@ -2445,6 +2461,13 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
|
||||
} else if (!has_func_global && btf_is_func(t)) {
|
||||
/* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
|
||||
t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
|
||||
} else if (!has_float && btf_is_float(t)) {
|
||||
/* replace FLOAT with an equally-sized empty STRUCT;
|
||||
* since C compilers do not accept e.g. "float" as a
|
||||
* valid struct name, make it anonymous
|
||||
*/
|
||||
t->name_off = 0;
|
||||
t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2974,6 +2997,23 @@ static bool sym_is_extern(const GElf_Sym *sym)
|
||||
GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
|
||||
}
|
||||
|
||||
static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx)
|
||||
{
|
||||
int bind = GELF_ST_BIND(sym->st_info);
|
||||
int type = GELF_ST_TYPE(sym->st_info);
|
||||
|
||||
/* in .text section */
|
||||
if (sym->st_shndx != text_shndx)
|
||||
return false;
|
||||
|
||||
/* local function */
|
||||
if (bind == STB_LOCAL && type == STT_SECTION)
|
||||
return true;
|
||||
|
||||
/* global function */
|
||||
return bind == STB_GLOBAL && type == STT_FUNC;
|
||||
}
|
||||
|
||||
static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
|
||||
{
|
||||
const struct btf_type *t;
|
||||
@ -3395,7 +3435,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
|
||||
if (!is_ldimm64(insn)) {
|
||||
pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
|
||||
prog->name, sym_name, insn_idx, insn->code);
|
||||
return -LIBBPF_ERRNO__RELOC;
|
||||
@ -3430,6 +3470,23 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
|
||||
return -LIBBPF_ERRNO__RELOC;
|
||||
}
|
||||
|
||||
/* loading subprog addresses */
|
||||
if (sym_is_subprog(sym, obj->efile.text_shndx)) {
|
||||
/* global_func: sym->st_value = offset in the section, insn->imm = 0.
|
||||
* local_func: sym->st_value = 0, insn->imm = offset in the section.
|
||||
*/
|
||||
if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
|
||||
pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
|
||||
prog->name, sym_name, (size_t)sym->st_value, insn->imm);
|
||||
return -LIBBPF_ERRNO__RELOC;
|
||||
}
|
||||
|
||||
reloc_desc->type = RELO_SUBPROG_ADDR;
|
||||
reloc_desc->insn_idx = insn_idx;
|
||||
reloc_desc->sym_off = sym->st_value;
|
||||
return 0;
|
||||
}
|
||||
|
||||
type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
|
||||
sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
|
||||
|
||||
@ -3882,6 +3939,18 @@ static int probe_kern_btf_datasec(void)
|
||||
strs, sizeof(strs)));
|
||||
}
|
||||
|
||||
static int probe_kern_btf_float(void)
{
	static const char strs[] = "\0float";
	__u32 types[] = {
		/* float */
		BTF_TYPE_FLOAT_ENC(1, 4),
	};

	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
					     strs, sizeof(strs)));
}
|
||||
|
||||
static int probe_kern_array_mmap(void)
|
||||
{
|
||||
struct bpf_create_map_attr attr = {
|
||||
@ -4061,6 +4130,9 @@ static struct kern_feature_desc {
|
||||
[FEAT_MODULE_BTF] = {
|
||||
"module BTF support", probe_module_btf,
|
||||
},
|
||||
[FEAT_BTF_FLOAT] = {
|
||||
"BTF_KIND_FLOAT support", probe_kern_btf_float,
|
||||
},
|
||||
};
|
||||
|
||||
static bool kernel_supports(enum kern_feature_id feat_id)
|
||||
@ -5566,11 +5638,6 @@ static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
|
||||
insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
|
||||
}
|
||||
|
||||
static bool is_ldimm64(struct bpf_insn *insn)
|
||||
{
|
||||
return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
|
||||
}
|
||||
|
||||
static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
|
||||
{
|
||||
switch (BPF_SIZE(insn->code)) {
|
||||
@ -6172,6 +6239,10 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
|
||||
}
|
||||
relo->processed = true;
|
||||
break;
|
||||
case RELO_SUBPROG_ADDR:
|
||||
insn[0].src_reg = BPF_PSEUDO_FUNC;
|
||||
/* will be handled as a follow up pass */
|
||||
break;
|
||||
case RELO_CALL:
|
||||
/* will be handled as a follow up pass */
|
||||
break;
|
||||
@ -6358,11 +6429,11 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
|
||||
|
||||
for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
|
||||
insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
|
||||
if (!insn_is_subprog_call(insn))
|
||||
if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
|
||||
continue;
|
||||
|
||||
relo = find_prog_insn_relo(prog, insn_idx);
|
||||
if (relo && relo->type != RELO_CALL) {
|
||||
if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
|
||||
pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
|
||||
prog->name, insn_idx, relo->type);
|
||||
return -LIBBPF_ERRNO__RELOC;
|
||||
@ -6374,8 +6445,22 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
|
||||
* call always has imm = -1, but for static functions
|
||||
* relocation is against STT_SECTION and insn->imm
|
||||
* points to a start of a static function
|
||||
*
|
||||
* for subprog addr relocation, the relo->sym_off + insn->imm is
|
||||
* the byte offset in the corresponding section.
|
||||
*/
|
||||
sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
|
||||
if (relo->type == RELO_CALL)
|
||||
sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
|
||||
else
|
||||
sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
|
||||
} else if (insn_is_pseudo_func(insn)) {
|
||||
/*
|
||||
* RELO_SUBPROG_ADDR relo is always emitted even if both
|
||||
* functions are in the same section, so it shouldn't reach here.
|
||||
*/
|
||||
pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
|
||||
prog->name, insn_idx);
|
||||
return -LIBBPF_ERRNO__RELOC;
|
||||
} else {
|
||||
/* if subprogram call is to a static function within
|
||||
* the same ELF section, there won't be any relocation
|
||||
|
@ -350,3 +350,8 @@ LIBBPF_0.3.0 {
|
||||
xsk_setup_xdp_prog;
|
||||
xsk_socket__update_xskmap;
|
||||
} LIBBPF_0.2.0;
|
||||
|
||||
LIBBPF_0.4.0 {
|
||||
global:
|
||||
btf__add_float;
|
||||
} LIBBPF_0.3.0;
|
||||
|
@ -31,6 +31,8 @@
|
||||
#define BTF_MEMBER_ENC(name, type, bits_offset) (name), (type), (bits_offset)
|
||||
#define BTF_PARAM_ENC(name, type) (name), (type)
|
||||
#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
|
||||
#define BTF_TYPE_FLOAT_ENC(name, sz) \
|
||||
BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
|
||||
|
||||
#ifndef likely
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
|
@ -5,6 +5,7 @@
|
||||
#define __LIBBPF_LIBBPF_UTIL_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <linux/compiler.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -15,29 +16,56 @@ extern "C" {
|
||||
* application that uses libbpf.
|
||||
*/
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
# define libbpf_smp_rmb() asm volatile("" : : : "memory")
|
||||
# define libbpf_smp_wmb() asm volatile("" : : : "memory")
|
||||
# define libbpf_smp_mb() \
|
||||
asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc")
|
||||
/* Hinders stores to be observed before older loads. */
|
||||
# define libbpf_smp_rwmb() asm volatile("" : : : "memory")
|
||||
# define libbpf_smp_store_release(p, v) \
|
||||
do { \
|
||||
asm volatile("" : : : "memory"); \
|
||||
WRITE_ONCE(*p, v); \
|
||||
} while (0)
|
||||
# define libbpf_smp_load_acquire(p) \
|
||||
({ \
|
||||
typeof(*p) ___p1 = READ_ONCE(*p); \
|
||||
asm volatile("" : : : "memory"); \
|
||||
___p1; \
|
||||
})
|
||||
#elif defined(__aarch64__)
|
||||
# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory")
|
||||
# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
|
||||
# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
|
||||
# define libbpf_smp_rwmb() libbpf_smp_mb()
|
||||
#elif defined(__arm__)
|
||||
/* These are only valid for armv7 and above */
|
||||
# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory")
|
||||
# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
|
||||
# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
|
||||
# define libbpf_smp_rwmb() libbpf_smp_mb()
|
||||
#else
|
||||
/* Architecture missing native barrier functions. */
|
||||
# define libbpf_smp_rmb() __sync_synchronize()
|
||||
# define libbpf_smp_wmb() __sync_synchronize()
|
||||
# define libbpf_smp_mb() __sync_synchronize()
|
||||
# define libbpf_smp_rwmb() __sync_synchronize()
|
||||
# define libbpf_smp_store_release(p, v) \
|
||||
asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory")
|
||||
# define libbpf_smp_load_acquire(p) \
|
||||
({ \
|
||||
typeof(*p) ___p1; \
|
||||
asm volatile ("ldar %w0, %1" \
|
||||
: "=r" (___p1) : "Q" (*p) : "memory"); \
|
||||
___p1; \
|
||||
})
|
||||
#elif defined(__riscv)
|
||||
# define libbpf_smp_store_release(p, v) \
|
||||
do { \
|
||||
asm volatile ("fence rw,w" : : : "memory"); \
|
||||
WRITE_ONCE(*p, v); \
|
||||
} while (0)
|
||||
# define libbpf_smp_load_acquire(p) \
|
||||
({ \
|
||||
typeof(*p) ___p1 = READ_ONCE(*p); \
|
||||
asm volatile ("fence r,rw" : : : "memory"); \
|
||||
___p1; \
|
||||
})
|
||||
#endif
|
||||
|
||||
#ifndef libbpf_smp_store_release
|
||||
#define libbpf_smp_store_release(p, v) \
|
||||
do { \
|
||||
__sync_synchronize(); \
|
||||
WRITE_ONCE(*p, v); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#ifndef libbpf_smp_load_acquire
|
||||
#define libbpf_smp_load_acquire(p) \
|
||||
({ \
|
||||
typeof(*p) ___p1 = READ_ONCE(*p); \
|
||||
__sync_synchronize(); \
|
||||
___p1; \
|
||||
})
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -96,7 +96,8 @@ static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
|
||||
 * this function. Without this optimization it would have been
|
||||
* free_entries = r->cached_prod - r->cached_cons + r->size.
|
||||
*/
|
||||
r->cached_cons = *r->consumer + r->size;
|
||||
r->cached_cons = libbpf_smp_load_acquire(r->consumer);
|
||||
r->cached_cons += r->size;
|
||||
|
||||
return r->cached_cons - r->cached_prod;
|
||||
}
|
||||
@ -106,7 +107,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
|
||||
__u32 entries = r->cached_prod - r->cached_cons;
|
||||
|
||||
if (entries == 0) {
|
||||
r->cached_prod = *r->producer;
|
||||
r->cached_prod = libbpf_smp_load_acquire(r->producer);
|
||||
entries = r->cached_prod - r->cached_cons;
|
||||
}
|
||||
|
||||
@ -129,9 +130,7 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
|
||||
/* Make sure everything has been written to the ring before indicating
|
||||
* this to the kernel by writing the producer pointer.
|
||||
*/
|
||||
libbpf_smp_wmb();
|
||||
|
||||
*prod->producer += nb;
|
||||
libbpf_smp_store_release(prod->producer, *prod->producer + nb);
|
||||
}
|
||||
|
||||
static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
|
||||
@ -139,11 +138,6 @@ static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __
|
||||
__u32 entries = xsk_cons_nb_avail(cons, nb);
|
||||
|
||||
if (entries > 0) {
|
||||
/* Make sure we do not speculatively read the data before
|
||||
* we have received the packet buffers from the ring.
|
||||
*/
|
||||
libbpf_smp_rmb();
|
||||
|
||||
*idx = cons->cached_cons;
|
||||
cons->cached_cons += entries;
|
||||
}
|
||||
@ -161,9 +155,8 @@ static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
|
||||
/* Make sure data has been read before indicating we are done
|
||||
* with the entries by updating the consumer pointer.
|
||||
*/
|
||||
libbpf_smp_rwmb();
|
||||
libbpf_smp_store_release(cons->consumer, *cons->consumer + nb);
|
||||
|
||||
*cons->consumer += nb;
|
||||
}
|
||||
|
||||
static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
|
||||
|
@ -20,4 +20,4 @@ tools/lib/bitmap.c
|
||||
tools/lib/str_error_r.c
|
||||
tools/lib/vsprintf.c
|
||||
tools/lib/zalloc.c
|
||||
scripts/bpf_helpers_doc.py
|
||||
scripts/bpf_doc.py
|
||||
|
tools/testing/selftests/bpf/.gitignore (vendored)
@ -1,4 +1,6 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
bpf-helpers*
|
||||
bpf-syscall*
|
||||
test_verifier
|
||||
test_maps
|
||||
test_lru_map
|
||||
|
@ -68,6 +68,7 @@ TEST_PROGS := test_kmod.sh \
|
||||
test_bpftool_build.sh \
|
||||
test_bpftool.sh \
|
||||
test_bpftool_metadata.sh \
|
||||
test_doc_build.sh \
|
||||
test_xsk.sh
|
||||
|
||||
TEST_PROGS_EXTENDED := with_addr.sh \
|
||||
@ -103,6 +104,7 @@ override define CLEAN
|
||||
$(call msg,CLEAN)
|
||||
$(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
|
||||
$(Q)$(MAKE) -C bpf_testmod clean
|
||||
$(Q)$(MAKE) docs-clean
|
||||
endef
|
||||
|
||||
include ../lib.mk
|
||||
@ -180,6 +182,7 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
|
||||
cp $(SCRATCH_DIR)/runqslower $@
|
||||
|
||||
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
|
||||
$(TEST_GEN_FILES): docs
|
||||
|
||||
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
|
||||
$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
|
||||
@ -200,11 +203,16 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
|
||||
CC=$(HOSTCC) LD=$(HOSTLD) \
|
||||
OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
|
||||
prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install
|
||||
$(Q)mkdir -p $(BUILD_DIR)/bpftool/Documentation
|
||||
$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
|
||||
-C $(BPFTOOLDIR)/Documentation \
|
||||
OUTPUT=$(BUILD_DIR)/bpftool/Documentation/ \
|
||||
prefix= DESTDIR=$(SCRATCH_DIR)/ install
|
||||
|
||||
docs:
|
||||
$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
|
||||
-f Makefile.docs \
|
||||
prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@
|
||||
|
||||
docs-clean:
|
||||
$(Q)$(MAKE) $(submake_extras) \
|
||||
-f Makefile.docs \
|
||||
prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@
|
||||
|
||||
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
|
||||
../../../include/uapi/linux/bpf.h \
|
||||
@ -382,11 +390,12 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
|
||||
$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
|
||||
$(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
|
||||
|
||||
# only copy extra resources if in flavored build
|
||||
# non-flavored in-srctree builds receive special treatment, in particular, we
|
||||
# do not need to copy extra resources (see e.g. test_btf_dump_case())
|
||||
$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
|
||||
ifneq ($2,)
|
||||
ifneq ($2:$(OUTPUT),:$(shell pwd))
|
||||
$$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES))
|
||||
$(Q)cp -a $$^ $(TRUNNER_OUTPUT)/
|
||||
$(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/
|
||||
endif
|
||||
|
||||
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
|
||||
@ -476,3 +485,5 @@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
|
||||
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
|
||||
feature \
|
||||
$(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko)
|
||||
|
||||
.PHONY: docs docs-clean
|
||||
|
tools/testing/selftests/bpf/Makefile.docs (new file, 82 lines)
@ -0,0 +1,82 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
include ../../../scripts/Makefile.include
|
||||
include ../../../scripts/utilities.mak
|
||||
|
||||
INSTALL ?= install
|
||||
RM ?= rm -f
|
||||
RMDIR ?= rmdir --ignore-fail-on-non-empty
|
||||
|
||||
ifeq ($(V),1)
|
||||
Q =
|
||||
else
|
||||
Q = @
|
||||
endif
|
||||
|
||||
prefix ?= /usr/local
|
||||
mandir ?= $(prefix)/man
|
||||
man2dir = $(mandir)/man2
|
||||
man7dir = $(mandir)/man7
|
||||
|
||||
SYSCALL_RST = bpf-syscall.rst
|
||||
MAN2_RST = $(SYSCALL_RST)
|
||||
|
||||
HELPERS_RST = bpf-helpers.rst
|
||||
MAN7_RST = $(HELPERS_RST)
|
||||
|
||||
_DOC_MAN2 = $(patsubst %.rst,%.2,$(MAN2_RST))
|
||||
DOC_MAN2 = $(addprefix $(OUTPUT),$(_DOC_MAN2))
|
||||
|
||||
_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
|
||||
DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
|
||||
|
||||
DOCTARGETS := helpers syscall
|
||||
|
||||
docs: $(DOCTARGETS)
|
||||
syscall: man2
|
||||
helpers: man7
|
||||
man2: $(DOC_MAN2)
|
||||
man7: $(DOC_MAN7)
|
||||
|
||||
RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
|
||||
|
||||
# Configure make rules for the man page bpf-$1.$2.
|
||||
# $1 - target for scripts/bpf_doc.py
|
||||
# $2 - man page section to generate the troff file
|
||||
define DOCS_RULES =
|
||||
$(OUTPUT)bpf-$1.rst: ../../../../include/uapi/linux/bpf.h
|
||||
$$(QUIET_GEN)../../../../scripts/bpf_doc.py $1 \
|
||||
--filename $$< > $$@
|
||||
|
||||
$(OUTPUT)%.$2: $(OUTPUT)%.rst
|
||||
ifndef RST2MAN_DEP
|
||||
$$(error "rst2man not found, but required to generate man pages")
|
||||
endif
|
||||
$$(QUIET_GEN)rst2man $$< > $$@
|
||||
|
||||
docs-clean-$1:
|
||||
$$(call QUIET_CLEAN, eBPF_$1-manpage)
|
||||
$(Q)$(RM) $$(DOC_MAN$2) $(OUTPUT)bpf-$1.rst
|
||||
|
||||
docs-install-$1: docs
|
||||
$$(call QUIET_INSTALL, eBPF_$1-manpage)
|
||||
$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$$(man$2dir)
|
||||
$(Q)$(INSTALL) -m 644 $$(DOC_MAN$2) $(DESTDIR)$$(man$2dir)
|
||||
|
||||
docs-uninstall-$1:
|
||||
$$(call QUIET_UNINST, eBPF_$1-manpage)
|
||||
$(Q)$(RM) $$(addprefix $(DESTDIR)$$(man$2dir)/,$$(_DOC_MAN$2))
|
||||
$(Q)$(RMDIR) $(DESTDIR)$$(man$2dir)
|
||||
|
||||
.PHONY: $1 docs-clean-$1 docs-install-$1 docs-uninstall-$1
|
||||
endef
|
||||
|
||||
# Create the make targets to generate manual pages by name and section
|
||||
$(eval $(call DOCS_RULES,helpers,7))
|
||||
$(eval $(call DOCS_RULES,syscall,2))
|
||||
|
||||
docs-clean: $(foreach doctarget,$(DOCTARGETS), docs-clean-$(doctarget))
|
||||
docs-install: $(foreach doctarget,$(DOCTARGETS), docs-install-$(doctarget))
|
||||
docs-uninstall: $(foreach doctarget,$(DOCTARGETS), docs-uninstall-$(doctarget))
|
||||
|
||||
.PHONY: docs docs-clean docs-install docs-uninstall man2 man7
|
@ -111,6 +111,45 @@ available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
|
||||
|
||||
__ https://reviews.llvm.org/D78466
|
||||
|
||||
bpf_verif_scale/loop6.o test failure with Clang 12
|
||||
==================================================
|
||||
|
||||
With Clang 12, the following bpf_verif_scale test failed:
|
||||
* ``bpf_verif_scale/loop6.o``
|
||||
|
||||
The verifier output looks like
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
R1 type=ctx expected=fp
|
||||
The sequence of 8193 jumps is too complex.
|
||||
|
||||
The reason is that the compiler generates the following code
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
|
||||
14: 16 05 40 00 00 00 00 00 if w5 == 0 goto +64 <LBB0_6>
|
||||
15: bc 51 00 00 00 00 00 00 w1 = w5
|
||||
16: 04 01 00 00 ff ff ff ff w1 += -1
|
||||
17: 67 05 00 00 20 00 00 00 r5 <<= 32
|
||||
18: 77 05 00 00 20 00 00 00 r5 >>= 32
|
||||
19: a6 01 01 00 05 00 00 00 if w1 < 5 goto +1 <LBB0_4>
|
||||
20: b7 05 00 00 06 00 00 00 r5 = 6
|
||||
00000000000000a8 <LBB0_4>:
|
||||
21: b7 02 00 00 00 00 00 00 r2 = 0
|
||||
22: b7 01 00 00 00 00 00 00 r1 = 0
|
||||
; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
|
||||
23: 7b 1a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r1
|
||||
24: 7b 5a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r5
|
||||
|
||||
Note that insn #15 has w1 = w5 and w1 is refined later but
r5(w5) is eventually saved on stack at insn #24 for later use.
This causes a later verifier failure. The bug has been `fixed`__ in
Clang 13.
|
||||
|
||||
__ https://reviews.llvm.org/D97479
|
||||
|
||||
BPF CO-RE-based tests and Clang version
|
||||
=======================================
|
||||
|
||||
@ -131,3 +170,12 @@ failures:
|
||||
.. _2: https://reviews.llvm.org/D85174
|
||||
.. _3: https://reviews.llvm.org/D83878
|
||||
.. _4: https://reviews.llvm.org/D83242
|
||||
|
||||
Floating-point tests and Clang version
|
||||
======================================
|
||||
|
||||
Certain selftests, e.g. core_reloc, require support for the floating-point
types, which was introduced in `Clang 13`__. Older Clang versions will
either crash when compiling these tests or generate incorrect BTF.
|
||||
|
||||
__ https://reviews.llvm.org/D83289
|
||||
|
@ -23,6 +23,7 @@ static const char * const btf_kind_str_mapping[] = {
|
||||
[BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
|
||||
[BTF_KIND_VAR] = "VAR",
|
||||
[BTF_KIND_DATASEC] = "DATASEC",
|
||||
[BTF_KIND_FLOAT] = "FLOAT",
|
||||
};
|
||||
|
||||
static const char *btf_kind_str(__u16 kind)
|
||||
@ -173,6 +174,9 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
|
||||
}
|
||||
break;
|
||||
}
|
||||
case BTF_KIND_FLOAT:
|
||||
fprintf(out, " size=%u", t->size);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -2,6 +2,44 @@
|
||||
#include <test_progs.h>
|
||||
#include "test_attach_probe.skel.h"
|
||||
|
||||
#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
|
||||
|
||||
#define OP_RT_RA_MASK 0xffff0000UL
|
||||
#define LIS_R2 0x3c400000UL
|
||||
#define ADDIS_R2_R12 0x3c4c0000UL
|
||||
#define ADDI_R2_R2 0x38420000UL
|
||||
|
||||
static ssize_t get_offset(ssize_t addr, ssize_t base)
|
||||
{
|
||||
u32 *insn = (u32 *) addr;
|
||||
|
||||
/*
|
||||
* A PPC64 ABIv2 function may have a local and a global entry
|
||||
* point. We need to use the local entry point when patching
|
||||
* functions, so identify and step over the global entry point
|
||||
* sequence.
|
||||
*
|
||||
* The global entry point sequence is always of the form:
|
||||
*
|
||||
* addis r2,r12,XXXX
|
||||
* addi r2,r2,XXXX
|
||||
*
|
||||
* A linker optimisation may convert the addis to lis:
|
||||
*
|
||||
* lis r2,XXXX
|
||||
* addi r2,r2,XXXX
|
||||
*/
|
||||
if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
|
||||
((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
|
||||
((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
|
||||
return (ssize_t)(insn + 2) - base;
|
||||
else
|
||||
return addr - base;
|
||||
}
|
||||
#else
|
||||
#define get_offset(addr, base) (addr - base)
|
||||
#endif
|
||||
|
||||
ssize_t get_base_addr() {
|
||||
size_t start, offset;
|
||||
char buf[256];
|
||||
@ -36,7 +74,7 @@ void test_attach_probe(void)
|
||||
if (CHECK(base_addr < 0, "get_base_addr",
|
||||
"failed to find base addr: %zd", base_addr))
|
||||
return;
|
||||
uprobe_offset = (size_t)&get_base_addr - base_addr;
|
||||
uprobe_offset = get_offset((size_t)&get_base_addr, base_addr);
|
||||
|
||||
skel = test_attach_probe__open_and_load();
|
||||
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
|
||||
|
@ -76,6 +76,7 @@ void test_bpf_verif_scale(void)
|
||||
{ "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
|
||||
{ "loop4.o", BPF_PROG_TYPE_SCHED_CLS },
|
||||
{ "loop5.o", BPF_PROG_TYPE_SCHED_CLS },
|
||||
{ "loop6.o", BPF_PROG_TYPE_KPROBE },
|
||||
|
||||
/* partial unroll. 19k insn in a loop.
|
||||
* Total program size 20.8k insn.
|
||||
|
@ -1903,7 +1903,7 @@ static struct btf_raw_test raw_tests[] = {
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
|
||||
BTF_TYPE_ENC(0, 0x10000000, 4),
|
||||
BTF_TYPE_ENC(0, 0x20000000, 4),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
.str_sec = "",
|
||||
@ -3531,6 +3531,136 @@ static struct btf_raw_test raw_tests[] = {
|
||||
.max_entries = 1,
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "float test #1, well-formed",
|
||||
.raw_types = {
|
||||
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
|
||||
/* [1] */
|
||||
BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [2] */
|
||||
BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [3] */
|
||||
BTF_TYPE_FLOAT_ENC(NAME_TBD, 8), /* [4] */
|
||||
BTF_TYPE_FLOAT_ENC(NAME_TBD, 12), /* [5] */
|
||||
BTF_TYPE_FLOAT_ENC(NAME_TBD, 16), /* [6] */
|
||||
BTF_STRUCT_ENC(NAME_TBD, 5, 48), /* [7] */
|
||||
BTF_MEMBER_ENC(NAME_TBD, 2, 0),
|
||||
BTF_MEMBER_ENC(NAME_TBD, 3, 32),
|
||||
BTF_MEMBER_ENC(NAME_TBD, 4, 64),
|
||||
BTF_MEMBER_ENC(NAME_TBD, 5, 128),
|
||||
BTF_MEMBER_ENC(NAME_TBD, 6, 256),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
BTF_STR_SEC("\0int\0_Float16\0float\0double\0_Float80\0long_double"
|
||||
"\0floats\0a\0b\0c\0d\0e"),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "float_type_check_btf",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = 48,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 7,
|
||||
.max_entries = 1,
|
||||
},
|
||||
{
|
||||
.descr = "float test #2, invalid vlen",
|
||||
.raw_types = {
|
||||
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
|
||||
/* [1] */
|
||||
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 1), 4),
|
||||
/* [2] */
|
||||
BTF_END_RAW,
|
||||
},
|
||||
BTF_STR_SEC("\0int\0float"),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "float_type_check_btf",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = 4,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 2,
|
||||
.max_entries = 1,
|
||||
.btf_load_err = true,
|
||||
.err_str = "vlen != 0",
|
||||
},
|
||||
{
|
||||
.descr = "float test #3, invalid kind_flag",
|
||||
.raw_types = {
|
||||
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
|
||||
/* [1] */
|
||||
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 1, 0), 4),
|
||||
/* [2] */
|
||||
BTF_END_RAW,
|
||||
},
|
||||
BTF_STR_SEC("\0int\0float"),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "float_type_check_btf",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = 4,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 2,
|
||||
.max_entries = 1,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Invalid btf_info kind_flag",
|
||||
},
|
||||
{
|
||||
.descr = "float test #4, member does not fit",
|
||||
.raw_types = {
|
||||
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
|
||||
/* [1] */
|
||||
BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [2] */
|
||||
BTF_STRUCT_ENC(NAME_TBD, 1, 2), /* [3] */
|
||||
BTF_MEMBER_ENC(NAME_TBD, 2, 0),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
BTF_STR_SEC("\0int\0float\0floats\0x"),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "float_type_check_btf",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = 4,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 3,
|
||||
.max_entries = 1,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Member exceeds struct_size",
|
||||
},
|
||||
{
|
||||
.descr = "float test #5, member is not properly aligned",
|
||||
.raw_types = {
|
||||
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
|
||||
/* [1] */
|
||||
BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [2] */
|
||||
BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [3] */
|
||||
BTF_MEMBER_ENC(NAME_TBD, 2, 8),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
BTF_STR_SEC("\0int\0float\0floats\0x"),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "float_type_check_btf",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = 4,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 3,
|
||||
.max_entries = 1,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Member is not properly aligned",
|
||||
},
|
||||
{
|
||||
.descr = "float test #6, invalid size",
|
||||
.raw_types = {
|
||||
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
|
||||
/* [1] */
|
||||
BTF_TYPE_FLOAT_ENC(NAME_TBD, 6), /* [2] */
|
||||
BTF_END_RAW,
|
||||
},
|
||||
BTF_STR_SEC("\0int\0float"),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "float_type_check_btf",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = 6,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 2,
|
||||
.max_entries = 1,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Invalid type_size",
|
||||
},
|
||||
|
||||
}; /* struct btf_raw_test raw_tests[] */
|
||||
|
||||
static const char *get_next_str(const char *start, const char *end)
|
||||
@ -6281,11 +6411,12 @@ const struct btf_dedup_test dedup_tests[] = {
|
||||
/* int[16] */
|
||||
BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */
|
||||
/* struct s { */
|
||||
BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [3] */
|
||||
BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [3] */
|
||||
BTF_MEMBER_ENC(NAME_NTH(3), 4, 0), /* struct s *next; */
|
||||
BTF_MEMBER_ENC(NAME_NTH(4), 5, 64), /* const int *a; */
|
||||
BTF_MEMBER_ENC(NAME_NTH(5), 2, 128), /* int b[16]; */
|
||||
BTF_MEMBER_ENC(NAME_NTH(6), 1, 640), /* int c; */
|
||||
BTF_MEMBER_ENC(NAME_NTH(8), 13, 672), /* float d; */
|
||||
/* ptr -> [3] struct s */
|
||||
BTF_PTR_ENC(3), /* [4] */
|
||||
/* ptr -> [6] const int */
|
||||
@ -6296,39 +6427,43 @@ const struct btf_dedup_test dedup_tests[] = {
|
||||
/* full copy of the above */
|
||||
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [7] */
|
||||
BTF_TYPE_ARRAY_ENC(7, 7, 16), /* [8] */
|
||||
BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [9] */
|
||||
BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [9] */
|
||||
BTF_MEMBER_ENC(NAME_NTH(3), 10, 0),
|
||||
BTF_MEMBER_ENC(NAME_NTH(4), 11, 64),
|
||||
BTF_MEMBER_ENC(NAME_NTH(5), 8, 128),
|
||||
BTF_MEMBER_ENC(NAME_NTH(6), 7, 640),
|
||||
BTF_MEMBER_ENC(NAME_NTH(8), 13, 672),
|
||||
BTF_PTR_ENC(9), /* [10] */
|
||||
BTF_PTR_ENC(12), /* [11] */
|
||||
BTF_CONST_ENC(7), /* [12] */
|
||||
BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [13] */
|
||||
BTF_END_RAW,
|
||||
},
|
||||
BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0"),
|
||||
BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0float\0d"),
|
||||
},
|
||||
.expect = {
|
||||
.raw_types = {
|
||||
/* int */
|
||||
BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 32, 4), /* [1] */
|
||||
BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 32, 4), /* [1] */
|
||||
/* int[16] */
|
||||
BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */
|
||||
/* struct s { */
|
||||
BTF_STRUCT_ENC(NAME_NTH(6), 4, 84), /* [3] */
|
||||
BTF_MEMBER_ENC(NAME_NTH(5), 4, 0), /* struct s *next; */
|
||||
BTF_STRUCT_ENC(NAME_NTH(8), 5, 88), /* [3] */
|
||||
BTF_MEMBER_ENC(NAME_NTH(7), 4, 0), /* struct s *next; */
|
||||
BTF_MEMBER_ENC(NAME_NTH(1), 5, 64), /* const int *a; */
|
||||
BTF_MEMBER_ENC(NAME_NTH(2), 2, 128), /* int b[16]; */
|
||||
BTF_MEMBER_ENC(NAME_NTH(3), 1, 640), /* int c; */
|
||||
BTF_MEMBER_ENC(NAME_NTH(4), 7, 672), /* float d; */
|
||||
/* ptr -> [3] struct s */
|
||||
BTF_PTR_ENC(3), /* [4] */
|
||||
/* ptr -> [6] const int */
|
||||
BTF_PTR_ENC(6), /* [5] */
|
||||
/* const -> [1] int */
|
||||
BTF_CONST_ENC(1), /* [6] */
|
||||
BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [7] */
|
||||
BTF_END_RAW,
|
||||
},
|
||||
BTF_STR_SEC("\0a\0b\0c\0int\0next\0s"),
|
||||
BTF_STR_SEC("\0a\0b\0c\0d\0int\0float\0next\0s"),
|
||||
},
|
||||
.opts = {
|
||||
.dont_resolve_fwds = false,
|
||||
@ -6449,9 +6584,10 @@ const struct btf_dedup_test dedup_tests[] = {
|
||||
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
|
||||
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
|
||||
BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */
|
||||
BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */
|
||||
BTF_END_RAW,
|
||||
},
|
||||
BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
|
||||
BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"),
|
||||
},
|
||||
.expect = {
|
||||
.raw_types = {
|
||||
@ -6474,16 +6610,17 @@ const struct btf_dedup_test dedup_tests[] = {
|
||||
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
|
||||
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
|
||||
BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */
|
||||
BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */
|
||||
BTF_END_RAW,
|
||||
},
|
||||
BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
|
||||
BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"),
|
||||
},
|
||||
.opts = {
|
||||
.dont_resolve_fwds = false,
|
||||
},
|
||||
},
|
||||
{
|
||||
.descr = "dedup: no int duplicates",
|
||||
.descr = "dedup: no int/float duplicates",
|
||||
.input = {
|
||||
.raw_types = {
|
||||
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8),
|
||||
@ -6498,9 +6635,15 @@ const struct btf_dedup_test dedup_tests[] = {
|
||||
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
|
||||
/* different byte size */
|
||||
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
|
||||
/* all allowed sizes */
|
||||
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2),
|
||||
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4),
|
||||
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8),
|
||||
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12),
|
||||
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
BTF_STR_SEC("\0int\0some other int"),
|
||||
BTF_STR_SEC("\0int\0some other int\0float"),
|
||||
},
|
||||
.expect = {
|
||||
.raw_types = {
|
||||
@ -6516,9 +6659,15 @@ const struct btf_dedup_test dedup_tests[] = {
|
||||
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
|
||||
/* different byte size */
|
||||
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
|
||||
/* all allowed sizes */
|
||||
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2),
|
||||
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4),
|
||||
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8),
|
||||
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12),
|
||||
BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
BTF_STR_SEC("\0int\0some other int"),
|
||||
BTF_STR_SEC("\0int\0some other int\0float"),
|
||||
},
|
||||
.opts = {
|
||||
.dont_resolve_fwds = false,
|
||||
@ -6630,6 +6779,7 @@ static int btf_type_size(const struct btf_type *t)
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
case BTF_KIND_FLOAT:
return base_size;
case BTF_KIND_INT:
return base_size + sizeof(__u32);

@ -266,6 +266,7 @@ static int duration = 0;
.arr_elem_sz = sizeof(((type *)0)->arr_field[0]), \
.ptr_sz = 8, /* always 8-byte pointer for BPF */ \
.enum_sz = sizeof(((type *)0)->enum_field), \
.float_sz = sizeof(((type *)0)->float_field), \
}

#define SIZE_CASE(name) { \
130
tools/testing/selftests/bpf/prog_tests/for_each.c
Normal file
@ -0,0 +1,130 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2021 Facebook */
|
||||
#include <test_progs.h>
|
||||
#include <network_helpers.h>
|
||||
#include "for_each_hash_map_elem.skel.h"
|
||||
#include "for_each_array_map_elem.skel.h"
|
||||
|
||||
static unsigned int duration;
|
||||
|
||||
static void test_hash_map(void)
|
||||
{
|
||||
int i, err, hashmap_fd, max_entries, percpu_map_fd;
|
||||
struct for_each_hash_map_elem *skel;
|
||||
__u64 *percpu_valbuf = NULL;
|
||||
__u32 key, num_cpus, retval;
|
||||
__u64 val;
|
||||
|
||||
skel = for_each_hash_map_elem__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load"))
|
||||
return;
|
||||
|
||||
hashmap_fd = bpf_map__fd(skel->maps.hashmap);
|
||||
max_entries = bpf_map__max_entries(skel->maps.hashmap);
|
||||
for (i = 0; i < max_entries; i++) {
|
||||
key = i;
|
||||
val = i + 1;
|
||||
err = bpf_map_update_elem(hashmap_fd, &key, &val, BPF_ANY);
|
||||
if (!ASSERT_OK(err, "map_update"))
|
||||
goto out;
|
||||
}
|
||||
|
||||
num_cpus = bpf_num_possible_cpus();
|
||||
percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
|
||||
percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
|
||||
if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
|
||||
goto out;
|
||||
|
||||
key = 1;
|
||||
for (i = 0; i < num_cpus; i++)
|
||||
percpu_valbuf[i] = i + 1;
|
||||
err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
|
||||
if (!ASSERT_OK(err, "percpu_map_update"))
|
||||
goto out;
|
||||
|
||||
err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
|
||||
1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
|
||||
&retval, &duration);
|
||||
if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
|
||||
err, errno, retval))
|
||||
goto out;
|
||||
|
||||
ASSERT_EQ(skel->bss->hashmap_output, 4, "hashmap_output");
|
||||
ASSERT_EQ(skel->bss->hashmap_elems, max_entries, "hashmap_elems");
|
||||
|
||||
key = 1;
|
||||
err = bpf_map_lookup_elem(hashmap_fd, &key, &val);
|
||||
ASSERT_ERR(err, "hashmap_lookup");
|
||||
|
||||
ASSERT_EQ(skel->bss->percpu_called, 1, "percpu_called");
|
||||
ASSERT_LT(skel->bss->cpu, num_cpus, "num_cpus");
|
||||
ASSERT_EQ(skel->bss->percpu_map_elems, 1, "percpu_map_elems");
|
||||
ASSERT_EQ(skel->bss->percpu_key, 1, "percpu_key");
|
||||
ASSERT_EQ(skel->bss->percpu_val, skel->bss->cpu + 1, "percpu_val");
|
||||
ASSERT_EQ(skel->bss->percpu_output, 100, "percpu_output");
|
||||
out:
|
||||
free(percpu_valbuf);
|
||||
for_each_hash_map_elem__destroy(skel);
|
||||
}
|
||||
|
||||
static void test_array_map(void)
|
||||
{
|
||||
__u32 key, num_cpus, max_entries, retval;
|
||||
int i, arraymap_fd, percpu_map_fd, err;
|
||||
struct for_each_array_map_elem *skel;
|
||||
__u64 *percpu_valbuf = NULL;
|
||||
__u64 val, expected_total;
|
||||
|
||||
skel = for_each_array_map_elem__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load"))
|
||||
return;
|
||||
|
||||
arraymap_fd = bpf_map__fd(skel->maps.arraymap);
|
||||
expected_total = 0;
|
||||
max_entries = bpf_map__max_entries(skel->maps.arraymap);
|
||||
for (i = 0; i < max_entries; i++) {
|
||||
key = i;
|
||||
val = i + 1;
|
||||
/* skip the last iteration for expected total */
|
||||
if (i != max_entries - 1)
|
||||
expected_total += val;
|
||||
err = bpf_map_update_elem(arraymap_fd, &key, &val, BPF_ANY);
|
||||
if (!ASSERT_OK(err, "map_update"))
|
||||
goto out;
|
||||
}
|
||||
|
||||
num_cpus = bpf_num_possible_cpus();
|
||||
percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
|
||||
percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
|
||||
if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
|
||||
goto out;
|
||||
|
||||
key = 0;
|
||||
for (i = 0; i < num_cpus; i++)
|
||||
percpu_valbuf[i] = i + 1;
|
||||
err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
|
||||
if (!ASSERT_OK(err, "percpu_map_update"))
|
||||
goto out;
|
||||
|
||||
err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
|
||||
1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
|
||||
&retval, &duration);
|
||||
if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
|
||||
err, errno, retval))
|
||||
goto out;
|
||||
|
||||
ASSERT_EQ(skel->bss->arraymap_output, expected_total, "array_output");
|
||||
ASSERT_EQ(skel->bss->cpu + 1, skel->bss->percpu_val, "percpu_val");
|
||||
|
||||
out:
|
||||
free(percpu_valbuf);
|
||||
for_each_array_map_elem__destroy(skel);
|
||||
}
|
||||
|
||||
void test_for_each(void)
|
||||
{
|
||||
if (test__start_subtest("hash_map"))
|
||||
test_hash_map();
|
||||
if (test__start_subtest("array_map"))
|
||||
test_array_map();
|
||||
}
|
@ -2,12 +2,31 @@
|
||||
#include <test_progs.h>
|
||||
#include <network_helpers.h>
|
||||
|
||||
#include "test_pkt_access.skel.h"
|
||||
|
||||
static const __u32 duration;
|
||||
|
||||
static void check_run_cnt(int prog_fd, __u64 run_cnt)
|
||||
{
|
||||
struct bpf_prog_info info = {};
|
||||
__u32 info_len = sizeof(info);
|
||||
int err;
|
||||
|
||||
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
|
||||
if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd))
|
||||
return;
|
||||
|
||||
CHECK(run_cnt != info.run_cnt, "run_cnt",
|
||||
"incorrect number of repetitions, want %llu have %llu\n", run_cnt, info.run_cnt);
|
||||
}
|
||||
|
||||
void test_prog_run_xattr(void)
|
||||
{
|
||||
const char *file = "./test_pkt_access.o";
|
||||
struct bpf_object *obj;
|
||||
char buf[10];
|
||||
int err;
|
||||
struct test_pkt_access *skel;
|
||||
int err, stats_fd = -1;
|
||||
char buf[10] = {};
|
||||
__u64 run_cnt = 0;
|
||||
|
||||
struct bpf_prog_test_run_attr tattr = {
|
||||
.repeat = 1,
|
||||
.data_in = &pkt_v4,
|
||||
@ -16,12 +35,15 @@ void test_prog_run_xattr(void)
|
||||
.data_size_out = 5,
|
||||
};
|
||||
|
||||
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj,
|
||||
&tattr.prog_fd);
|
||||
if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
|
||||
stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
|
||||
if (CHECK_ATTR(stats_fd < 0, "enable_stats", "failed %d\n", errno))
|
||||
return;
|
||||
|
||||
memset(buf, 0, sizeof(buf));
|
||||
skel = test_pkt_access__open_and_load();
|
||||
if (CHECK_ATTR(!skel, "open_and_load", "failed\n"))
|
||||
goto cleanup;
|
||||
|
||||
tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
|
||||
|
||||
err = bpf_prog_test_run_xattr(&tattr);
|
||||
CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run",
|
||||
@ -34,8 +56,12 @@ void test_prog_run_xattr(void)
|
||||
CHECK_ATTR(buf[5] != 0, "overflow",
|
||||
"BPF_PROG_TEST_RUN ignored size hint\n");
|
||||
|
||||
run_cnt += tattr.repeat;
|
||||
check_run_cnt(tattr.prog_fd, run_cnt);
|
||||
|
||||
tattr.data_out = NULL;
|
||||
tattr.data_size_out = 0;
|
||||
tattr.repeat = 2;
|
||||
errno = 0;
|
||||
|
||||
err = bpf_prog_test_run_xattr(&tattr);
|
||||
@ -46,5 +72,12 @@ void test_prog_run_xattr(void)
|
||||
err = bpf_prog_test_run_xattr(&tattr);
|
||||
CHECK_ATTR(err != -EINVAL, "run_wrong_size_out", "err %d\n", err);
|
||||
|
||||
bpf_object__close(obj);
|
||||
run_cnt += tattr.repeat;
|
||||
check_run_cnt(tattr.prog_fd, run_cnt);
|
||||
|
||||
cleanup:
|
||||
if (skel)
|
||||
test_pkt_access__destroy(skel);
|
||||
if (stats_fd != -1)
|
||||
close(stats_fd);
|
||||
}
|
||||
|
@ -241,6 +241,48 @@ fail:
|
||||
return -1;
|
||||
}
|
||||
|
||||
static __u64 socket_cookie(int fd)
|
||||
{
|
||||
__u64 cookie;
|
||||
socklen_t cookie_len = sizeof(cookie);
|
||||
|
||||
if (CHECK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len) < 0,
|
||||
"getsockopt(SO_COOKIE)", "%s\n", strerror(errno)))
|
||||
return 0;
|
||||
return cookie;
|
||||
}
|
||||
|
||||
static int fill_sk_lookup_ctx(struct bpf_sk_lookup *ctx, const char *local_ip, __u16 local_port,
|
||||
const char *remote_ip, __u16 remote_port)
|
||||
{
|
||||
void *local, *remote;
|
||||
int err;
|
||||
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
ctx->local_port = local_port;
|
||||
ctx->remote_port = htons(remote_port);
|
||||
|
||||
if (is_ipv6(local_ip)) {
|
||||
ctx->family = AF_INET6;
|
||||
local = &ctx->local_ip6[0];
|
||||
remote = &ctx->remote_ip6[0];
|
||||
} else {
|
||||
ctx->family = AF_INET;
|
||||
local = &ctx->local_ip4;
|
||||
remote = &ctx->remote_ip4;
|
||||
}
|
||||
|
||||
err = inet_pton(ctx->family, local_ip, local);
|
||||
if (CHECK(err != 1, "inet_pton", "local_ip failed\n"))
|
||||
return 1;
|
||||
|
||||
err = inet_pton(ctx->family, remote_ip, remote);
|
||||
if (CHECK(err != 1, "inet_pton", "remote_ip failed\n"))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int send_byte(int fd)
|
||||
{
|
||||
ssize_t n;
|
||||
@ -1009,18 +1051,27 @@ static void test_drop_on_reuseport(struct test_sk_lookup *skel)
|
||||
|
||||
static void run_sk_assign(struct test_sk_lookup *skel,
|
||||
struct bpf_program *lookup_prog,
|
||||
const char *listen_ip, const char *connect_ip)
|
||||
const char *remote_ip, const char *local_ip)
|
||||
{
|
||||
int client_fd, peer_fd, server_fds[MAX_SERVERS] = { -1 };
|
||||
struct bpf_link *lookup_link;
|
||||
int server_fds[MAX_SERVERS] = { -1 };
|
||||
struct bpf_sk_lookup ctx;
|
||||
__u64 server_cookie;
|
||||
int i, err;
|
||||
|
||||
lookup_link = attach_lookup_prog(lookup_prog);
|
||||
if (!lookup_link)
|
||||
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
|
||||
.ctx_in = &ctx,
|
||||
.ctx_size_in = sizeof(ctx),
|
||||
.ctx_out = &ctx,
|
||||
.ctx_size_out = sizeof(ctx),
|
||||
);
|
||||
|
||||
if (fill_sk_lookup_ctx(&ctx, local_ip, EXT_PORT, remote_ip, INT_PORT))
|
||||
return;
|
||||
|
||||
ctx.protocol = IPPROTO_TCP;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
|
||||
server_fds[i] = make_server(SOCK_STREAM, listen_ip, 0, NULL);
|
||||
server_fds[i] = make_server(SOCK_STREAM, local_ip, 0, NULL);
|
||||
if (server_fds[i] < 0)
|
||||
goto close_servers;
|
||||
|
||||
@ -1030,23 +1081,25 @@ static void run_sk_assign(struct test_sk_lookup *skel,
|
||||
goto close_servers;
|
||||
}
|
||||
|
||||
client_fd = make_client(SOCK_STREAM, connect_ip, EXT_PORT);
|
||||
if (client_fd < 0)
|
||||
server_cookie = socket_cookie(server_fds[SERVER_B]);
|
||||
if (!server_cookie)
|
||||
return;
|
||||
|
||||
err = bpf_prog_test_run_opts(bpf_program__fd(lookup_prog), &opts);
|
||||
if (CHECK(err, "test_run", "failed with error %d\n", errno))
|
||||
goto close_servers;
|
||||
|
||||
peer_fd = accept(server_fds[SERVER_B], NULL, NULL);
|
||||
if (CHECK(peer_fd < 0, "accept", "failed\n"))
|
||||
goto close_client;
|
||||
if (CHECK(ctx.cookie == 0, "ctx.cookie", "no socket selected\n"))
|
||||
goto close_servers;
|
||||
|
||||
CHECK(ctx.cookie != server_cookie, "ctx.cookie",
|
||||
"selected sk %llu instead of %llu\n", ctx.cookie, server_cookie);
|
||||
|
||||
close(peer_fd);
|
||||
close_client:
|
||||
close(client_fd);
|
||||
close_servers:
|
||||
for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
|
||||
if (server_fds[i] != -1)
|
||||
close(server_fds[i]);
|
||||
}
|
||||
bpf_link__destroy(lookup_link);
|
||||
}
|
||||
|
||||
static void run_sk_assign_v4(struct test_sk_lookup *skel,
|
||||
|
@ -1014,8 +1014,8 @@ static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
|
||||
struct bpf_map *inner_map, int family,
|
||||
int sotype)
|
||||
{
|
||||
int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
|
||||
int parser = bpf_program__fd(skel->progs.prog_skb_parser);
|
||||
int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
|
||||
int parser = bpf_program__fd(skel->progs.prog_stream_parser);
|
||||
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
|
||||
int sock_map = bpf_map__fd(inner_map);
|
||||
int err;
|
||||
@ -1125,8 +1125,8 @@ static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
|
||||
struct bpf_map *inner_map, int family,
|
||||
int sotype)
|
||||
{
|
||||
int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
|
||||
int parser = bpf_program__fd(skel->progs.prog_skb_parser);
|
||||
int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
|
||||
int parser = bpf_program__fd(skel->progs.prog_stream_parser);
|
||||
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
|
||||
int sock_map = bpf_map__fd(inner_map);
|
||||
int err;
|
||||
|
92
tools/testing/selftests/bpf/prog_tests/task_local_storage.c
Normal file
@ -0,0 +1,92 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2021 Facebook */
|
||||
|
||||
#define _GNU_SOURCE /* See feature_test_macros(7) */
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h> /* For SYS_xxx definitions */
|
||||
#include <sys/types.h>
|
||||
#include <test_progs.h>
|
||||
#include "task_local_storage.skel.h"
|
||||
#include "task_local_storage_exit_creds.skel.h"
|
||||
#include "task_ls_recursion.skel.h"
|
||||
|
||||
static void test_sys_enter_exit(void)
|
||||
{
|
||||
struct task_local_storage *skel;
|
||||
int err;
|
||||
|
||||
skel = task_local_storage__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
|
||||
return;
|
||||
|
||||
skel->bss->target_pid = syscall(SYS_gettid);
|
||||
|
||||
err = task_local_storage__attach(skel);
|
||||
if (!ASSERT_OK(err, "skel_attach"))
|
||||
goto out;
|
||||
|
||||
syscall(SYS_gettid);
|
||||
syscall(SYS_gettid);
|
||||
|
||||
/* 3x syscalls: 1x attach and 2x gettid */
|
||||
ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt");
|
||||
ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt");
|
||||
ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt");
|
||||
out:
|
||||
task_local_storage__destroy(skel);
|
||||
}
|
||||
|
||||
static void test_exit_creds(void)
|
||||
{
|
||||
struct task_local_storage_exit_creds *skel;
|
||||
int err;
|
||||
|
||||
skel = task_local_storage_exit_creds__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
|
||||
return;
|
||||
|
||||
err = task_local_storage_exit_creds__attach(skel);
|
||||
if (!ASSERT_OK(err, "skel_attach"))
|
||||
goto out;
|
||||
|
||||
/* trigger at least one exit_creds() */
|
||||
if (CHECK_FAIL(system("ls > /dev/null")))
|
||||
goto out;
|
||||
|
||||
/* sync rcu to make sure exit_creds() is called for "ls" */
|
||||
kern_sync_rcu();
|
||||
ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count");
|
||||
ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count");
|
||||
out:
|
||||
task_local_storage_exit_creds__destroy(skel);
|
||||
}
|
||||
|
||||
static void test_recursion(void)
|
||||
{
|
||||
struct task_ls_recursion *skel;
|
||||
int err;
|
||||
|
||||
skel = task_ls_recursion__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
|
||||
return;
|
||||
|
||||
err = task_ls_recursion__attach(skel);
|
||||
if (!ASSERT_OK(err, "skel_attach"))
|
||||
goto out;
|
||||
|
||||
/* trigger sys_enter, make sure it does not cause deadlock */
|
||||
syscall(SYS_gettid);
|
||||
|
||||
out:
|
||||
task_ls_recursion__destroy(skel);
|
||||
}
|
||||
|
||||
void test_task_local_storage(void)
|
||||
{
|
||||
if (test__start_subtest("sys_enter_exit"))
|
||||
test_sys_enter_exit();
|
||||
if (test__start_subtest("exit_creds"))
|
||||
test_exit_creds();
|
||||
if (test__start_subtest("recursion"))
|
||||
test_recursion();
|
||||
}
|
@ -205,6 +205,12 @@ struct struct_with_embedded_stuff {
|
||||
int t[11];
|
||||
};
|
||||
|
||||
struct float_struct {
|
||||
float f;
|
||||
const double *d;
|
||||
volatile long double *ld;
|
||||
};
|
||||
|
||||
struct root_struct {
|
||||
enum e1 _1;
|
||||
enum e2 _2;
|
||||
@ -219,6 +225,7 @@ struct root_struct {
|
||||
union_fwd_t *_12;
|
||||
union_fwd_ptr_t _13;
|
||||
struct struct_with_embedded_stuff _14;
|
||||
struct float_struct _15;
|
||||
};
|
||||
|
||||
/* ------ END-EXPECTED-OUTPUT ------ */
|
||||
|
@ -807,6 +807,7 @@ struct core_reloc_size_output {
|
||||
int arr_elem_sz;
|
||||
int ptr_sz;
|
||||
int enum_sz;
|
||||
int float_sz;
|
||||
};
|
||||
|
||||
struct core_reloc_size {
|
||||
@ -816,6 +817,7 @@ struct core_reloc_size {
|
||||
int arr_field[4];
|
||||
void *ptr_field;
|
||||
enum { VALUE = 123 } enum_field;
|
||||
float float_field;
|
||||
};
|
||||
|
||||
struct core_reloc_size___diff_sz {
|
||||
@ -825,6 +827,7 @@ struct core_reloc_size___diff_sz {
|
||||
char arr_field[10];
|
||||
void *ptr_field;
|
||||
enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field;
|
||||
double float_field;
|
||||
};
|
||||
|
||||
/* Error case of two candidates with the fields (int_field) at the same
|
||||
@ -839,6 +842,7 @@ struct core_reloc_size___err_ambiguous1 {
|
||||
int arr_field[4];
|
||||
void *ptr_field;
|
||||
enum { VALUE___1 = 123 } enum_field;
|
||||
float float_field;
|
||||
};
|
||||
|
||||
struct core_reloc_size___err_ambiguous2 {
|
||||
@ -850,6 +854,7 @@ struct core_reloc_size___err_ambiguous2 {
|
||||
int arr_field[4];
|
||||
void *ptr_field;
|
||||
enum { VALUE___2 = 123 } enum_field;
|
||||
float float_field;
|
||||
};
|
||||
|
||||
/*
|
||||
|
61
tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
Normal file
@ -0,0 +1,61 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 3);
__type(key, __u32);
__type(value, __u64);
} arraymap SEC(".maps");

struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, 1);
__type(key, __u32);
__type(value, __u64);
} percpu_map SEC(".maps");

struct callback_ctx {
int output;
};

static __u64
check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
struct callback_ctx *data)
{
data->output += *val;
if (*key == 1)
return 1; /* stop the iteration */
return 0;
}

__u32 cpu = 0;
__u64 percpu_val = 0;

static __u64
check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
struct callback_ctx *data)
{
cpu = bpf_get_smp_processor_id();
percpu_val = *val;
return 0;
}

u32 arraymap_output = 0;

SEC("classifier")
int test_pkt_access(struct __sk_buff *skb)
{
struct callback_ctx data;

data.output = 0;
bpf_for_each_map_elem(&arraymap, check_array_elem, &data, 0);
arraymap_output = data.output;

bpf_for_each_map_elem(&percpu_map, check_percpu_elem, (void *)0, 0);
return 0;
}
95
tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c
Normal file
@ -0,0 +1,95 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2021 Facebook */
|
||||
#include "vmlinux.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(max_entries, 3);
|
||||
__type(key, __u32);
|
||||
__type(value, __u64);
|
||||
} hashmap SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
|
||||
__uint(max_entries, 1);
|
||||
__type(key, __u32);
|
||||
__type(value, __u64);
|
||||
} percpu_map SEC(".maps");
|
||||
|
||||
struct callback_ctx {
|
||||
struct __sk_buff *ctx;
|
||||
int input;
|
||||
int output;
|
||||
};
|
||||
|
||||
static __u64
|
||||
check_hash_elem(struct bpf_map *map, __u32 *key, __u64 *val,
|
||||
struct callback_ctx *data)
|
||||
{
|
||||
struct __sk_buff *skb = data->ctx;
|
||||
__u32 k;
|
||||
__u64 v;
|
||||
|
||||
if (skb) {
|
||||
k = *key;
|
||||
v = *val;
|
||||
if (skb->len == 10000 && k == 10 && v == 10)
|
||||
data->output = 3; /* impossible path */
|
||||
else
|
||||
data->output = 4;
|
||||
} else {
|
||||
data->output = data->input;
|
||||
bpf_map_delete_elem(map, key);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
__u32 cpu = 0;
|
||||
__u32 percpu_called = 0;
|
||||
__u32 percpu_key = 0;
|
||||
__u64 percpu_val = 0;
|
||||
int percpu_output = 0;
|
||||
|
||||
static __u64
|
||||
check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
|
||||
struct callback_ctx *unused)
|
||||
{
|
||||
struct callback_ctx data;
|
||||
|
||||
percpu_called++;
|
||||
cpu = bpf_get_smp_processor_id();
|
||||
percpu_key = *key;
|
||||
percpu_val = *val;
|
||||
|
||||
data.ctx = 0;
|
||||
data.input = 100;
|
||||
data.output = 0;
|
||||
bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
|
||||
percpu_output = data.output;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int hashmap_output = 0;
|
||||
int hashmap_elems = 0;
|
||||
int percpu_map_elems = 0;
|
||||
|
||||
SEC("classifier")
|
||||
int test_pkt_access(struct __sk_buff *skb)
|
||||
{
|
||||
struct callback_ctx data;
|
||||
|
||||
data.ctx = skb;
|
||||
data.input = 10;
|
||||
data.output = 0;
|
||||
hashmap_elems = bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
|
||||
hashmap_output = data.output;
|
||||
|
||||
percpu_map_elems = bpf_for_each_map_elem(&percpu_map, check_percpu_elem,
|
||||
(void *)0, 0);
|
||||
return 0;
|
||||
}
|
99
tools/testing/selftests/bpf/progs/loop6.c
Normal file
@ -0,0 +1,99 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/ptrace.h>
|
||||
#include <stddef.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
/* typically virtio scsi has max SGs of 6 */
|
||||
#define VIRTIO_MAX_SGS 6
|
||||
|
||||
/* Verifier will fail with SG_MAX = 128. The failure can be
|
||||
* workarounded with a smaller SG_MAX, e.g. 10.
|
||||
*/
|
||||
#define WORKAROUND
|
||||
#ifdef WORKAROUND
|
||||
#define SG_MAX 10
|
||||
#else
|
||||
/* typically virtio blk has max SEG of 128 */
|
||||
#define SG_MAX 128
|
||||
#endif
|
||||
|
||||
#define SG_CHAIN 0x01UL
|
||||
#define SG_END 0x02UL
|
||||
|
||||
struct scatterlist {
|
||||
unsigned long page_link;
|
||||
unsigned int offset;
|
||||
unsigned int length;
|
||||
};
|
||||
|
||||
#define sg_is_chain(sg) ((sg)->page_link & SG_CHAIN)
|
||||
#define sg_is_last(sg) ((sg)->page_link & SG_END)
|
||||
#define sg_chain_ptr(sg) \
|
||||
((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
|
||||
|
||||
static inline struct scatterlist *__sg_next(struct scatterlist *sgp)
|
||||
{
|
||||
struct scatterlist sg;
|
||||
|
||||
bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
|
||||
if (sg_is_last(&sg))
|
||||
return NULL;
|
||||
|
||||
sgp++;
|
||||
|
||||
bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
|
||||
if (sg_is_chain(&sg))
|
||||
sgp = sg_chain_ptr(&sg);
|
||||
|
||||
return sgp;
|
||||
}
|
||||
|
||||
static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i)
|
||||
{
|
||||
struct scatterlist *sgp;
|
||||
|
||||
bpf_probe_read_kernel(&sgp, sizeof(sgp), sgs + i);
|
||||
return sgp;
|
||||
}
|
||||
|
||||
int config = 0;
|
||||
int result = 0;
|
||||
|
||||
SEC("kprobe/virtqueue_add_sgs")
|
||||
int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
|
||||
unsigned int out_sgs, unsigned int in_sgs)
|
||||
{
|
||||
struct scatterlist *sgp = NULL;
|
||||
__u64 length1 = 0, length2 = 0;
|
||||
unsigned int i, n, len;
|
||||
|
||||
if (config != 0)
|
||||
return 0;
|
||||
|
||||
for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) {
|
||||
for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
|
||||
sgp = __sg_next(sgp)) {
|
||||
bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
|
||||
length1 += len;
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; (i < VIRTIO_MAX_SGS) && (i < in_sgs); i++) {
|
||||
for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
|
||||
sgp = __sg_next(sgp)) {
|
||||
bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
|
||||
length2 += len;
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
config = 1;
|
||||
result = length2 - length1;
|
||||
return 0;
|
||||
}
|
64
tools/testing/selftests/bpf/progs/task_local_storage.c
Normal file
@ -0,0 +1,64 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2021 Facebook */
|
||||
|
||||
#include "vmlinux.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
|
||||
__uint(map_flags, BPF_F_NO_PREALLOC);
|
||||
__type(key, int);
|
||||
__type(value, long);
|
||||
} enter_id SEC(".maps");
|
||||
|
||||
#define MAGIC_VALUE 0xabcd1234
|
||||
|
||||
pid_t target_pid = 0;
|
||||
int mismatch_cnt = 0;
|
||||
int enter_cnt = 0;
|
||||
int exit_cnt = 0;
|
||||
|
||||
SEC("tp_btf/sys_enter")
|
||||
int BPF_PROG(on_enter, struct pt_regs *regs, long id)
|
||||
{
|
||||
struct task_struct *task;
|
||||
long *ptr;
|
||||
|
||||
task = bpf_get_current_task_btf();
|
||||
if (task->pid != target_pid)
|
||||
return 0;
|
||||
|
||||
ptr = bpf_task_storage_get(&enter_id, task, 0,
|
||||
BPF_LOCAL_STORAGE_GET_F_CREATE);
|
||||
if (!ptr)
|
||||
return 0;
|
||||
|
||||
__sync_fetch_and_add(&enter_cnt, 1);
|
||||
*ptr = MAGIC_VALUE + enter_cnt;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/sys_exit")
|
||||
int BPF_PROG(on_exit, struct pt_regs *regs, long id)
|
||||
{
|
||||
struct task_struct *task;
|
||||
long *ptr;
|
||||
|
||||
task = bpf_get_current_task_btf();
|
||||
if (task->pid != target_pid)
|
||||
return 0;
|
||||
|
||||
ptr = bpf_task_storage_get(&enter_id, task, 0,
|
||||
BPF_LOCAL_STORAGE_GET_F_CREATE);
|
||||
if (!ptr)
|
||||
return 0;
|
||||
|
||||
__sync_fetch_and_add(&exit_cnt, 1);
|
||||
if (*ptr != MAGIC_VALUE + exit_cnt)
|
||||
__sync_fetch_and_add(&mismatch_cnt, 1);
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,32 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, __u64);
} task_storage SEC(".maps");

int valid_ptr_count = 0;
int null_ptr_count = 0;

SEC("fentry/exit_creds")
int BPF_PROG(trace_exit_creds, struct task_struct *task)
{
__u64 *ptr;

ptr = bpf_task_storage_get(&task_storage, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
__sync_fetch_and_add(&valid_ptr_count, 1);
else
__sync_fetch_and_add(&null_ptr_count, 1);
return 0;
}
70
tools/testing/selftests/bpf/progs/task_ls_recursion.c
Normal file
@ -0,0 +1,70 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2021 Facebook */
|
||||
|
||||
#include "vmlinux.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
|
||||
__uint(map_flags, BPF_F_NO_PREALLOC);
|
||||
__type(key, int);
|
||||
__type(value, long);
|
||||
} map_a SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
|
||||
__uint(map_flags, BPF_F_NO_PREALLOC);
|
||||
__type(key, int);
|
||||
__type(value, long);
|
||||
} map_b SEC(".maps");
|
||||
|
||||
SEC("fentry/bpf_local_storage_lookup")
|
||||
int BPF_PROG(on_lookup)
|
||||
{
|
||||
struct task_struct *task = bpf_get_current_task_btf();
|
||||
|
||||
bpf_task_storage_delete(&map_a, task);
|
||||
bpf_task_storage_delete(&map_b, task);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("fentry/bpf_local_storage_update")
|
||||
int BPF_PROG(on_update)
|
||||
{
|
||||
struct task_struct *task = bpf_get_current_task_btf();
|
||||
long *ptr;
|
||||
|
||||
ptr = bpf_task_storage_get(&map_a, task, 0,
|
||||
BPF_LOCAL_STORAGE_GET_F_CREATE);
|
||||
if (ptr)
|
||||
*ptr += 1;
|
||||
|
||||
ptr = bpf_task_storage_get(&map_b, task, 0,
|
||||
BPF_LOCAL_STORAGE_GET_F_CREATE);
|
||||
if (ptr)
|
||||
*ptr += 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/sys_enter")
|
||||
int BPF_PROG(on_enter, struct pt_regs *regs, long id)
|
||||
{
|
||||
struct task_struct *task;
|
||||
long *ptr;
|
||||
|
||||
task = bpf_get_current_task_btf();
|
||||
ptr = bpf_task_storage_get(&map_a, task, 0,
|
||||
BPF_LOCAL_STORAGE_GET_F_CREATE);
|
||||
if (ptr)
|
||||
*ptr = 200;
|
||||
|
||||
ptr = bpf_task_storage_get(&map_b, task, 0,
|
||||
BPF_LOCAL_STORAGE_GET_F_CREATE);
|
||||
if (ptr)
|
||||
*ptr = 100;
|
||||
return 0;
|
||||
}
|
@ -21,6 +21,7 @@ struct core_reloc_size_output {
|
||||
int arr_elem_sz;
|
||||
int ptr_sz;
|
||||
int enum_sz;
|
||||
int float_sz;
|
||||
};
|
||||
|
||||
struct core_reloc_size {
|
||||
@ -30,6 +31,7 @@ struct core_reloc_size {
|
||||
int arr_field[4];
|
||||
void *ptr_field;
|
||||
enum { VALUE = 123 } enum_field;
|
||||
float float_field;
|
||||
};
|
||||
|
||||
SEC("raw_tracepoint/sys_enter")
|
||||
@ -45,6 +47,7 @@ int test_core_size(void *ctx)
|
||||
out->arr_elem_sz = bpf_core_field_size(in->arr_field[0]);
|
||||
out->ptr_sz = bpf_core_field_size(in->ptr_field);
|
||||
out->enum_sz = bpf_core_field_size(in->enum_field);
|
||||
out->float_sz = bpf_core_field_size(in->float_field);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -64,6 +64,10 @@ static const int PROG_DONE = 1;
|
||||
static const __u32 KEY_SERVER_A = SERVER_A;
|
||||
static const __u32 KEY_SERVER_B = SERVER_B;
|
||||
|
||||
static const __u16 SRC_PORT = bpf_htons(8008);
|
||||
static const __u32 SRC_IP4 = IP4(127, 0, 0, 2);
|
||||
static const __u32 SRC_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000002);
|
||||
|
||||
static const __u16 DST_PORT = 7007; /* Host byte order */
|
||||
static const __u32 DST_IP4 = IP4(127, 0, 0, 1);
|
||||
static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001);
|
||||
@ -398,11 +402,12 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
|
||||
if (LSW(ctx->protocol, 0) != IPPROTO_TCP)
|
||||
return SK_DROP;
|
||||
|
||||
/* Narrow loads from remote_port field. Expect non-0 value. */
|
||||
if (LSB(ctx->remote_port, 0) == 0 && LSB(ctx->remote_port, 1) == 0 &&
|
||||
LSB(ctx->remote_port, 2) == 0 && LSB(ctx->remote_port, 3) == 0)
|
||||
/* Narrow loads from remote_port field. Expect SRC_PORT. */
|
||||
if (LSB(ctx->remote_port, 0) != ((SRC_PORT >> 0) & 0xff) ||
|
||||
LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff) ||
|
||||
LSB(ctx->remote_port, 2) != 0 || LSB(ctx->remote_port, 3) != 0)
|
||||
return SK_DROP;
|
||||
if (LSW(ctx->remote_port, 0) == 0)
|
||||
if (LSW(ctx->remote_port, 0) != SRC_PORT)
|
||||
return SK_DROP;
|
||||
|
||||
/* Narrow loads from local_port field. Expect DST_PORT. */
|
||||
@ -415,11 +420,14 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
|
||||
|
||||
/* Narrow loads from IPv4 fields */
|
||||
if (v4) {
|
||||
/* Expect non-0.0.0.0 in remote_ip4 */
|
||||
if (LSB(ctx->remote_ip4, 0) == 0 && LSB(ctx->remote_ip4, 1) == 0 &&
|
||||
LSB(ctx->remote_ip4, 2) == 0 && LSB(ctx->remote_ip4, 3) == 0)
|
||||
/* Expect SRC_IP4 in remote_ip4 */
|
||||
if (LSB(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xff) ||
|
||||
LSB(ctx->remote_ip4, 1) != ((SRC_IP4 >> 8) & 0xff) ||
|
||||
LSB(ctx->remote_ip4, 2) != ((SRC_IP4 >> 16) & 0xff) ||
|
||||
LSB(ctx->remote_ip4, 3) != ((SRC_IP4 >> 24) & 0xff))
|
||||
return SK_DROP;
|
||||
if (LSW(ctx->remote_ip4, 0) == 0 && LSW(ctx->remote_ip4, 1) == 0)
|
||||
if (LSW(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xffff) ||
|
||||
LSW(ctx->remote_ip4, 1) != ((SRC_IP4 >> 16) & 0xffff))
|
||||
return SK_DROP;
|
||||
|
||||
/* Expect DST_IP4 in local_ip4 */
|
||||
@ -448,20 +456,32 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
|
||||
|
||||
/* Narrow loads from IPv6 fields */
|
||||
if (!v4) {
|
||||
/* Expect non-:: IP in remote_ip6 */
|
||||
if (LSB(ctx->remote_ip6[0], 0) == 0 && LSB(ctx->remote_ip6[0], 1) == 0 &&
|
||||
LSB(ctx->remote_ip6[0], 2) == 0 && LSB(ctx->remote_ip6[0], 3) == 0 &&
|
||||
LSB(ctx->remote_ip6[1], 0) == 0 && LSB(ctx->remote_ip6[1], 1) == 0 &&
|
||||
LSB(ctx->remote_ip6[1], 2) == 0 && LSB(ctx->remote_ip6[1], 3) == 0 &&
|
||||
LSB(ctx->remote_ip6[2], 0) == 0 && LSB(ctx->remote_ip6[2], 1) == 0 &&
|
||||
LSB(ctx->remote_ip6[2], 2) == 0 && LSB(ctx->remote_ip6[2], 3) == 0 &&
|
||||
LSB(ctx->remote_ip6[3], 0) == 0 && LSB(ctx->remote_ip6[3], 1) == 0 &&
|
||||
LSB(ctx->remote_ip6[3], 2) == 0 && LSB(ctx->remote_ip6[3], 3) == 0)
|
||||
/* Expect SRC_IP6 in remote_ip6 */
|
||||
if (LSB(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xff) ||
|
||||
LSB(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 8) & 0xff) ||
|
||||
LSB(ctx->remote_ip6[0], 2) != ((SRC_IP6[0] >> 16) & 0xff) ||
|
||||
LSB(ctx->remote_ip6[0], 3) != ((SRC_IP6[0] >> 24) & 0xff) ||
|
||||
LSB(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xff) ||
|
||||
LSB(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 8) & 0xff) ||
|
||||
LSB(ctx->remote_ip6[1], 2) != ((SRC_IP6[1] >> 16) & 0xff) ||
|
||||
LSB(ctx->remote_ip6[1], 3) != ((SRC_IP6[1] >> 24) & 0xff) ||
|
||||
LSB(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xff) ||
|
||||
LSB(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 8) & 0xff) ||
|
||||
LSB(ctx->remote_ip6[2], 2) != ((SRC_IP6[2] >> 16) & 0xff) ||
|
||||
LSB(ctx->remote_ip6[2], 3) != ((SRC_IP6[2] >> 24) & 0xff) ||
|
||||
LSB(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xff) ||
|
||||
LSB(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 8) & 0xff) ||
|
||||
LSB(ctx->remote_ip6[3], 2) != ((SRC_IP6[3] >> 16) & 0xff) ||
|
||||
LSB(ctx->remote_ip6[3], 3) != ((SRC_IP6[3] >> 24) & 0xff))
|
||||
return SK_DROP;
|
||||
if (LSW(ctx->remote_ip6[0], 0) == 0 && LSW(ctx->remote_ip6[0], 1) == 0 &&
|
||||
LSW(ctx->remote_ip6[1], 0) == 0 && LSW(ctx->remote_ip6[1], 1) == 0 &&
|
||||
LSW(ctx->remote_ip6[2], 0) == 0 && LSW(ctx->remote_ip6[2], 1) == 0 &&
|
||||
LSW(ctx->remote_ip6[3], 0) == 0 && LSW(ctx->remote_ip6[3], 1) == 0)
|
||||
if (LSW(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xffff) ||
|
||||
LSW(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 16) & 0xffff) ||
|
||||
LSW(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xffff) ||
|
||||
LSW(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 16) & 0xffff) ||
|
||||
LSW(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xffff) ||
|
||||
LSW(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 16) & 0xffff) ||
|
||||
LSW(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xffff) ||
|
||||
LSW(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 16) & 0xffff))
|
||||
return SK_DROP;
|
||||
/* Expect DST_IP6 in local_ip6 */
|
||||
if (LSB(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xff) ||
|
||||
|
@ -31,13 +31,13 @@ struct {
static volatile bool test_sockmap; /* toggled by user-space */

SEC("sk_skb/stream_parser")
int prog_skb_parser(struct __sk_buff *skb)
int prog_stream_parser(struct __sk_buff *skb)
{
return skb->len;
}

SEC("sk_skb/stream_verdict")
int prog_skb_verdict(struct __sk_buff *skb)
int prog_stream_verdict(struct __sk_buff *skb)
{
unsigned int *count;
__u32 zero = 0;