Alexei Starovoitov says:

====================
pull-request: bpf-next 2022-03-21 v2

We've added 137 non-merge commits during the last 17 day(s) which contain
a total of 143 files changed, 7123 insertions(+), 1092 deletions(-).

The main changes are:

1) Custom SEC() handling in libbpf, from Andrii.

2) Subskeleton support, from Delyan.

3) Use btf_tag to recognize __percpu pointers in the verifier, from Hao.

4) Fix net.core.bpf_jit_harden race, from Hou.

5) Fix bpf_sk_lookup remote_port on big-endian, from Jakub.

6) Introduce fprobe (multi kprobe) _without_ arch bits, from Masami.
The arch specific bits will come later.

7) Introduce multi_kprobe bpf programs on top of fprobe, from Jiri; a usage sketch follows this list.

8) Enable non-atomic allocations in local storage, from Joanne.

9) Various var_off ptr_to_btf_id fixes, from Kumar.

10) bpf_ima_file_hash helper, from Roberto.

11) Add "live packet" mode for XDP in BPF_PROG_RUN, from Toke.

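To give a flavor of 1) and 7), the BPF side of a multi-kprobe program can be as small as the sketch below. This is illustrative only, modeled on the selftests added in this series; the kprobe.multi section name is the one libbpf gains here.

/* One program attached to every kernel function matching "vfs_*". */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

SEC("kprobe.multi/vfs_*")
int trace_vfs_entry(struct pt_regs *ctx)
{
	/* bpf_get_func_ip() returns the entry IP of whichever function fired */
	bpf_printk("entered %llx", bpf_get_func_ip(ctx));
	return 0;
}
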
* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (137 commits)
  selftests/bpf: Fix kprobe_multi test.
  Revert "rethook: x86: Add rethook x86 implementation"
  Revert "arm64: rethook: Add arm64 rethook implementation"
  Revert "powerpc: Add rethook support"
  Revert "ARM: rethook: Add rethook arm implementation"
  bpftool: Fix a bug in subskeleton code generation
  bpf: Fix bpf_prog_pack when PMD_SIZE is not defined
  bpf: Fix bpf_prog_pack for multi-node setup
  bpf: Fix warning for cast from restricted gfp_t in verifier
  bpf, arm: Fix various typos in comments
  libbpf: Close fd in bpf_object__reuse_map
  bpftool: Fix print error when show bpf map
  bpf: Fix kprobe_multi return probe backtrace
  Revert "bpf: Add support to inline bpf_get_func_ip helper on x86"
  bpf: Simplify check in btf_parse_hdr()
  selftests/bpf/test_lirc_mode2.sh: Exit with proper code
  bpf: Check for NULL return from bpf_get_btf_vmlinux
  selftests/bpf: Test skipping stacktrace
  bpf: Adjust BPF stack helper functions to accommodate skip > 0
  bpf: Select proper size for bpf_prog_pack
  ...
====================

Link: https://lore.kernel.org/r/20220322050159.5507-1-alexei.starovoitov@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Committed by Jakub Kicinski, 2022-03-22 10:36:56 -07:00
143 changed files with 7139 additions and 1108 deletions


@@ -30,6 +30,7 @@ config BPF_SYSCALL
select TASKS_TRACE_RCU
select BINARY_PRINTF
select NET_SOCK_MSG if NET
select PAGE_POOL if NET
default n
help
Enable the bpf() system call that allows to manipulate BPF programs


@@ -136,7 +136,7 @@ static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
sdata = bpf_local_storage_update(f->f_inode,
(struct bpf_local_storage_map *)map,
value, map_flags);
value, map_flags, GFP_ATOMIC);
fput(f);
return PTR_ERR_OR_ZERO(sdata);
}
@@ -169,8 +169,9 @@ static int bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
return err;
}
BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
void *, value, u64, flags)
/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
void *, value, u64, flags, gfp_t, gfp_flags)
{
struct bpf_local_storage_data *sdata;
@@ -196,7 +197,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
sdata = bpf_local_storage_update(
inode, (struct bpf_local_storage_map *)map, value,
BPF_NOEXIST);
BPF_NOEXIST, gfp_flags);
return IS_ERR(sdata) ? (unsigned long)NULL :
(unsigned long)sdata->data;
}


@@ -63,7 +63,7 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
void *value, bool charge_mem)
void *value, bool charge_mem, gfp_t gfp_flags)
{
struct bpf_local_storage_elem *selem;
@@ -71,7 +71,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
return NULL;
selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
GFP_ATOMIC | __GFP_NOWARN);
gfp_flags | __GFP_NOWARN);
if (selem) {
if (value)
memcpy(SDATA(selem)->data, value, smap->map.value_size);
@@ -282,7 +282,8 @@ static int check_flags(const struct bpf_local_storage_data *old_sdata,
int bpf_local_storage_alloc(void *owner,
struct bpf_local_storage_map *smap,
struct bpf_local_storage_elem *first_selem)
struct bpf_local_storage_elem *first_selem,
gfp_t gfp_flags)
{
struct bpf_local_storage *prev_storage, *storage;
struct bpf_local_storage **owner_storage_ptr;
@@ -293,7 +294,7 @@ int bpf_local_storage_alloc(void *owner,
return err;
storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
GFP_ATOMIC | __GFP_NOWARN);
gfp_flags | __GFP_NOWARN);
if (!storage) {
err = -ENOMEM;
goto uncharge;
@@ -350,10 +351,10 @@ uncharge:
*/
struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
void *value, u64 map_flags)
void *value, u64 map_flags, gfp_t gfp_flags)
{
struct bpf_local_storage_data *old_sdata = NULL;
struct bpf_local_storage_elem *selem;
struct bpf_local_storage_elem *selem = NULL;
struct bpf_local_storage *local_storage;
unsigned long flags;
int err;
@@ -365,6 +366,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
!map_value_has_spin_lock(&smap->map)))
return ERR_PTR(-EINVAL);
if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
return ERR_PTR(-EINVAL);
local_storage = rcu_dereference_check(*owner_storage(smap, owner),
bpf_rcu_lock_held());
if (!local_storage || hlist_empty(&local_storage->list)) {
@@ -373,11 +377,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
if (err)
return ERR_PTR(err);
selem = bpf_selem_alloc(smap, owner, value, true);
selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
if (!selem)
return ERR_PTR(-ENOMEM);
err = bpf_local_storage_alloc(owner, smap, selem);
err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags);
if (err) {
kfree(selem);
mem_uncharge(smap, owner, smap->elem_size);
@@ -404,6 +408,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
}
}
if (gfp_flags == GFP_KERNEL) {
selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
if (!selem)
return ERR_PTR(-ENOMEM);
}
raw_spin_lock_irqsave(&local_storage->lock, flags);
/* Recheck local_storage->list under local_storage->lock */
@@ -429,19 +439,21 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
goto unlock;
}
/* local_storage->lock is held. Hence, we are sure
* we can unlink and uncharge the old_sdata successfully
* later. Hence, instead of charging the new selem now
* and then uncharge the old selem later (which may cause
* a potential but unnecessary charge failure), avoid taking
* a charge at all here (the "!old_sdata" check) and the
* old_sdata will not be uncharged later during
* bpf_selem_unlink_storage_nolock().
*/
selem = bpf_selem_alloc(smap, owner, value, !old_sdata);
if (!selem) {
err = -ENOMEM;
goto unlock_err;
if (gfp_flags != GFP_KERNEL) {
/* local_storage->lock is held. Hence, we are sure
* we can unlink and uncharge the old_sdata successfully
* later. Hence, instead of charging the new selem now
* and then uncharge the old selem later (which may cause
* a potential but unnecessary charge failure), avoid taking
* a charge at all here (the "!old_sdata" check) and the
* old_sdata will not be uncharged later during
* bpf_selem_unlink_storage_nolock().
*/
selem = bpf_selem_alloc(smap, owner, value, !old_sdata, gfp_flags);
if (!selem) {
err = -ENOMEM;
goto unlock_err;
}
}
/* First, link the new selem to the map */
@@ -463,6 +475,10 @@ unlock:
unlock_err:
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
if (selem) {
mem_uncharge(smap, owner, smap->elem_size);
kfree(selem);
}
return ERR_PTR(err);
}


@@ -99,6 +99,24 @@ static const struct bpf_func_proto bpf_ima_inode_hash_proto = {
.allowed = bpf_ima_inode_hash_allowed,
};
BPF_CALL_3(bpf_ima_file_hash, struct file *, file, void *, dst, u32, size)
{
return ima_file_hash(file, dst, size);
}
BTF_ID_LIST_SINGLE(bpf_ima_file_hash_btf_ids, struct, file)
static const struct bpf_func_proto bpf_ima_file_hash_proto = {
.func = bpf_ima_file_hash,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &bpf_ima_file_hash_btf_ids[0],
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
.arg3_type = ARG_CONST_SIZE,
.allowed = bpf_ima_inode_hash_allowed,
};
static const struct bpf_func_proto *
bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -121,6 +139,8 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_bprm_opts_set_proto;
case BPF_FUNC_ima_inode_hash:
return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL;
case BPF_FUNC_ima_file_hash:
return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL;
default:
return tracing_prog_func_proto(func_id, prog);
}
@@ -167,6 +187,7 @@ BTF_ID(func, bpf_lsm_inode_setxattr)
BTF_ID(func, bpf_lsm_inode_symlink)
BTF_ID(func, bpf_lsm_inode_unlink)
BTF_ID(func, bpf_lsm_kernel_module_request)
BTF_ID(func, bpf_lsm_kernel_read_file)
BTF_ID(func, bpf_lsm_kernfs_init_security)
#ifdef CONFIG_KEYS

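The hunk above adds the bpf_ima_file_hash() helper and lets the kernel_read_file LSM hook run sleepable programs. A rough sketch of how a sleepable LSM program could combine the two (hook and buffer size picked purely for illustration):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("lsm.s/kernel_read_file")
int BPF_PROG(hash_read_file, struct file *file, enum kernel_read_file_id id,
	     bool contents)
{
	u8 digest[64];	/* large enough for SHA-512 */
	long ret;

	/* returns the hash algorithm on success, a negative error otherwise */
	ret = bpf_ima_file_hash(file, digest, sizeof(digest));
	if (ret < 0)
		return 0;	/* no IMA hash available; don't block the read */

	bpf_printk("ima algo %ld for kernel_read_file id %d", ret, id);
	return 0;
}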

@@ -174,7 +174,8 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
bpf_task_storage_lock();
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value, map_flags);
task, (struct bpf_local_storage_map *)map, value, map_flags,
GFP_ATOMIC);
bpf_task_storage_unlock();
err = PTR_ERR_OR_ZERO(sdata);
@@ -226,8 +227,9 @@ out:
return err;
}
BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
task, void *, value, u64, flags)
/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
task, void *, value, u64, flags, gfp_t, gfp_flags)
{
struct bpf_local_storage_data *sdata;
@@ -250,7 +252,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
(flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value,
BPF_NOEXIST);
BPF_NOEXIST, gfp_flags);
unlock:
bpf_task_storage_unlock();

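Together with the verifier change further down that appends GFP_KERNEL or GFP_ATOMIC as the hidden fifth argument, a sleepable program's storage allocation may now sleep. Minimal sketch, with made-up map and program names:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, __u64);
} opens SEC(".maps");

SEC("lsm.s/file_open")	/* sleepable, so the hidden gfp_flags is GFP_KERNEL */
int BPF_PROG(count_opens, struct file *file)
{
	__u64 *cnt;

	cnt = bpf_task_storage_get(&opens, bpf_get_current_task_btf(), 0,
				   BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (cnt)
		__sync_fetch_and_add(cnt, 1);
	return 0;
}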

@@ -525,6 +525,50 @@ s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
return -ENOENT;
}
static s32 bpf_find_btf_id(const char *name, u32 kind, struct btf **btf_p)
{
struct btf *btf;
s32 ret;
int id;
btf = bpf_get_btf_vmlinux();
if (IS_ERR(btf))
return PTR_ERR(btf);
if (!btf)
return -EINVAL;
ret = btf_find_by_name_kind(btf, name, kind);
/* ret is never zero, since btf_find_by_name_kind returns
* positive btf_id or negative error.
*/
if (ret > 0) {
btf_get(btf);
*btf_p = btf;
return ret;
}
/* If name is not found in vmlinux's BTF then search in module's BTFs */
spin_lock_bh(&btf_idr_lock);
idr_for_each_entry(&btf_idr, btf, id) {
if (!btf_is_module(btf))
continue;
/* linear search could be slow hence unlock/lock
* the IDR to avoiding holding it for too long
*/
btf_get(btf);
spin_unlock_bh(&btf_idr_lock);
ret = btf_find_by_name_kind(btf, name, kind);
if (ret > 0) {
*btf_p = btf;
return ret;
}
spin_lock_bh(&btf_idr_lock);
btf_put(btf);
}
spin_unlock_bh(&btf_idr_lock);
return ret;
}
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
u32 id, u32 *res_id)
{
@@ -4438,8 +4482,7 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
btf = env->btf;
btf_data_size = btf->data_size;
if (btf_data_size <
offsetof(struct btf_header, hdr_len) + sizeof(hdr->hdr_len)) {
if (btf_data_size < offsetofend(struct btf_header, hdr_len)) {
btf_verifier_log(env, "hdr_len not found");
return -EINVAL;
}
@@ -5057,6 +5100,8 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
tag_value = __btf_name_by_offset(btf, t->name_off);
if (strcmp(tag_value, "user") == 0)
info->reg_type |= MEM_USER;
if (strcmp(tag_value, "percpu") == 0)
info->reg_type |= MEM_PERCPU;
}
/* skip modifiers */
@@ -5285,12 +5330,16 @@ error:
return -EACCES;
}
/* check __user tag */
/* check type tag */
t = btf_type_by_id(btf, mtype->type);
if (btf_type_is_type_tag(t)) {
tag_value = __btf_name_by_offset(btf, t->name_off);
/* check __user tag */
if (strcmp(tag_value, "user") == 0)
tmp_flag = MEM_USER;
/* check __percpu tag */
if (strcmp(tag_value, "percpu") == 0)
tmp_flag = MEM_PERCPU;
}
stype = btf_type_skip_modifiers(btf, mtype->type, &id);
@@ -5726,7 +5775,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
const char *func_name, *ref_tname;
const struct btf_type *t, *ref_t;
const struct btf_param *args;
int ref_regno = 0;
int ref_regno = 0, ret;
bool rel = false;
t = btf_type_by_id(btf, func_id);
@@ -5753,6 +5802,10 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
return -EINVAL;
}
/* Only kfunc can be release func */
if (is_kfunc)
rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog),
BTF_KFUNC_TYPE_RELEASE, func_id);
/* check that BTF function arguments match actual types that the
* verifier sees.
*/
@@ -5776,6 +5829,11 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
ref_tname = btf_name_by_offset(btf, ref_t->name_off);
ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE, rel);
if (ret < 0)
return ret;
if (btf_get_prog_ctx_type(log, btf, t,
env->prog->type, i)) {
/* If function expects ctx type in BTF check that caller
@@ -5787,8 +5845,6 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
i, btf_type_str(t));
return -EINVAL;
}
if (check_ptr_off_reg(env, reg, regno))
return -EINVAL;
} else if (is_kfunc && (reg->type == PTR_TO_BTF_ID ||
(reg2btf_ids[base_type(reg->type)] && !type_flag(reg->type)))) {
const struct btf_type *reg_ref_t;
@@ -5806,7 +5862,11 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
if (reg->type == PTR_TO_BTF_ID) {
reg_btf = reg->btf;
reg_ref_id = reg->btf_id;
/* Ensure only one argument is referenced PTR_TO_BTF_ID */
/* Ensure only one argument is referenced
* PTR_TO_BTF_ID, check_func_arg_reg_off relies
* on only one referenced register being allowed
* for kfuncs.
*/
if (reg->ref_obj_id) {
if (ref_obj_id) {
bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
@@ -5888,18 +5948,15 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
/* Either both are set, or neither */
WARN_ON_ONCE((ref_obj_id && !ref_regno) || (!ref_obj_id && ref_regno));
if (is_kfunc) {
rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog),
BTF_KFUNC_TYPE_RELEASE, func_id);
/* We already made sure ref_obj_id is set only for one argument */
if (rel && !ref_obj_id) {
bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
func_name);
return -EINVAL;
}
/* Allow (!rel && ref_obj_id), so that passing such referenced PTR_TO_BTF_ID to
* other kfuncs works
*/
/* We already made sure ref_obj_id is set only for one argument. We do
* allow (!rel && ref_obj_id), so that passing such referenced
* PTR_TO_BTF_ID to other kfuncs works. Note that rel is only true when
* is_kfunc is true.
*/
if (rel && !ref_obj_id) {
bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
func_name);
return -EINVAL;
}
/* returns argument register number > 0 in case of reference release kfunc */
return rel ? ref_regno : 0;
@@ -6516,20 +6573,23 @@ struct module *btf_try_get_module(const struct btf *btf)
return res;
}
/* Returns struct btf corresponding to the struct module
*
* This function can return NULL or ERR_PTR. Note that caller must
* release reference for struct btf iff btf_is_module is true.
/* Returns struct btf corresponding to the struct module.
* This function can return NULL or ERR_PTR.
*/
static struct btf *btf_get_module_btf(const struct module *module)
{
struct btf *btf = NULL;
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
struct btf_module *btf_mod, *tmp;
#endif
struct btf *btf = NULL;
if (!module) {
btf = bpf_get_btf_vmlinux();
if (!IS_ERR_OR_NULL(btf))
btf_get(btf);
return btf;
}
if (!module)
return bpf_get_btf_vmlinux();
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
mutex_lock(&btf_module_mutex);
list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
@@ -6548,7 +6608,8 @@ static struct btf *btf_get_module_btf(const struct module *module)
BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags)
{
struct btf *btf;
struct btf *btf = NULL;
int btf_obj_fd = 0;
long ret;
if (flags)
@@ -6557,44 +6618,17 @@ BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int
if (name_sz <= 1 || name[name_sz - 1])
return -EINVAL;
btf = bpf_get_btf_vmlinux();
if (IS_ERR(btf))
return PTR_ERR(btf);
ret = btf_find_by_name_kind(btf, name, kind);
/* ret is never zero, since btf_find_by_name_kind returns
* positive btf_id or negative error.
*/
if (ret < 0) {
struct btf *mod_btf;
int id;
/* If name is not found in vmlinux's BTF then search in module's BTFs */
spin_lock_bh(&btf_idr_lock);
idr_for_each_entry(&btf_idr, mod_btf, id) {
if (!btf_is_module(mod_btf))
continue;
/* linear search could be slow hence unlock/lock
* the IDR to avoiding holding it for too long
*/
btf_get(mod_btf);
spin_unlock_bh(&btf_idr_lock);
ret = btf_find_by_name_kind(mod_btf, name, kind);
if (ret > 0) {
int btf_obj_fd;
btf_obj_fd = __btf_new_fd(mod_btf);
if (btf_obj_fd < 0) {
btf_put(mod_btf);
return btf_obj_fd;
}
return ret | (((u64)btf_obj_fd) << 32);
}
spin_lock_bh(&btf_idr_lock);
btf_put(mod_btf);
ret = bpf_find_btf_id(name, kind, &btf);
if (ret > 0 && btf_is_module(btf)) {
btf_obj_fd = __btf_new_fd(btf);
if (btf_obj_fd < 0) {
btf_put(btf);
return btf_obj_fd;
}
spin_unlock_bh(&btf_idr_lock);
return ret | (((u64)btf_obj_fd) << 32);
}
if (ret > 0)
btf_put(btf);
return ret;
}
@@ -6793,9 +6827,7 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
hook = bpf_prog_type_to_kfunc_hook(prog_type);
ret = btf_populate_kfunc_set(btf, hook, kset);
/* reference is only taken for module BTF */
if (btf_is_module(btf))
btf_put(btf);
btf_put(btf);
return ret;
}
EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set);
@@ -7149,6 +7181,8 @@ bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id)
main_btf = bpf_get_btf_vmlinux();
if (IS_ERR(main_btf))
return ERR_CAST(main_btf);
if (!main_btf)
return ERR_PTR(-EINVAL);
local_type = btf_type_by_id(local_btf, local_type_id);
if (!local_type)

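For context on the MEM_PERCPU flag that the "percpu" type-tag handling above feeds into: on the program side, per-CPU kernel pointers still have to go through bpf_this_cpu_ptr()/bpf_per_cpu_ptr() rather than being dereferenced directly; this series only reworks how the verifier tags them internally. A sketch mirroring the existing per-CPU ksym pattern:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

/* per-CPU kernel variable, resolved through vmlinux BTF */
extern const struct rq runqueues __ksym;

SEC("raw_tp/sys_enter")
int handler(const void *ctx)
{
	/* direct loads from the percpu pointer are rejected; the helper
	 * returns this CPU's instance as an ordinary BTF pointer
	 */
	struct rq *rq = (struct rq *)bpf_this_cpu_ptr(&runqueues);

	bpf_printk("this rq belongs to cpu %d", rq->cpu);	/* rq->cpu assumes CONFIG_SMP */
	return 0;
}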

@@ -33,6 +33,7 @@
#include <linux/extable.h>
#include <linux/log2.h>
#include <linux/bpf_verifier.h>
#include <linux/nodemask.h>
#include <asm/barrier.h>
#include <asm/unaligned.h>
@@ -105,6 +106,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
fp->aux = aux;
fp->aux->prog = fp;
fp->jit_requested = ebpf_jit_enabled();
fp->blinding_requested = bpf_jit_blinding_enabled(fp);
INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
mutex_init(&fp->aux->used_maps_mutex);
@@ -814,15 +816,9 @@ int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
* allocator. The prog_pack allocator uses HPAGE_PMD_SIZE page (2MB on x86)
* to host BPF programs.
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define BPF_PROG_PACK_SIZE HPAGE_PMD_SIZE
#else
#define BPF_PROG_PACK_SIZE PAGE_SIZE
#endif
#define BPF_PROG_CHUNK_SHIFT 6
#define BPF_PROG_CHUNK_SIZE (1 << BPF_PROG_CHUNK_SHIFT)
#define BPF_PROG_CHUNK_MASK (~(BPF_PROG_CHUNK_SIZE - 1))
#define BPF_PROG_CHUNK_COUNT (BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE)
struct bpf_prog_pack {
struct list_head list;
@@ -830,30 +826,72 @@ struct bpf_prog_pack {
unsigned long bitmap[];
};
#define BPF_PROG_MAX_PACK_PROG_SIZE BPF_PROG_PACK_SIZE
#define BPF_PROG_SIZE_TO_NBITS(size) (round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE)
static size_t bpf_prog_pack_size = -1;
static size_t bpf_prog_pack_mask = -1;
static int bpf_prog_chunk_count(void)
{
WARN_ON_ONCE(bpf_prog_pack_size == -1);
return bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE;
}
static DEFINE_MUTEX(pack_mutex);
static LIST_HEAD(pack_list);
/* PMD_SIZE is not available in some special config, e.g. ARCH=arm with
* CONFIG_MMU=n. Use PAGE_SIZE in these cases.
*/
#ifdef PMD_SIZE
#define BPF_HPAGE_SIZE PMD_SIZE
#define BPF_HPAGE_MASK PMD_MASK
#else
#define BPF_HPAGE_SIZE PAGE_SIZE
#define BPF_HPAGE_MASK PAGE_MASK
#endif
static size_t select_bpf_prog_pack_size(void)
{
size_t size;
void *ptr;
size = BPF_HPAGE_SIZE * num_online_nodes();
ptr = module_alloc(size);
/* Test whether we can get huge pages. If not just use PAGE_SIZE
* packs.
*/
if (!ptr || !is_vm_area_hugepages(ptr)) {
size = PAGE_SIZE;
bpf_prog_pack_mask = PAGE_MASK;
} else {
bpf_prog_pack_mask = BPF_HPAGE_MASK;
}
vfree(ptr);
return size;
}
static struct bpf_prog_pack *alloc_new_pack(void)
{
struct bpf_prog_pack *pack;
pack = kzalloc(sizeof(*pack) + BITS_TO_BYTES(BPF_PROG_CHUNK_COUNT), GFP_KERNEL);
pack = kzalloc(struct_size(pack, bitmap, BITS_TO_LONGS(bpf_prog_chunk_count())),
GFP_KERNEL);
if (!pack)
return NULL;
pack->ptr = module_alloc(BPF_PROG_PACK_SIZE);
pack->ptr = module_alloc(bpf_prog_pack_size);
if (!pack->ptr) {
kfree(pack);
return NULL;
}
bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE);
bitmap_zero(pack->bitmap, bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE);
list_add_tail(&pack->list, &pack_list);
set_vm_flush_reset_perms(pack->ptr);
set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
set_memory_ro((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
set_memory_x((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
return pack;
}
@@ -864,7 +902,11 @@ static void *bpf_prog_pack_alloc(u32 size)
unsigned long pos;
void *ptr = NULL;
if (size > BPF_PROG_MAX_PACK_PROG_SIZE) {
mutex_lock(&pack_mutex);
if (bpf_prog_pack_size == -1)
bpf_prog_pack_size = select_bpf_prog_pack_size();
if (size > bpf_prog_pack_size) {
size = round_up(size, PAGE_SIZE);
ptr = module_alloc(size);
if (ptr) {
@@ -872,13 +914,12 @@ static void *bpf_prog_pack_alloc(u32 size)
set_memory_ro((unsigned long)ptr, size / PAGE_SIZE);
set_memory_x((unsigned long)ptr, size / PAGE_SIZE);
}
return ptr;
goto out;
}
mutex_lock(&pack_mutex);
list_for_each_entry(pack, &pack_list, list) {
pos = bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
pos = bitmap_find_next_zero_area(pack->bitmap, bpf_prog_chunk_count(), 0,
nbits, 0);
if (pos < BPF_PROG_CHUNK_COUNT)
if (pos < bpf_prog_chunk_count())
goto found_free_area;
}
@@ -904,13 +945,13 @@ static void bpf_prog_pack_free(struct bpf_binary_header *hdr)
unsigned long pos;
void *pack_ptr;
if (hdr->size > BPF_PROG_MAX_PACK_PROG_SIZE) {
mutex_lock(&pack_mutex);
if (hdr->size > bpf_prog_pack_size) {
module_memfree(hdr);
return;
goto out;
}
pack_ptr = (void *)((unsigned long)hdr & ~(BPF_PROG_PACK_SIZE - 1));
mutex_lock(&pack_mutex);
pack_ptr = (void *)((unsigned long)hdr & bpf_prog_pack_mask);
list_for_each_entry(tmp, &pack_list, list) {
if (tmp->ptr == pack_ptr) {
@@ -926,8 +967,8 @@ static void bpf_prog_pack_free(struct bpf_binary_header *hdr)
pos = ((unsigned long)hdr - (unsigned long)pack_ptr) >> BPF_PROG_CHUNK_SHIFT;
bitmap_clear(pack->bitmap, pos, nbits);
if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
BPF_PROG_CHUNK_COUNT, 0) == 0) {
if (bitmap_find_next_zero_area(pack->bitmap, bpf_prog_chunk_count(), 0,
bpf_prog_chunk_count(), 0) == 0) {
list_del(&pack->list);
module_memfree(pack->ptr);
kfree(pack);
@@ -1382,7 +1423,7 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
struct bpf_insn *insn;
int i, rewritten;
if (!bpf_jit_blinding_enabled(prog) || prog->blinded)
if (!prog->blinding_requested || prog->blinded)
return prog;
clone = bpf_prog_clone_create(prog, GFP_USER);


@@ -225,13 +225,8 @@ BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
if (unlikely(!task))
goto err_clear;
strncpy(buf, task->comm, size);
/* Verifier guarantees that size > 0. For task->comm exceeding
* size, guarantee that buf is %NUL-terminated. Unconditionally
* done here to save the size test.
*/
buf[size - 1] = 0;
/* Verifier guarantees that size > 0 */
strscpy(buf, task->comm, size);
return 0;
err_clear:
memset(buf, 0, size);


@@ -1,8 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
LIBBPF_SRCS = $(srctree)/tools/lib/bpf/
LIBBPF_INCLUDE = $(LIBBPF_SRCS)/..
LIBBPF_INCLUDE = $(srctree)/tools/lib
obj-$(CONFIG_BPF_PRELOAD_UMD) += bpf_preload.o
CFLAGS_bpf_preload_kern.o += -I $(LIBBPF_INCLUDE)
CFLAGS_bpf_preload_kern.o += -I$(LIBBPF_INCLUDE)
bpf_preload-objs += bpf_preload_kern.o


@@ -176,7 +176,7 @@ build_id_valid:
}
static struct perf_callchain_entry *
get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
{
#ifdef CONFIG_STACKTRACE
struct perf_callchain_entry *entry;
@@ -187,9 +187,8 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
if (!entry)
return NULL;
entry->nr = init_nr +
stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr),
sysctl_perf_event_max_stack - init_nr, 0);
entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip,
max_depth, 0);
/* stack_trace_save_tsk() works on unsigned long array, while
* perf_callchain_entry uses u64 array. For 32-bit systems, it is
@@ -201,7 +200,7 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
int i;
/* copy data from the end to avoid using extra buffer */
for (i = entry->nr - 1; i >= (int)init_nr; i--)
for (i = entry->nr - 1; i >= 0; i--)
to[i] = (u64)(from[i]);
}
@@ -218,27 +217,19 @@ static long __bpf_get_stackid(struct bpf_map *map,
{
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
u32 max_depth = map->value_size / stack_map_data_size(map);
/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
u32 init_nr = sysctl_perf_event_max_stack - max_depth;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
u32 hash, id, trace_nr, trace_len;
bool user = flags & BPF_F_USER_STACK;
u64 *ips;
bool hash_matches;
/* get_perf_callchain() guarantees that trace->nr >= init_nr
* and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
*/
trace_nr = trace->nr - init_nr;
if (trace_nr <= skip)
if (trace->nr <= skip)
/* skipping more than usable stack trace */
return -EFAULT;
trace_nr -= skip;
trace_nr = trace->nr - skip;
trace_len = trace_nr * sizeof(u64);
ips = trace->ip + skip + init_nr;
ips = trace->ip + skip;
hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0);
id = hash & (smap->n_buckets - 1);
bucket = READ_ONCE(smap->buckets[id]);
@@ -295,8 +286,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
u64, flags)
{
u32 max_depth = map->value_size / stack_map_data_size(map);
/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
u32 init_nr = sysctl_perf_event_max_stack - max_depth;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
bool user = flags & BPF_F_USER_STACK;
struct perf_callchain_entry *trace;
bool kernel = !user;
@@ -305,8 +295,12 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
return -EINVAL;
trace = get_perf_callchain(regs, init_nr, kernel, user,
sysctl_perf_event_max_stack, false, false);
max_depth += skip;
if (max_depth > sysctl_perf_event_max_stack)
max_depth = sysctl_perf_event_max_stack;
trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
false, false);
if (unlikely(!trace))
/* couldn't fetch the stack trace */
@@ -397,7 +391,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
struct perf_callchain_entry *trace_in,
void *buf, u32 size, u64 flags)
{
u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
u32 trace_nr, copy_len, elem_size, num_elem, max_depth;
bool user_build_id = flags & BPF_F_USER_BUILD_ID;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
bool user = flags & BPF_F_USER_STACK;
@@ -422,30 +416,28 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
goto err_fault;
num_elem = size / elem_size;
if (sysctl_perf_event_max_stack < num_elem)
init_nr = 0;
else
init_nr = sysctl_perf_event_max_stack - num_elem;
max_depth = num_elem + skip;
if (sysctl_perf_event_max_stack < max_depth)
max_depth = sysctl_perf_event_max_stack;
if (trace_in)
trace = trace_in;
else if (kernel && task)
trace = get_callchain_entry_for_task(task, init_nr);
trace = get_callchain_entry_for_task(task, max_depth);
else
trace = get_perf_callchain(regs, init_nr, kernel, user,
sysctl_perf_event_max_stack,
trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
false, false);
if (unlikely(!trace))
goto err_fault;
trace_nr = trace->nr - init_nr;
if (trace_nr < skip)
if (trace->nr < skip)
goto err_fault;
trace_nr -= skip;
trace_nr = trace->nr - skip;
trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
copy_len = trace_nr * elem_size;
ips = trace->ip + skip + init_nr;
ips = trace->ip + skip;
if (user && user_build_id)
stack_map_get_build_id_offset(buf, ips, trace_nr, user);
else

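The adjustments above stop skipped frames from eating into the entries that actually fit in the map or buffer: max_depth now grows by the skip count (capped at sysctl_perf_event_max_stack) instead of being carved out of it. The flags encoding seen from BPF programs is unchanged; a sketch:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

#define MAX_STACK_DEPTH 127

struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(max_entries, 1024);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, MAX_STACK_DEPTH * sizeof(__u64));
} stacks SEC(".maps");

SEC("perf_event")
int sample_stack(void *ctx)
{
	/* low 8 bits of flags = innermost frames to skip; with this fix the
	 * map can still receive up to MAX_STACK_DEPTH frames after the skip
	 */
	long id = bpf_get_stackid(ctx, &stacks, 3);

	if (id >= 0)
		bpf_printk("stack id %ld", id);
	return 0;
}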

@@ -32,6 +32,7 @@
#include <linux/bpf-netns.h>
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>
#include <linux/trace_events.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -3022,6 +3023,11 @@ out_put_file:
fput(perf_file);
return err;
}
#else
static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
return -EOPNOTSUPP;
}
#endif /* CONFIG_PERF_EVENTS */
#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
@@ -3336,7 +3342,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
}
}
#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu
#define BPF_PROG_TEST_RUN_LAST_FIELD test.batch_size
static int bpf_prog_test_run(const union bpf_attr *attr,
union bpf_attr __user *uattr)
@@ -4255,7 +4261,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
return -EINVAL;
}
#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
#define BPF_LINK_CREATE_LAST_FIELD link_create.kprobe_multi.cookies
static int link_create(union bpf_attr *attr, bpfptr_t uattr)
{
enum bpf_prog_type ptype;
@@ -4279,7 +4285,6 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
ret = tracing_bpf_link_attach(attr, uattr, prog);
goto out;
case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_KPROBE:
case BPF_PROG_TYPE_TRACEPOINT:
if (attr->link_create.attach_type != BPF_PERF_EVENT) {
ret = -EINVAL;
@@ -4287,6 +4292,14 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
}
ptype = prog->type;
break;
case BPF_PROG_TYPE_KPROBE:
if (attr->link_create.attach_type != BPF_PERF_EVENT &&
attr->link_create.attach_type != BPF_TRACE_KPROBE_MULTI) {
ret = -EINVAL;
goto out;
}
ptype = prog->type;
break;
default:
ptype = attach_type_to_prog_type(attr->link_create.attach_type);
if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
@@ -4318,13 +4331,16 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
ret = bpf_xdp_link_attach(attr, prog);
break;
#endif
#ifdef CONFIG_PERF_EVENTS
case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_TRACEPOINT:
case BPF_PROG_TYPE_KPROBE:
ret = bpf_perf_link_attach(attr, prog);
break;
#endif
case BPF_PROG_TYPE_KPROBE:
if (attr->link_create.attach_type == BPF_PERF_EVENT)
ret = bpf_perf_link_attach(attr, prog);
else
ret = bpf_kprobe_multi_link_attach(attr, prog);
break;
default:
ret = -EINVAL;
}


@@ -554,7 +554,6 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
[PTR_TO_TP_BUFFER] = "tp_buffer",
[PTR_TO_XDP_SOCK] = "xdp_sock",
[PTR_TO_BTF_ID] = "ptr_",
[PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_",
[PTR_TO_MEM] = "mem",
[PTR_TO_BUF] = "buf",
[PTR_TO_FUNC] = "func",
@@ -562,8 +561,7 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
};
if (type & PTR_MAYBE_NULL) {
if (base_type(type) == PTR_TO_BTF_ID ||
base_type(type) == PTR_TO_PERCPU_BTF_ID)
if (base_type(type) == PTR_TO_BTF_ID)
strncpy(postfix, "or_null_", 16);
else
strncpy(postfix, "_or_null", 16);
@@ -575,6 +573,8 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
strncpy(prefix, "alloc_", 32);
if (type & MEM_USER)
strncpy(prefix, "user_", 32);
if (type & MEM_PERCPU)
strncpy(prefix, "percpu_", 32);
snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
prefix, str[base_type(type)], postfix);
@@ -697,8 +697,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
const char *sep = "";
verbose(env, "%s", reg_type_str(env, t));
if (base_type(t) == PTR_TO_BTF_ID ||
base_type(t) == PTR_TO_PERCPU_BTF_ID)
if (base_type(t) == PTR_TO_BTF_ID)
verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
verbose(env, "(");
/*
@@ -2783,7 +2782,6 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_XDP_SOCK:
case PTR_TO_BTF_ID:
case PTR_TO_BUF:
case PTR_TO_PERCPU_BTF_ID:
case PTR_TO_MEM:
case PTR_TO_FUNC:
case PTR_TO_MAP_KEY:
@@ -3990,6 +3988,12 @@ static int __check_ptr_off_reg(struct bpf_verifier_env *env,
* is only allowed in its original, unmodified form.
*/
if (reg->off < 0) {
verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
reg_type_str(env, reg->type), regno, reg->off);
return -EACCES;
}
if (!fixed_off_ok && reg->off) {
verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
reg_type_str(env, reg->type), regno, reg->off);
@@ -4058,9 +4062,9 @@ static int check_buffer_access(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
int regno, int off, int size,
bool zero_size_allowed,
const char *buf_info,
u32 *max_access)
{
const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
int err;
err = __check_buffer_access(env, buf_info, reg, regno, off, size);
@@ -4197,6 +4201,13 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
return -EACCES;
}
if (reg->type & MEM_PERCPU) {
verbose(env,
"R%d is ptr_%s access percpu memory: off=%d\n",
regno, tname, off);
return -EACCES;
}
if (env->ops->btf_struct_access) {
ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
off, size, atype, &btf_id, &flag);
@@ -4556,7 +4567,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
err = check_tp_buffer_access(env, reg, regno, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_BTF_ID) {
} else if (base_type(reg->type) == PTR_TO_BTF_ID &&
!type_may_be_null(reg->type)) {
err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
value_regno);
} else if (reg->type == CONST_PTR_TO_MAP) {
@@ -4564,7 +4576,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
value_regno);
} else if (base_type(reg->type) == PTR_TO_BUF) {
bool rdonly_mem = type_is_rdonly_mem(reg->type);
const char *buf_info;
u32 *max_access;
if (rdonly_mem) {
@@ -4573,15 +4584,13 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
regno, reg_type_str(env, reg->type));
return -EACCES;
}
buf_info = "rdonly";
max_access = &env->prog->aux->max_rdonly_access;
} else {
buf_info = "rdwr";
max_access = &env->prog->aux->max_rdwr_access;
}
err = check_buffer_access(env, reg, regno, off, size, false,
buf_info, max_access);
max_access);
if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
mark_reg_unknown(env, regs, value_regno);
@@ -4802,7 +4811,7 @@ static int check_stack_range_initialized(
}
if (is_spilled_reg(&state->stack[spi]) &&
state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
base_type(state->stack[spi].spilled_ptr.type) == PTR_TO_BTF_ID)
goto mark;
if (is_spilled_reg(&state->stack[spi]) &&
@@ -4844,7 +4853,6 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
const char *buf_info;
u32 *max_access;
switch (base_type(reg->type)) {
@@ -4871,15 +4879,13 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
if (meta && meta->raw_mode)
return -EACCES;
buf_info = "rdonly";
max_access = &env->prog->aux->max_rdonly_access;
} else {
buf_info = "rdwr";
max_access = &env->prog->aux->max_rdwr_access;
}
return check_buffer_access(env, reg, regno, reg->off,
access_size, zero_size_allowed,
buf_info, max_access);
max_access);
case PTR_TO_STACK:
return check_stack_range_initialized(
env,
@@ -5258,7 +5264,7 @@ static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | ME
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU } };
static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
@@ -5359,6 +5365,60 @@ found:
return 0;
}
int check_func_arg_reg_off(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg, int regno,
enum bpf_arg_type arg_type,
bool is_release_func)
{
bool fixed_off_ok = false, release_reg;
enum bpf_reg_type type = reg->type;
switch ((u32)type) {
case SCALAR_VALUE:
/* Pointer types where reg offset is explicitly allowed: */
case PTR_TO_PACKET:
case PTR_TO_PACKET_META:
case PTR_TO_MAP_KEY:
case PTR_TO_MAP_VALUE:
case PTR_TO_MEM:
case PTR_TO_MEM | MEM_RDONLY:
case PTR_TO_MEM | MEM_ALLOC:
case PTR_TO_BUF:
case PTR_TO_BUF | MEM_RDONLY:
case PTR_TO_STACK:
/* Some of the argument types nevertheless require a
* zero register offset.
*/
if (arg_type != ARG_PTR_TO_ALLOC_MEM)
return 0;
break;
/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
* fixed offset.
*/
case PTR_TO_BTF_ID:
/* When referenced PTR_TO_BTF_ID is passed to release function,
* it's fixed offset must be 0. We rely on the property that
* only one referenced register can be passed to BPF helpers and
* kfuncs. In the other cases, fixed offset can be non-zero.
*/
release_reg = is_release_func && reg->ref_obj_id;
if (release_reg && reg->off) {
verbose(env, "R%d must have zero offset when passed to release func\n",
regno);
return -EINVAL;
}
/* For release_reg == true, fixed_off_ok must be false, but we
* already checked and rejected reg->off != 0 above, so set to
* true to allow fixed offset for all other cases.
*/
fixed_off_ok = true;
break;
default:
break;
}
return __check_ptr_off_reg(env, reg, regno, fixed_off_ok);
}
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_call_arg_meta *meta,
const struct bpf_func_proto *fn)
@@ -5408,36 +5468,14 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
if (err)
return err;
switch ((u32)type) {
case SCALAR_VALUE:
/* Pointer types where reg offset is explicitly allowed: */
case PTR_TO_PACKET:
case PTR_TO_PACKET_META:
case PTR_TO_MAP_KEY:
case PTR_TO_MAP_VALUE:
case PTR_TO_MEM:
case PTR_TO_MEM | MEM_RDONLY:
case PTR_TO_MEM | MEM_ALLOC:
case PTR_TO_BUF:
case PTR_TO_BUF | MEM_RDONLY:
case PTR_TO_STACK:
/* Some of the argument types nevertheless require a
* zero register offset.
*/
if (arg_type == ARG_PTR_TO_ALLOC_MEM)
goto force_off_check;
break;
/* All the rest must be rejected: */
default:
force_off_check:
err = __check_ptr_off_reg(env, reg, regno,
type == PTR_TO_BTF_ID);
if (err < 0)
return err;
break;
}
err = check_func_arg_reg_off(env, reg, regno, arg_type, is_release_function(meta->func_id));
if (err)
return err;
skip_type_check:
/* check_func_arg_reg_off relies on only one referenced register being
* allowed for BPF helpers.
*/
if (reg->ref_obj_id) {
if (meta->ref_obj_id) {
verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
@@ -9638,7 +9676,6 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
dst_reg->mem_size = aux->btf_var.mem_size;
break;
case PTR_TO_BTF_ID:
case PTR_TO_PERCPU_BTF_ID:
dst_reg->btf = aux->btf_var.btf;
dst_reg->btf_id = aux->btf_var.btf_id;
break;
@@ -10363,8 +10400,7 @@ static void adjust_btf_func(struct bpf_verifier_env *env)
aux->func_info[i].insn_off = env->subprog_info[i].start;
}
#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \
sizeof(((struct bpf_line_info *)(0))->line_col))
#define MIN_BPF_LINEINFO_SIZE offsetofend(struct bpf_line_info, line_col)
#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
static int check_btf_line(struct bpf_verifier_env *env,
@@ -11838,7 +11874,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
type = t->type;
t = btf_type_skip_modifiers(btf, type, NULL);
if (percpu) {
aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
aux->btf_var.btf = btf;
aux->btf_var.btf_id = type;
} else if (!btf_type_is_struct(t)) {
@@ -12987,6 +13023,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func[i]->aux->name[0] = 'F';
func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
func[i]->jit_requested = 1;
func[i]->blinding_requested = prog->blinding_requested;
func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
func[i]->aux->linfo = prog->aux->linfo;
@@ -13110,6 +13147,7 @@ out_free:
out_undo_insn:
/* cleanup main prog to be interpreted */
prog->jit_requested = 0;
prog->blinding_requested = 0;
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
if (!bpf_pseudo_call(insn))
continue;
@@ -13203,7 +13241,6 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
enum bpf_attach_type eatype = prog->expected_attach_type;
bool expect_blinding = bpf_jit_blinding_enabled(prog);
enum bpf_prog_type prog_type = resolve_prog_type(prog);
struct bpf_insn *insn = prog->insnsi;
const struct bpf_func_proto *fn;
@@ -13367,7 +13404,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
insn->code = BPF_JMP | BPF_TAIL_CALL;
aux = &env->insn_aux_data[i + delta];
if (env->bpf_capable && !expect_blinding &&
if (env->bpf_capable && !prog->blinding_requested &&
prog->jit_requested &&
!bpf_map_key_poisoned(aux) &&
!bpf_map_ptr_poisoned(aux) &&
@@ -13455,6 +13492,26 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
goto patch_call_imm;
}
if (insn->imm == BPF_FUNC_task_storage_get ||
insn->imm == BPF_FUNC_sk_storage_get ||
insn->imm == BPF_FUNC_inode_storage_get) {
if (env->prog->aux->sleepable)
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
else
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
insn_buf[1] = *insn;
cnt = 2;
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
goto patch_call_imm;
}
/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
* and other inlining handlers are currently limited to 64 bit
* only.


@@ -64,6 +64,7 @@
#include <linux/compat.h>
#include <linux/io_uring.h>
#include <linux/kprobes.h>
#include <linux/rethook.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -169,6 +170,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
kprobe_flush_task(tsk);
rethook_flush_task(tsk);
perf_event_delayed_put(tsk);
trace_sched_process_free(tsk);
put_task_struct(tsk);


@@ -2255,6 +2255,9 @@ static __latent_entropy struct task_struct *copy_process(
#ifdef CONFIG_KRETPROBES
p->kretprobe_instances.first = NULL;
#endif
#ifdef CONFIG_RETHOOK
p->rethooks.first = NULL;
#endif
/*
* Ensure that the cgroup subsystem policies allow the new process to be


@@ -212,6 +212,10 @@ unsigned long kallsyms_lookup_name(const char *name)
unsigned long i;
unsigned int off;
/* Skip the search for empty string. */
if (!*name)
return 0;
for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));


@@ -10,6 +10,17 @@ config USER_STACKTRACE_SUPPORT
config NOP_TRACER
bool
config HAVE_RETHOOK
bool
config RETHOOK
bool
depends on HAVE_RETHOOK
help
Enable generic return hooking feature. This is an internal
API, which will be used by other function-entry hooking
features like fprobe and kprobes.
config HAVE_FUNCTION_TRACER
bool
help
@@ -236,6 +247,21 @@ config DYNAMIC_FTRACE_WITH_ARGS
depends on DYNAMIC_FTRACE
depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS
config FPROBE
bool "Kernel Function Probe (fprobe)"
depends on FUNCTION_TRACER
depends on DYNAMIC_FTRACE_WITH_REGS
depends on HAVE_RETHOOK
select RETHOOK
default n
help
This option enables kernel function probe (fprobe) based on ftrace.
The fprobe is similar to kprobes, but probes only for kernel function
entries and exits. This also can probe multiple functions by one
fprobe.
If unsure, say N.
config FUNCTION_PROFILER
bool "Kernel function profiler"
depends on FUNCTION_TRACER

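The FPROBE entry gates the new kernel/trace/fprobe.c further down. As a rough sketch of the API it exposes (handler signature taken from that file; illustrative only, not a sample module shipped by this series):

#include <linux/fprobe.h>
#include <linux/kernel.h>
#include <linux/module.h>

static void sample_entry_handler(struct fprobe *fp, unsigned long ip,
				 struct pt_regs *regs)
{
	pr_info("fprobe hit at %pS\n", (void *)ip);
}

static struct fprobe sample_probe = {
	.entry_handler = sample_entry_handler,
};

static int __init sample_init(void)
{
	/* probe every function matching vfs_*, except vfs_write */
	return register_fprobe(&sample_probe, "vfs_*", "vfs_write");
}

static void __exit sample_exit(void)
{
	unregister_fprobe(&sample_probe);
}

module_init(sample_init);
module_exit(sample_exit);
MODULE_LICENSE("GPL");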

@@ -97,6 +97,8 @@ obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o
obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o
obj-$(CONFIG_FPROBE) += fprobe.o
obj-$(CONFIG_RETHOOK) += rethook.o
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o


@@ -17,6 +17,9 @@
#include <linux/error-injection.h>
#include <linux/btf_ids.h>
#include <linux/bpf_lsm.h>
#include <linux/fprobe.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <net/bpf_sk_storage.h>
@@ -77,6 +80,8 @@ u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
u64 flags, const struct btf **btf,
s32 *btf_id);
static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx);
static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx);
/**
* trace_call_bpf - invoke BPF program
@@ -1036,6 +1041,30 @@ static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
.arg1_type = ARG_PTR_TO_CTX,
};
BPF_CALL_1(bpf_get_func_ip_kprobe_multi, struct pt_regs *, regs)
{
return bpf_kprobe_multi_entry_ip(current->bpf_ctx);
}
static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe_multi = {
.func = bpf_get_func_ip_kprobe_multi,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
};
BPF_CALL_1(bpf_get_attach_cookie_kprobe_multi, struct pt_regs *, regs)
{
return bpf_kprobe_multi_cookie(current->bpf_ctx);
}
static const struct bpf_func_proto bpf_get_attach_cookie_proto_kmulti = {
.func = bpf_get_attach_cookie_kprobe_multi,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
};
BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx)
{
struct bpf_trace_run_ctx *run_ctx;
@@ -1279,9 +1308,13 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_override_return_proto;
#endif
case BPF_FUNC_get_func_ip:
return &bpf_get_func_ip_proto_kprobe;
return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ?
&bpf_get_func_ip_proto_kprobe_multi :
&bpf_get_func_ip_proto_kprobe;
case BPF_FUNC_get_attach_cookie:
return &bpf_get_attach_cookie_proto_trace;
return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ?
&bpf_get_attach_cookie_proto_kmulti :
&bpf_get_attach_cookie_proto_trace;
default:
return bpf_tracing_func_proto(func_id, prog);
}
@@ -2181,3 +2214,314 @@ static int __init bpf_event_init(void)
fs_initcall(bpf_event_init);
#endif /* CONFIG_MODULES */
#ifdef CONFIG_FPROBE
struct bpf_kprobe_multi_link {
struct bpf_link link;
struct fprobe fp;
unsigned long *addrs;
u64 *cookies;
u32 cnt;
};
struct bpf_kprobe_multi_run_ctx {
struct bpf_run_ctx run_ctx;
struct bpf_kprobe_multi_link *link;
unsigned long entry_ip;
};
static void bpf_kprobe_multi_link_release(struct bpf_link *link)
{
struct bpf_kprobe_multi_link *kmulti_link;
kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
unregister_fprobe(&kmulti_link->fp);
}
static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
{
struct bpf_kprobe_multi_link *kmulti_link;
kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
kvfree(kmulti_link->addrs);
kvfree(kmulti_link->cookies);
kfree(kmulti_link);
}
static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
.release = bpf_kprobe_multi_link_release,
.dealloc = bpf_kprobe_multi_link_dealloc,
};
static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void *priv)
{
const struct bpf_kprobe_multi_link *link = priv;
unsigned long *addr_a = a, *addr_b = b;
u64 *cookie_a, *cookie_b;
unsigned long tmp1;
u64 tmp2;
cookie_a = link->cookies + (addr_a - link->addrs);
cookie_b = link->cookies + (addr_b - link->addrs);
/* swap addr_a/addr_b and cookie_a/cookie_b values */
tmp1 = *addr_a; *addr_a = *addr_b; *addr_b = tmp1;
tmp2 = *cookie_a; *cookie_a = *cookie_b; *cookie_b = tmp2;
}
static int __bpf_kprobe_multi_cookie_cmp(const void *a, const void *b)
{
const unsigned long *addr_a = a, *addr_b = b;
if (*addr_a == *addr_b)
return 0;
return *addr_a < *addr_b ? -1 : 1;
}
static int bpf_kprobe_multi_cookie_cmp(const void *a, const void *b, const void *priv)
{
return __bpf_kprobe_multi_cookie_cmp(a, b);
}
static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
{
struct bpf_kprobe_multi_run_ctx *run_ctx;
struct bpf_kprobe_multi_link *link;
u64 *cookie, entry_ip;
unsigned long *addr;
if (WARN_ON_ONCE(!ctx))
return 0;
run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx);
link = run_ctx->link;
if (!link->cookies)
return 0;
entry_ip = run_ctx->entry_ip;
addr = bsearch(&entry_ip, link->addrs, link->cnt, sizeof(entry_ip),
__bpf_kprobe_multi_cookie_cmp);
if (!addr)
return 0;
cookie = link->cookies + (addr - link->addrs);
return *cookie;
}
static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
{
struct bpf_kprobe_multi_run_ctx *run_ctx;
run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx);
return run_ctx->entry_ip;
}
static int
kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
unsigned long entry_ip, struct pt_regs *regs)
{
struct bpf_kprobe_multi_run_ctx run_ctx = {
.link = link,
.entry_ip = entry_ip,
};
struct bpf_run_ctx *old_run_ctx;
int err;
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
err = 0;
goto out;
}
migrate_disable();
rcu_read_lock();
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
err = bpf_prog_run(link->link.prog, regs);
bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock();
migrate_enable();
out:
__this_cpu_dec(bpf_prog_active);
return err;
}
static void
kprobe_multi_link_handler(struct fprobe *fp, unsigned long entry_ip,
struct pt_regs *regs)
{
struct bpf_kprobe_multi_link *link;
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
kprobe_multi_link_prog_run(link, entry_ip, regs);
}
static int
kprobe_multi_resolve_syms(const void *usyms, u32 cnt,
unsigned long *addrs)
{
unsigned long addr, size;
const char **syms;
int err = -ENOMEM;
unsigned int i;
char *func;
size = cnt * sizeof(*syms);
syms = kvzalloc(size, GFP_KERNEL);
if (!syms)
return -ENOMEM;
func = kmalloc(KSYM_NAME_LEN, GFP_KERNEL);
if (!func)
goto error;
if (copy_from_user(syms, usyms, size)) {
err = -EFAULT;
goto error;
}
for (i = 0; i < cnt; i++) {
err = strncpy_from_user(func, syms[i], KSYM_NAME_LEN);
if (err == KSYM_NAME_LEN)
err = -E2BIG;
if (err < 0)
goto error;
err = -EINVAL;
addr = kallsyms_lookup_name(func);
if (!addr)
goto error;
if (!kallsyms_lookup_size_offset(addr, &size, NULL))
goto error;
addr = ftrace_location_range(addr, addr + size - 1);
if (!addr)
goto error;
addrs[i] = addr;
}
err = 0;
error:
kvfree(syms);
kfree(func);
return err;
}
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
struct bpf_kprobe_multi_link *link = NULL;
struct bpf_link_primer link_primer;
void __user *ucookies;
unsigned long *addrs;
u32 flags, cnt, size;
void __user *uaddrs;
u64 *cookies = NULL;
void __user *usyms;
int err;
/* no support for 32bit archs yet */
if (sizeof(u64) != sizeof(void *))
return -EOPNOTSUPP;
if (prog->expected_attach_type != BPF_TRACE_KPROBE_MULTI)
return -EINVAL;
flags = attr->link_create.kprobe_multi.flags;
if (flags & ~BPF_F_KPROBE_MULTI_RETURN)
return -EINVAL;
uaddrs = u64_to_user_ptr(attr->link_create.kprobe_multi.addrs);
usyms = u64_to_user_ptr(attr->link_create.kprobe_multi.syms);
if (!!uaddrs == !!usyms)
return -EINVAL;
cnt = attr->link_create.kprobe_multi.cnt;
if (!cnt)
return -EINVAL;
size = cnt * sizeof(*addrs);
addrs = kvmalloc(size, GFP_KERNEL);
if (!addrs)
return -ENOMEM;
if (uaddrs) {
if (copy_from_user(addrs, uaddrs, size)) {
err = -EFAULT;
goto error;
}
} else {
err = kprobe_multi_resolve_syms(usyms, cnt, addrs);
if (err)
goto error;
}
ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies);
if (ucookies) {
cookies = kvmalloc(size, GFP_KERNEL);
if (!cookies) {
err = -ENOMEM;
goto error;
}
if (copy_from_user(cookies, ucookies, size)) {
err = -EFAULT;
goto error;
}
}
link = kzalloc(sizeof(*link), GFP_KERNEL);
if (!link) {
err = -ENOMEM;
goto error;
}
bpf_link_init(&link->link, BPF_LINK_TYPE_KPROBE_MULTI,
&bpf_kprobe_multi_link_lops, prog);
err = bpf_link_prime(&link->link, &link_primer);
if (err)
goto error;
if (flags & BPF_F_KPROBE_MULTI_RETURN)
link->fp.exit_handler = kprobe_multi_link_handler;
else
link->fp.entry_handler = kprobe_multi_link_handler;
link->addrs = addrs;
link->cookies = cookies;
link->cnt = cnt;
if (cookies) {
/*
* Sorting addresses will trigger sorting cookies as well
* (check bpf_kprobe_multi_cookie_swap). This way we can
* find cookie based on the address in bpf_get_attach_cookie
* helper.
*/
sort_r(addrs, cnt, sizeof(*addrs),
bpf_kprobe_multi_cookie_cmp,
bpf_kprobe_multi_cookie_swap,
link);
}
err = register_fprobe_ips(&link->fp, addrs, cnt);
if (err) {
bpf_link_cleanup(&link_primer);
return err;
}
return bpf_link_settle(&link_primer);
error:
kfree(link);
kvfree(addrs);
kvfree(cookies);
return err;
}
#else /* !CONFIG_FPROBE */
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
return -EOPNOTSUPP;
}
static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
{
return 0;
}
static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
{
return 0;
}
#endif

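On the user-space side, the libbpf attach API added alongside this kernel code lets a loader attach one program to many symbols and hand each one a cookie that bpf_get_attach_cookie() later returns. Sketch only (struct and function names as introduced in this series, error handling trimmed):

#include <bpf/libbpf.h>

static int attach_multi(struct bpf_program *prog)
{
	const char *syms[] = { "vfs_read", "vfs_write" };
	__u64 cookies[]    = { 1, 2 };
	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts,
		.syms = syms,
		.cookies = cookies,
		.cnt = 2,
	);
	struct bpf_link *link;

	/* NULL pattern: the symbols come from opts.syms instead of a glob */
	link = bpf_program__attach_kprobe_multi_opts(prog, NULL, &opts);
	return libbpf_get_error(link);
}
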
kernel/trace/fprobe.c (new file, 332 lines)

@@ -0,0 +1,332 @@
// SPDX-License-Identifier: GPL-2.0
/*
* fprobe - Simple ftrace probe wrapper for function entry.
*/
#define pr_fmt(fmt) "fprobe: " fmt
#include <linux/err.h>
#include <linux/fprobe.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/rethook.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "trace.h"
struct fprobe_rethook_node {
struct rethook_node node;
unsigned long entry_ip;
};
static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
struct fprobe_rethook_node *fpr;
struct rethook_node *rh;
struct fprobe *fp;
int bit;
fp = container_of(ops, struct fprobe, ops);
if (fprobe_disabled(fp))
return;
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0) {
fp->nmissed++;
return;
}
if (fp->entry_handler)
fp->entry_handler(fp, ip, ftrace_get_regs(fregs));
if (fp->exit_handler) {
rh = rethook_try_get(fp->rethook);
if (!rh) {
fp->nmissed++;
goto out;
}
fpr = container_of(rh, struct fprobe_rethook_node, node);
fpr->entry_ip = ip;
rethook_hook(rh, ftrace_get_regs(fregs), true);
}
out:
ftrace_test_recursion_unlock(bit);
}
NOKPROBE_SYMBOL(fprobe_handler);
static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
struct fprobe *fp = container_of(ops, struct fprobe, ops);
if (unlikely(kprobe_running())) {
fp->nmissed++;
return;
}
kprobe_busy_begin();
fprobe_handler(ip, parent_ip, ops, fregs);
kprobe_busy_end();
}
static void fprobe_exit_handler(struct rethook_node *rh, void *data,
struct pt_regs *regs)
{
struct fprobe *fp = (struct fprobe *)data;
struct fprobe_rethook_node *fpr;
if (!fp || fprobe_disabled(fp))
return;
fpr = container_of(rh, struct fprobe_rethook_node, node);
fp->exit_handler(fp, fpr->entry_ip, regs);
}
NOKPROBE_SYMBOL(fprobe_exit_handler);
/* Convert ftrace location address from symbols */
static unsigned long *get_ftrace_locations(const char **syms, int num)
{
unsigned long addr, size;
unsigned long *addrs;
int i;
/* Convert symbols to symbol address */
addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL);
if (!addrs)
return ERR_PTR(-ENOMEM);
for (i = 0; i < num; i++) {
addr = kallsyms_lookup_name(syms[i]);
if (!addr) /* Maybe wrong symbol */
goto error;
/* Convert symbol address to ftrace location. */
if (!kallsyms_lookup_size_offset(addr, &size, NULL) || !size)
goto error;
addr = ftrace_location_range(addr, addr + size - 1);
if (!addr) /* No dynamic ftrace there. */
goto error;
addrs[i] = addr;
}
return addrs;
error:
kfree(addrs);
return ERR_PTR(-ENOENT);
}
static void fprobe_init(struct fprobe *fp)
{
fp->nmissed = 0;
if (fprobe_shared_with_kprobes(fp))
fp->ops.func = fprobe_kprobe_handler;
else
fp->ops.func = fprobe_handler;
fp->ops.flags |= FTRACE_OPS_FL_SAVE_REGS;
}
static int fprobe_init_rethook(struct fprobe *fp, int num)
{
int i, size;
if (num < 0)
return -EINVAL;
if (!fp->exit_handler) {
fp->rethook = NULL;
return 0;
}
/* Initialize rethook if needed */
size = num * num_possible_cpus() * 2;
if (size < 0)
return -E2BIG;
fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler);
for (i = 0; i < size; i++) {
struct rethook_node *node;
node = kzalloc(sizeof(struct fprobe_rethook_node), GFP_KERNEL);
if (!node) {
rethook_free(fp->rethook);
fp->rethook = NULL;
return -ENOMEM;
}
rethook_add_node(fp->rethook, node);
}
return 0;
}
static void fprobe_fail_cleanup(struct fprobe *fp)
{
if (fp->rethook) {
/* Don't need to cleanup rethook->handler because this is not used. */
rethook_free(fp->rethook);
fp->rethook = NULL;
}
ftrace_free_filter(&fp->ops);
}
/**
* register_fprobe() - Register fprobe to ftrace by pattern.
* @fp: A fprobe data structure to be registered.
* @filter: A wildcard pattern of probed symbols.
* @notfilter: A wildcard pattern of NOT probed symbols.
*
 * Register @fp with ftrace to enable the probe on the symbols matching @filter.
 * If @notfilter is not NULL, the symbols matching @notfilter are not probed.
*
* Return 0 if @fp is registered successfully, -errno if not.
*/
int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter)
{
struct ftrace_hash *hash;
unsigned char *str;
int ret, len;
if (!fp || !filter)
return -EINVAL;
fprobe_init(fp);
len = strlen(filter);
str = kstrdup(filter, GFP_KERNEL);
ret = ftrace_set_filter(&fp->ops, str, len, 0);
kfree(str);
if (ret)
return ret;
if (notfilter) {
len = strlen(notfilter);
str = kstrdup(notfilter, GFP_KERNEL);
ret = ftrace_set_notrace(&fp->ops, str, len, 0);
kfree(str);
if (ret)
goto out;
}
/* TODO:
* correctly calculate the total number of filtered symbols
* from both filter and notfilter.
*/
hash = fp->ops.local_hash.filter_hash;
if (WARN_ON_ONCE(!hash))
goto out;
ret = fprobe_init_rethook(fp, (int)hash->count);
if (!ret)
ret = register_ftrace_function(&fp->ops);
out:
if (ret)
fprobe_fail_cleanup(fp);
return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe);
/**
* register_fprobe_ips() - Register fprobe to ftrace by address.
* @fp: A fprobe data structure to be registered.
* @addrs: An array of target ftrace location addresses.
* @num: The number of entries of @addrs.
*
 * Register @fp with ftrace to enable the probe at the addresses given by @addrs.
 * Each entry of @addrs must be an ftrace location address, which may be the
 * symbol address plus an arch-dependent offset.
 * If you are unsure what this means, please use the other registration functions.
*
* Return 0 if @fp is registered successfully, -errno if not.
*/
int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
{
int ret;
if (!fp || !addrs || num <= 0)
return -EINVAL;
fprobe_init(fp);
ret = ftrace_set_filter_ips(&fp->ops, addrs, num, 0, 0);
if (ret)
return ret;
ret = fprobe_init_rethook(fp, num);
if (!ret)
ret = register_ftrace_function(&fp->ops);
if (ret)
fprobe_fail_cleanup(fp);
return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe_ips);
/**
* register_fprobe_syms() - Register fprobe to ftrace by symbols.
* @fp: A fprobe data structure to be registered.
* @syms: An array of target symbols.
* @num: The number of entries of @syms.
*
 * Register @fp on the symbols given by the @syms array. This is useful if
 * you are sure the symbols exist in the kernel.
*
* Return 0 if @fp is registered successfully, -errno if not.
*/
int register_fprobe_syms(struct fprobe *fp, const char **syms, int num)
{
unsigned long *addrs;
int ret;
if (!fp || !syms || num <= 0)
return -EINVAL;
addrs = get_ftrace_locations(syms, num);
if (IS_ERR(addrs))
return PTR_ERR(addrs);
ret = register_fprobe_ips(fp, addrs, num);
kfree(addrs);
return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe_syms);
/**
* unregister_fprobe() - Unregister fprobe from ftrace
* @fp: A fprobe data structure to be unregistered.
*
* Unregister fprobe (and remove ftrace hooks from the function entries).
*
* Return 0 if @fp is unregistered successfully, -errno if not.
*/
int unregister_fprobe(struct fprobe *fp)
{
int ret;
if (!fp || fp->ops.func != fprobe_handler)
return -EINVAL;
/*
* rethook_free() starts disabling the rethook, but the rethook handlers
* may be running on other processors at this point. To make sure that all
	 * currently running handlers are finished, call unregister_ftrace_function()
* after this.
*/
if (fp->rethook)
rethook_free(fp->rethook);
ret = unregister_ftrace_function(&fp->ops);
if (ret < 0)
return ret;
ftrace_free_filter(&fp->ops);
return ret;
}
EXPORT_SYMBOL_GPL(unregister_fprobe);
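
As a usage illustration (not part of the diffs here), a minimal sketch of a module built on the API above. The names, handler bodies and symbol patterns are made up; the entry/exit handler prototypes are assumed to match include/linux/fprobe.h from this series (void return, taking the fprobe, the entry ip and pt_regs, as the calls in fprobe_handler() and fprobe_exit_handler() suggest), and using an exit_handler additionally needs the arch rethook support that is merged separately.

/* fprobe_sample.c (illustrative sketch) */
#include <linux/fprobe.h>
#include <linux/kernel.h>
#include <linux/module.h>

/* Runs at function entry, from the ftrace callback. */
static void sample_entry_handler(struct fprobe *fp, unsigned long ip,
				 struct pt_regs *regs)
{
	pr_info("entering %pS\n", (void *)ip);
}

/* Runs when the probed function returns, via the rethook trampoline. */
static void sample_exit_handler(struct fprobe *fp, unsigned long ip,
				struct pt_regs *regs)
{
	pr_info("returning from %pS\n", (void *)ip);
}

static struct fprobe sample_probe = {
	.entry_handler	= sample_entry_handler,
	.exit_handler	= sample_exit_handler,
};

static int __init fprobe_sample_init(void)
{
	/* Probe every tcp_* symbol except the tcp_v6_* ones. */
	return register_fprobe(&sample_probe, "tcp_*", "tcp_v6_*");
}

static void __exit fprobe_sample_exit(void)
{
	unregister_fprobe(&sample_probe);
}

module_init(fprobe_sample_init);
module_exit(fprobe_sample_exit);
MODULE_LICENSE("GPL");

register_fprobe_ips() or register_fprobe_syms() could be used instead when the exact addresses or symbol names are already known, as the kernel-doc above describes.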

kernel/trace/ftrace.c
View File

@@ -4958,7 +4958,7 @@ ftrace_notrace_write(struct file *file, const char __user *ubuf,
}
static int
-ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
+__ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
{
struct ftrace_func_entry *entry;
@@ -4976,9 +4976,30 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
return add_hash_entry(hash, ip);
}
static int
ftrace_match_addr(struct ftrace_hash *hash, unsigned long *ips,
unsigned int cnt, int remove)
{
unsigned int i;
int err;
for (i = 0; i < cnt; i++) {
err = __ftrace_match_addr(hash, ips[i], remove);
if (err) {
/*
* This expects the @hash is a temporary hash and if this
* fails the caller must free the @hash.
*/
return err;
}
}
return 0;
}
static int
ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
-		unsigned long ip, int remove, int reset, int enable)
+		unsigned long *ips, unsigned int cnt,
+		int remove, int reset, int enable)
{
struct ftrace_hash **orig_hash;
struct ftrace_hash *hash;
@@ -5008,8 +5029,8 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
ret = -EINVAL;
goto out_regex_unlock;
}
-	if (ip) {
-		ret = ftrace_match_addr(hash, ip, remove);
+	if (ips) {
+		ret = ftrace_match_addr(hash, ips, cnt, remove);
if (ret < 0)
goto out_regex_unlock;
}
@@ -5026,10 +5047,10 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
}
static int
-ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove,
-		int reset, int enable)
+ftrace_set_addr(struct ftrace_ops *ops, unsigned long *ips, unsigned int cnt,
+		int remove, int reset, int enable)
{
-	return ftrace_set_hash(ops, NULL, 0, ip, remove, reset, enable);
+	return ftrace_set_hash(ops, NULL, 0, ips, cnt, remove, reset, enable);
}
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
@@ -5634,10 +5655,29 @@ int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip,
int remove, int reset)
{
ftrace_ops_init(ops);
-	return ftrace_set_addr(ops, ip, remove, reset, 1);
+	return ftrace_set_addr(ops, &ip, 1, remove, reset, 1);
}
EXPORT_SYMBOL_GPL(ftrace_set_filter_ip);
/**
* ftrace_set_filter_ips - set functions to filter on in ftrace by addresses
* @ops - the ops to set the filter with
* @ips - the array of addresses to add to or remove from the filter.
* @cnt - the number of addresses in @ips
* @remove - non zero to remove ips from the filter
* @reset - non zero to reset all filters before applying this filter.
*
 * Filters denote which functions should be enabled when tracing is enabled.
 * If the @ips array or any address within it is NULL, the filter update fails.
*/
int ftrace_set_filter_ips(struct ftrace_ops *ops, unsigned long *ips,
unsigned int cnt, int remove, int reset)
{
ftrace_ops_init(ops);
return ftrace_set_addr(ops, ips, cnt, remove, reset, 1);
}
EXPORT_SYMBOL_GPL(ftrace_set_filter_ips);
/**
* ftrace_ops_set_global_filter - setup ops to use global filters
* @ops - the ops which will use the global filters
@@ -5659,7 +5699,7 @@ static int
ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
int reset, int enable)
{
-	return ftrace_set_hash(ops, buf, len, 0, 0, reset, enable);
+	return ftrace_set_hash(ops, buf, len, NULL, 0, 0, reset, enable);
}
/**

kernel/trace/rethook.c (new file, 317 lines)
View File

@@ -0,0 +1,317 @@
// SPDX-License-Identifier: GPL-2.0
#define pr_fmt(fmt) "rethook: " fmt
#include <linux/bug.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/preempt.h>
#include <linux/rethook.h>
#include <linux/slab.h>
#include <linux/sort.h>
/* Return hook list (shadow stack by list) */
/*
* This function is called from delayed_put_task_struct() when a task is
 * dead and cleaned up, to recycle any rethook instances associated with
 * this task. These leftover instances represent probed functions that
* have been called but will never return.
*/
void rethook_flush_task(struct task_struct *tk)
{
struct rethook_node *rhn;
struct llist_node *node;
node = __llist_del_all(&tk->rethooks);
while (node) {
rhn = container_of(node, struct rethook_node, llist);
node = node->next;
preempt_disable();
rethook_recycle(rhn);
preempt_enable();
}
}
static void rethook_free_rcu(struct rcu_head *head)
{
struct rethook *rh = container_of(head, struct rethook, rcu);
struct rethook_node *rhn;
struct freelist_node *node;
int count = 1;
node = rh->pool.head;
while (node) {
rhn = container_of(node, struct rethook_node, freelist);
node = node->next;
kfree(rhn);
count++;
}
	/* The rh->ref is the number of pooled nodes + 1 */
if (refcount_sub_and_test(count, &rh->ref))
kfree(rh);
}
/**
* rethook_free() - Free struct rethook.
* @rh: the struct rethook to be freed.
*
 * Free the rethook. Before calling this function, the user must ensure that
 * @rh::data is cleaned up if needed (otherwise the handler may still access
 * it after this call). The @rh is freed only after all rethook_node instances
 * have been freed (i.e. not immediately), and the caller must not touch @rh
 * after calling this function.
*/
void rethook_free(struct rethook *rh)
{
rcu_assign_pointer(rh->handler, NULL);
call_rcu(&rh->rcu, rethook_free_rcu);
}
/**
* rethook_alloc() - Allocate struct rethook.
 * @data: data passed to @handler when hooking the return.
* @handler: the return hook callback function.
*
* Allocate and initialize a new rethook with @data and @handler.
* Return NULL if memory allocation fails or @handler is NULL.
* Note that @handler == NULL means this rethook is going to be freed.
*/
struct rethook *rethook_alloc(void *data, rethook_handler_t handler)
{
struct rethook *rh = kzalloc(sizeof(struct rethook), GFP_KERNEL);
if (!rh || !handler)
return NULL;
rh->data = data;
rh->handler = handler;
rh->pool.head = NULL;
refcount_set(&rh->ref, 1);
return rh;
}
/**
* rethook_add_node() - Add a new node to the rethook.
* @rh: the struct rethook.
* @node: the struct rethook_node to be added.
*
 * Add @node to @rh. The user must allocate @node (typically as part of the
 * user's own data structure). The @node fields are initialized by this function.
*/
void rethook_add_node(struct rethook *rh, struct rethook_node *node)
{
node->rethook = rh;
freelist_add(&node->freelist, &rh->pool);
refcount_inc(&rh->ref);
}
static void free_rethook_node_rcu(struct rcu_head *head)
{
struct rethook_node *node = container_of(head, struct rethook_node, rcu);
if (refcount_dec_and_test(&node->rethook->ref))
kfree(node->rethook);
kfree(node);
}
/**
* rethook_recycle() - return the node to rethook.
* @node: The struct rethook_node to be returned.
*
 * Return the @node to @node::rethook. If @node::rethook is already
 * marked as freed, this frees the @node instead.
*/
void rethook_recycle(struct rethook_node *node)
{
lockdep_assert_preemption_disabled();
if (likely(READ_ONCE(node->rethook->handler)))
freelist_add(&node->freelist, &node->rethook->pool);
else
call_rcu(&node->rcu, free_rethook_node_rcu);
}
NOKPROBE_SYMBOL(rethook_recycle);
/**
* rethook_try_get() - get an unused rethook node.
* @rh: The struct rethook which pools the nodes.
*
* Get an unused rethook node from @rh. If the node pool is empty, this
* will return NULL. Caller must disable preemption.
*/
struct rethook_node *rethook_try_get(struct rethook *rh)
{
rethook_handler_t handler = READ_ONCE(rh->handler);
struct freelist_node *fn;
lockdep_assert_preemption_disabled();
/* Check whether @rh is going to be freed. */
if (unlikely(!handler))
return NULL;
fn = freelist_try_get(&rh->pool);
if (!fn)
return NULL;
return container_of(fn, struct rethook_node, freelist);
}
NOKPROBE_SYMBOL(rethook_try_get);
/**
* rethook_hook() - Hook the current function return.
* @node: The struct rethook node to hook the function return.
* @regs: The struct pt_regs for the function entry.
* @mcount: True if this is called from mcount(ftrace) context.
*
 * Hook the return of the currently running function. This must be called at
 * function entry (or at least @regs must hold the registers of the function
 * entry). @mcount is used to identify the context: if this is called from an
 * ftrace (mcount) callback, @mcount must be set true. If this is called from
 * the real function entry (e.g. kprobes), @mcount must be set false. This is
 * because the way the function return is hooked depends on the context.
*/
void rethook_hook(struct rethook_node *node, struct pt_regs *regs, bool mcount)
{
arch_rethook_prepare(node, regs, mcount);
__llist_add(&node->llist, &current->rethooks);
}
NOKPROBE_SYMBOL(rethook_hook);
/* This assumes the 'tsk' is the current task or is not running. */
static unsigned long __rethook_find_ret_addr(struct task_struct *tsk,
struct llist_node **cur)
{
struct rethook_node *rh = NULL;
struct llist_node *node = *cur;
if (!node)
node = tsk->rethooks.first;
else
node = node->next;
while (node) {
rh = container_of(node, struct rethook_node, llist);
if (rh->ret_addr != (unsigned long)arch_rethook_trampoline) {
*cur = node;
return rh->ret_addr;
}
node = node->next;
}
return 0;
}
NOKPROBE_SYMBOL(__rethook_find_ret_addr);
/**
* rethook_find_ret_addr -- Find correct return address modified by rethook
* @tsk: Target task
* @frame: A frame pointer
* @cur: a storage of the loop cursor llist_node pointer for next call
*
 * Find the correct return address modified by a rethook on @tsk and return it
 * as an unsigned long.
 * The @tsk must be 'current' or a task which is not running. @frame is a hint
 * to get the correct return address, and is compared with the
 * rethook_node::frame field. The @cur is a loop cursor for searching the
 * rethook return addresses on @tsk. '*@cur' should be NULL at the
 * first call, but '@cur' itself must NOT be NULL.
 *
 * Returns the found address value, or zero if not found.
*/
unsigned long rethook_find_ret_addr(struct task_struct *tsk, unsigned long frame,
struct llist_node **cur)
{
struct rethook_node *rhn = NULL;
unsigned long ret;
if (WARN_ON_ONCE(!cur))
return 0;
if (WARN_ON_ONCE(tsk != current && task_is_running(tsk)))
return 0;
do {
ret = __rethook_find_ret_addr(tsk, cur);
if (!ret)
break;
rhn = container_of(*cur, struct rethook_node, llist);
} while (rhn->frame != frame);
return ret;
}
NOKPROBE_SYMBOL(rethook_find_ret_addr);
void __weak arch_rethook_fixup_return(struct pt_regs *regs,
unsigned long correct_ret_addr)
{
/*
* Do nothing by default. If the architecture which uses a
* frame pointer to record real return address on the stack,
* it should fill this function to fixup the return address
* so that stacktrace works from the rethook handler.
*/
}
/* This function will be called from each arch-defined trampoline. */
unsigned long rethook_trampoline_handler(struct pt_regs *regs,
unsigned long frame)
{
struct llist_node *first, *node = NULL;
unsigned long correct_ret_addr;
rethook_handler_t handler;
struct rethook_node *rhn;
correct_ret_addr = __rethook_find_ret_addr(current, &node);
if (!correct_ret_addr) {
pr_err("rethook: Return address not found! Maybe there is a bug in the kernel\n");
BUG_ON(1);
}
instruction_pointer_set(regs, correct_ret_addr);
/*
* These loops must be protected from rethook_free_rcu() because those
* are accessing 'rhn->rethook'.
*/
preempt_disable();
/*
* Run the handler on the shadow stack. Do not unlink the list here because
* stackdump inside the handlers needs to decode it.
*/
first = current->rethooks.first;
while (first) {
rhn = container_of(first, struct rethook_node, llist);
if (WARN_ON_ONCE(rhn->frame != frame))
break;
handler = READ_ONCE(rhn->rethook->handler);
if (handler)
handler(rhn, rhn->rethook->data, regs);
if (first == node)
break;
first = first->next;
}
/* Fixup registers for returning to correct address. */
arch_rethook_fixup_return(regs, correct_ret_addr);
/* Unlink used shadow stack */
first = current->rethooks.first;
current->rethooks.first = node->next;
node->next = NULL;
while (first) {
rhn = container_of(first, struct rethook_node, llist);
first = first->next;
rethook_recycle(rhn);
}
preempt_enable();
return correct_ret_addr;
}
NOKPROBE_SYMBOL(rethook_trampoline_handler);
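
To tie the API together, a hedged sketch of the rethook lifecycle from a hypothetical built-in client (fprobe above is the only in-tree user in this pull, the rethook symbols are not exported to modules here, and actually running this needs the arch_rethook_* implementation that is merged separately). All names below are made up; the calls are the ones documented above.

/* rethook client sketch (built-in code, illustrative only) */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/rethook.h>
#include <linux/slab.h>

struct sample_rh_node {
	struct rethook_node node;	/* recovered via container_of() in the handler */
	unsigned long entry_ip;
};

static struct rethook *sample_rh;

/* Runs from the rethook trampoline when the hooked function returns. */
static void sample_ret_handler(struct rethook_node *rhn, void *data,
			       struct pt_regs *regs)
{
	struct sample_rh_node *sn = container_of(rhn, struct sample_rh_node, node);

	pr_info("return from %pS\n", (void *)sn->entry_ip);
}

/* A kprobe pre-handler: entry pt_regs are available and preemption is disabled. */
static int sample_pre_handler(struct kprobe *kp, struct pt_regs *regs)
{
	struct rethook_node *rhn = rethook_try_get(sample_rh);
	struct sample_rh_node *sn;

	if (!rhn)	/* pool exhausted, or the rethook is being freed */
		return 0;
	sn = container_of(rhn, struct sample_rh_node, node);
	sn->entry_ip = instruction_pointer(regs);
	rethook_hook(rhn, regs, false);	/* false: not called from an mcount context */
	return 0;
}

static struct kprobe sample_kp = {
	.symbol_name	= "do_unlinkat",
	.pre_handler	= sample_pre_handler,
};

static int __init sample_rethook_init(void)
{
	int i, err;

	sample_rh = rethook_alloc(NULL, sample_ret_handler);
	if (!sample_rh)
		return -ENOMEM;

	/* Pre-allocate a small node pool; one node is consumed per hooked call. */
	for (i = 0; i < 16; i++) {
		struct sample_rh_node *sn = kzalloc(sizeof(*sn), GFP_KERNEL);

		if (!sn) {
			rethook_free(sample_rh);
			return -ENOMEM;
		}
		rethook_add_node(sample_rh, &sn->node);
	}

	err = register_kprobe(&sample_kp);
	if (err)
		rethook_free(sample_rh);
	return err;
}
late_initcall(sample_rethook_init);

The client never frees individual nodes itself: used nodes are returned by the trampoline via rethook_recycle(), and rethook_free() takes care of releasing the pool once the handlers are done.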