Merge branch 'bpf-sleepable'
Alexei Starovoitov says:

====================

v2->v3:
- Switched to a minimal allowlist approach. Essentially that means that
  syscall entry, a few btrfs allow_error_inject functions,
  should_fail_bio(), and two LSM hooks (file_mprotect and
  bprm_committed_creds) are the only hooks that allow attaching of
  sleepable BPF programs. Once a comprehensive analysis of LSM hooks is
  done, this allowlist will be extended.
- Added patch 1, which fixes the prototypes of two mm functions so that
  error injection works reliably on them. It is also necessary for the
  resolve_btfids tool to recognize these two functions, but that is
  secondary.

v1->v2:
- Split the fmod_ret fix into a separate patch.
- Added a denylist.

v1:
This patch set introduces minimal viable support for sleepable BPF
programs. In this patch set only fentry/fexit/fmod_ret and lsm programs
can be sleepable. Only array maps and pre-allocated hash and LRU hash
maps are allowed.

Here is the 'perf report' difference of sleepable vs non-sleepable:

   3.86%  bench  [k]  __srcu_read_unlock
   3.22%  bench  [k]  __srcu_read_lock
   0.92%  bench  [k]  bpf_prog_740d4210cdcd99a3_bench_trigger_fentry_sleep
   0.50%  bench  [k]  bpf_trampoline_10297
   0.26%  bench  [k]  __bpf_prog_exit_sleepable
   0.21%  bench  [k]  __bpf_prog_enter_sleepable

vs

   0.88%  bench  [k]  bpf_prog_740d4210cdcd99a3_bench_trigger_fentry
   0.84%  bench  [k]  bpf_trampoline_10297
   0.13%  bench  [k]  __bpf_prog_enter
   0.12%  bench  [k]  __bpf_prog_exit

vs

   0.79%  bench  [k]  bpf_prog_740d4210cdcd99a3_bench_trigger_fentry_sleep
   0.72%  bench  [k]  bpf_trampoline_10381
   0.31%  bench  [k]  __bpf_prog_exit_sleepable
   0.29%  bench  [k]  __bpf_prog_enter_sleepable

Sleepable vs non-sleepable program invocation overhead is only
marginally higher, due to rcu_trace; the srcu approach is much slower.
(The first listing above is the earlier srcu-based variant, the second
is non-sleepable, and the third is the rcu_tasks_trace-based version in
this set.)

====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
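From the user's point of view, a program opts into sleepable mode via the new ".s" section-name suffix (wired up in the libbpf hunks below), which makes libbpf set BPF_F_SLEEPABLE at load time. A minimal sketch of such a program, with a hypothetical program name not taken from the patch:

    /* Hypothetical minimal sleepable fentry program, modeled on the
     * selftests added below. The ".s" suffix marks it sleepable.
     */
    #include <linux/bpf.h>
    #include <linux/ptrace.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    char _license[] SEC("license") = "GPL";

    long calls = 0;

    SEC("fentry.s/__x64_sys_getpgid")
    int BPF_PROG(sleepable_example)
    {
            /* this program may now call sleepable helpers such as
             * bpf_copy_from_user()
             */
            __sync_add_and_fetch(&calls, 1);
            return 0;
    }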
arch/x86/net/bpf_jit_comp.c
@@ -1379,10 +1379,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	u8 *prog = *pprog;
 	int cnt = 0;

-	if (emit_call(&prog, __bpf_prog_enter, prog))
-		return -EINVAL;
-	/* remember prog start time returned by __bpf_prog_enter */
-	emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+	if (p->aux->sleepable) {
+		if (emit_call(&prog, __bpf_prog_enter_sleepable, prog))
+			return -EINVAL;
+	} else {
+		if (emit_call(&prog, __bpf_prog_enter, prog))
+			return -EINVAL;
+		/* remember prog start time returned by __bpf_prog_enter */
+		emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+	}

 	/* arg1: lea rdi, [rbp - stack_size] */
 	EMIT4(0x48, 0x8D, 0x7D, -stack_size);
@@ -1402,13 +1407,18 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	if (mod_ret)
 		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);

-	/* arg1: mov rdi, progs[i] */
-	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
-		       (u32) (long) p);
-	/* arg2: mov rsi, rbx <- start time in nsec */
-	emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
-	if (emit_call(&prog, __bpf_prog_exit, prog))
-		return -EINVAL;
+	if (p->aux->sleepable) {
+		if (emit_call(&prog, __bpf_prog_exit_sleepable, prog))
+			return -EINVAL;
+	} else {
+		/* arg1: mov rdi, progs[i] */
+		emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
+			       (u32) (long) p);
+		/* arg2: mov rsi, rbx <- start time in nsec */
+		emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+		if (emit_call(&prog, __bpf_prog_exit, prog))
+			return -EINVAL;
+	}

 	*pprog = prog;
 	return 0;
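Read as C, the enter/exit sequence the JIT now emits around each program is roughly the following. This is a sketch of the generated logic, not literal kernel source:

    /* Sketch of what the two hunks above emit around each program p. */
    u64 start = 0;
    u64 ret;

    if (p->aux->sleepable)
            __bpf_prog_enter_sleepable();   /* rcu_read_lock_trace() */
    else
            start = __bpf_prog_enter();     /* rcu_read_lock() + start time,
                                             * kept in rbx / BPF_REG_6 */

    ret = p->bpf_func(args, p->insnsi);     /* run the BPF program itself */

    if (p->aux->sleepable)
            __bpf_prog_exit_sleepable();    /* rcu_read_unlock_trace() */
    else
            __bpf_prog_exit(p, start);      /* stats + rcu_read_unlock() */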
include/linux/bpf.h
@@ -539,6 +539,8 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
 /* these two functions are called from generated trampoline */
 u64 notrace __bpf_prog_enter(void);
 void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
+void notrace __bpf_prog_enter_sleepable(void);
+void notrace __bpf_prog_exit_sleepable(void);

 struct bpf_ksym {
 	unsigned long start;
@@ -734,6 +736,7 @@ struct bpf_prog_aux {
 	bool offload_requested;
 	bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
 	bool func_proto_unreliable;
+	bool sleepable;
 	enum bpf_tramp_prog_type trampoline_prog_type;
 	struct bpf_trampoline *trampoline;
 	struct hlist_node tramp_hlist;
@@ -1781,6 +1784,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
+extern const struct bpf_func_proto bpf_copy_from_user_proto;

 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
include/uapi/linux/bpf.h
@@ -346,6 +346,14 @@ enum bpf_link_type {
 /* The verifier internal test flag. Behavior is undefined */
 #define BPF_F_TEST_STATE_FREQ	(1U << 3)

+/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will
+ * restrict map and helper usage for such programs. Sleepable BPF programs can
+ * only be attached to hooks where kernel execution context allows sleeping.
+ * Such programs are allowed to use helpers that may sleep like
+ * bpf_copy_from_user().
+ */
+#define BPF_F_SLEEPABLE		(1U << 4)
+
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * two extensions:
  *
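For callers bypassing libbpf, the flag is simply OR'ed into prog_flags of the load attribute. A hedged sketch of a raw BPF_PROG_LOAD call follows; insns, license and attach fields are assumed to be filled in elsewhere, so this is illustrative only:

    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/bpf.h>

    /* Illustrative helper: load an already-prepared attr (insns, insn_cnt,
     * license, prog_type, attach target) with BPF_F_SLEEPABLE set.
     */
    static int bpf_prog_load_sleepable(union bpf_attr *attr)
    {
            attr->prog_flags |= BPF_F_SLEEPABLE;    /* (1U << 4), added above */
            return syscall(__NR_bpf, BPF_PROG_LOAD, attr, sizeof(*attr));
    }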
@@ -3561,6 +3569,13 @@ union bpf_attr {
  *		On success, the strictly positive length of the string,
  *		including the trailing NUL character. On error, a negative
  *		value.
+ *
+ * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr)
+ *	Description
+ *		Read *size* bytes from user space address *user_ptr* and store
+ *		the data in *dst*. This is a wrapper of copy_from_user().
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3711,6 +3726,7 @@ union bpf_attr {
 	FN(inode_storage_get),		\
 	FN(inode_storage_delete),	\
 	FN(d_path),			\
+	FN(copy_from_user),		\
 	/* */

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
init/Kconfig
@@ -1691,6 +1691,7 @@ config BPF_SYSCALL
 	bool "Enable bpf() system call"
 	select BPF
 	select IRQ_WORK
+	select TASKS_TRACE_RCU
 	default n
 	help
 	  Enable the bpf() system call that allows to manipulate eBPF
kernel/bpf/arraymap.c
@@ -10,6 +10,7 @@
 #include <linux/filter.h>
 #include <linux/perf_event.h>
 #include <uapi/linux/btf.h>
+#include <linux/rcupdate_trace.h>

 #include "map_in_map.h"

kernel/bpf/hashtab.c
@@ -9,6 +9,7 @@
 #include <linux/rculist_nulls.h>
 #include <linux/random.h>
 #include <uapi/linux/btf.h>
+#include <linux/rcupdate_trace.h>
 #include "percpu_freelist.h"
 #include "bpf_lru_list.h"
 #include "map_in_map.h"
@@ -577,8 +578,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
 	struct htab_elem *l;
 	u32 hash, key_size;

-	/* Must be called with rcu_read_lock. */
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());

 	key_size = map->key_size;

@@ -941,7 +941,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 		/* unknown flags */
 		return -EINVAL;

-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());

 	key_size = map->key_size;

@@ -1032,7 +1032,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 		/* unknown flags */
 		return -EINVAL;

-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());

 	key_size = map->key_size;

@@ -1220,7 +1220,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
 	u32 hash, key_size;
 	int ret = -ENOENT;

-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());

 	key_size = map->key_size;

@@ -1252,7 +1252,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 	u32 hash, key_size;
 	int ret = -ENOENT;

-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());

 	key_size = map->key_size;

kernel/bpf/helpers.c
@@ -601,6 +601,28 @@ const struct bpf_func_proto bpf_event_output_data_proto = {
 	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
 };

+BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
+	   const void __user *, user_ptr)
+{
+	int ret = copy_from_user(dst, user_ptr, size);
+
+	if (unlikely(ret)) {
+		memset(dst, 0, size);
+		ret = -EFAULT;
+	}
+
+	return ret;
+}
+
+const struct bpf_func_proto bpf_copy_from_user_proto = {
+	.func		= bpf_copy_from_user,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
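A design note on the helper above: on any fault it zero-fills the whole destination and returns -EFAULT, so a program never observes partially copied data. A hedged BPF-side usage sketch, with a hook choice and names that are illustrative only (the patch's real usage appears in the lsm.c selftest hunks below):

    /* Hypothetical sleepable LSM program reading argv[0] of an exec'ing
     * task; assumes a vmlinux.h generated for the running kernel.
     */
    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    char _license[] SEC("license") = "GPL";

    SEC("lsm.s/bprm_committed_creds")
    int BPF_PROG(log_arg0, struct linux_binprm *bprm)
    {
            char arg0[64];

            /* may fault and sleep; on -EFAULT, arg0 is fully zeroed */
            if (bpf_copy_from_user(arg0, sizeof(arg0),
                                   (void *)bprm->mm->arg_start))
                    return 0;
            /* ... inspect arg0 ... */
            return 0;
    }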
kernel/bpf/syscall.c
@@ -29,6 +29,7 @@
 #include <linux/bpf_lsm.h>
 #include <linux/poll.h>
 #include <linux/bpf-netns.h>
+#include <linux/rcupdate_trace.h>

 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
 			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -1731,10 +1732,14 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
 	btf_put(prog->aux->btf);
 	bpf_prog_free_linfo(prog);

-	if (deferred)
-		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
-	else
+	if (deferred) {
+		if (prog->aux->sleepable)
+			call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu);
+		else
+			call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
+	} else {
 		__bpf_prog_put_rcu(&prog->aux->rcu);
+	}
 }

 static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
@@ -2104,6 +2109,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 	if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
 				 BPF_F_ANY_ALIGNMENT |
 				 BPF_F_TEST_STATE_FREQ |
+				 BPF_F_SLEEPABLE |
 				 BPF_F_TEST_RND_HI32))
 		return -EINVAL;

||||
@ -2159,6 +2165,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
|
||||
}
|
||||
|
||||
prog->aux->offload_requested = !!attr->prog_ifindex;
|
||||
prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
|
||||
|
||||
err = security_bpf_prog_alloc(prog->aux);
|
||||
if (err)
|
||||
|
kernel/bpf/trampoline.c
@@ -7,6 +7,8 @@
 #include <linux/rbtree_latch.h>
 #include <linux/perf_event.h>
 #include <linux/btf.h>
+#include <linux/rcupdate_trace.h>
+#include <linux/rcupdate_wait.h>

 /* dummy _ops. The verifier will operate on target program's ops. */
 const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -210,9 +212,12 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
 	 * updates to trampoline would change the code from underneath the
 	 * preempted task. Hence wait for tasks to voluntarily schedule or go
 	 * to userspace.
+	 * The same trampoline can hold both sleepable and non-sleepable progs.
+	 * synchronize_rcu_tasks_trace() is needed to make sure all sleepable
+	 * programs finish executing.
+	 * Wait for these two grace periods together.
 	 */
-	synchronize_rcu_tasks();
+	synchronize_rcu_mult(call_rcu_tasks, call_rcu_tasks_trace);

 	err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
 					  &tr->func.model, flags, tprogs,
@@ -344,7 +349,14 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
 	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
 		goto out;
 	bpf_image_ksym_del(&tr->ksym);
-	/* wait for tasks to get out of trampoline before freeing it */
+	/* This code will be executed when all bpf progs (both sleepable and
+	 * non-sleepable) went through
+	 * bpf_prog_put()->call_rcu[_tasks_trace]()->bpf_prog_free_deferred().
+	 * Hence no need for another synchronize_rcu_tasks_trace() here,
+	 * but synchronize_rcu_tasks() is still needed, since trampoline
+	 * may not have had any sleepable programs and we need to wait
+	 * for tasks to get out of trampoline code before freeing it.
+	 */
 	synchronize_rcu_tasks();
 	bpf_jit_free_exec(tr->image);
 	hlist_del(&tr->hlist);
@@ -394,6 +406,16 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
 	rcu_read_unlock();
 }

+void notrace __bpf_prog_enter_sleepable(void)
+{
+	rcu_read_lock_trace();
+}
+
+void notrace __bpf_prog_exit_sleepable(void)
+{
+	rcu_read_unlock_trace();
+}
+
 int __weak
 arch_prepare_bpf_trampoline(void *image, void *image_end,
 			    const struct btf_func_model *m, u32 flags,
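Taken together with the syscall.c and trampoline-update hunks above, these two functions establish the lifetime rule for sleepable programs. In outline, a summary sketch assembled from this patch rather than new code:

    /* Reader side: what the trampoline wraps around a sleepable program. */
    rcu_read_lock_trace();              /* __bpf_prog_enter_sleepable()   */
    /* ... program runs, may sleep inside bpf_copy_from_user() ...        */
    rcu_read_unlock_trace();            /* __bpf_prog_exit_sleepable()    */

    /* Writer side: before prog or trampoline memory may be reused.       */
    call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu);  /* prog free    */
    synchronize_rcu_mult(call_rcu_tasks, call_rcu_tasks_trace); /* tramp update */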
kernel/bpf/verifier.c
@@ -21,6 +21,7 @@
 #include <linux/ctype.h>
 #include <linux/error-injection.h>
 #include <linux/bpf_lsm.h>
+#include <linux/btf_ids.h>

 #include "disasm.h"

@@ -9367,6 +9368,23 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 		return -EINVAL;
 	}

+	if (prog->aux->sleepable)
+		switch (map->map_type) {
+		case BPF_MAP_TYPE_HASH:
+		case BPF_MAP_TYPE_LRU_HASH:
+		case BPF_MAP_TYPE_ARRAY:
+			if (!is_preallocated_map(map)) {
+				verbose(env,
+					"Sleepable programs can only use preallocated hash maps\n");
+				return -EINVAL;
+			}
+			break;
+		default:
+			verbose(env,
+				"Sleepable programs can only use array and hash maps\n");
+			return -EINVAL;
+		}
+
 	return 0;
 }

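In libbpf map-definition terms, the check above accepts the first map below and rejects the second. This is an illustrative sketch; BPF_F_NO_PREALLOC is the standard flag that makes a hash map dynamically allocated:

    /* Accepted for sleepable programs: array, or hash/LRU hash left in
     * the default preallocated mode.
     */
    struct {
            __uint(type, BPF_MAP_TYPE_HASH);
            __uint(max_entries, 128);
            __type(key, __u32);
            __type(value, __u64);
    } prealloc_hash SEC(".maps");

    /* Rejected with "Sleepable programs can only use preallocated hash
     * maps": dynamically allocated elements are freed via call_rcu(),
     * which sleepable programs cannot rely on.
     */
    struct {
            __uint(type, BPF_MAP_TYPE_HASH);
            __uint(map_flags, BPF_F_NO_PREALLOC);
            __uint(max_entries, 128);
            __type(key, __u32);
            __type(value, __u64);
    } no_prealloc_hash SEC(".maps");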
@@ -10985,6 +11003,36 @@ static int check_attach_modify_return(struct bpf_prog *prog, unsigned long addr)
 	return -EINVAL;
 }

+/* non exhaustive list of sleepable bpf_lsm_*() functions */
+BTF_SET_START(btf_sleepable_lsm_hooks)
+#ifdef CONFIG_BPF_LSM
+BTF_ID(func, bpf_lsm_file_mprotect)
+BTF_ID(func, bpf_lsm_bprm_committed_creds)
+#endif
+BTF_SET_END(btf_sleepable_lsm_hooks)
+
+static int check_sleepable_lsm_hook(u32 btf_id)
+{
+	return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id);
+}
+
+/* list of non-sleepable functions that are otherwise on
+ * ALLOW_ERROR_INJECTION list
+ */
+BTF_SET_START(btf_non_sleepable_error_inject)
+/* Three functions below can be called from sleepable and non-sleepable context.
+ * Assume non-sleepable from bpf safety point of view.
+ */
+BTF_ID(func, __add_to_page_cache_locked)
+BTF_ID(func, should_fail_alloc_page)
+BTF_ID(func, should_failslab)
+BTF_SET_END(btf_non_sleepable_error_inject)
+
+static int check_non_sleepable_error_inject(u32 btf_id)
+{
+	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
+}
+
 static int check_attach_btf_id(struct bpf_verifier_env *env)
 {
 	struct bpf_prog *prog = env->prog;
@@ -11002,6 +11050,12 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 	long addr;
 	u64 key;

+	if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
+	    prog->type != BPF_PROG_TYPE_LSM) {
+		verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
+		return -EINVAL;
+	}
+
 	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
 		return check_struct_ops_btf_id(env);

@@ -11210,13 +11264,36 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 			}
 		}

-		if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
+		if (prog->aux->sleepable) {
+			ret = -EINVAL;
+			switch (prog->type) {
+			case BPF_PROG_TYPE_TRACING:
+				/* fentry/fexit/fmod_ret progs can be sleepable only if they are
+				 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
+				 */
+				if (!check_non_sleepable_error_inject(btf_id) &&
+				    within_error_injection_list(addr))
+					ret = 0;
+				break;
+			case BPF_PROG_TYPE_LSM:
+				/* LSM progs check that they are attached to bpf_lsm_*() funcs.
+				 * Only some of them are sleepable.
+				 */
+				if (check_sleepable_lsm_hook(btf_id))
+					ret = 0;
+				break;
+			default:
+				break;
+			}
+			if (ret)
+				verbose(env, "%s is not sleepable\n",
+					prog->aux->attach_func_name);
+		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
 			ret = check_attach_modify_return(prog, addr);
 			if (ret)
 				verbose(env, "%s() is not modifiable\n",
 					prog->aux->attach_func_name);
 		}

 		if (ret)
 			goto out;
 		tr->func.addr = (void *)addr;
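For the TRACING branch above, the gate is the kernel's existing error-injection list: a function is a valid sleepable fentry/fexit/fmod_ret target only if it is tagged in-kernel and not on the denylist just added. A sketch of how a function lands on that list; the function here is hypothetical, while ALLOW_ERROR_INJECTION is the long-standing macro from <linux/error-injection.h>:

    #include <linux/error-injection.h>

    /* Hypothetical kernel function made attachable for sleepable
     * fmod_ret/fentry/fexit programs: tagging it puts its address on the
     * list that within_error_injection_list() consults above.
     */
    noinline int example_subsys_op(int arg)
    {
            return 0;
    }
    ALLOW_ERROR_INJECTION(example_subsys_op, ERRNO);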
kernel/trace/bpf_trace.c
@@ -1228,6 +1228,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_jiffies64_proto;
 	case BPF_FUNC_get_task_stack:
 		return &bpf_get_task_stack_proto;
+	case BPF_FUNC_copy_from_user:
+		return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
 	default:
 		return NULL;
 	}
mm/filemap.c
@@ -827,10 +827,10 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL_GPL(replace_page_cache_page);

-static int __add_to_page_cache_locked(struct page *page,
-				      struct address_space *mapping,
-				      pgoff_t offset, gfp_t gfp_mask,
-				      void **shadowp)
+noinline int __add_to_page_cache_locked(struct page *page,
+					struct address_space *mapping,
+					pgoff_t offset, gfp_t gfp_mask,
+					void **shadowp)
 {
 	XA_STATE(xas, &mapping->i_pages, offset);
 	int huge = PageHuge(page);
mm/page_alloc.c
@@ -3477,7 +3477,7 @@ static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)

 #endif /* CONFIG_FAIL_PAGE_ALLOC */

-static noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
+noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 {
 	return __should_fail_alloc_page(gfp_mask, order);
 }
tools/include/uapi/linux/bpf.h
@@ -346,6 +346,14 @@ enum bpf_link_type {
 /* The verifier internal test flag. Behavior is undefined */
 #define BPF_F_TEST_STATE_FREQ	(1U << 3)

+/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will
+ * restrict map and helper usage for such programs. Sleepable BPF programs can
+ * only be attached to hooks where kernel execution context allows sleeping.
+ * Such programs are allowed to use helpers that may sleep like
+ * bpf_copy_from_user().
+ */
+#define BPF_F_SLEEPABLE		(1U << 4)
+
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * two extensions:
  *
@@ -3561,6 +3569,13 @@ union bpf_attr {
  *		On success, the strictly positive length of the string,
  *		including the trailing NUL character. On error, a negative
  *		value.
+ *
+ * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr)
+ *	Description
+ *		Read *size* bytes from user space address *user_ptr* and store
+ *		the data in *dst*. This is a wrapper of copy_from_user().
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3711,6 +3726,7 @@ union bpf_attr {
 	FN(inode_storage_get),		\
 	FN(inode_storage_delete),	\
 	FN(d_path),			\
+	FN(copy_from_user),		\
 	/* */

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
tools/lib/bpf/libbpf.c
@@ -208,6 +208,7 @@ struct bpf_sec_def {
 	bool is_exp_attach_type_optional;
 	bool is_attachable;
 	bool is_attach_btf;
+	bool is_sleepable;
 	attach_fn_t attach_fn;
 };

@@ -6291,6 +6292,8 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
 			/* couldn't guess, but user might manually specify */
 			continue;

+		if (prog->sec_def->is_sleepable)
+			prog->prog_flags |= BPF_F_SLEEPABLE;
 		bpf_program__set_type(prog, prog->sec_def->prog_type);
 		bpf_program__set_expected_attach_type(prog,
 				prog->sec_def->expected_attach_type);
@@ -7559,6 +7562,21 @@ static const struct bpf_sec_def section_defs[] = {
 		.expected_attach_type = BPF_TRACE_FEXIT,
 		.is_attach_btf = true,
 		.attach_fn = attach_trace),
+	SEC_DEF("fentry.s/", TRACING,
+		.expected_attach_type = BPF_TRACE_FENTRY,
+		.is_attach_btf = true,
+		.is_sleepable = true,
+		.attach_fn = attach_trace),
+	SEC_DEF("fmod_ret.s/", TRACING,
+		.expected_attach_type = BPF_MODIFY_RETURN,
+		.is_attach_btf = true,
+		.is_sleepable = true,
+		.attach_fn = attach_trace),
+	SEC_DEF("fexit.s/", TRACING,
+		.expected_attach_type = BPF_TRACE_FEXIT,
+		.is_attach_btf = true,
+		.is_sleepable = true,
+		.attach_fn = attach_trace),
 	SEC_DEF("freplace/", EXT,
 		.is_attach_btf = true,
 		.attach_fn = attach_trace),
@@ -7566,6 +7584,11 @@ static const struct bpf_sec_def section_defs[] = {
 		.is_attach_btf = true,
 		.expected_attach_type = BPF_LSM_MAC,
 		.attach_fn = attach_lsm),
+	SEC_DEF("lsm.s/", LSM,
+		.is_attach_btf = true,
+		.is_sleepable = true,
+		.expected_attach_type = BPF_LSM_MAC,
+		.attach_fn = attach_lsm),
 	SEC_DEF("iter/", TRACING,
 		.expected_attach_type = BPF_TRACE_ITER,
 		.is_attach_btf = true,
@@ -8288,7 +8311,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,

 		prog->prog_ifindex = attr->ifindex;
 		prog->log_level = attr->log_level;
-		prog->prog_flags = attr->prog_flags;
+		prog->prog_flags |= attr->prog_flags;
 		if (!first_prog)
 			first_prog = prog;
 	}
tools/testing/selftests/bpf/bench.c
@@ -317,6 +317,7 @@ extern const struct bench bench_trig_tp;
 extern const struct bench bench_trig_rawtp;
 extern const struct bench bench_trig_kprobe;
 extern const struct bench bench_trig_fentry;
+extern const struct bench bench_trig_fentry_sleep;
 extern const struct bench bench_trig_fmodret;
 extern const struct bench bench_rb_libbpf;
 extern const struct bench bench_rb_custom;
@@ -338,6 +339,7 @@ static const struct bench *benchs[] = {
 	&bench_trig_rawtp,
 	&bench_trig_kprobe,
 	&bench_trig_fentry,
+	&bench_trig_fentry_sleep,
 	&bench_trig_fmodret,
 	&bench_rb_libbpf,
 	&bench_rb_custom,
tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -90,6 +90,12 @@ static void trigger_fentry_setup()
 	attach_bpf(ctx.skel->progs.bench_trigger_fentry);
 }

+static void trigger_fentry_sleep_setup()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep);
+}
+
 static void trigger_fmodret_setup()
 {
 	setup_ctx();
@@ -155,6 +161,17 @@ const struct bench bench_trig_fentry = {
 	.report_final = hits_drops_report_final,
 };

+const struct bench bench_trig_fentry_sleep = {
+	.name = "trig-fentry-sleep",
+	.validate = trigger_validate,
+	.setup = trigger_fentry_sleep_setup,
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
 const struct bench bench_trig_fmodret = {
 	.name = "trig-fmodret",
 	.validate = trigger_validate,
tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -10,6 +10,7 @@
 #include <unistd.h>
 #include <malloc.h>
 #include <stdlib.h>
+#include <unistd.h>

 #include "lsm.skel.h"

@@ -55,6 +56,7 @@ void test_test_lsm(void)
 {
 	struct lsm *skel = NULL;
 	int err, duration = 0;
+	int buf = 1234;

 	skel = lsm__open_and_load();
 	if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
@@ -81,6 +83,13 @@ void test_test_lsm(void)
 	CHECK(skel->bss->mprotect_count != 1, "mprotect_count",
 	      "mprotect_count = %d\n", skel->bss->mprotect_count);

+	syscall(__NR_setdomainname, &buf, -2L);
+	syscall(__NR_setdomainname, 0, -3L);
+	syscall(__NR_setdomainname, ~0L, -4L);
+
+	CHECK(skel->bss->copy_test != 3, "copy_test",
+	      "copy_test = %d\n", skel->bss->copy_test);
+
 close_prog:
 	lsm__destroy(skel);
 }
tools/testing/selftests/bpf/progs/lsm.c
@@ -9,16 +9,41 @@
 #include <bpf/bpf_tracing.h>
 #include <errno.h>

+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} array SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} hash SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_LRU_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} lru_hash SEC(".maps");
+
 char _license[] SEC("license") = "GPL";

 int monitored_pid = 0;
 int mprotect_count = 0;
 int bprm_count = 0;

-SEC("lsm/file_mprotect")
+SEC("lsm.s/file_mprotect")
 int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
 	     unsigned long reqprot, unsigned long prot, int ret)
 {
+	char args[64];
+	__u32 key = 0;
+	__u64 *value;
+
 	if (ret != 0)
 		return ret;
@@ -28,6 +53,18 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
 	is_stack = (vma->vm_start <= vma->vm_mm->start_stack &&
 		    vma->vm_end >= vma->vm_mm->start_stack);

+	bpf_copy_from_user(args, sizeof(args), (void *)vma->vm_mm->arg_start);
+
+	value = bpf_map_lookup_elem(&array, &key);
+	if (value)
+		*value = 0;
+	value = bpf_map_lookup_elem(&hash, &key);
+	if (value)
+		*value = 0;
+	value = bpf_map_lookup_elem(&lru_hash, &key);
+	if (value)
+		*value = 0;
+
 	if (is_stack && monitored_pid == pid) {
 		mprotect_count++;
 		ret = -EPERM;
@@ -36,7 +73,7 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
 	return ret;
 }

-SEC("lsm/bprm_committed_creds")
+SEC("lsm.s/bprm_committed_creds")
 int BPF_PROG(test_void_hook, struct linux_binprm *bprm)
 {
 	__u32 pid = bpf_get_current_pid_tgid() >> 32;
@@ -46,3 +83,28 @@ int BPF_PROG(test_void_hook, struct linux_binprm *bprm)

 	return 0;
 }
+SEC("lsm/task_free") /* lsm/ is ok, lsm.s/ fails */
+int BPF_PROG(test_task_free, struct task_struct *task)
+{
+	return 0;
+}
+
+int copy_test = 0;
+
+SEC("fentry.s/__x64_sys_setdomainname")
+int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs)
+{
+	void *ptr = (void *)PT_REGS_PARM1(regs);
+	int len = PT_REGS_PARM2(regs);
+	int buf = 0;
+	long ret;
+
+	ret = bpf_copy_from_user(&buf, sizeof(buf), ptr);
+	if (len == -2 && ret == 0 && buf == 1234)
+		copy_test++;
+	if (len == -3 && ret == -EFAULT)
+		copy_test++;
+	if (len == -4 && ret == -EFAULT)
+		copy_test++;
+	return 0;
+}
tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -39,6 +39,13 @@ int bench_trigger_fentry(void *ctx)
 	return 0;
 }

+SEC("fentry.s/__x64_sys_getpgid")
+int bench_trigger_fentry_sleep(void *ctx)
+{
+	__sync_add_and_fetch(&hits, 1);
+	return 0;
+}
+
 SEC("fmod_ret/__x64_sys_getpgid")
 int bench_trigger_fmodret(void *ctx)
 {