Merge branch 'Add bpf_copy_from_user_task helper and sleepable bpf iterator programs'

Kenny Yu says:

====================

This patch series makes the following changes:
* Adds a new bpf helper `bpf_copy_from_user_task` to read user space
  memory from a different task.
* Adds the ability to create sleepable bpf iterator programs.

As an example of how this will be used, at Meta we are using bpf task
iterator programs and this new bpf helper to read C++ async stack traces of
a running process for debugging C++ binaries in production.

Changes since v6:
* Split first patch into two patches: first patch to add support
  for bpf iterators to use sleepable helpers, and the second to add
  the new bpf helper.
* Simplify implementation of `bpf_copy_from_user_task` based on feedback.
* Add to docs that the destination buffer will be zero-ed on error.

Changes since v5:
* Rename `bpf_access_process_vm` to `bpf_copy_from_user_task`.
* Change return value to be all-or-nothing. If we get a partial read,
  memset all bytes to 0 and return -EFAULT.
* Add to docs that the helper can only be used by sleepable BPF programs.
* Fix nits in selftests.

Changes since v4:
* Make `flags` into u64.
* Use `user_ptr` arg name to be consistent with `bpf_copy_from_user`.
* Add an extra check in selftests to verify access_process_vm calls
  succeeded.

Changes since v3:
* Check if `flags` is 0 and return -EINVAL if not.
* Rebase on latest bpf-next branch and fix merge conflicts.

Changes since v2:
* Reorder arguments in `bpf_access_process_vm` to match existing related
  bpf helpers (e.g. `bpf_probe_read_kernel`, `bpf_probe_read_user`,
  `bpf_copy_from_user`).
* `flags` argument is provided for future extensibility and is not
  currently used, and we always invoke `access_process_vm` with no flags.
* Merge bpf helper patch and `bpf_iter_run_prog` patch together for better
  bisectability in case of failures.
* Clean up formatting and comments in selftests.

Changes since v1:
* Fixed "Invalid wait context" issue in `bpf_iter_run_prog` by using
  `rcu_read_lock_trace()` for sleepable bpf iterator programs.
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2022-01-24 19:55:41 -08:00
commit c45c79e546
9 changed files with 149 additions and 5 deletions

View File

@ -2243,6 +2243,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
extern const struct bpf_func_proto bpf_find_vma_proto;
extern const struct bpf_func_proto bpf_loop_proto;
extern const struct bpf_func_proto bpf_strncmp_proto;
extern const struct bpf_func_proto bpf_copy_from_user_task_proto;
const struct bpf_func_proto *tracing_prog_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);

View File

@ -5076,6 +5076,16 @@ union bpf_attr {
* associated to *xdp_md*, at *offset*.
* Return
* 0 on success, or a negative error in case of failure.
*
* long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
* Description
* Read *size* bytes from user space address *user_ptr* in *tsk*'s
* address space, and stores the data in *dst*. *flags* is not
* used yet and is provided for future extensibility. This helper
* can only be used by sleepable programs.
* Return
* 0 on success, or a negative error in case of failure. On error
* *dst* buffer is zeroed out.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -5269,6 +5279,7 @@ union bpf_attr {
FN(xdp_get_buff_len), \
FN(xdp_load_bytes), \
FN(xdp_store_bytes), \
FN(copy_from_user_task), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper

View File

@ -5,6 +5,7 @@
#include <linux/anon_inodes.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/rcupdate_trace.h>
struct bpf_iter_target_info {
struct list_head list;
@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
{
int ret;
rcu_read_lock();
migrate_disable();
ret = bpf_prog_run(prog, ctx);
migrate_enable();
rcu_read_unlock();
if (prog->aux->sleepable) {
rcu_read_lock_trace();
migrate_disable();
might_fault();
ret = bpf_prog_run(prog, ctx);
migrate_enable();
rcu_read_unlock_trace();
} else {
rcu_read_lock();
migrate_disable();
ret = bpf_prog_run(prog, ctx);
migrate_enable();
rcu_read_unlock();
}
/* bpf program can only return 0 or 1:
* 0 : okay

View File

@ -16,6 +16,7 @@
#include <linux/pid_namespace.h>
#include <linux/proc_ns.h>
#include <linux/security.h>
#include <linux/btf_ids.h>
#include "../../lib/kstrtox.h"
@ -671,6 +672,39 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
.arg3_type = ARG_ANYTHING,
};
BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
const void __user *, user_ptr, struct task_struct *, tsk, u64, flags)
{
int ret;
/* flags is not used yet */
if (unlikely(flags))
return -EINVAL;
if (unlikely(!size))
return 0;
ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
if (ret == size)
return 0;
memset(dst, 0, size);
/* Return -EFAULT for partial read */
return ret < 0 ? ret : -EFAULT;
}
const struct bpf_func_proto bpf_copy_from_user_task_proto = {
.func = bpf_copy_from_user_task,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_BTF_ID,
.arg4_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
.arg5_type = ARG_ANYTHING
};
BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
{
if (cpu >= nr_cpu_ids)

View File

@ -1235,6 +1235,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_task_stack_proto;
case BPF_FUNC_copy_from_user:
return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
case BPF_FUNC_copy_from_user_task:
return prog->aux->sleepable ? &bpf_copy_from_user_task_proto : NULL;
case BPF_FUNC_snprintf_btf:
return &bpf_snprintf_btf_proto;
case BPF_FUNC_per_cpu_ptr:

View File

@ -5076,6 +5076,16 @@ union bpf_attr {
* associated to *xdp_md*, at *offset*.
* Return
* 0 on success, or a negative error in case of failure.
*
* long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
* Description
* Read *size* bytes from user space address *user_ptr* in *tsk*'s
* address space, and stores the data in *dst*. *flags* is not
* used yet and is provided for future extensibility. This helper
* can only be used by sleepable programs.
* Return
* 0 on success, or a negative error in case of failure. On error
* *dst* buffer is zeroed out.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -5269,6 +5279,7 @@ union bpf_attr {
FN(xdp_get_buff_len), \
FN(xdp_load_bytes), \
FN(xdp_store_bytes), \
FN(copy_from_user_task), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper

View File

@ -8612,6 +8612,7 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("lsm/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
SEC_DEF("iter.s/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE),
SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),

View File

@ -138,6 +138,24 @@ static void test_task(void)
bpf_iter_task__destroy(skel);
}
static void test_task_sleepable(void)
{
struct bpf_iter_task *skel;
skel = bpf_iter_task__open_and_load();
if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load"))
return;
do_dummy_read(skel->progs.dump_task_sleepable);
ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0,
"num_expected_failure_copy_from_user_task");
ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0,
"num_success_copy_from_user_task");
bpf_iter_task__destroy(skel);
}
static void test_task_stack(void)
{
struct bpf_iter_task_stack *skel;
@ -1252,6 +1270,8 @@ void test_bpf_iter(void)
test_bpf_map();
if (test__start_subtest("task"))
test_task();
if (test__start_subtest("task_sleepable"))
test_task_sleepable();
if (test__start_subtest("task_stack"))
test_task_stack();
if (test__start_subtest("task_file"))

View File

@ -2,6 +2,7 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
@ -23,3 +24,56 @@ int dump_task(struct bpf_iter__task *ctx)
BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
return 0;
}
int num_expected_failure_copy_from_user_task = 0;
int num_success_copy_from_user_task = 0;
SEC("iter.s/task")
int dump_task_sleepable(struct bpf_iter__task *ctx)
{
struct seq_file *seq = ctx->meta->seq;
struct task_struct *task = ctx->task;
static const char info[] = " === END ===";
struct pt_regs *regs;
void *ptr;
uint32_t user_data = 0;
int ret;
if (task == (void *)0) {
BPF_SEQ_PRINTF(seq, "%s\n", info);
return 0;
}
/* Read an invalid pointer and ensure we get an error */
ptr = NULL;
ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0);
if (ret) {
++num_expected_failure_copy_from_user_task;
} else {
BPF_SEQ_PRINTF(seq, "%s\n", info);
return 0;
}
/* Try to read the contents of the task's instruction pointer from the
* remote task's address space.
*/
regs = (struct pt_regs *)bpf_task_pt_regs(task);
if (regs == (void *)0) {
BPF_SEQ_PRINTF(seq, "%s\n", info);
return 0;
}
ptr = (void *)PT_REGS_IP(regs);
ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0);
if (ret) {
BPF_SEQ_PRINTF(seq, "%s\n", info);
return 0;
}
++num_success_copy_from_user_task;
if (ctx->meta->seq_num == 0)
BPF_SEQ_PRINTF(seq, " tgid gid data\n");
BPF_SEQ_PRINTF(seq, "%8d %8d %8d\n", task->tgid, task->pid, user_data);
return 0;
}