Merge branch 'relax-allowlist-for-open-coded-css_task-iter'

Chuyi Zhou says:

====================
Relax allowlist for open-coded css_task iter

Hi,
This patchset relaxes the allowlist for the open-coded css_task iter, as
suggested by Alexei [1]. A short sketch of what the open-coded iteration
looks like follows the patch summary below.

Please see individual patches for more details. And comments are always
welcome.

Patch summary:
 * Patch #1: Relax the allowlist so that css_task iters can be used in
   bpf iters and in any sleepable progs.
 * Patch #2: Add a test in cgroup_iter.c which demonstrates how
   css_task iters can be combined with the cgroup iter.
 * Patch #3: Add a test to prove css_task iters can be used in normal
   sleepable progs.

[1]: https://lore.kernel.org/lkml/CAADnVQKafk_junRyE=-FVAik4hjTRDtThymYGEL8hGTuYoOGpA@mail.gmail.com/
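For reference, "open-coded" means the iteration is driven directly from the
program via the bpf_iter_css_task_{new,next,destroy} kfuncs, usually through
the bpf_for_each() convenience macro from the selftests' bpf_experimental.h.
Roughly, the macro expands to something like the following (an illustrative
sketch, not code from this series):

	/* css points at a trusted struct cgroup_subsys_state, e.g. &cgrp->self */
	struct bpf_iter_css_task it;
	struct task_struct *task;

	bpf_iter_css_task_new(&it, css, CSS_TASK_ITER_PROCS);
	while ((task = bpf_iter_css_task_next(&it))) {
		/* task is only valid inside the loop body */
	}
	bpf_iter_css_task_destroy(&it);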
---

Changes in v2:
 * Fix the incorrect logic in check_css_task_iter_allowlist. Use
   expected_attach_type to check whether we are using bpf_iters.
 * Link to v1: https://lore.kernel.org/bpf/20231022154527.229117-1-zhouchuyi@bytedance.com/T/#m946f9cde86b44a13265d9a44c5738a711eb578fd
Changes in v3:
 * Add a testcase to prove css_task iters can be used in fentry.s progs.
 * Link to v2: https://lore.kernel.org/bpf/20231024024240.42790-1-zhouchuyi@bytedance.com/T/#m14a97041ff56c2df21bc0149449abd275b73f6a3
Changes in v4:
 * Add Yonghong's ack for patch #1 and patch #2.
 * Address Yonghong's comments on patch #2.
 * Move prog 'iter_css_task_for_each_sleep' from iters_task_failure.c to
   iters_css_task.c. Use RUN_TESTS to prove we can load this prog.
 * Link to v3: https://lore.kernel.org/bpf/20231025075914.30979-1-zhouchuyi@bytedance.com/T/#m3200d8ad29af4ffab97588e297361d0a45d7585d

---
====================

Link: https://lore.kernel.org/r/20231031050438.93297-1-zhouchuyi@bytedance.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
commit 698b8c5e3b
Alexei Starovoitov <ast@kernel.org>, 2023-11-01 22:49:20 -07:00
5 changed files with 111 additions and 6 deletions


@@ -11402,6 +11402,12 @@ static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
 						  &meta->arg_rbtree_root.field);
 }
 
+/*
+ * css_task iter allowlist is needed to avoid deadlocking on css_set_lock.
+ * LSM hooks and iters (both sleepable and non-sleepable) are safe.
+ * Any sleepable progs are also safe, since bpf_check_attach_target()
+ * enforces that they can only be attached to certain hook points.
+ */
 static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env)
 {
 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
@@ -11409,10 +11415,12 @@ static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env)
 	switch (prog_type) {
 	case BPF_PROG_TYPE_LSM:
 		return true;
-	case BPF_TRACE_ITER:
-		return env->prog->aux->sleepable;
+	case BPF_PROG_TYPE_TRACING:
+		if (env->prog->expected_attach_type == BPF_TRACE_ITER)
+			return true;
+		fallthrough;
 	default:
-		return false;
+		return env->prog->aux->sleepable;
 	}
 }
 
@@ -11671,7 +11679,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 	case KF_ARG_PTR_TO_ITER:
 		if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) {
 			if (!check_css_task_iter_allowlist(env)) {
				verbose(env, "css_task_iter is only allowed in bpf_lsm and bpf iter-s\n");
-				verbose(env, "css_task_iter is only allowed in bpf_lsm and bpf iter-s\n");
+				verbose(env, "css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs\n");
 				return -EINVAL;
 			}
 		}
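
As an illustration of what the relaxed check now admits: any sleepable program
outside the LSM/iter cases, for example a sleepable uprobe, reaches the default
branch above and is accepted because env->prog->aux->sleepable is set. The
following is only a sketch under assumptions, not part of this series: the
uprobe target path/function and the global counter are placeholders, and
bpf_for_each() comes from the selftests' bpf_experimental.h.

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include "bpf_experimental.h"

	char _license[] SEC("license") = "GPL";

	struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
	void bpf_cgroup_release(struct cgroup *p) __ksym;

	int nr_css_tasks;

	/* Sleepable uprobe; binary path and function are placeholders */
	SEC("uprobe.s//usr/bin/placeholder:placeholder_fn")
	int count_css_tasks(void *ctx)
	{
		struct cgroup *cgrp = bpf_cgroup_from_id(bpf_get_current_cgroup_id());
		struct task_struct *task;

		if (!cgrp)
			return 0;
		/* walk every process attached to the current cgroup's own css */
		bpf_for_each(css_task, task, &cgrp->self, CSS_TASK_ITER_PROCS) {
			nr_css_tasks++;
		}
		bpf_cgroup_release(cgrp);
		return 0;
	}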


@@ -4,6 +4,7 @@
 #include <test_progs.h>
 #include <bpf/libbpf.h>
 #include <bpf/btf.h>
+#include "iters_css_task.skel.h"
 #include "cgroup_iter.skel.h"
 #include "cgroup_helpers.h"
 
@@ -263,6 +264,35 @@ close_cgrp:
 	close(cgrp_fd);
 }
 
+static void test_walk_self_only_css_task(void)
+{
+	struct iters_css_task *skel;
+	int err;
+
+	skel = iters_css_task__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	bpf_program__set_autoload(skel->progs.cgroup_id_printer, true);
+	err = iters_css_task__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	err = join_cgroup(cg_path[CHILD2]);
+	if (!ASSERT_OK(err, "join_cgroup"))
+		goto cleanup;
+
+	skel->bss->target_pid = getpid();
+	snprintf(expected_output, sizeof(expected_output),
+		 PROLOGUE "%8llu\n" EPILOGUE, cg_id[CHILD2]);
+	read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[CHILD2],
+			      BPF_CGROUP_ITER_SELF_ONLY, "test_walk_self_only_css_task");
+	ASSERT_EQ(skel->bss->css_task_cnt, 1, "css_task_cnt");
+
+cleanup:
+	iters_css_task__destroy(skel);
+}
+
 void test_cgroup_iter(void)
 {
 	struct cgroup_iter *skel = NULL;
@@ -293,6 +323,9 @@ void test_cgroup_iter(void)
 		test_walk_self_only(skel);
 	if (test__start_subtest("cgroup_iter__dead_self_only"))
 		test_walk_dead_self_only(skel);
+	if (test__start_subtest("cgroup_iter__self_only_css_task"))
+		test_walk_self_only_css_task();
+
 out:
 	cgroup_iter__destroy(skel);
 	cleanup_cgroups();
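
Like the other subtests here, the new cgroup_iter__self_only_css_task subtest
runs under the usual selftests runner (e.g. ./test_progs -t cgroup_iter): it
joins the CHILD2 cgroup, walks only that cgroup (BPF_CGROUP_ITER_SELF_ONLY),
and expects css_task_cnt == 1, i.e. exactly one process (the test itself)
found by the css_task iterator inside the cgroup-iter prog.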


@@ -294,6 +294,7 @@ void test_iters(void)
 	RUN_TESTS(iters_state_safety);
 	RUN_TESTS(iters_looping);
 	RUN_TESTS(iters);
+	RUN_TESTS(iters_css_task);
 
 	if (env.has_testmod)
 		RUN_TESTS(iters_testmod_seq);
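
RUN_TESTS() open-loads every program in the iters_css_task skeleton and checks
their __success/__failure/__msg annotations; per the cover letter, this is how
the series proves that the plain sleepable iter_css_task_for_each_sleep prog
(added below) now loads.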


@@ -10,6 +10,7 @@
 
 char _license[] SEC("license") = "GPL";
 
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
 struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
 void bpf_cgroup_release(struct cgroup *p) __ksym;
 
@@ -45,3 +46,65 @@ int BPF_PROG(iter_css_task_for_each, struct vm_area_struct *vma,
 
 	return -EPERM;
 }
+
+static inline u64 cgroup_id(struct cgroup *cgrp)
+{
+	return cgrp->kn->id;
+}
+
+SEC("?iter/cgroup")
+int cgroup_id_printer(struct bpf_iter__cgroup *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct cgroup *cgrp, *acquired;
+	struct cgroup_subsys_state *css;
+	struct task_struct *task;
+	u64 cgrp_id;
+
+	cgrp = ctx->cgroup;
+
+	/* epilogue */
+	if (cgrp == NULL) {
+		BPF_SEQ_PRINTF(seq, "epilogue\n");
+		return 0;
+	}
+
+	/* prologue */
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "prologue\n");
+
+	cgrp_id = cgroup_id(cgrp);
+	BPF_SEQ_PRINTF(seq, "%8llu\n", cgrp_id);
+
+	acquired = bpf_cgroup_from_id(cgrp_id);
+	if (!acquired)
+		return 0;
+	css = &acquired->self;
+	css_task_cnt = 0;
+
+	bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+		if (task->pid == target_pid)
+			css_task_cnt++;
+	}
+
+	bpf_cgroup_release(acquired);
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int BPF_PROG(iter_css_task_for_each_sleep)
+{
+	u64 cgrp_id = bpf_get_current_cgroup_id();
+	struct cgroup *cgrp = bpf_cgroup_from_id(cgrp_id);
+	struct cgroup_subsys_state *css;
+	struct task_struct *task;
+
+	if (cgrp == NULL)
+		return 0;
+	css = &cgrp->self;
+
+	bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+
+	}
+	bpf_cgroup_release(cgrp);
+	return 0;
+}
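
Outside the selftest harness (where RUN_TESTS only verifies that the prog
loads), a sleepable prog like iter_css_task_for_each_sleep could be loaded
and triggered with a minimal libbpf loader along these lines. This is a
sketch under assumptions: it uses the iters_css_task skeleton header that the
selftests build generates, and error handling is condensed.

	#include <unistd.h>
	#include <sys/syscall.h>
	#include <bpf/libbpf.h>
	#include "iters_css_task.skel.h"

	int main(void)
	{
		struct iters_css_task *skel;
		struct bpf_link *link = NULL;
		int ret = 1;

		skel = iters_css_task__open();
		if (!skel)
			return 1;
		/* SEC("?...") progs are not autoloaded; enable just this one */
		bpf_program__set_autoload(skel->progs.iter_css_task_for_each_sleep, true);
		if (iters_css_task__load(skel))
			goto out;
		link = bpf_program__attach(skel->progs.iter_css_task_for_each_sleep);
		if (!link)
			goto out;
		syscall(SYS_getpgid, 0);	/* trigger the sleepable fentry on sys_getpgid */
		ret = 0;
	out:
		if (link)
			bpf_link__destroy(link);
		iters_css_task__destroy(skel);
		return ret;
	}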


@@ -84,8 +84,8 @@ int BPF_PROG(iter_css_lock_and_unlock)
 	return 0;
 }
 
-SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
-__failure __msg("css_task_iter is only allowed in bpf_lsm and bpf iter-s")
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+__failure __msg("css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs")
 int BPF_PROG(iter_css_task_for_each)
 {
 	u64 cg_id = bpf_get_current_cgroup_id();
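
Note the negative test now attaches via non-sleepable SEC("?fentry/...")
instead of the previous fentry.s: since patch #1 allows css_task iters in any
sleepable prog, a non-sleepable fentry prog is what still triggers the
(updated) verifier rejection message checked by __msg above.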