From e8ddbfb4bcb2254acb71f2ad9008d736f45273a7 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 28 Jan 2019 09:21:15 -0800 Subject: [PATCH 01/29] selftests/bpf: skip sockmap in test_maps if kernel doesn't have support Use recently introduced bpf_probe_map_type() to skip test_sockmap() if map creation fails. The skipped test is indicated in the output. Example: test_sockmap SKIP (unsupported map type BPF_MAP_TYPE_SOCKMAP) Fork 1024 tasks to 'test_update_delete' ... test_sockmap SKIP (unsupported map type BPF_MAP_TYPE_SOCKMAP) Fork 1024 tasks to 'test_update_delete' ... test_maps: OK, 2 SKIPPED Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_maps.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 1dfef77cff6f..e7798dd97f4b 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -32,6 +32,8 @@ #define ENOTSUPP 524 #endif +static int skips; + static int map_flags; #define CHECK(condition, tag, format...) ({ \ @@ -724,6 +726,15 @@ static void test_sockmap(int tasks, void *data) sizeof(key), sizeof(value), 6, 0); if (fd < 0) { + if (!bpf_probe_map_type(BPF_MAP_TYPE_SOCKMAP, 0)) { + printf("%s SKIP (unsupported map type BPF_MAP_TYPE_SOCKMAP)\n", + __func__); + skips++; + for (i = 0; i < 6; i++) + close(sfd[i]); + return; + } + printf("Failed to create sockmap %i\n", fd); goto out_sockmap; } @@ -1701,6 +1712,6 @@ int main(void) map_flags = BPF_F_NO_PREALLOC; run_all_tests(); - printf("test_maps: OK\n"); + printf("test_maps: OK, %d SKIPPED\n", skips); return 0; } From 8184d44c9a577a2f1842ed6cc844bfd4a9981d8e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 28 Jan 2019 09:21:16 -0800 Subject: [PATCH 02/29] selftests/bpf: skip verifier tests for unsupported program types Use recently introduced bpf_probe_prog_type() to skip tests in the test_verifier() if bpf_verify_program() fails. The skipped test is indicated in the output. Example: ... 679/p bpf_get_stack return R0 within range SKIP (unsupported program type 5) 680/p ld_abs: invalid op 1 OK ... Summary: 863 PASSED, 165 SKIPPED, 3 FAILED Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index c5e22422a852..2a36f9f2ee8f 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -34,6 +34,7 @@ #include #include +#include #ifdef HAVE_GENHDR # include "autoconf.h" @@ -59,6 +60,7 @@ #define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled" static bool unpriv_disabled = false; +static int skips; struct bpf_test { const char *descr; @@ -598,6 +600,11 @@ static void do_test_single(struct bpf_test *test, bool unpriv, pflags |= BPF_F_ANY_ALIGNMENT; fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags, "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1); + if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) { + printf("SKIP (unsupported program type %d)\n", prog_type); + skips++; + goto close_fds; + } expected_ret = unpriv && test->result_unpriv != UNDEF ? test->result_unpriv : test->result; @@ -751,7 +758,7 @@ static bool test_as_unpriv(struct bpf_test *test) static int do_test(bool unpriv, unsigned int from, unsigned int to) { - int i, passes = 0, errors = 0, skips = 0; + int i, passes = 0, errors = 0; for (i = from; i < to; i++) { struct bpf_test *test = &tests[i]; From 9acea337ef570faa14f13232d1b2fa6615aaf569 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 28 Jan 2019 09:21:17 -0800 Subject: [PATCH 03/29] selftests/bpf: skip verifier tests for unsupported map types Use recently introduced bpf_probe_map_type() to skip tests in the test_verifier if map creation (create_map) fails. It's handled explicitly for each fixup, i.e. if bpf_create_map returns negative fd, we probe the kernel for the appropriate map support and skip the test is map type is not supported. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 36 +++++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 2a36f9f2ee8f..97188dbe80bd 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -265,6 +265,16 @@ static int probe_filter_length(const struct bpf_insn *fp) return len + 1; } +static bool skip_unsupported_map(enum bpf_map_type map_type) +{ + if (!bpf_probe_map_type(map_type, 0)) { + printf("SKIP (unsupported map type %d)\n", map_type); + skips++; + return true; + } + return false; +} + static int create_map(uint32_t type, uint32_t size_key, uint32_t size_value, uint32_t max_elem) { @@ -272,8 +282,11 @@ static int create_map(uint32_t type, uint32_t size_key, fd = bpf_create_map(type, size_key, size_value, max_elem, type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0); - if (fd < 0) + if (fd < 0) { + if (skip_unsupported_map(type)) + return -1; printf("Failed to create hash map '%s'!\n", strerror(errno)); + } return fd; } @@ -323,6 +336,8 @@ static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem, mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int), sizeof(int), max_elem, 0); if (mfd < 0) { + if (skip_unsupported_map(BPF_MAP_TYPE_PROG_ARRAY)) + return -1; printf("Failed to create prog array '%s'!\n", strerror(errno)); return -1; } @@ -353,15 +368,20 @@ static int create_map_in_map(void) inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(int), 1, 0); if (inner_map_fd < 0) { + if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY)) + return -1; printf("Failed to create array '%s'!\n", strerror(errno)); return inner_map_fd; } outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL, sizeof(int), inner_map_fd, 1, 0); - if (outer_map_fd < 0) + if (outer_map_fd < 0) { + if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY_OF_MAPS)) + return -1; printf("Failed to create array of maps '%s'!\n", strerror(errno)); + } close(inner_map_fd); @@ -376,9 +396,12 @@ static int create_cgroup_storage(bool percpu) fd = bpf_create_map(type, sizeof(struct bpf_cgroup_storage_key), TEST_DATA_LEN, 0, 0); - if (fd < 0) + if (fd < 0) { + if (skip_unsupported_map(type)) + return -1; printf("Failed to create cgroup storage '%s'!\n", strerror(errno)); + } return fd; } @@ -582,6 +605,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, int run_errs, run_successes; int map_fds[MAX_NR_MAPS]; const char *expected_err; + int fixup_skips; __u32 pflags; int i, err; @@ -590,7 +614,13 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (!prog_type) prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + fixup_skips = skips; do_test_fixup(test, prog_type, prog, map_fds); + /* If there were some map skips during fixup due to missing bpf + * features, skip this test. + */ + if (fixup_skips != skips) + return; prog_len = probe_filter_length(prog); pflags = 0; From cfff578ed51ca4ad4c66c8d4bd78694096ea5515 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 28 Jan 2019 09:21:18 -0800 Subject: [PATCH 04/29] selftests/bpf: mark verifier test that uses bpf_trace_printk as BPF_PROG_TYPE_TRACEPOINT We don't have this helper if the kernel was compiled without CONFIG_BPF_EVENTS. Setting prog_type to BPF_PROG_TYPE_TRACEPOINT let's verifier correctly skip this test based on the missing prog_type support in the kernel. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/verifier/unpriv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index dca58cf1a4ab..3e046695fad7 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -76,6 +76,7 @@ .errstr_unpriv = "unknown func bpf_trace_printk#6", .result_unpriv = REJECT, .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, { "unpriv: pass pointer to helper function", From befa618112a0a4590ce21d70aa35c9d341337774 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 28 Jan 2019 09:21:19 -0800 Subject: [PATCH 05/29] bpf: BPF_PROG_TYPE_CGROUP_{SKB, SOCK, SOCK_ADDR} require cgroups enabled There is no way to exercise appropriate attach points without cgroups enabled. This lets test_verifier correctly skip tests for these prog_types if kernel was compiled without BPF cgroup support. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- include/linux/bpf_types.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 44d9ab4809bd..08bf2f1fe553 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -6,9 +6,11 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter) BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act) BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act) BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp) +#ifdef CONFIG_CGROUP_BPF BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb) BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock) BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr) +#endif BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit) From de1da68d9c9d230e448b9fa0abff139a0468fb96 Mon Sep 17 00:00:00 2001 From: Valdis Kletnieks Date: Mon, 28 Jan 2019 23:04:46 -0500 Subject: [PATCH 06/29] bpf: fix bitrotted kerneldoc Over the years, the function signature has changed, but the kerneldoc block hasn't. Signed-off-by: Valdis Kletnieks Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- kernel/bpf/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index a7bcb23bee84..f13c543b7b36 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1263,8 +1263,9 @@ bool bpf_opcode_in_insntable(u8 code) #ifndef CONFIG_BPF_JIT_ALWAYS_ON /** * __bpf_prog_run - run eBPF program on a given context - * @ctx: is the data we are operating on + * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers * @insn: is the array of eBPF instructions + * @stack: is the eBPF storage stack * * Decode and execute eBPF instructions. */ From 116bfa96a255123ed209da6544f74a4f2eaca5da Mon Sep 17 00:00:00 2001 From: Valdis Kletnieks Date: Tue, 29 Jan 2019 01:04:25 -0500 Subject: [PATCH 07/29] bpf: fix missing prototype warnings Compiling with W=1 generates warnings: CC kernel/bpf/core.o kernel/bpf/core.c:721:12: warning: no previous prototype for ?bpf_jit_alloc_exec_limit? [-Wmissing-prototypes] 721 | u64 __weak bpf_jit_alloc_exec_limit(void) | ^~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/core.c:757:14: warning: no previous prototype for ?bpf_jit_alloc_exec? [-Wmissing-prototypes] 757 | void *__weak bpf_jit_alloc_exec(unsigned long size) | ^~~~~~~~~~~~~~~~~~ kernel/bpf/core.c:762:13: warning: no previous prototype for ?bpf_jit_free_exec? [-Wmissing-prototypes] 762 | void __weak bpf_jit_free_exec(void *addr) | ^~~~~~~~~~~~~~~~~ All three are weak functions that archs can override, provide proper prototypes for when a new arch provides their own. Signed-off-by: Valdis Kletnieks Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/linux/filter.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index e4b473f85b46..7317376734f7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -880,7 +880,9 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, bpf_jit_fill_hole_t bpf_fill_ill_insns); void bpf_jit_binary_free(struct bpf_binary_header *hdr); - +u64 bpf_jit_alloc_exec_limit(void); +void *bpf_jit_alloc_exec(unsigned long size); +void bpf_jit_free_exec(void *addr); void bpf_jit_free(struct bpf_prog *fp); int bpf_jit_get_func_addr(const struct bpf_prog *prog, From 1832f4ef5867fd3898d8a6c6c1978b75d76fc246 Mon Sep 17 00:00:00 2001 From: Valdis Kletnieks Date: Tue, 29 Jan 2019 01:47:06 -0500 Subject: [PATCH 08/29] bpf, cgroups: clean up kerneldoc warnings Building with W=1 reveals some bitrot: CC kernel/bpf/cgroup.o kernel/bpf/cgroup.c:238: warning: Function parameter or member 'flags' not described in '__cgroup_bpf_attach' kernel/bpf/cgroup.c:367: warning: Function parameter or member 'unused_flags' not described in '__cgroup_bpf_detach' Add a kerneldoc line for 'flags'. Fixing the warning for 'unused_flags' is best approached by removing the unused parameter on the function call. Signed-off-by: Valdis Kletnieks Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 2 +- kernel/bpf/cgroup.c | 3 ++- kernel/cgroup/cgroup.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 588dd5f0bd85..695b2a880d9a 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -78,7 +78,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp); int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 flags); + enum bpf_attach_type type); int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index ab612fe9862f..d78cfec5807d 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -230,6 +230,7 @@ cleanup: * @cgrp: The cgroup which descendants to traverse * @prog: A program to attach * @type: Type of attach operation + * @flags: Option flags * * Must be called with cgroup_mutex held. */ @@ -363,7 +364,7 @@ cleanup: * Must be called with cgroup_mutex held. */ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 unused_flags) + enum bpf_attach_type type) { struct list_head *progs = &cgrp->bpf.progs[type]; enum bpf_cgroup_storage_type stype; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index f31bd61c9466..9f617605dacb 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5996,7 +5996,7 @@ int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, int ret; mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_detach(cgrp, prog, type, flags); + ret = __cgroup_bpf_detach(cgrp, prog, type); mutex_unlock(&cgroup_mutex); return ret; } From d83525ca62cf8ebe3271d14c36fb900c294274a2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 31 Jan 2019 15:40:04 -0800 Subject: [PATCH 09/29] bpf: introduce bpf_spin_lock Introduce 'struct bpf_spin_lock' and bpf_spin_lock/unlock() helpers to let bpf program serialize access to other variables. Example: struct hash_elem { int cnt; struct bpf_spin_lock lock; }; struct hash_elem * val = bpf_map_lookup_elem(&hash_map, &key); if (val) { bpf_spin_lock(&val->lock); val->cnt++; bpf_spin_unlock(&val->lock); } Restrictions and safety checks: - bpf_spin_lock is only allowed inside HASH and ARRAY maps. - BTF description of the map is mandatory for safety analysis. - bpf program can take one bpf_spin_lock at a time, since two or more can cause dead locks. - only one 'struct bpf_spin_lock' is allowed per map element. It drastically simplifies implementation yet allows bpf program to use any number of bpf_spin_locks. - when bpf_spin_lock is taken the calls (either bpf2bpf or helpers) are not allowed. - bpf program must bpf_spin_unlock() before return. - bpf program can access 'struct bpf_spin_lock' only via bpf_spin_lock()/bpf_spin_unlock() helpers. - load/store into 'struct bpf_spin_lock lock;' field is not allowed. - to use bpf_spin_lock() helper the BTF description of map value must be a struct and have 'struct bpf_spin_lock anyname;' field at the top level. Nested lock inside another struct is not allowed. - syscall map_lookup doesn't copy bpf_spin_lock field to user space. - syscall map_update and program map_update do not update bpf_spin_lock field. - bpf_spin_lock cannot be on the stack or inside networking packet. bpf_spin_lock can only be inside HASH or ARRAY map value. - bpf_spin_lock is available to root only and to all program types. - bpf_spin_lock is not allowed in inner maps of map-in-map. - ld_abs is not allowed inside spin_lock-ed region. - tracing progs and socket filter progs cannot use bpf_spin_lock due to insufficient preemption checks Implementation details: - cgroup-bpf class of programs can nest with xdp/tc programs. Hence bpf_spin_lock is equivalent to spin_lock_irqsave. Other solutions to avoid nested bpf_spin_lock are possible. Like making sure that all networking progs run with softirq disabled. spin_lock_irqsave is the simplest and doesn't add overhead to the programs that don't use it. - arch_spinlock_t is used when its implemented as queued_spin_lock - archs can force their own arch_spinlock_t - on architectures where queued_spin_lock is not available and sizeof(arch_spinlock_t) != sizeof(__u32) trivial lock is used. - presence of bpf_spin_lock inside map value could have been indicated via extra flag during map_create, but specifying it via BTF is cleaner. It provides introspection for map key/value and reduces user mistakes. Next steps: - allow bpf_spin_lock in other map types (like cgroup local storage) - introduce BPF_F_LOCK flag for bpf_map_update() syscall and helper to request kernel to grab bpf_spin_lock before rewriting the value. That will serialize access to map elements. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 37 +++++++- include/linux/bpf_verifier.h | 1 + include/linux/btf.h | 1 + include/uapi/linux/bpf.h | 7 +- kernel/Kconfig.locks | 3 + kernel/bpf/arraymap.c | 7 +- kernel/bpf/btf.c | 42 +++++++++ kernel/bpf/core.c | 2 + kernel/bpf/hashtab.c | 21 +++-- kernel/bpf/helpers.c | 80 +++++++++++++++++ kernel/bpf/map_in_map.c | 5 ++ kernel/bpf/syscall.c | 21 ++++- kernel/bpf/verifier.c | 169 ++++++++++++++++++++++++++++++++++- net/core/filter.c | 16 +++- 14 files changed, 386 insertions(+), 26 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0394f1f9213b..2ae615b48bb8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -72,14 +72,15 @@ struct bpf_map { u32 value_size; u32 max_entries; u32 map_flags; - u32 pages; + int spin_lock_off; /* >=0 valid offset, <0 error */ u32 id; int numa_node; u32 btf_key_type_id; u32 btf_value_type_id; struct btf *btf; + u32 pages; bool unpriv_array; - /* 55 bytes hole */ + /* 51 bytes hole */ /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. @@ -91,6 +92,34 @@ struct bpf_map { char name[BPF_OBJ_NAME_LEN]; }; +static inline bool map_value_has_spin_lock(const struct bpf_map *map) +{ + return map->spin_lock_off >= 0; +} + +static inline void check_and_init_map_lock(struct bpf_map *map, void *dst) +{ + if (likely(!map_value_has_spin_lock(map))) + return; + *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = + (struct bpf_spin_lock){}; +} + +/* copy everything but bpf_spin_lock */ +static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) +{ + if (unlikely(map_value_has_spin_lock(map))) { + u32 off = map->spin_lock_off; + + memcpy(dst, src, off); + memcpy(dst + off + sizeof(struct bpf_spin_lock), + src + off + sizeof(struct bpf_spin_lock), + map->value_size - off - sizeof(struct bpf_spin_lock)); + } else { + memcpy(dst, src, map->value_size); + } +} + struct bpf_offload_dev; struct bpf_offloaded_map; @@ -162,6 +191,7 @@ enum bpf_arg_type { ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */ + ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ }; /* type of values returned from helper functions */ @@ -879,7 +909,8 @@ extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; extern const struct bpf_func_proto bpf_msg_redirect_map_proto; extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; extern const struct bpf_func_proto bpf_sk_redirect_map_proto; - +extern const struct bpf_func_proto bpf_spin_lock_proto; +extern const struct bpf_func_proto bpf_spin_unlock_proto; extern const struct bpf_func_proto bpf_get_local_storage_proto; /* Shared helpers among cBPF and eBPF. */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 0620e418dde5..69f7a3449eda 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -148,6 +148,7 @@ struct bpf_verifier_state { /* call stack tracking */ struct bpf_func_state *frame[MAX_CALL_FRAMES]; u32 curframe; + u32 active_spin_lock; bool speculative; }; diff --git a/include/linux/btf.h b/include/linux/btf.h index 12502e25e767..455d31b55828 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -50,6 +50,7 @@ u32 btf_id(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); +int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 60b99b730a41..86f7c438d40f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2422,7 +2422,9 @@ union bpf_attr { FN(map_peek_elem), \ FN(msg_push_data), \ FN(msg_pop_data), \ - FN(rc_pointer_rel), + FN(rc_pointer_rel), \ + FN(spin_lock), \ + FN(spin_unlock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3056,4 +3058,7 @@ struct bpf_line_info { __u32 line_col; }; +struct bpf_spin_lock { + __u32 val; +}; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 84d882f3e299..fbba478ae522 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -242,6 +242,9 @@ config QUEUED_SPINLOCKS def_bool y if ARCH_USE_QUEUED_SPINLOCKS depends on SMP +config BPF_ARCH_SPINLOCK + bool + config ARCH_USE_QUEUED_RWLOCKS bool diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 25632a75d630..d6d979910a2a 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -270,9 +270,10 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), value, map->value_size); else - memcpy(array->value + - array->elem_size * (index & array->index_mask), - value, map->value_size); + copy_map_value(map, + array->value + + array->elem_size * (index & array->index_mask), + value); return 0; } diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 3d661f0606fe..7019c1f05cab 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -355,6 +355,11 @@ static bool btf_type_is_struct(const struct btf_type *t) return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; } +static bool __btf_type_is_struct(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT; +} + static bool btf_type_is_array(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; @@ -2045,6 +2050,43 @@ static void btf_struct_log(struct btf_verifier_env *env, btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); } +/* find 'struct bpf_spin_lock' in map value. + * return >= 0 offset if found + * and < 0 in case of error + */ +int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t) +{ + const struct btf_member *member; + u32 i, off = -ENOENT; + + if (!__btf_type_is_struct(t)) + return -EINVAL; + + for_each_member(i, t, member) { + const struct btf_type *member_type = btf_type_by_id(btf, + member->type); + if (!__btf_type_is_struct(member_type)) + continue; + if (member_type->size != sizeof(struct bpf_spin_lock)) + continue; + if (strcmp(__btf_name_by_offset(btf, member_type->name_off), + "bpf_spin_lock")) + continue; + if (off != -ENOENT) + /* only one 'struct bpf_spin_lock' is allowed */ + return -E2BIG; + off = btf_member_bit_offset(t, member); + if (off % 8) + /* valid C code cannot generate such BTF */ + return -EINVAL; + off /= 8; + if (off % __alignof__(struct bpf_spin_lock)) + /* valid struct bpf_spin_lock will be 4 byte aligned */ + return -EINVAL; + } + return off; +} + static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct seq_file *m) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f13c543b7b36..ef88b167959d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2002,6 +2002,8 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_map_push_elem_proto __weak; const struct bpf_func_proto bpf_map_pop_elem_proto __weak; const struct bpf_func_proto bpf_map_peek_elem_proto __weak; +const struct bpf_func_proto bpf_spin_lock_proto __weak; +const struct bpf_func_proto bpf_spin_unlock_proto __weak; const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 4b7c76765d9d..6d3b22c5ad68 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -718,21 +718,12 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab) BITS_PER_LONG == 64; } -static u32 htab_size_value(const struct bpf_htab *htab, bool percpu) -{ - u32 size = htab->map.value_size; - - if (percpu || fd_htab_map_needs_adjust(htab)) - size = round_up(size, 8); - return size; -} - static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, void *value, u32 key_size, u32 hash, bool percpu, bool onallcpus, struct htab_elem *old_elem) { - u32 size = htab_size_value(htab, percpu); + u32 size = htab->map.value_size; bool prealloc = htab_is_prealloc(htab); struct htab_elem *l_new, **pl_new; void __percpu *pptr; @@ -770,10 +761,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, l_new = ERR_PTR(-ENOMEM); goto dec_count; } + check_and_init_map_lock(&htab->map, + l_new->key + round_up(key_size, 8)); } memcpy(l_new->key, key, key_size); if (percpu) { + size = round_up(size, 8); if (prealloc) { pptr = htab_elem_get_ptr(l_new, key_size); } else { @@ -791,8 +785,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, if (!prealloc) htab_elem_set_ptr(l_new, key_size, pptr); - } else { + } else if (fd_htab_map_needs_adjust(htab)) { + size = round_up(size, 8); memcpy(l_new->key + round_up(key_size, 8), value, size); + } else { + copy_map_value(&htab->map, + l_new->key + round_up(key_size, 8), + value); } l_new->hash = hash; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a74972b07e74..fbe544761628 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -221,6 +221,86 @@ const struct bpf_func_proto bpf_get_current_comm_proto = { .arg2_type = ARG_CONST_SIZE, }; +#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK) + +static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) +{ + arch_spinlock_t *l = (void *)lock; + union { + __u32 val; + arch_spinlock_t lock; + } u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED }; + + compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0"); + BUILD_BUG_ON(sizeof(*l) != sizeof(__u32)); + BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32)); + arch_spin_lock(l); +} + +static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) +{ + arch_spinlock_t *l = (void *)lock; + + arch_spin_unlock(l); +} + +#else + +static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) +{ + atomic_t *l = (void *)lock; + + BUILD_BUG_ON(sizeof(*l) != sizeof(*lock)); + do { + atomic_cond_read_relaxed(l, !VAL); + } while (atomic_xchg(l, 1)); +} + +static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) +{ + atomic_t *l = (void *)lock; + + atomic_set_release(l, 0); +} + +#endif + +static DEFINE_PER_CPU(unsigned long, irqsave_flags); + +notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) +{ + unsigned long flags; + + local_irq_save(flags); + __bpf_spin_lock(lock); + __this_cpu_write(irqsave_flags, flags); + return 0; +} + +const struct bpf_func_proto bpf_spin_lock_proto = { + .func = bpf_spin_lock, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_SPIN_LOCK, +}; + +notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) +{ + unsigned long flags; + + flags = __this_cpu_read(irqsave_flags); + __bpf_spin_unlock(lock); + local_irq_restore(flags); + return 0; +} + +const struct bpf_func_proto bpf_spin_unlock_proto = { + .func = bpf_spin_unlock, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_SPIN_LOCK, +}; + #ifdef CONFIG_CGROUPS BPF_CALL_0(bpf_get_current_cgroup_id) { diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 52378d3e34b3..583346a0ab29 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -37,6 +37,11 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) return ERR_PTR(-EINVAL); } + if (map_value_has_spin_lock(inner_map)) { + fdput(f); + return ERR_PTR(-ENOTSUPP); + } + inner_map_meta_size = sizeof(*inner_map_meta); /* In some cases verifier needs to access beyond just base map. */ if (inner_map->ops == &array_map_ops) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b155cd17c1bd..ebf0a673cb83 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -463,7 +463,7 @@ int map_check_no_btf(const struct bpf_map *map, return -ENOTSUPP; } -static int map_check_btf(const struct bpf_map *map, const struct btf *btf, +static int map_check_btf(struct bpf_map *map, const struct btf *btf, u32 btf_key_id, u32 btf_value_id) { const struct btf_type *key_type, *value_type; @@ -478,6 +478,21 @@ static int map_check_btf(const struct bpf_map *map, const struct btf *btf, if (!value_type || value_size != map->value_size) return -EINVAL; + map->spin_lock_off = btf_find_spin_lock(btf, value_type); + + if (map_value_has_spin_lock(map)) { + if (map->map_type != BPF_MAP_TYPE_HASH && + map->map_type != BPF_MAP_TYPE_ARRAY) + return -ENOTSUPP; + if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > + map->value_size) { + WARN_ONCE(1, + "verifier bug spin_lock_off %d value_size %d\n", + map->spin_lock_off, map->value_size); + return -EFAULT; + } + } + if (map->ops->map_check_btf) ret = map->ops->map_check_btf(map, btf, key_type, value_type); @@ -542,6 +557,8 @@ static int map_create(union bpf_attr *attr) map->btf = btf; map->btf_key_type_id = attr->btf_key_type_id; map->btf_value_type_id = attr->btf_value_type_id; + } else { + map->spin_lock_off = -EINVAL; } err = security_bpf_map_alloc(map); @@ -740,7 +757,7 @@ static int map_lookup_elem(union bpf_attr *attr) err = -ENOENT; } else { err = 0; - memcpy(value, ptr, value_size); + copy_map_value(map, value, ptr); } rcu_read_unlock(); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8c1c21cd50b4..38892bdee651 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -213,6 +213,7 @@ struct bpf_call_arg_meta { s64 msize_smax_value; u64 msize_umax_value; int ptr_id; + int func_id; }; static DEFINE_MUTEX(bpf_verifier_lock); @@ -351,6 +352,12 @@ static bool reg_is_refcounted(const struct bpf_reg_state *reg) return type_is_refcounted(reg->type); } +static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) +{ + return reg->type == PTR_TO_MAP_VALUE && + map_value_has_spin_lock(reg->map_ptr); +} + static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg) { return type_is_refcounted_or_null(reg->type); @@ -712,6 +719,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, } dst_state->speculative = src->speculative; dst_state->curframe = src->curframe; + dst_state->active_spin_lock = src->active_spin_lock; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; if (!dst) { @@ -1483,6 +1491,21 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, if (err) verbose(env, "R%d max value is outside of the array range\n", regno); + + if (map_value_has_spin_lock(reg->map_ptr)) { + u32 lock = reg->map_ptr->spin_lock_off; + + /* if any part of struct bpf_spin_lock can be touched by + * load/store reject this program. + * To check that [x1, x2) overlaps with [y1, y2) + * it is sufficient to check x1 < y2 && y1 < x2. + */ + if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) && + lock < reg->umax_value + off + size) { + verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n"); + return -EACCES; + } + } return err; } @@ -2192,6 +2215,91 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, } } +/* Implementation details: + * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL + * Two bpf_map_lookups (even with the same key) will have different reg->id. + * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after + * value_or_null->value transition, since the verifier only cares about + * the range of access to valid map value pointer and doesn't care about actual + * address of the map element. + * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps + * reg->id > 0 after value_or_null->value transition. By doing so + * two bpf_map_lookups will be considered two different pointers that + * point to different bpf_spin_locks. + * The verifier allows taking only one bpf_spin_lock at a time to avoid + * dead-locks. + * Since only one bpf_spin_lock is allowed the checks are simpler than + * reg_is_refcounted() logic. The verifier needs to remember only + * one spin_lock instead of array of acquired_refs. + * cur_state->active_spin_lock remembers which map value element got locked + * and clears it after bpf_spin_unlock. + */ +static int process_spin_lock(struct bpf_verifier_env *env, int regno, + bool is_lock) +{ + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + struct bpf_verifier_state *cur = env->cur_state; + bool is_const = tnum_is_const(reg->var_off); + struct bpf_map *map = reg->map_ptr; + u64 val = reg->var_off.value; + + if (reg->type != PTR_TO_MAP_VALUE) { + verbose(env, "R%d is not a pointer to map_value\n", regno); + return -EINVAL; + } + if (!is_const) { + verbose(env, + "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n", + regno); + return -EINVAL; + } + if (!map->btf) { + verbose(env, + "map '%s' has to have BTF in order to use bpf_spin_lock\n", + map->name); + return -EINVAL; + } + if (!map_value_has_spin_lock(map)) { + if (map->spin_lock_off == -E2BIG) + verbose(env, + "map '%s' has more than one 'struct bpf_spin_lock'\n", + map->name); + else if (map->spin_lock_off == -ENOENT) + verbose(env, + "map '%s' doesn't have 'struct bpf_spin_lock'\n", + map->name); + else + verbose(env, + "map '%s' is not a struct type or bpf_spin_lock is mangled\n", + map->name); + return -EINVAL; + } + if (map->spin_lock_off != val + reg->off) { + verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n", + val + reg->off); + return -EINVAL; + } + if (is_lock) { + if (cur->active_spin_lock) { + verbose(env, + "Locking two bpf_spin_locks are not allowed\n"); + return -EINVAL; + } + cur->active_spin_lock = reg->id; + } else { + if (!cur->active_spin_lock) { + verbose(env, "bpf_spin_unlock without taking a lock\n"); + return -EINVAL; + } + if (cur->active_spin_lock != reg->id) { + verbose(env, "bpf_spin_unlock of different lock\n"); + return -EINVAL; + } + cur->active_spin_lock = 0; + } + return 0; +} + static bool arg_type_is_mem_ptr(enum bpf_arg_type type) { return type == ARG_PTR_TO_MEM || @@ -2268,6 +2376,17 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, return -EFAULT; } meta->ptr_id = reg->id; + } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { + if (meta->func_id == BPF_FUNC_spin_lock) { + if (process_spin_lock(env, regno, true)) + return -EACCES; + } else if (meta->func_id == BPF_FUNC_spin_unlock) { + if (process_spin_lock(env, regno, false)) + return -EACCES; + } else { + verbose(env, "verifier internal error\n"); + return -EFAULT; + } } else if (arg_type_is_mem_ptr(arg_type)) { expected_type = PTR_TO_STACK; /* One exception here. In case function allows for NULL to be @@ -2887,6 +3006,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return err; } + meta.func_id = func_id; /* check args */ err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta); if (err) @@ -4473,7 +4593,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { reg->type = PTR_TO_SOCKET; } - if (is_null || !reg_is_refcounted(reg)) { + if (is_null || !(reg_is_refcounted(reg) || + reg_may_point_to_spin_lock(reg))) { /* We don't need id from this point onwards anymore, * thus we should better reset it, so that state * pruning has chances to take effect. @@ -4871,6 +4992,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; } + if (env->cur_state->active_spin_lock) { + verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n"); + return -EINVAL; + } + if (regs[BPF_REG_6].type != PTR_TO_CTX) { verbose(env, "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); @@ -5607,8 +5733,11 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, case PTR_TO_MAP_VALUE: /* If the new min/max/var_off satisfy the old ones and * everything else matches, we are OK. - * We don't care about the 'id' value, because nothing - * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL) + * 'id' is not compared, since it's only used for maps with + * bpf_spin_lock inside map element and in such cases if + * the rest of the prog is valid for one map element then + * it's valid for all map elements regardless of the key + * used in bpf_map_lookup() */ return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && range_within(rold, rcur) && @@ -5811,6 +5940,9 @@ static bool states_equal(struct bpf_verifier_env *env, if (old->speculative && !cur->speculative) return false; + if (old->active_spin_lock != cur->active_spin_lock) + return false; + /* for states to be equal callsites have to be the same * and all frame states need to be equivalent */ @@ -6229,6 +6361,12 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } + if (env->cur_state->active_spin_lock && + (insn->src_reg == BPF_PSEUDO_CALL || + insn->imm != BPF_FUNC_spin_unlock)) { + verbose(env, "function calls are not allowed while holding a lock\n"); + return -EINVAL; + } if (insn->src_reg == BPF_PSEUDO_CALL) err = check_func_call(env, insn, &env->insn_idx); else @@ -6259,6 +6397,11 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } + if (env->cur_state->active_spin_lock) { + verbose(env, "bpf_spin_unlock is missing\n"); + return -EINVAL; + } + if (state->curframe) { /* exit from nested function */ env->prev_insn_idx = env->insn_idx; @@ -6356,6 +6499,19 @@ static int check_map_prealloc(struct bpf_map *map) !(map->map_flags & BPF_F_NO_PREALLOC); } +static bool is_tracing_prog_type(enum bpf_prog_type type) +{ + switch (type) { + case BPF_PROG_TYPE_KPROBE: + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_PERF_EVENT: + case BPF_PROG_TYPE_RAW_TRACEPOINT: + return true; + default: + return false; + } +} + static int check_map_prog_compatibility(struct bpf_verifier_env *env, struct bpf_map *map, struct bpf_prog *prog) @@ -6378,6 +6534,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, } } + if ((is_tracing_prog_type(prog->type) || + prog->type == BPF_PROG_TYPE_SOCKET_FILTER) && + map_value_has_spin_lock(map)) { + verbose(env, "tracing progs cannot use bpf_spin_lock yet\n"); + return -EINVAL; + } + if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && !bpf_offload_prog_map_match(prog, map)) { verbose(env, "offload device mismatch between prog and map\n"); diff --git a/net/core/filter.c b/net/core/filter.c index 41984ad4b9b4..3a49f68eda10 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5314,10 +5314,20 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_tail_call_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; + default: + break; + } + + if (!capable(CAP_SYS_ADMIN)) + return NULL; + + switch (func_id) { + case BPF_FUNC_spin_lock: + return &bpf_spin_lock_proto; + case BPF_FUNC_spin_unlock: + return &bpf_spin_unlock_proto; case BPF_FUNC_trace_printk: - if (capable(CAP_SYS_ADMIN)) - return bpf_get_trace_printk_proto(); - /* else, fall through */ + return bpf_get_trace_printk_proto(); default: return NULL; } From e16d2f1ab96849b4b65e64b82550a7ecdbf405eb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 31 Jan 2019 15:40:05 -0800 Subject: [PATCH 10/29] bpf: add support for bpf_spin_lock to cgroup local storage Allow 'struct bpf_spin_lock' to reside inside cgroup local storage. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/local_storage.c | 2 ++ kernel/bpf/syscall.c | 3 ++- kernel/bpf/verifier.c | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 07a34ef562a0..0295427f06e2 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -147,6 +147,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, return -ENOMEM; memcpy(&new->data[0], value, map->value_size); + check_and_init_map_lock(map, new->data); new = xchg(&storage->buf, new); kfree_rcu(new, rcu); @@ -483,6 +484,7 @@ struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, storage->buf = kmalloc_node(size, flags, map->numa_node); if (!storage->buf) goto enomem; + check_and_init_map_lock(map, storage->buf->data); } else { storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags); if (!storage->percpu_buf) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ebf0a673cb83..b29e6dc44650 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -482,7 +482,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, if (map_value_has_spin_lock(map)) { if (map->map_type != BPF_MAP_TYPE_HASH && - map->map_type != BPF_MAP_TYPE_ARRAY) + map->map_type != BPF_MAP_TYPE_ARRAY && + map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE) return -ENOTSUPP; if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > map->value_size) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 38892bdee651..b63bc77af2d1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3089,6 +3089,8 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn regs[BPF_REG_0].map_ptr = meta.map_ptr; if (fn->ret_type == RET_PTR_TO_MAP_VALUE) { regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; + if (map_value_has_spin_lock(meta.map_ptr)) + regs[BPF_REG_0].id = ++env->id_gen; } else { regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; regs[BPF_REG_0].id = ++env->id_gen; From 7dac3ae42cf8203c0a9c54cb387512c91b64e22a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 31 Jan 2019 15:40:06 -0800 Subject: [PATCH 11/29] tools/bpf: sync include/uapi/linux/bpf.h sync bpf.h Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 60b99b730a41..86f7c438d40f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2422,7 +2422,9 @@ union bpf_attr { FN(map_peek_elem), \ FN(msg_push_data), \ FN(msg_pop_data), \ - FN(rc_pointer_rel), + FN(rc_pointer_rel), \ + FN(spin_lock), \ + FN(spin_unlock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3056,4 +3058,7 @@ struct bpf_line_info { __u32 line_col; }; +struct bpf_spin_lock { + __u32 val; +}; #endif /* _UAPI__LINUX_BPF_H__ */ From b4d4556c32667728c9737619580b6ade695027e0 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 31 Jan 2019 15:40:07 -0800 Subject: [PATCH 12/29] selftests/bpf: add bpf_spin_lock verifier tests add bpf_spin_lock tests to test_verifier.c that don't require latest llvm with BTF support Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 104 +++++- .../selftests/bpf/verifier/spin_lock.c | 331 ++++++++++++++++++ 2 files changed, 434 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/verifier/spin_lock.c diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 97188dbe80bd..477a9dcf9fff 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -50,7 +51,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 13 +#define MAX_NR_MAPS 14 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -78,6 +79,7 @@ struct bpf_test { int fixup_map_in_map[MAX_FIXUPS]; int fixup_cgroup_storage[MAX_FIXUPS]; int fixup_percpu_cgroup_storage[MAX_FIXUPS]; + int fixup_map_spin_lock[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; uint32_t retval, retval_unpriv, insn_processed; @@ -406,6 +408,98 @@ static int create_cgroup_storage(bool percpu) return fd; } +#define BTF_INFO_ENC(kind, kind_flag, vlen) \ + ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) +#define BTF_TYPE_ENC(name, info, size_or_type) \ + (name), (info), (size_or_type) +#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \ + ((encoding) << 24 | (bits_offset) << 16 | (nr_bits)) +#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ + BTF_INT_ENC(encoding, bits_offset, bits) +#define BTF_MEMBER_ENC(name, type, bits_offset) \ + (name), (type), (bits_offset) + +struct btf_raw_data { + __u32 raw_types[64]; + const char *str_sec; + __u32 str_sec_size; +}; + +/* struct bpf_spin_lock { + * int val; + * }; + * struct val { + * int cnt; + * struct bpf_spin_lock l; + * }; + */ +static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l"; +static __u32 btf_raw_types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* struct bpf_spin_lock */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), + BTF_MEMBER_ENC(15, 1, 0), /* int val; */ + /* struct val */ /* [3] */ + BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */ + BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */ +}; + +static int load_btf(void) +{ + struct btf_header hdr = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), + .type_len = sizeof(btf_raw_types), + .str_off = sizeof(btf_raw_types), + .str_len = sizeof(btf_str_sec), + }; + void *ptr, *raw_btf; + int btf_fd; + + ptr = raw_btf = malloc(sizeof(hdr) + sizeof(btf_raw_types) + + sizeof(btf_str_sec)); + + memcpy(ptr, &hdr, sizeof(hdr)); + ptr += sizeof(hdr); + memcpy(ptr, btf_raw_types, hdr.type_len); + ptr += hdr.type_len; + memcpy(ptr, btf_str_sec, hdr.str_len); + ptr += hdr.str_len; + + btf_fd = bpf_load_btf(raw_btf, ptr - raw_btf, 0, 0, 0); + free(raw_btf); + if (btf_fd < 0) + return -1; + return btf_fd; +} + +static int create_map_spin_lock(void) +{ + struct bpf_create_map_attr attr = { + .name = "test_map", + .map_type = BPF_MAP_TYPE_ARRAY, + .key_size = 4, + .value_size = 8, + .max_entries = 1, + .btf_key_type_id = 1, + .btf_value_type_id = 3, + }; + int fd, btf_fd; + + btf_fd = load_btf(); + if (btf_fd < 0) + return -1; + attr.btf_fd = btf_fd; + fd = bpf_create_map_xattr(&attr); + if (fd < 0) + printf("Failed to create map with spin_lock\n"); + return fd; +} + static char bpf_vlog[UINT_MAX >> 8]; static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, @@ -424,6 +518,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_map_in_map = test->fixup_map_in_map; int *fixup_cgroup_storage = test->fixup_cgroup_storage; int *fixup_percpu_cgroup_storage = test->fixup_percpu_cgroup_storage; + int *fixup_map_spin_lock = test->fixup_map_spin_lock; if (test->fill_helper) test->fill_helper(test); @@ -540,6 +635,13 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_map_stacktrace++; } while (*fixup_map_stacktrace); } + if (*fixup_map_spin_lock) { + map_fds[13] = create_map_spin_lock(); + do { + prog[*fixup_map_spin_lock].imm = map_fds[13]; + fixup_map_spin_lock++; + } while (*fixup_map_spin_lock); + } } static int set_admin(bool admin) diff --git a/tools/testing/selftests/bpf/verifier/spin_lock.c b/tools/testing/selftests/bpf/verifier/spin_lock.c new file mode 100644 index 000000000000..d829eef372a4 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/spin_lock.c @@ -0,0 +1,331 @@ +{ + "spin_lock: test1 success", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "spin_lock: test2 direct ld/st", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .result = REJECT, + .errstr = "cannot be accessed directly", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "spin_lock: test3 direct ld/st", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .result = REJECT, + .errstr = "cannot be accessed directly", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "spin_lock: test4 direct ld/st", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_6, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .result = REJECT, + .errstr = "cannot be accessed directly", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "spin_lock: test5 call within a locked region", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .result = REJECT, + .errstr = "calls are not allowed", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "spin_lock: test6 missing unlock", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .result = REJECT, + .errstr = "unlock is missing", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "spin_lock: test7 unlock without lock", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .result = REJECT, + .errstr = "without taking a lock", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "spin_lock: test8 double lock", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .result = REJECT, + .errstr = "calls are not allowed", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "spin_lock: test9 different lock", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3, 11 }, + .result = REJECT, + .errstr = "unlock of different lock", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "spin_lock: test10 lock in subprog without unlock", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .result = REJECT, + .errstr = "unlock is missing", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "spin_lock: test11 ld_abs under lock", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_LD_ABS(BPF_B, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 4 }, + .result = REJECT, + .errstr = "inside bpf_spin_lock", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, From ab963beb9f5db303b4fd7e34e422b96270e5b972 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 31 Jan 2019 15:40:08 -0800 Subject: [PATCH 13/29] selftests/bpf: add bpf_spin_lock C test add bpf_spin_lock C based test that requires latest llvm with BTF support Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/bpf_helpers.h | 4 + tools/testing/selftests/bpf/test_progs.c | 43 +++++++- tools/testing/selftests/bpf/test_spin_lock.c | 108 +++++++++++++++++++ 4 files changed, 155 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/test_spin_lock.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 8993e9c8f410..302b8e70dec9 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -35,7 +35,7 @@ BPF_OBJ_FILES = \ sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_xdp_vlan.o \ - xdp_dummy.o test_map_in_map.o + xdp_dummy.o test_map_in_map.o test_spin_lock.o # Objects are built with default compilation flags and with sub-register # code-gen enabled. diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 6c77cf7bedce..6a0ce0f055c5 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -172,6 +172,10 @@ static int (*bpf_skb_vlan_pop)(void *ctx) = (void *) BPF_FUNC_skb_vlan_pop; static int (*bpf_rc_pointer_rel)(void *ctx, int rel_x, int rel_y) = (void *) BPF_FUNC_rc_pointer_rel; +static void (*bpf_spin_lock)(struct bpf_spin_lock *lock) = + (void *) BPF_FUNC_spin_lock; +static void (*bpf_spin_unlock)(struct bpf_spin_lock *lock) = + (void *) BPF_FUNC_spin_unlock; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index d8940b8b2f8d..d2e71d697340 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -28,7 +28,7 @@ typedef __u16 __sum16; #include #include #include - +#include #include #include #include @@ -1985,6 +1985,46 @@ static void test_flow_dissector(void) bpf_object__close(obj); } +static void *test_spin_lock(void *arg) +{ + __u32 duration, retval; + int err, prog_fd = *(u32 *) arg; + + err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + pthread_exit(arg); +} + +static void test_spinlock(void) +{ + const char *file = "./test_spin_lock.o"; + pthread_t thread_id[4]; + struct bpf_object *obj; + int prog_fd; + int err = 0, i; + void *ret; + + err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); + if (err) { + printf("test_spin_lock:bpf_prog_load errno %d\n", errno); + goto close_prog; + } + for (i = 0; i < 4; i++) + assert(pthread_create(&thread_id[i], NULL, + &test_spin_lock, &prog_fd) == 0); + for (i = 0; i < 4; i++) + assert(pthread_join(thread_id[i], &ret) == 0 && + ret == (void *)&prog_fd); + goto close_prog_noerr; +close_prog: + error_cnt++; +close_prog_noerr: + bpf_object__close(obj); +} + int main(void) { srand(time(NULL)); @@ -2013,6 +2053,7 @@ int main(void) test_queue_stack_map(QUEUE); test_queue_stack_map(STACK); test_flow_dissector(); + test_spinlock(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_spin_lock.c b/tools/testing/selftests/bpf/test_spin_lock.c new file mode 100644 index 000000000000..40f904312090 --- /dev/null +++ b/tools/testing/selftests/bpf/test_spin_lock.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include +#include +#include "bpf_helpers.h" + +struct hmap_elem { + volatile int cnt; + struct bpf_spin_lock lock; + int test_padding; +}; + +struct bpf_map_def SEC("maps") hmap = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(struct hmap_elem), + .max_entries = 1, +}; + +BPF_ANNOTATE_KV_PAIR(hmap, int, struct hmap_elem); + + +struct cls_elem { + struct bpf_spin_lock lock; + volatile int cnt; +}; + +struct bpf_map_def SEC("maps") cls_map = { + .type = BPF_MAP_TYPE_CGROUP_STORAGE, + .key_size = sizeof(struct bpf_cgroup_storage_key), + .value_size = sizeof(struct cls_elem), +}; + +BPF_ANNOTATE_KV_PAIR(cls_map, struct bpf_cgroup_storage_key, + struct cls_elem); + +struct bpf_vqueue { + struct bpf_spin_lock lock; + /* 4 byte hole */ + unsigned long long lasttime; + int credit; + unsigned int rate; +}; + +struct bpf_map_def SEC("maps") vqueue = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(struct bpf_vqueue), + .max_entries = 1, +}; + +BPF_ANNOTATE_KV_PAIR(vqueue, int, struct bpf_vqueue); +#define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20) + +SEC("spin_lock_demo") +int bpf_sping_lock_test(struct __sk_buff *skb) +{ + volatile int credit = 0, max_credit = 100, pkt_len = 64; + struct hmap_elem zero = {}, *val; + unsigned long long curtime; + struct bpf_vqueue *q; + struct cls_elem *cls; + int key = 0; + int err = 0; + + val = bpf_map_lookup_elem(&hmap, &key); + if (!val) { + bpf_map_update_elem(&hmap, &key, &zero, 0); + val = bpf_map_lookup_elem(&hmap, &key); + if (!val) { + err = 1; + goto err; + } + } + /* spin_lock in hash map run time test */ + bpf_spin_lock(&val->lock); + if (val->cnt) + val->cnt--; + else + val->cnt++; + if (val->cnt != 0 && val->cnt != 1) + err = 1; + bpf_spin_unlock(&val->lock); + + /* spin_lock in array. virtual queue demo */ + q = bpf_map_lookup_elem(&vqueue, &key); + if (!q) + goto err; + curtime = bpf_ktime_get_ns(); + bpf_spin_lock(&q->lock); + q->credit += CREDIT_PER_NS(curtime - q->lasttime, q->rate); + q->lasttime = curtime; + if (q->credit > max_credit) + q->credit = max_credit; + q->credit -= pkt_len; + credit = q->credit; + bpf_spin_unlock(&q->lock); + + /* spin_lock in cgroup local storage */ + cls = bpf_get_local_storage(&cls_map, 0); + bpf_spin_lock(&cls->lock); + cls->cnt++; + bpf_spin_unlock(&cls->lock); + +err: + return err; +} +char _license[] SEC("license") = "GPL"; From 96049f3afd50fe8db69fa0068cdca822e747b1e4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 31 Jan 2019 15:40:09 -0800 Subject: [PATCH 14/29] bpf: introduce BPF_F_LOCK flag Introduce BPF_F_LOCK flag for map_lookup and map_update syscall commands and for map_update() helper function. In all these cases take a lock of existing element (which was provided in BTF description) before copying (in or out) the rest of map value. Implementation details that are part of uapi: Array: The array map takes the element lock for lookup/update. Hash: hash map also takes the lock for lookup/update and tries to avoid the bucket lock. If old element exists it takes the element lock and updates the element in place. If element doesn't exist it allocates new one and inserts into hash table while holding the bucket lock. In rare case the hashmap has to take both the bucket lock and the element lock to update old value in place. Cgroup local storage: It is similar to array. update in place and lookup are done with lock taken. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 2 ++ include/uapi/linux/bpf.h | 1 + kernel/bpf/arraymap.c | 24 ++++++++++++++-------- kernel/bpf/hashtab.c | 42 +++++++++++++++++++++++++++++++++++--- kernel/bpf/helpers.c | 16 +++++++++++++++ kernel/bpf/local_storage.c | 14 ++++++++++++- kernel/bpf/syscall.c | 25 +++++++++++++++++++++-- 7 files changed, 110 insertions(+), 14 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2ae615b48bb8..bd169a7bcc93 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -119,6 +119,8 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) memcpy(dst, src, map->value_size); } } +void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, + bool lock_src); struct bpf_offload_dev; struct bpf_offloaded_map; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 86f7c438d40f..1777fa0c61e4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -267,6 +267,7 @@ enum bpf_attach_type { #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ #define BPF_EXIST 2 /* update existing element */ +#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ /* flags for BPF_MAP_CREATE command */ #define BPF_F_NO_PREALLOC (1U << 0) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index d6d979910a2a..c72e0d8e1e65 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -253,8 +253,9 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; + char *val; - if (unlikely(map_flags > BPF_EXIST)) + if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) /* unknown flags */ return -EINVAL; @@ -262,18 +263,25 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, /* all elements were pre-allocated, cannot insert a new one */ return -E2BIG; - if (unlikely(map_flags == BPF_NOEXIST)) + if (unlikely(map_flags & BPF_NOEXIST)) /* all elements already exist */ return -EEXIST; - if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + if (unlikely((map_flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map))) + return -EINVAL; + + if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), value, map->value_size); - else - copy_map_value(map, - array->value + - array->elem_size * (index & array->index_mask), - value); + } else { + val = array->value + + array->elem_size * (index & array->index_mask); + if (map_flags & BPF_F_LOCK) + copy_map_value_locked(map, val, value, false); + else + copy_map_value(map, val, value); + } return 0; } diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 6d3b22c5ad68..937776531998 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -804,11 +804,11 @@ dec_count: static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, u64 map_flags) { - if (l_old && map_flags == BPF_NOEXIST) + if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST) /* elem already exists */ return -EEXIST; - if (!l_old && map_flags == BPF_EXIST) + if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST) /* elem doesn't exist, cannot update it */ return -ENOENT; @@ -827,7 +827,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, u32 key_size, hash; int ret; - if (unlikely(map_flags > BPF_EXIST)) + if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) /* unknown flags */ return -EINVAL; @@ -840,6 +840,28 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, b = __select_bucket(htab, hash); head = &b->head; + if (unlikely(map_flags & BPF_F_LOCK)) { + if (unlikely(!map_value_has_spin_lock(map))) + return -EINVAL; + /* find an element without taking the bucket lock */ + l_old = lookup_nulls_elem_raw(head, hash, key, key_size, + htab->n_buckets); + ret = check_flags(htab, l_old, map_flags); + if (ret) + return ret; + if (l_old) { + /* grab the element lock and update value in place */ + copy_map_value_locked(map, + l_old->key + round_up(key_size, 8), + value, false); + return 0; + } + /* fall through, grab the bucket lock and lookup again. + * 99.9% chance that the element won't be found, + * but second lookup under lock has to be done. + */ + } + /* bpf_map_update_elem() can be called in_irq() */ raw_spin_lock_irqsave(&b->lock, flags); @@ -849,6 +871,20 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, if (ret) goto err; + if (unlikely(l_old && (map_flags & BPF_F_LOCK))) { + /* first lookup without the bucket lock didn't find the element, + * but second lookup with the bucket lock found it. + * This case is highly unlikely, but has to be dealt with: + * grab the element lock in addition to the bucket lock + * and update element in place + */ + copy_map_value_locked(map, + l_old->key + round_up(key_size, 8), + value, false); + ret = 0; + goto err; + } + l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, l_old); if (IS_ERR(l_new)) { diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index fbe544761628..a411fc17d265 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -301,6 +301,22 @@ const struct bpf_func_proto bpf_spin_unlock_proto = { .arg1_type = ARG_PTR_TO_SPIN_LOCK, }; +void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, + bool lock_src) +{ + struct bpf_spin_lock *lock; + + if (lock_src) + lock = src + map->spin_lock_off; + else + lock = dst + map->spin_lock_off; + preempt_disable(); + ____bpf_spin_lock(lock); + copy_map_value(map, dst, src); + ____bpf_spin_unlock(lock); + preempt_enable(); +} + #ifdef CONFIG_CGROUPS BPF_CALL_0(bpf_get_current_cgroup_id) { diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 0295427f06e2..6b572e2de7fb 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -131,7 +131,14 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, struct bpf_cgroup_storage *storage; struct bpf_storage_buffer *new; - if (flags != BPF_ANY && flags != BPF_EXIST) + if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST))) + return -EINVAL; + + if (unlikely(flags & BPF_NOEXIST)) + return -EINVAL; + + if (unlikely((flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map))) return -EINVAL; storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map, @@ -139,6 +146,11 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, if (!storage) return -ENOENT; + if (flags & BPF_F_LOCK) { + copy_map_value_locked(map, storage->buf->data, value, false); + return 0; + } + new = kmalloc_node(sizeof(struct bpf_storage_buffer) + map->value_size, __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b29e6dc44650..0834958f1dc4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -682,7 +682,7 @@ static void *__bpf_copy_key(void __user *ukey, u64 key_size) } /* last field in 'union bpf_attr' used by this command */ -#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value +#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags static int map_lookup_elem(union bpf_attr *attr) { @@ -698,6 +698,9 @@ static int map_lookup_elem(union bpf_attr *attr) if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) return -EINVAL; + if (attr->flags & ~BPF_F_LOCK) + return -EINVAL; + f = fdget(ufd); map = __bpf_map_get(f); if (IS_ERR(map)) @@ -708,6 +711,12 @@ static int map_lookup_elem(union bpf_attr *attr) goto err_put; } + if ((attr->flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map)) { + err = -EINVAL; + goto err_put; + } + key = __bpf_copy_key(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); @@ -758,7 +767,13 @@ static int map_lookup_elem(union bpf_attr *attr) err = -ENOENT; } else { err = 0; - copy_map_value(map, value, ptr); + if (attr->flags & BPF_F_LOCK) + /* lock 'ptr' and copy everything but lock */ + copy_map_value_locked(map, value, ptr, true); + else + copy_map_value(map, value, ptr); + /* mask lock, since value wasn't zero inited */ + check_and_init_map_lock(map, value); } rcu_read_unlock(); } @@ -818,6 +833,12 @@ static int map_update_elem(union bpf_attr *attr) goto err_put; } + if ((attr->flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map)) { + err = -EINVAL; + goto err_put; + } + key = __bpf_copy_key(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); From e44ac9a22b19eec92d10c7b7c69ad6f1995b4dcf Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 31 Jan 2019 15:40:10 -0800 Subject: [PATCH 15/29] tools/bpf: sync uapi/bpf.h add BPF_F_LOCK definition to tools/include/uapi/linux/bpf.h Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 86f7c438d40f..1777fa0c61e4 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -267,6 +267,7 @@ enum bpf_attach_type { #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ #define BPF_EXIST 2 /* update existing element */ +#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ /* flags for BPF_MAP_CREATE command */ #define BPF_F_NO_PREALLOC (1U << 0) From df5d22facd78e475da2e0d506f239e32cdffaf99 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 31 Jan 2019 15:40:11 -0800 Subject: [PATCH 16/29] libbpf: introduce bpf_map_lookup_elem_flags() Introduce int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags) helper to lookup array/hash/cgroup_local_storage elements with BPF_F_LOCK flag. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/lib/bpf/bpf.c | 13 +++++++++++++ tools/lib/bpf/bpf.h | 2 ++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 16 insertions(+) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 88cbd110ae58..3defad77dc7a 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -368,6 +368,19 @@ int bpf_map_lookup_elem(int fd, const void *key, void *value) return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); } +int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.value = ptr_to_u64(value); + attr.flags = flags; + + return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); +} + int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) { union bpf_attr attr; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 8f09de482839..ed09eed2dc3b 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -110,6 +110,8 @@ LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags); LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value); +LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, + __u64 flags); LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value); LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 266bc95d0142..f6f96fc38c50 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -130,4 +130,5 @@ LIBBPF_0.0.2 { bpf_probe_helper; bpf_probe_map_type; bpf_probe_prog_type; + bpf_map_lookup_elem_flags; } LIBBPF_0.0.1; From ba72a7b4badbf4dd3c49c585c3c662bacc54f46e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 31 Jan 2019 15:40:12 -0800 Subject: [PATCH 17/29] selftests/bpf: test for BPF_F_LOCK Add C based test that runs 4 bpf programs in parallel that update the same hash and array maps. And another 2 threads that read from these two maps via lookup(key, value, BPF_F_LOCK) api to make sure the user space sees consistent value in both hash and array elements while user space races with kernel bpf progs. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/test_map_lock.c | 66 ++++++++++++++++++ tools/testing/selftests/bpf/test_progs.c | 74 +++++++++++++++++++++ 3 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/test_map_lock.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 302b8e70dec9..9de0a1ae8d65 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -35,7 +35,7 @@ BPF_OBJ_FILES = \ sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_xdp_vlan.o \ - xdp_dummy.o test_map_in_map.o test_spin_lock.o + xdp_dummy.o test_map_in_map.o test_spin_lock.o test_map_lock.o # Objects are built with default compilation flags and with sub-register # code-gen enabled. diff --git a/tools/testing/selftests/bpf/test_map_lock.c b/tools/testing/selftests/bpf/test_map_lock.c new file mode 100644 index 000000000000..af8cc68ed2f9 --- /dev/null +++ b/tools/testing/selftests/bpf/test_map_lock.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include +#include +#include "bpf_helpers.h" + +#define VAR_NUM 16 + +struct hmap_elem { + struct bpf_spin_lock lock; + int var[VAR_NUM]; +}; + +struct bpf_map_def SEC("maps") hash_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(struct hmap_elem), + .max_entries = 1, +}; + +BPF_ANNOTATE_KV_PAIR(hash_map, int, struct hmap_elem); + +struct array_elem { + struct bpf_spin_lock lock; + int var[VAR_NUM]; +}; + +struct bpf_map_def SEC("maps") array_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(struct array_elem), + .max_entries = 1, +}; + +BPF_ANNOTATE_KV_PAIR(array_map, int, struct array_elem); + +SEC("map_lock_demo") +int bpf_map_lock_test(struct __sk_buff *skb) +{ + struct hmap_elem zero = {}, *val; + int rnd = bpf_get_prandom_u32(); + int key = 0, err = 1, i; + struct array_elem *q; + + val = bpf_map_lookup_elem(&hash_map, &key); + if (!val) + goto err; + /* spin_lock in hash map */ + bpf_spin_lock(&val->lock); + for (i = 0; i < VAR_NUM; i++) + val->var[i] = rnd; + bpf_spin_unlock(&val->lock); + + /* spin_lock in array */ + q = bpf_map_lookup_elem(&array_map, &key); + if (!q) + goto err; + bpf_spin_lock(&q->lock); + for (i = 0; i < VAR_NUM; i++) + q->var[i] = rnd; + bpf_spin_unlock(&q->lock); + err = 0; +err: + return err; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index d2e71d697340..a08d026ac396 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -2025,6 +2025,79 @@ close_prog_noerr: bpf_object__close(obj); } +static void *parallel_map_access(void *arg) +{ + int err, map_fd = *(u32 *) arg; + int vars[17], i, j, rnd, key = 0; + + for (i = 0; i < 10000; i++) { + err = bpf_map_lookup_elem_flags(map_fd, &key, vars, BPF_F_LOCK); + if (err) { + printf("lookup failed\n"); + error_cnt++; + goto out; + } + if (vars[0] != 0) { + printf("lookup #%d var[0]=%d\n", i, vars[0]); + error_cnt++; + goto out; + } + rnd = vars[1]; + for (j = 2; j < 17; j++) { + if (vars[j] == rnd) + continue; + printf("lookup #%d var[1]=%d var[%d]=%d\n", + i, rnd, j, vars[j]); + error_cnt++; + goto out; + } + } +out: + pthread_exit(arg); +} + +static void test_map_lock(void) +{ + const char *file = "./test_map_lock.o"; + int prog_fd, map_fd[2], vars[17] = {}; + pthread_t thread_id[6]; + struct bpf_object *obj; + int err = 0, key = 0, i; + void *ret; + + err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); + if (err) { + printf("test_map_lock:bpf_prog_load errno %d\n", errno); + goto close_prog; + } + map_fd[0] = bpf_find_map(__func__, obj, "hash_map"); + if (map_fd[0] < 0) + goto close_prog; + map_fd[1] = bpf_find_map(__func__, obj, "array_map"); + if (map_fd[1] < 0) + goto close_prog; + + bpf_map_update_elem(map_fd[0], &key, vars, BPF_F_LOCK); + + for (i = 0; i < 4; i++) + assert(pthread_create(&thread_id[i], NULL, + &test_spin_lock, &prog_fd) == 0); + for (i = 4; i < 6; i++) + assert(pthread_create(&thread_id[i], NULL, + ¶llel_map_access, &map_fd[i - 4]) == 0); + for (i = 0; i < 4; i++) + assert(pthread_join(thread_id[i], &ret) == 0 && + ret == (void *)&prog_fd); + for (i = 4; i < 6; i++) + assert(pthread_join(thread_id[i], &ret) == 0 && + ret == (void *)&map_fd[i - 4]); + goto close_prog_noerr; +close_prog: + error_cnt++; +close_prog_noerr: + bpf_object__close(obj); +} + int main(void) { srand(time(NULL)); @@ -2054,6 +2127,7 @@ int main(void) test_queue_stack_map(STACK); test_flow_dissector(); test_spinlock(); + test_map_lock(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; From 6f20c71d8505d6ad0ddbfdb44838402aba9b7c4e Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Fri, 1 Feb 2019 16:02:32 +0530 Subject: [PATCH 18/29] bpf: powerpc64: add JIT support for bpf line info This adds support for generating bpf line info for JITed programs. Signed-off-by: Sandipan Das Signed-off-by: Daniel Borkmann --- arch/powerpc/net/bpf_jit_comp64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 15bba765fa79..4194d3cfb60c 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -1185,6 +1185,7 @@ skip_codegen_passes: bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); if (!fp->is_func || extra_pass) { + bpf_prog_fill_jited_linfo(fp, addrs); out_addrs: kfree(addrs); kfree(jit_data); From f3cea32d56e96c510af454616104e3979f7626dd Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 1 Feb 2019 22:42:23 +0100 Subject: [PATCH 19/29] libbpf: Add a helper for retrieving a map fd for a given name XDP samples are mostly cooperating with eBPF maps through their file descriptors. In case of a eBPF program that contains multiple maps it might be tiresome to iterate through them and call bpf_map__fd for each one. Add a helper mostly based on bpf_object__find_map_by_name, but instead of returning the struct bpf_map pointer, return map fd. Suggested-by: Jakub Kicinski Signed-off-by: Maciej Fijalkowski Reviewed-by: Jakub Kicinski Acked-by: John Fastabend Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 6 ++++++ tools/lib/bpf/libbpf.h | 3 +++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 10 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2ccde17957e6..03bc01ca2577 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2884,6 +2884,12 @@ bpf_object__find_map_by_name(struct bpf_object *obj, const char *name) return NULL; } +int +bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name) +{ + return bpf_map__fd(bpf_object__find_map_by_name(obj, name)); +} + struct bpf_map * bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 62ae6cb93da1..931be6f3408c 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -264,6 +264,9 @@ struct bpf_map; LIBBPF_API struct bpf_map * bpf_object__find_map_by_name(struct bpf_object *obj, const char *name); +LIBBPF_API int +bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name); + /* * Get bpf_map through the offset of corresponding struct bpf_map_def * in the BPF object file. diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index f6f96fc38c50..43ba9bb8d24b 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -131,4 +131,5 @@ LIBBPF_0.0.2 { bpf_probe_map_type; bpf_probe_prog_type; bpf_map_lookup_elem_flags; + bpf_object__find_map_fd_by_name; } LIBBPF_0.0.1; From 7313798b144c5b945dc47502a233cabb7022cb70 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 1 Feb 2019 22:42:24 +0100 Subject: [PATCH 20/29] samples/bpf: xdp_redirect_cpu have not need for read_trace_pipe The sample xdp_redirect_cpu is not using helper bpf_trace_printk. Thus it makes no sense that the --debug option us reading from /sys/kernel/debug/tracing/trace_pipe via read_trace_pipe. Simply remove it. Signed-off-by: Jesper Dangaard Brouer Acked-by: John Fastabend Signed-off-by: Daniel Borkmann --- samples/bpf/xdp_redirect_cpu_user.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 2d23054aaccf..f141e752ca0a 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -51,7 +51,6 @@ static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, {"dev", required_argument, NULL, 'd' }, {"skb-mode", no_argument, NULL, 'S' }, - {"debug", no_argument, NULL, 'D' }, {"sec", required_argument, NULL, 's' }, {"prognum", required_argument, NULL, 'p' }, {"qsize", required_argument, NULL, 'q' }, @@ -563,7 +562,6 @@ int main(int argc, char **argv) bool use_separators = true; bool stress_mode = false; char filename[256]; - bool debug = false; int added_cpus = 0; int longindex = 0; int interval = 2; @@ -624,9 +622,6 @@ int main(int argc, char **argv) case 'S': xdp_flags |= XDP_FLAGS_SKB_MODE; break; - case 'D': - debug = true; - break; case 'x': stress_mode = true; break; @@ -688,11 +683,6 @@ int main(int argc, char **argv) return EXIT_FAIL_XDP; } - if (debug) { - printf("Debug-mode reading trace pipe (fix #define DEBUG)\n"); - read_trace_pipe(); - } - stats_poll(interval, use_separators, prog_num, stress_mode); return EXIT_OK; } From bbaf6029c49ccf2dc93d9564af58a20d125947a1 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 1 Feb 2019 22:42:25 +0100 Subject: [PATCH 21/29] samples/bpf: Convert XDP samples to libbpf usage Some of XDP samples that are attaching the bpf program to the interface via libbpf's bpf_set_link_xdp_fd are still using the bpf_load.c for loading and manipulating the ebpf program and maps. Convert them to do this through libbpf usage and remove bpf_load from the picture. While at it remove what looks like debug leftover in xdp_redirect_map_user.c In xdp_redirect_cpu, change the way that the program to be loaded onto interface is chosen - user now needs to pass the program's section name instead of the relative number. In case of typo print out the section names to choose from. Signed-off-by: Maciej Fijalkowski Reviewed-by: Jakub Kicinski Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- samples/bpf/Makefile | 8 +- samples/bpf/xdp_redirect_cpu_user.c | 145 +++++++++++++++++++--------- samples/bpf/xdp_redirect_map_user.c | 47 ++++++--- samples/bpf/xdp_redirect_user.c | 44 +++++++-- samples/bpf/xdp_router_ipv4_user.c | 75 ++++++++++---- samples/bpf/xdp_tx_iptunnel_user.c | 37 ++++--- 6 files changed, 253 insertions(+), 103 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index db1a91dfa702..a0ef7eddd0b3 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -87,18 +87,18 @@ test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o xdp1-objs := xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := xdp1_user.o -xdp_router_ipv4-objs := bpf_load.o xdp_router_ipv4_user.o +xdp_router_ipv4-objs := xdp_router_ipv4_user.o test_current_task_under_cgroup-objs := bpf_load.o $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o trace_event-objs := bpf_load.o trace_event_user.o $(TRACE_HELPERS) sampleip-objs := bpf_load.o sampleip_user.o $(TRACE_HELPERS) tc_l2_redirect-objs := bpf_load.o tc_l2_redirect_user.o lwt_len_hist-objs := bpf_load.o lwt_len_hist_user.o -xdp_tx_iptunnel-objs := bpf_load.o xdp_tx_iptunnel_user.o +xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o test_map_in_map-objs := bpf_load.o test_map_in_map_user.o per_socket_stats_example-objs := cookie_uid_helper_example.o -xdp_redirect-objs := bpf_load.o xdp_redirect_user.o -xdp_redirect_map-objs := bpf_load.o xdp_redirect_map_user.o +xdp_redirect-objs := xdp_redirect_user.o +xdp_redirect_map-objs := xdp_redirect_map_user.o xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o xdp_monitor-objs := bpf_load.o xdp_monitor_user.o xdp_rxq_info-objs := xdp_rxq_info_user.o diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index f141e752ca0a..8645ddc2da0e 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -24,12 +24,8 @@ static const char *__doc__ = /* How many xdp_progs are defined in _kern.c */ #define MAX_PROG 6 -/* Wanted to get rid of bpf_load.h and fake-"libbpf.h" (and instead - * use bpf/libbpf.h), but cannot as (currently) needed for XDP - * attaching to a device via bpf_set_link_xdp_fd() - */ #include -#include "bpf_load.h" +#include "bpf/libbpf.h" #include "bpf_util.h" @@ -38,6 +34,15 @@ static char ifname_buf[IF_NAMESIZE]; static char *ifname; static __u32 xdp_flags; +static int cpu_map_fd; +static int rx_cnt_map_fd; +static int redirect_err_cnt_map_fd; +static int cpumap_enqueue_cnt_map_fd; +static int cpumap_kthread_cnt_map_fd; +static int cpus_available_map_fd; +static int cpus_count_map_fd; +static int cpus_iterator_map_fd; +static int exception_cnt_map_fd; /* Exit return codes */ #define EXIT_OK 0 @@ -52,7 +57,7 @@ static const struct option long_options[] = { {"dev", required_argument, NULL, 'd' }, {"skb-mode", no_argument, NULL, 'S' }, {"sec", required_argument, NULL, 's' }, - {"prognum", required_argument, NULL, 'p' }, + {"progname", required_argument, NULL, 'p' }, {"qsize", required_argument, NULL, 'q' }, {"cpu", required_argument, NULL, 'c' }, {"stress-mode", no_argument, NULL, 'x' }, @@ -70,7 +75,17 @@ static void int_exit(int sig) exit(EXIT_OK); } -static void usage(char *argv[]) +static void print_avail_progs(struct bpf_object *obj) +{ + struct bpf_program *pos; + + bpf_object__for_each_program(pos, obj) { + if (bpf_program__is_xdp(pos)) + printf(" %s\n", bpf_program__title(pos, false)); + } +} + +static void usage(char *argv[], struct bpf_object *obj) { int i; @@ -88,6 +103,8 @@ static void usage(char *argv[]) long_options[i].val); printf("\n"); } + printf("\n Programs to be used for --progname:\n"); + print_avail_progs(obj); printf("\n"); } @@ -262,7 +279,7 @@ static __u64 calc_errs_pps(struct datarec *r, static void stats_print(struct stats_record *stats_rec, struct stats_record *stats_prev, - int prog_num) + char *prog_name) { unsigned int nr_cpus = bpf_num_possible_cpus(); double pps = 0, drop = 0, err = 0; @@ -272,7 +289,7 @@ static void stats_print(struct stats_record *stats_rec, int i; /* Header */ - printf("Running XDP/eBPF prog_num:%d\n", prog_num); + printf("Running XDP/eBPF prog_name:%s\n", prog_name); printf("%-15s %-7s %-14s %-11s %-9s\n", "XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info"); @@ -423,20 +440,20 @@ static void stats_collect(struct stats_record *rec) { int fd, i; - fd = map_fd[1]; /* map: rx_cnt */ + fd = rx_cnt_map_fd; map_collect_percpu(fd, 0, &rec->rx_cnt); - fd = map_fd[2]; /* map: redirect_err_cnt */ + fd = redirect_err_cnt_map_fd; map_collect_percpu(fd, 1, &rec->redir_err); - fd = map_fd[3]; /* map: cpumap_enqueue_cnt */ + fd = cpumap_enqueue_cnt_map_fd; for (i = 0; i < MAX_CPUS; i++) map_collect_percpu(fd, i, &rec->enq[i]); - fd = map_fd[4]; /* map: cpumap_kthread_cnt */ + fd = cpumap_kthread_cnt_map_fd; map_collect_percpu(fd, 0, &rec->kthread); - fd = map_fd[8]; /* map: exception_cnt */ + fd = exception_cnt_map_fd; map_collect_percpu(fd, 0, &rec->exception); } @@ -461,7 +478,7 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size, /* Add a CPU entry to cpumap, as this allocate a cpu entry in * the kernel for the cpu. */ - ret = bpf_map_update_elem(map_fd[0], &cpu, &queue_size, 0); + ret = bpf_map_update_elem(cpu_map_fd, &cpu, &queue_size, 0); if (ret) { fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret); exit(EXIT_FAIL_BPF); @@ -470,23 +487,22 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size, /* Inform bpf_prog's that a new CPU is available to select * from via some control maps. */ - /* map_fd[5] = cpus_available */ - ret = bpf_map_update_elem(map_fd[5], &avail_idx, &cpu, 0); + ret = bpf_map_update_elem(cpus_available_map_fd, &avail_idx, &cpu, 0); if (ret) { fprintf(stderr, "Add to avail CPUs failed\n"); exit(EXIT_FAIL_BPF); } /* When not replacing/updating existing entry, bump the count */ - /* map_fd[6] = cpus_count */ - ret = bpf_map_lookup_elem(map_fd[6], &key, &curr_cpus_count); + ret = bpf_map_lookup_elem(cpus_count_map_fd, &key, &curr_cpus_count); if (ret) { fprintf(stderr, "Failed reading curr cpus_count\n"); exit(EXIT_FAIL_BPF); } if (new) { curr_cpus_count++; - ret = bpf_map_update_elem(map_fd[6], &key, &curr_cpus_count, 0); + ret = bpf_map_update_elem(cpus_count_map_fd, &key, + &curr_cpus_count, 0); if (ret) { fprintf(stderr, "Failed write curr cpus_count\n"); exit(EXIT_FAIL_BPF); @@ -509,8 +525,8 @@ static void mark_cpus_unavailable(void) int ret, i; for (i = 0; i < MAX_CPUS; i++) { - /* map_fd[5] = cpus_available */ - ret = bpf_map_update_elem(map_fd[5], &i, &invalid_cpu, 0); + ret = bpf_map_update_elem(cpus_available_map_fd, &i, + &invalid_cpu, 0); if (ret) { fprintf(stderr, "Failed marking CPU unavailable\n"); exit(EXIT_FAIL_BPF); @@ -530,7 +546,7 @@ static void stress_cpumap(void) create_cpu_entry(1, 16000, 0, false); } -static void stats_poll(int interval, bool use_separators, int prog_num, +static void stats_poll(int interval, bool use_separators, char *prog_name, bool stress_mode) { struct stats_record *record, *prev; @@ -546,7 +562,7 @@ static void stats_poll(int interval, bool use_separators, int prog_num, while (1) { swap(&prev, &record); stats_collect(record); - stats_print(record, prev, prog_num); + stats_print(record, prev, prog_name); sleep(interval); if (stress_mode) stress_cpumap(); @@ -556,17 +572,51 @@ static void stats_poll(int interval, bool use_separators, int prog_num, free_stats_record(prev); } +static int init_map_fds(struct bpf_object *obj) +{ + cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map"); + rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt"); + redirect_err_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt"); + cpumap_enqueue_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt"); + cpumap_kthread_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt"); + cpus_available_map_fd = + bpf_object__find_map_fd_by_name(obj, "cpus_available"); + cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count"); + cpus_iterator_map_fd = + bpf_object__find_map_fd_by_name(obj, "cpus_iterator"); + exception_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "exception_cnt"); + + if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 || + redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 || + cpumap_kthread_cnt_map_fd < 0 || cpus_available_map_fd < 0 || + cpus_count_map_fd < 0 || cpus_iterator_map_fd < 0 || + exception_cnt_map_fd < 0) + return -ENOENT; + + return 0; +} + int main(int argc, char **argv) { struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; + char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs"; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_UNSPEC, + }; bool use_separators = true; bool stress_mode = false; + struct bpf_program *prog; + struct bpf_object *obj; char filename[256]; int added_cpus = 0; int longindex = 0; int interval = 2; - int prog_num = 5; int add_cpu = -1; + int prog_fd; __u32 qsize; int opt; @@ -579,22 +629,25 @@ int main(int argc, char **argv) qsize = 128+64; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; if (setrlimit(RLIMIT_MEMLOCK, &r)) { perror("setrlimit(RLIMIT_MEMLOCK)"); return 1; } - if (load_bpf_file(filename)) { - fprintf(stderr, "ERR in load_bpf_file(): %s", bpf_log_buf); + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + return EXIT_FAIL; + + if (prog_fd < 0) { + fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", + strerror(errno)); return EXIT_FAIL; } - - if (!prog_fd[0]) { - fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno)); + if (init_map_fds(obj) < 0) { + fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n"); return EXIT_FAIL; } - mark_cpus_unavailable(); /* Parse commands line args */ @@ -630,13 +683,7 @@ int main(int argc, char **argv) break; case 'p': /* Selecting eBPF prog to load */ - prog_num = atoi(optarg); - if (prog_num < 0 || prog_num >= MAX_PROG) { - fprintf(stderr, - "--prognum too large err(%d):%s\n", - errno, strerror(errno)); - goto error; - } + prog_name = optarg; break; case 'c': /* Add multiple CPUs */ @@ -656,21 +703,21 @@ int main(int argc, char **argv) case 'h': error: default: - usage(argv); + usage(argv, obj); return EXIT_FAIL_OPTION; } } /* Required option */ if (ifindex == -1) { fprintf(stderr, "ERR: required option --dev missing\n"); - usage(argv); + usage(argv, obj); return EXIT_FAIL_OPTION; } /* Required option */ if (add_cpu == -1) { fprintf(stderr, "ERR: required option --cpu missing\n"); fprintf(stderr, " Specify multiple --cpu option to add more\n"); - usage(argv); + usage(argv, obj); return EXIT_FAIL_OPTION; } @@ -678,11 +725,23 @@ int main(int argc, char **argv) signal(SIGINT, int_exit); signal(SIGTERM, int_exit); - if (bpf_set_link_xdp_fd(ifindex, prog_fd[prog_num], xdp_flags) < 0) { + prog = bpf_object__find_program_by_title(obj, prog_name); + if (!prog) { + fprintf(stderr, "bpf_object__find_program_by_title failed\n"); + return EXIT_FAIL; + } + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + fprintf(stderr, "bpf_program__fd failed\n"); + return EXIT_FAIL; + } + + if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { fprintf(stderr, "link set xdp fd failed\n"); return EXIT_FAIL_XDP; } - stats_poll(interval, use_separators, prog_num, stress_mode); + stats_poll(interval, use_separators, prog_name, stress_mode); return EXIT_OK; } diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c index 4445e76854b5..60d46eea225b 100644 --- a/samples/bpf/xdp_redirect_map_user.c +++ b/samples/bpf/xdp_redirect_map_user.c @@ -22,15 +22,16 @@ #include #include -#include "bpf_load.h" #include "bpf_util.h" #include +#include "bpf/libbpf.h" static int ifindex_in; static int ifindex_out; static bool ifindex_out_xdp_dummy_attached = true; static __u32 xdp_flags; +static int rxcnt_map_fd; static void int_exit(int sig) { @@ -53,7 +54,7 @@ static void poll_stats(int interval, int ifindex) int i; sleep(interval); - assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); + assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0); for (i = 0; i < nr_cpus; i++) sum += (values[i] - prev[i]); if (sum) @@ -76,9 +77,16 @@ static void usage(const char *prog) int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + struct bpf_program *prog, *dummy_prog; + int prog_fd, dummy_prog_fd; const char *optstr = "SN"; - char filename[256]; + struct bpf_object *obj; int ret, opt, key = 0; + char filename[256]; + int tx_port_map_fd; while ((opt = getopt(argc, argv, optstr)) != -1) { switch (opt) { @@ -109,24 +117,40 @@ int main(int argc, char **argv) printf("input: %d output: %d\n", ifindex_in, ifindex_out); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + return 1; + + prog = bpf_program__next(NULL, obj); + dummy_prog = bpf_program__next(prog, obj); + if (!prog || !dummy_prog) { + printf("finding a prog in obj file failed\n"); + return 1; + } + /* bpf_prog_load_xattr gives us the pointer to first prog's fd, + * so we're missing only the fd for dummy prog + */ + dummy_prog_fd = bpf_program__fd(dummy_prog); + if (prog_fd < 0 || dummy_prog_fd < 0) { + printf("bpf_prog_load_xattr: %s\n", strerror(errno)); return 1; } - if (!prog_fd[0]) { - printf("load_bpf_file: %s\n", strerror(errno)); + tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port"); + rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); + if (tx_port_map_fd < 0 || rxcnt_map_fd < 0) { + printf("bpf_object__find_map_fd_by_name failed\n"); return 1; } - if (bpf_set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) { + if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) { printf("ERROR: link set xdp fd failed on %d\n", ifindex_in); return 1; } /* Loading dummy XDP prog on out-device */ - if (bpf_set_link_xdp_fd(ifindex_out, prog_fd[1], + if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd, (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) { printf("WARN: link set xdp fd failed on %d\n", ifindex_out); ifindex_out_xdp_dummy_attached = false; @@ -135,11 +159,8 @@ int main(int argc, char **argv) signal(SIGINT, int_exit); signal(SIGTERM, int_exit); - printf("map[0] (vports) = %i, map[1] (map) = %i, map[2] (count) = %i\n", - map_fd[0], map_fd[1], map_fd[2]); - /* populate virtual to physical port map */ - ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0); + ret = bpf_map_update_elem(tx_port_map_fd, &key, &ifindex_out, 0); if (ret) { perror("bpf_update_elem"); goto out; diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c index 81a69e36cb78..93404820df68 100644 --- a/samples/bpf/xdp_redirect_user.c +++ b/samples/bpf/xdp_redirect_user.c @@ -22,15 +22,16 @@ #include #include -#include "bpf_load.h" #include "bpf_util.h" #include +#include "bpf/libbpf.h" static int ifindex_in; static int ifindex_out; static bool ifindex_out_xdp_dummy_attached = true; static __u32 xdp_flags; +static int rxcnt_map_fd; static void int_exit(int sig) { @@ -53,7 +54,7 @@ static void poll_stats(int interval, int ifindex) int i; sleep(interval); - assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); + assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0); for (i = 0; i < nr_cpus; i++) sum += (values[i] - prev[i]); if (sum) @@ -77,9 +78,16 @@ static void usage(const char *prog) int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + struct bpf_program *prog, *dummy_prog; + int prog_fd, tx_port_map_fd, opt; const char *optstr = "SN"; + struct bpf_object *obj; char filename[256]; - int ret, opt, key = 0; + int dummy_prog_fd; + int ret, key = 0; while ((opt = getopt(argc, argv, optstr)) != -1) { switch (opt) { @@ -110,24 +118,40 @@ int main(int argc, char **argv) printf("input: %d output: %d\n", ifindex_in, ifindex_out); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + return 1; + + prog = bpf_program__next(NULL, obj); + dummy_prog = bpf_program__next(prog, obj); + if (!prog || !dummy_prog) { + printf("finding a prog in obj file failed\n"); + return 1; + } + /* bpf_prog_load_xattr gives us the pointer to first prog's fd, + * so we're missing only the fd for dummy prog + */ + dummy_prog_fd = bpf_program__fd(dummy_prog); + if (prog_fd < 0 || dummy_prog_fd < 0) { + printf("bpf_prog_load_xattr: %s\n", strerror(errno)); return 1; } - if (!prog_fd[0]) { - printf("load_bpf_file: %s\n", strerror(errno)); + tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port"); + rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); + if (tx_port_map_fd < 0 || rxcnt_map_fd < 0) { + printf("bpf_object__find_map_fd_by_name failed\n"); return 1; } - if (bpf_set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) { + if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) { printf("ERROR: link set xdp fd failed on %d\n", ifindex_in); return 1; } /* Loading dummy XDP prog on out-device */ - if (bpf_set_link_xdp_fd(ifindex_out, prog_fd[1], + if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd, (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) { printf("WARN: link set xdp fd failed on %d\n", ifindex_out); ifindex_out_xdp_dummy_attached = false; @@ -137,7 +161,7 @@ int main(int argc, char **argv) signal(SIGTERM, int_exit); /* bpf redirect port */ - ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0); + ret = bpf_map_update_elem(tx_port_map_fd, &key, &ifindex_out, 0); if (ret) { perror("bpf_update_elem"); goto out; diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index b2b4dfa776c8..cea2306f5ab7 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -15,7 +15,6 @@ #include #include #include -#include "bpf_load.h" #include #include #include @@ -25,11 +24,17 @@ #include #include #include "bpf_util.h" +#include "bpf/libbpf.h" int sock, sock_arp, flags = 0; static int total_ifindex; int *ifindex_list; char buf[8192]; +static int lpm_map_fd; +static int rxcnt_map_fd; +static int arp_table_map_fd; +static int exact_match_map_fd; +static int tx_port_map_fd; static int get_route_table(int rtm_family); static void int_exit(int sig) @@ -186,7 +191,8 @@ static void read_route(struct nlmsghdr *nh, int nll) bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); exit(0); } - assert(bpf_map_update_elem(map_fd[4], &route.iface, &route.iface, 0) == 0); + assert(bpf_map_update_elem(tx_port_map_fd, + &route.iface, &route.iface, 0) == 0); if (rtm_family == AF_INET) { struct trie_value { __u8 prefix[4]; @@ -207,11 +213,16 @@ static void read_route(struct nlmsghdr *nh, int nll) direct_entry.arp.dst = 0; if (route.dst_len == 32) { if (nh->nlmsg_type == RTM_DELROUTE) { - assert(bpf_map_delete_elem(map_fd[3], &route.dst) == 0); + assert(bpf_map_delete_elem(exact_match_map_fd, + &route.dst) == 0); } else { - if (bpf_map_lookup_elem(map_fd[2], &route.dst, &direct_entry.arp.mac) == 0) + if (bpf_map_lookup_elem(arp_table_map_fd, + &route.dst, + &direct_entry.arp.mac) == 0) direct_entry.arp.dst = route.dst; - assert(bpf_map_update_elem(map_fd[3], &route.dst, &direct_entry, 0) == 0); + assert(bpf_map_update_elem(exact_match_map_fd, + &route.dst, + &direct_entry, 0) == 0); } } for (i = 0; i < 4; i++) @@ -225,7 +236,7 @@ static void read_route(struct nlmsghdr *nh, int nll) route.gw, route.dst_len, route.metric, route.iface_name); - if (bpf_map_lookup_elem(map_fd[0], prefix_key, + if (bpf_map_lookup_elem(lpm_map_fd, prefix_key, prefix_value) < 0) { for (i = 0; i < 4; i++) prefix_value->prefix[i] = prefix_key->data[i]; @@ -234,7 +245,7 @@ static void read_route(struct nlmsghdr *nh, int nll) prefix_value->gw = route.gw; prefix_value->metric = route.metric; - assert(bpf_map_update_elem(map_fd[0], + assert(bpf_map_update_elem(lpm_map_fd, prefix_key, prefix_value, 0 ) == 0); @@ -247,7 +258,7 @@ static void read_route(struct nlmsghdr *nh, int nll) prefix_key->data[2], prefix_key->data[3], prefix_key->prefixlen); - assert(bpf_map_delete_elem(map_fd[0], + assert(bpf_map_delete_elem(lpm_map_fd, prefix_key ) == 0); /* Rereading the route table to check if @@ -275,8 +286,7 @@ static void read_route(struct nlmsghdr *nh, int nll) prefix_value->ifindex = route.iface; prefix_value->gw = route.gw; prefix_value->metric = route.metric; - assert(bpf_map_update_elem( - map_fd[0], + assert(bpf_map_update_elem(lpm_map_fd, prefix_key, prefix_value, 0) == 0); @@ -401,7 +411,8 @@ static void read_arp(struct nlmsghdr *nh, int nll) arp_entry.mac = atol(mac); printf("%x\t\t%llx\n", arp_entry.dst, arp_entry.mac); if (ndm_family == AF_INET) { - if (bpf_map_lookup_elem(map_fd[3], &arp_entry.dst, + if (bpf_map_lookup_elem(exact_match_map_fd, + &arp_entry.dst, &direct_entry) == 0) { if (nh->nlmsg_type == RTM_DELNEIGH) { direct_entry.arp.dst = 0; @@ -410,16 +421,17 @@ static void read_arp(struct nlmsghdr *nh, int nll) direct_entry.arp.dst = arp_entry.dst; direct_entry.arp.mac = arp_entry.mac; } - assert(bpf_map_update_elem(map_fd[3], + assert(bpf_map_update_elem(exact_match_map_fd, &arp_entry.dst, &direct_entry, 0 ) == 0); memset(&direct_entry, 0, sizeof(direct_entry)); } if (nh->nlmsg_type == RTM_DELNEIGH) { - assert(bpf_map_delete_elem(map_fd[2], &arp_entry.dst) == 0); + assert(bpf_map_delete_elem(arp_table_map_fd, + &arp_entry.dst) == 0); } else if (nh->nlmsg_type == RTM_NEWNEIGH) { - assert(bpf_map_update_elem(map_fd[2], + assert(bpf_map_update_elem(arp_table_map_fd, &arp_entry.dst, &arp_entry.mac, 0 ) == 0); @@ -553,7 +565,8 @@ static int monitor_route(void) for (key = 0; key < nr_keys; key++) { __u64 sum = 0; - assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); + assert(bpf_map_lookup_elem(rxcnt_map_fd, + &key, values) == 0); for (i = 0; i < nr_cpus; i++) sum += (values[i] - prev[key][i]); if (sum) @@ -596,11 +609,18 @@ cleanup: int main(int ac, char **argv) { + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + struct bpf_object *obj; char filename[256]; char **ifname_list; + int prog_fd; int i = 1; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; + if (ac < 2) { printf("usage: %s [-S] Interface name list\n", argv[0]); return 1; @@ -614,15 +634,28 @@ int main(int ac, char **argv) total_ifindex = ac - 1; ifname_list = (argv + 1); } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return 1; - } + printf("\n**************loading bpf file*********************\n\n\n"); - if (!prog_fd[0]) { - printf("load_bpf_file: %s\n", strerror(errno)); + if (!prog_fd) { + printf("bpf_prog_load_xattr: %s\n", strerror(errno)); return 1; } + + lpm_map_fd = bpf_object__find_map_fd_by_name(obj, "lpm_map"); + rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); + arp_table_map_fd = bpf_object__find_map_fd_by_name(obj, "arp_table"); + exact_match_map_fd = bpf_object__find_map_fd_by_name(obj, + "exact_match"); + tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port"); + if (lpm_map_fd < 0 || rxcnt_map_fd < 0 || arp_table_map_fd < 0 || + exact_match_map_fd < 0 || tx_port_map_fd < 0) { + printf("bpf_object__find_map_fd_by_name failed\n"); + return 1; + } + ifindex_list = (int *)malloc(total_ifindex * sizeof(int *)); for (i = 0; i < total_ifindex; i++) { ifindex_list[i] = if_nametoindex(ifname_list[i]); @@ -633,7 +666,7 @@ int main(int ac, char **argv) } } for (i = 0; i < total_ifindex; i++) { - if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) { + if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) { printf("link set xdp fd failed\n"); int recovery_index = i; diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index a4ccc33adac0..5093d8220da5 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -17,7 +17,7 @@ #include #include #include -#include "bpf_load.h" +#include "bpf/libbpf.h" #include #include "bpf_util.h" #include "xdp_tx_iptunnel_common.h" @@ -26,6 +26,7 @@ static int ifindex = -1; static __u32 xdp_flags = 0; +static int rxcnt_map_fd; static void int_exit(int sig) { @@ -53,7 +54,8 @@ static void poll_stats(unsigned int kill_after_s) for (proto = 0; proto < nr_protos; proto++) { __u64 sum = 0; - assert(bpf_map_lookup_elem(map_fd[0], &proto, values) == 0); + assert(bpf_map_lookup_elem(rxcnt_map_fd, &proto, + values) == 0); for (i = 0; i < nr_cpus; i++) sum += (values[i] - prev[proto][i]); @@ -138,15 +140,19 @@ static int parse_ports(const char *port_str, int *min_port, int *max_port) int main(int argc, char **argv) { + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + int min_port = 0, max_port = 0, vip2tnl_map_fd; + const char *optstr = "i:a:p:s:d:m:T:P:SNh"; unsigned char opt_flags[256] = {}; unsigned int kill_after_s = 0; - const char *optstr = "i:a:p:s:d:m:T:P:SNh"; - int min_port = 0, max_port = 0; struct iptnl_info tnl = {}; - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_object *obj; struct vip vip = {}; char filename[256]; - int opt; + int opt, prog_fd; int i; tnl.family = AF_UNSPEC; @@ -232,14 +238,20 @@ int main(int argc, char **argv) } snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + return 1; + + if (!prog_fd) { + printf("load_bpf_file: %s\n", strerror(errno)); return 1; } - if (!prog_fd[0]) { - printf("load_bpf_file: %s\n", strerror(errno)); + rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); + vip2tnl_map_fd = bpf_object__find_map_fd_by_name(obj, "vip2tnl"); + if (vip2tnl_map_fd < 0 || rxcnt_map_fd < 0) { + printf("bpf_object__find_map_fd_by_name failed\n"); return 1; } @@ -248,13 +260,14 @@ int main(int argc, char **argv) while (min_port <= max_port) { vip.dport = htons(min_port++); - if (bpf_map_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) { + if (bpf_map_update_elem(vip2tnl_map_fd, &vip, &tnl, + BPF_NOEXIST)) { perror("bpf_map_update_elem(&vip2tnl)"); return 1; } } - if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) { + if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { printf("link set xdp fd failed\n"); return 1; } From 6a5457618f62147aeac706d26ff28e0e57a324e3 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 1 Feb 2019 22:42:26 +0100 Subject: [PATCH 22/29] samples/bpf: Extend RLIMIT_MEMLOCK for xdp_{sample_pkts, router_ipv4} There is a common problem with xdp samples that happens when user wants to run a particular sample and some bpf program is already loaded. The default 64kb RLIMIT_MEMLOCK resource limit will cause a following error (assuming that xdp sample that is failing was converted to libbpf usage): libbpf: Error in bpf_object__probe_name():Operation not permitted(1). Couldn't load basic 'r0 = 0' BPF program. libbpf: failed to load object './xdp_sample_pkts_kern.o' Fix it in xdp_sample_pkts and xdp_router_ipv4 by setting RLIMIT_MEMLOCK to RLIM_INFINITY. Signed-off-by: Maciej Fijalkowski Reviewed-by: Jakub Kicinski Acked-by: Jesper Dangaard Brouer Acked-by: John Fastabend Signed-off-by: Daniel Borkmann --- samples/bpf/xdp_router_ipv4_user.c | 7 +++++++ samples/bpf/xdp_sample_pkts_user.c | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index cea2306f5ab7..c63c6beec7d6 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -25,6 +25,7 @@ #include #include "bpf_util.h" #include "bpf/libbpf.h" +#include int sock, sock_arp, flags = 0; static int total_ifindex; @@ -609,6 +610,7 @@ cleanup: int main(int ac, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -635,6 +637,11 @@ int main(int ac, char **argv) ifname_list = (argv + 1); } + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return 1; diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index 8dd87c1eb560..5f5828ee0761 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "perf-sys.h" #include "trace_helpers.h" @@ -99,6 +100,7 @@ static void sig_handler(int signo) int main(int argc, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -114,6 +116,11 @@ int main(int argc, char **argv) return 1; } + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } + numcpus = get_nprocs(); if (numcpus > MAX_CPUS) numcpus = MAX_CPUS; From 01dde20ce04b3a18f1e91d6d1ee0ef484d20bbf2 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 1 Feb 2019 22:42:27 +0100 Subject: [PATCH 23/29] xdp: Provide extack messages when prog attachment failed In order to provide more meaningful messages to user when the process of loading xdp program onto network interface failed, let's add extack messages within dev_change_xdp_fd. Suggested-by: Jakub Kicinski Signed-off-by: Maciej Fijalkowski Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- net/core/dev.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 8e276e0192a1..bfa4be42afff 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7983,8 +7983,10 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, query = flags & XDP_FLAGS_HW_MODE ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG; bpf_op = bpf_chk = ops->ndo_bpf; - if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) + if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) { + NL_SET_ERR_MSG(extack, "underlying driver does not support XDP in native mode"); return -EOPNOTSUPP; + } if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE)) bpf_op = generic_xdp_install; if (bpf_op == bpf_chk) @@ -7992,11 +7994,15 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, if (fd >= 0) { if (__dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG) || - __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG_HW)) + __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG_HW)) { + NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time"); return -EEXIST; + } if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && - __dev_xdp_query(dev, bpf_op, query)) + __dev_xdp_query(dev, bpf_op, query)) { + NL_SET_ERR_MSG(extack, "XDP program already attached"); return -EBUSY; + } prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, bpf_op == ops->ndo_bpf); From 743e568c15860d4061202f73214c106a5bb0890b Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 1 Feb 2019 22:42:28 +0100 Subject: [PATCH 24/29] samples/bpf: Add a "force" flag to XDP samples Make xdp samples consistent with iproute2 behavior and set the XDP_FLAGS_UPDATE_IF_NOEXIST by default when setting the xdp program on interface. Provide an option for user to force the program loading, which as a result will not include the mentioned flag in bpf_set_link_xdp_fd call. Signed-off-by: Maciej Fijalkowski Reviewed-by: Jakub Kicinski Acked-by: John Fastabend Signed-off-by: Daniel Borkmann --- samples/bpf/xdp1_user.c | 10 ++++-- samples/bpf/xdp_adjust_tail_user.c | 8 +++-- samples/bpf/xdp_redirect_cpu_user.c | 8 +++-- samples/bpf/xdp_redirect_map_user.c | 10 ++++-- samples/bpf/xdp_redirect_user.c | 10 ++++-- samples/bpf/xdp_router_ipv4_user.c | 50 +++++++++++++++++++++-------- samples/bpf/xdp_rxq_info_user.c | 8 +++-- samples/bpf/xdp_sample_pkts_user.c | 40 ++++++++++++++++++----- samples/bpf/xdp_tx_iptunnel_user.c | 8 +++-- samples/bpf/xdpsock_user.c | 7 ++-- 10 files changed, 119 insertions(+), 40 deletions(-) diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 8bfda95c77ad..505bce207165 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -22,7 +22,7 @@ #include "bpf/libbpf.h" static int ifindex; -static __u32 xdp_flags; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static void int_exit(int sig) { @@ -63,7 +63,8 @@ static void usage(const char *prog) "usage: %s [OPTS] IFACE\n\n" "OPTS:\n" " -S use skb-mode\n" - " -N enforce native mode\n", + " -N enforce native mode\n" + " -F force loading prog\n", prog); } @@ -73,7 +74,7 @@ int main(int argc, char **argv) struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; - const char *optstr = "SN"; + const char *optstr = "FSN"; int prog_fd, map_fd, opt; struct bpf_object *obj; struct bpf_map *map; @@ -87,6 +88,9 @@ int main(int argc, char **argv) case 'N': xdp_flags |= XDP_FLAGS_DRV_MODE; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(basename(argv[0])); return 1; diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index 3042ce37dae8..049bddf7778b 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -24,7 +24,7 @@ #define STATS_INTERVAL_S 2U static int ifindex = -1; -static __u32 xdp_flags; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static void int_exit(int sig) { @@ -60,6 +60,7 @@ static void usage(const char *cmd) printf(" -T Default: 0 (forever)\n"); printf(" -S use skb-mode\n"); printf(" -N enforce native mode\n"); + printf(" -F force loading prog\n"); printf(" -h Display this help\n"); } @@ -70,8 +71,8 @@ int main(int argc, char **argv) .prog_type = BPF_PROG_TYPE_XDP, }; unsigned char opt_flags[256] = {}; + const char *optstr = "i:T:SNFh"; unsigned int kill_after_s = 0; - const char *optstr = "i:T:SNh"; int i, prog_fd, map_fd, opt; struct bpf_object *obj; struct bpf_map *map; @@ -96,6 +97,9 @@ int main(int argc, char **argv) case 'N': xdp_flags |= XDP_FLAGS_DRV_MODE; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(argv[0]); return 1; diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 8645ddc2da0e..0224afb55845 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -33,7 +33,7 @@ static int ifindex = -1; static char ifname_buf[IF_NAMESIZE]; static char *ifname; -static __u32 xdp_flags; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int cpu_map_fd; static int rx_cnt_map_fd; static int redirect_err_cnt_map_fd; @@ -62,6 +62,7 @@ static const struct option long_options[] = { {"cpu", required_argument, NULL, 'c' }, {"stress-mode", no_argument, NULL, 'x' }, {"no-separators", no_argument, NULL, 'z' }, + {"force", no_argument, NULL, 'F' }, {0, 0, NULL, 0 } }; @@ -651,7 +652,7 @@ int main(int argc, char **argv) mark_cpus_unavailable(); /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "hSd:", + while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzF", long_options, &longindex)) != -1) { switch (opt) { case 'd': @@ -700,6 +701,9 @@ int main(int argc, char **argv) case 'q': qsize = atoi(optarg); break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; case 'h': error: default: diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c index 60d46eea225b..470e1a7e8810 100644 --- a/samples/bpf/xdp_redirect_map_user.c +++ b/samples/bpf/xdp_redirect_map_user.c @@ -30,7 +30,7 @@ static int ifindex_in; static int ifindex_out; static bool ifindex_out_xdp_dummy_attached = true; -static __u32 xdp_flags; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int rxcnt_map_fd; static void int_exit(int sig) @@ -70,7 +70,8 @@ static void usage(const char *prog) "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n" "OPTS:\n" " -S use skb-mode\n" - " -N enforce native mode\n", + " -N enforce native mode\n" + " -F force loading prog\n", prog); } @@ -82,7 +83,7 @@ int main(int argc, char **argv) }; struct bpf_program *prog, *dummy_prog; int prog_fd, dummy_prog_fd; - const char *optstr = "SN"; + const char *optstr = "FSN"; struct bpf_object *obj; int ret, opt, key = 0; char filename[256]; @@ -96,6 +97,9 @@ int main(int argc, char **argv) case 'N': xdp_flags |= XDP_FLAGS_DRV_MODE; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(basename(argv[0])); return 1; diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c index 93404820df68..be6058cda97c 100644 --- a/samples/bpf/xdp_redirect_user.c +++ b/samples/bpf/xdp_redirect_user.c @@ -30,7 +30,7 @@ static int ifindex_in; static int ifindex_out; static bool ifindex_out_xdp_dummy_attached = true; -static __u32 xdp_flags; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int rxcnt_map_fd; static void int_exit(int sig) @@ -70,7 +70,8 @@ static void usage(const char *prog) "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n" "OPTS:\n" " -S use skb-mode\n" - " -N enforce native mode\n", + " -N enforce native mode\n" + " -F force loading prog\n", prog); } @@ -83,7 +84,7 @@ int main(int argc, char **argv) }; struct bpf_program *prog, *dummy_prog; int prog_fd, tx_port_map_fd, opt; - const char *optstr = "SN"; + const char *optstr = "FSN"; struct bpf_object *obj; char filename[256]; int dummy_prog_fd; @@ -97,6 +98,9 @@ int main(int argc, char **argv) case 'N': xdp_flags |= XDP_FLAGS_DRV_MODE; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(basename(argv[0])); return 1; diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index c63c6beec7d6..208d6a996478 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -26,8 +26,9 @@ #include "bpf_util.h" #include "bpf/libbpf.h" #include +#include -int sock, sock_arp, flags = 0; +int sock, sock_arp, flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int total_ifindex; int *ifindex_list; char buf[8192]; @@ -608,33 +609,56 @@ cleanup: return ret; } +static void usage(const char *prog) +{ + fprintf(stderr, + "%s: %s [OPTS] interface name list\n\n" + "OPTS:\n" + " -S use skb-mode\n" + " -F force loading prog\n", + __func__, prog); +} + int main(int ac, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; + const char *optstr = "SF"; struct bpf_object *obj; char filename[256]; char **ifname_list; - int prog_fd; + int prog_fd, opt; int i = 1; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); prog_load_attr.file = filename; - if (ac < 2) { - printf("usage: %s [-S] Interface name list\n", argv[0]); - return 1; + total_ifindex = ac - 1; + ifname_list = (argv + 1); + + while ((opt = getopt(ac, argv, optstr)) != -1) { + switch (opt) { + case 'S': + flags |= XDP_FLAGS_SKB_MODE; + total_ifindex--; + ifname_list++; + break; + case 'F': + flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + total_ifindex--; + ifname_list++; + break; + default: + usage(basename(argv[0])); + return 1; + } } - if (!strcmp(argv[1], "-S")) { - flags = XDP_FLAGS_SKB_MODE; - total_ifindex = ac - 2; - ifname_list = (argv + 2); - } else { - flags = 0; - total_ifindex = ac - 1; - ifname_list = (argv + 1); + + if (optind == ac) { + usage(basename(argv[0])); + return 1; } if (setrlimit(RLIMIT_MEMLOCK, &r)) { diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index ef26f882f92f..e7a98c2a440f 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -30,7 +30,7 @@ static int ifindex = -1; static char ifname_buf[IF_NAMESIZE]; static char *ifname; -static __u32 xdp_flags; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static struct bpf_map *stats_global_map; static struct bpf_map *rx_queue_index_map; @@ -52,6 +52,7 @@ static const struct option long_options[] = { {"action", required_argument, NULL, 'a' }, {"readmem", no_argument, NULL, 'r' }, {"swapmac", no_argument, NULL, 'm' }, + {"force", no_argument, NULL, 'F' }, {0, 0, NULL, 0 } }; @@ -487,7 +488,7 @@ int main(int argc, char **argv) } /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "hSd:", + while ((opt = getopt_long(argc, argv, "FhSrmzd:s:a:", long_options, &longindex)) != -1) { switch (opt) { case 'd': @@ -524,6 +525,9 @@ int main(int argc, char **argv) case 'm': cfg_options |= SWAP_MAC; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; case 'h': error: default: diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index 5f5828ee0761..62f34827c775 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include "perf-sys.h" #include "trace_helpers.h" @@ -21,12 +23,13 @@ static int pmu_fds[MAX_CPUS], if_idx; static struct perf_event_mmap_page *headers[MAX_CPUS]; static char *if_name; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int do_attach(int idx, int fd, const char *name) { int err; - err = bpf_set_link_xdp_fd(idx, fd, 0); + err = bpf_set_link_xdp_fd(idx, fd, xdp_flags); if (err < 0) printf("ERROR: failed to attach program to %s\n", name); @@ -98,21 +101,42 @@ static void sig_handler(int signo) exit(0); } +static void usage(const char *prog) +{ + fprintf(stderr, + "%s: %s [OPTS] \n\n" + "OPTS:\n" + " -F force loading prog\n", + __func__, prog); +} + int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; + const char *optstr = "F"; + int prog_fd, map_fd, opt; struct bpf_object *obj; struct bpf_map *map; - int prog_fd, map_fd; char filename[256]; int ret, err, i; int numcpus; - if (argc < 2) { - printf("Usage: %s \n", argv[0]); + while ((opt = getopt(argc, argv, optstr)) != -1) { + switch (opt) { + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; + default: + usage(basename(argv[0])); + return 1; + } + } + + if (optind == argc) { + usage(basename(argv[0])); return 1; } @@ -143,16 +167,16 @@ int main(int argc, char **argv) } map_fd = bpf_map__fd(map); - if_idx = if_nametoindex(argv[1]); + if_idx = if_nametoindex(argv[optind]); if (!if_idx) - if_idx = strtoul(argv[1], NULL, 0); + if_idx = strtoul(argv[optind], NULL, 0); if (!if_idx) { fprintf(stderr, "Invalid ifname\n"); return 1; } - if_name = argv[1]; - err = do_attach(if_idx, prog_fd, argv[1]); + if_name = argv[optind]; + err = do_attach(if_idx, prog_fd, if_name); if (err) return err; diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 5093d8220da5..e3de60930d27 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -25,7 +25,7 @@ #define STATS_INTERVAL_S 2U static int ifindex = -1; -static __u32 xdp_flags = 0; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int rxcnt_map_fd; static void int_exit(int sig) @@ -83,6 +83,7 @@ static void usage(const char *cmd) printf(" -P Default is TCP\n"); printf(" -S use skb-mode\n"); printf(" -N enforce native mode\n"); + printf(" -F Force loading the XDP prog\n"); printf(" -h Display this help\n"); } @@ -145,7 +146,7 @@ int main(int argc, char **argv) }; struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; int min_port = 0, max_port = 0, vip2tnl_map_fd; - const char *optstr = "i:a:p:s:d:m:T:P:SNh"; + const char *optstr = "i:a:p:s:d:m:T:P:FSNh"; unsigned char opt_flags[256] = {}; unsigned int kill_after_s = 0; struct iptnl_info tnl = {}; @@ -217,6 +218,9 @@ int main(int argc, char **argv) case 'N': xdp_flags |= XDP_FLAGS_DRV_MODE; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(argv[0]); return 1; diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 57ecadc58403..188723784768 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -68,7 +68,7 @@ enum benchmark_type { }; static enum benchmark_type opt_bench = BENCH_RXDROP; -static u32 opt_xdp_flags; +static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static const char *opt_if = ""; static int opt_ifindex; static int opt_queue; @@ -682,7 +682,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "rtli:q:psSNn:cz", long_options, + c = getopt_long(argc, argv, "Frtli:q:psSNn:cz", long_options, &option_index); if (c == -1) break; @@ -725,6 +725,9 @@ static void parse_command_line(int argc, char **argv) case 'c': opt_xdp_bind_flags |= XDP_COPY; break; + case 'F': + opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(basename(argv[0])); } From 50db9f0731889b9f3839cab5f44163733eb44f04 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 1 Feb 2019 22:42:29 +0100 Subject: [PATCH 25/29] libbpf: Add a support for getting xdp prog id on ifindex Since we have a dedicated netlink attributes for xdp setup on a particular interface, it is now possible to retrieve the program id that is currently attached to the interface. The use case is targeted for sample xdp programs, which will store the program id just after loading bpf program onto iface. On shutdown, the sample will make sure that it can unload the program by querying again the iface and verifying that both program id's matches. Signed-off-by: Maciej Fijalkowski Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.h | 1 + tools/lib/bpf/libbpf.map | 1 + tools/lib/bpf/netlink.c | 85 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 931be6f3408c..43c77e98df6f 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -317,6 +317,7 @@ LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd); LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); +LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); enum bpf_perf_event_ret { LIBBPF_PERF_EVENT_DONE = 0, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 43ba9bb8d24b..62c680fb13d1 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -132,4 +132,5 @@ LIBBPF_0.0.2 { bpf_probe_prog_type; bpf_map_lookup_elem_flags; bpf_object__find_map_fd_by_name; + bpf_get_link_xdp_id; } LIBBPF_0.0.1; diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 0ce67aea8f3b..ce3ec81b71c0 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -21,6 +21,12 @@ typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t, void *cookie); +struct xdp_id_md { + int ifindex; + __u32 flags; + __u32 id; +}; + int libbpf_netlink_open(__u32 *nl_pid) { struct sockaddr_nl sa; @@ -196,6 +202,85 @@ static int __dump_link_nlmsg(struct nlmsghdr *nlh, return dump_link_nlmsg(cookie, ifi, tb); } +static unsigned char get_xdp_id_attr(unsigned char mode, __u32 flags) +{ + if (mode != XDP_ATTACHED_MULTI) + return IFLA_XDP_PROG_ID; + if (flags & XDP_FLAGS_DRV_MODE) + return IFLA_XDP_DRV_PROG_ID; + if (flags & XDP_FLAGS_HW_MODE) + return IFLA_XDP_HW_PROG_ID; + if (flags & XDP_FLAGS_SKB_MODE) + return IFLA_XDP_SKB_PROG_ID; + + return IFLA_XDP_UNSPEC; +} + +static int get_xdp_id(void *cookie, void *msg, struct nlattr **tb) +{ + struct nlattr *xdp_tb[IFLA_XDP_MAX + 1]; + struct xdp_id_md *xdp_id = cookie; + struct ifinfomsg *ifinfo = msg; + unsigned char mode, xdp_attr; + int ret; + + if (xdp_id->ifindex && xdp_id->ifindex != ifinfo->ifi_index) + return 0; + + if (!tb[IFLA_XDP]) + return 0; + + ret = libbpf_nla_parse_nested(xdp_tb, IFLA_XDP_MAX, tb[IFLA_XDP], NULL); + if (ret) + return ret; + + if (!xdp_tb[IFLA_XDP_ATTACHED]) + return 0; + + mode = libbpf_nla_getattr_u8(xdp_tb[IFLA_XDP_ATTACHED]); + if (mode == XDP_ATTACHED_NONE) + return 0; + + xdp_attr = get_xdp_id_attr(mode, xdp_id->flags); + if (!xdp_attr || !xdp_tb[xdp_attr]) + return 0; + + xdp_id->id = libbpf_nla_getattr_u32(xdp_tb[xdp_attr]); + + return 0; +} + +int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +{ + struct xdp_id_md xdp_id = {}; + int sock, ret; + __u32 nl_pid; + __u32 mask; + + if (flags & ~XDP_FLAGS_MASK) + return -EINVAL; + + /* Check whether the single {HW,DRV,SKB} mode is set */ + flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE); + mask = flags - 1; + if (flags && flags & mask) + return -EINVAL; + + sock = libbpf_netlink_open(&nl_pid); + if (sock < 0) + return sock; + + xdp_id.ifindex = ifindex; + xdp_id.flags = flags; + + ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_id, &xdp_id); + if (!ret) + *prog_id = xdp_id.id; + + close(sock); + return ret; +} + int libbpf_nl_get_link(int sock, unsigned int nl_pid, libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) { From 3b7a8ec2dec3e408288dbc80b8aef25df20ba119 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 1 Feb 2019 22:42:30 +0100 Subject: [PATCH 26/29] samples/bpf: Check the prog id before exiting Check the program id within the signal handler on polling xdp samples that were previously converted to libbpf usage. Avoid the situation of unloading the program that was not attached by sample that is exiting. Handle also the case where bpf_get_link_xdp_id didn't exit with an error but the xdp program was not found on an interface. Reported-by: Michal Papaj Reported-by: Jakub Spizewski Signed-off-by: Maciej Fijalkowski Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- samples/bpf/xdp1_user.c | 24 +++++++++++++- samples/bpf/xdp_adjust_tail_user.c | 30 ++++++++++++++--- samples/bpf/xdp_redirect_cpu_user.c | 35 ++++++++++++++++---- samples/bpf/xdp_redirect_map_user.c | 49 +++++++++++++++++++++++++-- samples/bpf/xdp_redirect_user.c | 49 +++++++++++++++++++++++++-- samples/bpf/xdp_router_ipv4_user.c | 51 +++++++++++++++++++---------- samples/bpf/xdp_rxq_info_user.c | 33 ++++++++++++++++--- samples/bpf/xdp_sample_pkts_user.c | 34 ++++++++++++++++--- samples/bpf/xdp_tx_iptunnel_user.c | 28 ++++++++++++++-- samples/bpf/xdpsock_user.c | 23 ++++++++++++- 10 files changed, 308 insertions(+), 48 deletions(-) diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 505bce207165..6a64e93365e1 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -23,10 +23,22 @@ static int ifindex; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static __u32 prog_id; static void int_exit(int sig) { - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on a given interface\n"); + else + printf("program on interface changed, not removing\n"); exit(0); } @@ -74,11 +86,14 @@ int main(int argc, char **argv) struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); const char *optstr = "FSN"; int prog_fd, map_fd, opt; struct bpf_object *obj; struct bpf_map *map; char filename[256]; + int err; while ((opt = getopt(argc, argv, optstr)) != -1) { switch (opt) { @@ -139,6 +154,13 @@ int main(int argc, char **argv) return 1; } + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; + } + prog_id = info.id; + poll_stats(map_fd, 2); return 0; diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index 049bddf7778b..07e1b9269e49 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -25,11 +25,24 @@ static int ifindex = -1; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static __u32 prog_id; static void int_exit(int sig) { - if (ifindex > -1) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (ifindex > -1) { + if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on a given iface\n"); + else + printf("program on interface changed, not removing\n"); + } exit(0); } @@ -72,11 +85,14 @@ int main(int argc, char **argv) }; unsigned char opt_flags[256] = {}; const char *optstr = "i:T:SNFh"; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); unsigned int kill_after_s = 0; int i, prog_fd, map_fd, opt; struct bpf_object *obj; struct bpf_map *map; char filename[256]; + int err; for (i = 0; i < strlen(optstr); i++) if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z') @@ -146,9 +162,15 @@ int main(int argc, char **argv) return 1; } - poll_stats(map_fd, kill_after_s); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return 1; + } + prog_id = info.id; - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + poll_stats(map_fd, kill_after_s); + int_exit(0); return 0; } diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 0224afb55845..586b294d72d3 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -32,6 +32,7 @@ static const char *__doc__ = static int ifindex = -1; static char ifname_buf[IF_NAMESIZE]; static char *ifname; +static __u32 prog_id; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int cpu_map_fd; @@ -68,11 +69,24 @@ static const struct option long_options[] = { static void int_exit(int sig) { - fprintf(stderr, - "Interrupted: Removing XDP program on ifindex:%d device:%s\n", - ifindex, ifname); - if (ifindex > -1) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (ifindex > -1) { + if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(EXIT_FAIL); + } + if (prog_id == curr_prog_id) { + fprintf(stderr, + "Interrupted: Removing XDP program on ifindex:%d device:%s\n", + ifindex, ifname); + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + } else if (!curr_prog_id) { + printf("couldn't find a prog id on a given iface\n"); + } else { + printf("program on interface changed, not removing\n"); + } + } exit(EXIT_OK); } @@ -608,6 +622,8 @@ int main(int argc, char **argv) struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_UNSPEC, }; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); bool use_separators = true; bool stress_mode = false; struct bpf_program *prog; @@ -617,9 +633,9 @@ int main(int argc, char **argv) int longindex = 0; int interval = 2; int add_cpu = -1; + int opt, err; int prog_fd; __u32 qsize; - int opt; /* Notice: choosing he queue size is very important with the * ixgbe driver, because it's driver page recycling trick is @@ -746,6 +762,13 @@ int main(int argc, char **argv) return EXIT_FAIL_XDP; } + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; + } + prog_id = info.id; + stats_poll(interval, use_separators, prog_name, stress_mode); return EXIT_OK; } diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c index 470e1a7e8810..327226be5a06 100644 --- a/samples/bpf/xdp_redirect_map_user.c +++ b/samples/bpf/xdp_redirect_map_user.c @@ -29,15 +29,41 @@ static int ifindex_in; static int ifindex_out; static bool ifindex_out_xdp_dummy_attached = true; +static __u32 prog_id; +static __u32 dummy_prog_id; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int rxcnt_map_fd; static void int_exit(int sig) { - bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags); - if (ifindex_out_xdp_dummy_attached) - bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on iface IN\n"); + else + printf("program on iface IN changed, not removing\n"); + + if (ifindex_out_xdp_dummy_attached) { + curr_prog_id = 0; + if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id, + xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on iface OUT\n"); + else + printf("program on iface OUT changed, not removing\n"); + } exit(0); } @@ -82,6 +108,8 @@ int main(int argc, char **argv) .prog_type = BPF_PROG_TYPE_XDP, }; struct bpf_program *prog, *dummy_prog; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); int prog_fd, dummy_prog_fd; const char *optstr = "FSN"; struct bpf_object *obj; @@ -153,6 +181,13 @@ int main(int argc, char **argv) return 1; } + ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (ret) { + printf("can't get prog info - %s\n", strerror(errno)); + return ret; + } + prog_id = info.id; + /* Loading dummy XDP prog on out-device */ if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd, (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) { @@ -160,6 +195,14 @@ int main(int argc, char **argv) ifindex_out_xdp_dummy_attached = false; } + memset(&info, 0, sizeof(info)); + ret = bpf_obj_get_info_by_fd(dummy_prog_fd, &info, &info_len); + if (ret) { + printf("can't get prog info - %s\n", strerror(errno)); + return ret; + } + dummy_prog_id = info.id; + signal(SIGINT, int_exit); signal(SIGTERM, int_exit); diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c index be6058cda97c..a5d8ad3129ed 100644 --- a/samples/bpf/xdp_redirect_user.c +++ b/samples/bpf/xdp_redirect_user.c @@ -29,15 +29,41 @@ static int ifindex_in; static int ifindex_out; static bool ifindex_out_xdp_dummy_attached = true; +static __u32 prog_id; +static __u32 dummy_prog_id; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int rxcnt_map_fd; static void int_exit(int sig) { - bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags); - if (ifindex_out_xdp_dummy_attached) - bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on iface IN\n"); + else + printf("program on iface IN changed, not removing\n"); + + if (ifindex_out_xdp_dummy_attached) { + curr_prog_id = 0; + if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id, + xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on iface OUT\n"); + else + printf("program on iface OUT changed, not removing\n"); + } exit(0); } @@ -84,6 +110,8 @@ int main(int argc, char **argv) }; struct bpf_program *prog, *dummy_prog; int prog_fd, tx_port_map_fd, opt; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); const char *optstr = "FSN"; struct bpf_object *obj; char filename[256]; @@ -154,6 +182,13 @@ int main(int argc, char **argv) return 1; } + ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (ret) { + printf("can't get prog info - %s\n", strerror(errno)); + return ret; + } + prog_id = info.id; + /* Loading dummy XDP prog on out-device */ if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd, (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) { @@ -161,6 +196,14 @@ int main(int argc, char **argv) ifindex_out_xdp_dummy_attached = false; } + memset(&info, 0, sizeof(info)); + ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (ret) { + printf("can't get prog info - %s\n", strerror(errno)); + return ret; + } + dummy_prog_id = info.id; + signal(SIGINT, int_exit); signal(SIGTERM, int_exit); diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index 208d6a996478..79fe7bc26ab4 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -30,7 +30,8 @@ int sock, sock_arp, flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int total_ifindex; -int *ifindex_list; +static int *ifindex_list; +static __u32 *prog_id_list; char buf[8192]; static int lpm_map_fd; static int rxcnt_map_fd; @@ -41,23 +42,34 @@ static int tx_port_map_fd; static int get_route_table(int rtm_family); static void int_exit(int sig) { + __u32 prog_id = 0; int i = 0; - for (i = 0; i < total_ifindex; i++) - bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); + for (i = 0; i < total_ifindex; i++) { + if (bpf_get_link_xdp_id(ifindex_list[i], &prog_id, flags)) { + printf("bpf_get_link_xdp_id on iface %d failed\n", + ifindex_list[i]); + exit(1); + } + if (prog_id_list[i] == prog_id) + bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); + else if (!prog_id) + printf("couldn't find a prog id on iface %d\n", + ifindex_list[i]); + else + printf("program on iface %d changed, not removing\n", + ifindex_list[i]); + prog_id = 0; + } exit(0); } static void close_and_exit(int sig) { - int i = 0; - close(sock); close(sock_arp); - for (i = 0; i < total_ifindex; i++) - bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); - exit(0); + int_exit(0); } /* Get the mac address of the interface given interface name */ @@ -186,13 +198,8 @@ static void read_route(struct nlmsghdr *nh, int nll) route.iface_name = alloca(sizeof(char *) * IFNAMSIZ); route.iface_name = if_indextoname(route.iface, route.iface_name); route.mac = getmac(route.iface_name); - if (route.mac == -1) { - int i = 0; - - for (i = 0; i < total_ifindex; i++) - bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); - exit(0); - } + if (route.mac == -1) + int_exit(0); assert(bpf_map_update_elem(tx_port_map_fd, &route.iface, &route.iface, 0) == 0); if (rtm_family == AF_INET) { @@ -625,12 +632,14 @@ int main(int ac, char **argv) struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); const char *optstr = "SF"; struct bpf_object *obj; char filename[256]; char **ifname_list; int prog_fd, opt; - int i = 1; + int err, i = 1; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); prog_load_attr.file = filename; @@ -687,7 +696,7 @@ int main(int ac, char **argv) return 1; } - ifindex_list = (int *)malloc(total_ifindex * sizeof(int *)); + ifindex_list = (int *)calloc(total_ifindex, sizeof(int *)); for (i = 0; i < total_ifindex; i++) { ifindex_list[i] = if_nametoindex(ifname_list[i]); if (!ifindex_list[i]) { @@ -696,6 +705,7 @@ int main(int ac, char **argv) return 1; } } + prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *)); for (i = 0; i < total_ifindex; i++) { if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) { printf("link set xdp fd failed\n"); @@ -706,6 +716,13 @@ int main(int ac, char **argv) return 1; } + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; + } + prog_id_list[i] = info.id; + memset(&info, 0, sizeof(info)); printf("Attached to %d\n", ifindex_list[i]); } signal(SIGINT, int_exit); diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index e7a98c2a440f..1210f3b170f0 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -29,6 +29,7 @@ static const char *__doc__ = " XDP RX-queue info extract example\n\n" static int ifindex = -1; static char ifname_buf[IF_NAMESIZE]; static char *ifname; +static __u32 prog_id; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; @@ -58,11 +59,24 @@ static const struct option long_options[] = { static void int_exit(int sig) { - fprintf(stderr, - "Interrupted: Removing XDP program on ifindex:%d device:%s\n", - ifindex, ifname); - if (ifindex > -1) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (ifindex > -1) { + if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(EXIT_FAIL); + } + if (prog_id == curr_prog_id) { + fprintf(stderr, + "Interrupted: Removing XDP program on ifindex:%d device:%s\n", + ifindex, ifname); + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + } else if (!curr_prog_id) { + printf("couldn't find a prog id on a given iface\n"); + } else { + printf("program on interface changed, not removing\n"); + } + } exit(EXIT_OK); } @@ -447,6 +461,8 @@ int main(int argc, char **argv) struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); int prog_fd, map_fd, opt, err; bool use_separators = true; struct config cfg = { 0 }; @@ -580,6 +596,13 @@ int main(int argc, char **argv) return EXIT_FAIL_XDP; } + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; + } + prog_id = info.id; + stats_poll(interval, action, cfg_options); return EXIT_OK; } diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index 62f34827c775..dc66345a929a 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -24,25 +24,49 @@ static int pmu_fds[MAX_CPUS], if_idx; static struct perf_event_mmap_page *headers[MAX_CPUS]; static char *if_name; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static __u32 prog_id; static int do_attach(int idx, int fd, const char *name) { + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); int err; err = bpf_set_link_xdp_fd(idx, fd, xdp_flags); - if (err < 0) + if (err < 0) { printf("ERROR: failed to attach program to %s\n", name); + return err; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; + } + prog_id = info.id; return err; } static int do_detach(int idx, const char *name) { - int err; + __u32 curr_prog_id = 0; + int err = 0; - err = bpf_set_link_xdp_fd(idx, -1, 0); - if (err < 0) - printf("ERROR: failed to detach program from %s\n", name); + err = bpf_get_link_xdp_id(idx, &curr_prog_id, 0); + if (err) { + printf("bpf_get_link_xdp_id failed\n"); + return err; + } + if (prog_id == curr_prog_id) { + err = bpf_set_link_xdp_fd(idx, -1, 0); + if (err < 0) + printf("ERROR: failed to detach prog from %s\n", name); + } else if (!curr_prog_id) { + printf("couldn't find a prog id on a %s\n", name); + } else { + printf("program on interface changed, not removing\n"); + } return err; } diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index e3de60930d27..4a1511eb7812 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -27,11 +27,24 @@ static int ifindex = -1; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int rxcnt_map_fd; +static __u32 prog_id; static void int_exit(int sig) { - if (ifindex > -1) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (ifindex > -1) { + if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on a given iface\n"); + else + printf("program on interface changed, not removing\n"); + } exit(0); } @@ -148,13 +161,15 @@ int main(int argc, char **argv) int min_port = 0, max_port = 0, vip2tnl_map_fd; const char *optstr = "i:a:p:s:d:m:T:P:FSNh"; unsigned char opt_flags[256] = {}; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); unsigned int kill_after_s = 0; struct iptnl_info tnl = {}; struct bpf_object *obj; struct vip vip = {}; char filename[256]; int opt, prog_fd; - int i; + int i, err; tnl.family = AF_UNSPEC; vip.protocol = IPPROTO_TCP; @@ -276,6 +291,13 @@ int main(int argc, char **argv) return 1; } + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; + } + prog_id = info.id; + poll_stats(kill_after_s); bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 188723784768..f73055e0191f 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -76,6 +76,7 @@ static int opt_poll; static int opt_shared_packet_buffer; static int opt_interval = 1; static u32 opt_xdp_bind_flags; +static __u32 prog_id; struct xdp_umem_uqueue { u32 cached_prod; @@ -631,9 +632,20 @@ static void *poller(void *arg) static void int_exit(int sig) { + __u32 curr_prog_id = 0; + (void)sig; dump_stats(); - bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); + if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(EXIT_FAILURE); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on a given interface\n"); + else + printf("program on interface changed, not removing\n"); exit(EXIT_SUCCESS); } @@ -907,6 +919,8 @@ int main(int argc, char **argv) .prog_type = BPF_PROG_TYPE_XDP, }; int prog_fd, qidconf_map, xsks_map; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); struct bpf_object *obj; char xdp_filename[256]; struct bpf_map *map; @@ -953,6 +967,13 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } + ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (ret) { + printf("can't get prog info - %s\n", strerror(errno)); + return 1; + } + prog_id = info.id; + ret = bpf_map_update_elem(qidconf_map, &key, &opt_queue, 0); if (ret) { fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n"); From 2a1181540952ed0c2b91928f414c388ffe66d2b6 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 1 Feb 2019 15:46:38 -0800 Subject: [PATCH 27/29] selftests/bpf: remove generated verifier/tests.h on 'make clean' 'make clean' is supposed to remove generated files. Signed-off-by: Stanislav Fomichev Acked-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 9de0a1ae8d65..383d2ff13fc7 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -216,7 +216,8 @@ ifeq ($(DWARF2BTF),y) $(BTF_PAHOLE) -J $@ endif -$(OUTPUT)/test_verifier: $(OUTPUT)/verifier/tests.h +VERIFIER_TESTS_H := $(OUTPUT)/verifier/tests.h +$(OUTPUT)/test_verifier: $(VERIFIER_TESTS_H) $(OUTPUT)/test_verifier: CFLAGS += -I$(OUTPUT) VERIFIER_TEST_FILES := $(wildcard verifier/*.c) @@ -227,6 +228,7 @@ $(OUTPUT)/verifier/tests.h: $(VERIFIER_TEST_FILES) ls *.c 2> /dev/null | \ sed -e 's@\(.*\)@#include \"\1\"@'; \ echo '#endif' \ - ) > $(OUTPUT)/verifier/tests.h) + ) > $(VERIFIER_TESTS_H)) -EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(ALU32_BUILD_DIR) +EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(ALU32_BUILD_DIR) \ + $(VERIFIER_TESTS_H) From db0a4b3b6b83a081a9ec309cc8178e5c9b4706a5 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 1 Feb 2019 22:39:28 +0000 Subject: [PATCH 28/29] nfp: bpf: correct the behavior for shifts by zero Shifts by zero do nothing, and should be treated as nops. Even though compiler is not supposed to generate such instructions and manual written assembly is unlikely to have them, but they are legal instructions and have defined behavior. This patch correct existing shifts code-gen to make sure they do nothing when shift amount is zero except when the instruction is ALU32 for which high bits need to be cleared. For shift amount bigger than type size, already, NFP JIT back-end errors out for immediate shift and only low 5 bits will be taken into account for indirect shift which is the same as x86. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 30 +++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index aa3a2098a583..093b8ff80241 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1967,6 +1967,9 @@ static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) */ static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) { + if (!shift_amt) + return 0; + if (shift_amt < 32) { emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF, @@ -2079,6 +2082,9 @@ static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) */ static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) { + if (!shift_amt) + return 0; + if (shift_amt < 32) { emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF, shift_amt); @@ -2180,6 +2186,9 @@ static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) */ static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) { + if (!shift_amt) + return 0; + if (shift_amt < 32) { emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF, shift_amt); @@ -2388,10 +2397,13 @@ static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int __ashr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) { - /* Set signedness bit (MSB of result). */ - emit_alu(nfp_prog, reg_none(), reg_a(dst), ALU_OP_OR, reg_imm(0)); - emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, reg_b(dst), - SHF_SC_R_SHF, shift_amt); + if (shift_amt) { + /* Set signedness bit (MSB of result). */ + emit_alu(nfp_prog, reg_none(), reg_a(dst), ALU_OP_OR, + reg_imm(0)); + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, + reg_b(dst), SHF_SC_R_SHF, shift_amt); + } wrp_immed(nfp_prog, reg_both(dst + 1), 0); return 0; @@ -2433,12 +2445,10 @@ static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; - if (!insn->imm) - return 1; /* TODO: zero shift means indirect */ - - emit_shf(nfp_prog, reg_both(insn->dst_reg * 2), - reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2), - SHF_SC_L_SHF, insn->imm); + if (insn->imm) + emit_shf(nfp_prog, reg_both(insn->dst_reg * 2), + reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2), + SHF_SC_L_SHF, insn->imm); wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); return 0; From ac7a1717a2cbf74c84126998db4e9864ac1fa99b Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 1 Feb 2019 22:39:29 +0000 Subject: [PATCH 29/29] nfp: bpf: complete ALU32 logic shift supports The following ALU32 logic shift supports are missing: BPF_ALU | BPF_LSH | BPF_X BPF_ALU | BPF_RSH | BPF_X BPF_ALU | BPF_RSH | BPF_K For BPF_RSH | BPF_K, it could be implemented using NFP direct shift instruction. For the other BPF_X shifts, NFP indirect shifts sequences need to be used. Separate code-gen hook is assigned to each instruction to make the implementation clear. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 72 ++++++++++++++++++-- 1 file changed, 67 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 093b8ff80241..4d9d3806908e 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -2441,16 +2441,75 @@ static int ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return __ashr_imm(nfp_prog, dst, insn->imm); } +static int __shr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +{ + if (shift_amt) + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_SHF, shift_amt); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + return 0; +} + +static int shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + + return __shr_imm(nfp_prog, dst, insn->imm); +} + +static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + + dst = insn->dst_reg * 2; + umin = meta->umin_src; + umax = meta->umax_src; + if (umin == umax) + return __shr_imm(nfp_prog, dst, umin); + + src = insn->src_reg * 2; + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_SHF); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + return 0; +} + +static int __shl_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +{ + if (shift_amt) + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_L_SHF, shift_amt); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + return 0; +} + static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; - if (insn->imm) - emit_shf(nfp_prog, reg_both(insn->dst_reg * 2), - reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2), - SHF_SC_L_SHF, insn->imm); - wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + return __shl_imm(nfp_prog, dst, insn->imm); +} +static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + + dst = insn->dst_reg * 2; + umin = meta->umin_src; + umax = meta->umax_src; + if (umin == umax) + return __shl_imm(nfp_prog, dst, umin); + + src = insn->src_reg * 2; + shl_reg64_lt32_low(nfp_prog, dst, src); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); return 0; } @@ -3360,7 +3419,10 @@ static const instr_cb_t instr_cb[256] = { [BPF_ALU | BPF_DIV | BPF_X] = div_reg, [BPF_ALU | BPF_DIV | BPF_K] = div_imm, [BPF_ALU | BPF_NEG] = neg_reg, + [BPF_ALU | BPF_LSH | BPF_X] = shl_reg, [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, + [BPF_ALU | BPF_RSH | BPF_X] = shr_reg, + [BPF_ALU | BPF_RSH | BPF_K] = shr_imm, [BPF_ALU | BPF_ARSH | BPF_X] = ashr_reg, [BPF_ALU | BPF_ARSH | BPF_K] = ashr_imm, [BPF_ALU | BPF_END | BPF_X] = end_reg32,