From 2058b38371d0dbd84831a085db64b996d623f81a Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 6 Jul 2018 14:28:14 -0700 Subject: [PATCH 01/61] bpftool: introduce cgroup tree command This commit introduces a new bpftool command: cgroup tree. The idea is to iterate over the whole cgroup tree and print all attached programs. I was debugging a bpf/systemd issue, and found, that there is no simple way to listen all bpf programs attached to cgroups. I did master something in bash, but after some time got tired of it, and decided, that adding a dedicated bpftool command could be a better idea. So, here it is: $ sudo ./bpftool cgroup tree CgroupPath ID AttachType AttachFlags Name /sys/fs/cgroup/system.slice/systemd-machined.service 18 ingress 17 egress /sys/fs/cgroup/system.slice/systemd-logind.service 20 ingress 19 egress /sys/fs/cgroup/system.slice/systemd-udevd.service 16 ingress 15 egress /sys/fs/cgroup/system.slice/systemd-journald.service 14 ingress 13 egress Signed-off-by: Roman Gushchin Acked-by: Jakub Kicinski Cc: Quentin Monnet Cc: Daniel Borkmann Cc: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/cgroup.c | 170 +++++++++++++++++++++++++++++++++++-- 1 file changed, 165 insertions(+), 5 deletions(-) diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 16bee011e16c..ee7a9765c6b3 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -2,7 +2,12 @@ // Copyright (C) 2017 Facebook // Author: Roman Gushchin +#define _XOPEN_SOURCE 500 +#include #include +#include +#include +#include #include #include #include @@ -53,7 +58,8 @@ static enum bpf_attach_type parse_attach_type(const char *str) } static int show_bpf_prog(int id, const char *attach_type_str, - const char *attach_flags_str) + const char *attach_flags_str, + int level) { struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); @@ -78,7 +84,8 @@ static int show_bpf_prog(int id, const char *attach_type_str, jsonw_string_field(json_wtr, "name", info.name); jsonw_end_object(json_wtr); } else { - printf("%-8u %-15s %-15s %-15s\n", info.id, + printf("%s%-8u %-15s %-15s %-15s\n", level ? " " : "", + info.id, attach_type_str, attach_flags_str, info.name); @@ -88,7 +95,20 @@ static int show_bpf_prog(int id, const char *attach_type_str, return 0; } -static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) +static int count_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) +{ + __u32 prog_cnt = 0; + int ret; + + ret = bpf_prog_query(cgroup_fd, type, 0, NULL, NULL, &prog_cnt); + if (ret) + return -1; + + return prog_cnt; +} + +static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type, + int level) { __u32 prog_ids[1024] = {0}; char *attach_flags_str; @@ -123,7 +143,7 @@ static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) for (iter = 0; iter < prog_cnt; iter++) show_bpf_prog(prog_ids[iter], attach_type_strings[type], - attach_flags_str); + attach_flags_str, level); return 0; } @@ -161,7 +181,7 @@ static int do_show(int argc, char **argv) * If we were able to get the show for at least one * attach type, let's return 0. */ - if (show_attached_bpf_progs(cgroup_fd, type) == 0) + if (show_attached_bpf_progs(cgroup_fd, type, 0) == 0) ret = 0; } @@ -173,6 +193,143 @@ exit: return ret; } +/* + * To distinguish nftw() errors and do_show_tree_fn() errors + * and avoid duplicating error messages, let's return -2 + * from do_show_tree_fn() in case of error. + */ +#define NFTW_ERR -1 +#define SHOW_TREE_FN_ERR -2 +static int do_show_tree_fn(const char *fpath, const struct stat *sb, + int typeflag, struct FTW *ftw) +{ + enum bpf_attach_type type; + bool skip = true; + int cgroup_fd; + + if (typeflag != FTW_D) + return 0; + + cgroup_fd = open(fpath, O_RDONLY); + if (cgroup_fd < 0) { + p_err("can't open cgroup %s: %s", fpath, strerror(errno)); + return SHOW_TREE_FN_ERR; + } + + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + int count = count_attached_bpf_progs(cgroup_fd, type); + + if (count < 0 && errno != EINVAL) { + p_err("can't query bpf programs attached to %s: %s", + fpath, strerror(errno)); + close(cgroup_fd); + return SHOW_TREE_FN_ERR; + } + if (count > 0) { + skip = false; + break; + } + } + + if (skip) { + close(cgroup_fd); + return 0; + } + + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_string_field(json_wtr, "cgroup", fpath); + jsonw_name(json_wtr, "programs"); + jsonw_start_array(json_wtr); + } else { + printf("%s\n", fpath); + } + + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) + show_attached_bpf_progs(cgroup_fd, type, ftw->level); + + if (json_output) { + jsonw_end_array(json_wtr); + jsonw_end_object(json_wtr); + } + + close(cgroup_fd); + + return 0; +} + +static char *find_cgroup_root(void) +{ + struct mntent *mnt; + FILE *f; + + f = fopen("/proc/mounts", "r"); + if (f == NULL) + return NULL; + + while ((mnt = getmntent(f))) { + if (strcmp(mnt->mnt_type, "cgroup2") == 0) { + fclose(f); + return strdup(mnt->mnt_dir); + } + } + + fclose(f); + return NULL; +} + +static int do_show_tree(int argc, char **argv) +{ + char *cgroup_root; + int ret; + + switch (argc) { + case 0: + cgroup_root = find_cgroup_root(); + if (!cgroup_root) { + p_err("cgroup v2 isn't mounted"); + return -1; + } + break; + case 1: + cgroup_root = argv[0]; + break; + default: + p_err("too many parameters for cgroup tree"); + return -1; + } + + + if (json_output) + jsonw_start_array(json_wtr); + else + printf("%s\n" + "%-8s %-15s %-15s %-15s\n", + "CgroupPath", + "ID", "AttachType", "AttachFlags", "Name"); + + switch (nftw(cgroup_root, do_show_tree_fn, 1024, FTW_MOUNT)) { + case NFTW_ERR: + p_err("can't iterate over %s: %s", cgroup_root, + strerror(errno)); + ret = -1; + break; + case SHOW_TREE_FN_ERR: + ret = -1; + break; + default: + ret = 0; + } + + if (json_output) + jsonw_end_array(json_wtr); + + if (argc == 0) + free(cgroup_root); + + return ret; +} + static int do_attach(int argc, char **argv) { enum bpf_attach_type attach_type; @@ -289,6 +446,7 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %s %s { show | list } CGROUP\n" + " %s %s tree [CGROUP_ROOT]\n" " %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n" " %s %s detach CGROUP ATTACH_TYPE PROG\n" " %s %s help\n" @@ -298,6 +456,7 @@ static int do_help(int argc, char **argv) " " HELP_SPEC_PROGRAM "\n" " " HELP_SPEC_OPTIONS "\n" "", + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]); @@ -307,6 +466,7 @@ static int do_help(int argc, char **argv) static const struct cmd cmds[] = { { "show", do_show }, { "list", do_show }, + { "tree", do_show_tree }, { "attach", do_attach }, { "detach", do_detach }, { "help", do_help }, From 7d31a0a168979e671e25073c492c29c42610e7e2 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 6 Jul 2018 14:28:15 -0700 Subject: [PATCH 02/61] bpftool: document cgroup tree command Describe cgroup tree command in the corresponding bpftool man page. Signed-off-by: Roman Gushchin Acked-by: Jakub Kicinski Cc: Quentin Monnet Cc: Daniel Borkmann Cc: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index 7b0e6d453e92..edbe81534c6d 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -15,12 +15,13 @@ SYNOPSIS *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := - { **show** | **list** | **attach** | **detach** | **help** } + { **show** | **list** | **tree** | **attach** | **detach** | **help** } MAP COMMANDS ============= | **bpftool** **cgroup { show | list }** *CGROUP* +| **bpftool** **cgroup tree** [*CGROUP_ROOT*] | **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] | **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* | **bpftool** **cgroup help** @@ -39,6 +40,15 @@ DESCRIPTION Output will start with program ID followed by attach type, attach flags and program name. + **bpftool cgroup tree** [*CGROUP_ROOT*] + Iterate over all cgroups in *CGROUP_ROOT* and list all + attached programs. If *CGROUP_ROOT* is not specified, + bpftool uses cgroup v2 mountpoint. + + The output is similar to the output of cgroup show/list + commands: it starts with absolute cgroup path, followed by + program ID, attach type, attach flags and program name. + **bpftool cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] Attach program *PROG* to the cgroup *CGROUP* with attach type *ATTACH_TYPE* and optional *ATTACH_FLAGS*. From 02000b55850deeadffe433e4b4930a8831f477de Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 6 Jul 2018 14:28:16 -0700 Subject: [PATCH 03/61] bpftool: add bash completion for cgroup tree command This commit adds a bash completion to the bpftool cgroup tree command. Signed-off-by: Roman Gushchin Cc: Jakub Kicinski Cc: Quentin Monnet Cc: Daniel Borkmann Cc: Alexei Starovoitov Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/bash-completion/bpftool | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index fffd76f4998b..ce0bc0cda361 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -414,6 +414,10 @@ _bpftool() _filedir return 0 ;; + tree) + _filedir + return 0 + ;; attach|detach) local ATTACH_TYPES='ingress egress sock_create sock_ops \ device bind4 bind6 post_bind4 post_bind6 connect4 \ @@ -455,7 +459,7 @@ _bpftool() *) [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'help attach detach \ - show list' -- "$cur" ) ) + show list tree' -- "$cur" ) ) ;; esac ;; From 06ae48269d1e0324d806fca30fe77112f4a4a14a Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 6 Jul 2018 15:13:18 -0700 Subject: [PATCH 04/61] lib: reciprocal_div: implement the improved algorithm on the paper mentioned The new added "reciprocal_value_adv" implements the advanced version of the algorithm described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose ceil(log2(d)) result will be 32 which then requires u128 divide on host. The exception case could be easily handled before calling "reciprocal_value_adv". The advanced version requires more complex calculation to get the reciprocal multiplier and other control variables, but then could reduce the required emulation operations. It makes no sense to use this advanced version for host divide emulation, those extra complexities for calculating multiplier etc could completely waive our saving on emulation operations. However, it makes sense to use it for JIT divide code generation (for example eBPF JIT backends) for which we are willing to trade performance of JITed code with that of host. As shown by the following pseudo code, the required emulation operations could go down from 6 (the basic version) to 3 or 4. To use the result of "reciprocal_value_adv", suppose we want to calculate n/d, the C-style pseudo code will be the following, it could be easily changed to real code generation for other JIT targets. struct reciprocal_value_adv rvalue; u8 pre_shift, exp; // handle exception case. if (d >= (1U << 31)) { result = n >= d; return; } rvalue = reciprocal_value_adv(d, 32) exp = rvalue.exp; if (rvalue.is_wide_m && !(d & 1)) { // floor(log2(d & (2^32 -d))) pre_shift = fls(d & -d) - 1; rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift); } else { pre_shift = 0; } // code generation starts. if (imm == 1U << exp) { result = n >> exp; } else if (rvalue.is_wide_m) { // pre_shift must be zero when reached here. t = (n * rvalue.m) >> 32; result = n - t; result >>= 1; result += t; result >>= rvalue.sh - 1; } else { if (pre_shift) result = n >> pre_shift; result = ((u64)result * rvalue.m) >> 32; result >>= rvalue.sh; } Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/linux/reciprocal_div.h | 68 ++++++++++++++++++++++++++++++++++ lib/reciprocal_div.c | 41 ++++++++++++++++++++ 2 files changed, 109 insertions(+) diff --git a/include/linux/reciprocal_div.h b/include/linux/reciprocal_div.h index e031e9f2f9d8..585ce89c0f33 100644 --- a/include/linux/reciprocal_div.h +++ b/include/linux/reciprocal_div.h @@ -25,6 +25,9 @@ struct reciprocal_value { u8 sh1, sh2; }; +/* "reciprocal_value" and "reciprocal_divide" together implement the basic + * version of the algorithm described in Figure 4.1 of the paper. + */ struct reciprocal_value reciprocal_value(u32 d); static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) @@ -33,4 +36,69 @@ static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) return (t + ((a - t) >> R.sh1)) >> R.sh2; } +struct reciprocal_value_adv { + u32 m; + u8 sh, exp; + bool is_wide_m; +}; + +/* "reciprocal_value_adv" implements the advanced version of the algorithm + * described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose + * ceil(log2(d)) result will be 32 which then requires u128 divide on host. The + * exception case could be easily handled before calling "reciprocal_value_adv". + * + * The advanced version requires more complex calculation to get the reciprocal + * multiplier and other control variables, but then could reduce the required + * emulation operations. + * + * It makes no sense to use this advanced version for host divide emulation, + * those extra complexities for calculating multiplier etc could completely + * waive our saving on emulation operations. + * + * However, it makes sense to use it for JIT divide code generation for which + * we are willing to trade performance of JITed code with that of host. As shown + * by the following pseudo code, the required emulation operations could go down + * from 6 (the basic version) to 3 or 4. + * + * To use the result of "reciprocal_value_adv", suppose we want to calculate + * n/d, the pseudo C code will be: + * + * struct reciprocal_value_adv rvalue; + * u8 pre_shift, exp; + * + * // handle exception case. + * if (d >= (1U << 31)) { + * result = n >= d; + * return; + * } + * + * rvalue = reciprocal_value_adv(d, 32) + * exp = rvalue.exp; + * if (rvalue.is_wide_m && !(d & 1)) { + * // floor(log2(d & (2^32 -d))) + * pre_shift = fls(d & -d) - 1; + * rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift); + * } else { + * pre_shift = 0; + * } + * + * // code generation starts. + * if (imm == 1U << exp) { + * result = n >> exp; + * } else if (rvalue.is_wide_m) { + * // pre_shift must be zero when reached here. + * t = (n * rvalue.m) >> 32; + * result = n - t; + * result >>= 1; + * result += t; + * result >>= rvalue.sh - 1; + * } else { + * if (pre_shift) + * result = n >> pre_shift; + * result = ((u64)result * rvalue.m) >> 32; + * result >>= rvalue.sh; + * } + */ +struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec); + #endif /* _LINUX_RECIPROCAL_DIV_H */ diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c index fcb4ce682c6f..bf043258fa00 100644 --- a/lib/reciprocal_div.c +++ b/lib/reciprocal_div.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -26,3 +27,43 @@ struct reciprocal_value reciprocal_value(u32 d) return R; } EXPORT_SYMBOL(reciprocal_value); + +struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec) +{ + struct reciprocal_value_adv R; + u32 l, post_shift; + u64 mhigh, mlow; + + /* ceil(log2(d)) */ + l = fls(d - 1); + /* NOTE: mlow/mhigh could overflow u64 when l == 32. This case needs to + * be handled before calling "reciprocal_value_adv", please see the + * comment at include/linux/reciprocal_div.h. + */ + WARN(l == 32, + "ceil(log2(0x%08x)) == 32, %s doesn't support such divisor", + d, __func__); + post_shift = l; + mlow = 1ULL << (32 + l); + do_div(mlow, d); + mhigh = (1ULL << (32 + l)) + (1ULL << (32 + l - prec)); + do_div(mhigh, d); + + for (; post_shift > 0; post_shift--) { + u64 lo = mlow >> 1, hi = mhigh >> 1; + + if (lo >= hi) + break; + + mlow = lo; + mhigh = hi; + } + + R.m = (u32)mhigh; + R.sh = post_shift; + R.exp = l; + R.is_wide_m = mhigh > U32_MAX; + + return R; +} +EXPORT_SYMBOL(reciprocal_value_adv); From 662c54721d3a1e8950029cb6b0ed264d59847711 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 6 Jul 2018 15:13:19 -0700 Subject: [PATCH 05/61] nfp: bpf: rename umin/umax to umin_src/umax_src The two fields are a copy of umin and umax info of bpf_insn->src_reg generated by verifier. Rename to make their meaning clear. Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 12 ++++++------ drivers/net/ethernet/netronome/nfp/bpf/main.h | 10 +++++----- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 2 +- drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 33111739b210..4a629e9b5c0f 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1772,8 +1772,8 @@ static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) u8 dst, src; dst = insn->dst_reg * 2; - umin = meta->umin; - umax = meta->umax; + umin = meta->umin_src; + umax = meta->umax_src; if (umin == umax) return __shl_imm64(nfp_prog, dst, umin); @@ -1881,8 +1881,8 @@ static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) u8 dst, src; dst = insn->dst_reg * 2; - umin = meta->umin; - umax = meta->umax; + umin = meta->umin_src; + umax = meta->umax_src; if (umin == umax) return __shr_imm64(nfp_prog, dst, umin); @@ -1995,8 +1995,8 @@ static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) u8 dst, src; dst = insn->dst_reg * 2; - umin = meta->umin; - umax = meta->umax; + umin = meta->umin_src; + umax = meta->umax_src; if (umin == umax) return __ashr_imm64(nfp_prog, dst, umin); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 654fe7823e5e..5975a19c28cb 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -263,8 +263,8 @@ struct nfp_bpf_reg_state { * @func_id: function id for call instructions * @arg1: arg1 for call instructions * @arg2: arg2 for call instructions - * @umin: copy of core verifier umin_value. - * @umax: copy of core verifier umax_value. + * @umin_src: copy of core verifier umin_value for src opearnd. + * @umax_src: copy of core verifier umax_value for src operand. * @off: index of first generated machine instruction (in nfp_prog.prog) * @n: eBPF instruction number * @flags: eBPF instruction extra optimization flags @@ -301,11 +301,11 @@ struct nfp_insn_meta { struct nfp_bpf_reg_state arg2; }; /* We are interested in range info for some operands, - * for example, the shift amount. + * for example, the shift amount which is kept in src operand. */ struct { - u64 umin; - u64 umax; + u64 umin_src; + u64 umax_src; }; }; unsigned int off; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 7eae4c0266f8..856a0003bb75 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -191,7 +191,7 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, meta->insn = prog[i]; meta->n = i; if (is_mbpf_indir_shift(meta)) - meta->umin = U64_MAX; + meta->umin_src = U64_MAX; list_add_tail(&meta->l, &nfp_prog->insns); } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 4bfeba7b21b2..e862b739441f 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -555,8 +555,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) const struct bpf_reg_state *sreg = cur_regs(env) + meta->insn.src_reg; - meta->umin = min(meta->umin, sreg->umin_value); - meta->umax = max(meta->umax, sreg->umax_value); + meta->umin_src = min(meta->umin_src, sreg->umin_value); + meta->umax_src = max(meta->umax_src, sreg->umax_value); } return 0; From 33b94310586b761fd04de0ef951d2f5d764b9b2a Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 6 Jul 2018 15:13:20 -0700 Subject: [PATCH 06/61] nfp: bpf: copy range info for all operands of all ALU operations NFP verifier hook is coping range information of the shift amount for indirect shift operation so optimized shift sequences could be generated. We want to use range info to do more things. For example, to decide whether multiplication and divide are supported on the given range. This patch simply let NFP verifier hook to copy range info for all operands of all ALU operands. Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/main.h | 33 +++++++------------ .../net/ethernet/netronome/nfp/bpf/offload.c | 4 ++- .../net/ethernet/netronome/nfp/bpf/verifier.c | 6 +++- 3 files changed, 20 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 5975a19c28cb..c985d0ac61a3 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -265,6 +265,8 @@ struct nfp_bpf_reg_state { * @arg2: arg2 for call instructions * @umin_src: copy of core verifier umin_value for src opearnd. * @umax_src: copy of core verifier umax_value for src operand. + * @umin_dst: copy of core verifier umin_value for dst opearnd. + * @umax_dst: copy of core verifier umax_value for dst operand. * @off: index of first generated machine instruction (in nfp_prog.prog) * @n: eBPF instruction number * @flags: eBPF instruction extra optimization flags @@ -300,12 +302,15 @@ struct nfp_insn_meta { struct bpf_reg_state arg1; struct nfp_bpf_reg_state arg2; }; - /* We are interested in range info for some operands, - * for example, the shift amount which is kept in src operand. + /* We are interested in range info for operands of ALU + * operations. For example, shift amount, multiplicand and + * multiplier etc. */ struct { u64 umin_src; u64 umax_src; + u64 umin_dst; + u64 umax_dst; }; }; unsigned int off; @@ -339,6 +344,11 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) return BPF_MODE(meta->insn.code); } +static inline bool is_mbpf_alu(const struct nfp_insn_meta *meta) +{ + return mbpf_class(meta) == BPF_ALU64 || mbpf_class(meta) == BPF_ALU; +} + static inline bool is_mbpf_load(const struct nfp_insn_meta *meta) { return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM); @@ -384,25 +394,6 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta) return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD); } -static inline bool is_mbpf_indir_shift(const struct nfp_insn_meta *meta) -{ - u8 code = meta->insn.code; - bool is_alu, is_shift; - u8 opclass, opcode; - - opclass = BPF_CLASS(code); - is_alu = opclass == BPF_ALU64 || opclass == BPF_ALU; - if (!is_alu) - return false; - - opcode = BPF_OP(code); - is_shift = opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH; - if (!is_shift) - return false; - - return BPF_SRC(code) == BPF_X; -} - /** * struct nfp_prog - nfp BPF program * @bpf: backpointer to the bpf app priv structure diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 856a0003bb75..78f44c4d95b4 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -190,8 +190,10 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, meta->insn = prog[i]; meta->n = i; - if (is_mbpf_indir_shift(meta)) + if (is_mbpf_alu(meta)) { meta->umin_src = U64_MAX; + meta->umin_dst = U64_MAX; + } list_add_tail(&meta->l, &nfp_prog->insns); } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index e862b739441f..7bd9666bd8ff 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -551,12 +551,16 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) if (is_mbpf_xadd(meta)) return nfp_bpf_check_xadd(nfp_prog, meta, env); - if (is_mbpf_indir_shift(meta)) { + if (is_mbpf_alu(meta)) { const struct bpf_reg_state *sreg = cur_regs(env) + meta->insn.src_reg; + const struct bpf_reg_state *dreg = + cur_regs(env) + meta->insn.dst_reg; meta->umin_src = min(meta->umin_src, sreg->umin_value); meta->umax_src = max(meta->umax_src, sreg->umax_value); + meta->umin_dst = min(meta->umin_dst, dreg->umin_value); + meta->umax_dst = max(meta->umax_dst, dreg->umax_value); } return 0; From d3d23fdb4688de4421e94227c95b1d54b233f432 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 6 Jul 2018 15:13:21 -0700 Subject: [PATCH 07/61] nfp: bpf: support u16 and u32 multiplications NFP supports u16 and u32 multiplication. Multiplication is done 8-bits per step, therefore we need 2 steps for u16 and 4 steps for u32. We also need one start instruction to initialize the sequence and one or two instructions to fetch the result depending on either you need the high halve of u32 multiplication. For ALU64, if either operand is beyond u32's value range, we reject it. One thing to note, if the source operand is BPF_K, then we need to check "imm" field directly, and we'd reject it if it is negative. Because for ALU64, "imm" (with s32 type) is expected to be sign extended to s64 which NFP mul doesn't support. For ALU32, it is fine for "imm" be negative though, because the result is 32-bits and here is no difference on the low halve of result for signed/unsigned mul, so we will get correct result. Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 137 ++++++++++++++++++ drivers/net/ethernet/netronome/nfp/bpf/main.h | 5 + .../net/ethernet/netronome/nfp/bpf/verifier.c | 58 ++++++-- drivers/net/ethernet/netronome/nfp/nfp_asm.h | 28 ++++ 4 files changed, 217 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 4a629e9b5c0f..f1b27c3a4347 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -415,6 +415,60 @@ emit_alu(struct nfp_prog *nfp_prog, swreg dst, reg.dst_lmextn, reg.src_lmextn); } +static void +__emit_mul(struct nfp_prog *nfp_prog, enum alu_dst_ab dst_ab, u16 areg, + enum mul_type type, enum mul_step step, u16 breg, bool swap, + bool wr_both, bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_MUL_BASE | + FIELD_PREP(OP_MUL_A_SRC, areg) | + FIELD_PREP(OP_MUL_B_SRC, breg) | + FIELD_PREP(OP_MUL_STEP, step) | + FIELD_PREP(OP_MUL_DST_AB, dst_ab) | + FIELD_PREP(OP_MUL_SW, swap) | + FIELD_PREP(OP_MUL_TYPE, type) | + FIELD_PREP(OP_MUL_WR_AB, wr_both) | + FIELD_PREP(OP_MUL_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_MUL_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_mul(struct nfp_prog *nfp_prog, swreg lreg, enum mul_type type, + enum mul_step step, swreg rreg) +{ + struct nfp_insn_ur_regs reg; + u16 areg; + int err; + + if (type == MUL_TYPE_START && step != MUL_STEP_NONE) { + nfp_prog->error = -EINVAL; + return; + } + + if (step == MUL_LAST || step == MUL_LAST_2) { + /* When type is step and step Number is LAST or LAST2, left + * source is used as destination. + */ + err = swreg_to_unrestricted(lreg, reg_none(), rreg, ®); + areg = reg.dst; + } else { + err = swreg_to_unrestricted(reg_none(), lreg, rreg, ®); + areg = reg.areg; + } + + if (err) { + nfp_prog->error = err; + return; + } + + __emit_mul(nfp_prog, reg.dst_ab, areg, type, step, reg.breg, reg.swap, + reg.wr_both, reg.dst_lmextn, reg.src_lmextn); +} + static void __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8, @@ -1380,6 +1434,65 @@ static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out) SHF_SC_R_ROT, 16); } +static void +wrp_mul_u32(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg, + swreg rreg, bool gen_high_half) +{ + emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_1, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_2, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_3, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_4, rreg); + emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_32x32, MUL_LAST, reg_none()); + if (gen_high_half) + emit_mul(nfp_prog, dst_hi, MUL_TYPE_STEP_32x32, MUL_LAST_2, + reg_none()); + else + wrp_immed(nfp_prog, dst_hi, 0); +} + +static void +wrp_mul_u16(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg, + swreg rreg) +{ + emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_1, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_2, rreg); + emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_16x16, MUL_LAST, reg_none()); +} + +static int +wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + bool gen_high_half, bool ropnd_from_reg) +{ + swreg multiplier, multiplicand, dst_hi, dst_lo; + const struct bpf_insn *insn = &meta->insn; + u32 lopnd_max, ropnd_max; + u8 dst_reg; + + dst_reg = insn->dst_reg; + multiplicand = reg_a(dst_reg * 2); + dst_hi = reg_both(dst_reg * 2 + 1); + dst_lo = reg_both(dst_reg * 2); + lopnd_max = meta->umax_dst; + if (ropnd_from_reg) { + multiplier = reg_b(insn->src_reg * 2); + ropnd_max = meta->umax_src; + } else { + u32 imm = insn->imm; + + multiplier = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); + ropnd_max = imm; + } + if (lopnd_max > U16_MAX || ropnd_max > U16_MAX) + wrp_mul_u32(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier, + gen_high_half); + else + wrp_mul_u16(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier); + + return 0; +} + static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog); @@ -1684,6 +1797,16 @@ static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } +static int mul_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_mul(nfp_prog, meta, true, true); +} + +static int mul_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_mul(nfp_prog, meta, true, false); +} + static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; @@ -2097,6 +2220,16 @@ static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm); } +static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_mul(nfp_prog, meta, false, true); +} + +static int mul_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_mul(nfp_prog, meta, false, false); +} + static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { u8 dst = meta->insn.dst_reg * 2; @@ -2848,6 +2981,8 @@ static const instr_cb_t instr_cb[256] = { [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64, [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, + [BPF_ALU64 | BPF_MUL | BPF_X] = mul_reg64, + [BPF_ALU64 | BPF_MUL | BPF_K] = mul_imm64, [BPF_ALU64 | BPF_NEG] = neg_reg64, [BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64, [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, @@ -2867,6 +3002,8 @@ static const instr_cb_t instr_cb[256] = { [BPF_ALU | BPF_ADD | BPF_K] = add_imm, [BPF_ALU | BPF_SUB | BPF_X] = sub_reg, [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, + [BPF_ALU | BPF_MUL | BPF_X] = mul_reg, + [BPF_ALU | BPF_MUL | BPF_K] = mul_imm, [BPF_ALU | BPF_NEG] = neg_reg, [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, [BPF_ALU | BPF_END | BPF_X] = end_reg32, diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index c985d0ac61a3..c10079b1a312 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -394,6 +394,11 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta) return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD); } +static inline bool is_mbpf_mul(const struct nfp_insn_meta *meta) +{ + return is_mbpf_alu(meta) && mbpf_op(meta) == BPF_MUL; +} + /** * struct nfp_prog - nfp BPF program * @bpf: backpointer to the bpf app priv structure diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 7bd9666bd8ff..30d4f1580693 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -516,6 +516,51 @@ nfp_bpf_check_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg); } +static int +nfp_bpf_check_alu(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *sreg = + cur_regs(env) + meta->insn.src_reg; + const struct bpf_reg_state *dreg = + cur_regs(env) + meta->insn.dst_reg; + + meta->umin_src = min(meta->umin_src, sreg->umin_value); + meta->umax_src = max(meta->umax_src, sreg->umax_value); + meta->umin_dst = min(meta->umin_dst, dreg->umin_value); + meta->umax_dst = max(meta->umax_dst, dreg->umax_value); + + /* NFP supports u16 and u32 multiplication. + * + * For ALU64, if either operand is beyond u32's value range, we reject + * it. One thing to note, if the source operand is BPF_K, then we need + * to check "imm" field directly, and we'd reject it if it is negative. + * Because for ALU64, "imm" (with s32 type) is expected to be sign + * extended to s64 which NFP mul doesn't support. + * + * For ALU32, it is fine for "imm" be negative though, because the + * result is 32-bits and there is no difference on the low halve of + * the result for signed/unsigned mul, so we will get correct result. + */ + if (is_mbpf_mul(meta)) { + if (meta->umax_dst > U32_MAX) { + pr_vlog(env, "multiplier is not within u32 value range\n"); + return -EINVAL; + } + if (mbpf_src(meta) == BPF_X && meta->umax_src > U32_MAX) { + pr_vlog(env, "multiplicand is not within u32 value range\n"); + return -EINVAL; + } + if (mbpf_class(meta) == BPF_ALU64 && + mbpf_src(meta) == BPF_K && meta->insn.imm < 0) { + pr_vlog(env, "sign extended multiplicand won't be within u32 value range\n"); + return -EINVAL; + } + } + + return 0; +} + static int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) { @@ -551,17 +596,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) if (is_mbpf_xadd(meta)) return nfp_bpf_check_xadd(nfp_prog, meta, env); - if (is_mbpf_alu(meta)) { - const struct bpf_reg_state *sreg = - cur_regs(env) + meta->insn.src_reg; - const struct bpf_reg_state *dreg = - cur_regs(env) + meta->insn.dst_reg; - - meta->umin_src = min(meta->umin_src, sreg->umin_value); - meta->umax_src = max(meta->umax_src, sreg->umax_value); - meta->umin_dst = min(meta->umin_dst, dreg->umin_value); - meta->umax_dst = max(meta->umax_dst, dreg->umax_value); - } + if (is_mbpf_alu(meta)) + return nfp_bpf_check_alu(nfp_prog, meta, env); return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index f6677bc9875a..cdc4e065f6f5 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -426,4 +426,32 @@ static inline u32 nfp_get_ind_csr_ctx_ptr_offs(u32 read_offset) return (read_offset & ~NFP_IND_ME_CTX_PTR_BASE_MASK) | NFP_CSR_CTX_PTR; } +enum mul_type { + MUL_TYPE_START = 0x00, + MUL_TYPE_STEP_24x8 = 0x01, + MUL_TYPE_STEP_16x16 = 0x02, + MUL_TYPE_STEP_32x32 = 0x03, +}; + +enum mul_step { + MUL_STEP_1 = 0x00, + MUL_STEP_NONE = MUL_STEP_1, + MUL_STEP_2 = 0x01, + MUL_STEP_3 = 0x02, + MUL_STEP_4 = 0x03, + MUL_LAST = 0x04, + MUL_LAST_2 = 0x05, +}; + +#define OP_MUL_BASE 0x0f800000000ULL +#define OP_MUL_A_SRC 0x000000003ffULL +#define OP_MUL_B_SRC 0x000000ffc00ULL +#define OP_MUL_STEP 0x00000700000ULL +#define OP_MUL_DST_AB 0x00000800000ULL +#define OP_MUL_SW 0x00040000000ULL +#define OP_MUL_TYPE 0x00180000000ULL +#define OP_MUL_WR_AB 0x20000000000ULL +#define OP_MUL_SRC_LMEXTN 0x40000000000ULL +#define OP_MUL_DST_LMEXTN 0x80000000000ULL + #endif From 2a952b03d1a011e2e7ddc9ca59cbb21df7dc3525 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 6 Jul 2018 15:13:22 -0700 Subject: [PATCH 08/61] nfp: bpf: support u32 divide using reciprocal_div.h NFP doesn't have integer divide instruction, this patch use reciprocal algorithm (the basic one, reciprocal_div) to emulate it. For each u32 divide, we would need 11 instructions to finish the operation. 7 (for multiplication) + 4 (various ALUs) = 11 Given NFP only supports multiplication no bigger than u32, we'd require divisor and dividend no bigger than that as well. Also eBPF doesn't support signed divide and has enforced this on C language level by failing compilation. However LLVM assembler hasn't enforced this, so it is possible for negative constant to leak in as a BPF_K operand through assembly code, we reject such cases as well. Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 58 ++++++++++++++++++- drivers/net/ethernet/netronome/nfp/bpf/main.h | 5 ++ .../net/ethernet/netronome/nfp/bpf/verifier.c | 31 ++++++++++ 3 files changed, 93 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index f1b27c3a4347..7c9ee3d85907 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -34,10 +34,11 @@ #define pr_fmt(fmt) "NFP net bpf: " fmt #include -#include #include #include +#include #include +#include #include #include "main.h" @@ -1493,6 +1494,32 @@ wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, return 0; } +static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm) +{ + swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst); + struct reciprocal_value rvalue; + swreg tmp_b = imm_b(nfp_prog); + swreg magic; + + if (imm > U32_MAX) { + wrp_immed(nfp_prog, dst_both, 0); + return 0; + } + + rvalue = reciprocal_value(imm); + magic = ur_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog)); + wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a, magic, + true); + emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB, tmp_b); + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, + SHF_SC_R_SHF, rvalue.sh1); + emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD, tmp_b); + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, + SHF_SC_R_SHF, rvalue.sh2); + + return 0; +} + static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog); @@ -1807,6 +1834,21 @@ static int mul_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return wrp_mul(nfp_prog, meta, true, false); } +static int div_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + return wrp_div_imm(nfp_prog, insn->dst_reg * 2, insn->imm); +} + +static int div_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + /* NOTE: verifier hook has rejected cases for which verifier doesn't + * know whether the source operand is constant or not. + */ + return wrp_div_imm(nfp_prog, meta->insn.dst_reg * 2, meta->umin_src); +} + static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; @@ -2230,6 +2272,16 @@ static int mul_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return wrp_mul(nfp_prog, meta, false, false); } +static int div_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return div_reg64(nfp_prog, meta); +} + +static int div_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return div_imm64(nfp_prog, meta); +} + static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { u8 dst = meta->insn.dst_reg * 2; @@ -2983,6 +3035,8 @@ static const instr_cb_t instr_cb[256] = { [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, [BPF_ALU64 | BPF_MUL | BPF_X] = mul_reg64, [BPF_ALU64 | BPF_MUL | BPF_K] = mul_imm64, + [BPF_ALU64 | BPF_DIV | BPF_X] = div_reg64, + [BPF_ALU64 | BPF_DIV | BPF_K] = div_imm64, [BPF_ALU64 | BPF_NEG] = neg_reg64, [BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64, [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, @@ -3004,6 +3058,8 @@ static const instr_cb_t instr_cb[256] = { [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, [BPF_ALU | BPF_MUL | BPF_X] = mul_reg, [BPF_ALU | BPF_MUL | BPF_K] = mul_imm, + [BPF_ALU | BPF_DIV | BPF_X] = div_reg, + [BPF_ALU | BPF_DIV | BPF_K] = div_imm, [BPF_ALU | BPF_NEG] = neg_reg, [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, [BPF_ALU | BPF_END | BPF_X] = end_reg32, diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index c10079b1a312..9845c1a2d4c2 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -399,6 +399,11 @@ static inline bool is_mbpf_mul(const struct nfp_insn_meta *meta) return is_mbpf_alu(meta) && mbpf_op(meta) == BPF_MUL; } +static inline bool is_mbpf_div(const struct nfp_insn_meta *meta) +{ + return is_mbpf_alu(meta) && mbpf_op(meta) == BPF_DIV; +} + /** * struct nfp_prog - nfp BPF program * @bpf: backpointer to the bpf app priv structure diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 30d4f1580693..49ba0d645d36 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -558,6 +558,37 @@ nfp_bpf_check_alu(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, } } + /* NFP doesn't have divide instructions, we support divide by constant + * through reciprocal multiplication. Given NFP support multiplication + * no bigger than u32, we'd require divisor and dividend no bigger than + * that as well. + * + * Also eBPF doesn't support signed divide and has enforced this on C + * language level by failing compilation. However LLVM assembler hasn't + * enforced this, so it is possible for negative constant to leak in as + * a BPF_K operand through assembly code, we reject such cases as well. + */ + if (is_mbpf_div(meta)) { + if (meta->umax_dst > U32_MAX) { + pr_vlog(env, "dividend is not within u32 value range\n"); + return -EINVAL; + } + if (mbpf_src(meta) == BPF_X) { + if (meta->umin_src != meta->umax_src) { + pr_vlog(env, "divisor is not constant\n"); + return -EINVAL; + } + if (meta->umax_src > U32_MAX) { + pr_vlog(env, "divisor is not within u32 value range\n"); + return -EINVAL; + } + } + if (mbpf_src(meta) == BPF_K && meta->insn.imm < 0) { + pr_vlog(env, "divide by negative constant is not supported\n"); + return -EINVAL; + } + } + return 0; } From 9fb410a89e8fa92f8ebc7aa95563442a14da21eb Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 6 Jul 2018 15:13:23 -0700 Subject: [PATCH 09/61] nfp: bpf: migrate to advanced reciprocal divide in reciprocal_div.h As we are doing JIT, we would want to use the advanced version of the reciprocal divide (reciprocal_value_adv) to trade performance with host. We could reduce the required ALU instructions from 4 to 2 or 1. Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 64 ++++++++++++++++---- 1 file changed, 53 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 7c9ee3d85907..1d9e36835404 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1497,8 +1497,8 @@ wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm) { swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst); - struct reciprocal_value rvalue; - swreg tmp_b = imm_b(nfp_prog); + struct reciprocal_value_adv rvalue; + u8 pre_shift, exp; swreg magic; if (imm > U32_MAX) { @@ -1506,16 +1506,58 @@ static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm) return 0; } - rvalue = reciprocal_value(imm); + /* NOTE: because we are using "reciprocal_value_adv" which doesn't + * support "divisor > (1u << 31)", we need to JIT separate NFP sequence + * to handle such case which actually equals to the result of unsigned + * comparison "dst >= imm" which could be calculated using the following + * NFP sequence: + * + * alu[--, dst, -, imm] + * immed[imm, 0] + * alu[dst, imm, +carry, 0] + * + */ + if (imm > 1U << 31) { + swreg tmp_b = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); + + emit_alu(nfp_prog, reg_none(), dst_a, ALU_OP_SUB, tmp_b); + wrp_immed(nfp_prog, imm_a(nfp_prog), 0); + emit_alu(nfp_prog, dst_both, imm_a(nfp_prog), ALU_OP_ADD_C, + reg_imm(0)); + return 0; + } + + rvalue = reciprocal_value_adv(imm, 32); + exp = rvalue.exp; + if (rvalue.is_wide_m && !(imm & 1)) { + pre_shift = fls(imm & -imm) - 1; + rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift); + } else { + pre_shift = 0; + } magic = ur_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog)); - wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a, magic, - true); - emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB, tmp_b); - emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, - SHF_SC_R_SHF, rvalue.sh1); - emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD, tmp_b); - emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, - SHF_SC_R_SHF, rvalue.sh2); + if (imm == 1U << exp) { + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, + SHF_SC_R_SHF, exp); + } else if (rvalue.is_wide_m) { + wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a, + magic, true); + emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB, + imm_b(nfp_prog)); + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, + SHF_SC_R_SHF, 1); + emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD, + imm_b(nfp_prog)); + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, + SHF_SC_R_SHF, rvalue.sh - 1); + } else { + if (pre_shift) + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, + dst_b, SHF_SC_R_SHF, pre_shift); + wrp_mul_u32(nfp_prog, dst_both, reg_none(), dst_a, magic, true); + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, + dst_b, SHF_SC_R_SHF, rvalue.sh); + } return 0; } From 7479efc71f6da5fde6c67d91380ebc0fb455c53d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Jul 2018 14:42:55 -0700 Subject: [PATCH 10/61] selftests/bpf: remove duplicated word from test offloads Trivial removal of duplicated "mode" in error message. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_offload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index be800d0e7a84..a257e4b08392 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -830,7 +830,7 @@ try: check_extack_nsim(err, "program loaded with different flags.", args) ret, _, err = sim.unset_xdp("", force=True, fail=False, include_stderr=True) - fail(ret == 0, "Removed program with a bad mode mode") + fail(ret == 0, "Removed program with a bad mode") check_extack_nsim(err, "program loaded with different flags.", args) start_test("Test MTU restrictions...") From 219f860d2a11b285ac2e880ca4a73e0d36811c0c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Jul 2018 14:42:56 -0700 Subject: [PATCH 11/61] selftests/bpf: add Error: prefix in check_extack helper Currently the test only checks errors, not warnings, so save typing and prefix the extack messages with "Error:" inside the check helper. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_offload.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index a257e4b08392..f8d9bd81d9a4 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -547,11 +547,11 @@ def check_extack(output, reference, args): if skip_extack: return lines = output.split("\n") - comp = len(lines) >= 2 and lines[1] == reference + comp = len(lines) >= 2 and lines[1] == 'Error: ' + reference fail(not comp, "Missing or incorrect netlink extack message") def check_extack_nsim(output, reference, args): - check_extack(output, "Error: netdevsim: " + reference, args) + check_extack(output, "netdevsim: " + reference, args) def check_no_extack(res, needle): fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"), @@ -654,7 +654,7 @@ try: ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False, include_stderr=True) fail(ret == 0, "TC filter loaded without enabling TC offloads") - check_extack(err, "Error: TC offload is disabled on net device.", args) + check_extack(err, "TC offload is disabled on net device.", args) sim.wait_for_flush() sim.set_ethtool_tc_offloads(True) @@ -694,7 +694,7 @@ try: skip_sw=True, fail=False, include_stderr=True) fail(ret == 0, "Offloaded a filter to chain other than 0") - check_extack(err, "Error: Driver supports only offload of chain 0.", args) + check_extack(err, "Driver supports only offload of chain 0.", args) sim.tc_flush_filters() start_test("Test TC replace...") From 8d1fc3de3d9f9bda0d8ec719d8686e9c5b432573 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Jul 2018 14:42:57 -0700 Subject: [PATCH 12/61] tools: bpftool: refactor argument parsing for prog load Add a new macro for printing more informative message than straight usage() when parameters are missing, and use it for prog do_load(). Save the object and pin path argument to variables for clarity. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/main.h | 15 +++++++++++++++ tools/bpf/bpftool/prog.c | 11 +++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index d39f7ef01d23..15b6c49ae533 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -50,6 +50,21 @@ #define NEXT_ARG() ({ argc--; argv++; if (argc < 0) usage(); }) #define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); }) #define BAD_ARG() ({ p_err("what is '%s'?", *argv); -1; }) +#define GET_ARG() ({ argc--; *argv++; }) +#define REQ_ARGS(cnt) \ + ({ \ + int _cnt = (cnt); \ + bool _res; \ + \ + if (argc < _cnt) { \ + p_err("'%s' needs at least %d arguments, %d found", \ + argv[-1], _cnt, argc); \ + _res = false; \ + } else { \ + _res = true; \ + } \ + _res; \ + }) #define ERR_MAX_LEN 1024 diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index a740da99d477..a5ef46c59029 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -681,18 +681,21 @@ static int do_pin(int argc, char **argv) static int do_load(int argc, char **argv) { + const char *objfile, *pinfile; struct bpf_object *obj; int prog_fd; - if (argc != 2) - usage(); + if (!REQ_ARGS(2)) + return -1; + objfile = GET_ARG(); + pinfile = GET_ARG(); - if (bpf_prog_load(argv[0], BPF_PROG_TYPE_UNSPEC, &obj, &prog_fd)) { + if (bpf_prog_load(objfile, BPF_PROG_TYPE_UNSPEC, &obj, &prog_fd)) { p_err("failed to load program"); return -1; } - if (do_pin_fd(prog_fd, argv[1])) + if (do_pin_fd(prog_fd, pinfile)) goto err_close_obj; if (json_output) From ba6dd679a3e81af023ec091c2fb7c82003a27316 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Jul 2018 14:42:58 -0700 Subject: [PATCH 13/61] tools: bpftool: add support for loading programs for offload Extend the bpftool prog load command to also accept "dev" parameter, which will allow us to load programs onto devices. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- .../bpftool/Documentation/bpftool-prog.rst | 6 ++-- tools/bpf/bpftool/bash-completion/bpftool | 23 ++++++++++-- tools/bpf/bpftool/prog.c | 35 +++++++++++++++++-- 3 files changed, 58 insertions(+), 6 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 43d34a5c3ec5..41723c6acaa6 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -24,7 +24,7 @@ MAP COMMANDS | **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}] | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog pin** *PROG* *FILE* -| **bpftool** **prog load** *OBJ* *FILE* +| **bpftool** **prog load** *OBJ* *FILE* [**dev** *NAME*] | **bpftool** **prog help** | | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } @@ -64,8 +64,10 @@ DESCRIPTION Note: *FILE* must be located in *bpffs* mount. - **bpftool prog load** *OBJ* *FILE* + **bpftool prog load** *OBJ* *FILE* [**dev** *NAME*] Load bpf program from binary *OBJ* and pin as *FILE*. + If **dev** *NAME* is specified program will be loaded onto + given networking device (offload). Note: *FILE* must be located in *bpffs* mount. diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index ce0bc0cda361..238c2f80092a 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -99,6 +99,12 @@ _bpftool_get_prog_tags() command sed -n 's/.*"tag": "\(.*\)",$/\1/p' )" -- "$cur" ) ) } +_sysfs_get_netdevs() +{ + COMPREPLY+=( $( compgen -W "$( ls /sys/class/net 2>/dev/null )" -- \ + "$cur" ) ) +} + # For bpftool map update: retrieve type of the map to update. _bpftool_map_update_map_type() { @@ -262,8 +268,21 @@ _bpftool() return 0 ;; load) - _filedir - return 0 + if [[ ${#words[@]} -lt 6 ]]; then + _filedir + return 0 + fi + + case $prev in + dev) + _sysfs_get_netdevs + return 0 + ;; + *) + _bpftool_once_attr 'dev' + return 0 + ;; + esac ;; *) [[ $prev == $object ]] && \ diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index a5ef46c59029..21c74de7156f 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -681,6 +682,9 @@ static int do_pin(int argc, char **argv) static int do_load(int argc, char **argv) { + struct bpf_prog_load_attr attr = { + .prog_type = BPF_PROG_TYPE_UNSPEC, + }; const char *objfile, *pinfile; struct bpf_object *obj; int prog_fd; @@ -690,7 +694,34 @@ static int do_load(int argc, char **argv) objfile = GET_ARG(); pinfile = GET_ARG(); - if (bpf_prog_load(objfile, BPF_PROG_TYPE_UNSPEC, &obj, &prog_fd)) { + while (argc) { + if (is_prefix(*argv, "dev")) { + NEXT_ARG(); + + if (attr.ifindex) { + p_err("offload device already specified"); + return -1; + } + if (!REQ_ARGS(1)) + return -1; + + attr.ifindex = if_nametoindex(*argv); + if (!attr.ifindex) { + p_err("unrecognized netdevice '%s': %s", + *argv, strerror(errno)); + return -1; + } + NEXT_ARG(); + } else { + p_err("expected no more arguments or 'dev', got: '%s'?", + *argv); + return -1; + } + } + + attr.file = objfile; + + if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) { p_err("failed to load program"); return -1; } @@ -722,7 +753,7 @@ static int do_help(int argc, char **argv) " %s %s dump xlated PROG [{ file FILE | opcodes | visual }]\n" " %s %s dump jited PROG [{ file FILE | opcodes }]\n" " %s %s pin PROG FILE\n" - " %s %s load OBJ FILE\n" + " %s %s load OBJ FILE [dev NAME]\n" " %s %s help\n" "\n" " " HELP_SPEC_PROGRAM "\n" From b60df2a0e11fcd24186c312b0307ab8494031e27 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Jul 2018 14:42:59 -0700 Subject: [PATCH 14/61] tools: libbpf: expose the prog type guessing from section name logic libbpf can guess program type based on ELF section names. As libbpf becomes more popular its association between section name strings and types becomes more of a standard. Allow libbpf users to use the same logic for matching strings to types, e.g. when the string originates from command line. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Andrey Ignatov Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 43 ++++++++++++++++++++++++------------------ tools/lib/bpf/libbpf.h | 3 +++ 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 38ed3e92e393..42f31eff5f3f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2081,23 +2081,31 @@ static const struct { #undef BPF_S_PROG_SEC #undef BPF_SA_PROG_SEC -static int bpf_program__identify_section(struct bpf_program *prog) +int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, + enum bpf_attach_type *expected_attach_type) { int i; - if (!prog->section_name) - goto err; + if (!name) + return -EINVAL; - for (i = 0; i < ARRAY_SIZE(section_names); i++) - if (strncmp(prog->section_name, section_names[i].sec, - section_names[i].len) == 0) - return i; + for (i = 0; i < ARRAY_SIZE(section_names); i++) { + if (strncmp(name, section_names[i].sec, section_names[i].len)) + continue; + *prog_type = section_names[i].prog_type; + *expected_attach_type = section_names[i].expected_attach_type; + return 0; + } + return -EINVAL; +} -err: - pr_warning("failed to guess program type based on section name %s\n", - prog->section_name); - - return -1; +static int +bpf_program__identify_section(struct bpf_program *prog, + enum bpf_prog_type *prog_type, + enum bpf_attach_type *expected_attach_type) +{ + return libbpf_prog_type_by_name(prog->section_name, prog_type, + expected_attach_type); } int bpf_map__fd(struct bpf_map *map) @@ -2230,7 +2238,6 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, enum bpf_prog_type prog_type; struct bpf_object *obj; struct bpf_map *map; - int section_idx; int err; if (!attr) @@ -2252,14 +2259,14 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, prog->prog_ifindex = attr->ifindex; expected_attach_type = attr->expected_attach_type; if (prog_type == BPF_PROG_TYPE_UNSPEC) { - section_idx = bpf_program__identify_section(prog); - if (section_idx < 0) { + err = bpf_program__identify_section(prog, &prog_type, + &expected_attach_type); + if (err < 0) { + pr_warning("failed to guess program type based on section name %s\n", + prog->section_name); bpf_object__close(obj); return -EINVAL; } - prog_type = section_names[section_idx].prog_type; - expected_attach_type = - section_names[section_idx].expected_attach_type; } bpf_program__set_type(prog, prog_type); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 564f4be9bae0..d1ce5c828e2e 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -92,6 +92,9 @@ int bpf_object__set_priv(struct bpf_object *obj, void *priv, bpf_object_clear_priv_t clear_priv); void *bpf_object__priv(struct bpf_object *prog); +int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, + enum bpf_attach_type *expected_attach_type); + /* Accessors of bpf_program */ struct bpf_program; struct bpf_program *bpf_program__next(struct bpf_program *prog, From 49f2cba3e57a4d71e3e7001cc2934b563ee495f4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Jul 2018 14:43:00 -0700 Subject: [PATCH 15/61] tools: bpftool: allow users to specify program type for prog load Sometimes program section names don't match with libbpf's expectation. In particular XDP's default section names differ between libbpf and iproute2. Allow users to pass program type on command line. Name the types like the libbpf expected section names. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- .../bpftool/Documentation/bpftool-prog.rst | 15 ++++++- tools/bpf/bpftool/bash-completion/bpftool | 6 +++ tools/bpf/bpftool/prog.c | 44 +++++++++++++++++-- 3 files changed, 60 insertions(+), 5 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 41723c6acaa6..e53e1ad2caf0 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -24,10 +24,19 @@ MAP COMMANDS | **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}] | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog pin** *PROG* *FILE* -| **bpftool** **prog load** *OBJ* *FILE* [**dev** *NAME*] +| **bpftool** **prog load** *OBJ* *FILE* [**type** *TYPE*] [**dev** *NAME*] | **bpftool** **prog help** | | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } +| *TYPE* := { +| **socket** | **kprobe** | **kretprobe** | **classifier** | **action** | +| **tracepoint** | **raw_tracepoint** | **xdp** | **perf_event** | **cgroup/skb** | +| **cgroup/sock** | **cgroup/dev** | **lwt_in** | **lwt_out** | **lwt_xmit** | +| **lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** | +| **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** | +| **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** +| } + DESCRIPTION =========== @@ -64,8 +73,10 @@ DESCRIPTION Note: *FILE* must be located in *bpffs* mount. - **bpftool prog load** *OBJ* *FILE* [**dev** *NAME*] + **bpftool prog load** *OBJ* *FILE* [**type** *TYPE*] [**dev** *NAME*] Load bpf program from binary *OBJ* and pin as *FILE*. + **type** is optional, if not specified program type will be + inferred from section names. If **dev** *NAME* is specified program will be loaded onto given networking device (offload). diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 238c2f80092a..caf8711993be 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -274,11 +274,17 @@ _bpftool() fi case $prev in + type) + COMPREPLY=( $( compgen -W "socket kprobe kretprobe classifier action tracepoint raw_tracepoint xdp perf_event cgroup/skb cgroup/sock cgroup/dev lwt_in lwt_out lwt_xmit lwt_seg6local sockops sk_skb sk_msg lirc_mode2 cgroup/bind4 cgroup/bind6 cgroup/connect4 cgroup/connect6 cgroup/sendmsg4 cgroup/sendmsg6 cgroup/post_bind4 cgroup/post_bind6" -- \ + "$cur" ) ) + return 0 + ;; dev) _sysfs_get_netdevs return 0 ;; *) + _bpftool_once_attr 'type' _bpftool_once_attr 'dev' return 0 ;; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 21c74de7156f..98695585bbb6 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -688,6 +688,7 @@ static int do_load(int argc, char **argv) const char *objfile, *pinfile; struct bpf_object *obj; int prog_fd; + int err; if (!REQ_ARGS(2)) return -1; @@ -695,7 +696,37 @@ static int do_load(int argc, char **argv) pinfile = GET_ARG(); while (argc) { - if (is_prefix(*argv, "dev")) { + if (is_prefix(*argv, "type")) { + char *type; + + NEXT_ARG(); + + if (attr.prog_type != BPF_PROG_TYPE_UNSPEC) { + p_err("program type already specified"); + return -1; + } + if (!REQ_ARGS(1)) + return -1; + + /* Put a '/' at the end of type to appease libbpf */ + type = malloc(strlen(*argv) + 2); + if (!type) { + p_err("mem alloc failed"); + return -1; + } + *type = 0; + strcat(type, *argv); + strcat(type, "/"); + + err = libbpf_prog_type_by_name(type, &attr.prog_type, + &attr.expected_attach_type); + free(type); + if (err < 0) { + p_err("unknown program type '%s'", *argv); + return err; + } + NEXT_ARG(); + } else if (is_prefix(*argv, "dev")) { NEXT_ARG(); if (attr.ifindex) { @@ -713,7 +744,7 @@ static int do_load(int argc, char **argv) } NEXT_ARG(); } else { - p_err("expected no more arguments or 'dev', got: '%s'?", + p_err("expected no more arguments, 'type' or 'dev', got: '%s'?", *argv); return -1; } @@ -753,10 +784,17 @@ static int do_help(int argc, char **argv) " %s %s dump xlated PROG [{ file FILE | opcodes | visual }]\n" " %s %s dump jited PROG [{ file FILE | opcodes }]\n" " %s %s pin PROG FILE\n" - " %s %s load OBJ FILE [dev NAME]\n" + " %s %s load OBJ FILE [type TYPE] [dev NAME]\n" " %s %s help\n" "\n" " " HELP_SPEC_PROGRAM "\n" + " TYPE := { socket | kprobe | kretprobe | classifier | action |\n" + " tracepoint | raw_tracepoint | xdp | perf_event | cgroup/skb |\n" + " cgroup/sock | cgroup/dev | lwt_in | lwt_out | lwt_xmit |\n" + " lwt_seg6local | sockops | sk_skb | sk_msg | lirc_mode2 |\n" + " cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n" + " cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n" + " cgroup/sendmsg4 | cgroup/sendmsg6 }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], From f83fb22c6c68fdbc98c76291c9e12a40d1eb7ca5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Jul 2018 14:43:01 -0700 Subject: [PATCH 16/61] tools: libbpf: recognize offload neutral maps Add helper to libbpf for recognizing maps which should not have ifindex set when program is loaded. These maps only contain host metadata and therefore are not marked for offload, e.g. the perf event map. Use this helper in bpf_prog_load_xattr(). Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 8 +++++++- tools/lib/bpf/libbpf.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 42f31eff5f3f..30992305f4c1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2154,6 +2154,11 @@ void *bpf_map__priv(struct bpf_map *map) return map ? map->priv : ERR_PTR(-EINVAL); } +bool bpf_map__is_offload_neutral(struct bpf_map *map) +{ + return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; +} + void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) { map->map_ifindex = ifindex; @@ -2278,7 +2283,8 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, } bpf_map__for_each(map, obj) { - map->map_ifindex = attr->ifindex; + if (!bpf_map__is_offload_neutral(map)) + map->map_ifindex = attr->ifindex; } if (!first_prog) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index d1ce5c828e2e..749acf58a5da 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -255,6 +255,7 @@ typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); int bpf_map__set_priv(struct bpf_map *map, void *priv, bpf_map_clear_priv_t clear_priv); void *bpf_map__priv(struct bpf_map *map); +bool bpf_map__is_offload_neutral(struct bpf_map *map); void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); int bpf_map__pin(struct bpf_map *map, const char *path); From 07f2d4eac2a850fc9649b27aa935cdcd263fb1ff Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Jul 2018 14:43:02 -0700 Subject: [PATCH 17/61] tools: libbpf: add extended attributes version of bpf_object__open() Similarly to bpf_prog_load() users of bpf_object__open() may need to specify the expected program type. Program type is needed at open to avoid the kernel version check for program types which don't require it. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Andrey Ignatov Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 26 ++++++++++++++++++++------ tools/lib/bpf/libbpf.h | 6 ++++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 30992305f4c1..06cd534d2fba 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1520,15 +1520,26 @@ out: return ERR_PTR(err); } -struct bpf_object *bpf_object__open(const char *path) +struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) { /* param validation */ - if (!path) + if (!attr->file) return NULL; - pr_debug("loading %s\n", path); + pr_debug("loading %s\n", attr->file); - return __bpf_object__open(path, NULL, 0, true); + return __bpf_object__open(attr->file, NULL, 0, + bpf_prog_type__needs_kver(attr->prog_type)); +} + +struct bpf_object *bpf_object__open(const char *path) +{ + struct bpf_object_open_attr attr = { + .file = path, + .prog_type = BPF_PROG_TYPE_UNSPEC, + }; + + return bpf_object__open_xattr(&attr); } struct bpf_object *bpf_object__open_buffer(void *obj_buf, @@ -2238,6 +2249,10 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type, int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, struct bpf_object **pobj, int *prog_fd) { + struct bpf_object_open_attr open_attr = { + .file = attr->file, + .prog_type = attr->prog_type, + }; struct bpf_program *prog, *first_prog = NULL; enum bpf_attach_type expected_attach_type; enum bpf_prog_type prog_type; @@ -2250,8 +2265,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, if (!attr->file) return -EINVAL; - obj = __bpf_object__open(attr->file, NULL, 0, - bpf_prog_type__needs_kver(attr->prog_type)); + obj = bpf_object__open_xattr(&open_attr); if (IS_ERR_OR_NULL(obj)) return -ENOENT; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 749acf58a5da..e911ad32d02e 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -66,7 +66,13 @@ void libbpf_set_print(libbpf_print_fn_t warn, /* Hide internal to user */ struct bpf_object; +struct bpf_object_open_attr { + const char *file; + enum bpf_prog_type prog_type; +}; + struct bpf_object *bpf_object__open(const char *path); +struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr); struct bpf_object *bpf_object__open_buffer(void *obj_buf, size_t obj_buf_sz, const char *name); From c8406848badd0a0b040b0d286e612678662a2ab3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Jul 2018 14:43:03 -0700 Subject: [PATCH 18/61] tools: bpftool: reimplement bpf_prog_load() for prog load bpf_prog_load() is a very useful helper but it doesn't give us full flexibility of modifying the BPF objects before loading. Open code bpf_prog_load() in bpftool so we can add extra logic in following commits. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/prog.c | 61 +++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 13 deletions(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 98695585bbb6..2bdd5ecd1aad 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -43,6 +43,8 @@ #include #include +#include + #include #include @@ -682,17 +684,20 @@ static int do_pin(int argc, char **argv) static int do_load(int argc, char **argv) { - struct bpf_prog_load_attr attr = { + enum bpf_attach_type expected_attach_type; + struct bpf_object_open_attr attr = { .prog_type = BPF_PROG_TYPE_UNSPEC, }; - const char *objfile, *pinfile; + struct bpf_program *prog; struct bpf_object *obj; - int prog_fd; + struct bpf_map *map; + const char *pinfile; + __u32 ifindex = 0; int err; if (!REQ_ARGS(2)) return -1; - objfile = GET_ARG(); + attr.file = GET_ARG(); pinfile = GET_ARG(); while (argc) { @@ -719,7 +724,7 @@ static int do_load(int argc, char **argv) strcat(type, "/"); err = libbpf_prog_type_by_name(type, &attr.prog_type, - &attr.expected_attach_type); + &expected_attach_type); free(type); if (err < 0) { p_err("unknown program type '%s'", *argv); @@ -729,15 +734,15 @@ static int do_load(int argc, char **argv) } else if (is_prefix(*argv, "dev")) { NEXT_ARG(); - if (attr.ifindex) { + if (ifindex) { p_err("offload device already specified"); return -1; } if (!REQ_ARGS(1)) return -1; - attr.ifindex = if_nametoindex(*argv); - if (!attr.ifindex) { + ifindex = if_nametoindex(*argv); + if (!ifindex) { p_err("unrecognized netdevice '%s': %s", *argv, strerror(errno)); return -1; @@ -750,14 +755,44 @@ static int do_load(int argc, char **argv) } } - attr.file = objfile; - - if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) { - p_err("failed to load program"); + obj = bpf_object__open_xattr(&attr); + if (IS_ERR_OR_NULL(obj)) { + p_err("failed to open object file"); return -1; } - if (do_pin_fd(prog_fd, pinfile)) + prog = bpf_program__next(NULL, obj); + if (!prog) { + p_err("object file doesn't contain any bpf program"); + goto err_close_obj; + } + + bpf_program__set_ifindex(prog, ifindex); + if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) { + const char *sec_name = bpf_program__title(prog, false); + + err = libbpf_prog_type_by_name(sec_name, &attr.prog_type, + &expected_attach_type); + if (err < 0) { + p_err("failed to guess program type based on section name %s\n", + sec_name); + goto err_close_obj; + } + } + bpf_program__set_type(prog, attr.prog_type); + bpf_program__set_expected_attach_type(prog, expected_attach_type); + + bpf_map__for_each(map, obj) + if (!bpf_map__is_offload_neutral(map)) + bpf_map__set_ifindex(map, ifindex); + + err = bpf_object__load(obj); + if (err) { + p_err("failed to load object file"); + goto err_close_obj; + } + + if (do_pin_fd(bpf_program__fd(prog), pinfile)) goto err_close_obj; if (json_output) From 8d13406c02f9c38f106416e1dbe0e68059b9f59a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Jul 2018 14:43:04 -0700 Subject: [PATCH 19/61] tools: libbpf: move library error code into a separate file libbpf_strerror() depends on XSI-compliant (POSIX) version of strerror_r(), which prevents us from using GNU-extensions in libbpf.c, like reallocarray() or dup3(). Move error printing code into a separate file to allow it to continue using POSIX strerror_r(). No functional changes. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/lib/bpf/Build | 2 +- tools/lib/bpf/libbpf.c | 48 ----------------------- tools/lib/bpf/libbpf_errno.c | 74 ++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 49 deletions(-) create mode 100644 tools/lib/bpf/libbpf_errno.c diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 6070e655042d..13a861135127 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1 +1 @@ -libbpf-y := libbpf.o bpf.o nlattr.o btf.o +libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 06cd534d2fba..f732237610e5 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -95,54 +95,6 @@ void libbpf_set_print(libbpf_print_fn_t warn, #define STRERR_BUFSIZE 128 -#define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START) -#define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c) -#define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START) - -static const char *libbpf_strerror_table[NR_ERRNO] = { - [ERRCODE_OFFSET(LIBELF)] = "Something wrong in libelf", - [ERRCODE_OFFSET(FORMAT)] = "BPF object format invalid", - [ERRCODE_OFFSET(KVERSION)] = "'version' section incorrect or lost", - [ERRCODE_OFFSET(ENDIAN)] = "Endian mismatch", - [ERRCODE_OFFSET(INTERNAL)] = "Internal error in libbpf", - [ERRCODE_OFFSET(RELOC)] = "Relocation failed", - [ERRCODE_OFFSET(VERIFY)] = "Kernel verifier blocks program loading", - [ERRCODE_OFFSET(PROG2BIG)] = "Program too big", - [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version", - [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type", - [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message", - [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence", -}; - -int libbpf_strerror(int err, char *buf, size_t size) -{ - if (!buf || !size) - return -1; - - err = err > 0 ? err : -err; - - if (err < __LIBBPF_ERRNO__START) { - int ret; - - ret = strerror_r(err, buf, size); - buf[size - 1] = '\0'; - return ret; - } - - if (err < __LIBBPF_ERRNO__END) { - const char *msg; - - msg = libbpf_strerror_table[ERRNO_OFFSET(err)]; - snprintf(buf, size, "%s", msg); - buf[size - 1] = '\0'; - return 0; - } - - snprintf(buf, size, "Unknown libbpf error %d", err); - buf[size - 1] = '\0'; - return -1; -} - #define CHECK_ERR(action, err, out) do { \ err = action; \ if (err) \ diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c new file mode 100644 index 000000000000..d9ba851bd7f9 --- /dev/null +++ b/tools/lib/bpf/libbpf_errno.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: LGPL-2.1 + +/* + * Copyright (C) 2013-2015 Alexei Starovoitov + * Copyright (C) 2015 Wang Nan + * Copyright (C) 2015 Huawei Inc. + * Copyright (C) 2017 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see + */ + +#include +#include + +#include "libbpf.h" + +#define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START) +#define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c) +#define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START) + +static const char *libbpf_strerror_table[NR_ERRNO] = { + [ERRCODE_OFFSET(LIBELF)] = "Something wrong in libelf", + [ERRCODE_OFFSET(FORMAT)] = "BPF object format invalid", + [ERRCODE_OFFSET(KVERSION)] = "'version' section incorrect or lost", + [ERRCODE_OFFSET(ENDIAN)] = "Endian mismatch", + [ERRCODE_OFFSET(INTERNAL)] = "Internal error in libbpf", + [ERRCODE_OFFSET(RELOC)] = "Relocation failed", + [ERRCODE_OFFSET(VERIFY)] = "Kernel verifier blocks program loading", + [ERRCODE_OFFSET(PROG2BIG)] = "Program too big", + [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version", + [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type", + [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message", + [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence", +}; + +int libbpf_strerror(int err, char *buf, size_t size) +{ + if (!buf || !size) + return -1; + + err = err > 0 ? err : -err; + + if (err < __LIBBPF_ERRNO__START) { + int ret; + + ret = strerror_r(err, buf, size); + buf[size - 1] = '\0'; + return ret; + } + + if (err < __LIBBPF_ERRNO__END) { + const char *msg; + + msg = libbpf_strerror_table[ERRNO_OFFSET(err)]; + snprintf(buf, size, "%s", msg); + buf[size - 1] = '\0'; + return 0; + } + + snprintf(buf, size, "Unknown libbpf error %d", err); + buf[size - 1] = '\0'; + return -1; +} From 531b014e7a2fedaeff0b19b2934d830cd4b35dc0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Jul 2018 14:43:05 -0700 Subject: [PATCH 20/61] tools: bpf: make use of reallocarray reallocarray() is a safer variant of realloc which checks for multiplication overflow in case of array allocation. Since it's not available in Glibc < 2.26 import kernel's overflow.h and add a static inline implementation when needed. Use feature detection to probe for existence of reallocarray. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Reviewed-by: Jiong Wang Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Makefile | 6 +- tools/bpf/bpftool/main.h | 1 + tools/bpf/bpftool/xlated_dumper.c | 6 +- tools/build/feature/Makefile | 4 + tools/build/feature/test-reallocarray.c | 8 + tools/include/linux/compiler-gcc.h | 4 + tools/include/linux/overflow.h | 278 ++++++++++++++++++++++++ tools/include/tools/libc_compat.h | 20 ++ tools/lib/bpf/Makefile | 6 +- tools/lib/bpf/libbpf.c | 10 +- 10 files changed, 334 insertions(+), 9 deletions(-) create mode 100644 tools/build/feature/test-reallocarray.c create mode 100644 tools/include/linux/overflow.h create mode 100644 tools/include/tools/libc_compat.h diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 0911b00b25cc..6c4830e18879 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -52,7 +52,7 @@ INSTALL ?= install RM ?= rm -f FEATURE_USER = .bpftool -FEATURE_TESTS = libbfd disassembler-four-args +FEATURE_TESTS = libbfd disassembler-four-args reallocarray FEATURE_DISPLAY = libbfd disassembler-four-args check_feat := 1 @@ -75,6 +75,10 @@ ifeq ($(feature-disassembler-four-args), 1) CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE endif +ifeq ($(feature-reallocarray), 0) +CFLAGS += -DCOMPAT_NEED_REALLOCARRAY +endif + include $(wildcard $(OUTPUT)*.d) all: $(OUTPUT)bpftool diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 15b6c49ae533..1e02e4031693 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -42,6 +42,7 @@ #include #include #include +#include #include "json_writer.h" diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index b97f1da60dd1..3284759df98a 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -35,6 +35,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#define _GNU_SOURCE #include #include #include @@ -66,9 +67,8 @@ void kernel_syms_load(struct dump_data *dd) while (!feof(fp)) { if (!fgets(buff, sizeof(buff), fp)) break; - tmp = realloc(dd->sym_mapping, - (dd->sym_count + 1) * - sizeof(*dd->sym_mapping)); + tmp = reallocarray(dd->sym_mapping, dd->sym_count + 1, + sizeof(*dd->sym_mapping)); if (!tmp) { out: free(dd->sym_mapping); diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index dac9563b5470..0516259be70f 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -14,6 +14,7 @@ FILES= \ test-libaudit.bin \ test-libbfd.bin \ test-disassembler-four-args.bin \ + test-reallocarray.bin \ test-liberty.bin \ test-liberty-z.bin \ test-cplus-demangle.bin \ @@ -204,6 +205,9 @@ $(OUTPUT)test-libbfd.bin: $(OUTPUT)test-disassembler-four-args.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes +$(OUTPUT)test-reallocarray.bin: + $(BUILD) + $(OUTPUT)test-liberty.bin: $(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty diff --git a/tools/build/feature/test-reallocarray.c b/tools/build/feature/test-reallocarray.c new file mode 100644 index 000000000000..8170de35150d --- /dev/null +++ b/tools/build/feature/test-reallocarray.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include + +int main(void) +{ + return !!reallocarray(NULL, 1, 1); +} diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index 70fe61295733..0d35f18006a1 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -36,3 +36,7 @@ #endif #define __printf(a, b) __attribute__((format(printf, a, b))) #define __scanf(a, b) __attribute__((format(scanf, a, b))) + +#if GCC_VERSION >= 50100 +#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 +#endif diff --git a/tools/include/linux/overflow.h b/tools/include/linux/overflow.h new file mode 100644 index 000000000000..8712ff70995f --- /dev/null +++ b/tools/include/linux/overflow.h @@ -0,0 +1,278 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +#ifndef __LINUX_OVERFLOW_H +#define __LINUX_OVERFLOW_H + +#include + +/* + * In the fallback code below, we need to compute the minimum and + * maximum values representable in a given type. These macros may also + * be useful elsewhere, so we provide them outside the + * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block. + * + * It would seem more obvious to do something like + * + * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0) + * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0) + * + * Unfortunately, the middle expressions, strictly speaking, have + * undefined behaviour, and at least some versions of gcc warn about + * the type_max expression (but not if -fsanitize=undefined is in + * effect; in that case, the warning is deferred to runtime...). + * + * The slightly excessive casting in type_min is to make sure the + * macros also produce sensible values for the exotic type _Bool. [The + * overflow checkers only almost work for _Bool, but that's + * a-feature-not-a-bug, since people shouldn't be doing arithmetic on + * _Bools. Besides, the gcc builtins don't allow _Bool* as third + * argument.] + * + * Idea stolen from + * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html - + * credit to Christian Biere. + */ +#define is_signed_type(type) (((type)(-1)) < (type)1) +#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) +#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) +#define type_min(T) ((T)((T)-type_max(T)-(T)1)) + + +#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW +/* + * For simplicity and code hygiene, the fallback code below insists on + * a, b and *d having the same type (similar to the min() and max() + * macros), whereas gcc's type-generic overflow checkers accept + * different types. Hence we don't just make check_add_overflow an + * alias for __builtin_add_overflow, but add type checks similar to + * below. + */ +#define check_add_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + __builtin_add_overflow(__a, __b, __d); \ +}) + +#define check_sub_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + __builtin_sub_overflow(__a, __b, __d); \ +}) + +#define check_mul_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + __builtin_mul_overflow(__a, __b, __d); \ +}) + +#else + + +/* Checking for unsigned overflow is relatively easy without causing UB. */ +#define __unsigned_add_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = __a + __b; \ + *__d < __a; \ +}) +#define __unsigned_sub_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = __a - __b; \ + __a < __b; \ +}) +/* + * If one of a or b is a compile-time constant, this avoids a division. + */ +#define __unsigned_mul_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = __a * __b; \ + __builtin_constant_p(__b) ? \ + __b > 0 && __a > type_max(typeof(__a)) / __b : \ + __a > 0 && __b > type_max(typeof(__b)) / __a; \ +}) + +/* + * For signed types, detecting overflow is much harder, especially if + * we want to avoid UB. But the interface of these macros is such that + * we must provide a result in *d, and in fact we must produce the + * result promised by gcc's builtins, which is simply the possibly + * wrapped-around value. Fortunately, we can just formally do the + * operations in the widest relevant unsigned type (u64) and then + * truncate the result - gcc is smart enough to generate the same code + * with and without the (u64) casts. + */ + +/* + * Adding two signed integers can overflow only if they have the same + * sign, and overflow has happened iff the result has the opposite + * sign. + */ +#define __signed_add_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = (u64)__a + (u64)__b; \ + (((~(__a ^ __b)) & (*__d ^ __a)) \ + & type_min(typeof(__a))) != 0; \ +}) + +/* + * Subtraction is similar, except that overflow can now happen only + * when the signs are opposite. In this case, overflow has happened if + * the result has the opposite sign of a. + */ +#define __signed_sub_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = (u64)__a - (u64)__b; \ + ((((__a ^ __b)) & (*__d ^ __a)) \ + & type_min(typeof(__a))) != 0; \ +}) + +/* + * Signed multiplication is rather hard. gcc always follows C99, so + * division is truncated towards 0. This means that we can write the + * overflow check like this: + * + * (a > 0 && (b > MAX/a || b < MIN/a)) || + * (a < -1 && (b > MIN/a || b < MAX/a) || + * (a == -1 && b == MIN) + * + * The redundant casts of -1 are to silence an annoying -Wtype-limits + * (included in -Wextra) warning: When the type is u8 or u16, the + * __b_c_e in check_mul_overflow obviously selects + * __unsigned_mul_overflow, but unfortunately gcc still parses this + * code and warns about the limited range of __b. + */ + +#define __signed_mul_overflow(a, b, d) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + typeof(d) __d = (d); \ + typeof(a) __tmax = type_max(typeof(a)); \ + typeof(a) __tmin = type_min(typeof(a)); \ + (void) (&__a == &__b); \ + (void) (&__a == __d); \ + *__d = (u64)__a * (u64)__b; \ + (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \ + (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \ + (__b == (typeof(__b))-1 && __a == __tmin); \ +}) + + +#define check_add_overflow(a, b, d) \ + __builtin_choose_expr(is_signed_type(typeof(a)), \ + __signed_add_overflow(a, b, d), \ + __unsigned_add_overflow(a, b, d)) + +#define check_sub_overflow(a, b, d) \ + __builtin_choose_expr(is_signed_type(typeof(a)), \ + __signed_sub_overflow(a, b, d), \ + __unsigned_sub_overflow(a, b, d)) + +#define check_mul_overflow(a, b, d) \ + __builtin_choose_expr(is_signed_type(typeof(a)), \ + __signed_mul_overflow(a, b, d), \ + __unsigned_mul_overflow(a, b, d)) + + +#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ + +/** + * array_size() - Calculate size of 2-dimensional array. + * + * @a: dimension one + * @b: dimension two + * + * Calculates size of 2-dimensional array: @a * @b. + * + * Returns: number of bytes needed to represent the array or SIZE_MAX on + * overflow. + */ +static inline __must_check size_t array_size(size_t a, size_t b) +{ + size_t bytes; + + if (check_mul_overflow(a, b, &bytes)) + return SIZE_MAX; + + return bytes; +} + +/** + * array3_size() - Calculate size of 3-dimensional array. + * + * @a: dimension one + * @b: dimension two + * @c: dimension three + * + * Calculates size of 3-dimensional array: @a * @b * @c. + * + * Returns: number of bytes needed to represent the array or SIZE_MAX on + * overflow. + */ +static inline __must_check size_t array3_size(size_t a, size_t b, size_t c) +{ + size_t bytes; + + if (check_mul_overflow(a, b, &bytes)) + return SIZE_MAX; + if (check_mul_overflow(bytes, c, &bytes)) + return SIZE_MAX; + + return bytes; +} + +static inline __must_check size_t __ab_c_size(size_t n, size_t size, size_t c) +{ + size_t bytes; + + if (check_mul_overflow(n, size, &bytes)) + return SIZE_MAX; + if (check_add_overflow(bytes, c, &bytes)) + return SIZE_MAX; + + return bytes; +} + +/** + * struct_size() - Calculate size of structure with trailing array. + * @p: Pointer to the structure. + * @member: Name of the array member. + * @n: Number of elements in the array. + * + * Calculates size of memory needed for structure @p followed by an + * array of @n @member elements. + * + * Return: number of bytes needed or SIZE_MAX on overflow. + */ +#define struct_size(p, member, n) \ + __ab_c_size(n, \ + sizeof(*(p)->member) + __must_be_array((p)->member),\ + sizeof(*(p))) + +#endif /* __LINUX_OVERFLOW_H */ diff --git a/tools/include/tools/libc_compat.h b/tools/include/tools/libc_compat.h new file mode 100644 index 000000000000..664ced8cb1b0 --- /dev/null +++ b/tools/include/tools/libc_compat.h @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2018 Netronome Systems, Inc. */ + +#ifndef __TOOLS_LIBC_COMPAT_H +#define __TOOLS_LIBC_COMPAT_H + +#include +#include + +#ifdef COMPAT_NEED_REALLOCARRAY +static inline void *reallocarray(void *ptr, size_t nmemb, size_t size) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(nmemb, size, &bytes))) + return NULL; + return realloc(ptr, bytes); +} +#endif +#endif diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 5390e7725e43..7a8e4c98ef1a 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -66,7 +66,7 @@ ifndef VERBOSE endif FEATURE_USER = .libbpf -FEATURE_TESTS = libelf libelf-getphdrnum libelf-mmap bpf +FEATURE_TESTS = libelf libelf-getphdrnum libelf-mmap bpf reallocarray FEATURE_DISPLAY = libelf bpf INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi -I$(srctree)/tools/perf @@ -120,6 +120,10 @@ ifeq ($(feature-libelf-getphdrnum), 1) override CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT endif +ifeq ($(feature-reallocarray), 0) + override CFLAGS += -DCOMPAT_NEED_REALLOCARRAY +endif + # Append required CFLAGS override CFLAGS += $(EXTRA_WARNINGS) override CFLAGS += -Werror -Wall diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index f732237610e5..fd2c4433863d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -22,6 +22,7 @@ * License along with this program; if not, see */ +#define _GNU_SOURCE #include #include #include @@ -41,6 +42,7 @@ #include #include #include +#include #include #include @@ -321,7 +323,7 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, progs = obj->programs; nr_progs = obj->nr_programs; - progs = realloc(progs, sizeof(progs[0]) * (nr_progs + 1)); + progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0])); if (!progs) { /* * In this case the original obj->programs @@ -822,8 +824,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) continue; } - reloc = realloc(reloc, - sizeof(*obj->efile.reloc) * nr_reloc); + reloc = reallocarray(reloc, nr_reloc, + sizeof(*obj->efile.reloc)); if (!reloc) { pr_warning("realloc failed\n"); err = -ENOMEM; @@ -1115,7 +1117,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, return -LIBBPF_ERRNO__RELOC; } new_cnt = prog->insns_cnt + text->insns_cnt; - new_insn = realloc(prog->insns, new_cnt * sizeof(*insn)); + new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn)); if (!new_insn) { pr_warning("oom in prog realloc\n"); return -ENOMEM; From 26736eb9a483715c2e971a8866f55fbb156903e2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Jul 2018 14:43:06 -0700 Subject: [PATCH 21/61] tools: libbpf: allow map reuse More advanced applications may want to only replace programs without destroying associated maps. Allow libbpf users to achieve that. Instead of always creating all of the maps at load time, expose to users an API to reconstruct the map object from already existing map. The map parameters are read from the kernel and replace the parameters of the ELF map. libbpf does not restrict the map replacement, i.e. the reused map does not have to be compatible with the ELF map definition. We relay on the verifier for checking the compatibility between maps and programs. The ELF map definition is completely overwritten by the information read from the kernel, to make sure libbpf's view of map object corresponds to the actual map. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Andrey Ignatov Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 53 ++++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.h | 1 + 2 files changed, 54 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index fd2c4433863d..955f8eafbf41 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1035,6 +1035,53 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf) return 0; } +int bpf_map__reuse_fd(struct bpf_map *map, int fd) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + int new_fd, err; + char *new_name; + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) + return err; + + new_name = strdup(info.name); + if (!new_name) + return -errno; + + new_fd = open("/", O_RDONLY | O_CLOEXEC); + if (new_fd < 0) + goto err_free_new_name; + + new_fd = dup3(fd, new_fd, O_CLOEXEC); + if (new_fd < 0) + goto err_close_new_fd; + + err = zclose(map->fd); + if (err) + goto err_close_new_fd; + free(map->name); + + map->fd = new_fd; + map->name = new_name; + map->def.type = info.type; + map->def.key_size = info.key_size; + map->def.value_size = info.value_size; + map->def.max_entries = info.max_entries; + map->def.map_flags = info.map_flags; + map->btf_key_type_id = info.btf_key_type_id; + map->btf_value_type_id = info.btf_value_type_id; + + return 0; + +err_close_new_fd: + close(new_fd); +err_free_new_name: + free(new_name); + return -errno; +} + static int bpf_object__create_maps(struct bpf_object *obj) { @@ -1047,6 +1094,12 @@ bpf_object__create_maps(struct bpf_object *obj) struct bpf_map_def *def = &map->def; int *pfd = &map->fd; + if (map->fd >= 0) { + pr_debug("skip map create (preset) %s: fd=%d\n", + map->name, map->fd); + continue; + } + create_attr.name = map->name; create_attr.map_ifindex = map->map_ifindex; create_attr.map_type = def->type; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index e911ad32d02e..1f8fc2060460 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -261,6 +261,7 @@ typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); int bpf_map__set_priv(struct bpf_map *map, void *priv, bpf_map_clear_priv_t clear_priv); void *bpf_map__priv(struct bpf_map *map); +int bpf_map__reuse_fd(struct bpf_map *map, int fd); bool bpf_map__is_offload_neutral(struct bpf_map *map); void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); int bpf_map__pin(struct bpf_map *map, const char *path); From 3ff5a4dc5d890963e669fc99cc62ee07d1da24e8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Jul 2018 14:43:07 -0700 Subject: [PATCH 22/61] tools: bpftool: allow reuse of maps with bpftool prog load Add map parameter to prog load which will allow reuse of existing maps instead of creating new ones. We need feature detection and compat code for reallocarray, since it's not available in many libc versions. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- .../bpftool/Documentation/bpftool-prog.rst | 20 ++- tools/bpf/bpftool/bash-completion/bpftool | 67 +++++++- tools/bpf/bpftool/main.h | 3 + tools/bpf/bpftool/map.c | 4 +- tools/bpf/bpftool/prog.c | 148 ++++++++++++++++-- 5 files changed, 219 insertions(+), 23 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index e53e1ad2caf0..64156a16d530 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -24,9 +24,10 @@ MAP COMMANDS | **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}] | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog pin** *PROG* *FILE* -| **bpftool** **prog load** *OBJ* *FILE* [**type** *TYPE*] [**dev** *NAME*] +| **bpftool** **prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] | **bpftool** **prog help** | +| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } | *TYPE* := { | **socket** | **kprobe** | **kretprobe** | **classifier** | **action** | @@ -73,10 +74,17 @@ DESCRIPTION Note: *FILE* must be located in *bpffs* mount. - **bpftool prog load** *OBJ* *FILE* [**type** *TYPE*] [**dev** *NAME*] + **bpftool prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] Load bpf program from binary *OBJ* and pin as *FILE*. **type** is optional, if not specified program type will be inferred from section names. + By default bpftool will create new maps as declared in the ELF + object being loaded. **map** parameter allows for the reuse + of existing maps. It can be specified multiple times, each + time for a different map. *IDX* refers to index of the map + to be replaced in the ELF file counting from 0, while *NAME* + allows to replace a map by name. *MAP* specifies the map to + use, referring to it by **id** or through a **pinned** file. If **dev** *NAME* is specified program will be loaded onto given networking device (offload). @@ -172,6 +180,14 @@ EXAMPLES mov %rbx,0x0(%rbp) 48 89 5d 00 +| +| **# bpftool prog load xdp1_kern.o /sys/fs/bpf/xdp1 type xdp map name rxcnt id 7** +| **# bpftool prog show pinned /sys/fs/bpf/xdp1** +| 9: xdp name xdp_prog1 tag 539ec6ce11b52f98 gpl +| loaded_at 2018-06-25T16:17:31-0700 uid 0 +| xlated 488B jited 336B memlock 4096B map_ids 7 +| **# rm /sys/fs/bpf/xdp1** +| SEE ALSO ======== diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index caf8711993be..598066c40191 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -99,6 +99,29 @@ _bpftool_get_prog_tags() command sed -n 's/.*"tag": "\(.*\)",$/\1/p' )" -- "$cur" ) ) } +_bpftool_get_obj_map_names() +{ + local obj + + obj=$1 + + maps=$(objdump -j maps -t $obj 2>/dev/null | \ + command awk '/g . maps/ {print $NF}') + + COMPREPLY+=( $( compgen -W "$maps" -- "$cur" ) ) +} + +_bpftool_get_obj_map_idxs() +{ + local obj + + obj=$1 + + nmaps=$(objdump -j maps -t $obj 2>/dev/null | grep -c 'g . maps') + + COMPREPLY+=( $( compgen -W "$(seq 0 $((nmaps - 1)))" -- "$cur" ) ) +} + _sysfs_get_netdevs() { COMPREPLY+=( $( compgen -W "$( ls /sys/class/net 2>/dev/null )" -- \ @@ -220,12 +243,14 @@ _bpftool() # Completion depends on object and command in use case $object in prog) - case $prev in - id) - _bpftool_get_prog_ids - return 0 - ;; - esac + if [[ $command != "load" ]]; then + case $prev in + id) + _bpftool_get_prog_ids + return 0 + ;; + esac + fi local PROG_TYPE='id pinned tag' case $command in @@ -268,22 +293,52 @@ _bpftool() return 0 ;; load) + local obj + if [[ ${#words[@]} -lt 6 ]]; then _filedir return 0 fi + obj=${words[3]} + + if [[ ${words[-4]} == "map" ]]; then + COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) ) + return 0 + fi + if [[ ${words[-3]} == "map" ]]; then + if [[ ${words[-2]} == "idx" ]]; then + _bpftool_get_obj_map_idxs $obj + elif [[ ${words[-2]} == "name" ]]; then + _bpftool_get_obj_map_names $obj + fi + return 0 + fi + if [[ ${words[-2]} == "map" ]]; then + COMPREPLY=( $( compgen -W "idx name" -- "$cur" ) ) + return 0 + fi + case $prev in type) COMPREPLY=( $( compgen -W "socket kprobe kretprobe classifier action tracepoint raw_tracepoint xdp perf_event cgroup/skb cgroup/sock cgroup/dev lwt_in lwt_out lwt_xmit lwt_seg6local sockops sk_skb sk_msg lirc_mode2 cgroup/bind4 cgroup/bind6 cgroup/connect4 cgroup/connect6 cgroup/sendmsg4 cgroup/sendmsg6 cgroup/post_bind4 cgroup/post_bind6" -- \ "$cur" ) ) return 0 ;; + id) + _bpftool_get_map_ids + return 0 + ;; + pinned) + _filedir + return 0 + ;; dev) _sysfs_get_netdevs return 0 ;; *) + COMPREPLY=( $( compgen -W "map" -- "$cur" ) ) _bpftool_once_attr 'type' _bpftool_once_attr 'dev' return 0 diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 1e02e4031693..41004bb2644a 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -75,6 +75,8 @@ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }" #define HELP_SPEC_OPTIONS \ "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} }" +#define HELP_SPEC_MAP \ + "MAP := { id MAP_ID | pinned FILE }" enum bpf_obj_type { BPF_OBJ_UNKNOWN, @@ -136,6 +138,7 @@ int do_cgroup(int argc, char **arg); int do_perf(int argc, char **arg); int prog_parse_fd(int *argc, char ***argv); +int map_parse_fd(int *argc, char ***argv); int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 5989e1575ae4..e2baec1122fb 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -93,7 +93,7 @@ static void *alloc_value(struct bpf_map_info *info) return malloc(info->value_size); } -static int map_parse_fd(int *argc, char ***argv) +int map_parse_fd(int *argc, char ***argv) { int fd; @@ -824,7 +824,7 @@ static int do_help(int argc, char **argv) " %s %s event_pipe MAP [cpu N index M]\n" " %s %s help\n" "\n" - " MAP := { id MAP_ID | pinned FILE }\n" + " " HELP_SPEC_MAP "\n" " DATA := { [hex] BYTES }\n" " " HELP_SPEC_PROGRAM "\n" " VALUE := { DATA | MAP | PROG }\n" diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 2bdd5ecd1aad..dce960d22106 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -31,6 +31,7 @@ * SOFTWARE. */ +#define _GNU_SOURCE #include #include #include @@ -682,18 +683,34 @@ static int do_pin(int argc, char **argv) return err; } +struct map_replace { + int idx; + int fd; + char *name; +}; + +int map_replace_compar(const void *p1, const void *p2) +{ + const struct map_replace *a = p1, *b = p2; + + return a->idx - b->idx; +} + static int do_load(int argc, char **argv) { enum bpf_attach_type expected_attach_type; struct bpf_object_open_attr attr = { .prog_type = BPF_PROG_TYPE_UNSPEC, }; + struct map_replace *map_replace = NULL; + unsigned int old_map_fds = 0; struct bpf_program *prog; struct bpf_object *obj; struct bpf_map *map; const char *pinfile; + unsigned int i, j; __u32 ifindex = 0; - int err; + int idx, err; if (!REQ_ARGS(2)) return -1; @@ -708,16 +725,16 @@ static int do_load(int argc, char **argv) if (attr.prog_type != BPF_PROG_TYPE_UNSPEC) { p_err("program type already specified"); - return -1; + goto err_free_reuse_maps; } if (!REQ_ARGS(1)) - return -1; + goto err_free_reuse_maps; /* Put a '/' at the end of type to appease libbpf */ type = malloc(strlen(*argv) + 2); if (!type) { p_err("mem alloc failed"); - return -1; + goto err_free_reuse_maps; } *type = 0; strcat(type, *argv); @@ -728,37 +745,81 @@ static int do_load(int argc, char **argv) free(type); if (err < 0) { p_err("unknown program type '%s'", *argv); - return err; + goto err_free_reuse_maps; } NEXT_ARG(); + } else if (is_prefix(*argv, "map")) { + char *endptr, *name; + int fd; + + NEXT_ARG(); + + if (!REQ_ARGS(4)) + goto err_free_reuse_maps; + + if (is_prefix(*argv, "idx")) { + NEXT_ARG(); + + idx = strtoul(*argv, &endptr, 0); + if (*endptr) { + p_err("can't parse %s as IDX", *argv); + goto err_free_reuse_maps; + } + name = NULL; + } else if (is_prefix(*argv, "name")) { + NEXT_ARG(); + + name = *argv; + idx = -1; + } else { + p_err("expected 'idx' or 'name', got: '%s'?", + *argv); + goto err_free_reuse_maps; + } + NEXT_ARG(); + + fd = map_parse_fd(&argc, &argv); + if (fd < 0) + goto err_free_reuse_maps; + + map_replace = reallocarray(map_replace, old_map_fds + 1, + sizeof(*map_replace)); + if (!map_replace) { + p_err("mem alloc failed"); + goto err_free_reuse_maps; + } + map_replace[old_map_fds].idx = idx; + map_replace[old_map_fds].name = name; + map_replace[old_map_fds].fd = fd; + old_map_fds++; } else if (is_prefix(*argv, "dev")) { NEXT_ARG(); if (ifindex) { p_err("offload device already specified"); - return -1; + goto err_free_reuse_maps; } if (!REQ_ARGS(1)) - return -1; + goto err_free_reuse_maps; ifindex = if_nametoindex(*argv); if (!ifindex) { p_err("unrecognized netdevice '%s': %s", *argv, strerror(errno)); - return -1; + goto err_free_reuse_maps; } NEXT_ARG(); } else { - p_err("expected no more arguments, 'type' or 'dev', got: '%s'?", + p_err("expected no more arguments, 'type', 'map' or 'dev', got: '%s'?", *argv); - return -1; + goto err_free_reuse_maps; } } obj = bpf_object__open_xattr(&attr); if (IS_ERR_OR_NULL(obj)) { p_err("failed to open object file"); - return -1; + goto err_free_reuse_maps; } prog = bpf_program__next(NULL, obj); @@ -782,10 +843,62 @@ static int do_load(int argc, char **argv) bpf_program__set_type(prog, attr.prog_type); bpf_program__set_expected_attach_type(prog, expected_attach_type); - bpf_map__for_each(map, obj) + qsort(map_replace, old_map_fds, sizeof(*map_replace), + map_replace_compar); + + /* After the sort maps by name will be first on the list, because they + * have idx == -1. Resolve them. + */ + j = 0; + while (j < old_map_fds && map_replace[j].name) { + i = 0; + bpf_map__for_each(map, obj) { + if (!strcmp(bpf_map__name(map), map_replace[j].name)) { + map_replace[j].idx = i; + break; + } + i++; + } + if (map_replace[j].idx == -1) { + p_err("unable to find map '%s'", map_replace[j].name); + goto err_close_obj; + } + j++; + } + /* Resort if any names were resolved */ + if (j) + qsort(map_replace, old_map_fds, sizeof(*map_replace), + map_replace_compar); + + /* Set ifindex and name reuse */ + j = 0; + idx = 0; + bpf_map__for_each(map, obj) { if (!bpf_map__is_offload_neutral(map)) bpf_map__set_ifindex(map, ifindex); + if (j < old_map_fds && idx == map_replace[j].idx) { + err = bpf_map__reuse_fd(map, map_replace[j++].fd); + if (err) { + p_err("unable to set up map reuse: %d", err); + goto err_close_obj; + } + + /* Next reuse wants to apply to the same map */ + if (j < old_map_fds && map_replace[j].idx == idx) { + p_err("replacement for map idx %d specified more than once", + idx); + goto err_close_obj; + } + } + + idx++; + } + if (j < old_map_fds) { + p_err("map idx '%d' not used", map_replace[j].idx); + goto err_close_obj; + } + err = bpf_object__load(obj); if (err) { p_err("failed to load object file"); @@ -799,11 +912,18 @@ static int do_load(int argc, char **argv) jsonw_null(json_wtr); bpf_object__close(obj); + for (i = 0; i < old_map_fds; i++) + close(map_replace[i].fd); + free(map_replace); return 0; err_close_obj: bpf_object__close(obj); +err_free_reuse_maps: + for (i = 0; i < old_map_fds; i++) + close(map_replace[i].fd); + free(map_replace); return -1; } @@ -819,9 +939,11 @@ static int do_help(int argc, char **argv) " %s %s dump xlated PROG [{ file FILE | opcodes | visual }]\n" " %s %s dump jited PROG [{ file FILE | opcodes }]\n" " %s %s pin PROG FILE\n" - " %s %s load OBJ FILE [type TYPE] [dev NAME]\n" + " %s %s load OBJ FILE [type TYPE] [dev NAME] \\\n" + " [map { idx IDX | name NAME } MAP]\n" " %s %s help\n" "\n" + " " HELP_SPEC_MAP "\n" " " HELP_SPEC_PROGRAM "\n" " TYPE := { socket | kprobe | kretprobe | classifier | action |\n" " tracepoint | raw_tracepoint | xdp | perf_event | cgroup/skb |\n" From 2bae79d2d38f3dc50bfef81d3b4f7328b2883a17 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 12 Jul 2018 12:52:22 +0100 Subject: [PATCH 23/61] bpf: fix documentation for eBPF helpers Minor formatting edits for eBPF helpers documentation, including blank lines removal, fix of item list for return values in bpf_fib_lookup(), and missing prefix on bpf_skb_load_bytes_relative(). Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b7db3261c62d..6bcb287a888d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1826,7 +1826,7 @@ union bpf_attr { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * - * int skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header) + * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header) * Description * This helper is similar to **bpf_skb_load_bytes**\ () in that * it provides an easy way to load *len* bytes from *offset* @@ -1877,7 +1877,7 @@ union bpf_attr { * * < 0 if any input argument is invalid * * 0 on success (packet is forwarded, nexthop neighbor exists) * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the - * * packet is not forwarded or needs assist from full stack + * packet is not forwarded or needs assist from full stack * * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) * Description @@ -2033,7 +2033,6 @@ union bpf_attr { * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". - * * Return * 0 * @@ -2053,7 +2052,6 @@ union bpf_attr { * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". - * * Return * 0 * From 9b8ca3795199f333269859b0c8393f810b9616a3 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 12 Jul 2018 12:52:23 +0100 Subject: [PATCH 24/61] tools: bpf: synchronise BPF UAPI header with tools Update with latest changes from include/uapi/linux/bpf.h header. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 59b19b6a40d7..6bcb287a888d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1826,7 +1826,7 @@ union bpf_attr { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * - * int skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header) + * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header) * Description * This helper is similar to **bpf_skb_load_bytes**\ () in that * it provides an easy way to load *len* bytes from *offset* @@ -1857,7 +1857,8 @@ union bpf_attr { * is resolved), the nexthop address is returned in ipv4_dst * or ipv6_dst based on family, smac is set to mac address of * egress device, dmac is set to nexthop mac address, rt_metric - * is set to metric from route (IPv4/IPv6 only). + * is set to metric from route (IPv4/IPv6 only), and ifindex + * is set to the device index of the nexthop from the FIB lookup. * * *plen* argument is the size of the passed in struct. * *flags* argument can be a combination of one or more of the @@ -1873,9 +1874,10 @@ union bpf_attr { * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. * Return - * Egress device index on success, 0 if packet needs to continue - * up the stack for further processing or a negative error in case - * of failure. + * * < 0 if any input argument is invalid + * * 0 on success (packet is forwarded, nexthop neighbor exists) + * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the + * packet is not forwarded or needs assist from full stack * * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) * Description @@ -2031,7 +2033,6 @@ union bpf_attr { * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". - * * Return * 0 * @@ -2051,7 +2052,6 @@ union bpf_attr { * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". - * * Return * 0 * @@ -2612,6 +2612,18 @@ struct bpf_raw_tracepoint_args { #define BPF_FIB_LOOKUP_DIRECT BIT(0) #define BPF_FIB_LOOKUP_OUTPUT BIT(1) +enum { + BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ + BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */ + BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */ + BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */ + BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */ + BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */ + BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ + BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ + BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ +}; + struct bpf_fib_lookup { /* input: network family for lookup (AF_INET, AF_INET6) * output: network family of egress nexthop @@ -2625,7 +2637,11 @@ struct bpf_fib_lookup { /* total length of packet from network header - used for MTU check */ __u16 tot_len; - __u32 ifindex; /* L3 device index for lookup */ + + /* input: L3 device index for lookup + * output: device index from FIB lookup + */ + __u32 ifindex; union { /* inputs to lookup */ From 86f7d85cec9e1dba735ac5bd4b751b1908fedf80 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 12 Jul 2018 12:52:24 +0100 Subject: [PATCH 25/61] tools: bpf: build and install man page for eBPF helpers from bpftool/ Provide a new Makefile.helpers in tools/bpf, in order to build and install the man page for eBPF helpers. This Makefile is also included in the one used to build bpftool documentation, so that it can be called either on its own (cd tools/bpf && make -f Makefile.helpers) or from bpftool directory (cd tools/bpf/bpftool && make doc, or cd tools/bpf/bpftool/Documentation && make helpers). Makefile.helpers is not added directly to bpftool to avoid changing its Makefile too much (helpers are not 100% directly related with bpftool). But the possibility to build the page from bpftool directory makes us able to package the helpers man page with bpftool, and to install it along with bpftool documentation, so that the doc for helpers becomes easily available to developers through the "man" program. Cc: linux-man@vger.kernel.org Suggested-by: Daniel Borkmann Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/Makefile.helpers | 59 ++++++++++++++++++++++++ tools/bpf/bpftool/Documentation/Makefile | 13 ++++-- 2 files changed, 67 insertions(+), 5 deletions(-) create mode 100644 tools/bpf/Makefile.helpers diff --git a/tools/bpf/Makefile.helpers b/tools/bpf/Makefile.helpers new file mode 100644 index 000000000000..c34fea77f39f --- /dev/null +++ b/tools/bpf/Makefile.helpers @@ -0,0 +1,59 @@ +ifndef allow-override + include ../scripts/Makefile.include + include ../scripts/utilities.mak +else + # Assume Makefile.helpers is being run from bpftool/Documentation + # subdirectory. Go up two more directories to fetch bpf.h header and + # associated script. + UP2DIR := ../../ +endif + +INSTALL ?= install +RM ?= rm -f +RMDIR ?= rmdir --ignore-fail-on-non-empty + +ifeq ($(V),1) + Q = +else + Q = @ +endif + +prefix ?= /usr/local +mandir ?= $(prefix)/man +man7dir = $(mandir)/man7 + +HELPERS_RST = bpf-helpers.rst +MAN7_RST = $(HELPERS_RST) + +_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST)) +DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7)) + +helpers: man7 +man7: $(DOC_MAN7) + +RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null) + +$(OUTPUT)$(HELPERS_RST): $(UP2DIR)../../include/uapi/linux/bpf.h + $(QUIET_GEN)$(UP2DIR)../../scripts/bpf_helpers_doc.py --filename $< > $@ + +$(OUTPUT)%.7: $(OUTPUT)%.rst +ifndef RST2MAN_DEP + $(error "rst2man not found, but required to generate man pages") +endif + $(QUIET_GEN)rst2man $< > $@ + +helpers-clean: + $(call QUIET_CLEAN, eBPF_helpers-manpage) + $(Q)$(RM) $(DOC_MAN7) $(OUTPUT)$(HELPERS_RST) + +helpers-install: helpers + $(call QUIET_INSTALL, eBPF_helpers-manpage) + $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir) + $(Q)$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir) + +helpers-uninstall: + $(call QUIET_UNINST, eBPF_helpers-manpage) + $(Q)$(RM) $(addprefix $(DESTDIR)$(man7dir)/,$(_DOC_MAN7)) + $(Q)$(RMDIR) $(DESTDIR)$(man7dir) + +.PHONY: helpers helpers-clean helpers-install helpers-uninstall diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index a9d47c1558bb..f7663a3e60c9 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -15,12 +15,15 @@ prefix ?= /usr/local mandir ?= $(prefix)/man man8dir = $(mandir)/man8 -MAN8_RST = $(wildcard *.rst) +# Load targets for building eBPF helpers man page. +include ../../Makefile.helpers + +MAN8_RST = $(filter-out $(HELPERS_RST),$(wildcard *.rst)) _DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST)) DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8)) -man: man8 +man: man8 helpers man8: $(DOC_MAN8) RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null) @@ -31,16 +34,16 @@ ifndef RST2MAN_DEP endif $(QUIET_GEN)rst2man $< > $@ -clean: +clean: helpers-clean $(call QUIET_CLEAN, Documentation) $(Q)$(RM) $(DOC_MAN8) -install: man +install: man helpers-install $(call QUIET_INSTALL, Documentation-man) $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir) $(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir) -uninstall: +uninstall: helpers-uninstall $(call QUIET_UNINST, Documentation-man) $(Q)$(RM) $(addprefix $(DESTDIR)$(man8dir)/,$(_DOC_MAN8)) $(Q)$(RMDIR) $(DESTDIR)$(man8dir) From d449ceb11b3884770c06e71cf15edc9f3b4c9b05 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:31:31 +0100 Subject: [PATCH 26/61] ARM: net: bpf: enumerate the JIT scratch stack layout Enumerate the contents of the JIT scratch stack layout used for storing some of the JITs 64-bit registers, tail call counter and AX register. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 59 ++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index f6a62ae44a65..f2e6ffe57788 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -72,7 +72,38 @@ #define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) #define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) -#define STACK_OFFSET(k) (k) +enum { + /* Stack layout - these are offsets from (top of stack - 4) */ + BPF_R2_HI, + BPF_R2_LO, + BPF_R3_HI, + BPF_R3_LO, + BPF_R4_HI, + BPF_R4_LO, + BPF_R5_HI, + BPF_R5_LO, + BPF_R7_HI, + BPF_R7_LO, + BPF_R8_HI, + BPF_R8_LO, + BPF_R9_HI, + BPF_R9_LO, + BPF_FP_HI, + BPF_FP_LO, + BPF_TC_HI, + BPF_TC_LO, + BPF_AX_HI, + BPF_AX_LO, + /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4, + * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9, + * BPF_REG_FP and Tail call counts. + */ + BPF_JIT_SCRATCH_REGS, +}; + +#define STACK_OFFSET(k) ((k) * 4) +#define SCRATCH_SIZE (BPF_JIT_SCRATCH_REGS * 4) + #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ #define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */ @@ -100,29 +131,29 @@ static const u8 bpf2a32[][2] = { /* arguments from eBPF program to in-kernel function */ [BPF_REG_1] = {ARM_R3, ARM_R2}, /* Stored on stack scratch space */ - [BPF_REG_2] = {STACK_OFFSET(0), STACK_OFFSET(4)}, - [BPF_REG_3] = {STACK_OFFSET(8), STACK_OFFSET(12)}, - [BPF_REG_4] = {STACK_OFFSET(16), STACK_OFFSET(20)}, - [BPF_REG_5] = {STACK_OFFSET(24), STACK_OFFSET(28)}, + [BPF_REG_2] = {STACK_OFFSET(BPF_R2_HI), STACK_OFFSET(BPF_R2_LO)}, + [BPF_REG_3] = {STACK_OFFSET(BPF_R3_HI), STACK_OFFSET(BPF_R3_LO)}, + [BPF_REG_4] = {STACK_OFFSET(BPF_R4_HI), STACK_OFFSET(BPF_R4_LO)}, + [BPF_REG_5] = {STACK_OFFSET(BPF_R5_HI), STACK_OFFSET(BPF_R5_LO)}, /* callee saved registers that in-kernel function will preserve */ [BPF_REG_6] = {ARM_R5, ARM_R4}, /* Stored on stack scratch space */ - [BPF_REG_7] = {STACK_OFFSET(32), STACK_OFFSET(36)}, - [BPF_REG_8] = {STACK_OFFSET(40), STACK_OFFSET(44)}, - [BPF_REG_9] = {STACK_OFFSET(48), STACK_OFFSET(52)}, + [BPF_REG_7] = {STACK_OFFSET(BPF_R7_HI), STACK_OFFSET(BPF_R7_LO)}, + [BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)}, + [BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)}, /* Read only Frame Pointer to access Stack */ - [BPF_REG_FP] = {STACK_OFFSET(56), STACK_OFFSET(60)}, + [BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)}, /* Temporary Register for internal BPF JIT, can be used * for constant blindings and others. */ [TMP_REG_1] = {ARM_R7, ARM_R6}, [TMP_REG_2] = {ARM_R10, ARM_R8}, /* Tail call count. Stored on stack scratch space. */ - [TCALL_CNT] = {STACK_OFFSET(64), STACK_OFFSET(68)}, + [TCALL_CNT] = {STACK_OFFSET(BPF_TC_HI), STACK_OFFSET(BPF_TC_LO)}, /* temporary register for blinding constants. * Stored on stack scratch space. */ - [BPF_REG_AX] = {STACK_OFFSET(72), STACK_OFFSET(76)}, + [BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)}, }; #define dst_lo dst[1] @@ -227,12 +258,6 @@ static void jit_fill_hole(void *area, unsigned int size) #define STACK_ALIGNMENT 4 #endif -/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4, - * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9, - * BPF_REG_FP and Tail call counts. - */ -#define SCRATCH_SIZE 80 - /* total stack size used in JITed code */ #define _STACK_SIZE (ctx->prog->aux->stack_depth + SCRATCH_SIZE) #define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) From a8ef95a034233190b1dd73ff03472ff0f7f4fbdf Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:31:36 +0100 Subject: [PATCH 27/61] ARM: net: bpf: provide load/store ops with negative immediates Provide a set of load/store opcode generators that work with negative immediates as well as positive ones. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 28 ++++++++++++++++++++++++++++ arch/arm/net/bpf_jit_32.h | 35 +++++++++++++---------------------- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index f2e6ffe57788..c81da1a50834 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -239,6 +239,34 @@ static int16_t imm8m(u32 x) return -1; } +static u32 arm_bpf_ldst_imm12(u32 op, u8 rt, u8 rn, s16 imm12) +{ + op |= rt << 12 | rn << 16; + if (imm12 >= 0) + op |= ARM_INST_LDST__U; + else + imm12 = -imm12; + return op | (imm12 & 0xfff); +} + +static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8) +{ + op |= rt << 12 | rn << 16; + if (imm8 >= 0) + op |= ARM_INST_LDST__U; + else + imm8 = -imm8; + return op | (imm8 & 0xf0) << 4 | (imm8 & 0x0f); +} + +#define ARM_LDR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDR_I, rt, rn, off) +#define ARM_LDRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDRB_I, rt, rn, off) +#define ARM_LDRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off) + +#define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off) +#define ARM_STRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off) +#define ARM_STRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRH_I, rt, rn, off) + /* * Initializes the JIT space with undefined instructions. */ diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index d5cf5f6208aa..c55bc39d3e22 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -77,11 +77,12 @@ #define ARM_INST_EOR_R 0x00200000 #define ARM_INST_EOR_I 0x02200000 -#define ARM_INST_LDRB_I 0x05d00000 +#define ARM_INST_LDST__U 0x00800000 +#define ARM_INST_LDRB_I 0x05500000 #define ARM_INST_LDRB_R 0x07d00000 -#define ARM_INST_LDRH_I 0x01d000b0 +#define ARM_INST_LDRH_I 0x015000b0 #define ARM_INST_LDRH_R 0x019000b0 -#define ARM_INST_LDR_I 0x05900000 +#define ARM_INST_LDR_I 0x05100000 #define ARM_INST_LDR_R 0x07900000 #define ARM_INST_LDM 0x08900000 @@ -124,9 +125,9 @@ #define ARM_INST_SBC_R 0x00c00000 #define ARM_INST_SBCS_R 0x00d00000 -#define ARM_INST_STR_I 0x05800000 -#define ARM_INST_STRB_I 0x05c00000 -#define ARM_INST_STRH_I 0x01c000b0 +#define ARM_INST_STR_I 0x05000000 +#define ARM_INST_STRB_I 0x05400000 +#define ARM_INST_STRH_I 0x014000b0 #define ARM_INST_TST_R 0x01100000 #define ARM_INST_TST_I 0x03100000 @@ -183,17 +184,14 @@ #define ARM_EOR_R(rd, rn, rm) _AL3_R(ARM_INST_EOR, rd, rn, rm) #define ARM_EOR_I(rd, rn, imm) _AL3_I(ARM_INST_EOR, rd, rn, imm) -#define ARM_LDR_I(rt, rn, off) (ARM_INST_LDR_I | (rt) << 12 | (rn) << 16 \ - | ((off) & 0xfff)) -#define ARM_LDR_R(rt, rn, rm) (ARM_INST_LDR_R | (rt) << 12 | (rn) << 16 \ +#define ARM_LDR_R(rt, rn, rm) (ARM_INST_LDR_R | ARM_INST_LDST__U \ + | (rt) << 12 | (rn) << 16 \ | (rm)) -#define ARM_LDRB_I(rt, rn, off) (ARM_INST_LDRB_I | (rt) << 12 | (rn) << 16 \ - | (off)) -#define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | (rt) << 12 | (rn) << 16 \ +#define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | ARM_INST_LDST__U \ + | (rt) << 12 | (rn) << 16 \ | (rm)) -#define ARM_LDRH_I(rt, rn, off) (ARM_INST_LDRH_I | (rt) << 12 | (rn) << 16 \ - | (((off) & 0xf0) << 4) | ((off) & 0xf)) -#define ARM_LDRH_R(rt, rn, rm) (ARM_INST_LDRH_R | (rt) << 12 | (rn) << 16 \ +#define ARM_LDRH_R(rt, rn, rm) (ARM_INST_LDRH_R | ARM_INST_LDST__U \ + | (rt) << 12 | (rn) << 16 \ | (rm)) #define ARM_LDM(rn, regs) (ARM_INST_LDM | (rn) << 16 | (regs)) @@ -254,13 +252,6 @@ #define ARM_SUBS_I(rd, rn, imm) _AL3_I(ARM_INST_SUBS, rd, rn, imm) #define ARM_SBC_I(rd, rn, imm) _AL3_I(ARM_INST_SBC, rd, rn, imm) -#define ARM_STR_I(rt, rn, off) (ARM_INST_STR_I | (rt) << 12 | (rn) << 16 \ - | ((off) & 0xfff)) -#define ARM_STRH_I(rt, rn, off) (ARM_INST_STRH_I | (rt) << 12 | (rn) << 16 \ - | (((off) & 0xf0) << 4) | ((off) & 0xf)) -#define ARM_STRB_I(rt, rn, off) (ARM_INST_STRB_I | (rt) << 12 | (rn) << 16 \ - | (((off) & 0xf0) << 4) | ((off) & 0xf)) - #define ARM_TST_R(rn, rm) _AL3_R(ARM_INST_TST, 0, rn, rm) #define ARM_TST_I(rn, imm) _AL3_I(ARM_INST_TST, 0, rn, imm) From 1c35ba122d4a4eb32c3f8d63a445c1ebfd66d7bc Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:31:41 +0100 Subject: [PATCH 28/61] ARM: net: bpf: use negative numbers for stacked registers Use negative numbers for eBPF registers that live on the stack. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 200 +++++++++++++++++++------------------- 1 file changed, 102 insertions(+), 98 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index c81da1a50834..69bf7ab18bf9 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -101,7 +101,11 @@ enum { BPF_JIT_SCRATCH_REGS, }; -#define STACK_OFFSET(k) ((k) * 4) +/* + * Negative "register" values indicate the register is stored on the stack + * and are the offset from the top of the eBPF JIT scratch space. + */ +#define STACK_OFFSET(k) (-4 - (k) * 4) #define SCRATCH_SIZE (BPF_JIT_SCRATCH_REGS * 4) #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ @@ -125,7 +129,7 @@ enum { * scratch memory space and we have to build eBPF 64 bit register from those. * */ -static const u8 bpf2a32[][2] = { +static const s8 bpf2a32[][2] = { /* return value from in-kernel function, and exit value from eBPF */ [BPF_REG_0] = {ARM_R1, ARM_R0}, /* arguments from eBPF program to in-kernel function */ @@ -291,7 +295,7 @@ static void jit_fill_hole(void *area, unsigned int size) #define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) /* Get the offset of eBPF REGISTERs stored on scratch space. */ -#define STACK_VAR(off) (STACK_SIZE - off) +#define STACK_VAR(off) (STACK_SIZE + (off)) #if __LINUX_ARM_ARCH__ < 7 @@ -408,7 +412,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) { - const u8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp = bpf2a32[TMP_REG_1]; #if __LINUX_ARM_ARCH__ == 7 if (elf_hwcap & HWCAP_IDIVA) { @@ -470,10 +474,10 @@ static inline bool is_on_stack(u8 bpf_reg) return false; } -static inline void emit_a32_mov_i(const u8 dst, const u32 val, +static inline void emit_a32_mov_i(const s8 dst, const u32 val, bool dstk, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp = bpf2a32[TMP_REG_1]; if (dstk) { emit_mov_i(tmp[1], val, ctx); @@ -484,7 +488,7 @@ static inline void emit_a32_mov_i(const u8 dst, const u32 val, } /* Sign extended move */ -static inline void emit_a32_mov_i64(const bool is64, const u8 dst[], +static inline void emit_a32_mov_i64(const bool is64, const s8 dst[], const u32 val, bool dstk, struct jit_ctx *ctx) { u32 hi = 0; @@ -574,12 +578,12 @@ static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64, /* ALU operation (32 bit) * dst = dst (op) src */ -static inline void emit_a32_alu_r(const u8 dst, const u8 src, +static inline void emit_a32_alu_r(const s8 dst, const s8 src, bool dstk, bool sstk, struct jit_ctx *ctx, const bool is64, const bool hi, const u8 op) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rn = sstk ? tmp[1] : src; + const s8 *tmp = bpf2a32[TMP_REG_1]; + s8 rn = sstk ? tmp[1] : src; if (sstk) emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src)), ctx); @@ -595,8 +599,8 @@ static inline void emit_a32_alu_r(const u8 dst, const u8 src, } /* ALU operation (64 bit) */ -static inline void emit_a32_alu_r64(const bool is64, const u8 dst[], - const u8 src[], bool dstk, +static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], + const s8 src[], bool dstk, bool sstk, struct jit_ctx *ctx, const u8 op) { emit_a32_alu_r(dst_lo, src_lo, dstk, sstk, ctx, is64, false, op); @@ -607,11 +611,11 @@ static inline void emit_a32_alu_r64(const bool is64, const u8 dst[], } /* dst = imm (4 bytes)*/ -static inline void emit_a32_mov_r(const u8 dst, const u8 src, +static inline void emit_a32_mov_r(const s8 dst, const s8 src, bool dstk, bool sstk, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rt = sstk ? tmp[0] : src; + const s8 *tmp = bpf2a32[TMP_REG_1]; + s8 rt = sstk ? tmp[0] : src; if (sstk) emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(src)), ctx); @@ -622,8 +626,8 @@ static inline void emit_a32_mov_r(const u8 dst, const u8 src, } /* dst = src */ -static inline void emit_a32_mov_r64(const bool is64, const u8 dst[], - const u8 src[], bool dstk, +static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], + const s8 src[], bool dstk, bool sstk, struct jit_ctx *ctx) { emit_a32_mov_r(dst_lo, src_lo, dstk, sstk, ctx); if (is64) { @@ -636,10 +640,10 @@ static inline void emit_a32_mov_r64(const bool is64, const u8 dst[], } /* Shift operations */ -static inline void emit_a32_alu_i(const u8 dst, const u32 val, bool dstk, +static inline void emit_a32_alu_i(const s8 dst, const u32 val, bool dstk, struct jit_ctx *ctx, const u8 op) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rd = dstk ? tmp[0] : dst; + const s8 *tmp = bpf2a32[TMP_REG_1]; + s8 rd = dstk ? tmp[0] : dst; if (dstk) emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); @@ -662,11 +666,11 @@ static inline void emit_a32_alu_i(const u8 dst, const u32 val, bool dstk, } /* dst = ~dst (64 bit) */ -static inline void emit_a32_neg64(const u8 dst[], bool dstk, +static inline void emit_a32_neg64(const s8 dst[], bool dstk, struct jit_ctx *ctx){ - const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rd = dstk ? tmp[1] : dst[1]; - u8 rm = dstk ? tmp[0] : dst[0]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + s8 rd = dstk ? tmp[1] : dst[1]; + s8 rm = dstk ? tmp[0] : dst[0]; /* Setup Operand */ if (dstk) { @@ -685,15 +689,15 @@ static inline void emit_a32_neg64(const u8 dst[], bool dstk, } /* dst = dst << src */ -static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk, +static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], bool dstk, bool sstk, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup Operands */ - u8 rt = sstk ? tmp2[1] : src_lo; - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; + s8 rt = sstk ? tmp2[1] : src_lo; + s8 rd = dstk ? tmp[1] : dst_lo; + s8 rm = dstk ? tmp[0] : dst_hi; if (sstk) emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); @@ -720,14 +724,14 @@ static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk, } /* dst = dst >> src (signed)*/ -static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk, +static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], bool dstk, bool sstk, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup Operands */ - u8 rt = sstk ? tmp2[1] : src_lo; - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; + s8 rt = sstk ? tmp2[1] : src_lo; + s8 rd = dstk ? tmp[1] : dst_lo; + s8 rm = dstk ? tmp[0] : dst_hi; if (sstk) emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); @@ -754,14 +758,14 @@ static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk, } /* dst = dst >> src */ -static inline void emit_a32_rsh_r64(const u8 dst[], const u8 src[], bool dstk, +static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], bool dstk, bool sstk, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup Operands */ - u8 rt = sstk ? tmp2[1] : src_lo; - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; + s8 rt = sstk ? tmp2[1] : src_lo; + s8 rd = dstk ? tmp[1] : dst_lo; + s8 rm = dstk ? tmp[0] : dst_hi; if (sstk) emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); @@ -787,13 +791,13 @@ static inline void emit_a32_rsh_r64(const u8 dst[], const u8 src[], bool dstk, } /* dst = dst << val */ -static inline void emit_a32_lsh_i64(const u8 dst[], bool dstk, +static inline void emit_a32_lsh_i64(const s8 dst[], bool dstk, const u32 val, struct jit_ctx *ctx){ - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup operands */ - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; + s8 rd = dstk ? tmp[1] : dst_lo; + s8 rm = dstk ? tmp[0] : dst_hi; if (dstk) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); @@ -820,13 +824,13 @@ static inline void emit_a32_lsh_i64(const u8 dst[], bool dstk, } /* dst = dst >> val */ -static inline void emit_a32_rsh_i64(const u8 dst[], bool dstk, +static inline void emit_a32_rsh_i64(const s8 dst[], bool dstk, const u32 val, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup operands */ - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; + s8 rd = dstk ? tmp[1] : dst_lo; + s8 rm = dstk ? tmp[0] : dst_hi; if (dstk) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); @@ -853,13 +857,13 @@ static inline void emit_a32_rsh_i64(const u8 dst[], bool dstk, } /* dst = dst >> val (signed) */ -static inline void emit_a32_arsh_i64(const u8 dst[], bool dstk, +static inline void emit_a32_arsh_i64(const s8 dst[], bool dstk, const u32 val, struct jit_ctx *ctx){ - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup operands */ - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; + s8 rd = dstk ? tmp[1] : dst_lo; + s8 rm = dstk ? tmp[0] : dst_hi; if (dstk) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); @@ -885,15 +889,15 @@ static inline void emit_a32_arsh_i64(const u8 dst[], bool dstk, } } -static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk, +static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], bool dstk, bool sstk, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup operands for multiplication */ - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; - u8 rt = sstk ? tmp2[1] : src_lo; - u8 rn = sstk ? tmp2[0] : src_hi; + s8 rd = dstk ? tmp[1] : dst_lo; + s8 rm = dstk ? tmp[0] : dst_hi; + s8 rt = sstk ? tmp2[1] : src_lo; + s8 rn = sstk ? tmp2[0] : src_hi; if (dstk) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); @@ -920,10 +924,10 @@ static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk, } /* *(size *)(dst + off) = src */ -static inline void emit_str_r(const u8 dst, const u8 src, bool dstk, +static inline void emit_str_r(const s8 dst, const s8 src, bool dstk, const s32 off, struct jit_ctx *ctx, const u8 sz){ - const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rd = dstk ? tmp[1] : dst; + const s8 *tmp = bpf2a32[TMP_REG_1]; + s8 rd = dstk ? tmp[1] : dst; if (dstk) emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); @@ -949,11 +953,11 @@ static inline void emit_str_r(const u8 dst, const u8 src, bool dstk, } /* dst = *(size*)(src + off) */ -static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk, +static inline void emit_ldx_r(const s8 dst[], const s8 src, bool dstk, s32 off, struct jit_ctx *ctx, const u8 sz){ - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *rd = dstk ? tmp : dst; - u8 rm = src; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *rd = dstk ? tmp : dst; + s8 rm = src; s32 off_max; if (sz == BPF_H) @@ -1034,11 +1038,11 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) { /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ - const u8 *r2 = bpf2a32[BPF_REG_2]; - const u8 *r3 = bpf2a32[BPF_REG_3]; - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; - const u8 *tcc = bpf2a32[TCALL_CNT]; + const s8 *r2 = bpf2a32[BPF_REG_2]; + const s8 *r3 = bpf2a32[BPF_REG_3]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tcc = bpf2a32[TCALL_CNT]; const int idx0 = ctx->idx; #define cur_offset (ctx->idx - idx0) #define jmp_offset (out_offset - (cur_offset) - 2) @@ -1112,7 +1116,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx) { #if __LINUX_ARM_ARCH__ < 6 - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx); emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 8), ctx); @@ -1127,7 +1131,7 @@ static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx) static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx) { #if __LINUX_ARM_ARCH__ < 6 - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx); emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 24), ctx); @@ -1147,10 +1151,10 @@ static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx) } // push the scratch stack register on top of the stack -static inline void emit_push_r64(const u8 src[], const u8 shift, +static inline void emit_push_r64(const s8 src[], const u8 shift, struct jit_ctx *ctx) { - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; u16 reg_set = 0; emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(src[1]+shift)), ctx); @@ -1162,13 +1166,13 @@ static inline void emit_push_r64(const u8 src[], const u8 shift, static void build_prologue(struct jit_ctx *ctx) { - const u8 r0 = bpf2a32[BPF_REG_0][1]; - const u8 r2 = bpf2a32[BPF_REG_1][1]; - const u8 r3 = bpf2a32[BPF_REG_1][0]; - const u8 r4 = bpf2a32[BPF_REG_6][1]; - const u8 fplo = bpf2a32[BPF_REG_FP][1]; - const u8 fphi = bpf2a32[BPF_REG_FP][0]; - const u8 *tcc = bpf2a32[TCALL_CNT]; + const s8 r0 = bpf2a32[BPF_REG_0][1]; + const s8 r2 = bpf2a32[BPF_REG_1][1]; + const s8 r3 = bpf2a32[BPF_REG_1][0]; + const s8 r4 = bpf2a32[BPF_REG_6][1]; + const s8 fplo = bpf2a32[BPF_REG_FP][1]; + const s8 fphi = bpf2a32[BPF_REG_FP][0]; + const s8 *tcc = bpf2a32[TCALL_CNT]; /* Save callee saved registers. */ #ifdef CONFIG_FRAME_POINTER @@ -1231,17 +1235,17 @@ static void build_epilogue(struct jit_ctx *ctx) static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) { const u8 code = insn->code; - const u8 *dst = bpf2a32[insn->dst_reg]; - const u8 *src = bpf2a32[insn->src_reg]; - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *dst = bpf2a32[insn->dst_reg]; + const s8 *src = bpf2a32[insn->src_reg]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; const s16 off = insn->off; const s32 imm = insn->imm; const int i = insn - ctx->prog->insnsi; const bool is64 = BPF_CLASS(code) == BPF_ALU64; const bool dstk = is_on_stack(insn->dst_reg); const bool sstk = is_on_stack(insn->src_reg); - u8 rd, rt, rm, rn; + s8 rd, rt, rm, rn; s32 jmp_offset; #define check_imm(bits, imm) do { \ @@ -1672,12 +1676,12 @@ go_jmp: /* function call */ case BPF_JMP | BPF_CALL: { - const u8 *r0 = bpf2a32[BPF_REG_0]; - const u8 *r1 = bpf2a32[BPF_REG_1]; - const u8 *r2 = bpf2a32[BPF_REG_2]; - const u8 *r3 = bpf2a32[BPF_REG_3]; - const u8 *r4 = bpf2a32[BPF_REG_4]; - const u8 *r5 = bpf2a32[BPF_REG_5]; + const s8 *r0 = bpf2a32[BPF_REG_0]; + const s8 *r1 = bpf2a32[BPF_REG_1]; + const s8 *r2 = bpf2a32[BPF_REG_2]; + const s8 *r3 = bpf2a32[BPF_REG_3]; + const s8 *r4 = bpf2a32[BPF_REG_4]; + const s8 *r5 = bpf2a32[BPF_REG_5]; const u32 func = (u32)__bpf_call_base + (u32)imm; emit_a32_mov_r64(true, r0, r1, false, false, ctx); From 47b9c3bf416d80515901469f05aef2870b37c010 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:31:47 +0100 Subject: [PATCH 29/61] ARM: net: bpf: remove is_on_stack() and sstk/dstk The decision about whether a BPF register is on the stack or in a CPU register is detected at the top BPF insn processing level, and then percolated throughout the remainder of the code. Since we now use negative register values to represent stacked registers, we can detect where a BPF register is stored without restoring to carrying this additional metadata through all code paths. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 336 ++++++++++++++++++-------------------- 1 file changed, 160 insertions(+), 176 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 69bf7ab18bf9..e81401aca2df 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -459,27 +459,18 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx); } -/* Checks whether BPF register is on scratch stack space or not. */ -static inline bool is_on_stack(u8 bpf_reg) +/* Is the translated BPF register on stack? */ +static bool is_stacked(s8 reg) { - static u8 stack_regs[] = {BPF_REG_AX, BPF_REG_3, BPF_REG_4, BPF_REG_5, - BPF_REG_7, BPF_REG_8, BPF_REG_9, TCALL_CNT, - BPF_REG_2, BPF_REG_FP}; - int i, reg_len = sizeof(stack_regs); - - for (i = 0 ; i < reg_len ; i++) { - if (bpf_reg == stack_regs[i]) - return true; - } - return false; + return reg < 0; } static inline void emit_a32_mov_i(const s8 dst, const u32 val, - bool dstk, struct jit_ctx *ctx) + struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; - if (dstk) { + if (is_stacked(dst)) { emit_mov_i(tmp[1], val, ctx); emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(dst)), ctx); } else { @@ -489,14 +480,13 @@ static inline void emit_a32_mov_i(const s8 dst, const u32 val, /* Sign extended move */ static inline void emit_a32_mov_i64(const bool is64, const s8 dst[], - const u32 val, bool dstk, - struct jit_ctx *ctx) { + const u32 val, struct jit_ctx *ctx) { u32 hi = 0; if (is64 && (val & (1<<31))) hi = (u32)~0; - emit_a32_mov_i(dst_lo, val, dstk, ctx); - emit_a32_mov_i(dst_hi, hi, dstk, ctx); + emit_a32_mov_i(dst_lo, val, ctx); + emit_a32_mov_i(dst_hi, hi, ctx); } static inline void emit_a32_add_r(const u8 dst, const u8 src, @@ -579,17 +569,16 @@ static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64, * dst = dst (op) src */ static inline void emit_a32_alu_r(const s8 dst, const s8 src, - bool dstk, bool sstk, struct jit_ctx *ctx, const bool is64, const bool hi, const u8 op) { const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rn = sstk ? tmp[1] : src; + s8 rn = is_stacked(src) ? tmp[1] : src; - if (sstk) + if (is_stacked(src)) emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src)), ctx); /* ALU operation */ - if (dstk) { + if (is_stacked(dst)) { emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx); emit_alu_r(tmp[0], rn, is64, hi, op, ctx); emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx); @@ -600,26 +589,24 @@ static inline void emit_a32_alu_r(const s8 dst, const s8 src, /* ALU operation (64 bit) */ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], - const s8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx, + const s8 src[], struct jit_ctx *ctx, const u8 op) { - emit_a32_alu_r(dst_lo, src_lo, dstk, sstk, ctx, is64, false, op); + emit_a32_alu_r(dst_lo, src_lo, ctx, is64, false, op); if (is64) - emit_a32_alu_r(dst_hi, src_hi, dstk, sstk, ctx, is64, true, op); + emit_a32_alu_r(dst_hi, src_hi, ctx, is64, true, op); else - emit_a32_mov_i(dst_hi, 0, dstk, ctx); + emit_a32_mov_i(dst_hi, 0, ctx); } /* dst = imm (4 bytes)*/ static inline void emit_a32_mov_r(const s8 dst, const s8 src, - bool dstk, bool sstk, struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rt = sstk ? tmp[0] : src; + s8 rt = is_stacked(src) ? tmp[0] : src; - if (sstk) + if (is_stacked(src)) emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(src)), ctx); - if (dstk) + if (is_stacked(dst)) emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst)), ctx); else emit(ARM_MOV_R(dst, rt), ctx); @@ -627,25 +614,25 @@ static inline void emit_a32_mov_r(const s8 dst, const s8 src, /* dst = src */ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], - const s8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx) { - emit_a32_mov_r(dst_lo, src_lo, dstk, sstk, ctx); + const s8 src[], + struct jit_ctx *ctx) { + emit_a32_mov_r(dst_lo, src_lo, ctx); if (is64) { /* complete 8 byte move */ - emit_a32_mov_r(dst_hi, src_hi, dstk, sstk, ctx); + emit_a32_mov_r(dst_hi, src_hi, ctx); } else { /* Zero out high 4 bytes */ - emit_a32_mov_i(dst_hi, 0, dstk, ctx); + emit_a32_mov_i(dst_hi, 0, ctx); } } /* Shift operations */ -static inline void emit_a32_alu_i(const s8 dst, const u32 val, bool dstk, +static inline void emit_a32_alu_i(const s8 dst, const u32 val, struct jit_ctx *ctx, const u8 op) { const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rd = dstk ? tmp[0] : dst; + s8 rd = is_stacked(dst) ? tmp[0] : dst; - if (dstk) + if (is_stacked(dst)) emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); /* Do shift operation */ @@ -661,19 +648,19 @@ static inline void emit_a32_alu_i(const s8 dst, const u32 val, bool dstk, break; } - if (dstk) + if (is_stacked(dst)) emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); } /* dst = ~dst (64 bit) */ -static inline void emit_a32_neg64(const s8 dst[], bool dstk, +static inline void emit_a32_neg64(const s8 dst[], struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rd = dstk ? tmp[1] : dst[1]; - s8 rm = dstk ? tmp[0] : dst[0]; + s8 rd = is_stacked(dst_lo) ? tmp[1] : dst[1]; + s8 rm = is_stacked(dst_lo) ? tmp[0] : dst[0]; /* Setup Operand */ - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -682,26 +669,26 @@ static inline void emit_a32_neg64(const s8 dst[], bool dstk, emit(ARM_RSBS_I(rd, rd, 0), ctx); emit(ARM_RSC_I(rm, rm, 0), ctx); - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } } /* dst = dst << src */ -static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx) { +static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], + struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup Operands */ - s8 rt = sstk ? tmp2[1] : src_lo; - s8 rd = dstk ? tmp[1] : dst_lo; - s8 rm = dstk ? tmp[0] : dst_hi; + s8 rt = is_stacked(src_lo) ? tmp2[1] : src_lo; + s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; + s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; - if (sstk) + if (is_stacked(src_lo)) emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -714,7 +701,7 @@ static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], bool dstk, emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx); emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_ASL, rt), ctx); - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); } else { @@ -724,18 +711,18 @@ static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], bool dstk, } /* dst = dst >> src (signed)*/ -static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx) { +static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], + struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup Operands */ - s8 rt = sstk ? tmp2[1] : src_lo; - s8 rd = dstk ? tmp[1] : dst_lo; - s8 rm = dstk ? tmp[0] : dst_hi; + s8 rt = is_stacked(src_lo) ? tmp2[1] : src_lo; + s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; + s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; - if (sstk) + if (is_stacked(src_lo)) emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -748,7 +735,7 @@ static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], bool dstk, _emit(ARM_COND_MI, ARM_B(0), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASR, tmp2[0]), ctx); emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_ASR, rt), ctx); - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); } else { @@ -758,18 +745,18 @@ static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], bool dstk, } /* dst = dst >> src */ -static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx) { +static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], + struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup Operands */ - s8 rt = sstk ? tmp2[1] : src_lo; - s8 rd = dstk ? tmp[1] : dst_lo; - s8 rm = dstk ? tmp[0] : dst_hi; + s8 rt = is_stacked(src_lo) ? tmp2[1] : src_lo; + s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; + s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; - if (sstk) + if (is_stacked(src_lo)) emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -781,7 +768,7 @@ static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], bool dstk, emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx); emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_LSR, rt), ctx); - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); } else { @@ -791,15 +778,15 @@ static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], bool dstk, } /* dst = dst << val */ -static inline void emit_a32_lsh_i64(const s8 dst[], bool dstk, - const u32 val, struct jit_ctx *ctx){ +static inline void emit_a32_lsh_i64(const s8 dst[], + const u32 val, struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup operands */ - s8 rd = dstk ? tmp[1] : dst_lo; - s8 rm = dstk ? tmp[0] : dst_hi; + s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; + s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -817,22 +804,22 @@ static inline void emit_a32_lsh_i64(const s8 dst[], bool dstk, emit(ARM_EOR_R(rd, rd, rd), ctx); } - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } } /* dst = dst >> val */ -static inline void emit_a32_rsh_i64(const s8 dst[], bool dstk, +static inline void emit_a32_rsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup operands */ - s8 rd = dstk ? tmp[1] : dst_lo; - s8 rm = dstk ? tmp[0] : dst_hi; + s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; + s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -850,22 +837,22 @@ static inline void emit_a32_rsh_i64(const s8 dst[], bool dstk, emit(ARM_MOV_I(rm, 0), ctx); } - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } } /* dst = dst >> val (signed) */ -static inline void emit_a32_arsh_i64(const s8 dst[], bool dstk, +static inline void emit_a32_arsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup operands */ - s8 rd = dstk ? tmp[1] : dst_lo; - s8 rm = dstk ? tmp[0] : dst_hi; + s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; + s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -883,27 +870,27 @@ static inline void emit_a32_arsh_i64(const s8 dst[], bool dstk, emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx); } - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } } -static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx) { +static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], + struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup operands for multiplication */ - s8 rd = dstk ? tmp[1] : dst_lo; - s8 rm = dstk ? tmp[0] : dst_hi; - s8 rt = sstk ? tmp2[1] : src_lo; - s8 rn = sstk ? tmp2[0] : src_hi; + s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; + s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; + s8 rt = is_stacked(src_lo) ? tmp2[1] : src_lo; + s8 rn = is_stacked(src_lo) ? tmp2[0] : src_hi; - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } - if (sstk) { + if (is_stacked(src_lo)) { emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_hi)), ctx); } @@ -915,7 +902,7 @@ static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], bool dstk, emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx); emit(ARM_ADD_R(rm, ARM_LR, rm), ctx); - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } else { @@ -924,15 +911,15 @@ static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], bool dstk, } /* *(size *)(dst + off) = src */ -static inline void emit_str_r(const s8 dst, const s8 src, bool dstk, +static inline void emit_str_r(const s8 dst, const s8 src, const s32 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rd = dstk ? tmp[1] : dst; + s8 rd = is_stacked(dst) ? tmp[1] : dst; - if (dstk) + if (is_stacked(dst)) emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); if (off) { - emit_a32_mov_i(tmp[0], off, false, ctx); + emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], rd, tmp[0]), ctx); rd = tmp[0]; } @@ -953,10 +940,10 @@ static inline void emit_str_r(const s8 dst, const s8 src, bool dstk, } /* dst = *(size*)(src + off) */ -static inline void emit_ldx_r(const s8 dst[], const s8 src, bool dstk, +static inline void emit_ldx_r(const s8 dst[], const s8 src, s32 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; - const s8 *rd = dstk ? tmp : dst; + const s8 *rd = is_stacked(dst_lo) ? tmp : dst; s8 rm = src; s32 off_max; @@ -966,7 +953,7 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, bool dstk, off_max = 0xfff; if (off < 0 || off > off_max) { - emit_a32_mov_i(tmp[0], off, false, ctx); + emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); rm = tmp[0]; off = 0; @@ -978,17 +965,17 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, bool dstk, case BPF_B: /* Load a Byte */ emit(ARM_LDRB_I(rd[1], rm, off), ctx); - emit_a32_mov_i(dst[0], 0, dstk, ctx); + emit_a32_mov_i(dst[0], 0, ctx); break; case BPF_H: /* Load a HalfWord */ emit(ARM_LDRH_I(rd[1], rm, off), ctx); - emit_a32_mov_i(dst[0], 0, dstk, ctx); + emit_a32_mov_i(dst[0], 0, ctx); break; case BPF_W: /* Load a Word */ emit(ARM_LDR_I(rd[1], rm, off), ctx); - emit_a32_mov_i(dst[0], 0, dstk, ctx); + emit_a32_mov_i(dst[0], 0, ctx); break; case BPF_DW: /* Load a Double Word */ @@ -996,10 +983,10 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, bool dstk, emit(ARM_LDR_I(rd[0], rm, off + 4), ctx); break; } - if (dstk) - emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx); - if (dstk && sz == BPF_DW) - emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx); + if (is_stacked(dst_lo)) + emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst_lo)), ctx); + if (is_stacked(dst_lo) && sz == BPF_DW) + emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst_hi)), ctx); } /* Arithmatic Operation */ @@ -1053,7 +1040,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ off = offsetof(struct bpf_array, map.max_entries); /* array->map.max_entries */ - emit_a32_mov_i(tmp[1], off, false, ctx); + emit_a32_mov_i(tmp[1], off, ctx); emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx); /* index is 32-bit for arrays */ @@ -1083,7 +1070,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) * goto out; */ off = offsetof(struct bpf_array, ptrs); - emit_a32_mov_i(tmp[1], off, false, ctx); + emit_a32_mov_i(tmp[1], off, ctx); emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); emit(ARM_ADD_R(tmp[1], tmp2[1], tmp[1]), ctx); emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx); @@ -1094,7 +1081,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) /* goto *(prog->bpf_func + prologue_size); */ off = offsetof(struct bpf_prog, bpf_func); - emit_a32_mov_i(tmp2[1], off, false, ctx); + emit_a32_mov_i(tmp2[1], off, ctx); emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx); emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx); emit_bx_r(tmp[1], ctx); @@ -1193,8 +1180,8 @@ static void build_prologue(struct jit_ctx *ctx) emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx); /* Set up BPF prog stack base register */ - emit_a32_mov_r(fplo, ARM_IP, true, false, ctx); - emit_a32_mov_i(fphi, 0, true, ctx); + emit_a32_mov_r(fplo, ARM_IP, ctx); + emit_a32_mov_i(fphi, 0, ctx); /* mov r4, 0 */ emit(ARM_MOV_I(r4, 0), ctx); @@ -1243,8 +1230,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const s32 imm = insn->imm; const int i = insn - ctx->prog->insnsi; const bool is64 = BPF_CLASS(code) == BPF_ALU64; - const bool dstk = is_on_stack(insn->dst_reg); - const bool sstk = is_on_stack(insn->src_reg); s8 rd, rt, rm, rn; s32 jmp_offset; @@ -1268,11 +1253,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU64 | BPF_MOV | BPF_X: switch (BPF_SRC(code)) { case BPF_X: - emit_a32_mov_r64(is64, dst, src, dstk, sstk, ctx); + emit_a32_mov_r64(is64, dst, src, ctx); break; case BPF_K: /* Sign-extend immediate value to destination reg */ - emit_a32_mov_i64(is64, dst, imm, dstk, ctx); + emit_a32_mov_i64(is64, dst, imm, ctx); break; } break; @@ -1312,8 +1297,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU64 | BPF_XOR | BPF_X: switch (BPF_SRC(code)) { case BPF_X: - emit_a32_alu_r64(is64, dst, src, dstk, sstk, - ctx, BPF_OP(code)); + emit_a32_alu_r64(is64, dst, src, ctx, BPF_OP(code)); break; case BPF_K: /* Move immediate value to the temporary register @@ -1322,9 +1306,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) * value into temporary reg and then it would be * safe to do the operation on it. */ - emit_a32_mov_i64(is64, tmp2, imm, false, ctx); - emit_a32_alu_r64(is64, dst, tmp2, dstk, false, - ctx, BPF_OP(code)); + emit_a32_mov_i64(is64, tmp2, imm, ctx); + emit_a32_alu_r64(is64, dst, tmp2, ctx, BPF_OP(code)); break; } break; @@ -1334,26 +1317,28 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU | BPF_MOD | BPF_X: - rt = src_lo; - rd = dstk ? tmp2[1] : dst_lo; - if (dstk) + rd = is_stacked(dst_lo) ? tmp2[1] : dst_lo; + if (is_stacked(dst_lo)) emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); switch (BPF_SRC(code)) { case BPF_X: - rt = sstk ? tmp2[0] : rt; - if (sstk) + rt = is_stacked(rt) ? tmp2[0] : src_lo; + if (is_stacked(src_lo)) emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); break; case BPF_K: rt = tmp2[0]; - emit_a32_mov_i(rt, imm, false, ctx); + emit_a32_mov_i(rt, imm, ctx); + break; + default: + rt = src_lo; break; } emit_udivmod(rd, rd, rt, ctx, BPF_OP(code)); - if (dstk) + if (is_stacked(dst_lo)) emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit_a32_mov_i(dst_hi, 0, dstk, ctx); + emit_a32_mov_i(dst_hi, 0, ctx); break; case BPF_ALU64 | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_X: @@ -1367,54 +1352,54 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) if (unlikely(imm > 31)) return -EINVAL; if (imm) - emit_a32_alu_i(dst_lo, imm, dstk, ctx, BPF_OP(code)); - emit_a32_mov_i(dst_hi, 0, dstk, ctx); + emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code)); + emit_a32_mov_i(dst_hi, 0, ctx); break; /* dst = dst << imm */ case BPF_ALU64 | BPF_LSH | BPF_K: if (unlikely(imm > 63)) return -EINVAL; - emit_a32_lsh_i64(dst, dstk, imm, ctx); + emit_a32_lsh_i64(dst, imm, ctx); break; /* dst = dst >> imm */ case BPF_ALU64 | BPF_RSH | BPF_K: if (unlikely(imm > 63)) return -EINVAL; - emit_a32_rsh_i64(dst, dstk, imm, ctx); + emit_a32_rsh_i64(dst, imm, ctx); break; /* dst = dst << src */ case BPF_ALU64 | BPF_LSH | BPF_X: - emit_a32_lsh_r64(dst, src, dstk, sstk, ctx); + emit_a32_lsh_r64(dst, src, ctx); break; /* dst = dst >> src */ case BPF_ALU64 | BPF_RSH | BPF_X: - emit_a32_rsh_r64(dst, src, dstk, sstk, ctx); + emit_a32_rsh_r64(dst, src, ctx); break; /* dst = dst >> src (signed) */ case BPF_ALU64 | BPF_ARSH | BPF_X: - emit_a32_arsh_r64(dst, src, dstk, sstk, ctx); + emit_a32_arsh_r64(dst, src, ctx); break; /* dst = dst >> imm (signed) */ case BPF_ALU64 | BPF_ARSH | BPF_K: if (unlikely(imm > 63)) return -EINVAL; - emit_a32_arsh_i64(dst, dstk, imm, ctx); + emit_a32_arsh_i64(dst, imm, ctx); break; /* dst = ~dst */ case BPF_ALU | BPF_NEG: - emit_a32_alu_i(dst_lo, 0, dstk, ctx, BPF_OP(code)); - emit_a32_mov_i(dst_hi, 0, dstk, ctx); + emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code)); + emit_a32_mov_i(dst_hi, 0, ctx); break; /* dst = ~dst (64 bit) */ case BPF_ALU64 | BPF_NEG: - emit_a32_neg64(dst, dstk, ctx); + emit_a32_neg64(dst, ctx); break; /* dst = dst * src/imm */ case BPF_ALU64 | BPF_MUL | BPF_X: case BPF_ALU64 | BPF_MUL | BPF_K: switch (BPF_SRC(code)) { case BPF_X: - emit_a32_mul_r64(dst, src, dstk, sstk, ctx); + emit_a32_mul_r64(dst, src, ctx); break; case BPF_K: /* Move immediate value to the temporary register @@ -1423,8 +1408,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) * reg then it would be safe to do the operation * on it. */ - emit_a32_mov_i64(is64, tmp2, imm, false, ctx); - emit_a32_mul_r64(dst, tmp2, dstk, false, ctx); + emit_a32_mov_i64(is64, tmp2, imm, ctx); + emit_a32_mul_r64(dst, tmp2, ctx); break; } break; @@ -1432,9 +1417,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) /* dst = htobe(dst) */ case BPF_ALU | BPF_END | BPF_FROM_LE: case BPF_ALU | BPF_END | BPF_FROM_BE: - rd = dstk ? tmp[0] : dst_hi; - rt = dstk ? tmp[1] : dst_lo; - if (dstk) { + rd = is_stacked(dst_lo) ? tmp[0] : dst_hi; + rt = is_stacked(dst_lo) ? tmp[1] : dst_lo; + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -1459,7 +1444,7 @@ emit_bswap_uxt: case 16: /* zero-extend 16 bits into 64 bits */ #if __LINUX_ARM_ARCH__ < 6 - emit_a32_mov_i(tmp2[1], 0xffff, false, ctx); + emit_a32_mov_i(tmp2[1], 0xffff, ctx); emit(ARM_AND_R(rt, rt, tmp2[1]), ctx); #else /* ARMv6+ */ emit(ARM_UXTH(rt, rt), ctx); @@ -1475,7 +1460,7 @@ emit_bswap_uxt: break; } exit: - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -1487,8 +1472,8 @@ exit: u32 hi, lo = imm; hi = insn1.imm; - emit_a32_mov_i(dst_lo, lo, dstk, ctx); - emit_a32_mov_i(dst_hi, hi, dstk, ctx); + emit_a32_mov_i(dst_lo, lo, ctx); + emit_a32_mov_i(dst_hi, hi, ctx); return 1; } @@ -1497,10 +1482,10 @@ exit: case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_DW: - rn = sstk ? tmp2[1] : src_lo; - if (sstk) + rn = is_stacked(src_lo) ? tmp2[1] : src_lo; + if (is_stacked(src_lo)) emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); - emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code)); + emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: @@ -1510,16 +1495,15 @@ exit: switch (BPF_SIZE(code)) { case BPF_DW: /* Sign-extend immediate value into temp reg */ - emit_a32_mov_i64(true, tmp2, imm, false, ctx); - emit_str_r(dst_lo, tmp2[1], dstk, off, ctx, BPF_W); - emit_str_r(dst_lo, tmp2[0], dstk, off+4, ctx, BPF_W); + emit_a32_mov_i64(true, tmp2, imm, ctx); + emit_str_r(dst_lo, tmp2[1], off, ctx, BPF_W); + emit_str_r(dst_lo, tmp2[0], off+4, ctx, BPF_W); break; case BPF_W: case BPF_H: case BPF_B: - emit_a32_mov_i(tmp2[1], imm, false, ctx); - emit_str_r(dst_lo, tmp2[1], dstk, off, ctx, - BPF_SIZE(code)); + emit_a32_mov_i(tmp2[1], imm, ctx); + emit_str_r(dst_lo, tmp2[1], off, ctx, BPF_SIZE(code)); break; } break; @@ -1536,19 +1520,19 @@ exit: { u8 sz = BPF_SIZE(code); - rn = sstk ? tmp2[1] : src_lo; - rm = sstk ? tmp2[0] : src_hi; - if (sstk) { + rn = is_stacked(src_lo) ? tmp2[1] : src_lo; + rm = is_stacked(src_lo) ? tmp2[0] : src_hi; + if (is_stacked(src_lo)) { emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx); } /* Store the value */ if (BPF_SIZE(code) == BPF_DW) { - emit_str_r(dst_lo, rn, dstk, off, ctx, BPF_W); - emit_str_r(dst_lo, rm, dstk, off+4, ctx, BPF_W); + emit_str_r(dst_lo, rn, off, ctx, BPF_W); + emit_str_r(dst_lo, rm, off+4, ctx, BPF_W); } else { - emit_str_r(dst_lo, rn, dstk, off, ctx, sz); + emit_str_r(dst_lo, rn, off, ctx, sz); } break; } @@ -1575,9 +1559,9 @@ exit: case BPF_JMP | BPF_JSLT | BPF_X: case BPF_JMP | BPF_JSLE | BPF_X: /* Setup source registers */ - rm = sstk ? tmp2[0] : src_hi; - rn = sstk ? tmp2[1] : src_lo; - if (sstk) { + rm = is_stacked(src_lo) ? tmp2[0] : src_hi; + rn = is_stacked(src_lo) ? tmp2[1] : src_lo; + if (is_stacked(src_lo)) { emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx); } @@ -1609,12 +1593,12 @@ exit: rm = tmp2[0]; rn = tmp2[1]; /* Sign-extend immediate value */ - emit_a32_mov_i64(true, tmp2, imm, false, ctx); + emit_a32_mov_i64(true, tmp2, imm, ctx); go_jmp: /* Setup destination register */ - rd = dstk ? tmp[0] : dst_hi; - rt = dstk ? tmp[1] : dst_lo; - if (dstk) { + rd = is_stacked(dst_lo) ? tmp[0] : dst_hi; + rt = is_stacked(dst_lo) ? tmp[1] : dst_lo; + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -1684,13 +1668,13 @@ go_jmp: const s8 *r5 = bpf2a32[BPF_REG_5]; const u32 func = (u32)__bpf_call_base + (u32)imm; - emit_a32_mov_r64(true, r0, r1, false, false, ctx); - emit_a32_mov_r64(true, r1, r2, false, true, ctx); + emit_a32_mov_r64(true, r0, r1, ctx); + emit_a32_mov_r64(true, r1, r2, ctx); emit_push_r64(r5, 0, ctx); emit_push_r64(r4, 8, ctx); emit_push_r64(r3, 16, ctx); - emit_a32_mov_i(tmp[1], func, false, ctx); + emit_a32_mov_i(tmp[1], func, ctx); emit_blx_r(tmp[1], ctx); emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx); // callee clean From 7a9870256361d4a36cb42e0301540256bb4b864e Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:31:52 +0100 Subject: [PATCH 30/61] ARM: net: bpf: provide accessor functions for BPF registers Many of the code paths need to have knowledge about whether a register is stacked or in a CPU register. Move this decision making to a pair of helper functions instead of having it scattered throughout the code. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 329 +++++++++++++++----------------------- 1 file changed, 128 insertions(+), 201 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index e81401aca2df..08fb4eb285a2 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -465,6 +465,31 @@ static bool is_stacked(s8 reg) return reg < 0; } +/* If a BPF register is on the stack (stk is true), load it to the + * supplied temporary register and return the temporary register + * for subsequent operations, otherwise just use the CPU register. + */ +static s8 arm_bpf_get_reg32(s8 reg, s8 tmp, struct jit_ctx *ctx) +{ + if (is_stacked(reg)) { + emit(ARM_LDR_I(tmp, ARM_SP, STACK_VAR(reg)), ctx); + reg = tmp; + } + return reg; +} + +/* If a BPF register is on the stack (stk is true), save the register + * back to the stack. If the source register is not the same, then + * move it into the correct register. + */ +static void arm_bpf_put_reg32(s8 reg, s8 src, struct jit_ctx *ctx) +{ + if (is_stacked(reg)) + emit(ARM_STR_I(src, ARM_SP, STACK_VAR(reg)), ctx); + else if (reg != src) + emit(ARM_MOV_R(reg, src), ctx); +} + static inline void emit_a32_mov_i(const s8 dst, const u32 val, struct jit_ctx *ctx) { @@ -472,7 +497,7 @@ static inline void emit_a32_mov_i(const s8 dst, const u32 val, if (is_stacked(dst)) { emit_mov_i(tmp[1], val, ctx); - emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(dst)), ctx); + arm_bpf_put_reg32(dst, tmp[1], ctx); } else { emit_mov_i(dst, val, ctx); } @@ -572,19 +597,13 @@ static inline void emit_a32_alu_r(const s8 dst, const s8 src, struct jit_ctx *ctx, const bool is64, const bool hi, const u8 op) { const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rn = is_stacked(src) ? tmp[1] : src; - - if (is_stacked(src)) - emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src)), ctx); + s8 rn, rd; + rn = arm_bpf_get_reg32(src, tmp[1], ctx); + rd = arm_bpf_get_reg32(dst, tmp[0], ctx); /* ALU operation */ - if (is_stacked(dst)) { - emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx); - emit_alu_r(tmp[0], rn, is64, hi, op, ctx); - emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx); - } else { - emit_alu_r(dst, rn, is64, hi, op, ctx); - } + emit_alu_r(rd, rn, is64, hi, op, ctx); + arm_bpf_put_reg32(dst, rd, ctx); } /* ALU operation (64 bit) */ @@ -598,18 +617,14 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], emit_a32_mov_i(dst_hi, 0, ctx); } -/* dst = imm (4 bytes)*/ +/* dst = src (4 bytes)*/ static inline void emit_a32_mov_r(const s8 dst, const s8 src, struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rt = is_stacked(src) ? tmp[0] : src; + s8 rt; - if (is_stacked(src)) - emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(src)), ctx); - if (is_stacked(dst)) - emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst)), ctx); - else - emit(ARM_MOV_R(dst, rt), ctx); + rt = arm_bpf_get_reg32(src, tmp[0], ctx); + arm_bpf_put_reg32(dst, rt, ctx); } /* dst = src */ @@ -630,10 +645,9 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], static inline void emit_a32_alu_i(const s8 dst, const u32 val, struct jit_ctx *ctx, const u8 op) { const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rd = is_stacked(dst) ? tmp[0] : dst; + s8 rd; - if (is_stacked(dst)) - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); + rd = arm_bpf_get_reg32(dst, tmp[0], ctx); /* Do shift operation */ switch (op) { @@ -648,31 +662,25 @@ static inline void emit_a32_alu_i(const s8 dst, const u32 val, break; } - if (is_stacked(dst)) - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); + arm_bpf_put_reg32(dst, rd, ctx); } /* dst = ~dst (64 bit) */ static inline void emit_a32_neg64(const s8 dst[], struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rd = is_stacked(dst_lo) ? tmp[1] : dst[1]; - s8 rm = is_stacked(dst_lo) ? tmp[0] : dst[0]; + s8 rd, rm; /* Setup Operand */ - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); /* Do Negate Operation */ emit(ARM_RSBS_I(rd, rd, 0), ctx); emit(ARM_RSC_I(rm, rm, 0), ctx); - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + arm_bpf_put_reg32(dst_lo, rd, ctx); + arm_bpf_put_reg32(dst_hi, rm, ctx); } /* dst = dst << src */ @@ -680,18 +688,12 @@ static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; + s8 rt, rd, rm; /* Setup Operands */ - s8 rt = is_stacked(src_lo) ? tmp2[1] : src_lo; - s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; - s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; - - if (is_stacked(src_lo)) - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); + rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); /* Do LSH operation */ emit(ARM_SUB_I(ARM_IP, rt, 32), ctx); @@ -701,13 +703,8 @@ static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx); emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_ASL, rt), ctx); - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); - } else { - emit(ARM_MOV_R(rd, ARM_LR), ctx); - emit(ARM_MOV_R(rm, ARM_IP), ctx); - } + arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); + arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); } /* dst = dst >> src (signed)*/ @@ -715,17 +712,12 @@ static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - /* Setup Operands */ - s8 rt = is_stacked(src_lo) ? tmp2[1] : src_lo; - s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; - s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; + s8 rt, rd, rm; - if (is_stacked(src_lo)) - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + /* Setup Operands */ + rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); + rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); /* Do the ARSH operation */ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); @@ -735,13 +727,9 @@ static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], _emit(ARM_COND_MI, ARM_B(0), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASR, tmp2[0]), ctx); emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_ASR, rt), ctx); - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); - } else { - emit(ARM_MOV_R(rd, ARM_LR), ctx); - emit(ARM_MOV_R(rm, ARM_IP), ctx); - } + + arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); + arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); } /* dst = dst >> src */ @@ -749,17 +737,12 @@ static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - /* Setup Operands */ - s8 rt = is_stacked(src_lo) ? tmp2[1] : src_lo; - s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; - s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; + s8 rt, rd, rm; - if (is_stacked(src_lo)) - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + /* Setup Operands */ + rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); + rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); /* Do RSH operation */ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); @@ -768,13 +751,9 @@ static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx); emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_LSR, rt), ctx); - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); - } else { - emit(ARM_MOV_R(rd, ARM_LR), ctx); - emit(ARM_MOV_R(rm, ARM_IP), ctx); - } + + arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); + arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); } /* dst = dst << val */ @@ -782,14 +761,11 @@ static inline void emit_a32_lsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - /* Setup operands */ - s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; - s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; + s8 rd, rm; - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + /* Setup operands */ + rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); /* Do LSH operation */ if (val < 32) { @@ -804,10 +780,8 @@ static inline void emit_a32_lsh_i64(const s8 dst[], emit(ARM_EOR_R(rd, rd, rd), ctx); } - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + arm_bpf_put_reg32(dst_lo, rd, ctx); + arm_bpf_put_reg32(dst_hi, rm, ctx); } /* dst = dst >> val */ @@ -815,14 +789,11 @@ static inline void emit_a32_rsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - /* Setup operands */ - s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; - s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; + s8 rd, rm; - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + /* Setup operands */ + rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); /* Do LSR operation */ if (val < 32) { @@ -837,10 +808,8 @@ static inline void emit_a32_rsh_i64(const s8 dst[], emit(ARM_MOV_I(rm, 0), ctx); } - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + arm_bpf_put_reg32(dst_lo, rd, ctx); + arm_bpf_put_reg32(dst_hi, rm, ctx); } /* dst = dst >> val (signed) */ @@ -848,14 +817,11 @@ static inline void emit_a32_arsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - /* Setup operands */ - s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; - s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; + s8 rd, rm; - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + /* Setup operands */ + rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); /* Do ARSH operation */ if (val < 32) { @@ -870,30 +836,21 @@ static inline void emit_a32_arsh_i64(const s8 dst[], emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx); } - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + arm_bpf_put_reg32(dst_lo, rd, ctx); + arm_bpf_put_reg32(dst_hi, rm, ctx); } static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - /* Setup operands for multiplication */ - s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; - s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; - s8 rt = is_stacked(src_lo) ? tmp2[1] : src_lo; - s8 rn = is_stacked(src_lo) ? tmp2[0] : src_hi; + s8 rd, rm, rt, rn; - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } - if (is_stacked(src_lo)) { - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_hi)), ctx); - } + /* Setup operands for multiplication */ + rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); + rn = arm_bpf_get_reg32(src_hi, tmp2[0], ctx); /* Do Multiplication */ emit(ARM_MUL(ARM_IP, rd, rn), ctx); @@ -902,22 +859,18 @@ static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx); emit(ARM_ADD_R(rm, ARM_LR, rm), ctx); - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } else { - emit(ARM_MOV_R(rd, ARM_IP), ctx); - } + + arm_bpf_put_reg32(dst_lo, ARM_IP, ctx); + arm_bpf_put_reg32(dst_hi, rm, ctx); } /* *(size *)(dst + off) = src */ static inline void emit_str_r(const s8 dst, const s8 src, const s32 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rd = is_stacked(dst) ? tmp[1] : dst; + s8 rd; - if (is_stacked(dst)) - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); + rd = arm_bpf_get_reg32(dst, tmp[1], ctx); if (off) { emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], rd, tmp[0]), ctx); @@ -983,10 +936,9 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, emit(ARM_LDR_I(rd[0], rm, off + 4), ctx); break; } - if (is_stacked(dst_lo)) - emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst_lo)), ctx); - if (is_stacked(dst_lo) && sz == BPF_DW) - emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst_hi)), ctx); + arm_bpf_put_reg32(dst[1], rd[1], ctx); + if (sz == BPF_DW) + arm_bpf_put_reg32(dst[0], rd[0], ctx); } /* Arithmatic Operation */ @@ -1034,6 +986,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) #define cur_offset (ctx->idx - idx0) #define jmp_offset (out_offset - (cur_offset) - 2) u32 off, lo, hi; + s8 r_array, r_index, r_tc_lo, r_tc_hi; /* if (index >= array->map.max_entries) * goto out; @@ -1041,12 +994,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) off = offsetof(struct bpf_array, map.max_entries); /* array->map.max_entries */ emit_a32_mov_i(tmp[1], off, ctx); - emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); - emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx); + r_array = arm_bpf_get_reg32(r2[1], tmp2[1], ctx); + emit(ARM_LDR_R(tmp[1], r_array, tmp[1]), ctx); /* index is 32-bit for arrays */ - emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx); + r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); /* index >= array->map.max_entries */ - emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx); + emit(ARM_CMP_R(r_index, tmp[1]), ctx); _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) @@ -1055,15 +1008,15 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ lo = (u32)MAX_TAIL_CALL_CNT; hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32); - emit(ARM_LDR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx); - emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx); - emit(ARM_CMP_I(tmp[0], hi), ctx); - _emit(ARM_COND_EQ, ARM_CMP_I(tmp[1], lo), ctx); + r_tc_lo = arm_bpf_get_reg32(tcc[1], tmp[1], ctx); + r_tc_hi = arm_bpf_get_reg32(tcc[0], tmp[0], ctx); + emit(ARM_CMP_I(r_tc_hi, hi), ctx); + _emit(ARM_COND_EQ, ARM_CMP_I(r_tc_lo, lo), ctx); _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx); - emit(ARM_ADDS_I(tmp[1], tmp[1], 1), ctx); - emit(ARM_ADC_I(tmp[0], tmp[0], 0), ctx); - emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx); - emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx); + emit(ARM_ADDS_I(r_tc_lo, r_tc_lo, 1), ctx); + emit(ARM_ADC_I(r_tc_hi, r_tc_hi, 0), ctx); + arm_bpf_put_reg32(tcc[1], r_tc_lo, ctx); + arm_bpf_put_reg32(tcc[0], r_tc_hi, ctx); /* prog = array->ptrs[index] * if (prog == NULL) @@ -1071,10 +1024,10 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ off = offsetof(struct bpf_array, ptrs); emit_a32_mov_i(tmp[1], off, ctx); - emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); - emit(ARM_ADD_R(tmp[1], tmp2[1], tmp[1]), ctx); - emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx); - emit(ARM_MOV_SI(tmp[0], tmp2[1], SRTYPE_ASL, 2), ctx); + r_array = arm_bpf_get_reg32(r2[1], tmp2[1], ctx); + emit(ARM_ADD_R(tmp[1], r_array, tmp[1]), ctx); + r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); + emit(ARM_MOV_SI(tmp[0], r_index, SRTYPE_ASL, 2), ctx); emit(ARM_LDR_R(tmp[1], tmp[1], tmp[0]), ctx); emit(ARM_CMP_I(tmp[1], 0), ctx); _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); @@ -1317,15 +1270,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU | BPF_MOD | BPF_X: - rd = is_stacked(dst_lo) ? tmp2[1] : dst_lo; - if (is_stacked(dst_lo)) - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); + rd = arm_bpf_get_reg32(dst_lo, tmp2[1], ctx); switch (BPF_SRC(code)) { case BPF_X: - rt = is_stacked(rt) ? tmp2[0] : src_lo; - if (is_stacked(src_lo)) - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), - ctx); + rt = arm_bpf_get_reg32(src_lo, tmp2[0], ctx); break; case BPF_K: rt = tmp2[0]; @@ -1336,8 +1284,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; } emit_udivmod(rd, rd, rt, ctx, BPF_OP(code)); - if (is_stacked(dst_lo)) - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); + arm_bpf_put_reg32(dst_lo, rd, ctx); emit_a32_mov_i(dst_hi, 0, ctx); break; case BPF_ALU64 | BPF_DIV | BPF_K: @@ -1417,12 +1364,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) /* dst = htobe(dst) */ case BPF_ALU | BPF_END | BPF_FROM_LE: case BPF_ALU | BPF_END | BPF_FROM_BE: - rd = is_stacked(dst_lo) ? tmp[0] : dst_hi; - rt = is_stacked(dst_lo) ? tmp[1] : dst_lo; - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + rt = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rd = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); if (BPF_SRC(code) == BPF_FROM_LE) goto emit_bswap_uxt; switch (imm) { @@ -1460,10 +1403,8 @@ emit_bswap_uxt: break; } exit: - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + arm_bpf_put_reg32(dst_lo, rt, ctx); + arm_bpf_put_reg32(dst_hi, rd, ctx); break; /* dst = imm64 */ case BPF_LD | BPF_IMM | BPF_DW: @@ -1482,9 +1423,7 @@ exit: case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_DW: - rn = is_stacked(src_lo) ? tmp2[1] : src_lo; - if (is_stacked(src_lo)) - emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); + rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); break; /* ST: *(size *)(dst + off) = imm */ @@ -1520,12 +1459,8 @@ exit: { u8 sz = BPF_SIZE(code); - rn = is_stacked(src_lo) ? tmp2[1] : src_lo; - rm = is_stacked(src_lo) ? tmp2[0] : src_hi; - if (is_stacked(src_lo)) { - emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx); - } + rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); + rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx); /* Store the value */ if (BPF_SIZE(code) == BPF_DW) { @@ -1559,12 +1494,8 @@ exit: case BPF_JMP | BPF_JSLT | BPF_X: case BPF_JMP | BPF_JSLE | BPF_X: /* Setup source registers */ - rm = is_stacked(src_lo) ? tmp2[0] : src_hi; - rn = is_stacked(src_lo) ? tmp2[1] : src_lo; - if (is_stacked(src_lo)) { - emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx); - } + rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx); + rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); goto go_jmp; /* PC += off if dst == imm */ /* PC += off if dst > imm */ @@ -1596,12 +1527,8 @@ exit: emit_a32_mov_i64(true, tmp2, imm, ctx); go_jmp: /* Setup destination register */ - rd = is_stacked(dst_lo) ? tmp[0] : dst_hi; - rt = is_stacked(dst_lo) ? tmp[1] : dst_lo; - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + rt = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rd = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); /* Check for the condition */ emit_ar_r(rd, rt, rm, rn, ctx, BPF_OP(code)); From a6eccac507e5e4aed63fb23320fcadeb253c2af6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:31:57 +0100 Subject: [PATCH 31/61] ARM: net: bpf: 64-bit accessor functions for BPF registers Provide a couple of 64-bit register accessors, and use them where appropriate Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 235 ++++++++++++++++++++------------------ 1 file changed, 122 insertions(+), 113 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 08fb4eb285a2..45a3599e94a4 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -478,6 +478,17 @@ static s8 arm_bpf_get_reg32(s8 reg, s8 tmp, struct jit_ctx *ctx) return reg; } +static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp, + struct jit_ctx *ctx) +{ + if (is_stacked(reg[1])) { + emit(ARM_LDR_I(tmp[1], ARM_SP, STACK_VAR(reg[1])), ctx); + emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(reg[0])), ctx); + reg = tmp; + } + return reg; +} + /* If a BPF register is on the stack (stk is true), save the register * back to the stack. If the source register is not the same, then * move it into the correct register. @@ -490,6 +501,20 @@ static void arm_bpf_put_reg32(s8 reg, s8 src, struct jit_ctx *ctx) emit(ARM_MOV_R(reg, src), ctx); } +static void arm_bpf_put_reg64(const s8 *reg, const s8 *src, + struct jit_ctx *ctx) +{ + if (is_stacked(reg[1])) { + emit(ARM_STR_I(src[1], ARM_SP, STACK_VAR(reg[1])), ctx); + emit(ARM_STR_I(src[0], ARM_SP, STACK_VAR(reg[0])), ctx); + } else { + if (reg[1] != src[1]) + emit(ARM_MOV_R(reg[1], src[1]), ctx); + if (reg[0] != src[0]) + emit(ARM_MOV_R(reg[0], src[0]), ctx); + } +} + static inline void emit_a32_mov_i(const s8 dst, const u32 val, struct jit_ctx *ctx) { @@ -669,18 +694,16 @@ static inline void emit_a32_alu_i(const s8 dst, const u32 val, static inline void emit_a32_neg64(const s8 dst[], struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rd, rm; + const s8 *rd; /* Setup Operand */ - rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do Negate Operation */ - emit(ARM_RSBS_I(rd, rd, 0), ctx); - emit(ARM_RSC_I(rm, rm, 0), ctx); + emit(ARM_RSBS_I(rd[1], rd[1], 0), ctx); + emit(ARM_RSC_I(rd[0], rd[0], 0), ctx); - arm_bpf_put_reg32(dst_lo, rd, ctx); - arm_bpf_put_reg32(dst_hi, rm, ctx); + arm_bpf_put_reg64(dst, rd, ctx); } /* dst = dst << src */ @@ -688,20 +711,20 @@ static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - s8 rt, rd, rm; + const s8 *rd; + s8 rt; /* Setup Operands */ rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); - rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do LSH operation */ emit(ARM_SUB_I(ARM_IP, rt, 32), ctx); emit(ARM_RSB_I(tmp2[0], rt, 32), ctx); - emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx); - emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx); - emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx); - emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_ASL, rt), ctx); + emit(ARM_MOV_SR(ARM_LR, rd[0], SRTYPE_ASL, rt), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[1], SRTYPE_ASL, ARM_IP), ctx); + emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd[1], SRTYPE_LSR, tmp2[0]), ctx); + emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_ASL, rt), ctx); arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); @@ -712,21 +735,21 @@ static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - s8 rt, rd, rm; + const s8 *rd; + s8 rt; /* Setup Operands */ rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); - rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do the ARSH operation */ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); - emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); - emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); + emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx); _emit(ARM_COND_MI, ARM_B(0), ctx); - emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASR, tmp2[0]), ctx); - emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_ASR, rt), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx); + emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_ASR, rt), ctx); arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); @@ -737,20 +760,20 @@ static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - s8 rt, rd, rm; + const s8 *rd; + s8 rt; /* Setup Operands */ rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); - rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do RSH operation */ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); - emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); - emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); - emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx); - emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_LSR, rt), ctx); + emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_LSR, tmp2[0]), ctx); + emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_LSR, rt), ctx); arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); @@ -761,27 +784,25 @@ static inline void emit_a32_lsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - s8 rd, rm; + const s8 *rd; /* Setup operands */ - rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do LSH operation */ if (val < 32) { - emit(ARM_MOV_SI(tmp2[0], rm, SRTYPE_ASL, val), ctx); - emit(ARM_ORR_SI(rm, tmp2[0], rd, SRTYPE_LSR, 32 - val), ctx); - emit(ARM_MOV_SI(rd, rd, SRTYPE_ASL, val), ctx); + emit(ARM_MOV_SI(tmp2[0], rd[0], SRTYPE_ASL, val), ctx); + emit(ARM_ORR_SI(rd[0], tmp2[0], rd[1], SRTYPE_LSR, 32 - val), ctx); + emit(ARM_MOV_SI(rd[1], rd[1], SRTYPE_ASL, val), ctx); } else { if (val == 32) - emit(ARM_MOV_R(rm, rd), ctx); + emit(ARM_MOV_R(rd[0], rd[1]), ctx); else - emit(ARM_MOV_SI(rm, rd, SRTYPE_ASL, val - 32), ctx); - emit(ARM_EOR_R(rd, rd, rd), ctx); + emit(ARM_MOV_SI(rd[0], rd[1], SRTYPE_ASL, val - 32), ctx); + emit(ARM_EOR_R(rd[1], rd[1], rd[1]), ctx); } - arm_bpf_put_reg32(dst_lo, rd, ctx); - arm_bpf_put_reg32(dst_hi, rm, ctx); + arm_bpf_put_reg64(dst, rd, ctx); } /* dst = dst >> val */ @@ -789,27 +810,25 @@ static inline void emit_a32_rsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - s8 rd, rm; + const s8 *rd; /* Setup operands */ - rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do LSR operation */ if (val < 32) { - emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx); - emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx); - emit(ARM_MOV_SI(rm, rm, SRTYPE_LSR, val), ctx); + emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); + emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); + emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_LSR, val), ctx); } else if (val == 32) { - emit(ARM_MOV_R(rd, rm), ctx); - emit(ARM_MOV_I(rm, 0), ctx); + emit(ARM_MOV_R(rd[1], rd[0]), ctx); + emit(ARM_MOV_I(rd[0], 0), ctx); } else { - emit(ARM_MOV_SI(rd, rm, SRTYPE_LSR, val - 32), ctx); - emit(ARM_MOV_I(rm, 0), ctx); + emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_LSR, val - 32), ctx); + emit(ARM_MOV_I(rd[0], 0), ctx); } - arm_bpf_put_reg32(dst_lo, rd, ctx); - arm_bpf_put_reg32(dst_hi, rm, ctx); + arm_bpf_put_reg64(dst, rd, ctx); } /* dst = dst >> val (signed) */ @@ -817,51 +836,47 @@ static inline void emit_a32_arsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - s8 rd, rm; + const s8 *rd; /* Setup operands */ - rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do ARSH operation */ if (val < 32) { - emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx); - emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx); - emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, val), ctx); + emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); + emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); + emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, val), ctx); } else if (val == 32) { - emit(ARM_MOV_R(rd, rm), ctx); - emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx); + emit(ARM_MOV_R(rd[1], rd[0]), ctx); + emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx); } else { - emit(ARM_MOV_SI(rd, rm, SRTYPE_ASR, val - 32), ctx); - emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx); + emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_ASR, val - 32), ctx); + emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx); } - arm_bpf_put_reg32(dst_lo, rd, ctx); - arm_bpf_put_reg32(dst_hi, rm, ctx); + arm_bpf_put_reg64(dst, rd, ctx); } static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - s8 rd, rm, rt, rn; + const s8 *rd, *rt; /* Setup operands for multiplication */ - rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); - rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); - rn = arm_bpf_get_reg32(src_hi, tmp2[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); + rt = arm_bpf_get_reg64(src, tmp2, ctx); /* Do Multiplication */ - emit(ARM_MUL(ARM_IP, rd, rn), ctx); - emit(ARM_MUL(ARM_LR, rm, rt), ctx); + emit(ARM_MUL(ARM_IP, rd[1], rt[0]), ctx); + emit(ARM_MUL(ARM_LR, rd[0], rt[1]), ctx); emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx); - emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx); - emit(ARM_ADD_R(rm, ARM_LR, rm), ctx); + emit(ARM_UMULL(ARM_IP, rd[0], rd[1], rt[1]), ctx); + emit(ARM_ADD_R(rd[0], ARM_LR, rd[0]), ctx); arm_bpf_put_reg32(dst_lo, ARM_IP, ctx); - arm_bpf_put_reg32(dst_hi, rm, ctx); + arm_bpf_put_reg32(dst_hi, rd[0], ctx); } /* *(size *)(dst + off) = src */ @@ -918,17 +933,17 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, case BPF_B: /* Load a Byte */ emit(ARM_LDRB_I(rd[1], rm, off), ctx); - emit_a32_mov_i(dst[0], 0, ctx); + emit_a32_mov_i(rd[0], 0, ctx); break; case BPF_H: /* Load a HalfWord */ emit(ARM_LDRH_I(rd[1], rm, off), ctx); - emit_a32_mov_i(dst[0], 0, ctx); + emit_a32_mov_i(rd[0], 0, ctx); break; case BPF_W: /* Load a Word */ emit(ARM_LDR_I(rd[1], rm, off), ctx); - emit_a32_mov_i(dst[0], 0, ctx); + emit_a32_mov_i(rd[0], 0, ctx); break; case BPF_DW: /* Load a Double Word */ @@ -936,9 +951,7 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, emit(ARM_LDR_I(rd[0], rm, off + 4), ctx); break; } - arm_bpf_put_reg32(dst[1], rd[1], ctx); - if (sz == BPF_DW) - arm_bpf_put_reg32(dst[0], rd[0], ctx); + arm_bpf_put_reg64(dst, rd, ctx); } /* Arithmatic Operation */ @@ -982,11 +995,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; const s8 *tcc = bpf2a32[TCALL_CNT]; + const s8 *tc; const int idx0 = ctx->idx; #define cur_offset (ctx->idx - idx0) #define jmp_offset (out_offset - (cur_offset) - 2) u32 off, lo, hi; - s8 r_array, r_index, r_tc_lo, r_tc_hi; + s8 r_array, r_index; /* if (index >= array->map.max_entries) * goto out; @@ -1008,15 +1022,13 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ lo = (u32)MAX_TAIL_CALL_CNT; hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32); - r_tc_lo = arm_bpf_get_reg32(tcc[1], tmp[1], ctx); - r_tc_hi = arm_bpf_get_reg32(tcc[0], tmp[0], ctx); - emit(ARM_CMP_I(r_tc_hi, hi), ctx); - _emit(ARM_COND_EQ, ARM_CMP_I(r_tc_lo, lo), ctx); + tc = arm_bpf_get_reg64(tcc, tmp, ctx); + emit(ARM_CMP_I(tc[0], hi), ctx); + _emit(ARM_COND_EQ, ARM_CMP_I(tc[1], lo), ctx); _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx); - emit(ARM_ADDS_I(r_tc_lo, r_tc_lo, 1), ctx); - emit(ARM_ADC_I(r_tc_hi, r_tc_hi, 0), ctx); - arm_bpf_put_reg32(tcc[1], r_tc_lo, ctx); - arm_bpf_put_reg32(tcc[0], r_tc_hi, ctx); + emit(ARM_ADDS_I(tc[1], tc[1], 1), ctx); + emit(ARM_ADC_I(tc[0], tc[0], 0), ctx); + arm_bpf_put_reg64(tcc, tmp, ctx); /* prog = array->ptrs[index] * if (prog == NULL) @@ -1183,7 +1195,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const s32 imm = insn->imm; const int i = insn - ctx->prog->insnsi; const bool is64 = BPF_CLASS(code) == BPF_ALU64; - s8 rd, rt, rm, rn; + const s8 *rd, *rs; + s8 rd_lo, rt, rm, rn; s32 jmp_offset; #define check_imm(bits, imm) do { \ @@ -1270,7 +1283,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU | BPF_MOD | BPF_X: - rd = arm_bpf_get_reg32(dst_lo, tmp2[1], ctx); + rd_lo = arm_bpf_get_reg32(dst_lo, tmp2[1], ctx); switch (BPF_SRC(code)) { case BPF_X: rt = arm_bpf_get_reg32(src_lo, tmp2[0], ctx); @@ -1283,8 +1296,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) rt = src_lo; break; } - emit_udivmod(rd, rd, rt, ctx, BPF_OP(code)); - arm_bpf_put_reg32(dst_lo, rd, ctx); + emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code)); + arm_bpf_put_reg32(dst_lo, rd_lo, ctx); emit_a32_mov_i(dst_hi, 0, ctx); break; case BPF_ALU64 | BPF_DIV | BPF_K: @@ -1364,21 +1377,20 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) /* dst = htobe(dst) */ case BPF_ALU | BPF_END | BPF_FROM_LE: case BPF_ALU | BPF_END | BPF_FROM_BE: - rt = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rd = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); if (BPF_SRC(code) == BPF_FROM_LE) goto emit_bswap_uxt; switch (imm) { case 16: - emit_rev16(rt, rt, ctx); + emit_rev16(rd[1], rd[1], ctx); goto emit_bswap_uxt; case 32: - emit_rev32(rt, rt, ctx); + emit_rev32(rd[1], rd[1], ctx); goto emit_bswap_uxt; case 64: - emit_rev32(ARM_LR, rt, ctx); - emit_rev32(rt, rd, ctx); - emit(ARM_MOV_R(rd, ARM_LR), ctx); + emit_rev32(ARM_LR, rd[1], ctx); + emit_rev32(rd[1], rd[0], ctx); + emit(ARM_MOV_R(rd[0], ARM_LR), ctx); break; } goto exit; @@ -1388,23 +1400,22 @@ emit_bswap_uxt: /* zero-extend 16 bits into 64 bits */ #if __LINUX_ARM_ARCH__ < 6 emit_a32_mov_i(tmp2[1], 0xffff, ctx); - emit(ARM_AND_R(rt, rt, tmp2[1]), ctx); + emit(ARM_AND_R(rd[1], rd[1], tmp2[1]), ctx); #else /* ARMv6+ */ - emit(ARM_UXTH(rt, rt), ctx); + emit(ARM_UXTH(rd[1], rd[1]), ctx); #endif - emit(ARM_EOR_R(rd, rd, rd), ctx); + emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); break; case 32: /* zero-extend 32 bits into 64 bits */ - emit(ARM_EOR_R(rd, rd, rd), ctx); + emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); break; case 64: /* nop */ break; } exit: - arm_bpf_put_reg32(dst_lo, rt, ctx); - arm_bpf_put_reg32(dst_hi, rd, ctx); + arm_bpf_put_reg64(dst, rd, ctx); break; /* dst = imm64 */ case BPF_LD | BPF_IMM | BPF_DW: @@ -1459,15 +1470,14 @@ exit: { u8 sz = BPF_SIZE(code); - rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); - rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx); + rs = arm_bpf_get_reg64(src, tmp2, ctx); /* Store the value */ if (BPF_SIZE(code) == BPF_DW) { - emit_str_r(dst_lo, rn, off, ctx, BPF_W); - emit_str_r(dst_lo, rm, off+4, ctx, BPF_W); + emit_str_r(dst_lo, rs[1], off, ctx, BPF_W); + emit_str_r(dst_lo, rs[0], off+4, ctx, BPF_W); } else { - emit_str_r(dst_lo, rn, off, ctx, sz); + emit_str_r(dst_lo, rs[1], off, ctx, sz); } break; } @@ -1527,11 +1537,10 @@ exit: emit_a32_mov_i64(true, tmp2, imm, ctx); go_jmp: /* Setup destination register */ - rt = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rd = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Check for the condition */ - emit_ar_r(rd, rt, rm, rn, ctx, BPF_OP(code)); + emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code)); /* Setup JUMP instruction */ jmp_offset = bpf2a32_offset(i+off, i, ctx); From 96cced4e774a2728710c8f8f48441fc7b29d6177 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:32:02 +0100 Subject: [PATCH 32/61] ARM: net: bpf: access eBPF scratch space using ARM FP register Access the eBPF scratch space using the frame pointer rather than our stack pointer, as the offsets from the ARM frame pointer are constant across all eBPF programs. Since we no longer reference the scratch space registers from the stack pointer, this simplifies emit_push_r64() as it no longer needs to know how many words are pushed onto the stack. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 44 ++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 45a3599e94a4..753b5b2b2e3d 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -108,6 +108,12 @@ enum { #define STACK_OFFSET(k) (-4 - (k) * 4) #define SCRATCH_SIZE (BPF_JIT_SCRATCH_REGS * 4) +#ifdef CONFIG_FRAME_POINTER +#define EBPF_SCRATCH_TO_ARM_FP(x) ((x) - 4 * hweight16(CALLEE_PUSH_MASK) - 4) +#else +#define EBPF_SCRATCH_TO_ARM_FP(x) (x) +#endif + #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ #define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */ @@ -294,9 +300,6 @@ static void jit_fill_hole(void *area, unsigned int size) #define _STACK_SIZE (ctx->prog->aux->stack_depth + SCRATCH_SIZE) #define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) -/* Get the offset of eBPF REGISTERs stored on scratch space. */ -#define STACK_VAR(off) (STACK_SIZE + (off)) - #if __LINUX_ARM_ARCH__ < 7 static u16 imm_offset(u32 k, struct jit_ctx *ctx) @@ -472,7 +475,7 @@ static bool is_stacked(s8 reg) static s8 arm_bpf_get_reg32(s8 reg, s8 tmp, struct jit_ctx *ctx) { if (is_stacked(reg)) { - emit(ARM_LDR_I(tmp, ARM_SP, STACK_VAR(reg)), ctx); + emit(ARM_LDR_I(tmp, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx); reg = tmp; } return reg; @@ -482,8 +485,10 @@ static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp, struct jit_ctx *ctx) { if (is_stacked(reg[1])) { - emit(ARM_LDR_I(tmp[1], ARM_SP, STACK_VAR(reg[1])), ctx); - emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(reg[0])), ctx); + emit(ARM_LDR_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), + ctx); + emit(ARM_LDR_I(tmp[0], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[0])), + ctx); reg = tmp; } return reg; @@ -496,7 +501,7 @@ static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp, static void arm_bpf_put_reg32(s8 reg, s8 src, struct jit_ctx *ctx) { if (is_stacked(reg)) - emit(ARM_STR_I(src, ARM_SP, STACK_VAR(reg)), ctx); + emit(ARM_STR_I(src, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx); else if (reg != src) emit(ARM_MOV_R(reg, src), ctx); } @@ -505,8 +510,10 @@ static void arm_bpf_put_reg64(const s8 *reg, const s8 *src, struct jit_ctx *ctx) { if (is_stacked(reg[1])) { - emit(ARM_STR_I(src[1], ARM_SP, STACK_VAR(reg[1])), ctx); - emit(ARM_STR_I(src[0], ARM_SP, STACK_VAR(reg[0])), ctx); + emit(ARM_STR_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), + ctx); + emit(ARM_STR_I(src[0], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[0])), + ctx); } else { if (reg[1] != src[1]) emit(ARM_MOV_R(reg[1], src[1]), ctx); @@ -1103,16 +1110,15 @@ static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx) } // push the scratch stack register on top of the stack -static inline void emit_push_r64(const s8 src[], const u8 shift, - struct jit_ctx *ctx) +static inline void emit_push_r64(const s8 src[], struct jit_ctx *ctx) { const s8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *rt; u16 reg_set = 0; - emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(src[1]+shift)), ctx); - emit(ARM_LDR_I(tmp2[0], ARM_SP, STACK_VAR(src[0]+shift)), ctx); + rt = arm_bpf_get_reg64(src, tmp2, ctx); - reg_set = (1 << tmp2[1]) | (1 << tmp2[0]); + reg_set = (1 << rt[1]) | (1 << rt[0]); emit(ARM_PUSH(reg_set), ctx); } @@ -1155,8 +1161,8 @@ static void build_prologue(struct jit_ctx *ctx) emit(ARM_MOV_R(r3, r4), ctx); emit(ARM_MOV_R(r2, r0), ctx); /* Initialize Tail Count */ - emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[0])), ctx); - emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[1])), ctx); + emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[0])), ctx); + emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[1])), ctx); /* end of prologue */ } @@ -1606,9 +1612,9 @@ go_jmp: emit_a32_mov_r64(true, r0, r1, ctx); emit_a32_mov_r64(true, r1, r2, ctx); - emit_push_r64(r5, 0, ctx); - emit_push_r64(r4, 8, ctx); - emit_push_r64(r3, 16, ctx); + emit_push_r64(r5, ctx); + emit_push_r64(r4, ctx); + emit_push_r64(r3, ctx); emit_a32_mov_i(tmp[1], func, ctx); emit_blx_r(tmp[1], ctx); From 1ca3b17b777c4136f9dba0195e13502c445f4ade Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:32:07 +0100 Subject: [PATCH 33/61] ARM: net: bpf: imm12 constant conversion Provide a version of the imm8m() function that the compiler can optimise when used with a constant expression. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 50 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 753b5b2b2e3d..2cc66aa44dfe 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -236,10 +236,56 @@ static inline void emit(u32 inst, struct jit_ctx *ctx) _emit(ARM_COND_AL, inst, ctx); } +/* + * This is rather horrid, but necessary to convert an integer constant + * to an immediate operand for the opcodes, and be able to detect at + * build time whether the constant can't be converted (iow, usable in + * BUILD_BUG_ON()). + */ +#define imm12val(v, s) (rol32(v, (s)) | (s) << 7) +#define const_imm8m(x) \ + ({ int r; \ + u32 v = (x); \ + if (!(v & ~0x000000ff)) \ + r = imm12val(v, 0); \ + else if (!(v & ~0xc000003f)) \ + r = imm12val(v, 2); \ + else if (!(v & ~0xf000000f)) \ + r = imm12val(v, 4); \ + else if (!(v & ~0xfc000003)) \ + r = imm12val(v, 6); \ + else if (!(v & ~0xff000000)) \ + r = imm12val(v, 8); \ + else if (!(v & ~0x3fc00000)) \ + r = imm12val(v, 10); \ + else if (!(v & ~0x0ff00000)) \ + r = imm12val(v, 12); \ + else if (!(v & ~0x03fc0000)) \ + r = imm12val(v, 14); \ + else if (!(v & ~0x00ff0000)) \ + r = imm12val(v, 16); \ + else if (!(v & ~0x003fc000)) \ + r = imm12val(v, 18); \ + else if (!(v & ~0x000ff000)) \ + r = imm12val(v, 20); \ + else if (!(v & ~0x0003fc00)) \ + r = imm12val(v, 22); \ + else if (!(v & ~0x0000ff00)) \ + r = imm12val(v, 24); \ + else if (!(v & ~0x00003fc0)) \ + r = imm12val(v, 26); \ + else if (!(v & ~0x00000ff0)) \ + r = imm12val(v, 28); \ + else if (!(v & ~0x000003fc)) \ + r = imm12val(v, 30); \ + else \ + r = -1; \ + r; }) + /* * Checks if immediate value can be converted to imm12(12 bits) value. */ -static int16_t imm8m(u32 x) +static int imm8m(u32 x) { u32 rot; @@ -249,6 +295,8 @@ static int16_t imm8m(u32 x) return -1; } +#define imm8m(x) (__builtin_constant_p(x) ? const_imm8m(x) : imm8m(x)) + static u32 arm_bpf_ldst_imm12(u32 op, u8 rt, u8 rn, s16 imm12) { op |= rt << 12 | rn << 16; From 828e2b90e8e9b5bd844a25f22ceeb8df4dd18b07 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:32:12 +0100 Subject: [PATCH 34/61] ARM: net: bpf: use immediate forms of instructions where possible Rather than moving constants to a register and then using them in a subsequent instruction, use them directly in the desired instruction cutting out the "middle" register. This removes two instructions from the tail call code path. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 21 ++++++++++++--------- arch/arm/net/bpf_jit_32.h | 1 + 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 2cc66aa44dfe..645653e1931e 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -304,7 +304,7 @@ static u32 arm_bpf_ldst_imm12(u32 op, u8 rt, u8 rn, s16 imm12) op |= ARM_INST_LDST__U; else imm12 = -imm12; - return op | (imm12 & 0xfff); + return op | (imm12 & ARM_INST_LDST__IMM12); } static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8) @@ -1054,17 +1054,19 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) const int idx0 = ctx->idx; #define cur_offset (ctx->idx - idx0) #define jmp_offset (out_offset - (cur_offset) - 2) - u32 off, lo, hi; + u32 lo, hi; s8 r_array, r_index; + int off; /* if (index >= array->map.max_entries) * goto out; */ + BUILD_BUG_ON(offsetof(struct bpf_array, map.max_entries) > + ARM_INST_LDST__IMM12); off = offsetof(struct bpf_array, map.max_entries); /* array->map.max_entries */ - emit_a32_mov_i(tmp[1], off, ctx); r_array = arm_bpf_get_reg32(r2[1], tmp2[1], ctx); - emit(ARM_LDR_R(tmp[1], r_array, tmp[1]), ctx); + emit(ARM_LDR_I(tmp[1], r_array, off), ctx); /* index is 32-bit for arrays */ r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); /* index >= array->map.max_entries */ @@ -1089,10 +1091,10 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) * if (prog == NULL) * goto out; */ - off = offsetof(struct bpf_array, ptrs); - emit_a32_mov_i(tmp[1], off, ctx); + BUILD_BUG_ON(imm8m(offsetof(struct bpf_array, ptrs)) < 0); + off = imm8m(offsetof(struct bpf_array, ptrs)); r_array = arm_bpf_get_reg32(r2[1], tmp2[1], ctx); - emit(ARM_ADD_R(tmp[1], r_array, tmp[1]), ctx); + emit(ARM_ADD_I(tmp[1], r_array, off), ctx); r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); emit(ARM_MOV_SI(tmp[0], r_index, SRTYPE_ASL, 2), ctx); emit(ARM_LDR_R(tmp[1], tmp[1], tmp[0]), ctx); @@ -1100,9 +1102,10 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); /* goto *(prog->bpf_func + prologue_size); */ + BUILD_BUG_ON(offsetof(struct bpf_prog, bpf_func) > + ARM_INST_LDST__IMM12); off = offsetof(struct bpf_prog, bpf_func); - emit_a32_mov_i(tmp2[1], off, ctx); - emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx); + emit(ARM_LDR_I(tmp[1], tmp[1], off), ctx); emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx); emit_bx_r(tmp[1], ctx); diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index c55bc39d3e22..dee8a76fb0bc 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -78,6 +78,7 @@ #define ARM_INST_EOR_I 0x02200000 #define ARM_INST_LDST__U 0x00800000 +#define ARM_INST_LDST__IMM12 0x00000fff #define ARM_INST_LDRB_I 0x05500000 #define ARM_INST_LDRB_R 0x07d00000 #define ARM_INST_LDRH_I 0x015000b0 From 2b6958ef1151452cb2160fde75a5c5382b512c34 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:32:17 +0100 Subject: [PATCH 35/61] ARM: net: bpf: use ldr instructions with shifted rm register Rather than pre-shifting the rm register for the ldr in the tail call, shift it in the load instruction. This eliminates one unnecessary instruction. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 3 +-- arch/arm/net/bpf_jit_32.h | 4 ++++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 645653e1931e..e22dc828420c 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1096,8 +1096,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) r_array = arm_bpf_get_reg32(r2[1], tmp2[1], ctx); emit(ARM_ADD_I(tmp[1], r_array, off), ctx); r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); - emit(ARM_MOV_SI(tmp[0], r_index, SRTYPE_ASL, 2), ctx); - emit(ARM_LDR_R(tmp[1], tmp[1], tmp[0]), ctx); + emit(ARM_LDR_R_SI(tmp[1], tmp[1], r_index, SRTYPE_ASL, 2), ctx); emit(ARM_CMP_I(tmp[1], 0), ctx); _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index dee8a76fb0bc..e541a7a6139a 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -188,6 +188,10 @@ #define ARM_LDR_R(rt, rn, rm) (ARM_INST_LDR_R | ARM_INST_LDST__U \ | (rt) << 12 | (rn) << 16 \ | (rm)) +#define ARM_LDR_R_SI(rt, rn, rm, type, imm) \ + (ARM_INST_LDR_R | ARM_INST_LDST__U \ + | (rt) << 12 | (rn) << 16 \ + | (imm) << 7 | (type) << 5 | (rm)) #define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | ARM_INST_LDST__U \ | (rt) << 12 | (rn) << 16 \ | (rm)) From aaffd2f5c3d58f154ca7b3d104a2ee6b6e40bc6b Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:32:22 +0100 Subject: [PATCH 36/61] ARM: net: bpf: avoid reloading 'index' Avoid reloading 'index' after we have validated it - it remains in tmp2[1] up to the point that we begin the code to index the pointer array, so with a little rearrangement of the registers, we can use the already loaded value. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index e22dc828420c..0a8b3d0903c4 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1073,6 +1073,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) emit(ARM_CMP_R(r_index, tmp[1]), ctx); _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); + /* tmp2[1] = index */ + /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; * tail_call_cnt++; @@ -1093,9 +1095,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ BUILD_BUG_ON(imm8m(offsetof(struct bpf_array, ptrs)) < 0); off = imm8m(offsetof(struct bpf_array, ptrs)); - r_array = arm_bpf_get_reg32(r2[1], tmp2[1], ctx); + r_array = arm_bpf_get_reg32(r2[1], tmp2[0], ctx); emit(ARM_ADD_I(tmp[1], r_array, off), ctx); - r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); emit(ARM_LDR_R_SI(tmp[1], tmp[1], r_index, SRTYPE_ASL, 2), ctx); emit(ARM_CMP_I(tmp[1], 0), ctx); _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); From b50452299864fbc00a576241e1490541c8754d50 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:32:28 +0100 Subject: [PATCH 37/61] ARM: net: bpf: avoid reloading 'array' Rearranging the order of the initial tail call code a little allows is to avoid reloading the 'array' pointer. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 0a8b3d0903c4..f0cad9692952 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1064,16 +1064,16 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) BUILD_BUG_ON(offsetof(struct bpf_array, map.max_entries) > ARM_INST_LDST__IMM12); off = offsetof(struct bpf_array, map.max_entries); - /* array->map.max_entries */ - r_array = arm_bpf_get_reg32(r2[1], tmp2[1], ctx); - emit(ARM_LDR_I(tmp[1], r_array, off), ctx); + r_array = arm_bpf_get_reg32(r2[1], tmp2[0], ctx); /* index is 32-bit for arrays */ r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); + /* array->map.max_entries */ + emit(ARM_LDR_I(tmp[1], r_array, off), ctx); /* index >= array->map.max_entries */ emit(ARM_CMP_R(r_index, tmp[1]), ctx); _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); - /* tmp2[1] = index */ + /* tmp2[0] = array, tmp2[1] = index */ /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; @@ -1095,7 +1095,6 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ BUILD_BUG_ON(imm8m(offsetof(struct bpf_array, ptrs)) < 0); off = imm8m(offsetof(struct bpf_array, ptrs)); - r_array = arm_bpf_get_reg32(r2[1], tmp2[0], ctx); emit(ARM_ADD_I(tmp[1], r_array, off), ctx); emit(ARM_LDR_R_SI(tmp[1], tmp[1], r_index, SRTYPE_ASL, 2), ctx); emit(ARM_CMP_I(tmp[1], 0), ctx); From bef8968df8a6e3eb91081d68affc64b8d87d5721 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:32:33 +0100 Subject: [PATCH 38/61] ARM: net: bpf: always use odd/even register pair Always use an odd/even register pair for our 64-bit registers, so that we're able to use the double-word load/store instructions in the future. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index f0cad9692952..006ff9615850 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -47,27 +47,27 @@ * The callee saved registers depends on whether frame pointers are enabled. * With frame pointers (to be compliant with the ABI): * - * high - * original ARM_SP => +------------------+ \ - * | pc | | - * current ARM_FP => +------------------+ } callee saved registers - * |r4-r8,r10,fp,ip,lr| | - * +------------------+ / - * low + * high + * original ARM_SP => +--------------+ \ + * | pc | | + * current ARM_FP => +--------------+ } callee saved registers + * |r4-r9,fp,ip,lr| | + * +--------------+ / + * low * * Without frame pointers: * - * high - * original ARM_SP => +------------------+ - * | r4-r8,r10,fp,lr | callee saved registers - * current ARM_FP => +------------------+ - * low + * high + * original ARM_SP => +--------------+ + * | r4-r9,fp,lr | callee saved registers + * current ARM_FP => +--------------+ + * low * * When popping registers off the stack at the end of a BPF function, we * reference them via the current ARM_FP register. */ #define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ - 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \ + 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \ 1 << ARM_FP) #define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) #define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) @@ -157,7 +157,7 @@ static const s8 bpf2a32[][2] = { * for constant blindings and others. */ [TMP_REG_1] = {ARM_R7, ARM_R6}, - [TMP_REG_2] = {ARM_R10, ARM_R8}, + [TMP_REG_2] = {ARM_R9, ARM_R8}, /* Tail call count. Stored on stack scratch space. */ [TCALL_CNT] = {STACK_OFFSET(BPF_TC_HI), STACK_OFFSET(BPF_TC_LO)}, /* temporary register for blinding constants. From 8c9602d38c7262664c31332101f540c1e179797d Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:32:38 +0100 Subject: [PATCH 39/61] ARM: net: bpf: use double-word load/stores where available Use double-word load and stores where support for this instruction is supported by the CPU architecture. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 57 ++++++++++++++++++++++++++++++--------- arch/arm/net/bpf_jit_32.h | 2 ++ 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 006ff9615850..a9f68a924800 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "bpf_jit_32.h" @@ -192,6 +193,7 @@ struct jit_ctx { unsigned int idx; unsigned int prologue_bytes; unsigned int epilogue_offset; + unsigned int cpu_architecture; u32 flags; u32 *offsets; u32 *target; @@ -319,10 +321,12 @@ static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8) #define ARM_LDR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDR_I, rt, rn, off) #define ARM_LDRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDRB_I, rt, rn, off) +#define ARM_LDRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off) #define ARM_LDRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off) #define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off) #define ARM_STRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off) +#define ARM_STRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off) #define ARM_STRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRH_I, rt, rn, off) /* @@ -533,10 +537,16 @@ static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp, struct jit_ctx *ctx) { if (is_stacked(reg[1])) { - emit(ARM_LDR_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), - ctx); - emit(ARM_LDR_I(tmp[0], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[0])), - ctx); + if (__LINUX_ARM_ARCH__ >= 6 || + ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { + emit(ARM_LDRD_I(tmp[1], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); + } else { + emit(ARM_LDR_I(tmp[1], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); + emit(ARM_LDR_I(tmp[0], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); + } reg = tmp; } return reg; @@ -558,10 +568,16 @@ static void arm_bpf_put_reg64(const s8 *reg, const s8 *src, struct jit_ctx *ctx) { if (is_stacked(reg[1])) { - emit(ARM_STR_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), - ctx); - emit(ARM_STR_I(src[0], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[0])), - ctx); + if (__LINUX_ARM_ARCH__ >= 6 || + ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { + emit(ARM_STRD_I(src[1], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); + } else { + emit(ARM_STR_I(src[1], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); + emit(ARM_STR_I(src[0], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); + } } else { if (reg[1] != src[1]) emit(ARM_MOV_R(reg[1], src[1]), ctx); @@ -711,13 +727,27 @@ static inline void emit_a32_mov_r(const s8 dst, const s8 src, static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], const s8 src[], struct jit_ctx *ctx) { - emit_a32_mov_r(dst_lo, src_lo, ctx); - if (is64) { - /* complete 8 byte move */ - emit_a32_mov_r(dst_hi, src_hi, ctx); - } else { + if (!is64) { + emit_a32_mov_r(dst_lo, src_lo, ctx); /* Zero out high 4 bytes */ emit_a32_mov_i(dst_hi, 0, ctx); + } else if (__LINUX_ARM_ARCH__ < 6 && + ctx->cpu_architecture < CPU_ARCH_ARMv5TE) { + /* complete 8 byte move */ + emit_a32_mov_r(dst_lo, src_lo, ctx); + emit_a32_mov_r(dst_hi, src_hi, ctx); + } else if (is_stacked(src_lo) && is_stacked(dst_lo)) { + const u8 *tmp = bpf2a32[TMP_REG_1]; + + emit(ARM_LDRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); + emit(ARM_STRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); + } else if (is_stacked(src_lo)) { + emit(ARM_LDRD_I(dst[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); + } else if (is_stacked(dst_lo)) { + emit(ARM_STRD_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); + } else { + emit(ARM_MOV_R(dst[0], src[0]), ctx); + emit(ARM_MOV_R(dst[1], src[1]), ctx); } } @@ -1778,6 +1808,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; + ctx.cpu_architecture = cpu_architecture(); /* Not able to allocate memory for offsets[] , then * we must fall back to the interpreter diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index e541a7a6139a..f4e58bcdaa43 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -81,6 +81,7 @@ #define ARM_INST_LDST__IMM12 0x00000fff #define ARM_INST_LDRB_I 0x05500000 #define ARM_INST_LDRB_R 0x07d00000 +#define ARM_INST_LDRD_I 0x014000d0 #define ARM_INST_LDRH_I 0x015000b0 #define ARM_INST_LDRH_R 0x019000b0 #define ARM_INST_LDR_I 0x05100000 @@ -128,6 +129,7 @@ #define ARM_INST_STR_I 0x05000000 #define ARM_INST_STRB_I 0x05400000 +#define ARM_INST_STRD_I 0x014000f0 #define ARM_INST_STRH_I 0x014000b0 #define ARM_INST_TST_R 0x01100000 From f9ff5018c13b4918c98178ee81d41372b455201e Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 12 Jul 2018 21:50:41 +0100 Subject: [PATCH 40/61] ARM: net: bpf: improve 64-bit load immediate implementation Rather than writing each 32-bit half of the 64-bit immediate value separately when the register is on the stack: movw r6, #45056 ; 0xb000 movt r6, #60979 ; 0xee33 str r6, [fp, #-44] ; 0xffffffd4 mov r6, #0 str r6, [fp, #-40] ; 0xffffffd8 arrange to use the double-word store when available instead: movw r6, #45056 ; 0xb000 movt r6, #60979 ; 0xee33 mov r7, #0 strd r6, [fp, #-44] ; 0xffffffd4 Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index a9f68a924800..6558bd73bbb9 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -599,9 +599,20 @@ static inline void emit_a32_mov_i(const s8 dst, const u32 val, } } +static void emit_a32_mov_i64(const s8 dst[], u64 val, struct jit_ctx *ctx) +{ + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *rd = is_stacked(dst_lo) ? tmp : dst; + + emit_mov_i(rd[1], (u32)val, ctx); + emit_mov_i(rd[0], val >> 32, ctx); + + arm_bpf_put_reg64(dst, rd, ctx); +} + /* Sign extended move */ -static inline void emit_a32_mov_i64(const bool is64, const s8 dst[], - const u32 val, struct jit_ctx *ctx) { +static inline void emit_a32_mov_se_i64(const bool is64, const s8 dst[], + const u32 val, struct jit_ctx *ctx) { u32 hi = 0; if (is64 && (val & (1<<31))) @@ -1309,7 +1320,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; case BPF_K: /* Sign-extend immediate value to destination reg */ - emit_a32_mov_i64(is64, dst, imm, ctx); + emit_a32_mov_se_i64(is64, dst, imm, ctx); break; } break; @@ -1358,7 +1369,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) * value into temporary reg and then it would be * safe to do the operation on it. */ - emit_a32_mov_i64(is64, tmp2, imm, ctx); + emit_a32_mov_se_i64(is64, tmp2, imm, ctx); emit_a32_alu_r64(is64, dst, tmp2, ctx, BPF_OP(code)); break; } @@ -1454,7 +1465,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) * reg then it would be safe to do the operation * on it. */ - emit_a32_mov_i64(is64, tmp2, imm, ctx); + emit_a32_mov_se_i64(is64, tmp2, imm, ctx); emit_a32_mul_r64(dst, tmp2, ctx); break; } @@ -1506,12 +1517,9 @@ exit: /* dst = imm64 */ case BPF_LD | BPF_IMM | BPF_DW: { - const struct bpf_insn insn1 = insn[1]; - u32 hi, lo = imm; + u64 val = (u32)imm | (u64)insn[1].imm << 32; - hi = insn1.imm; - emit_a32_mov_i(dst_lo, lo, ctx); - emit_a32_mov_i(dst_hi, hi, ctx); + emit_a32_mov_i64(dst, val, ctx); return 1; } @@ -1531,7 +1539,7 @@ exit: switch (BPF_SIZE(code)) { case BPF_DW: /* Sign-extend immediate value into temp reg */ - emit_a32_mov_i64(true, tmp2, imm, ctx); + emit_a32_mov_se_i64(true, tmp2, imm, ctx); emit_str_r(dst_lo, tmp2[1], off, ctx, BPF_W); emit_str_r(dst_lo, tmp2[0], off+4, ctx, BPF_W); break; @@ -1620,7 +1628,7 @@ exit: rm = tmp2[0]; rn = tmp2[1]; /* Sign-extend immediate value */ - emit_a32_mov_i64(true, tmp2, imm, ctx); + emit_a32_mov_se_i64(true, tmp2, imm, ctx); go_jmp: /* Setup destination register */ rd = arm_bpf_get_reg64(dst, tmp, ctx); From 077513b89424ba814432b1cd7d7f793958059ed8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 12 Jul 2018 21:50:46 +0100 Subject: [PATCH 41/61] ARM: net: bpf: improve 64-bit sign-extended immediate load Improve the 64-bit sign-extended immediate from: mov r6, #1 str r6, [fp, #-52] ; 0xffffffcc mov r6, #0 str r6, [fp, #-48] ; 0xffffffd0 to: mov r6, #1 mov r7, #0 strd r6, [fp, #-52] ; 0xffffffcc Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 6558bd73bbb9..3a182e618441 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -613,12 +613,11 @@ static void emit_a32_mov_i64(const s8 dst[], u64 val, struct jit_ctx *ctx) /* Sign extended move */ static inline void emit_a32_mov_se_i64(const bool is64, const s8 dst[], const u32 val, struct jit_ctx *ctx) { - u32 hi = 0; + u64 val64 = val; if (is64 && (val & (1<<31))) - hi = (u32)~0; - emit_a32_mov_i(dst_lo, val, ctx); - emit_a32_mov_i(dst_hi, hi, ctx); + val64 |= 0xffffffff00000000ULL; + emit_a32_mov_i64(dst, val64, ctx); } static inline void emit_a32_add_r(const u8 dst, const u8 src, From c5eae692571d6c44efeb27fe78a5ed8b5ded9b4a Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 12 Jul 2018 21:50:51 +0100 Subject: [PATCH 42/61] ARM: net: bpf: improve 64-bit store implementation Improve the 64-bit store implementation from: ldr r6, [fp, #-8] str r8, [r6] ldr r6, [fp, #-8] mov r7, #4 add r7, r6, r7 str r9, [r7] to: ldr r6, [fp, #-8] str r8, [r6] str r9, [r6, #4] We leave the store as two separate STR instructions rather than using STRD as the store may not be aligned, and STR can handle misalignment. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 52 +++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 3a182e618441..026612ee8151 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -975,29 +975,42 @@ static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], } /* *(size *)(dst + off) = src */ -static inline void emit_str_r(const s8 dst, const s8 src, - const s32 off, struct jit_ctx *ctx, const u8 sz){ +static inline void emit_str_r(const s8 dst, const s8 src[], + s32 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; + s32 off_max; s8 rd; rd = arm_bpf_get_reg32(dst, tmp[1], ctx); - if (off) { + + if (sz == BPF_H) + off_max = 0xff; + else + off_max = 0xfff; + + if (off < 0 || off > off_max) { emit_a32_mov_i(tmp[0], off, ctx); - emit(ARM_ADD_R(tmp[0], rd, tmp[0]), ctx); + emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx); rd = tmp[0]; + off = 0; } switch (sz) { - case BPF_W: - /* Store a Word */ - emit(ARM_STR_I(src, rd, 0), ctx); + case BPF_B: + /* Store a Byte */ + emit(ARM_STRB_I(src_lo, rd, off), ctx); break; case BPF_H: /* Store a HalfWord */ - emit(ARM_STRH_I(src, rd, 0), ctx); + emit(ARM_STRH_I(src_lo, rd, off), ctx); break; - case BPF_B: - /* Store a Byte */ - emit(ARM_STRB_I(src, rd, 0), ctx); + case BPF_W: + /* Store a Word */ + emit(ARM_STR_I(src_lo, rd, off), ctx); + break; + case BPF_DW: + /* Store a Double Word */ + emit(ARM_STR_I(src_lo, rd, off), ctx); + emit(ARM_STR_I(src_hi, rd, off + 4), ctx); break; } } @@ -1539,16 +1552,14 @@ exit: case BPF_DW: /* Sign-extend immediate value into temp reg */ emit_a32_mov_se_i64(true, tmp2, imm, ctx); - emit_str_r(dst_lo, tmp2[1], off, ctx, BPF_W); - emit_str_r(dst_lo, tmp2[0], off+4, ctx, BPF_W); break; case BPF_W: case BPF_H: case BPF_B: emit_a32_mov_i(tmp2[1], imm, ctx); - emit_str_r(dst_lo, tmp2[1], off, ctx, BPF_SIZE(code)); break; } + emit_str_r(dst_lo, tmp2, off, ctx, BPF_SIZE(code)); break; /* STX XADD: lock *(u32 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_W: @@ -1560,20 +1571,9 @@ exit: case BPF_STX | BPF_MEM | BPF_H: case BPF_STX | BPF_MEM | BPF_B: case BPF_STX | BPF_MEM | BPF_DW: - { - u8 sz = BPF_SIZE(code); - rs = arm_bpf_get_reg64(src, tmp2, ctx); - - /* Store the value */ - if (BPF_SIZE(code) == BPF_DW) { - emit_str_r(dst_lo, rs[1], off, ctx, BPF_W); - emit_str_r(dst_lo, rs[0], off+4, ctx, BPF_W); - } else { - emit_str_r(dst_lo, rs[1], off, ctx, sz); - } + emit_str_r(dst_lo, rs, off, ctx, BPF_SIZE(code)); break; - } /* PC += off if dst == src */ /* PC += off if dst > src */ /* PC += off if dst >= src */ From b18bea2a45b136c4da7ff75305b54d66cf57de81 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 12 Jul 2018 21:50:56 +0100 Subject: [PATCH 43/61] ARM: net: bpf: improve 64-bit ALU implementation Improbe the 64-bit ALU implementation from: movw r8, #65532 movt r8, #65535 movw r9, #65535 movt r9, #65535 ldr r7, [fp, #-44] adds r7, r7, r8 str r7, [fp, #-44] ldr r7, [fp, #-40] adc r7, r7, r9 str r7, [fp, #-40] to: movw r8, #65532 movt r8, #65535 movw r9, #65535 movt r9, #65535 ldrd r6, [fp, #-44] adds r6, r6, r8 adc r7, r7, r9 strd r6, [fp, #-44] Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 026612ee8151..25b3ee85066e 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -716,11 +716,30 @@ static inline void emit_a32_alu_r(const s8 dst, const s8 src, static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], const s8 src[], struct jit_ctx *ctx, const u8 op) { - emit_a32_alu_r(dst_lo, src_lo, ctx, is64, false, op); - if (is64) - emit_a32_alu_r(dst_hi, src_hi, ctx, is64, true, op); - else - emit_a32_mov_i(dst_hi, 0, ctx); + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *rd; + + rd = arm_bpf_get_reg64(dst, tmp, ctx); + if (is64) { + const s8 *rs; + + rs = arm_bpf_get_reg64(src, tmp2, ctx); + + /* ALU operation */ + emit_alu_r(rd[1], rs[1], true, false, op, ctx); + emit_alu_r(rd[0], rs[0], true, true, op, ctx); + } else { + s8 rs; + + rs = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); + + /* ALU operation */ + emit_alu_r(rd[1], rs, true, false, op, ctx); + emit_a32_mov_i(rd[0], 0, ctx); + } + + arm_bpf_put_reg64(dst, rd, ctx); } /* dst = src (4 bytes)*/ From 4f91da26c81145f255cb153152ffed70014b1c41 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Jul 2018 20:36:38 -0700 Subject: [PATCH 44/61] xdp: add per mode attributes for attached programs In preparation for support of simultaneous driver and hardware XDP support add per-mode attributes. The catch-all IFLA_XDP_PROG_ID will still be reported, but user space can now also access the program ID in a new IFLA_XDP__PROG_ID attribute. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- include/uapi/linux/if_link.h | 3 +++ net/core/rtnetlink.c | 30 ++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index cf01b6824244..bc86c2b105ec 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -928,6 +928,9 @@ enum { IFLA_XDP_ATTACHED, IFLA_XDP_FLAGS, IFLA_XDP_PROG_ID, + IFLA_XDP_DRV_PROG_ID, + IFLA_XDP_SKB_PROG_ID, + IFLA_XDP_HW_PROG_ID, __IFLA_XDP_MAX, }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5ef61222fdef..b40242459907 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -964,7 +964,8 @@ static size_t rtnl_xdp_size(void) { size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */ nla_total_size(1) + /* XDP_ATTACHED */ - nla_total_size(4); /* XDP_PROG_ID */ + nla_total_size(4) + /* XDP_PROG_ID */ + nla_total_size(4); /* XDP__PROG_ID */ return xdp_size; } @@ -1378,16 +1379,17 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id) static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) { + u32 prog_attr, prog_id; struct nlattr *xdp; - u32 prog_id; int err; + u8 mode; xdp = nla_nest_start(skb, IFLA_XDP); if (!xdp) return -EMSGSIZE; - err = nla_put_u8(skb, IFLA_XDP_ATTACHED, - rtnl_xdp_attached_mode(dev, &prog_id)); + mode = rtnl_xdp_attached_mode(dev, &prog_id); + err = nla_put_u8(skb, IFLA_XDP_ATTACHED, mode); if (err) goto err_cancel; @@ -1395,6 +1397,26 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) err = nla_put_u32(skb, IFLA_XDP_PROG_ID, prog_id); if (err) goto err_cancel; + + switch (mode) { + case XDP_ATTACHED_DRV: + prog_attr = IFLA_XDP_DRV_PROG_ID; + break; + case XDP_ATTACHED_SKB: + prog_attr = IFLA_XDP_SKB_PROG_ID; + break; + case XDP_ATTACHED_HW: + prog_attr = IFLA_XDP_HW_PROG_ID; + break; + case XDP_ATTACHED_NONE: + default: + err = -EINVAL; + goto err_cancel; + } + + err = nla_put_u32(skb, prog_attr, prog_id); + if (err) + goto err_cancel; } nla_nest_end(skb, xdp); From 6b8675897338f874c41612655a85d8e10cdb23d8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Jul 2018 20:36:39 -0700 Subject: [PATCH 45/61] xdp: don't make drivers report attachment mode prog_attached of struct netdev_bpf should have been superseded by simply setting prog_id long time ago, but we kept it around to allow offloading drivers to communicate attachment mode (drv vs hw). Subsequently drivers were also allowed to report back attachment flags (prog_flags), and since nowadays only programs attached will XDP_FLAGS_HW_MODE can get offloaded, we can tell the attachment mode from the flags driver reports. Remove prog_attached member. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 1 - drivers/net/ethernet/cavium/thunder/nicvf_main.c | 1 - drivers/net/ethernet/intel/i40e/i40e_main.c | 1 - drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 1 - drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 1 - drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 - drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 3 --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 1 - drivers/net/netdevsim/bpf.c | 1 - drivers/net/tun.c | 1 - drivers/net/virtio_net.c | 1 - include/linux/netdevice.h | 5 ----- net/core/dev.c | 7 +++---- net/core/rtnetlink.c | 8 ++++++-- 15 files changed, 9 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 1f0e872d0667..0584d07c8c33 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -219,7 +219,6 @@ int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp) rc = bnxt_xdp_set(bp, xdp->prog); break; case XDP_QUERY_PROG: - xdp->prog_attached = !!bp->xdp_prog; xdp->prog_id = bp->xdp_prog ? bp->xdp_prog->aux->id : 0; rc = 0; break; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 135766c4296b..768f584f8392 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1848,7 +1848,6 @@ static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp) case XDP_SETUP_PROG: return nicvf_xdp_setup(nic, xdp->prog); case XDP_QUERY_PROG: - xdp->prog_attached = !!nic->xdp_prog; xdp->prog_id = nic->xdp_prog ? nic->xdp_prog->aux->id : 0; return 0; default: diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 426b0ccb1fc6..51762428b40e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11841,7 +11841,6 @@ static int i40e_xdp(struct net_device *dev, case XDP_SETUP_PROG: return i40e_xdp_setup(vsi, xdp->prog); case XDP_QUERY_PROG: - xdp->prog_attached = i40e_enabled_xdp_vsi(vsi); xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0; return 0; default: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index a8e21becb619..3862fea1c923 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9966,7 +9966,6 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp) case XDP_SETUP_PROG: return ixgbe_xdp_setup(dev, xdp->prog); case XDP_QUERY_PROG: - xdp->prog_attached = !!(adapter->xdp_prog); xdp->prog_id = adapter->xdp_prog ? adapter->xdp_prog->aux->id : 0; return 0; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 59416eddd840..d86446d202d5 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4462,7 +4462,6 @@ static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp) case XDP_SETUP_PROG: return ixgbevf_xdp_setup(dev, xdp->prog); case XDP_QUERY_PROG: - xdp->prog_attached = !!(adapter->xdp_prog); xdp->prog_id = adapter->xdp_prog ? adapter->xdp_prog->aux->id : 0; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 65eb06e017e4..6785661d1a72 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2926,7 +2926,6 @@ static int mlx4_xdp(struct net_device *dev, struct netdev_bpf *xdp) return mlx4_xdp_set(dev, xdp->prog); case XDP_QUERY_PROG: xdp->prog_id = mlx4_xdp_query(dev); - xdp->prog_attached = !!xdp->prog_id; return 0; default: return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index bbd2fd0b2e06..e4a9a0768a81 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4192,7 +4192,6 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) return mlx5e_xdp_set(dev, xdp->prog); case XDP_QUERY_PROG: xdp->prog_id = mlx5e_xdp_query(dev); - xdp->prog_attached = !!xdp->prog_id; return 0; default: return -EINVAL; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 7df5ca37bfb8..d20714598613 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3459,9 +3459,6 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp) return nfp_net_xdp_setup(nn, xdp->prog, xdp->flags, xdp->extack); case XDP_QUERY_PROG: - xdp->prog_attached = !!nn->xdp_prog; - if (nn->dp.bpf_offload_xdp) - xdp->prog_attached = XDP_ATTACHED_HW; xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0; xdp->prog_flags = nn->xdp_prog ? nn->xdp_flags : 0; return 0; diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index b823bfe2ea4d..f9a327c821eb 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1116,7 +1116,6 @@ int qede_xdp(struct net_device *dev, struct netdev_bpf *xdp) case XDP_SETUP_PROG: return qede_xdp_set(edev, xdp->prog); case XDP_QUERY_PROG: - xdp->prog_attached = !!edev->xdp_prog; xdp->prog_id = edev->xdp_prog ? edev->xdp_prog->aux->id : 0; return 0; default: diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 75c25306d234..712e6f918065 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -567,7 +567,6 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) nsim_bpf_destroy_prog(bpf->offload.prog); return 0; case XDP_QUERY_PROG: - bpf->prog_attached = ns->xdp_prog_mode; bpf->prog_id = ns->xdp_prog ? ns->xdp_prog->aux->id : 0; bpf->prog_flags = ns->xdp_prog ? ns->xdp_flags : 0; return 0; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index a192a017cc68..49a50219d0da 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1268,7 +1268,6 @@ static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp) return tun_xdp_set(dev, xdp->prog, xdp->extack); case XDP_QUERY_PROG: xdp->prog_id = tun_xdp_query(dev); - xdp->prog_attached = !!xdp->prog_id; return 0; default: return -EINVAL; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 53085c63277b..2ff08bc103a9 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2343,7 +2343,6 @@ static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) return virtnet_xdp_set(dev, xdp->prog, xdp->extack); case XDP_QUERY_PROG: xdp->prog_id = virtnet_xdp_query(dev); - xdp->prog_attached = !!xdp->prog_id; return 0; default: return -EINVAL; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b683971e500d..69a664789b33 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -819,10 +819,6 @@ enum bpf_netdev_command { */ XDP_SETUP_PROG, XDP_SETUP_PROG_HW, - /* Check if a bpf program is set on the device. The callee should - * set @prog_attached to one of XDP_ATTACHED_* values, note that "true" - * is equivalent to XDP_ATTACHED_DRV. - */ XDP_QUERY_PROG, /* BPF program for offload callbacks, invoked at program load time. */ BPF_OFFLOAD_VERIFIER_PREP, @@ -849,7 +845,6 @@ struct netdev_bpf { }; /* XDP_QUERY_PROG */ struct { - u8 prog_attached; u32 prog_id; /* flags with which program was installed */ u32 prog_flags; diff --git a/net/core/dev.c b/net/core/dev.c index 89825c1eccdc..9fa3b3705a8e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4926,7 +4926,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) break; case XDP_QUERY_PROG: - xdp->prog_attached = !!old; xdp->prog_id = old ? old->aux->id : 0; break; @@ -7593,13 +7592,13 @@ void __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op, WARN_ON(bpf_op(dev, xdp) < 0); } -static u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op) +static bool __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op) { struct netdev_bpf xdp; __dev_xdp_query(dev, bpf_op, &xdp); - return xdp.prog_attached; + return xdp.prog_id; } static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, @@ -7634,7 +7633,7 @@ static void dev_xdp_uninstall(struct net_device *dev) return; __dev_xdp_query(dev, ndo_bpf, &xdp); - if (xdp.prog_attached == XDP_ATTACHED_NONE) + if (!xdp.prog_id) return; /* Program removal should always succeed */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b40242459907..02ebc056a688 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1372,9 +1372,13 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id) return XDP_ATTACHED_NONE; __dev_xdp_query(dev, ops->ndo_bpf, &xdp); - *prog_id = xdp.prog_id; + if (!xdp.prog_id) + return XDP_ATTACHED_NONE; - return xdp.prog_attached; + *prog_id = xdp.prog_id; + if (xdp.prog_flags & XDP_FLAGS_HW_MODE) + return XDP_ATTACHED_HW; + return XDP_ATTACHED_DRV; } static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) From 05296620f6d14dce0030b87e1e57891a770fb65c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Jul 2018 20:36:40 -0700 Subject: [PATCH 46/61] xdp: factor out common program/flags handling from drivers Basic operations drivers perform during xdp setup and query can be moved to helpers in the core. Encapsulate program and flags into a structure and add helpers. Note that the structure is intended as the "main" program information source in the driver. Most drivers will additionally place the program pointer in their fast path or ring structures. The helpers don't have a huge impact now, but they will decrease the code duplication when programs can be installed in HW and driver at the same time. Encapsulating the basic operations in helpers will hopefully also reduce the number of changes to drivers which adopt them. Helpers could really be static inline, but they depend on definition of struct netdev_bpf which means they'd have to be placed in netdevice.h, an already 4500 line header. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/nfp_net.h | 6 ++-- .../ethernet/netronome/nfp/nfp_net_common.c | 28 ++++++--------- drivers/net/netdevsim/bpf.c | 16 +++------ drivers/net/netdevsim/netdevsim.h | 4 +-- include/net/xdp.h | 13 +++++++ net/core/xdp.c | 34 +++++++++++++++++++ tools/testing/selftests/bpf/test_offload.py | 4 +-- 7 files changed, 67 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 2a71a9ffd095..2021dda595b7 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -553,8 +553,7 @@ struct nfp_net_dp { * @rss_cfg: RSS configuration * @rss_key: RSS secret key * @rss_itbl: RSS indirection table - * @xdp_flags: Flags with which XDP prog was loaded - * @xdp_prog: XDP prog (for ctrl path, both DRV and HW modes) + * @xdp: Information about the attached XDP program * @max_r_vecs: Number of allocated interrupt vectors for RX/TX * @max_tx_rings: Maximum number of TX rings supported by the Firmware * @max_rx_rings: Maximum number of RX rings supported by the Firmware @@ -610,8 +609,7 @@ struct nfp_net { u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ]; u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ]; - u32 xdp_flags; - struct bpf_prog *xdp_prog; + struct xdp_attachment_info xdp; unsigned int max_tx_rings; unsigned int max_rx_rings; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index d20714598613..4bb589dbffbc 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3417,34 +3417,29 @@ nfp_net_xdp_setup_drv(struct nfp_net *nn, struct bpf_prog *prog, return nfp_net_ring_reconfig(nn, dp, extack); } -static int -nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog, u32 flags, - struct netlink_ext_ack *extack) +static int nfp_net_xdp_setup(struct nfp_net *nn, struct netdev_bpf *bpf) { struct bpf_prog *drv_prog, *offload_prog; int err; - if (nn->xdp_prog && (flags ^ nn->xdp_flags) & XDP_FLAGS_MODES) + if (!xdp_attachment_flags_ok(&nn->xdp, bpf)) return -EBUSY; /* Load both when no flags set to allow easy activation of driver path * when program is replaced by one which can't be offloaded. */ - drv_prog = flags & XDP_FLAGS_HW_MODE ? NULL : prog; - offload_prog = flags & XDP_FLAGS_DRV_MODE ? NULL : prog; + drv_prog = bpf->flags & XDP_FLAGS_HW_MODE ? NULL : bpf->prog; + offload_prog = bpf->flags & XDP_FLAGS_DRV_MODE ? NULL : bpf->prog; - err = nfp_net_xdp_setup_drv(nn, drv_prog, extack); + err = nfp_net_xdp_setup_drv(nn, drv_prog, bpf->extack); if (err) return err; - err = nfp_app_xdp_offload(nn->app, nn, offload_prog, extack); - if (err && flags & XDP_FLAGS_HW_MODE) + err = nfp_app_xdp_offload(nn->app, nn, offload_prog, bpf->extack); + if (err && bpf->flags & XDP_FLAGS_HW_MODE) return err; - if (nn->xdp_prog) - bpf_prog_put(nn->xdp_prog); - nn->xdp_prog = prog; - nn->xdp_flags = flags; + xdp_attachment_setup(&nn->xdp, bpf); return 0; } @@ -3456,12 +3451,9 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: case XDP_SETUP_PROG_HW: - return nfp_net_xdp_setup(nn, xdp->prog, xdp->flags, - xdp->extack); + return nfp_net_xdp_setup(nn, xdp); case XDP_QUERY_PROG: - xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0; - xdp->prog_flags = nn->xdp_prog ? nn->xdp_flags : 0; - return 0; + return xdp_attachment_query(&nn->xdp, xdp); default: return nfp_app_bpf(nn->app, nn, xdp); } diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 712e6f918065..c485d97b5df4 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -199,10 +199,8 @@ static int nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf) { int err; - if (ns->xdp_prog && (bpf->flags ^ ns->xdp_flags) & XDP_FLAGS_MODES) { - NSIM_EA(bpf->extack, "program loaded with different flags"); + if (!xdp_attachment_flags_ok(&ns->xdp, bpf)) return -EBUSY; - } if (bpf->command == XDP_SETUP_PROG && !ns->bpf_xdpdrv_accept) { NSIM_EA(bpf->extack, "driver XDP disabled in DebugFS"); @@ -219,11 +217,7 @@ static int nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf) return err; } - if (ns->xdp_prog) - bpf_prog_put(ns->xdp_prog); - - ns->xdp_prog = bpf->prog; - ns->xdp_flags = bpf->flags; + xdp_attachment_setup(&ns->xdp, bpf); if (!bpf->prog) ns->xdp_prog_mode = XDP_ATTACHED_NONE; @@ -567,9 +561,7 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) nsim_bpf_destroy_prog(bpf->offload.prog); return 0; case XDP_QUERY_PROG: - bpf->prog_id = ns->xdp_prog ? ns->xdp_prog->aux->id : 0; - bpf->prog_flags = ns->xdp_prog ? ns->xdp_flags : 0; - return 0; + return xdp_attachment_query(&ns->xdp, bpf); case XDP_SETUP_PROG: err = nsim_setup_prog_checks(ns, bpf); if (err) @@ -636,6 +628,6 @@ void nsim_bpf_uninit(struct netdevsim *ns) { WARN_ON(!list_empty(&ns->bpf_bound_progs)); WARN_ON(!list_empty(&ns->bpf_bound_maps)); - WARN_ON(ns->xdp_prog); + WARN_ON(ns->xdp.prog); WARN_ON(ns->bpf_offloaded); } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index d8a7cc995e88..69ffb4a2d14b 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -18,6 +18,7 @@ #include #include #include +#include #define DRV_NAME "netdevsim" @@ -67,9 +68,8 @@ struct netdevsim { struct bpf_prog *bpf_offloaded; u32 bpf_offloaded_id; - u32 xdp_flags; + struct xdp_attachment_info xdp; int xdp_prog_mode; - struct bpf_prog *xdp_prog; u32 prog_id_gen; diff --git a/include/net/xdp.h b/include/net/xdp.h index 2deea7166a34..fcb033f51d8c 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -144,4 +144,17 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp) return unlikely(xdp->data_meta > xdp->data); } +struct xdp_attachment_info { + struct bpf_prog *prog; + u32 flags; +}; + +struct netdev_bpf; +int xdp_attachment_query(struct xdp_attachment_info *info, + struct netdev_bpf *bpf); +bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, + struct netdev_bpf *bpf); +void xdp_attachment_setup(struct xdp_attachment_info *info, + struct netdev_bpf *bpf); + #endif /* __LINUX_NET_XDP_H__ */ diff --git a/net/core/xdp.c b/net/core/xdp.c index 31c58719b5a9..57285383ed00 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -3,8 +3,11 @@ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. * Released under terms in GPL version 2. See COPYING. */ +#include +#include #include #include +#include #include #include #include @@ -370,3 +373,34 @@ void xdp_return_buff(struct xdp_buff *xdp) __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle); } EXPORT_SYMBOL_GPL(xdp_return_buff); + +int xdp_attachment_query(struct xdp_attachment_info *info, + struct netdev_bpf *bpf) +{ + bpf->prog_id = info->prog ? info->prog->aux->id : 0; + bpf->prog_flags = info->prog ? info->flags : 0; + return 0; +} +EXPORT_SYMBOL_GPL(xdp_attachment_query); + +bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, + struct netdev_bpf *bpf) +{ + if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) { + NL_SET_ERR_MSG(bpf->extack, + "program loaded with different flags"); + return false; + } + return true; +} +EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok); + +void xdp_attachment_setup(struct xdp_attachment_info *info, + struct netdev_bpf *bpf) +{ + if (info->prog) + bpf_prog_put(info->prog); + info->prog = bpf->prog; + info->flags = bpf->flags; +} +EXPORT_SYMBOL_GPL(xdp_attachment_setup); diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index f8d9bd81d9a4..40401e9e9351 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -821,7 +821,7 @@ try: ret, _, err = sim.set_xdp(obj, "", force=True, fail=False, include_stderr=True) fail(ret == 0, "Replaced XDP program with a program in different mode") - check_extack_nsim(err, "program loaded with different flags.", args) + check_extack(err, "program loaded with different flags.", args) start_test("Test XDP prog remove with bad flags...") ret, _, err = sim.unset_xdp("offload", force=True, @@ -831,7 +831,7 @@ try: ret, _, err = sim.unset_xdp("", force=True, fail=False, include_stderr=True) fail(ret == 0, "Removed program with a bad mode") - check_extack_nsim(err, "program loaded with different flags.", args) + check_extack(err, "program loaded with different flags.", args) start_test("Test MTU restrictions...") ret, _ = sim.set_mtu(9000, fail=False) From a25717d2b604347d9af8da81deea7b08e8c94220 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Jul 2018 20:36:41 -0700 Subject: [PATCH 47/61] xdp: support simultaneous driver and hw XDP attachment Split the query of HW-attached program from the software one. Introduce new .ndo_bpf command to query HW-attached program. This will allow drivers to install different programs in HW and SW at the same time. Netlink can now also carry multiple programs on dump (in which case mode will be set to XDP_ATTACHED_MULTI and user has to check per-attachment point attributes, IFLA_XDP_PROG_ID will not be present). We reuse IFLA_XDP_PROG_ID skb space for second mode, so rtnl_xdp_size() doesn't need to be updated. Note that the installation side is still not there, since all drivers currently reject installing more than one program at the time. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- .../ethernet/netronome/nfp/nfp_net_common.c | 6 ++ drivers/net/netdevsim/bpf.c | 6 ++ include/linux/netdevice.h | 7 +- include/uapi/linux/if_link.h | 1 + net/core/dev.c | 47 ++++++---- net/core/rtnetlink.c | 93 +++++++++++-------- 6 files changed, 97 insertions(+), 63 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 4bb589dbffbc..bb1e72e8dbc2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3453,6 +3453,12 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp) case XDP_SETUP_PROG_HW: return nfp_net_xdp_setup(nn, xdp); case XDP_QUERY_PROG: + if (nn->dp.bpf_offload_xdp) + return 0; + return xdp_attachment_query(&nn->xdp, xdp); + case XDP_QUERY_PROG_HW: + if (!nn->dp.bpf_offload_xdp) + return 0; return xdp_attachment_query(&nn->xdp, xdp); default: return nfp_app_bpf(nn->app, nn, xdp); diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index c485d97b5df4..5544c9b51173 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -561,6 +561,12 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) nsim_bpf_destroy_prog(bpf->offload.prog); return 0; case XDP_QUERY_PROG: + if (ns->xdp_prog_mode != XDP_ATTACHED_DRV) + return 0; + return xdp_attachment_query(&ns->xdp, bpf); + case XDP_QUERY_PROG_HW: + if (ns->xdp_prog_mode != XDP_ATTACHED_HW) + return 0; return xdp_attachment_query(&ns->xdp, bpf); case XDP_SETUP_PROG: err = nsim_setup_prog_checks(ns, bpf); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 69a664789b33..2422c0e88f5c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -820,6 +820,7 @@ enum bpf_netdev_command { XDP_SETUP_PROG, XDP_SETUP_PROG_HW, XDP_QUERY_PROG, + XDP_QUERY_PROG_HW, /* BPF program for offload callbacks, invoked at program load time. */ BPF_OFFLOAD_VERIFIER_PREP, BPF_OFFLOAD_TRANSLATE, @@ -843,7 +844,7 @@ struct netdev_bpf { struct bpf_prog *prog; struct netlink_ext_ack *extack; }; - /* XDP_QUERY_PROG */ + /* XDP_QUERY_PROG, XDP_QUERY_PROG_HW */ struct { u32 prog_id; /* flags with which program was installed */ @@ -3533,8 +3534,8 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, u32 flags); -void __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op, - struct netdev_bpf *xdp); +u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op, + enum bpf_netdev_command cmd); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index bc86c2b105ec..8759cfb8aa2e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -920,6 +920,7 @@ enum { XDP_ATTACHED_DRV, XDP_ATTACHED_SKB, XDP_ATTACHED_HW, + XDP_ATTACHED_MULTI, }; enum { diff --git a/net/core/dev.c b/net/core/dev.c index 9fa3b3705a8e..993cdc3cd086 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7582,21 +7582,19 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) } EXPORT_SYMBOL(dev_change_proto_down); -void __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op, - struct netdev_bpf *xdp) -{ - memset(xdp, 0, sizeof(*xdp)); - xdp->command = XDP_QUERY_PROG; - - /* Query must always succeed. */ - WARN_ON(bpf_op(dev, xdp) < 0); -} - -static bool __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op) +u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op, + enum bpf_netdev_command cmd) { struct netdev_bpf xdp; - __dev_xdp_query(dev, bpf_op, &xdp); + if (!bpf_op) + return 0; + + memset(&xdp, 0, sizeof(xdp)); + xdp.command = cmd; + + /* Query must always succeed. */ + WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG); return xdp.prog_id; } @@ -7632,12 +7630,19 @@ static void dev_xdp_uninstall(struct net_device *dev) if (!ndo_bpf) return; - __dev_xdp_query(dev, ndo_bpf, &xdp); - if (!xdp.prog_id) - return; + memset(&xdp, 0, sizeof(xdp)); + xdp.command = XDP_QUERY_PROG; + WARN_ON(ndo_bpf(dev, &xdp)); + if (xdp.prog_id) + WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, + NULL)); - /* Program removal should always succeed */ - WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, NULL)); + /* Remove HW offload */ + memset(&xdp, 0, sizeof(xdp)); + xdp.command = XDP_QUERY_PROG_HW; + if (!ndo_bpf(dev, &xdp) && xdp.prog_id) + WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, + NULL)); } /** @@ -7653,12 +7658,15 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, u32 flags) { const struct net_device_ops *ops = dev->netdev_ops; + enum bpf_netdev_command query; struct bpf_prog *prog = NULL; bpf_op_t bpf_op, bpf_chk; int err; ASSERT_RTNL(); + query = flags & XDP_FLAGS_HW_MODE ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG; + bpf_op = bpf_chk = ops->ndo_bpf; if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) return -EOPNOTSUPP; @@ -7668,10 +7676,11 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, bpf_chk = generic_xdp_install; if (fd >= 0) { - if (bpf_chk && __dev_xdp_attached(dev, bpf_chk)) + if (__dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG) || + __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG_HW)) return -EEXIST; if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && - __dev_xdp_attached(dev, bpf_op)) + __dev_xdp_query(dev, bpf_op, query)) return -EBUSY; prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 02ebc056a688..c9929ef17539 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -964,7 +964,7 @@ static size_t rtnl_xdp_size(void) { size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */ nla_total_size(1) + /* XDP_ATTACHED */ - nla_total_size(4) + /* XDP_PROG_ID */ + nla_total_size(4) + /* XDP_PROG_ID (or 1st mode) */ nla_total_size(4); /* XDP__PROG_ID */ return xdp_size; @@ -1354,37 +1354,57 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) return 0; } -static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id) +static u32 rtnl_xdp_prog_skb(struct net_device *dev) { - const struct net_device_ops *ops = dev->netdev_ops; const struct bpf_prog *generic_xdp_prog; - struct netdev_bpf xdp; ASSERT_RTNL(); - *prog_id = 0; generic_xdp_prog = rtnl_dereference(dev->xdp_prog); - if (generic_xdp_prog) { - *prog_id = generic_xdp_prog->aux->id; - return XDP_ATTACHED_SKB; - } - if (!ops->ndo_bpf) - return XDP_ATTACHED_NONE; + if (!generic_xdp_prog) + return 0; + return generic_xdp_prog->aux->id; +} - __dev_xdp_query(dev, ops->ndo_bpf, &xdp); - if (!xdp.prog_id) - return XDP_ATTACHED_NONE; +static u32 rtnl_xdp_prog_drv(struct net_device *dev) +{ + return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG); +} - *prog_id = xdp.prog_id; - if (xdp.prog_flags & XDP_FLAGS_HW_MODE) - return XDP_ATTACHED_HW; - return XDP_ATTACHED_DRV; +static u32 rtnl_xdp_prog_hw(struct net_device *dev) +{ + return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, + XDP_QUERY_PROG_HW); +} + +static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev, + u32 *prog_id, u8 *mode, u8 tgt_mode, u32 attr, + u32 (*get_prog_id)(struct net_device *dev)) +{ + u32 curr_id; + int err; + + curr_id = get_prog_id(dev); + if (!curr_id) + return 0; + + *prog_id = curr_id; + err = nla_put_u32(skb, attr, curr_id); + if (err) + return err; + + if (*mode != XDP_ATTACHED_NONE) + *mode = XDP_ATTACHED_MULTI; + else + *mode = tgt_mode; + + return 0; } static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) { - u32 prog_attr, prog_id; struct nlattr *xdp; + u32 prog_id; int err; u8 mode; @@ -1392,35 +1412,26 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) if (!xdp) return -EMSGSIZE; - mode = rtnl_xdp_attached_mode(dev, &prog_id); + prog_id = 0; + mode = XDP_ATTACHED_NONE; + if (rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_SKB, + IFLA_XDP_SKB_PROG_ID, rtnl_xdp_prog_skb)) + goto err_cancel; + if (rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_DRV, + IFLA_XDP_DRV_PROG_ID, rtnl_xdp_prog_drv)) + goto err_cancel; + if (rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_HW, + IFLA_XDP_HW_PROG_ID, rtnl_xdp_prog_hw)) + goto err_cancel; + err = nla_put_u8(skb, IFLA_XDP_ATTACHED, mode); if (err) goto err_cancel; - if (prog_id) { + if (prog_id && mode != XDP_ATTACHED_MULTI) { err = nla_put_u32(skb, IFLA_XDP_PROG_ID, prog_id); if (err) goto err_cancel; - - switch (mode) { - case XDP_ATTACHED_DRV: - prog_attr = IFLA_XDP_DRV_PROG_ID; - break; - case XDP_ATTACHED_SKB: - prog_attr = IFLA_XDP_SKB_PROG_ID; - break; - case XDP_ATTACHED_HW: - prog_attr = IFLA_XDP_HW_PROG_ID; - break; - case XDP_ATTACHED_NONE: - default: - err = -EINVAL; - goto err_cancel; - } - - err = nla_put_u32(skb, prog_attr, prog_id); - if (err) - goto err_cancel; } nla_nest_end(skb, xdp); From 799e173d7125155c00e9492c8212c5e41333049f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Jul 2018 20:36:42 -0700 Subject: [PATCH 48/61] netdevsim: add support for simultaneous driver and hw XDP Allow netdevsim to accept driver and offload attachment of XDP BPF programs at the same time. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- drivers/net/netdevsim/bpf.c | 32 +++++++-------------- drivers/net/netdevsim/netdev.c | 3 +- drivers/net/netdevsim/netdevsim.h | 2 +- tools/testing/selftests/bpf/test_offload.py | 8 ------ 4 files changed, 12 insertions(+), 33 deletions(-) diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 5544c9b51173..c36d2a768202 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -92,7 +92,7 @@ static const struct bpf_prog_offload_ops nsim_bpf_analyzer_ops = { static bool nsim_xdp_offload_active(struct netdevsim *ns) { - return ns->xdp_prog_mode == XDP_ATTACHED_HW; + return ns->xdp_hw.prog; } static void nsim_prog_set_loaded(struct bpf_prog *prog, bool loaded) @@ -195,11 +195,13 @@ static int nsim_xdp_offload_prog(struct netdevsim *ns, struct netdev_bpf *bpf) return nsim_bpf_offload(ns, bpf->prog, nsim_xdp_offload_active(ns)); } -static int nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf) +static int +nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf, + struct xdp_attachment_info *xdp) { int err; - if (!xdp_attachment_flags_ok(&ns->xdp, bpf)) + if (!xdp_attachment_flags_ok(xdp, bpf)) return -EBUSY; if (bpf->command == XDP_SETUP_PROG && !ns->bpf_xdpdrv_accept) { @@ -217,14 +219,7 @@ static int nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf) return err; } - xdp_attachment_setup(&ns->xdp, bpf); - - if (!bpf->prog) - ns->xdp_prog_mode = XDP_ATTACHED_NONE; - else if (bpf->command == XDP_SETUP_PROG) - ns->xdp_prog_mode = XDP_ATTACHED_DRV; - else - ns->xdp_prog_mode = XDP_ATTACHED_HW; + xdp_attachment_setup(xdp, bpf); return 0; } @@ -284,10 +279,6 @@ static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf) NSIM_EA(bpf->extack, "MTU too large w/ XDP enabled"); return -EINVAL; } - if (nsim_xdp_offload_active(ns)) { - NSIM_EA(bpf->extack, "xdp offload active, can't load drv prog"); - return -EBUSY; - } return 0; } @@ -561,25 +552,21 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) nsim_bpf_destroy_prog(bpf->offload.prog); return 0; case XDP_QUERY_PROG: - if (ns->xdp_prog_mode != XDP_ATTACHED_DRV) - return 0; return xdp_attachment_query(&ns->xdp, bpf); case XDP_QUERY_PROG_HW: - if (ns->xdp_prog_mode != XDP_ATTACHED_HW) - return 0; - return xdp_attachment_query(&ns->xdp, bpf); + return xdp_attachment_query(&ns->xdp_hw, bpf); case XDP_SETUP_PROG: err = nsim_setup_prog_checks(ns, bpf); if (err) return err; - return nsim_xdp_set_prog(ns, bpf); + return nsim_xdp_set_prog(ns, bpf, &ns->xdp); case XDP_SETUP_PROG_HW: err = nsim_setup_prog_hw_checks(ns, bpf); if (err) return err; - return nsim_xdp_set_prog(ns, bpf); + return nsim_xdp_set_prog(ns, bpf, &ns->xdp_hw); case BPF_OFFLOAD_MAP_ALLOC: if (!ns->bpf_map_accept) return -EOPNOTSUPP; @@ -635,5 +622,6 @@ void nsim_bpf_uninit(struct netdevsim *ns) WARN_ON(!list_empty(&ns->bpf_bound_progs)); WARN_ON(!list_empty(&ns->bpf_bound_maps)); WARN_ON(ns->xdp.prog); + WARN_ON(ns->xdp_hw.prog); WARN_ON(ns->bpf_offloaded); } diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index b2f9d0df93b0..a7b179f0d954 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -228,8 +228,7 @@ static int nsim_change_mtu(struct net_device *dev, int new_mtu) { struct netdevsim *ns = netdev_priv(dev); - if (ns->xdp_prog_mode == XDP_ATTACHED_DRV && - new_mtu > NSIM_XDP_MAX_MTU) + if (ns->xdp.prog && new_mtu > NSIM_XDP_MAX_MTU) return -EBUSY; dev->mtu = new_mtu; diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 69ffb4a2d14b..0aeabbe81cc6 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -69,7 +69,7 @@ struct netdevsim { u32 bpf_offloaded_id; struct xdp_attachment_info xdp; - int xdp_prog_mode; + struct xdp_attachment_info xdp_hw; u32 prog_id_gen; diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 40401e9e9351..4f982a0255c2 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -814,20 +814,12 @@ try: "Device parameters reported for non-offloaded program") start_test("Test XDP prog replace with bad flags...") - ret, _, err = sim.set_xdp(obj, "offload", force=True, - fail=False, include_stderr=True) - fail(ret == 0, "Replaced XDP program with a program in different mode") - check_extack_nsim(err, "program loaded with different flags.", args) ret, _, err = sim.set_xdp(obj, "", force=True, fail=False, include_stderr=True) fail(ret == 0, "Replaced XDP program with a program in different mode") check_extack(err, "program loaded with different flags.", args) start_test("Test XDP prog remove with bad flags...") - ret, _, err = sim.unset_xdp("offload", force=True, - fail=False, include_stderr=True) - fail(ret == 0, "Removed program with a bad mode mode") - check_extack_nsim(err, "program loaded with different flags.", args) ret, _, err = sim.unset_xdp("", force=True, fail=False, include_stderr=True) fail(ret == 0, "Removed program with a bad mode") From 99dadb6e3ec130800d199ec42fff03242b1be1eb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Jul 2018 20:36:43 -0700 Subject: [PATCH 49/61] selftests/bpf: add test for multiple programs Add tests for having an XDP program attached in the driver and another one attached in HW simultaneously. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_offload.py | 63 +++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 4f982a0255c2..b746227eaff2 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -339,6 +339,11 @@ class NetdevSim: self.dfs = DebugfsDir(self.dfs_dir) return self.dfs + def dfs_read(self, f): + path = os.path.join(self.dfs_dir, f) + _, data = cmd('cat %s' % (path)) + return data.strip() + def dfs_num_bound_progs(self): path = os.path.join(self.dfs_dir, "bpf_bound_progs") _, progs = cmd('ls %s' % (path)) @@ -814,6 +819,10 @@ try: "Device parameters reported for non-offloaded program") start_test("Test XDP prog replace with bad flags...") + ret, _, err = sim.set_xdp(obj, "generic", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Replaced XDP program with a program in different mode") + fail(err.count("File exists") != 1, "Replaced driver XDP with generic") ret, _, err = sim.set_xdp(obj, "", force=True, fail=False, include_stderr=True) fail(ret == 0, "Replaced XDP program with a program in different mode") @@ -883,6 +892,60 @@ try: rm(pin_file) bpftool_prog_list_wait(expected=0) + start_test("Test multi-attachment XDP - attach...") + sim.set_xdp(obj, "offload") + xdp = sim.ip_link_show(xdp=True)["xdp"] + offloaded = sim.dfs_read("bpf_offloaded_id") + fail("prog" not in xdp, "Base program not reported in single program mode") + fail(len(ipl["xdp"]["attached"]) != 1, + "Wrong attached program count with one program") + + sim.set_xdp(obj, "") + two_xdps = sim.ip_link_show(xdp=True)["xdp"] + offloaded2 = sim.dfs_read("bpf_offloaded_id") + + fail(two_xdps["mode"] != 4, "Bad mode reported with multiple programs") + fail("prog" in two_xdps, "Base program reported in multi program mode") + fail(xdp["attached"][0] not in two_xdps["attached"], + "Offload program not reported after driver activated") + fail(len(two_xdps["attached"]) != 2, + "Wrong attached program count with two programs") + fail(two_xdps["attached"][0]["prog"]["id"] == + two_xdps["attached"][1]["prog"]["id"], + "offloaded and drv programs have the same id") + fail(offloaded != offloaded2, + "offload ID changed after loading driver program") + + start_test("Test multi-attachment XDP - replace...") + ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True) + fail(err.count("busy") != 1, "Replaced one of programs without -force") + + start_test("Test multi-attachment XDP - detach...") + ret, _, err = sim.unset_xdp("drv", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Removed program with a bad mode") + check_extack(err, "program loaded with different flags.", args) + + sim.unset_xdp("offload") + xdp = sim.ip_link_show(xdp=True)["xdp"] + offloaded = sim.dfs_read("bpf_offloaded_id") + + fail(xdp["mode"] != 1, "Bad mode reported after multiple programs") + fail("prog" not in xdp, + "Base program not reported after multi program mode") + fail(xdp["attached"][0] not in two_xdps["attached"], + "Offload program not reported after driver activated") + fail(len(ipl["xdp"]["attached"]) != 1, + "Wrong attached program count with remaining programs") + fail(offloaded != "0", "offload ID reported with only driver program left") + + start_test("Test multi-attachment XDP - device remove...") + sim.set_xdp(obj, "offload") + sim.remove() + + sim = NetdevSim() + sim.set_ethtool_tc_offloads(True) + start_test("Test mixing of TC and XDP...") sim.tc_add_ingress() sim.set_xdp(obj, "offload") From 5f4284015e29c2de501d83eb647c8ec8802b58ac Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Jul 2018 20:36:44 -0700 Subject: [PATCH 50/61] nfp: add support for simultaneous driver and hw XDP Split handling of offloaded and driver programs completely. Since offloaded programs always come with XDP_FLAGS_HW_MODE set in reality there could be no sharing, anyway, programs would only be installed in driver or in hardware. Splitting the handling allows us to install programs in HW and in driver at the same time. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 11 +--- drivers/net/ethernet/netronome/nfp/nfp_net.h | 6 +- .../ethernet/netronome/nfp/nfp_net_common.c | 59 ++++++++----------- 3 files changed, 31 insertions(+), 45 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 4dbf7cba6377..b95b94d008cf 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -66,26 +66,19 @@ nfp_bpf_xdp_offload(struct nfp_app *app, struct nfp_net *nn, struct bpf_prog *prog, struct netlink_ext_ack *extack) { bool running, xdp_running; - int ret; if (!nfp_net_ebpf_capable(nn)) return -EINVAL; running = nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF; - xdp_running = running && nn->dp.bpf_offload_xdp; + xdp_running = running && nn->xdp_hw.prog; if (!prog && !xdp_running) return 0; if (prog && running && !xdp_running) return -EBUSY; - ret = nfp_net_bpf_offload(nn, prog, running, extack); - /* Stop offload if replace not possible */ - if (ret) - return ret; - - nn->dp.bpf_offload_xdp = !!prog; - return ret; + return nfp_net_bpf_offload(nn, prog, running, extack); } static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 2021dda595b7..8970ec981e11 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -485,7 +485,6 @@ struct nfp_stat_pair { * @dev: Backpointer to struct device * @netdev: Backpointer to net_device structure * @is_vf: Is the driver attached to a VF? - * @bpf_offload_xdp: Offloaded BPF program is XDP * @chained_metadata_format: Firemware will use new metadata format * @rx_dma_dir: Mapping direction for RX buffers * @rx_dma_off: Offset at which DMA packets (for XDP headroom) @@ -510,7 +509,6 @@ struct nfp_net_dp { struct net_device *netdev; u8 is_vf:1; - u8 bpf_offload_xdp:1; u8 chained_metadata_format:1; u8 rx_dma_dir; @@ -553,7 +551,8 @@ struct nfp_net_dp { * @rss_cfg: RSS configuration * @rss_key: RSS secret key * @rss_itbl: RSS indirection table - * @xdp: Information about the attached XDP program + * @xdp: Information about the driver XDP program + * @xdp_hw: Information about the HW XDP program * @max_r_vecs: Number of allocated interrupt vectors for RX/TX * @max_tx_rings: Maximum number of TX rings supported by the Firmware * @max_rx_rings: Maximum number of RX rings supported by the Firmware @@ -610,6 +609,7 @@ struct nfp_net { u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ]; struct xdp_attachment_info xdp; + struct xdp_attachment_info xdp_hw; unsigned int max_tx_rings; unsigned int max_rx_rings; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index bb1e72e8dbc2..a712e83c3f0f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1710,8 +1710,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) } } - if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF && - dp->bpf_offload_xdp) && !meta.portid) { + if (xdp_prog && !meta.portid) { void *orig_data = rxbuf->frag + pkt_off; unsigned int dma_off; int act; @@ -3393,14 +3392,18 @@ static void nfp_net_del_vxlan_port(struct net_device *netdev, nfp_net_set_vxlan_port(nn, idx, 0); } -static int -nfp_net_xdp_setup_drv(struct nfp_net *nn, struct bpf_prog *prog, - struct netlink_ext_ack *extack) +static int nfp_net_xdp_setup_drv(struct nfp_net *nn, struct netdev_bpf *bpf) { + struct bpf_prog *prog = bpf->prog; struct nfp_net_dp *dp; + int err; + + if (!xdp_attachment_flags_ok(&nn->xdp, bpf)) + return -EBUSY; if (!prog == !nn->dp.xdp_prog) { WRITE_ONCE(nn->dp.xdp_prog, prog); + xdp_attachment_setup(&nn->xdp, bpf); return 0; } @@ -3414,33 +3417,26 @@ nfp_net_xdp_setup_drv(struct nfp_net *nn, struct bpf_prog *prog, dp->rx_dma_off = prog ? XDP_PACKET_HEADROOM - nn->dp.rx_offset : 0; /* We need RX reconfig to remap the buffers (BIDIR vs FROM_DEV) */ - return nfp_net_ring_reconfig(nn, dp, extack); -} - -static int nfp_net_xdp_setup(struct nfp_net *nn, struct netdev_bpf *bpf) -{ - struct bpf_prog *drv_prog, *offload_prog; - int err; - - if (!xdp_attachment_flags_ok(&nn->xdp, bpf)) - return -EBUSY; - - /* Load both when no flags set to allow easy activation of driver path - * when program is replaced by one which can't be offloaded. - */ - drv_prog = bpf->flags & XDP_FLAGS_HW_MODE ? NULL : bpf->prog; - offload_prog = bpf->flags & XDP_FLAGS_DRV_MODE ? NULL : bpf->prog; - - err = nfp_net_xdp_setup_drv(nn, drv_prog, bpf->extack); + err = nfp_net_ring_reconfig(nn, dp, bpf->extack); if (err) return err; - err = nfp_app_xdp_offload(nn->app, nn, offload_prog, bpf->extack); - if (err && bpf->flags & XDP_FLAGS_HW_MODE) + xdp_attachment_setup(&nn->xdp, bpf); + return 0; +} + +static int nfp_net_xdp_setup_hw(struct nfp_net *nn, struct netdev_bpf *bpf) +{ + int err; + + if (!xdp_attachment_flags_ok(&nn->xdp_hw, bpf)) + return -EBUSY; + + err = nfp_app_xdp_offload(nn->app, nn, bpf->prog, bpf->extack); + if (err) return err; - xdp_attachment_setup(&nn->xdp, bpf); - + xdp_attachment_setup(&nn->xdp_hw, bpf); return 0; } @@ -3450,16 +3446,13 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: + return nfp_net_xdp_setup_drv(nn, xdp); case XDP_SETUP_PROG_HW: - return nfp_net_xdp_setup(nn, xdp); + return nfp_net_xdp_setup_hw(nn, xdp); case XDP_QUERY_PROG: - if (nn->dp.bpf_offload_xdp) - return 0; return xdp_attachment_query(&nn->xdp, xdp); case XDP_QUERY_PROG_HW: - if (!nn->dp.bpf_offload_xdp) - return 0; - return xdp_attachment_query(&nn->xdp, xdp); + return xdp_attachment_query(&nn->xdp_hw, xdp); default: return nfp_app_bpf(nn->app, nn, xdp); } From d23b27c02f03438ac1867b4b6a484b92f536bd28 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 13 Jul 2018 16:35:14 +0200 Subject: [PATCH 51/61] samples/bpf: xdp_redirect_cpu handle parsing of double VLAN tagged packets People noticed that the code match on IEEE 802.1ad (ETH_P_8021AD) ethertype, and this implies Q-in-Q or double tagged VLANs. Thus, we better parse the next VLAN header too. It is even marked as a TODO. This is relevant for real world use-cases, as XDP cpumap redirect can be used when the NIC RSS hashing is broken. E.g. the ixgbe driver HW cannot handle double tagged VLAN packets, and places everything into a single RX queue. Using cpumap redirect, users can redistribute traffic across CPUs to solve this, which is faster than the network stacks RPS solution. It is left as an exerise how to distribute the packets across CPUs. It would be convenient to use the RX hash, but that is not _yet_ exposed to XDP programs. For now, users can code their own hash, as I've demonstrated in the Suricata code (where Q-in-Q is handled correctly). Reported-by: Florian Maury Reported-by: Marek Majkowski Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- samples/bpf/xdp_redirect_cpu_kern.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c index 303e9e7161f3..8cb703671b04 100644 --- a/samples/bpf/xdp_redirect_cpu_kern.c +++ b/samples/bpf/xdp_redirect_cpu_kern.c @@ -134,7 +134,16 @@ bool parse_eth(struct ethhdr *eth, void *data_end, return false; eth_type = vlan_hdr->h_vlan_encapsulated_proto; } - /* TODO: Handle double VLAN tagged packet */ + /* Handle double VLAN tagged packet */ + if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) { + struct vlan_hdr *vlan_hdr; + + vlan_hdr = (void *)eth + offset; + offset += sizeof(*vlan_hdr); + if ((void *)eth + offset > data_end) + return false; + eth_type = vlan_hdr->h_vlan_encapsulated_proto; + } *eth_proto = ntohs(eth_type); *l3_offset = offset; From db42a21a1e8d11a30a06050281c2723ec78b63a7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 13 Jul 2018 19:08:59 -0700 Subject: [PATCH 52/61] tools: include reallocarray feature test in FEATURE_TESTS_BASIC perf propagates its feature check results to libbpf. This means features for which perf probes must be a superset of libbpf's required features. perf depends on FEATURE_TESTS_BASIC for its list of features. commit 531b014e7a2f ("tools: bpf: make use of reallocarray") added reallocarray use to libbpf, make perf also perform the reallocarray feature check. Fixes: 531b014e7a2f ("tools: bpf: make use of reallocarray") Reported-by: Guenter Roeck Signed-off-by: Jakub Kicinski Tested-by: Guenter Roeck Signed-off-by: Daniel Borkmann --- tools/build/Makefile.feature | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 5b6dda3b1ca8..f216b2f5c3d7 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -57,6 +57,7 @@ FEATURE_TESTS_BASIC := \ libunwind-aarch64 \ pthread-attr-setaffinity-np \ pthread-barrier \ + reallocarray \ stackprotector-all \ timerfd \ libdw-dwarf-unwind \ From 92b57121ca79b286bef4f304e887272f3f2d86bb Mon Sep 17 00:00:00 2001 From: Okash Khawaja Date: Fri, 13 Jul 2018 21:57:02 -0700 Subject: [PATCH 53/61] bpf: btf: export btf types and name by offset from lib This patch introduces btf__resolve_type() function and exports two existing functions from libbpf. btf__resolve_type follows modifier types like const and typedef until it hits a type which actually takes up memory, and then returns it. This function follows similar pattern to btf__resolve_size but instead of computing size, it just returns the type. These functions will be used in the followig patch which parses information inside array of `struct btf_type *`. btf_name_by_offset is used for printing variable names. Signed-off-by: Okash Khawaja Acked-by: Martin KaFai Lau Acked-by: Song Liu Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/lib/bpf/btf.c | 65 +++++++++++++++++++++++++++++++-------------- tools/lib/bpf/btf.h | 3 +++ 2 files changed, 48 insertions(+), 20 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 8c54a4b6f187..03161be094b4 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -17,6 +17,11 @@ #define BTF_MAX_NR_TYPES 65535 +#define IS_MODIFIER(k) (((k) == BTF_KIND_TYPEDEF) || \ + ((k) == BTF_KIND_VOLATILE) || \ + ((k) == BTF_KIND_CONST) || \ + ((k) == BTF_KIND_RESTRICT)) + static struct btf_type btf_void; struct btf { @@ -33,14 +38,6 @@ struct btf { int fd; }; -static const char *btf_name_by_offset(const struct btf *btf, uint32_t offset) -{ - if (offset < btf->hdr->str_len) - return &btf->strings[offset]; - else - return NULL; -} - static int btf_add_type(struct btf *btf, struct btf_type *t) { if (btf->types_size - btf->nr_types < 2) { @@ -190,15 +187,6 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) return 0; } -static const struct btf_type *btf_type_by_id(const struct btf *btf, - uint32_t type_id) -{ - if (type_id > btf->nr_types) - return NULL; - - return btf->types[type_id]; -} - static bool btf_type_is_void(const struct btf_type *t) { return t == &btf_void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD; @@ -234,7 +222,7 @@ int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id) int64_t size = -1; int i; - t = btf_type_by_id(btf, type_id); + t = btf__type_by_id(btf, type_id); for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); i++) { size = btf_type_size(t); @@ -259,7 +247,7 @@ int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id) return -EINVAL; } - t = btf_type_by_id(btf, type_id); + t = btf__type_by_id(btf, type_id); } if (size < 0) @@ -271,6 +259,26 @@ int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id) return nelems * size; } +int btf__resolve_type(const struct btf *btf, __u32 type_id) +{ + const struct btf_type *t; + int depth = 0; + + t = btf__type_by_id(btf, type_id); + while (depth < MAX_RESOLVE_DEPTH && + !btf_type_is_void_or_null(t) && + IS_MODIFIER(BTF_INFO_KIND(t->info))) { + type_id = t->type; + t = btf__type_by_id(btf, type_id); + depth++; + } + + if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t)) + return -EINVAL; + + return type_id; +} + int32_t btf__find_by_name(const struct btf *btf, const char *type_name) { uint32_t i; @@ -280,7 +288,7 @@ int32_t btf__find_by_name(const struct btf *btf, const char *type_name) for (i = 1; i <= btf->nr_types; i++) { const struct btf_type *t = btf->types[i]; - const char *name = btf_name_by_offset(btf, t->name_off); + const char *name = btf__name_by_offset(btf, t->name_off); if (name && !strcmp(type_name, name)) return i; @@ -371,3 +379,20 @@ int btf__fd(const struct btf *btf) { return btf->fd; } + +const char *btf__name_by_offset(const struct btf *btf, __u32 offset) +{ + if (offset < btf->hdr->str_len) + return &btf->strings[offset]; + else + return NULL; +} + +const struct btf_type *btf__type_by_id(const struct btf *btf, + __u32 type_id) +{ + if (type_id > btf->nr_types) + return NULL; + + return btf->types[type_id]; +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 74bb344035bb..24f361d99a5e 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -17,6 +17,9 @@ void btf__free(struct btf *btf); struct btf *btf__new(uint8_t *data, uint32_t size, btf_print_fn_t err_log); int32_t btf__find_by_name(const struct btf *btf, const char *type_name); int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id); +int btf__resolve_type(const struct btf *btf, __u32 type_id); int btf__fd(const struct btf *btf); +const char *btf__name_by_offset(const struct btf *btf, __u32 offset); +const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id); #endif From b12d6ec09730b2646e85c348e599b592348dd0e3 Mon Sep 17 00:00:00 2001 From: Okash Khawaja Date: Fri, 13 Jul 2018 21:57:03 -0700 Subject: [PATCH 54/61] bpf: btf: add btf print functionality This consumes functionality exported in the previous patch. It does the main job of printing with BTF data. This is used in the following patch to provide a more readable output of a map's dump. It relies on json_writer to do json printing. Below is sample output where map keys are ints and values are of type struct A: typedef int int_type; enum E { E0, E1, }; struct B { int x; int y; }; struct A { int m; unsigned long long n; char o; int p[8]; int q[4][8]; enum E r; void *s; struct B t; const int u; int_type v; unsigned int w1: 3; unsigned int w2: 3; }; $ sudo bpftool map dump id 14 [{ "key": 0, "value": { "m": 1, "n": 2, "o": "c", "p": [15,16,17,18,15,16,17,18 ], "q": [[25,26,27,28,25,26,27,28 ],[35,36,37,38,35,36,37,38 ],[45,46,47,48,45,46,47,48 ],[55,56,57,58,55,56,57,58 ] ], "r": 1, "s": 0x7ffd80531cf8, "t": { "x": 5, "y": 10 }, "u": 100, "v": 20, "w1": 0x7, "w2": 0x3 } } ] This patch uses json's {} and [] to imply struct/union and array. More explicit information can be added later. For example, a command line option can be introduced to print whether a key or value is struct or union, name of a struct etc. This will however come at the expense of duplicating info when, for example, printing an array of structs. enums are printed as ints without their names. Signed-off-by: Okash Khawaja Acked-by: Martin KaFai Lau Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/btf_dumper.c | 251 +++++++++++++++++++++++++++++++++ tools/bpf/bpftool/main.h | 15 ++ 2 files changed, 266 insertions(+) create mode 100644 tools/bpf/bpftool/btf_dumper.c diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c new file mode 100644 index 000000000000..55bc512a1831 --- /dev/null +++ b/tools/bpf/bpftool/btf_dumper.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Facebook */ + +#include +#include /* for (FILE *) used by json_writer */ +#include +#include +#include +#include +#include + +#include "btf.h" +#include "json_writer.h" +#include "main.h" + +#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1) +#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK) +#define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3) +#define BITS_ROUNDUP_BYTES(bits) \ + (BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits)) + +static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id, + __u8 bit_offset, const void *data); + +static void btf_dumper_ptr(const void *data, json_writer_t *jw, + bool is_plain_text) +{ + if (is_plain_text) + jsonw_printf(jw, "%p", *(unsigned long *)data); + else + jsonw_printf(jw, "%u", *(unsigned long *)data); +} + +static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id, + const void *data) +{ + int actual_type_id; + + actual_type_id = btf__resolve_type(d->btf, type_id); + if (actual_type_id < 0) + return actual_type_id; + + return btf_dumper_do_type(d, actual_type_id, 0, data); +} + +static void btf_dumper_enum(const void *data, json_writer_t *jw) +{ + jsonw_printf(jw, "%d", *(int *)data); +} + +static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id, + const void *data) +{ + const struct btf_type *t = btf__type_by_id(d->btf, type_id); + struct btf_array *arr = (struct btf_array *)(t + 1); + long long elem_size; + int ret = 0; + __u32 i; + + elem_size = btf__resolve_size(d->btf, arr->type); + if (elem_size < 0) + return elem_size; + + jsonw_start_array(d->jw); + for (i = 0; i < arr->nelems; i++) { + ret = btf_dumper_do_type(d, arr->type, 0, + data + i * elem_size); + if (ret) + break; + } + + jsonw_end_array(d->jw); + return ret; +} + +static void btf_dumper_int_bits(__u32 int_type, __u8 bit_offset, + const void *data, json_writer_t *jw, + bool is_plain_text) +{ + int left_shift_bits, right_shift_bits; + int nr_bits = BTF_INT_BITS(int_type); + int total_bits_offset; + int bytes_to_copy; + int bits_to_copy; + __u64 print_num; + + total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type); + data += BITS_ROUNDDOWN_BYTES(total_bits_offset); + bit_offset = BITS_PER_BYTE_MASKED(total_bits_offset); + bits_to_copy = bit_offset + nr_bits; + bytes_to_copy = BITS_ROUNDUP_BYTES(bits_to_copy); + + print_num = 0; + memcpy(&print_num, data, bytes_to_copy); +#if defined(__BIG_ENDIAN_BITFIELD) + left_shift_bits = bit_offset; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + left_shift_bits = 64 - bits_to_copy; +#else +#error neither big nor little endian +#endif + right_shift_bits = 64 - nr_bits; + + print_num <<= left_shift_bits; + print_num >>= right_shift_bits; + if (is_plain_text) + jsonw_printf(jw, "0x%llx", print_num); + else + jsonw_printf(jw, "%llu", print_num); +} + +static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset, + const void *data, json_writer_t *jw, + bool is_plain_text) +{ + __u32 *int_type; + __u32 nr_bits; + + int_type = (__u32 *)(t + 1); + nr_bits = BTF_INT_BITS(*int_type); + /* if this is bit field */ + if (bit_offset || BTF_INT_OFFSET(*int_type) || + BITS_PER_BYTE_MASKED(nr_bits)) { + btf_dumper_int_bits(*int_type, bit_offset, data, jw, + is_plain_text); + return 0; + } + + switch (BTF_INT_ENCODING(*int_type)) { + case 0: + if (BTF_INT_BITS(*int_type) == 64) + jsonw_printf(jw, "%lu", *(__u64 *)data); + else if (BTF_INT_BITS(*int_type) == 32) + jsonw_printf(jw, "%u", *(__u32 *)data); + else if (BTF_INT_BITS(*int_type) == 16) + jsonw_printf(jw, "%hu", *(__u16 *)data); + else if (BTF_INT_BITS(*int_type) == 8) + jsonw_printf(jw, "%hhu", *(__u8 *)data); + else + btf_dumper_int_bits(*int_type, bit_offset, data, jw, + is_plain_text); + break; + case BTF_INT_SIGNED: + if (BTF_INT_BITS(*int_type) == 64) + jsonw_printf(jw, "%ld", *(long long *)data); + else if (BTF_INT_BITS(*int_type) == 32) + jsonw_printf(jw, "%d", *(int *)data); + else if (BTF_INT_BITS(*int_type) == 16) + jsonw_printf(jw, "%hd", *(short *)data); + else if (BTF_INT_BITS(*int_type) == 8) + jsonw_printf(jw, "%hhd", *(char *)data); + else + btf_dumper_int_bits(*int_type, bit_offset, data, jw, + is_plain_text); + break; + case BTF_INT_CHAR: + if (isprint(*(char *)data)) + jsonw_printf(jw, "\"%c\"", *(char *)data); + else + if (is_plain_text) + jsonw_printf(jw, "0x%hhx", *(char *)data); + else + jsonw_printf(jw, "\"\\u00%02hhx\"", + *(char *)data); + break; + case BTF_INT_BOOL: + jsonw_bool(jw, *(int *)data); + break; + default: + /* shouldn't happen */ + return -EINVAL; + } + + return 0; +} + +static int btf_dumper_struct(const struct btf_dumper *d, __u32 type_id, + const void *data) +{ + const struct btf_type *t; + struct btf_member *m; + const void *data_off; + int ret = 0; + int i, vlen; + + t = btf__type_by_id(d->btf, type_id); + if (!t) + return -EINVAL; + + vlen = BTF_INFO_VLEN(t->info); + jsonw_start_object(d->jw); + m = (struct btf_member *)(t + 1); + + for (i = 0; i < vlen; i++) { + data_off = data + BITS_ROUNDDOWN_BYTES(m[i].offset); + jsonw_name(d->jw, btf__name_by_offset(d->btf, m[i].name_off)); + ret = btf_dumper_do_type(d, m[i].type, + BITS_PER_BYTE_MASKED(m[i].offset), + data_off); + if (ret) + break; + } + + jsonw_end_object(d->jw); + + return ret; +} + +static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id, + __u8 bit_offset, const void *data) +{ + const struct btf_type *t = btf__type_by_id(d->btf, type_id); + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: + return btf_dumper_int(t, bit_offset, data, d->jw, + d->is_plain_text); + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + return btf_dumper_struct(d, type_id, data); + case BTF_KIND_ARRAY: + return btf_dumper_array(d, type_id, data); + case BTF_KIND_ENUM: + btf_dumper_enum(data, d->jw); + return 0; + case BTF_KIND_PTR: + btf_dumper_ptr(data, d->jw, d->is_plain_text); + return 0; + case BTF_KIND_UNKN: + jsonw_printf(d->jw, "(unknown)"); + return 0; + case BTF_KIND_FWD: + /* map key or value can't be forward */ + jsonw_printf(d->jw, "(fwd-kind-invalid)"); + return -EINVAL; + case BTF_KIND_TYPEDEF: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + return btf_dumper_modifier(d, type_id, data); + default: + jsonw_printf(d->jw, "(unsupported-kind"); + return -EINVAL; + } +} + +int btf_dumper_type(const struct btf_dumper *d, __u32 type_id, + const void *data) +{ + return btf_dumper_do_type(d, type_id, 0, data); +} diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 41004bb2644a..238e734d75b3 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -150,4 +150,19 @@ unsigned int get_page_size(void); unsigned int get_possible_cpus(void); const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino); +struct btf_dumper { + const struct btf *btf; + json_writer_t *jw; + bool is_plain_text; +}; + +/* btf_dumper_type - print data along with type information + * @d: an instance containing context for dumping types + * @type_id: index in btf->types array. this points to the type to be dumped + * @data: pointer the actual data, i.e. the values to be printed + * + * Returns zero on success and negative error code otherwise + */ +int btf_dumper_type(const struct btf_dumper *d, __u32 type_id, + const void *data); #endif From 2d3feca8c44f315624b7c0f5b8361e2f54405c12 Mon Sep 17 00:00:00 2001 From: Okash Khawaja Date: Fri, 13 Jul 2018 21:57:04 -0700 Subject: [PATCH 55/61] bpf: btf: print map dump and lookup with btf info This patch augments the output of bpftool's map dump and map lookup commands to print data along side btf info, if the correspondin btf info is available. The outputs for each of map dump and map lookup commands are augmented in two ways: 1. when neither of -j and -p are supplied, btf-ful map data is printed whose aim is human readability. This means no commitments for json- or backward- compatibility. 2. when either -j or -p are supplied, a new json object named "formatted" is added for each key-value pair. This object contains the same data as the key-value pair, but with btf info. "formatted" object promises json- and backward- compatibility. Below is a sample output. $ bpftool map dump -p id 8 [{ "key": ["0x0f","0x00","0x00","0x00" ], "value": ["0x03", "0x00", "0x00", "0x00", ... ], "formatted": { "key": 15, "value": { "int_field": 3, ... } } } ] This patch calls btf_dumper introduced in previous patch to accomplish the above. Indeed, btf-ful info is only displayed if btf data for the given map is available. Otherwise existing output is displayed as-is. Signed-off-by: Okash Khawaja Acked-by: Martin KaFai Lau Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/map.c | 219 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 202 insertions(+), 17 deletions(-) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index e2baec1122fb..9c8191845585 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -44,6 +45,8 @@ #include +#include "btf.h" +#include "json_writer.h" #include "main.h" static const char * const map_type_name[] = { @@ -148,8 +151,109 @@ int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) return fd; } +static int do_dump_btf(const struct btf_dumper *d, + struct bpf_map_info *map_info, void *key, + void *value) +{ + int ret; + + /* start of key-value pair */ + jsonw_start_object(d->jw); + + jsonw_name(d->jw, "key"); + + ret = btf_dumper_type(d, map_info->btf_key_type_id, key); + if (ret) + goto err_end_obj; + + jsonw_name(d->jw, "value"); + + ret = btf_dumper_type(d, map_info->btf_value_type_id, value); + +err_end_obj: + /* end of key-value pair */ + jsonw_end_object(d->jw); + + return ret; +} + +static int get_btf(struct bpf_map_info *map_info, struct btf **btf) +{ + struct bpf_btf_info btf_info = { 0 }; + __u32 len = sizeof(btf_info); + __u32 last_size; + int btf_fd; + void *ptr; + int err; + + err = 0; + *btf = NULL; + btf_fd = bpf_btf_get_fd_by_id(map_info->btf_id); + if (btf_fd < 0) + return 0; + + /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so + * let's start with a sane default - 4KiB here - and resize it only if + * bpf_obj_get_info_by_fd() needs a bigger buffer. + */ + btf_info.btf_size = 4096; + last_size = btf_info.btf_size; + ptr = malloc(last_size); + if (!ptr) { + err = -ENOMEM; + goto exit_free; + } + + bzero(ptr, last_size); + btf_info.btf = ptr_to_u64(ptr); + err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); + + if (!err && btf_info.btf_size > last_size) { + void *temp_ptr; + + last_size = btf_info.btf_size; + temp_ptr = realloc(ptr, last_size); + if (!temp_ptr) { + err = -ENOMEM; + goto exit_free; + } + ptr = temp_ptr; + bzero(ptr, last_size); + btf_info.btf = ptr_to_u64(ptr); + err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); + } + + if (err || btf_info.btf_size > last_size) { + err = errno; + goto exit_free; + } + + *btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL); + if (IS_ERR(*btf)) { + err = PTR_ERR(btf); + *btf = NULL; + } + +exit_free: + close(btf_fd); + free(ptr); + + return err; +} + +static json_writer_t *get_btf_writer(void) +{ + json_writer_t *jw = jsonw_new(stdout); + + if (!jw) + return NULL; + jsonw_pretty(jw, true); + + return jw; +} + static void print_entry_json(struct bpf_map_info *info, unsigned char *key, - unsigned char *value) + unsigned char *value, struct btf *btf) { jsonw_start_object(json_wtr); @@ -158,6 +262,16 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key, print_hex_data_json(key, info->key_size); jsonw_name(json_wtr, "value"); print_hex_data_json(value, info->value_size); + if (btf) { + struct btf_dumper d = { + .btf = btf, + .jw = json_wtr, + .is_plain_text = false, + }; + + jsonw_name(json_wtr, "formatted"); + do_dump_btf(&d, info, key, value); + } } else { unsigned int i, n; @@ -508,10 +622,12 @@ static int do_show(int argc, char **argv) static int do_dump(int argc, char **argv) { + struct bpf_map_info info = {}; void *key, *value, *prev_key; unsigned int num_elems = 0; - struct bpf_map_info info = {}; __u32 len = sizeof(info); + json_writer_t *btf_wtr; + struct btf *btf = NULL; int err; int fd; @@ -537,8 +653,27 @@ static int do_dump(int argc, char **argv) } prev_key = NULL; + + err = get_btf(&info, &btf); + if (err) { + p_err("failed to get btf"); + goto exit_free; + } + if (json_output) jsonw_start_array(json_wtr); + else + if (btf) { + btf_wtr = get_btf_writer(); + if (!btf_wtr) { + p_info("failed to create json writer for btf. falling back to plain output"); + btf__free(btf); + btf = NULL; + } else { + jsonw_start_array(btf_wtr); + } + } + while (true) { err = bpf_map_get_next_key(fd, prev_key, key); if (err) { @@ -549,9 +684,19 @@ static int do_dump(int argc, char **argv) if (!bpf_map_lookup_elem(fd, key, value)) { if (json_output) - print_entry_json(&info, key, value); + print_entry_json(&info, key, value, btf); else - print_entry_plain(&info, key, value); + if (btf) { + struct btf_dumper d = { + .btf = btf, + .jw = btf_wtr, + .is_plain_text = true, + }; + + do_dump_btf(&d, &info, key, value); + } else { + print_entry_plain(&info, key, value); + } } else { if (json_output) { jsonw_name(json_wtr, "key"); @@ -574,14 +719,19 @@ static int do_dump(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); - else + else if (btf) { + jsonw_end_array(btf_wtr); + jsonw_destroy(&btf_wtr); + } else { printf("Found %u element%s\n", num_elems, num_elems != 1 ? "s" : ""); + } exit_free: free(key); free(value); close(fd); + btf__free(btf); return err; } @@ -637,6 +787,8 @@ static int do_lookup(int argc, char **argv) { struct bpf_map_info info = {}; __u32 len = sizeof(info); + json_writer_t *btf_wtr; + struct btf *btf = NULL; void *key, *value; int err; int fd; @@ -661,27 +813,60 @@ static int do_lookup(int argc, char **argv) goto exit_free; err = bpf_map_lookup_elem(fd, key, value); - if (!err) { - if (json_output) - print_entry_json(&info, key, value); - else - print_entry_plain(&info, key, value); - } else if (errno == ENOENT) { - if (json_output) { - jsonw_null(json_wtr); + if (err) { + if (errno == ENOENT) { + if (json_output) { + jsonw_null(json_wtr); + } else { + printf("key:\n"); + fprint_hex(stdout, key, info.key_size, " "); + printf("\n\nNot found\n"); + } } else { - printf("key:\n"); - fprint_hex(stdout, key, info.key_size, " "); - printf("\n\nNot found\n"); + p_err("lookup failed: %s", strerror(errno)); + } + + goto exit_free; + } + + /* here means bpf_map_lookup_elem() succeeded */ + err = get_btf(&info, &btf); + if (err) { + p_err("failed to get btf"); + goto exit_free; + } + + if (json_output) { + print_entry_json(&info, key, value, btf); + } else if (btf) { + /* if here json_wtr wouldn't have been initialised, + * so let's create separate writer for btf + */ + btf_wtr = get_btf_writer(); + if (!btf_wtr) { + p_info("failed to create json writer for btf. falling back to plain output"); + btf__free(btf); + btf = NULL; + print_entry_plain(&info, key, value); + } else { + struct btf_dumper d = { + .btf = btf, + .jw = btf_wtr, + .is_plain_text = true, + }; + + do_dump_btf(&d, &info, key, value); + jsonw_destroy(&btf_wtr); } } else { - p_err("lookup failed: %s", strerror(errno)); + print_entry_plain(&info, key, value); } exit_free: free(key); free(value); close(fd); + btf__free(btf); return err; } From f333ee0cdb27ba201e6cc0c99c76b1364aa29b86 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 11 Jul 2018 17:33:32 -0700 Subject: [PATCH 56/61] bpf: Add BPF_SOCK_OPS_TCP_LISTEN_CB Add new TCP-BPF callback that is called on listen(2) right after socket transition to TCP_LISTEN state. It fills the gap for listening sockets in TCP-BPF. For example BPF program can set BPF_SOCK_OPS_STATE_CB_FLAG when socket becomes listening and track later transition from TCP_LISTEN to TCP_CLOSE with BPF_SOCK_OPS_STATE_CB callback. Before there was no way to do it with TCP-BPF and other options were much harder to work with. E.g. socket state tracking can be done with tracepoints (either raw or regular) but they can't be attached to cgroup and their lifetime has to be managed separately. Signed-off-by: Andrey Ignatov Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 3 +++ net/ipv4/af_inet.c | 1 + 2 files changed, 4 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6bcb287a888d..870113916cac 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2555,6 +2555,9 @@ enum { * Arg1: old_state * Arg2: new_state */ + BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after + * socket transition to LISTEN state. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c716be13d58c..f2a0a3bab6b5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -229,6 +229,7 @@ int inet_listen(struct socket *sock, int backlog) err = inet_csk_listen_start(sk, backlog); if (err) goto out; + tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL); } sk->sk_max_ack_backlog = backlog; err = 0; From 060a7fccd394f1600be86536b1d3e8cdd66637c2 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 11 Jul 2018 17:33:33 -0700 Subject: [PATCH 57/61] bpf: Sync bpf.h to tools/ Sync BPF_SOCK_OPS_TCP_LISTEN_CB related UAPI changes to tools/. Signed-off-by: Andrey Ignatov Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6bcb287a888d..870113916cac 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2555,6 +2555,9 @@ enum { * Arg1: old_state * Arg2: new_state */ + BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after + * socket transition to LISTEN state. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect From 04c13411151c220b41998020c32ac97f33b58683 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 11 Jul 2018 17:33:34 -0700 Subject: [PATCH 58/61] selftests/bpf: Fix const'ness in cgroup_helpers Lack of const in cgroup helpers signatures forces to write ugly client code. Fix it. Signed-off-by: Andrey Ignatov Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/cgroup_helpers.c | 6 +++--- tools/testing/selftests/bpf/cgroup_helpers.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index c87b4e052ce9..cf16948aad4a 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -118,7 +118,7 @@ static int join_cgroup_from_top(char *cgroup_path) * * On success, it returns 0, otherwise on failure it returns 1. */ -int join_cgroup(char *path) +int join_cgroup(const char *path) { char cgroup_path[PATH_MAX + 1]; @@ -158,7 +158,7 @@ void cleanup_cgroup_environment(void) * On success, it returns the file descriptor. On failure it returns 0. * If there is a failure, it prints the error to stderr. */ -int create_and_get_cgroup(char *path) +int create_and_get_cgroup(const char *path) { char cgroup_path[PATH_MAX + 1]; int fd; @@ -186,7 +186,7 @@ int create_and_get_cgroup(char *path) * which is an invalid cgroup id. * If there is a failure, it prints the error to stderr. */ -unsigned long long get_cgroup_id(char *path) +unsigned long long get_cgroup_id(const char *path) { int dirfd, err, flags, mount_id, fhsize; union { diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 20a4a5dcd469..d64bb8957090 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -9,10 +9,10 @@ __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) -int create_and_get_cgroup(char *path); -int join_cgroup(char *path); +int create_and_get_cgroup(const char *path); +int join_cgroup(const char *path); int setup_cgroup_environment(void); void cleanup_cgroup_environment(void); -unsigned long long get_cgroup_id(char *path); +unsigned long long get_cgroup_id(const char *path); #endif From c65267e5ff418c81d7fea7025850ed4f190a1289 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 11 Jul 2018 17:33:35 -0700 Subject: [PATCH 59/61] selftests/bpf: Switch test_tcpbpf_user to cgroup_helpers Switch to cgroup_helpers to simplify the code and fix cgroup cleanup: before cgroup was not cleaned up after the test. It also removes SYSTEM macro, that only printed error, but didn't terminate the test. Signed-off-by: Andrey Ignatov Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 1 + .../testing/selftests/bpf/test_tcpbpf_user.c | 55 +++++++------------ 2 files changed, 22 insertions(+), 34 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 7a6214e9ae58..478bf1bcbbf5 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -61,6 +61,7 @@ $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c $(OUTPUT)/test_sock: cgroup_helpers.c $(OUTPUT)/test_sock_addr: cgroup_helpers.c $(OUTPUT)/test_sockmap: cgroup_helpers.c +$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c $(OUTPUT)/test_progs: trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c index 84ab5163c828..fa97ec6428de 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_user.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c @@ -1,25 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include #include #include -#include #include -#include -#include -#include #include -#include -#include #include -#include -#include #include #include -#include "bpf_util.h" + #include "bpf_rlimit.h" -#include +#include "bpf_util.h" +#include "cgroup_helpers.h" + #include "test_tcpbpf.h" static int bpf_find_map(const char *test, struct bpf_object *obj, @@ -35,42 +28,32 @@ static int bpf_find_map(const char *test, struct bpf_object *obj, return bpf_map__fd(map); } -#define SYSTEM(CMD) \ - do { \ - if (system(CMD)) { \ - printf("system(%s) FAILS!\n", CMD); \ - } \ - } while (0) - int main(int argc, char **argv) { const char *file = "test_tcpbpf_kern.o"; struct tcpbpf_globals g = {0}; - int cg_fd, prog_fd, map_fd; + const char *cg_path = "/foo"; bool debug_flag = false; int error = EXIT_FAILURE; struct bpf_object *obj; - char cmd[100], *dir; - struct stat buffer; + int prog_fd, map_fd; + int cg_fd = -1; __u32 key = 0; - int pid; int rv; if (argc > 1 && strcmp(argv[1], "-d") == 0) debug_flag = true; - dir = "/tmp/cgroupv2/foo"; + if (setup_cgroup_environment()) + goto err; - if (stat(dir, &buffer) != 0) { - SYSTEM("mkdir -p /tmp/cgroupv2"); - SYSTEM("mount -t cgroup2 none /tmp/cgroupv2"); - SYSTEM("mkdir -p /tmp/cgroupv2/foo"); - } - pid = (int) getpid(); - sprintf(cmd, "echo %d >> /tmp/cgroupv2/foo/cgroup.procs", pid); - SYSTEM(cmd); + cg_fd = create_and_get_cgroup(cg_path); + if (!cg_fd) + goto err; + + if (join_cgroup(cg_path)) + goto err; - cg_fd = open(dir, O_DIRECTORY, O_RDONLY); if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { printf("FAILED: load_bpf_file failed for: %s\n", file); goto err; @@ -83,7 +66,10 @@ int main(int argc, char **argv) goto err; } - SYSTEM("./tcp_server.py"); + if (system("./tcp_server.py")) { + printf("FAILED: TCP server\n"); + goto err; + } map_fd = bpf_find_map(__func__, obj, "global_map"); if (map_fd < 0) @@ -123,6 +109,7 @@ int main(int argc, char **argv) error = 0; err: bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); + close(cg_fd); + cleanup_cgroup_environment(); return error; - } From 2044e4ef0be29f0154b078a956a3d8331315298a Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 11 Jul 2018 17:33:36 -0700 Subject: [PATCH 60/61] selftests/bpf: Better verification in test_tcpbpf Reduce amount of copy/paste for debug info when result is verified in the test and keep that info together with values being checked so that they won't get out of sync. It also improves debug experience: instead of checking manually what doesn't match in debug output for all fields, only unexpected field is printed. Signed-off-by: Andrey Ignatov Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- .../testing/selftests/bpf/test_tcpbpf_user.c | 64 +++++++++++-------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c index fa97ec6428de..971f1644b9c7 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_user.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -15,6 +16,42 @@ #include "test_tcpbpf.h" +#define EXPECT_EQ(expected, actual, fmt) \ + do { \ + if ((expected) != (actual)) { \ + printf(" Value of: " #actual "\n" \ + " Actual: %" fmt "\n" \ + " Expected: %" fmt "\n", \ + (actual), (expected)); \ + goto err; \ + } \ + } while (0) + +int verify_result(const struct tcpbpf_globals *result) +{ + __u32 expected_events; + + expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) | + (1 << BPF_SOCK_OPS_RWND_INIT) | + (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) | + (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) | + (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) | + (1 << BPF_SOCK_OPS_NEEDS_ECN) | + (1 << BPF_SOCK_OPS_STATE_CB)); + + EXPECT_EQ(expected_events, result->event_map, "#" PRIx32); + EXPECT_EQ(501ULL, result->bytes_received, "llu"); + EXPECT_EQ(1002ULL, result->bytes_acked, "llu"); + EXPECT_EQ(1, result->data_segs_in, PRIu32); + EXPECT_EQ(1, result->data_segs_out, PRIu32); + EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32); + EXPECT_EQ(0, result->good_cb_test_rv, PRIu32); + + return 0; +err: + return -1; +} + static int bpf_find_map(const char *test, struct bpf_object *obj, const char *name) { @@ -33,7 +70,6 @@ int main(int argc, char **argv) const char *file = "test_tcpbpf_kern.o"; struct tcpbpf_globals g = {0}; const char *cg_path = "/foo"; - bool debug_flag = false; int error = EXIT_FAILURE; struct bpf_object *obj; int prog_fd, map_fd; @@ -41,9 +77,6 @@ int main(int argc, char **argv) __u32 key = 0; int rv; - if (argc > 1 && strcmp(argv[1], "-d") == 0) - debug_flag = true; - if (setup_cgroup_environment()) goto err; @@ -81,30 +114,11 @@ int main(int argc, char **argv) goto err; } - if (g.bytes_received != 501 || g.bytes_acked != 1002 || - g.data_segs_in != 1 || g.data_segs_out != 1 || - (g.event_map ^ 0x47e) != 0 || g.bad_cb_test_rv != 0x80 || - g.good_cb_test_rv != 0) { + if (verify_result(&g)) { printf("FAILED: Wrong stats\n"); - if (debug_flag) { - printf("\n"); - printf("bytes_received: %d (expecting 501)\n", - (int)g.bytes_received); - printf("bytes_acked: %d (expecting 1002)\n", - (int)g.bytes_acked); - printf("data_segs_in: %d (expecting 1)\n", - g.data_segs_in); - printf("data_segs_out: %d (expecting 1)\n", - g.data_segs_out); - printf("event_map: 0x%x (at least 0x47e)\n", - g.event_map); - printf("bad_cb_test_rv: 0x%x (expecting 0x80)\n", - g.bad_cb_test_rv); - printf("good_cb_test_rv:0x%x (expecting 0)\n", - g.good_cb_test_rv); - } goto err; } + printf("PASSED!\n"); error = 0; err: From 78d8e26d46bc2ed73ab6a0e369a342478fda4ce0 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 11 Jul 2018 17:33:37 -0700 Subject: [PATCH 61/61] selftests/bpf: Test case for BPF_SOCK_OPS_TCP_LISTEN_CB Cover new TCP-BPF callback in test_tcpbpf: when listen() is called on socket, set BPF_SOCK_OPS_STATE_CB_FLAG so that BPF_SOCK_OPS_STATE_CB callback can be called on future state transition, and when such a transition happens (TCP_LISTEN -> TCP_CLOSE), track it in the map and verify it in user space later. Signed-off-by: Andrey Ignatov Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_tcpbpf.h | 1 + tools/testing/selftests/bpf/test_tcpbpf_kern.c | 17 ++++++++++++----- tools/testing/selftests/bpf/test_tcpbpf_user.c | 4 +++- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h index 2fe43289943c..7bcfa6207005 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf.h +++ b/tools/testing/selftests/bpf/test_tcpbpf.h @@ -12,5 +12,6 @@ struct tcpbpf_globals { __u32 good_cb_test_rv; __u64 bytes_received; __u64 bytes_acked; + __u32 num_listen; }; #endif diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c index 3e645ee41ed5..4b7fd540cea9 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c @@ -96,15 +96,22 @@ int bpf_testcb(struct bpf_sock_ops *skops) if (!gp) break; g = *gp; - g.total_retrans = skops->total_retrans; - g.data_segs_in = skops->data_segs_in; - g.data_segs_out = skops->data_segs_out; - g.bytes_received = skops->bytes_received; - g.bytes_acked = skops->bytes_acked; + if (skops->args[0] == BPF_TCP_LISTEN) { + g.num_listen++; + } else { + g.total_retrans = skops->total_retrans; + g.data_segs_in = skops->data_segs_in; + g.data_segs_out = skops->data_segs_out; + g.bytes_received = skops->bytes_received; + g.bytes_acked = skops->bytes_acked; + } bpf_map_update_elem(&global_map, &key, &g, BPF_ANY); } break; + case BPF_SOCK_OPS_TCP_LISTEN_CB: + bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG); + break; default: rv = -1; } diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c index 971f1644b9c7..a275c2971376 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_user.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c @@ -37,7 +37,8 @@ int verify_result(const struct tcpbpf_globals *result) (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) | (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) | (1 << BPF_SOCK_OPS_NEEDS_ECN) | - (1 << BPF_SOCK_OPS_STATE_CB)); + (1 << BPF_SOCK_OPS_STATE_CB) | + (1 << BPF_SOCK_OPS_TCP_LISTEN_CB)); EXPECT_EQ(expected_events, result->event_map, "#" PRIx32); EXPECT_EQ(501ULL, result->bytes_received, "llu"); @@ -46,6 +47,7 @@ int verify_result(const struct tcpbpf_globals *result) EXPECT_EQ(1, result->data_segs_out, PRIu32); EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32); EXPECT_EQ(0, result->good_cb_test_rv, PRIu32); + EXPECT_EQ(1, result->num_listen, PRIu32); return 0; err: