From 7445cf31d2e25e3f8ad7b1c5342e624c09ab23a2 Mon Sep 17 00:00:00 2001 From: Zvi Effron Date: Wed, 7 Jul 2021 22:16:54 +0000 Subject: [PATCH 1/4] bpf: Add function for XDP meta data length check This commit prepares to use the XDP meta data length check in multiple places by making it into a static inline function instead of a literal. Co-developed-by: Cody Haas Co-developed-by: Lisa Watanabe Signed-off-by: Cody Haas Signed-off-by: Lisa Watanabe Signed-off-by: Zvi Effron Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210707221657.3985075-2-zeffron@riotgames.com --- include/net/xdp.h | 5 +++++ net/core/filter.c | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/net/xdp.h b/include/net/xdp.h index 5533f0ab2afc..ad5b02dcb6f4 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -276,6 +276,11 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp) return unlikely(xdp->data_meta > xdp->data); } +static inline bool xdp_metalen_invalid(unsigned long metalen) +{ + return (metalen & (sizeof(__u32) - 1)) || (metalen > 32); +} + struct xdp_attachment_info { struct bpf_prog *prog; u32 flags; diff --git a/net/core/filter.c b/net/core/filter.c index d70187ce851b..f2c15b2a057a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -77,6 +77,7 @@ #include #include #include +#include static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id); @@ -3880,8 +3881,7 @@ BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) if (unlikely(meta < xdp_frame_end || meta > xdp->data)) return -EINVAL; - if (unlikely((metalen & (sizeof(__u32) - 1)) || - (metalen > 32))) + if (unlikely(xdp_metalen_invalid(metalen))) return -EACCES; xdp->data_meta = meta; From 47316f4a305367794fc04f23e5c778678d8f1d8e Mon Sep 17 00:00:00 2001 From: Zvi Effron Date: Wed, 7 Jul 2021 22:16:55 +0000 Subject: [PATCH 2/4] bpf: Support input xdp_md context in BPF_PROG_TEST_RUN Support passing a 
xdp_md via ctx_in/ctx_out in bpf_attr for BPF_PROG_TEST_RUN. The intended use case is to pass some XDP meta data to the test runs of XDP programs that are used as tail calls. For programs that use bpf_prog_test_run_xdp, support xdp_md input and output. Unlike with an actual xdp_md during a non-test run, data_meta must be 0 because it must point to the start of the provided user data. From the initial xdp_md, use data and data_end to adjust the pointers in the generated xdp_buff. Any other non-zero field is rejected with -EINVAL. If the user has set ctx_out/ctx_size_out, copy the (potentially different) xdp_md back to userspace. We require all fields of the input xdp_md except the ones we explicitly support to be set to zero. The expectation is that in the future we might add support for more fields and we want to fail explicitly if the user runs the program on a kernel that does not yet support them. Co-developed-by: Cody Haas Co-developed-by: Lisa Watanabe Signed-off-by: Cody Haas Signed-off-by: Lisa Watanabe Signed-off-by: Zvi Effron Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210707221657.3985075-3-zeffron@riotgames.com --- include/uapi/linux/bpf.h | 3 -- net/bpf/test_run.c | 67 +++++++++++++++++++++++++++++++++++----- 2 files changed, 59 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bf9252c7381e..b46a383e8db7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -324,9 +324,6 @@ union bpf_iter_link_info { * **BPF_PROG_TYPE_SK_LOOKUP** * *data_in* and *data_out* must be NULL. * - * **BPF_PROG_TYPE_XDP** - * *ctx_in* and *ctx_out* must be NULL. 
- * * **BPF_PROG_TYPE_RAW_TRACEPOINT**, * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE** * diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index aa47af349ba8..229c5deb813c 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -15,6 +15,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -687,6 +688,22 @@ out: return ret; } +static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp) +{ + if (!xdp_md) + return 0; + + if (xdp_md->egress_ifindex != 0) + return -EINVAL; + + if (xdp_md->ingress_ifindex != 0 || xdp_md->rx_queue_index != 0) + return -EINVAL; + + xdp->data = xdp->data_meta + xdp_md->data; + + return 0; +} + int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { @@ -697,35 +714,69 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, struct netdev_rx_queue *rxqueue; struct xdp_buff xdp = {}; u32 retval, duration; + struct xdp_md *ctx; u32 max_data_sz; void *data; - int ret; + int ret = -EINVAL; - if (kattr->test.ctx_in || kattr->test.ctx_out) - return -EINVAL; + ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md)); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + if (ctx) { + /* There can't be user provided data before the meta data */ + if (ctx->data_meta || ctx->data_end != size || + ctx->data > ctx->data_end || + unlikely(xdp_metalen_invalid(ctx->data))) + goto free_ctx; + /* Meta data is allocated from the headroom */ + headroom -= ctx->data; + } /* XDP have extra tailroom as (most) drivers use full page */ max_data_sz = 4096 - headroom - tailroom; data = bpf_test_init(kattr, max_data_sz, headroom, tailroom); - if (IS_ERR(data)) - return PTR_ERR(data); + if (IS_ERR(data)) { + ret = PTR_ERR(data); + goto free_ctx; + } rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0); xdp_init_buff(&xdp, headroom + max_data_sz + tailroom, &rxqueue->xdp_rxq); xdp_prepare_buff(&xdp, data, headroom, size, true); + ret = 
xdp_convert_md_to_buff(ctx, &xdp); + if (ret) + goto free_data; + bpf_prog_change_xdp(NULL, prog); ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true); if (ret) goto out; - if (xdp.data != data + headroom || xdp.data_end != xdp.data + size) - size = xdp.data_end - xdp.data; - ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration); + + if (xdp.data_meta != data + headroom || + xdp.data_end != xdp.data_meta + size) + size = xdp.data_end - xdp.data_meta; + + if (ctx) { + ctx->data = xdp.data - xdp.data_meta; + ctx->data_end = xdp.data_end - xdp.data_meta; + } + + ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval, + duration); + if (!ret) + ret = bpf_ctx_finish(kattr, uattr, ctx, + sizeof(struct xdp_md)); + out: bpf_prog_change_xdp(prog, NULL); +free_data: kfree(data); +free_ctx: + kfree(ctx); return ret; } From ec94670fcb3bdeaf3baaa8d86f54e90a5557f53b Mon Sep 17 00:00:00 2001 From: Zvi Effron Date: Wed, 7 Jul 2021 22:16:56 +0000 Subject: [PATCH 3/4] bpf: Support specifying ingress via xdp_md context in BPF_PROG_TEST_RUN Support specifying the ingress_ifindex and rx_queue_index of xdp_md contexts for BPF_PROG_TEST_RUN. The intended use case is to allow testing XDP programs that make decisions based on the ingress interface or RX queue. If ingress_ifindex is specified, look up the device by the provided index in the current namespace and use its xdp_rxq for the xdp_buff. If the rx_queue_index is out of range, or is non-zero when the ingress_ifindex is 0, return -EINVAL. 
Co-developed-by: Cody Haas Co-developed-by: Lisa Watanabe Signed-off-by: Cody Haas Signed-off-by: Lisa Watanabe Signed-off-by: Zvi Effron Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210707221657.3985075-4-zeffron@riotgames.com --- net/bpf/test_run.c | 56 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 229c5deb813c..cda8375bbbaf 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -690,18 +690,60 @@ out: static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp) { + unsigned int ingress_ifindex, rx_queue_index; + struct netdev_rx_queue *rxqueue; + struct net_device *device; + if (!xdp_md) return 0; if (xdp_md->egress_ifindex != 0) return -EINVAL; - if (xdp_md->ingress_ifindex != 0 || xdp_md->rx_queue_index != 0) + ingress_ifindex = xdp_md->ingress_ifindex; + rx_queue_index = xdp_md->rx_queue_index; + + if (!ingress_ifindex && rx_queue_index) return -EINVAL; - xdp->data = xdp->data_meta + xdp_md->data; + if (ingress_ifindex) { + device = dev_get_by_index(current->nsproxy->net_ns, + ingress_ifindex); + if (!device) + return -ENODEV; + if (rx_queue_index >= device->real_num_rx_queues) + goto free_dev; + + rxqueue = __netif_get_rx_queue(device, rx_queue_index); + + if (!xdp_rxq_info_is_reg(&rxqueue->xdp_rxq)) + goto free_dev; + + xdp->rxq = &rxqueue->xdp_rxq; + /* The device is now tracked in the xdp->rxq for later + * dev_put() + */ + } + + xdp->data = xdp->data_meta + xdp_md->data; return 0; + +free_dev: + dev_put(device); + return -EINVAL; +} + +static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md) +{ + if (!xdp_md) + return; + + xdp_md->data = xdp->data - xdp->data_meta; + xdp_md->data_end = xdp->data_end - xdp->data_meta; + + if (xdp_md->ingress_ifindex) + dev_put(xdp->rxq->dev); } int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, 
@@ -753,6 +795,11 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, bpf_prog_change_xdp(NULL, prog); ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true); + /* We convert the xdp_buff back to an xdp_md before checking the return + * code so the reference count of any held netdevice will be decremented + * even if the test run failed. + */ + xdp_convert_buff_to_md(&xdp, ctx); if (ret) goto out; @@ -760,11 +807,6 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, xdp.data_end != xdp.data_meta + size) size = xdp.data_end - xdp.data_meta; - if (ctx) { - ctx->data = xdp.data - xdp.data_meta; - ctx->data_end = xdp.data_end - xdp.data_meta; - } - ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval, duration); if (!ret) From 939b9c6890da97ea19822e3bd295816175b86fbd Mon Sep 17 00:00:00 2001 From: Zvi Effron Date: Wed, 7 Jul 2021 22:16:57 +0000 Subject: [PATCH 4/4] selftests/bpf: Add test for xdp_md context in BPF_PROG_TEST_RUN Add a test for using xdp_md as a context to BPF_PROG_TEST_RUN for XDP programs. The test uses a BPF program that takes in a return value from XDP meta data, then reduces the size of the XDP meta data by 4 bytes. Test cases validate the possible failure cases for passing in invalid xdp_md contexts, that the return value is successfully passed in, and that the adjusted meta data is successfully copied out. 
Co-developed-by: Cody Haas Co-developed-by: Lisa Watanabe Signed-off-by: Cody Haas Signed-off-by: Lisa Watanabe Signed-off-by: Zvi Effron Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210707221657.3985075-5-zeffron@riotgames.com --- .../bpf/prog_tests/xdp_context_test_run.c | 105 ++++++++++++++++++ .../bpf/progs/test_xdp_context_test_run.c | 20 ++++ 2 files changed, 125 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c new file mode 100644 index 000000000000..ab4952b9fb1d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "test_xdp_context_test_run.skel.h" + +void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts, + __u32 data_meta, __u32 data, __u32 data_end, + __u32 ingress_ifindex, __u32 rx_queue_index, + __u32 egress_ifindex) +{ + struct xdp_md ctx = { + .data = data, + .data_end = data_end, + .data_meta = data_meta, + .ingress_ifindex = ingress_ifindex, + .rx_queue_index = rx_queue_index, + .egress_ifindex = egress_ifindex, + }; + int err; + + opts.ctx_in = &ctx; + opts.ctx_size_in = sizeof(ctx); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_EQ(errno, EINVAL, "errno-EINVAL"); + ASSERT_ERR(err, "bpf_prog_test_run"); +} + +void test_xdp_context_test_run(void) +{ + struct test_xdp_context_test_run *skel = NULL; + char data[sizeof(pkt_v4) + sizeof(__u32)]; + char bad_ctx[sizeof(struct xdp_md) + 1]; + struct xdp_md ctx_in, ctx_out; + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &data, + .data_size_in = sizeof(data), + .ctx_out = &ctx_out, + .ctx_size_out = sizeof(ctx_out), + .repeat = 1, + ); + int err, 
prog_fd; + + skel = test_xdp_context_test_run__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel")) + return; + prog_fd = bpf_program__fd(skel->progs.xdp_context); + + /* Data past the end of the kernel's struct xdp_md must be 0 */ + bad_ctx[sizeof(bad_ctx) - 1] = 1; + opts.ctx_in = bad_ctx; + opts.ctx_size_in = sizeof(bad_ctx); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_EQ(errno, E2BIG, "extradata-errno"); + ASSERT_ERR(err, "bpf_prog_test_run(extradata)"); + + *(__u32 *)data = XDP_PASS; + *(struct ipv4_packet *)(data + sizeof(__u32)) = pkt_v4; + opts.ctx_in = &ctx_in; + opts.ctx_size_in = sizeof(ctx_in); + memset(&ctx_in, 0, sizeof(ctx_in)); + ctx_in.data_meta = 0; + ctx_in.data = sizeof(__u32); + ctx_in.data_end = ctx_in.data + sizeof(pkt_v4); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(err, "bpf_prog_test_run(valid)"); + ASSERT_EQ(opts.retval, XDP_PASS, "valid-retval"); + ASSERT_EQ(opts.data_size_out, sizeof(pkt_v4), "valid-datasize"); + ASSERT_EQ(opts.ctx_size_out, opts.ctx_size_in, "valid-ctxsize"); + ASSERT_EQ(ctx_out.data_meta, 0, "valid-datameta"); + ASSERT_EQ(ctx_out.data, 0, "valid-data"); + ASSERT_EQ(ctx_out.data_end, sizeof(pkt_v4), "valid-dataend"); + + /* Meta data's size must be a multiple of 4 */ + test_xdp_context_error(prog_fd, opts, 0, 1, sizeof(data), 0, 0, 0); + + /* data_meta must reference the start of data */ + test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data), + 0, 0, 0); + + /* Meta data must be 32 bytes or smaller */ + test_xdp_context_error(prog_fd, opts, 0, 36, sizeof(data), 0, 0, 0); + + /* Total size of data must match data_end - data_meta */ + test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), + sizeof(data) - 1, 0, 0, 0); + test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), + sizeof(data) + 1, 0, 0, 0); + + /* RX queue cannot be specified without specifying an ingress */ + test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data), + 0, 1, 0); + + /* Interface 1 is 
always the loopback interface which always has only + * one RX queue (index 0). This makes index 1 an invalid rx queue index + * for interface 1. + */ + test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data), + 1, 1, 0); + + /* The egress cannot be specified */ + test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data), + 0, 0, 1); + + test_xdp_context_test_run__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c b/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c new file mode 100644 index 000000000000..d7b88cd05afd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +SEC("xdp") +int xdp_context(struct xdp_md *xdp) +{ + void *data = (void *)(long)xdp->data; + __u32 *metadata = (void *)(long)xdp->data_meta; + __u32 ret; + + if (metadata + 1 > data) + return XDP_ABORTED; + ret = *metadata; + if (bpf_xdp_adjust_meta(xdp, 4)) + return XDP_ABORTED; + return ret; +} + +char _license[] SEC("license") = "GPL";