forked from Minki/linux
bpf: selftests: test skb->tstamp in redirect_neigh
This patch adds tests on forwarding the delivery_time for the following cases - tcp/udp + ip4/ip6 + bpf_redirect_neigh - tcp/udp + ip4/ip6 + ip[6]_forward - bpf_skb_set_delivery_time - The old rcv timestamp expectation on tc-bpf@ingress Signed-off-by: Martin KaFai Lau <kafai@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
8d21ec0e46
commit
c803475fd8
@ -17,6 +17,8 @@
|
||||
#include <linux/if_tun.h>
|
||||
#include <linux/limits.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/time_types.h>
|
||||
#include <linux/net_tstamp.h>
|
||||
#include <sched.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
@ -29,6 +31,11 @@
|
||||
#include "test_tc_neigh_fib.skel.h"
|
||||
#include "test_tc_neigh.skel.h"
|
||||
#include "test_tc_peer.skel.h"
|
||||
#include "test_tc_dtime.skel.h"
|
||||
|
||||
#ifndef TCP_TX_DELAY
|
||||
#define TCP_TX_DELAY 37
|
||||
#endif
|
||||
|
||||
#define NS_SRC "ns_src"
|
||||
#define NS_FWD "ns_fwd"
|
||||
@ -61,6 +68,7 @@
|
||||
#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk"
|
||||
|
||||
#define TIMEOUT_MILLIS 10000
|
||||
#define NSEC_PER_SEC 1000000000ULL
|
||||
|
||||
#define log_err(MSG, ...) \
|
||||
fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
|
||||
@ -440,6 +448,431 @@ static int set_forwarding(bool enable)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void rcv_tstamp(int fd, const char *expected, size_t s)
|
||||
{
|
||||
struct __kernel_timespec pkt_ts = {};
|
||||
char ctl[CMSG_SPACE(sizeof(pkt_ts))];
|
||||
struct timespec now_ts;
|
||||
struct msghdr msg = {};
|
||||
__u64 now_ns, pkt_ns;
|
||||
struct cmsghdr *cmsg;
|
||||
struct iovec iov;
|
||||
char data[32];
|
||||
int ret;
|
||||
|
||||
iov.iov_base = data;
|
||||
iov.iov_len = sizeof(data);
|
||||
msg.msg_iov = &iov;
|
||||
msg.msg_iovlen = 1;
|
||||
msg.msg_control = &ctl;
|
||||
msg.msg_controllen = sizeof(ctl);
|
||||
|
||||
ret = recvmsg(fd, &msg, 0);
|
||||
if (!ASSERT_EQ(ret, s, "recvmsg"))
|
||||
return;
|
||||
ASSERT_STRNEQ(data, expected, s, "expected rcv data");
|
||||
|
||||
cmsg = CMSG_FIRSTHDR(&msg);
|
||||
if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
|
||||
cmsg->cmsg_type == SO_TIMESTAMPNS_NEW)
|
||||
memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts));
|
||||
|
||||
pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec;
|
||||
ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp");
|
||||
|
||||
ret = clock_gettime(CLOCK_REALTIME, &now_ts);
|
||||
ASSERT_OK(ret, "clock_gettime");
|
||||
now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
|
||||
|
||||
if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp"))
|
||||
ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC,
|
||||
"check rcv tstamp");
|
||||
}
|
||||
|
||||
static void snd_tstamp(int fd, char *b, size_t s)
|
||||
{
|
||||
struct sock_txtime opt = { .clockid = CLOCK_TAI };
|
||||
char ctl[CMSG_SPACE(sizeof(__u64))];
|
||||
struct timespec now_ts;
|
||||
struct msghdr msg = {};
|
||||
struct cmsghdr *cmsg;
|
||||
struct iovec iov;
|
||||
__u64 now_ns;
|
||||
int ret;
|
||||
|
||||
ret = clock_gettime(CLOCK_TAI, &now_ts);
|
||||
ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)");
|
||||
now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
|
||||
|
||||
iov.iov_base = b;
|
||||
iov.iov_len = s;
|
||||
msg.msg_iov = &iov;
|
||||
msg.msg_iovlen = 1;
|
||||
msg.msg_control = &ctl;
|
||||
msg.msg_controllen = sizeof(ctl);
|
||||
|
||||
cmsg = CMSG_FIRSTHDR(&msg);
|
||||
cmsg->cmsg_level = SOL_SOCKET;
|
||||
cmsg->cmsg_type = SCM_TXTIME;
|
||||
cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns));
|
||||
*(__u64 *)CMSG_DATA(cmsg) = now_ns;
|
||||
|
||||
ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt));
|
||||
ASSERT_OK(ret, "setsockopt(SO_TXTIME)");
|
||||
|
||||
ret = sendmsg(fd, &msg, 0);
|
||||
ASSERT_EQ(ret, s, "sendmsg");
|
||||
}
|
||||
|
||||
static void test_inet_dtime(int family, int type, const char *addr, __u16 port)
|
||||
{
|
||||
int opt = 1, accept_fd = -1, client_fd = -1, listen_fd, err;
|
||||
char buf[] = "testing testing";
|
||||
struct nstoken *nstoken;
|
||||
|
||||
nstoken = open_netns(NS_DST);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns dst"))
|
||||
return;
|
||||
listen_fd = start_server(family, type, addr, port, 0);
|
||||
close_netns(nstoken);
|
||||
|
||||
if (!ASSERT_GE(listen_fd, 0, "listen"))
|
||||
return;
|
||||
|
||||
/* Ensure the kernel puts the (rcv) timestamp for all skb */
|
||||
err = setsockopt(listen_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
|
||||
&opt, sizeof(opt));
|
||||
if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)"))
|
||||
goto done;
|
||||
|
||||
if (type == SOCK_STREAM) {
|
||||
/* Ensure the kernel set EDT when sending out rst/ack
|
||||
* from the kernel's ctl_sk.
|
||||
*/
|
||||
err = setsockopt(listen_fd, SOL_TCP, TCP_TX_DELAY, &opt,
|
||||
sizeof(opt));
|
||||
if (!ASSERT_OK(err, "setsockopt(TCP_TX_DELAY)"))
|
||||
goto done;
|
||||
}
|
||||
|
||||
nstoken = open_netns(NS_SRC);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns src"))
|
||||
goto done;
|
||||
client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
|
||||
close_netns(nstoken);
|
||||
|
||||
if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
|
||||
goto done;
|
||||
|
||||
if (type == SOCK_STREAM) {
|
||||
int n;
|
||||
|
||||
accept_fd = accept(listen_fd, NULL, NULL);
|
||||
if (!ASSERT_GE(accept_fd, 0, "accept"))
|
||||
goto done;
|
||||
|
||||
n = write(client_fd, buf, sizeof(buf));
|
||||
if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
|
||||
goto done;
|
||||
rcv_tstamp(accept_fd, buf, sizeof(buf));
|
||||
} else {
|
||||
snd_tstamp(client_fd, buf, sizeof(buf));
|
||||
rcv_tstamp(listen_fd, buf, sizeof(buf));
|
||||
}
|
||||
|
||||
done:
|
||||
close(listen_fd);
|
||||
if (accept_fd != -1)
|
||||
close(accept_fd);
|
||||
if (client_fd != -1)
|
||||
close(client_fd);
|
||||
}
|
||||
|
||||
static int netns_load_dtime_bpf(struct test_tc_dtime *skel)
|
||||
{
|
||||
struct nstoken *nstoken;
|
||||
|
||||
#define PIN_FNAME(__file) "/sys/fs/bpf/" #__file
|
||||
#define PIN(__prog) ({ \
|
||||
int err = bpf_program__pin(skel->progs.__prog, PIN_FNAME(__prog)); \
|
||||
if (!ASSERT_OK(err, "pin " #__prog)) \
|
||||
goto fail; \
|
||||
})
|
||||
|
||||
/* setup ns_src tc progs */
|
||||
nstoken = open_netns(NS_SRC);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
|
||||
return -1;
|
||||
PIN(egress_host);
|
||||
PIN(ingress_host);
|
||||
SYS("tc qdisc add dev veth_src clsact");
|
||||
SYS("tc filter add dev veth_src ingress bpf da object-pinned "
|
||||
PIN_FNAME(ingress_host));
|
||||
SYS("tc filter add dev veth_src egress bpf da object-pinned "
|
||||
PIN_FNAME(egress_host));
|
||||
close_netns(nstoken);
|
||||
|
||||
/* setup ns_dst tc progs */
|
||||
nstoken = open_netns(NS_DST);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
|
||||
return -1;
|
||||
PIN(egress_host);
|
||||
PIN(ingress_host);
|
||||
SYS("tc qdisc add dev veth_dst clsact");
|
||||
SYS("tc filter add dev veth_dst ingress bpf da object-pinned "
|
||||
PIN_FNAME(ingress_host));
|
||||
SYS("tc filter add dev veth_dst egress bpf da object-pinned "
|
||||
PIN_FNAME(egress_host));
|
||||
close_netns(nstoken);
|
||||
|
||||
/* setup ns_fwd tc progs */
|
||||
nstoken = open_netns(NS_FWD);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
|
||||
return -1;
|
||||
PIN(ingress_fwdns_prio100);
|
||||
PIN(egress_fwdns_prio100);
|
||||
PIN(ingress_fwdns_prio101);
|
||||
PIN(egress_fwdns_prio101);
|
||||
SYS("tc qdisc add dev veth_dst_fwd clsact");
|
||||
SYS("tc filter add dev veth_dst_fwd ingress prio 100 bpf da object-pinned "
|
||||
PIN_FNAME(ingress_fwdns_prio100));
|
||||
SYS("tc filter add dev veth_dst_fwd ingress prio 101 bpf da object-pinned "
|
||||
PIN_FNAME(ingress_fwdns_prio101));
|
||||
SYS("tc filter add dev veth_dst_fwd egress prio 100 bpf da object-pinned "
|
||||
PIN_FNAME(egress_fwdns_prio100));
|
||||
SYS("tc filter add dev veth_dst_fwd egress prio 101 bpf da object-pinned "
|
||||
PIN_FNAME(egress_fwdns_prio101));
|
||||
SYS("tc qdisc add dev veth_src_fwd clsact");
|
||||
SYS("tc filter add dev veth_src_fwd ingress prio 100 bpf da object-pinned "
|
||||
PIN_FNAME(ingress_fwdns_prio100));
|
||||
SYS("tc filter add dev veth_src_fwd ingress prio 101 bpf da object-pinned "
|
||||
PIN_FNAME(ingress_fwdns_prio101));
|
||||
SYS("tc filter add dev veth_src_fwd egress prio 100 bpf da object-pinned "
|
||||
PIN_FNAME(egress_fwdns_prio100));
|
||||
SYS("tc filter add dev veth_src_fwd egress prio 101 bpf da object-pinned "
|
||||
PIN_FNAME(egress_fwdns_prio101));
|
||||
close_netns(nstoken);
|
||||
|
||||
#undef PIN
|
||||
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
close_netns(nstoken);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Counter slots in one row of the BPF program's dtimes[] / errs[] arrays.
 * The BPF program (progs/test_tc_dtime.c) declares the same enum; the two
 * must stay in the same order.
 */
enum {
	INGRESS_FWDNS_P100,
	INGRESS_FWDNS_P101,
	EGRESS_FWDNS_P100,
	EGRESS_FWDNS_P101,
	INGRESS_ENDHOST,
	EGRESS_ENDHOST,
	SET_DTIME,
	__MAX_CNT,
};

/* Printable name of each counter slot, indexed by the enum above. */
const char *cnt_names[] = {
	[INGRESS_FWDNS_P100]	= "ingress_fwdns_p100",
	[INGRESS_FWDNS_P101]	= "ingress_fwdns_p101",
	[EGRESS_FWDNS_P100]	= "egress_fwdns_p100",
	[EGRESS_FWDNS_P101]	= "egress_fwdns_p101",
	[INGRESS_ENDHOST]	= "ingress_endhost",
	[EGRESS_ENDHOST]	= "egress_endhost",
	[SET_DTIME]		= "set_dtime",
};
|
||||
|
||||
/* Test ids; also the row index into the BPF program's dtimes[] / errs[]
 * arrays. The BPF program (progs/test_tc_dtime.c) declares the same enum;
 * the two must stay in the same order.
 */
enum {
	TCP_IP6_CLEAR_DTIME,
	TCP_IP4,
	TCP_IP6,
	UDP_IP4,
	UDP_IP6,
	TCP_IP4_RT_FWD,
	TCP_IP6_RT_FWD,
	UDP_IP4_RT_FWD,
	UDP_IP6_RT_FWD,
	UKN_TEST,
	__NR_TESTS,
};

/* Printable name of each test id. Keyed by the enum so that every id
 * below __NR_TESTS, including UKN_TEST, has an entry and indexing with
 * any valid id stays inside the array.
 */
const char *test_names[] = {
	[TCP_IP6_CLEAR_DTIME]	= "tcp ip6 clear dtime",
	[TCP_IP4]		= "tcp ip4",
	[TCP_IP6]		= "tcp ip6",
	[UDP_IP4]		= "udp ip4",
	[UDP_IP6]		= "udp ip6",
	[TCP_IP4_RT_FWD]	= "tcp ip4 rt fwd",
	[TCP_IP6_RT_FWD]	= "tcp ip6 rt fwd",
	[UDP_IP4_RT_FWD]	= "udp ip4 rt fwd",
	[UDP_IP6_RT_FWD]	= "udp ip6 rt fwd",
	[UKN_TEST]		= "unknown test",
};
|
||||
|
||||
static const char *dtime_cnt_str(int test, int cnt)
|
||||
{
|
||||
static char name[64];
|
||||
|
||||
snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]);
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
static const char *dtime_err_str(int test, int cnt)
|
||||
{
|
||||
static char name[64];
|
||||
|
||||
snprintf(name, sizeof(name), "%s %s errs", test_names[test],
|
||||
cnt_names[cnt]);
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
|
||||
{
|
||||
int i, t = TCP_IP6_CLEAR_DTIME;
|
||||
__u32 *dtimes = skel->bss->dtimes[t];
|
||||
__u32 *errs = skel->bss->errs[t];
|
||||
|
||||
skel->bss->test = t;
|
||||
test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 0);
|
||||
|
||||
ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
|
||||
dtime_cnt_str(t, INGRESS_FWDNS_P100));
|
||||
ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
|
||||
dtime_cnt_str(t, INGRESS_FWDNS_P101));
|
||||
ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0,
|
||||
dtime_cnt_str(t, EGRESS_FWDNS_P100));
|
||||
ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0,
|
||||
dtime_cnt_str(t, EGRESS_FWDNS_P101));
|
||||
ASSERT_GT(dtimes[EGRESS_ENDHOST], 0,
|
||||
dtime_cnt_str(t, EGRESS_ENDHOST));
|
||||
ASSERT_GT(dtimes[INGRESS_ENDHOST], 0,
|
||||
dtime_cnt_str(t, INGRESS_ENDHOST));
|
||||
|
||||
for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
|
||||
ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
|
||||
}
|
||||
|
||||
static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
|
||||
{
|
||||
__u32 *dtimes, *errs;
|
||||
const char *addr;
|
||||
int i, t;
|
||||
|
||||
if (family == AF_INET) {
|
||||
t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD;
|
||||
addr = IP4_DST;
|
||||
} else {
|
||||
t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD;
|
||||
addr = IP6_DST;
|
||||
}
|
||||
|
||||
dtimes = skel->bss->dtimes[t];
|
||||
errs = skel->bss->errs[t];
|
||||
|
||||
skel->bss->test = t;
|
||||
test_inet_dtime(family, SOCK_STREAM, addr, 0);
|
||||
|
||||
/* fwdns_prio100 prog does not read delivery_time_type, so
|
||||
* kernel puts the (rcv) timetamp in __sk_buff->tstamp
|
||||
*/
|
||||
ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
|
||||
dtime_cnt_str(t, INGRESS_FWDNS_P100));
|
||||
for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++)
|
||||
ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
|
||||
|
||||
for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
|
||||
ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
|
||||
}
|
||||
|
||||
static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
|
||||
{
|
||||
__u32 *dtimes, *errs;
|
||||
const char *addr;
|
||||
int i, t;
|
||||
|
||||
if (family == AF_INET) {
|
||||
t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD;
|
||||
addr = IP4_DST;
|
||||
} else {
|
||||
t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD;
|
||||
addr = IP6_DST;
|
||||
}
|
||||
|
||||
dtimes = skel->bss->dtimes[t];
|
||||
errs = skel->bss->errs[t];
|
||||
|
||||
skel->bss->test = t;
|
||||
test_inet_dtime(family, SOCK_DGRAM, addr, 0);
|
||||
|
||||
ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
|
||||
dtime_cnt_str(t, INGRESS_FWDNS_P100));
|
||||
/* non mono delivery time is not forwarded */
|
||||
ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
|
||||
dtime_cnt_str(t, INGRESS_FWDNS_P100));
|
||||
for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
|
||||
ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
|
||||
|
||||
for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
|
||||
ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
|
||||
}
|
||||
|
||||
static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
|
||||
{
|
||||
struct test_tc_dtime *skel;
|
||||
struct nstoken *nstoken;
|
||||
int err;
|
||||
|
||||
skel = test_tc_dtime__open();
|
||||
if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
|
||||
return;
|
||||
|
||||
skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
|
||||
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
|
||||
|
||||
err = test_tc_dtime__load(skel);
|
||||
if (!ASSERT_OK(err, "test_tc_dtime__load"))
|
||||
goto done;
|
||||
|
||||
if (netns_load_dtime_bpf(skel))
|
||||
goto done;
|
||||
|
||||
nstoken = open_netns(NS_FWD);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
|
||||
goto done;
|
||||
err = set_forwarding(false);
|
||||
close_netns(nstoken);
|
||||
if (!ASSERT_OK(err, "disable forwarding"))
|
||||
goto done;
|
||||
|
||||
test_tcp_clear_dtime(skel);
|
||||
|
||||
test_tcp_dtime(skel, AF_INET, true);
|
||||
test_tcp_dtime(skel, AF_INET6, true);
|
||||
test_udp_dtime(skel, AF_INET, true);
|
||||
test_udp_dtime(skel, AF_INET6, true);
|
||||
|
||||
/* Test the kernel ip[6]_forward path instead
|
||||
* of bpf_redirect_neigh().
|
||||
*/
|
||||
nstoken = open_netns(NS_FWD);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
|
||||
goto done;
|
||||
err = set_forwarding(true);
|
||||
close_netns(nstoken);
|
||||
if (!ASSERT_OK(err, "enable forwarding"))
|
||||
goto done;
|
||||
|
||||
test_tcp_dtime(skel, AF_INET, false);
|
||||
test_tcp_dtime(skel, AF_INET6, false);
|
||||
test_udp_dtime(skel, AF_INET, false);
|
||||
test_udp_dtime(skel, AF_INET6, false);
|
||||
|
||||
done:
|
||||
test_tc_dtime__destroy(skel);
|
||||
}
|
||||
|
||||
static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
|
||||
{
|
||||
struct nstoken *nstoken = NULL;
|
||||
@ -787,6 +1220,7 @@ static void *test_tc_redirect_run_tests(void *arg)
|
||||
RUN_TEST(tc_redirect_peer_l3);
|
||||
RUN_TEST(tc_redirect_neigh);
|
||||
RUN_TEST(tc_redirect_neigh_fib);
|
||||
RUN_TEST(tc_redirect_dtime);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
349
tools/testing/selftests/bpf/progs/test_tc_dtime.c
Normal file
349
tools/testing/selftests/bpf/progs/test_tc_dtime.c
Normal file
@ -0,0 +1,349 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
// Copyright (c) 2022 Meta
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/pkt_cls.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/in.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/ipv6.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_endian.h>
|
||||
#include <sys/socket.h>
|
||||
|
||||
/* veth_src --- veth_src_fwd --- veth_dst_fwd --- veth_dst
|
||||
* | |
|
||||
* ns_src | ns_fwd | ns_dst
|
||||
*
|
||||
* ns_src and ns_dst: ENDHOST namespace
|
||||
* ns_fwd: Forwarding namespace
|
||||
*/
|
||||
|
||||
#define ctx_ptr(field) (void *)(long)(field)
|
||||
|
||||
#define ip4_src __bpf_htonl(0xac100164) /* 172.16.1.100 */
|
||||
#define ip4_dst __bpf_htonl(0xac100264) /* 172.16.2.100 */
|
||||
|
||||
#define ip6_src { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
|
||||
0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
|
||||
#define ip6_dst { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
|
||||
0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
|
||||
|
||||
/* Compare two struct in6_addr values 32 bits at a time.
 *
 * Both arguments are parenthesized so any expression of that type (for
 * example a dereferenced pointer or a compound literal) can be passed.
 * Each argument may be evaluated up to four times, so it must not have
 * side effects.
 */
#define v6_equal(a, b)	((a).s6_addr32[0] == (b).s6_addr32[0] && \
			 (a).s6_addr32[1] == (b).s6_addr32[1] && \
			 (a).s6_addr32[2] == (b).s6_addr32[2] && \
			 (a).s6_addr32[3] == (b).s6_addr32[3])
|
||||
|
||||
volatile const __u32 IFINDEX_SRC;
|
||||
volatile const __u32 IFINDEX_DST;
|
||||
|
||||
#define EGRESS_ENDHOST_MAGIC 0x0b9fbeef
|
||||
#define INGRESS_FWDNS_MAGIC 0x1b9fbeef
|
||||
#define EGRESS_FWDNS_MAGIC 0x2b9fbeef
|
||||
|
||||
/* Counter slots: the second index of dtimes[][] and errs[][]. */
enum {
	INGRESS_FWDNS_P100,
	INGRESS_FWDNS_P101,
	EGRESS_FWDNS_P100,
	EGRESS_FWDNS_P101,
	INGRESS_ENDHOST,
	EGRESS_ENDHOST,
	SET_DTIME,
	__MAX_CNT,
};

/* Test ids: the first index of dtimes[][] and errs[][]. Ids below
 * TCP_IP4_RT_FWD are the ones bpf_fwd() treats as bpf forwarding tests.
 */
enum {
	TCP_IP6_CLEAR_DTIME,
	TCP_IP4,
	TCP_IP6,
	UDP_IP4,
	UDP_IP6,
	TCP_IP4_RT_FWD,
	TCP_IP6_RT_FWD,
	UDP_IP4_RT_FWD,
	UDP_IP6_RT_FWD,
	UKN_TEST,
	__NR_TESTS,
};

/* Sender namespace as encoded in the second byte of skb_get_type().
 * Starts at 1 so that 0 can mean "unknown sender".
 */
enum {
	SRC_NS = 1,
	DST_NS,
};

/* Counters bumped by inc_dtimes() / inc_errs(). */
__u32 dtimes[__NR_TESTS][__MAX_CNT] = {};
__u32 errs[__NR_TESTS][__MAX_CNT] = {};
/* Id of the test whose counter row is currently being updated. */
__u32 test = 0;
|
||||
|
||||
static void inc_dtimes(__u32 idx)
|
||||
{
|
||||
if (test < __NR_TESTS)
|
||||
dtimes[test][idx]++;
|
||||
else
|
||||
dtimes[UKN_TEST][idx]++;
|
||||
}
|
||||
|
||||
static void inc_errs(__u32 idx)
|
||||
{
|
||||
if (test < __NR_TESTS)
|
||||
errs[test][idx]++;
|
||||
else
|
||||
errs[UKN_TEST][idx]++;
|
||||
}
|
||||
|
||||
/* Extract the inet protocol (lowest byte) from a skb_get_type() value. */
static int skb_proto(int type)
{
	const int proto_mask = 0xff;

	return type & proto_mask;
}
|
||||
|
||||
/* Extract the sender netns (second byte) from a skb_get_type() value. */
static int skb_ns(int type)
{
	int upper = type >> 8;

	return upper & 0xff;
}
|
||||
|
||||
static bool fwdns_clear_dtime(void)
|
||||
{
|
||||
return test == TCP_IP6_CLEAR_DTIME;
|
||||
}
|
||||
|
||||
static bool bpf_fwd(void)
|
||||
{
|
||||
return test < TCP_IP4_RT_FWD;
|
||||
}
|
||||
|
||||
/* -1: parse error: TC_ACT_SHOT
|
||||
* 0: not testing traffic: TC_ACT_OK
|
||||
* >0: first byte is the inet_proto, second byte has the netns
|
||||
* of the sender
|
||||
*/
|
||||
static int skb_get_type(struct __sk_buff *skb)
|
||||
{
|
||||
void *data_end = ctx_ptr(skb->data_end);
|
||||
void *data = ctx_ptr(skb->data);
|
||||
__u8 inet_proto = 0, ns = 0;
|
||||
struct ipv6hdr *ip6h;
|
||||
struct iphdr *iph;
|
||||
|
||||
switch (skb->protocol) {
|
||||
case __bpf_htons(ETH_P_IP):
|
||||
iph = data + sizeof(struct ethhdr);
|
||||
if (iph + 1 > data_end)
|
||||
return -1;
|
||||
if (iph->saddr == ip4_src)
|
||||
ns = SRC_NS;
|
||||
else if (iph->saddr == ip4_dst)
|
||||
ns = DST_NS;
|
||||
inet_proto = iph->protocol;
|
||||
break;
|
||||
case __bpf_htons(ETH_P_IPV6):
|
||||
ip6h = data + sizeof(struct ethhdr);
|
||||
if (ip6h + 1 > data_end)
|
||||
return -1;
|
||||
if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_src))
|
||||
ns = SRC_NS;
|
||||
else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst))
|
||||
ns = DST_NS;
|
||||
inet_proto = ip6h->nexthdr;
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((inet_proto != IPPROTO_TCP && inet_proto != IPPROTO_UDP) || !ns)
|
||||
return 0;
|
||||
|
||||
return (ns << 8 | inet_proto);
|
||||
}
|
||||
|
||||
/* format: direction@iface@netns
|
||||
* egress@veth_(src|dst)@ns_(src|dst)
|
||||
*/
|
||||
SEC("tc")
|
||||
int egress_host(struct __sk_buff *skb)
|
||||
{
|
||||
int skb_type;
|
||||
|
||||
skb_type = skb_get_type(skb);
|
||||
if (skb_type == -1)
|
||||
return TC_ACT_SHOT;
|
||||
if (!skb_type)
|
||||
return TC_ACT_OK;
|
||||
|
||||
if (skb_proto(skb_type) == IPPROTO_TCP) {
|
||||
if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO &&
|
||||
skb->tstamp)
|
||||
inc_dtimes(EGRESS_ENDHOST);
|
||||
else
|
||||
inc_errs(EGRESS_ENDHOST);
|
||||
} else {
|
||||
if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_UNSPEC &&
|
||||
skb->tstamp)
|
||||
inc_dtimes(EGRESS_ENDHOST);
|
||||
else
|
||||
inc_errs(EGRESS_ENDHOST);
|
||||
}
|
||||
|
||||
skb->tstamp = EGRESS_ENDHOST_MAGIC;
|
||||
|
||||
return TC_ACT_OK;
|
||||
}
|
||||
|
||||
/* ingress@veth_(src|dst)@ns_(src|dst) */
|
||||
SEC("tc")
|
||||
int ingress_host(struct __sk_buff *skb)
|
||||
{
|
||||
int skb_type;
|
||||
|
||||
skb_type = skb_get_type(skb);
|
||||
if (skb_type == -1)
|
||||
return TC_ACT_SHOT;
|
||||
if (!skb_type)
|
||||
return TC_ACT_OK;
|
||||
|
||||
if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO &&
|
||||
skb->tstamp == EGRESS_FWDNS_MAGIC)
|
||||
inc_dtimes(INGRESS_ENDHOST);
|
||||
else
|
||||
inc_errs(INGRESS_ENDHOST);
|
||||
|
||||
return TC_ACT_OK;
|
||||
}
|
||||
|
||||
/* ingress@veth_(src|dst)_fwd@ns_fwd priority 100 */
|
||||
SEC("tc")
|
||||
int ingress_fwdns_prio100(struct __sk_buff *skb)
|
||||
{
|
||||
int skb_type;
|
||||
|
||||
skb_type = skb_get_type(skb);
|
||||
if (skb_type == -1)
|
||||
return TC_ACT_SHOT;
|
||||
if (!skb_type)
|
||||
return TC_ACT_OK;
|
||||
|
||||
/* delivery_time is only available to the ingress
|
||||
* if the tc-bpf checks the skb->delivery_time_type.
|
||||
*/
|
||||
if (skb->tstamp == EGRESS_ENDHOST_MAGIC)
|
||||
inc_errs(INGRESS_FWDNS_P100);
|
||||
|
||||
if (fwdns_clear_dtime())
|
||||
skb->tstamp = 0;
|
||||
|
||||
return TC_ACT_UNSPEC;
|
||||
}
|
||||
|
||||
/* egress@veth_(src|dst)_fwd@ns_fwd priority 100 */
|
||||
SEC("tc")
|
||||
int egress_fwdns_prio100(struct __sk_buff *skb)
|
||||
{
|
||||
int skb_type;
|
||||
|
||||
skb_type = skb_get_type(skb);
|
||||
if (skb_type == -1)
|
||||
return TC_ACT_SHOT;
|
||||
if (!skb_type)
|
||||
return TC_ACT_OK;
|
||||
|
||||
/* delivery_time is always available to egress even
|
||||
* the tc-bpf did not use the delivery_time_type.
|
||||
*/
|
||||
if (skb->tstamp == INGRESS_FWDNS_MAGIC)
|
||||
inc_dtimes(EGRESS_FWDNS_P100);
|
||||
else
|
||||
inc_errs(EGRESS_FWDNS_P100);
|
||||
|
||||
if (fwdns_clear_dtime())
|
||||
skb->tstamp = 0;
|
||||
|
||||
return TC_ACT_UNSPEC;
|
||||
}
|
||||
|
||||
/* ingress@veth_(src|dst)_fwd@ns_fwd priority 101 */
|
||||
SEC("tc")
|
||||
int ingress_fwdns_prio101(struct __sk_buff *skb)
|
||||
{
|
||||
__u64 expected_dtime = EGRESS_ENDHOST_MAGIC;
|
||||
int skb_type;
|
||||
|
||||
skb_type = skb_get_type(skb);
|
||||
if (skb_type == -1 || !skb_type)
|
||||
/* Should have handled in prio100 */
|
||||
return TC_ACT_SHOT;
|
||||
|
||||
if (skb_proto(skb_type) == IPPROTO_UDP)
|
||||
expected_dtime = 0;
|
||||
|
||||
if (skb->delivery_time_type) {
|
||||
if (fwdns_clear_dtime() ||
|
||||
skb->delivery_time_type != BPF_SKB_DELIVERY_TIME_MONO ||
|
||||
skb->tstamp != expected_dtime)
|
||||
inc_errs(INGRESS_FWDNS_P101);
|
||||
else
|
||||
inc_dtimes(INGRESS_FWDNS_P101);
|
||||
} else {
|
||||
if (!fwdns_clear_dtime() && expected_dtime)
|
||||
inc_errs(INGRESS_FWDNS_P101);
|
||||
}
|
||||
|
||||
if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO) {
|
||||
skb->tstamp = INGRESS_FWDNS_MAGIC;
|
||||
} else {
|
||||
if (bpf_skb_set_delivery_time(skb, INGRESS_FWDNS_MAGIC,
|
||||
BPF_SKB_DELIVERY_TIME_MONO))
|
||||
inc_errs(SET_DTIME);
|
||||
if (!bpf_skb_set_delivery_time(skb, INGRESS_FWDNS_MAGIC,
|
||||
BPF_SKB_DELIVERY_TIME_UNSPEC))
|
||||
inc_errs(SET_DTIME);
|
||||
}
|
||||
|
||||
if (skb_ns(skb_type) == SRC_NS)
|
||||
return bpf_fwd() ?
|
||||
bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0) : TC_ACT_OK;
|
||||
else
|
||||
return bpf_fwd() ?
|
||||
bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0) : TC_ACT_OK;
|
||||
}
|
||||
|
||||
/* egress@veth_(src|dst)_fwd@ns_fwd priority 101 */
|
||||
SEC("tc")
|
||||
int egress_fwdns_prio101(struct __sk_buff *skb)
|
||||
{
|
||||
int skb_type;
|
||||
|
||||
skb_type = skb_get_type(skb);
|
||||
if (skb_type == -1 || !skb_type)
|
||||
/* Should have handled in prio100 */
|
||||
return TC_ACT_SHOT;
|
||||
|
||||
if (skb->delivery_time_type) {
|
||||
if (fwdns_clear_dtime() ||
|
||||
skb->delivery_time_type != BPF_SKB_DELIVERY_TIME_MONO ||
|
||||
skb->tstamp != INGRESS_FWDNS_MAGIC)
|
||||
inc_errs(EGRESS_FWDNS_P101);
|
||||
else
|
||||
inc_dtimes(EGRESS_FWDNS_P101);
|
||||
} else {
|
||||
if (!fwdns_clear_dtime())
|
||||
inc_errs(EGRESS_FWDNS_P101);
|
||||
}
|
||||
|
||||
if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO) {
|
||||
skb->tstamp = EGRESS_FWDNS_MAGIC;
|
||||
} else {
|
||||
if (bpf_skb_set_delivery_time(skb, EGRESS_FWDNS_MAGIC,
|
||||
BPF_SKB_DELIVERY_TIME_MONO))
|
||||
inc_errs(SET_DTIME);
|
||||
if (!bpf_skb_set_delivery_time(skb, EGRESS_FWDNS_MAGIC,
|
||||
BPF_SKB_DELIVERY_TIME_UNSPEC))
|
||||
inc_errs(SET_DTIME);
|
||||
}
|
||||
|
||||
return TC_ACT_OK;
|
||||
}
|
||||
|
||||
char __license[] SEC("license") = "GPL";
|
Loading…
Reference in New Issue
Block a user