diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3c04410137d9..837024512baf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1478,13 +1478,27 @@ union bpf_attr { * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. * - * There is a single supported mode at this time: + * There are two supported modes at this time: + * + * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer + * (room space is added or removed below the layer 2 header). * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer * (room space is added or removed below the layer 3 header). * - * All values for *flags* are reserved for future usage, and must - * be left at zero. + * The following flags are supported at this time: + * + * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. + * Adjusting mss in this way is not allowed for datagrams. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 **: + * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 **: + * Any new space is reserved to hold a tunnel header. + * Configure skb offsets and other fields accordingly. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE **: + * * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **: + * Use with ENCAP_L3 flags to further specify the tunnel type. * * A call to this helper is susceptible to change the underlaying * packet buffer. Therefore, at load time, all checks on pointers @@ -2624,9 +2638,18 @@ enum bpf_func_id { /* Current network namespace */ #define BPF_F_CURRENT_NETNS (-1L) +/* BPF_FUNC_skb_adjust_room flags. */ +#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) + +#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) +#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) +#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) +#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, + BPF_ADJ_ROOM_MAC, }; /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ diff --git a/net/core/filter.c b/net/core/filter.c index d2511fe46db3..c1d19b074d6c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2963,42 +2963,113 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb) } } -static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) +#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \ + BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) + +#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \ + BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \ + BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \ + BPF_F_ADJ_ROOM_ENCAP_L4_UDP) + +static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, + u64 flags) { - u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); + bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK; + unsigned int gso_type = SKB_GSO_DODGY; + u16 mac_len, inner_net, inner_trans; int ret; - if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) - return -ENOTSUPP; + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) { + /* udp gso_size delineates datagrams, only allow if fixed */ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) || + !(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) + return -ENOTSUPP; + } - ret = skb_cow(skb, len_diff); + ret = skb_cow_head(skb, len_diff); if (unlikely(ret < 0)) return ret; + if (encap) { + if (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)) + return -ENOTSUPP; + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 && + flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) + return -EINVAL; + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE && + flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) + return -EINVAL; + + if (skb->encapsulation) + return -EALREADY; + + mac_len = skb->network_header - skb->mac_header; + inner_net = skb->network_header; + inner_trans = skb->transport_header; + } + ret = bpf_skb_net_hdr_push(skb, off, len_diff); if (unlikely(ret < 0)) return ret; + if (encap) { + /* inner mac == inner_net on l3 encap */ + skb->inner_mac_header = inner_net; + skb->inner_network_header = inner_net; + skb->inner_transport_header = inner_trans; + skb_set_inner_protocol(skb, skb->protocol); + + skb->encapsulation = 1; + skb_set_network_header(skb, mac_len); + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) + gso_type |= SKB_GSO_UDP_TUNNEL; + else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE) + gso_type |= SKB_GSO_GRE; + else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) + gso_type |= SKB_GSO_IPXIP6; + else + gso_type |= SKB_GSO_IPXIP4; + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE || + flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) { + int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ? + sizeof(struct ipv6hdr) : + sizeof(struct iphdr); + + skb_set_transport_header(skb, mac_len + nh_len); + } + } + if (skb_is_gso(skb)) { struct skb_shared_info *shinfo = skb_shinfo(skb); /* Due to header grow, MSS needs to be downgraded. */ - skb_decrease_gso_size(shinfo, len_diff); + if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) + skb_decrease_gso_size(shinfo, len_diff); + /* Header must be checked, and gso_segs recomputed. */ - shinfo->gso_type |= SKB_GSO_DODGY; + shinfo->gso_type |= gso_type; shinfo->gso_segs = 0; } return 0; } -static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) +static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, + u64 flags) { - u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; - if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) - return -ENOTSUPP; + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) { + /* udp gso_size delineates datagrams, only allow if fixed */ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) || + !(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) + return -ENOTSUPP; + } ret = skb_unclone(skb, GFP_ATOMIC); if (unlikely(ret < 0)) @@ -3012,7 +3083,9 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) struct skb_shared_info *shinfo = skb_shinfo(skb); /* Due to header shrink, MSS can be upgraded. */ - skb_increase_gso_size(shinfo, len_diff); + if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) + skb_increase_gso_size(shinfo, len_diff); + /* Header must be checked, and gso_segs recomputed. */ shinfo->gso_type |= SKB_GSO_DODGY; shinfo->gso_segs = 0; @@ -3027,49 +3100,50 @@ static u32 __bpf_skb_max_len(const struct sk_buff *skb) SKB_MAX_ALLOC; } -static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff) +BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, + u32, mode, u64, flags) { - bool trans_same = skb->transport_header == skb->network_header; u32 len_cur, len_diff_abs = abs(len_diff); u32 len_min = bpf_skb_net_base_len(skb); u32 len_max = __bpf_skb_max_len(skb); __be16 proto = skb->protocol; bool shrink = len_diff < 0; + u32 off; int ret; + if (unlikely(flags & ~BPF_F_ADJ_ROOM_MASK)) + return -EINVAL; if (unlikely(len_diff_abs > 0xfffU)) return -EFAULT; if (unlikely(proto != htons(ETH_P_IP) && proto != htons(ETH_P_IPV6))) return -ENOTSUPP; + off = skb_mac_header_len(skb); + switch (mode) { + case BPF_ADJ_ROOM_NET: + off += bpf_skb_net_base_len(skb); + break; + case BPF_ADJ_ROOM_MAC: + break; + default: + return -ENOTSUPP; + } + len_cur = skb->len - skb_network_offset(skb); - if (skb_transport_header_was_set(skb) && !trans_same) - len_cur = skb_network_header_len(skb); if ((shrink && (len_diff_abs >= len_cur || len_cur - len_diff_abs < len_min)) || (!shrink && (skb->len + len_diff_abs > len_max && !skb_is_gso(skb)))) return -ENOTSUPP; - ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) : - bpf_skb_net_grow(skb, len_diff_abs); + ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) : + bpf_skb_net_grow(skb, off, len_diff_abs, flags); bpf_compute_data_pointers(skb); return ret; } -BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, - u32, mode, u64, flags) -{ - if (unlikely(flags)) - return -EINVAL; - if (likely(mode == BPF_ADJ_ROOM_NET)) - return bpf_skb_adjust_net(skb, len_diff); - - return -ENOTSUPP; -} - static const struct bpf_func_proto bpf_skb_adjust_room_proto = { .func = bpf_skb_adjust_room, .gpl_only = false, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3c04410137d9..837024512baf 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1478,13 +1478,27 @@ union bpf_attr { * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. * - * There is a single supported mode at this time: + * There are two supported modes at this time: + * + * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer + * (room space is added or removed below the layer 2 header). * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer * (room space is added or removed below the layer 3 header). * - * All values for *flags* are reserved for future usage, and must - * be left at zero. + * The following flags are supported at this time: + * + * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. + * Adjusting mss in this way is not allowed for datagrams. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 **: + * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 **: + * Any new space is reserved to hold a tunnel header. + * Configure skb offsets and other fields accordingly. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE **: + * * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **: + * Use with ENCAP_L3 flags to further specify the tunnel type. * * A call to this helper is susceptible to change the underlaying * packet buffer. Therefore, at load time, all checks on pointers @@ -2624,9 +2638,18 @@ enum bpf_func_id { /* Current network namespace */ #define BPF_F_CURRENT_NETNS (-1L) +/* BPF_FUNC_skb_adjust_room flags. */ +#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) + +#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) +#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) +#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) +#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, + BPF_ADJ_ROOM_MAC, }; /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index edd59707cb1f..cdcc54ddf4b9 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -52,7 +52,8 @@ TEST_PROGS := test_kmod.sh \ test_flow_dissector.sh \ test_xdp_vlan.sh \ test_lwt_ip_encap.sh \ - test_tcp_check_syncookie.sh + test_tcp_check_syncookie.sh \ + test_tc_tunnel.sh TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 37f947ec44ed..a42f4fc4dc11 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -23,3 +23,5 @@ CONFIG_LWTUNNEL=y CONFIG_BPF_STREAM_PARSER=y CONFIG_XDP_SOCKETS=y CONFIG_FTRACE_SYSCALLS=y +CONFIG_IPV6_TUNNEL=y +CONFIG_IPV6_GRE=y diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c new file mode 100644 index 000000000000..f541c2de947d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* In-place tunneling */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_endian.h" +#include "bpf_helpers.h" + +static const int cfg_port = 8000; + +struct grev4hdr { + struct iphdr ip; + __be16 flags; + __be16 protocol; +} __attribute__((packed)); + +struct grev6hdr { + struct ipv6hdr ip; + __be16 flags; + __be16 protocol; +} __attribute__((packed)); + +static __always_inline void set_ipv4_csum(struct iphdr *iph) +{ + __u16 *iph16 = (__u16 *)iph; + __u32 csum; + int i; + + iph->check = 0; + +#pragma clang loop unroll(full) + for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++) + csum += *iph16++; + + iph->check = ~((csum & 0xffff) + (csum >> 16)); +} + +static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre) +{ + struct grev4hdr h_outer; + struct iphdr iph_inner; + struct tcphdr tcph; + __u64 flags; + int olen; + + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner, + sizeof(iph_inner)) < 0) + return TC_ACT_OK; + + /* filter only packets we want */ + if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP) + return TC_ACT_OK; + + if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner), + &tcph, sizeof(tcph)) < 0) + return TC_ACT_OK; + + if (tcph.dest != __bpf_constant_htons(cfg_port)) + return TC_ACT_OK; + + flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4; + if (with_gre) { + flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE; + olen = sizeof(h_outer); + } else { + olen = sizeof(h_outer.ip); + } + + /* add room between mac and network header */ + if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags)) + return TC_ACT_SHOT; + + /* prepare new outer network header */ + h_outer.ip = iph_inner; + h_outer.ip.tot_len = bpf_htons(olen + + bpf_htons(h_outer.ip.tot_len)); + if (with_gre) { + h_outer.ip.protocol = IPPROTO_GRE; + h_outer.protocol = bpf_htons(ETH_P_IP); + h_outer.flags = 0; + } else { + h_outer.ip.protocol = IPPROTO_IPIP; + } + + set_ipv4_csum((void *)&h_outer.ip); + + /* store new outer network header */ + if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen, + BPF_F_INVALIDATE_HASH) < 0) + return TC_ACT_SHOT; + + return TC_ACT_OK; +} + +static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre) +{ + struct ipv6hdr iph_inner; + struct grev6hdr h_outer; + struct tcphdr tcph; + __u64 flags; + int olen; + + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner, + sizeof(iph_inner)) < 0) + return TC_ACT_OK; + + /* filter only packets we want */ + if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner), + &tcph, sizeof(tcph)) < 0) + return TC_ACT_OK; + + if (tcph.dest != __bpf_constant_htons(cfg_port)) + return TC_ACT_OK; + + flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6; + if (with_gre) { + flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE; + olen = sizeof(h_outer); + } else { + olen = sizeof(h_outer.ip); + } + + + /* add room between mac and network header */ + if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags)) + return TC_ACT_SHOT; + + /* prepare new outer network header */ + h_outer.ip = iph_inner; + h_outer.ip.payload_len = bpf_htons(olen + + bpf_ntohs(h_outer.ip.payload_len)); + if (with_gre) { + h_outer.ip.nexthdr = IPPROTO_GRE; + h_outer.protocol = bpf_htons(ETH_P_IPV6); + h_outer.flags = 0; + } else { + h_outer.ip.nexthdr = IPPROTO_IPV6; + } + + /* store new outer network header */ + if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen, + BPF_F_INVALIDATE_HASH) < 0) + return TC_ACT_SHOT; + + return TC_ACT_OK; +} + +SEC("encap_ipip") +int __encap_ipip(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) + return encap_ipv4(skb, false); + else + return TC_ACT_OK; +} + +SEC("encap_gre") +int __encap_gre(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) + return encap_ipv4(skb, true); + else + return TC_ACT_OK; +} + +SEC("encap_ip6tnl") +int __encap_ip6tnl(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) + return encap_ipv6(skb, false); + else + return TC_ACT_OK; +} + +SEC("encap_ip6gre") +int __encap_ip6gre(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) + return encap_ipv6(skb, true); + else + return TC_ACT_OK; +} + +static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) +{ + char buf[sizeof(struct grev6hdr)]; + int olen; + + switch (proto) { + case IPPROTO_IPIP: + case IPPROTO_IPV6: + olen = len; + break; + case IPPROTO_GRE: + olen = len + 4 /* gre hdr */; + break; + default: + return TC_ACT_OK; + } + + if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, + BPF_F_ADJ_ROOM_FIXED_GSO)) + return TC_ACT_SHOT; + + return TC_ACT_OK; +} + +static int decap_ipv4(struct __sk_buff *skb) +{ + struct iphdr iph_outer; + + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer, + sizeof(iph_outer)) < 0) + return TC_ACT_OK; + + if (iph_outer.ihl != 5) + return TC_ACT_OK; + + return decap_internal(skb, ETH_HLEN, sizeof(iph_outer), + iph_outer.protocol); +} + +static int decap_ipv6(struct __sk_buff *skb) +{ + struct ipv6hdr iph_outer; + + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer, + sizeof(iph_outer)) < 0) + return TC_ACT_OK; + + return decap_internal(skb, ETH_HLEN, sizeof(iph_outer), + iph_outer.nexthdr); +} + +SEC("decap") +int decap_f(struct __sk_buff *skb) +{ + switch (skb->protocol) { + case __bpf_constant_htons(ETH_P_IP): + return decap_ipv4(skb); + case __bpf_constant_htons(ETH_P_IPV6): + return decap_ipv6(skb); + default: + /* does not match, ignore */ + return TC_ACT_OK; + } +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh new file mode 100755 index 000000000000..dcf320626931 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh @@ -0,0 +1,178 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# In-place tunneling + +# must match the port that the bpf program filters on +readonly port=8000 + +readonly ns_prefix="ns-$$-" +readonly ns1="${ns_prefix}1" +readonly ns2="${ns_prefix}2" + +readonly ns1_v4=192.168.1.1 +readonly ns2_v4=192.168.1.2 +readonly ns1_v6=fd::1 +readonly ns2_v6=fd::2 + +readonly infile="$(mktemp)" +readonly outfile="$(mktemp)" + +setup() { + ip netns add "${ns1}" + ip netns add "${ns2}" + + ip link add dev veth1 mtu 1500 netns "${ns1}" type veth \ + peer name veth2 mtu 1500 netns "${ns2}" + + ip netns exec "${ns1}" ethtool -K veth1 tso off + + ip -netns "${ns1}" link set veth1 up + ip -netns "${ns2}" link set veth2 up + + ip -netns "${ns1}" -4 addr add "${ns1_v4}/24" dev veth1 + ip -netns "${ns2}" -4 addr add "${ns2_v4}/24" dev veth2 + ip -netns "${ns1}" -6 addr add "${ns1_v6}/64" dev veth1 nodad + ip -netns "${ns2}" -6 addr add "${ns2_v6}/64" dev veth2 nodad + + # clamp route to reserve room for tunnel headers + ip -netns "${ns1}" -4 route flush table main + ip -netns "${ns1}" -6 route flush table main + ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1476 dev veth1 + ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1456 dev veth1 + + sleep 1 + + dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none +} + +cleanup() { + ip netns del "${ns2}" + ip netns del "${ns1}" + + if [[ -f "${outfile}" ]]; then + rm "${outfile}" + fi + if [[ -f "${infile}" ]]; then + rm "${infile}" + fi +} + +server_listen() { + ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" & + server_pid=$! + sleep 0.2 +} + +client_connect() { + ip netns exec "${ns1}" timeout 2 nc "${netcat_opt}" -w 1 "${addr2}" "${port}" < "${infile}" + echo $? +} + +verify_data() { + wait "${server_pid}" + # sha1sum returns two fields [sha1] [filepath] + # convert to bash array and access first elem + insum=($(sha1sum ${infile})) + outsum=($(sha1sum ${outfile})) + if [[ "${insum[0]}" != "${outsum[0]}" ]]; then + echo "data mismatch" + exit 1 + fi +} + +set -e + +# no arguments: automated test, run all +if [[ "$#" -eq "0" ]]; then + echo "ipip" + $0 ipv4 ipip 100 + + echo "ip6ip6" + $0 ipv6 ip6tnl 100 + + echo "ip gre" + $0 ipv4 gre 100 + + echo "ip6 gre" + $0 ipv6 ip6gre 100 + + echo "ip gre gso" + $0 ipv4 gre 2000 + + echo "ip6 gre gso" + $0 ipv6 ip6gre 2000 + + echo "OK. All tests passed" + exit 0 +fi + +if [[ "$#" -ne "3" ]]; then + echo "Usage: $0" + echo " or: $0 " + exit 1 +fi + +case "$1" in +"ipv4") + readonly addr1="${ns1_v4}" + readonly addr2="${ns2_v4}" + readonly netcat_opt=-4 + ;; +"ipv6") + readonly addr1="${ns1_v6}" + readonly addr2="${ns2_v6}" + readonly netcat_opt=-6 + ;; +*) + echo "unknown arg: $1" + exit 1 + ;; +esac + +readonly tuntype=$2 +readonly datalen=$3 + +echo "encap ${addr1} to ${addr2}, type ${tuntype}, len ${datalen}" + +trap cleanup EXIT + +setup + +# basic communication works +echo "test basic connectivity" +server_listen +client_connect +verify_data + +# clientside, insert bpf program to encap all TCP to port ${port} +# client can no longer connect +ip netns exec "${ns1}" tc qdisc add dev veth1 clsact +ip netns exec "${ns1}" tc filter add dev veth1 egress \ + bpf direct-action object-file ./test_tc_tunnel.o \ + section "encap_${tuntype}" +echo "test bpf encap without decap (expect failure)" +server_listen +! client_connect + +# serverside, insert decap module +# server is still running +# client can connect again +ip netns exec "${ns2}" ip link add dev testtun0 type "${tuntype}" \ + remote "${addr1}" local "${addr2}" +ip netns exec "${ns2}" ip link set dev testtun0 up +echo "test bpf encap with tunnel device decap" +client_connect +verify_data + +# serverside, use BPF for decap +ip netns exec "${ns2}" ip link del dev testtun0 +ip netns exec "${ns2}" tc qdisc add dev veth2 clsact +ip netns exec "${ns2}" tc filter add dev veth2 ingress \ + bpf direct-action object-file ./test_tc_tunnel.o section decap +server_listen +echo "test bpf encap with bpf decap" +client_connect +verify_data + +echo OK