abcce733ad
Make it clear that this XDP program depends on the network stack to do the ARP resolution. This is connected with the BPF_FIB_LKUP_RET_NO_NEIGH return code from bpf_fib_lookup(). Another common mistake (seen via XDP-tutorial) is that users don't realize that sysctl net.ipv{4,6}.conf.all.forwarding setting is honored by bpf_fib_lookup. Reported-by: Anton Protopopov <a.s.protopopov@gmail.com> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> Reviewed-by: David Ahern <dsahern@gmail.com> Acked-by: Yonghong Song <yhs@fb.com> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
160 lines
4.6 KiB
C
160 lines
4.6 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of version 2 of the GNU General Public
|
|
* License as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*/
|
|
#define KBUILD_MODNAME "foo"
|
|
#include <uapi/linux/bpf.h>
|
|
#include <linux/in.h>
|
|
#include <linux/if_ether.h>
|
|
#include <linux/if_packet.h>
|
|
#include <linux/if_vlan.h>
|
|
#include <linux/ip.h>
|
|
#include <linux/ipv6.h>
|
|
|
|
#include "bpf_helpers.h"
|
|
|
|
#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF)
|
|
|
|
/* For TX-traffic redirect requires net_device ifindex to be in this devmap */
|
|
struct bpf_map_def SEC("maps") xdp_tx_ports = {
|
|
.type = BPF_MAP_TYPE_DEVMAP,
|
|
.key_size = sizeof(int),
|
|
.value_size = sizeof(int),
|
|
.max_entries = 64,
|
|
};
|
|
|
|
/* from include/net/ip.h */
|
|
static __always_inline int ip_decrease_ttl(struct iphdr *iph)
|
|
{
|
|
u32 check = (__force u32)iph->check;
|
|
|
|
check += (__force u32)htons(0x0100);
|
|
iph->check = (__force __sum16)(check + (check >= 0xFFFF));
|
|
return --iph->ttl;
|
|
}
|
|
|
|
/*
 * Shared forwarding logic for the XDP entry points in this file.
 *
 * Parses the Ethernet header and, for IPv4 or IPv6 packets, asks the
 * kernel FIB (via bpf_fib_lookup) where the packet should go. On a
 * successful lookup whose egress ifindex is present in xdp_tx_ports,
 * the TTL/hop limit is decremented, the MAC addresses are rewritten and
 * the packet is redirected through the devmap.
 *
 * @ctx:   XDP context of the packet being processed.
 * @flags: flags handed unchanged to bpf_fib_lookup().
 *
 * Returns XDP_DROP when the Ethernet or IP header does not fit in the
 * packet, the result of bpf_redirect_map() when the packet is forwarded,
 * and XDP_PASS in every other case.
 */
static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
{
	void *pkt_end = (void *)(long)ctx->data_end;
	void *pkt = (void *)(long)ctx->data;
	struct bpf_fib_lookup fib;
	struct ethhdr *eth = pkt;
	struct ipv6hdr *v6h;
	struct iphdr *v4h;
	void *l3;
	u16 proto;
	int ret;

	/* The whole Ethernet header has to lie inside the packet. */
	l3 = pkt + sizeof(*eth);
	if (l3 > pkt_end)
		return XDP_DROP;

	__builtin_memset(&fib, 0, sizeof(fib));

	proto = eth->h_proto;
	if (proto == htons(ETH_P_IP)) {
		v4h = l3;
		if ((void *)(v4h + 1) > pkt_end)
			return XDP_DROP;

		/* A TTL of 0 or 1 is not forwarded here; hand it to the stack. */
		if (v4h->ttl <= 1)
			return XDP_PASS;

		fib.family	= AF_INET;
		fib.tos		= v4h->tos;
		fib.l4_protocol	= v4h->protocol;
		fib.sport	= 0;
		fib.dport	= 0;
		fib.tot_len	= ntohs(v4h->tot_len);
		fib.ipv4_src	= v4h->saddr;
		fib.ipv4_dst	= v4h->daddr;
	} else if (proto == htons(ETH_P_IPV6)) {
		/* View the lookup's address arrays as in6_addr for struct copy. */
		struct in6_addr *fib_src = (struct in6_addr *) fib.ipv6_src;
		struct in6_addr *fib_dst = (struct in6_addr *) fib.ipv6_dst;

		v6h = l3;
		if ((void *)(v6h + 1) > pkt_end)
			return XDP_DROP;

		/* A hop limit of 0 or 1 is not forwarded here either. */
		if (v6h->hop_limit <= 1)
			return XDP_PASS;

		fib.family	= AF_INET6;
		/* First 32-bit word of the header, masked down to flowinfo. */
		fib.flowinfo	= *(__be32 *)v6h & IPV6_FLOWINFO_MASK;
		fib.l4_protocol	= v6h->nexthdr;
		fib.sport	= 0;
		fib.dport	= 0;
		fib.tot_len	= ntohs(v6h->payload_len);
		*fib_src	= v6h->saddr;
		*fib_dst	= v6h->daddr;
	} else {
		/* Any other EtherType is left to the normal network stack. */
		return XDP_PASS;
	}

	fib.ifindex = ctx->ingress_ifindex;

	ret = bpf_fib_lookup(ctx, &fib, sizeof(fib), flags);
	/*
	 * Two of the bpf_fib_lookup() return codes matter for understanding
	 * how this XDP program interacts with the network stack:
	 *
	 * BPF_FIB_LKUP_RET_NO_NEIGH:
	 *  A successful route lookup is not enough; the MAC addresses are
	 *  needed as well. They come from the ARP/neighbour table, and while
	 *  that table is (still) empty BPF_FIB_LKUP_RET_NO_NEIGH is returned.
	 *  Rather than doing ARP from XDP, the packet is sent to the normal
	 *  network stack via XDP_PASS, which is expected to do the ARP
	 *  resolution.
	 *
	 * BPF_FIB_LKUP_RET_FWD_DISABLED:
	 *  bpf_fib_lookup respects the sysctl
	 *  net.ipv{4,6}.conf.all.forwarding setting and returns
	 *  BPF_FIB_LKUP_RET_FWD_DISABLED when forwarding is not enabled on
	 *  the ingress device.
	 */
	if (ret != BPF_FIB_LKUP_RET_SUCCESS)
		return XDP_PASS;

	/*
	 * Only forward when the egress ifindex has been configured as a
	 * TX port. (The user may still have inserted an egress ifindex that
	 * does not support XDP xmit, which results in packet drops.)
	 *
	 * Note: lookup in devmap is supported since 0cdbb4b09a0.
	 * Where it is not supported, loading fails with:
	 *  cannot pass map_type 14 into func bpf_map_lookup_elem#1:
	 */
	if (!bpf_map_lookup_elem(&xdp_tx_ports, &fib.ifindex))
		return XDP_PASS;

	/* The packet is being routed: account for this hop. */
	if (proto == htons(ETH_P_IP))
		ip_decrease_ttl(v4h);
	else if (proto == htons(ETH_P_IPV6))
		v6h->hop_limit--;

	/* Rewrite the Ethernet addresses with the ones from the lookup. */
	memcpy(eth->h_dest, fib.dmac, ETH_ALEN);
	memcpy(eth->h_source, fib.smac, ETH_ALEN);

	return bpf_redirect_map(&xdp_tx_ports, fib.ifindex, 0);
}
|
|
|
|
/*
 * XDP entry point in section "xdp_fwd": runs the shared forwarding
 * logic with no bpf_fib_lookup() flags set.
 */
SEC("xdp_fwd")
int xdp_fwd_prog(struct xdp_md *ctx)
{
	const u32 lookup_flags = 0;

	return xdp_fwd_flags(ctx, lookup_flags);
}
|
|
|
|
/*
 * XDP entry point in section "xdp_fwd_direct": runs the shared
 * forwarding logic with BPF_FIB_LOOKUP_DIRECT passed to
 * bpf_fib_lookup().
 */
SEC("xdp_fwd_direct")
int xdp_fwd_direct_prog(struct xdp_md *ctx)
{
	const u32 lookup_flags = BPF_FIB_LOOKUP_DIRECT;

	return xdp_fwd_flags(ctx, lookup_flags);
}
|
|
|
|
/* License string for this object, emitted via SEC("license"). */
char _license[] SEC("license") = "GPL";
|