flow_dissector: implements eBPF parser

This eBPF program extracts basic/control/ip address/ports keys from
incoming packets. It supports recursive parsing for IP encapsulation,
and VLAN, along with IPv4/IPv6 and extension headers.  This program is
meant to show how flow dissection and key extraction can be done in
eBPF.

Link: http://vger.kernel.org/netconf2017_files/rx_hardening_and_udp_gso.pdf
Signed-off-by: Petar Penkov <ppenkov@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Petar Penkov 2018-09-14 07:46:21 -07:00 committed by Alexei Starovoitov
parent c22fbae76c
commit 9c98b13cc3
2 changed files with 374 additions and 1 deletions

View File

@ -35,7 +35,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
test_skb_cgroup_id_kern.o
test_skb_cgroup_id_kern.o bpf_flow.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \

View File

@ -0,0 +1,373 @@
// SPDX-License-Identifier: GPL-2.0
#include <limits.h>
#include <stddef.h>
#include <stdbool.h>
#include <string.h>
#include <linux/pkt_cls.h>
#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/icmp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_packet.h>
#include <sys/socket.h>
#include <linux/if_tunnel.h>
#include <linux/mpls.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
int _version SEC("version") = 1;
#define PROG(F) SEC(#F) int bpf_func_##F
/* These are the identifiers of the BPF programs that will be used in tail
* calls. Name is limited to 16 characters, with the terminating character and
* bpf_func_ above, we have only 6 to work with, anything after will be cropped.
*/
enum {
IP,
IPV6,
IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */
IPV6FR, /* Fragmentation IPv6 Extension Header */
MPLS,
VLAN,
};
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF
#define IP6_MF 0x0001
#define IP6_OFFSET 0xFFF8
struct vlan_hdr {
__be16 h_vlan_TCI;
__be16 h_vlan_encapsulated_proto;
};
struct gre_hdr {
__be16 flags;
__be16 proto;
};
struct frag_hdr {
__u8 nexthdr;
__u8 reserved;
__be16 frag_off;
__be32 identification;
};
struct bpf_map_def SEC("maps") jmp_table = {
.type = BPF_MAP_TYPE_PROG_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(__u32),
.max_entries = 8
};
static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
__u16 hdr_size,
void *buffer)
{
void *data_end = (void *)(long)skb->data_end;
void *data = (void *)(long)skb->data;
__u16 nhoff = skb->flow_keys->nhoff;
__u8 *hdr;
/* Verifies this variable offset does not overflow */
if (nhoff > (USHRT_MAX - hdr_size))
return NULL;
hdr = data + nhoff;
if (hdr + hdr_size <= data_end)
return hdr;
if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size))
return NULL;
return buffer;
}
/* Dispatches on ETHERTYPE */
static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
{
struct bpf_flow_keys *keys = skb->flow_keys;
keys->n_proto = proto;
switch (proto) {
case bpf_htons(ETH_P_IP):
bpf_tail_call(skb, &jmp_table, IP);
break;
case bpf_htons(ETH_P_IPV6):
bpf_tail_call(skb, &jmp_table, IPV6);
break;
case bpf_htons(ETH_P_MPLS_MC):
case bpf_htons(ETH_P_MPLS_UC):
bpf_tail_call(skb, &jmp_table, MPLS);
break;
case bpf_htons(ETH_P_8021Q):
case bpf_htons(ETH_P_8021AD):
bpf_tail_call(skb, &jmp_table, VLAN);
break;
default:
/* Protocol not supported */
return BPF_DROP;
}
return BPF_DROP;
}
SEC("dissect")
int dissect(struct __sk_buff *skb)
{
if (!skb->vlan_present)
return parse_eth_proto(skb, skb->protocol);
else
return parse_eth_proto(skb, skb->vlan_proto);
}
/* Parses on IPPROTO_* */
static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
{
struct bpf_flow_keys *keys = skb->flow_keys;
void *data_end = (void *)(long)skb->data_end;
struct icmphdr *icmp, _icmp;
struct gre_hdr *gre, _gre;
struct ethhdr *eth, _eth;
struct tcphdr *tcp, _tcp;
struct udphdr *udp, _udp;
keys->ip_proto = proto;
switch (proto) {
case IPPROTO_ICMP:
icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
if (!icmp)
return BPF_DROP;
return BPF_OK;
case IPPROTO_IPIP:
keys->is_encap = true;
return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
case IPPROTO_IPV6:
keys->is_encap = true;
return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
case IPPROTO_GRE:
gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
if (!gre)
return BPF_DROP;
if (bpf_htons(gre->flags & GRE_VERSION))
/* Only inspect standard GRE packets with version 0 */
return BPF_OK;
keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */
if (GRE_IS_CSUM(gre->flags))
keys->nhoff += 4; /* Step over chksum and Padding */
if (GRE_IS_KEY(gre->flags))
keys->nhoff += 4; /* Step over key */
if (GRE_IS_SEQ(gre->flags))
keys->nhoff += 4; /* Step over sequence number */
keys->is_encap = true;
if (gre->proto == bpf_htons(ETH_P_TEB)) {
eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
&_eth);
if (!eth)
return BPF_DROP;
keys->nhoff += sizeof(*eth);
return parse_eth_proto(skb, eth->h_proto);
} else {
return parse_eth_proto(skb, gre->proto);
}
case IPPROTO_TCP:
tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
if (!tcp)
return BPF_DROP;
if (tcp->doff < 5)
return BPF_DROP;
if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
return BPF_DROP;
keys->thoff = keys->nhoff;
keys->sport = tcp->source;
keys->dport = tcp->dest;
return BPF_OK;
case IPPROTO_UDP:
case IPPROTO_UDPLITE:
udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
if (!udp)
return BPF_DROP;
keys->thoff = keys->nhoff;
keys->sport = udp->source;
keys->dport = udp->dest;
return BPF_OK;
default:
return BPF_DROP;
}
return BPF_DROP;
}
static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
{
struct bpf_flow_keys *keys = skb->flow_keys;
keys->ip_proto = nexthdr;
switch (nexthdr) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
bpf_tail_call(skb, &jmp_table, IPV6OP);
break;
case IPPROTO_FRAGMENT:
bpf_tail_call(skb, &jmp_table, IPV6FR);
break;
default:
return parse_ip_proto(skb, nexthdr);
}
return BPF_DROP;
}
PROG(IP)(struct __sk_buff *skb)
{
void *data_end = (void *)(long)skb->data_end;
struct bpf_flow_keys *keys = skb->flow_keys;
void *data = (void *)(long)skb->data;
struct iphdr *iph, _iph;
bool done = false;
iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
if (!iph)
return BPF_DROP;
/* IP header cannot be smaller than 20 bytes */
if (iph->ihl < 5)
return BPF_DROP;
keys->addr_proto = ETH_P_IP;
keys->ipv4_src = iph->saddr;
keys->ipv4_dst = iph->daddr;
keys->nhoff += iph->ihl << 2;
if (data + keys->nhoff > data_end)
return BPF_DROP;
if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
keys->is_frag = true;
if (iph->frag_off & bpf_htons(IP_OFFSET))
/* From second fragment on, packets do not have headers
* we can parse.
*/
done = true;
else
keys->is_first_frag = true;
}
if (done)
return BPF_OK;
return parse_ip_proto(skb, iph->protocol);
}
PROG(IPV6)(struct __sk_buff *skb)
{
struct bpf_flow_keys *keys = skb->flow_keys;
struct ipv6hdr *ip6h, _ip6h;
ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
if (!ip6h)
return BPF_DROP;
keys->addr_proto = ETH_P_IPV6;
memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
keys->nhoff += sizeof(struct ipv6hdr);
return parse_ipv6_proto(skb, ip6h->nexthdr);
}
PROG(IPV6OP)(struct __sk_buff *skb)
{
struct ipv6_opt_hdr *ip6h, _ip6h;
ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
if (!ip6h)
return BPF_DROP;
/* hlen is in 8-octets and does not include the first 8 bytes
* of the header
*/
skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3;
return parse_ipv6_proto(skb, ip6h->nexthdr);
}
PROG(IPV6FR)(struct __sk_buff *skb)
{
struct bpf_flow_keys *keys = skb->flow_keys;
struct frag_hdr *fragh, _fragh;
fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
if (!fragh)
return BPF_DROP;
keys->nhoff += sizeof(*fragh);
keys->is_frag = true;
if (!(fragh->frag_off & bpf_htons(IP6_OFFSET)))
keys->is_first_frag = true;
return parse_ipv6_proto(skb, fragh->nexthdr);
}
PROG(MPLS)(struct __sk_buff *skb)
{
struct mpls_label *mpls, _mpls;
mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
if (!mpls)
return BPF_DROP;
return BPF_OK;
}
PROG(VLAN)(struct __sk_buff *skb)
{
struct bpf_flow_keys *keys = skb->flow_keys;
struct vlan_hdr *vlan, _vlan;
__be16 proto;
/* Peek back to see if single or double-tagging */
if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto,
sizeof(proto)))
return BPF_DROP;
/* Account for double-tagging */
if (proto == bpf_htons(ETH_P_8021AD)) {
vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
if (!vlan)
return BPF_DROP;
if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
return BPF_DROP;
keys->nhoff += sizeof(*vlan);
}
vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
if (!vlan)
return BPF_DROP;
keys->nhoff += sizeof(*vlan);
/* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
return BPF_DROP;
return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
}
char __license[] SEC("license") = "GPL";